diff --git a/.gitignore b/.gitignore index fb410e1d..fec3e9a0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ -dist -artifacts +/dist +/artifacts *.swp *.swo /coverage.out* @@ -10,4 +10,4 @@ artifacts /nvidia-container-toolkit /nvidia-ctk /shared-* -/release-* \ No newline at end of file +/release-* diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f90bb629..a9da30a0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -176,12 +176,6 @@ image-packaging: optional: true # Define publish test helpers -.test:toolkit: - extends: - - .integration - variables: - TEST_CASES: "toolkit" - .test:docker: extends: - .integration diff --git a/deployments/container/Makefile b/deployments/container/Makefile index bd990bd1..3d838551 100644 --- a/deployments/container/Makefile +++ b/deployments/container/Makefile @@ -122,7 +122,7 @@ build-packaging: PACKAGE_DIST = all # Test targets test-%: DIST = $(*) -TEST_CASES ?= toolkit docker crio containerd +TEST_CASES ?= docker crio containerd $(TEST_TARGETS): test-%: TEST_CASES="$(TEST_CASES)" bash -x $(CURDIR)/test/container/main.sh run \ $(CURDIR)/shared-$(*) \ diff --git a/internal/lookup/device.go b/internal/lookup/device.go index 8ec6c4c9..e4fbe330 100644 --- a/internal/lookup/device.go +++ b/internal/lookup/device.go @@ -28,9 +28,15 @@ const ( // NewCharDeviceLocator creates a Locator that can be used to find char devices at the specified root. A logger is // also specified. func NewCharDeviceLocator(opts ...Option) Locator { + filter := assertCharDevice + // TODO: We should have a better way to inject this logic than this envvar. 
+ if os.Getenv("__NVCT_TESTING_DEVICES_ARE_FILES") == "true" { + filter = assertFile + } + opts = append(opts, WithSearchPaths("", devRoot), - WithFilter(assertCharDevice), + WithFilter(filter), ) return NewFileLocator( opts..., diff --git a/test/container/main.sh b/test/container/main.sh index 9b6c91f9..4b64f154 100644 --- a/test/container/main.sh +++ b/test/container/main.sh @@ -19,7 +19,6 @@ shopt -s lastpipe readonly basedir="$(dirname "$(realpath "$0")")" source "${basedir}/common.sh" -source "${basedir}/toolkit_test.sh" source "${basedir}/docker_test.sh" source "${basedir}/crio_test.sh" source "${basedir}/containerd_test.sh" @@ -66,7 +65,7 @@ done trap '"$CLEANUP" && testing::cleanup' ERR -readonly test_cases="${TEST_CASES:-toolkit docker crio containerd}" +readonly test_cases="${TEST_CASES:-docker crio containerd}" testing::cleanup for tc in ${test_cases}; do diff --git a/test/container/toolkit_test.sh b/test/container/toolkit_test.sh deleted file mode 100644 index 46d2e206..00000000 --- a/test/container/toolkit_test.sh +++ /dev/null @@ -1,76 +0,0 @@ -#! /bin/bash -# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -testing::toolkit::install() { - local -r uid=$(id -u) - local -r gid=$(id -g) - - local READLINK="readlink" - local -r platform=$(uname) - if [[ "${platform}" == "Darwin" ]]; then - READLINK="greadlink" - fi - - testing::docker_run::toolkit::shell 'toolkit install --toolkit-root=/usr/local/nvidia/toolkit' - docker run --rm -v "${shared_dir}:/work" alpine sh -c "chown -R ${uid}:${gid} /work/" - - # Ensure toolkit dir is correctly setup - test ! -z "$(ls -A "${shared_dir}/usr/local/nvidia/toolkit")" - - test -L "${shared_dir}/usr/local/nvidia/toolkit/libnvidia-container.so.1" - test -e "$(${READLINK} -f "${shared_dir}/usr/local/nvidia/toolkit/libnvidia-container.so.1")" - test -L "${shared_dir}/usr/local/nvidia/toolkit/libnvidia-container-go.so.1" - test -e "$(${READLINK} -f "${shared_dir}/usr/local/nvidia/toolkit/libnvidia-container-go.so.1")" - - test -e "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-cli" - test -e "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime-hook" - test -L "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-toolkit" - test -e "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime" - - grep -q -E "nvidia driver modules are not yet loaded, invoking runc directly" "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime" - grep -q -E "exec runc \".@\"" "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime" - - test -e "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-cli.real" - test -e "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime-hook.real" - test -e "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime.real" - - test -e "${shared_dir}/usr/local/nvidia/toolkit/.config/nvidia-container-runtime/config.toml" - - # Ensure that the config file has the required contents. 
- # NOTE: This assumes that RUN_DIR is '/run/nvidia' - local -r nvidia_run_dir="/run/nvidia" - grep -q -E "^\s*ldconfig = \"@${nvidia_run_dir}/driver/sbin/ldconfig(.real)?\"" "${shared_dir}/usr/local/nvidia/toolkit/.config/nvidia-container-runtime/config.toml" - grep -q -E "^\s*root = \"${nvidia_run_dir}/driver\"" "${shared_dir}/usr/local/nvidia/toolkit/.config/nvidia-container-runtime/config.toml" - grep -q -E "^\s*path = \"/usr/local/nvidia/toolkit/nvidia-container-cli\"" "${shared_dir}/usr/local/nvidia/toolkit/.config/nvidia-container-runtime/config.toml" - grep -q -E "^\s*path = \"/usr/local/nvidia/toolkit/nvidia-ctk\"" "${shared_dir}/usr/local/nvidia/toolkit/.config/nvidia-container-runtime/config.toml" -} - -testing::toolkit::delete() { - testing::docker_run::toolkit::shell 'mkdir -p /usr/local/nvidia/delete-toolkit' - testing::docker_run::toolkit::shell 'touch /usr/local/nvidia/delete-toolkit/test.file' - testing::docker_run::toolkit::shell 'toolkit delete --toolkit-root=/usr/local/nvidia/delete-toolkit' - - test ! -z "$(ls -A "${shared_dir}/usr/local/nvidia")" - test ! 
-e "${shared_dir}/usr/local/nvidia/delete-toolkit" -} - -testing::toolkit::main() { - testing::toolkit::install - testing::toolkit::delete -} - -testing::toolkit::cleanup() { - : -} diff --git a/testdata/installer/artifacts/deb/usr/bin/nvidia-cdi-hook b/testdata/installer/artifacts/deb/usr/bin/nvidia-cdi-hook new file mode 100755 index 00000000..667d97d4 --- /dev/null +++ b/testdata/installer/artifacts/deb/usr/bin/nvidia-cdi-hook @@ -0,0 +1 @@ +nvidia-cdi-hook diff --git a/testdata/installer/artifacts/deb/usr/bin/nvidia-container-cli b/testdata/installer/artifacts/deb/usr/bin/nvidia-container-cli new file mode 100755 index 00000000..4ccf7b79 --- /dev/null +++ b/testdata/installer/artifacts/deb/usr/bin/nvidia-container-cli @@ -0,0 +1 @@ +nvidia-container-cli diff --git a/testdata/installer/artifacts/deb/usr/bin/nvidia-container-runtime b/testdata/installer/artifacts/deb/usr/bin/nvidia-container-runtime new file mode 100755 index 00000000..2bff7238 --- /dev/null +++ b/testdata/installer/artifacts/deb/usr/bin/nvidia-container-runtime @@ -0,0 +1 @@ +nvidia-container-runtime diff --git a/testdata/installer/artifacts/deb/usr/bin/nvidia-container-runtime-hook b/testdata/installer/artifacts/deb/usr/bin/nvidia-container-runtime-hook new file mode 100755 index 00000000..b09c13bf --- /dev/null +++ b/testdata/installer/artifacts/deb/usr/bin/nvidia-container-runtime-hook @@ -0,0 +1 @@ +nvidia-container-runtime-hook diff --git a/testdata/installer/artifacts/deb/usr/bin/nvidia-container-runtime.cdi b/testdata/installer/artifacts/deb/usr/bin/nvidia-container-runtime.cdi new file mode 100755 index 00000000..7588394b --- /dev/null +++ b/testdata/installer/artifacts/deb/usr/bin/nvidia-container-runtime.cdi @@ -0,0 +1 @@ +nvidia-container-runtime.cdi diff --git a/testdata/installer/artifacts/deb/usr/bin/nvidia-container-runtime.legacy b/testdata/installer/artifacts/deb/usr/bin/nvidia-container-runtime.legacy new file mode 100755 index 00000000..121fce5e --- /dev/null +++ 
b/testdata/installer/artifacts/deb/usr/bin/nvidia-container-runtime.legacy @@ -0,0 +1 @@ +nvidia-container-runtime.legacy diff --git a/testdata/installer/artifacts/deb/usr/bin/nvidia-ctk b/testdata/installer/artifacts/deb/usr/bin/nvidia-ctk new file mode 100755 index 00000000..86b95e40 --- /dev/null +++ b/testdata/installer/artifacts/deb/usr/bin/nvidia-ctk @@ -0,0 +1 @@ +nvidia-ctk diff --git a/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container-go.so b/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container-go.so new file mode 120000 index 00000000..843e6e7a --- /dev/null +++ b/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container-go.so @@ -0,0 +1 @@ +libnvidia-container-go.so.1 \ No newline at end of file diff --git a/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container-go.so.1 b/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container-go.so.1 new file mode 120000 index 00000000..545965a8 --- /dev/null +++ b/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container-go.so.1 @@ -0,0 +1 @@ +libnvidia-container-go.so.99.88.77 \ No newline at end of file diff --git a/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container-go.so.99.88.77 b/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container-go.so.99.88.77 new file mode 100644 index 00000000..e69de29b diff --git a/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container.so b/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container.so new file mode 120000 index 00000000..f2bc5d78 --- /dev/null +++ b/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container.so @@ -0,0 +1 @@ +libnvidia-container.so.1 \ No newline at end of file diff --git a/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container.so.1 
b/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container.so.1 new file mode 120000 index 00000000..5e129c11 --- /dev/null +++ b/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container.so.1 @@ -0,0 +1 @@ +libnvidia-container.so.99.88.77 \ No newline at end of file diff --git a/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container.so.99.88.77 b/testdata/installer/artifacts/deb/usr/lib/x86_64-linux-gnu/libnvidia-container.so.99.88.77 new file mode 100644 index 00000000..e69de29b diff --git a/testdata/installer/artifacts/rpm/usr/bin/nvidia-cdi-hook b/testdata/installer/artifacts/rpm/usr/bin/nvidia-cdi-hook new file mode 100755 index 00000000..667d97d4 --- /dev/null +++ b/testdata/installer/artifacts/rpm/usr/bin/nvidia-cdi-hook @@ -0,0 +1 @@ +nvidia-cdi-hook diff --git a/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-cli b/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-cli new file mode 100755 index 00000000..4ccf7b79 --- /dev/null +++ b/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-cli @@ -0,0 +1 @@ +nvidia-container-cli diff --git a/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-runtime b/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-runtime new file mode 100755 index 00000000..2bff7238 --- /dev/null +++ b/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-runtime @@ -0,0 +1 @@ +nvidia-container-runtime diff --git a/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-runtime-hook b/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-runtime-hook new file mode 100755 index 00000000..b09c13bf --- /dev/null +++ b/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-runtime-hook @@ -0,0 +1 @@ +nvidia-container-runtime-hook diff --git a/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-runtime.cdi b/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-runtime.cdi new file mode 100755 index 00000000..7588394b --- 
/dev/null +++ b/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-runtime.cdi @@ -0,0 +1 @@ +nvidia-container-runtime.cdi diff --git a/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-runtime.legacy b/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-runtime.legacy new file mode 100755 index 00000000..121fce5e --- /dev/null +++ b/testdata/installer/artifacts/rpm/usr/bin/nvidia-container-runtime.legacy @@ -0,0 +1 @@ +nvidia-container-runtime.legacy diff --git a/testdata/installer/artifacts/rpm/usr/bin/nvidia-ctk b/testdata/installer/artifacts/rpm/usr/bin/nvidia-ctk new file mode 100755 index 00000000..86b95e40 --- /dev/null +++ b/testdata/installer/artifacts/rpm/usr/bin/nvidia-ctk @@ -0,0 +1 @@ +nvidia-ctk diff --git a/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container-go.so b/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container-go.so new file mode 120000 index 00000000..843e6e7a --- /dev/null +++ b/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container-go.so @@ -0,0 +1 @@ +libnvidia-container-go.so.1 \ No newline at end of file diff --git a/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container-go.so.1 b/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container-go.so.1 new file mode 120000 index 00000000..545965a8 --- /dev/null +++ b/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container-go.so.1 @@ -0,0 +1 @@ +libnvidia-container-go.so.99.88.77 \ No newline at end of file diff --git a/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container-go.so.99.88.77 b/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container-go.so.99.88.77 new file mode 100644 index 00000000..e69de29b diff --git a/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container.so b/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container.so new file mode 120000 index 00000000..f2bc5d78 --- /dev/null +++ b/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container.so @@ -0,0 +1 @@ 
+libnvidia-container.so.1 \ No newline at end of file diff --git a/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container.so.1 b/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container.so.1 new file mode 120000 index 00000000..5e129c11 --- /dev/null +++ b/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container.so.1 @@ -0,0 +1 @@ +libnvidia-container.so.99.88.77 \ No newline at end of file diff --git a/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container.so.99.88.77 b/testdata/installer/artifacts/rpm/usr/lib64/libnvidia-container.so.99.88.77 new file mode 100644 index 00000000..e69de29b diff --git a/testdata/lookup/rootfs-1/dev/nvidia0 b/testdata/lookup/rootfs-1/dev/nvidia0 new file mode 100644 index 00000000..e69de29b diff --git a/testdata/lookup/rootfs-1/dev/nvidiactl b/testdata/lookup/rootfs-1/dev/nvidiactl new file mode 100644 index 00000000..e69de29b diff --git a/tools/container/nvidia-toolkit/run.go b/tools/container/nvidia-toolkit/run.go index 265814a5..842d7099 100644 --- a/tools/container/nvidia-toolkit/run.go +++ b/tools/container/nvidia-toolkit/run.go @@ -164,7 +164,7 @@ func Run(c *cli.Context, o *options) error { o.toolkitOptions.ContainerRuntimeRuntimes = *cli.NewStringSlice(lowlevelRuntimePaths...) 
} - err = toolkit.Install(c, &o.toolkitOptions, o.toolkitRoot()) + err = toolkit.Install(c, &o.toolkitOptions, "", o.toolkitRoot()) if err != nil { return fmt.Errorf("unable to install toolkit: %v", err) } diff --git a/tools/container/toolkit/runtime.go b/tools/container/toolkit/runtime.go index bdfca983..7bdf7f77 100644 --- a/tools/container/toolkit/runtime.go +++ b/tools/container/toolkit/runtime.go @@ -29,10 +29,10 @@ const ( // installContainerRuntimes sets up the NVIDIA container runtimes, copying the executables // and implementing the required wrapper -func installContainerRuntimes(toolkitDir string, driverRoot string) error { +func installContainerRuntimes(sourceRoot string, toolkitDir string) error { runtimes := operator.GetRuntimes() for _, runtime := range runtimes { - r := newNvidiaContainerRuntimeInstaller(runtime.Path) + r := newNvidiaContainerRuntimeInstaller(filepath.Join(sourceRoot, runtime.Path)) _, err := r.install(toolkitDir) if err != nil { diff --git a/tools/container/toolkit/toolkit.go b/tools/container/toolkit/toolkit.go index 43e68ca5..9b97b419 100644 --- a/tools/container/toolkit/toolkit.go +++ b/tools/container/toolkit/toolkit.go @@ -285,8 +285,9 @@ func TryDelete(cli *cli.Context, toolkitRoot string) error { } // Install installs the components of the NVIDIA container toolkit. +// The specified sourceRoot is searched for the components to install. // Any existing installation is removed. 
-func Install(cli *cli.Context, opts *Options, toolkitRoot string) error { +func Install(cli *cli.Context, opts *Options, sourceRoot string, toolkitRoot string) error { log.Infof("Installing NVIDIA container toolkit to '%v'", toolkitRoot) log.Infof("Removing existing NVIDIA container toolkit installation") @@ -307,42 +308,42 @@ func Install(cli *cli.Context, opts *Options, toolkitRoot string) error { log.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err)) } - err = installContainerLibraries(toolkitRoot) + err = installContainerLibraries(sourceRoot, toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container library: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err)) } - err = installContainerRuntimes(toolkitRoot, opts.DriverRoot) + err = installContainerRuntimes(sourceRoot, toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container runtime: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime: %v", err)) } - nvidiaContainerCliExecutable, err := installContainerCLI(toolkitRoot) + nvidiaContainerCliExecutable, err := installContainerCLI(sourceRoot, toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container CLI: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err)) } - nvidiaContainerRuntimeHookPath, err := installRuntimeHook(toolkitRoot, toolkitConfigPath) + nvidiaContainerRuntimeHookPath, err := installRuntimeHook(sourceRoot, toolkitRoot, toolkitConfigPath) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime 
hook: %v", err)) } - nvidiaCTKPath, err := installContainerToolkitCLI(toolkitRoot) + nvidiaCTKPath, err := installContainerToolkitCLI(sourceRoot, toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)) } - nvidiaCDIHookPath, err := installContainerCDIHookCLI(toolkitRoot) + nvidiaCDIHookPath, err := installContainerCDIHookCLI(sourceRoot, toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err) } else if err != nil { @@ -378,7 +379,7 @@ func Install(cli *cli.Context, opts *Options, toolkitRoot string) error { // A predefined set of library candidates are considered, with the first one // resulting in success being installed to the toolkit folder. The install process // resolves the symlink for the library and copies the versioned library itself. -func installContainerLibraries(toolkitRoot string) error { +func installContainerLibraries(sourceRoot string, toolkitRoot string) error { log.Infof("Installing NVIDIA container library to '%v'", toolkitRoot) libs := []string{ @@ -387,7 +388,7 @@ func installContainerLibraries(toolkitRoot string) error { } for _, l := range libs { - err := installLibrary(l, toolkitRoot) + err := installLibrary(l, sourceRoot, toolkitRoot) if err != nil { return fmt.Errorf("failed to install %s: %v", l, err) } @@ -397,8 +398,8 @@ func installContainerLibraries(toolkitRoot string) error { } // installLibrary installs the specified library to the toolkit directory. 
-func installLibrary(libName string, toolkitRoot string) error { - libraryPath, err := findLibrary("", libName) +func installLibrary(libName string, sourceRoot string, toolkitRoot string) error { + libraryPath, err := findLibrary(sourceRoot, libName) if err != nil { return fmt.Errorf("error locating NVIDIA container library: %v", err) } @@ -524,9 +525,9 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai } // installContainerToolkitCLI installs the nvidia-ctk CLI executable and wrapper. -func installContainerToolkitCLI(toolkitDir string) (string, error) { +func installContainerToolkitCLI(sourceRoot string, toolkitDir string) (string, error) { e := executable{ - source: "/usr/bin/nvidia-ctk", + source: filepath.Join(sourceRoot, "/usr/bin/nvidia-ctk"), target: executableTarget{ dotfileName: "nvidia-ctk.real", wrapperName: "nvidia-ctk", @@ -537,9 +538,9 @@ func installContainerToolkitCLI(toolkitDir string) (string, error) { } // installContainerCDIHookCLI installs the nvidia-cdi-hook CLI executable and wrapper. 
-func installContainerCDIHookCLI(toolkitDir string) (string, error) { +func installContainerCDIHookCLI(sourceRoot string, toolkitDir string) (string, error) { e := executable{ - source: "/usr/bin/nvidia-cdi-hook", + source: filepath.Join(sourceRoot, "/usr/bin/nvidia-cdi-hook"), target: executableTarget{ dotfileName: "nvidia-cdi-hook.real", wrapperName: "nvidia-cdi-hook", @@ -551,7 +552,7 @@ func installContainerCDIHookCLI(toolkitDir string) (string, error) { // installContainerCLI sets up the NVIDIA container CLI executable, copying the executable // and implementing the required wrapper -func installContainerCLI(toolkitRoot string) (string, error) { +func installContainerCLI(sourceRoot string, toolkitRoot string) (string, error) { log.Infof("Installing NVIDIA container CLI from '%v'", nvidiaContainerCliSource) env := map[string]string{ @@ -559,7 +560,7 @@ func installContainerCLI(toolkitRoot string) (string, error) { } e := executable{ - source: nvidiaContainerCliSource, + source: filepath.Join(sourceRoot, nvidiaContainerCliSource), target: executableTarget{ dotfileName: "nvidia-container-cli.real", wrapperName: "nvidia-container-cli", @@ -576,7 +577,7 @@ func installContainerCLI(toolkitRoot string) (string, error) { // installRuntimeHook sets up the NVIDIA runtime hook, copying the executable // and implementing the required wrapper -func installRuntimeHook(toolkitRoot string, configFilePath string) (string, error) { +func installRuntimeHook(sourceRoot string, toolkitRoot string, configFilePath string) (string, error) { log.Infof("Installing NVIDIA container runtime hook from '%v'", nvidiaContainerRuntimeHookSource) argLines := []string{ @@ -584,7 +585,7 @@ func installRuntimeHook(toolkitRoot string, configFilePath string) (string, erro } e := executable{ - source: nvidiaContainerRuntimeHookSource, + source: filepath.Join(sourceRoot, nvidiaContainerRuntimeHookSource), target: executableTarget{ dotfileName: "nvidia-container-runtime-hook.real", wrapperName: 
"nvidia-container-runtime-hook",
diff --git a/tools/container/toolkit/toolkit_test.go b/tools/container/toolkit/toolkit_test.go
new file mode 100644
index 00000000..bab94c4a
--- /dev/null
+++ b/tools/container/toolkit/toolkit_test.go
@@ -0,0 +1,220 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package toolkit
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"github.com/urfave/cli/v2"
+
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
+)
+
+// TestInstall runs Install against the fake deb and rpm package trees under
+// testdata/installer/artifacts and checks the resulting toolkit directory, the
+// generated config.toml and, where expected, the generated CDI specification.
+func TestInstall(t *testing.T) {
+	// Let the regular files under testdata/lookup stand in for char devices.
+	t.Setenv("__NVCT_TESTING_DEVICES_ARE_FILES", "true")
+
+	moduleRoot, err := test.GetModuleRoot()
+	require.NoError(t, err)
+
+	artifactRoot := filepath.Join(moduleRoot, "testdata", "installer", "artifacts")
+
+	testCases := []struct {
+		description     string
+		hostRoot        string
+		packageType     string
+		cdiEnabled      bool
+		expectedError   error
+		expectedCdiSpec string
+	}{
+		{
+			description: "deb package; CDI disabled",
+			hostRoot:    "rootfs-empty",
+			packageType: "deb",
+		},
+		{
+			description: "rpm package; CDI disabled",
+			hostRoot:    "rootfs-empty",
+			packageType: "rpm",
+		},
+		{
+			description:   "deb package; CDI enabled; no device nodes",
+			hostRoot:      "rootfs-empty",
+			packageType:   "deb",
+			cdiEnabled:    true,
+			expectedError: fmt.Errorf("no NVIDIA device nodes found"),
+		},
+		{
+			description: "deb package; CDI enabled; device nodes present",
+			hostRoot:    "rootfs-1",
+			packageType: "deb",
+			cdiEnabled:  true,
+			expectedCdiSpec: `---
+cdiVersion: 0.5.0
+containerEdits:
+  env:
+  - NVIDIA_VISIBLE_DEVICES=void
+  hooks:
+  - args:
+    - nvidia-cdi-hook
+    - create-symlinks
+    - --link
+    - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
+    hookName: createContainer
+    path: {{ .toolkitRoot }}/nvidia-cdi-hook
+  - args:
+    - nvidia-cdi-hook
+    - update-ldcache
+    - --folder
+    - /lib/x86_64-linux-gnu
+    hookName: createContainer
+    path: {{ .toolkitRoot }}/nvidia-cdi-hook
+  mounts:
+  - containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
+    hostPath: /host/driver/root/lib/x86_64-linux-gnu/libcuda.so.999.88.77
+    options:
+    - ro
+    - nosuid
+    - nodev
+    - bind
+devices:
+- containerEdits:
+    deviceNodes:
+    - hostPath: /host/driver/root/dev/nvidia0
+      path: /dev/nvidia0
+    - hostPath: /host/driver/root/dev/nvidiactl
+      path: /dev/nvidiactl
+  name: all
+kind: example.com/class
+`,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			testRoot := t.TempDir()
+			toolkitRoot := filepath.Join(testRoot, "toolkit-test")
+			// Keep generated CDI specs in the per-test temporary directory, not the module tree.
+			cdiOutputDir := filepath.Join(testRoot, "/var/cdi")
+			sourceRoot := filepath.Join(artifactRoot, tc.packageType)
+			options := Options{
+				DriverRoot:        "/host/driver/root",
+				DriverRootCtrPath: filepath.Join(moduleRoot, "testdata", "lookup", tc.hostRoot),
+				cdiEnabled:        tc.cdiEnabled,
+				cdiOutputDir:      cdiOutputDir,
+				cdiKind:           "example.com/class",
+			}
+
+			require.NoError(t, ValidateOptions(&options, toolkitRoot))
+
+			err := Install(&cli.Context{}, &options, sourceRoot, toolkitRoot)
+			if tc.expectedError == nil {
+				require.NoError(t, err)
+			} else {
+				// ErrorContains reports a plain failure (rather than panicking) if err is nil.
+				require.ErrorContains(t, err, tc.expectedError.Error())
+			}
+
+			require.DirExists(t, toolkitRoot)
+			requireSymlink(t, toolkitRoot, "libnvidia-container.so.1", "libnvidia-container.so.99.88.77")
+			requireSymlink(t, toolkitRoot, "libnvidia-container-go.so.1", "libnvidia-container-go.so.99.88.77")
+
+			requireWrappedExecutable(t, toolkitRoot, "nvidia-cdi-hook")
+			requireWrappedExecutable(t, toolkitRoot, "nvidia-container-cli")
+			requireWrappedExecutable(t, toolkitRoot, "nvidia-container-runtime")
+			requireWrappedExecutable(t, toolkitRoot, "nvidia-container-runtime-hook")
+			requireWrappedExecutable(t, toolkitRoot, "nvidia-container-runtime.cdi")
+			requireWrappedExecutable(t, toolkitRoot, "nvidia-container-runtime.legacy")
+			requireWrappedExecutable(t, toolkitRoot, "nvidia-ctk")
+
+			requireSymlink(t, toolkitRoot, "nvidia-container-toolkit", "nvidia-container-runtime-hook")
+
+			// TODO: Add checks for wrapper contents
+			// grep -q -E "nvidia driver modules are not yet loaded, invoking runc directly" "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime"
+			// grep -q -E "exec runc \".@\"" "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime"
+
+			require.DirExists(t, filepath.Join(toolkitRoot, ".config"))
+			require.DirExists(t, filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime"))
+			require.FileExists(t, filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime", "config.toml"))
+
+			cfgToml, err := config.New(config.WithConfigFile(filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime", "config.toml")))
+			require.NoError(t, err)
+
+			cfg, err := cfgToml.Config()
+			require.NoError(t, err)
+
+			// Ensure that the config file has the required contents.
+			// TODO: Add checks for additional config options.
+			require.Equal(t, "/host/driver/root", cfg.NVIDIAContainerCLIConfig.Root)
+			require.Equal(t, "@/host/driver/root/sbin/ldconfig", cfg.NVIDIAContainerCLIConfig.Ldconfig)
+			require.EqualValues(t, filepath.Join(toolkitRoot, "nvidia-container-cli"), cfg.NVIDIAContainerCLIConfig.Path)
+			require.EqualValues(t, filepath.Join(toolkitRoot, "nvidia-ctk"), cfg.NVIDIACTKConfig.Path)
+
+			if len(tc.expectedCdiSpec) > 0 {
+				cdiSpecFile := filepath.Join(cdiOutputDir, "example.com-class.yaml")
+				require.FileExists(t, cdiSpecFile)
+				info, err := os.Stat(cdiSpecFile)
+				require.NoError(t, err)
+				require.NotZero(t, info.Mode()&0004)
+				contents, err := os.ReadFile(cdiSpecFile)
+				require.NoError(t, err)
+				require.Equal(t, strings.ReplaceAll(tc.expectedCdiSpec, "{{ .toolkitRoot }}", toolkitRoot), string(contents))
+			}
+		})
+	}
+}
+
+// requireWrappedExecutable asserts that toolkitRoot contains both the named
+// executable and its ".real" counterpart, each satisfying requireExecutable.
+func requireWrappedExecutable(t *testing.T, toolkitRoot string, expectedExecutable string) {
+	t.Helper()
+	requireExecutable(t, toolkitRoot, expectedExecutable)
+	requireExecutable(t, toolkitRoot, expectedExecutable+".real")
+}
+
+// requireExecutable asserts that the named file exists in toolkitRoot, is not
+// itself a symlink, and has at least one execute permission bit set.
+func requireExecutable(t *testing.T, toolkitRoot string, expectedExecutable string) {
+	t.Helper()
+	executable := filepath.Join(toolkitRoot, expectedExecutable)
+	require.FileExists(t, executable)
+	info, err := os.Lstat(executable)
+	require.NoError(t, err)
+	require.Zero(t, info.Mode()&os.ModeSymlink)
+	require.NotZero(t, info.Mode()&0111)
+}
+
+// requireSymlink asserts that expectedLink exists in toolkitRoot and that
+// symlinks.Resolve reports expectedTarget as its target.
+func requireSymlink(t *testing.T, toolkitRoot string, expectedLink string, expectedTarget string) {
+	t.Helper()
+	link := filepath.Join(toolkitRoot, expectedLink)
+	require.FileExists(t, link)
+	target, err := symlinks.Resolve(link)
+	require.NoError(t, err)
+	require.Equal(t, expectedTarget, target)
+}