/* * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package e2e import ( "context" "fmt" "strings" "text/template" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) const ( libnvidiaContainerCliDockerRunTemplate = ` docker run -d --name test-nvidia-container-cli \ --privileged \ --runtime=nvidia \ -e NVIDIA_VISIBLE_DEVICES=all \ -e NVIDIA_DRIVER_CAPABILITIES=all \ -v $HOME/libnvidia-container-cli.sh:/usr/local/bin/libnvidia-container-cli.sh \ -v {{.NvidiaContainerCliSrc}}:/usr/bin/nvidia-container-cli \ -v {{.NvidiaContainerCliRealSrc}}:{{.NvidiaContainerCliRealTarget}} \ -v {{.NvidiaCtkSrc}}:/usr/bin/nvidia-ctk \ -v {{.NvidiaCtkRealSrc}}:{{.NvidiaCtkRealTarget}} \ -v {{.NvidiaContainerRuntimeSrc}}:/usr/bin/nvidia-container-runtime \ -v {{.NvidiaContainerRuntimeRealSrc}}:{{.NvidiaContainerRuntimeRealTarget}} \ -v {{.NvidiaContainerRuntimeHookSrc}}:/usr/bin/nvidia-container-runtime-hook \ -v {{.NvidiaContainerRuntimeHookRealSrc}}:{{.NvidiaContainerRuntimeHookRealTarget}} \ -v {{.NvidiaContainerToolkitSrc}}:/usr/bin/nvidia-container-toolkit \ -v {{.NvidiaCdiHookSrc}}:/usr/bin/nvidia-cdi-hook \ -v {{.NvidiaCdiHookRealSrc}}:{{.NvidiaCdiHookRealTarget}} \ -v {{.NvidiaContainerRuntimeCdiSrc}}:/usr/bin/nvidia-container-runtime.cdi \ -v {{.NvidiaContainerRuntimeCdiRealSrc}}:{{.NvidiaContainerRuntimeCdiRealTarget}} \ -v {{.NvidiaContainerRuntimeLegacySrc}}:/usr/bin/nvidia-container-runtime.legacy \ -v {{.NvidiaContainerRuntimeLegacyRealSrc}}:{{.NvidiaContainerRuntimeLegacyRealTarget}} \ -v {{.ToolkitDir}}/toolkit:/usr/local/nvidia/toolkit \ -v /etc/nvidia-container-runtime:/etc/nvidia-container-runtime \ -v {{.LibNvidiaContainerSo1Src}}:/usr/lib/x86_64-linux-gnu/libnvidia-container.so.1 \ -v {{.LibNvidiaContainerTargetSrc}}:/usr/lib/x86_64-linux-gnu/{{.LibNvidiaContainerTarget}} \ -v {{.LibNvidiaContainerGoSo1Src}}:/usr/lib/x86_64-linux-gnu/libnvidia-container-go.so.1 \ -v {{.LibNvidiaContainerGoTargetSrc}}:/usr/lib/x86_64-linux-gnu/{{.LibNvidiaContainerGoTarget}} \ -e LD_LIBRARY_PATH=/usr/lib64:/usr/lib/x86_64-linux-gnu:/usr/lib/aarch64-linux-gnu:/lib64:/lib/x86_64-linux-gnu:/lib/aarch64-linux-gnu \ --entrypoint /usr/local/bin/libnvidia-container-cli.sh \ ubuntu ` libnvidiaContainerCliTestTemplate = `#!/usr/bin/env bash set -euo pipefail apt-get update -y && apt-get install -y curl gnupg2 WORKDIR="$(mktemp -d)" ROOTFS="${WORKDIR}/rootfs" mkdir -p "${ROOTFS}" export WORKDIR ROOTFS # make them visible in the child shell unshare --mount --pid --fork --propagation private -- bash -eux <<'IN_NS' : "${ROOTFS:?}" "${WORKDIR:?}" # abort if either is empty # 1 Populate minimal Ubuntu base curl -L http://cdimage.ubuntu.com/ubuntu-base/releases/22.04/release/ubuntu-base-22.04-base-amd64.tar.gz \ | tar -C "$ROOTFS" -xz # 2 Add non-root user useradd -R "$ROOTFS" -U -u 1000 -s /bin/bash nvidia # 3 Bind-mount new root and unshare mounts mount --bind "$ROOTFS" "$ROOTFS" mount --make-private "$ROOTFS" cd "$ROOTFS" # 4 Minimal virtual filesystems mount -t proc proc proc mount -t sysfs sys sys mount -t tmpfs tmp tmp mount -t tmpfs run run # 5 GPU setup nvidia-container-cli --load-kmods --debug=container-cli.log \ configure --ldconfig=@/sbin/ldconfig.real \ --no-cgroups --utility --device=0 "$(pwd)" # 6 Switch root mkdir -p mnt pivot_root . mnt umount -l /mnt exec nvidia-smi -L IN_NS ` ) // getToolkitDir tries to read the toolkit dir from /tmp/ctk_e2e_temp_dir.txt using the runner. Returns empty string if not found. func getToolkitDir(runner Runner) string { out, _, err := runner.Run("cat /tmp/ctk_e2e_temp_dir.txt") if err == nil { dir := strings.TrimSpace(out) if dir != "" { return dir } } return "" } // getToolkitLayout returns the toolkit dir, a flag for flat layout, and a function to get the source path for a given filename. func getToolkitLayout(runner Runner) (toolkitDir string, useFlatLayout bool, srcPath func(string) string) { tempDir := getToolkitDir(runner) if tempDir == "" { return "", false, func(filename string) string { if strings.HasPrefix(filename, "lib") { return "/usr/lib/x86_64-linux-gnu/" + filename } return "/usr/bin/" + filename } } return tempDir, true, func(filename string) string { return tempDir + "/toolkit/" + filename } } // getRealTargetPath returns the correct target path for a .real binary depending on the install type. func getRealTargetPath(filename, toolkitDir string) string { if toolkitDir == "" { return "/usr/bin/" + filename } return toolkitDir + "/toolkit/" + filename } // Integration tests for Docker runtime var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, func() { var runner Runner // Install the NVIDIA Container Toolkit BeforeAll(func(ctx context.Context) { runner = NewRunner( WithHost(sshHost), WithPort(sshPort), WithSshKey(sshKey), WithSshUser(sshUser), ) if installCTK { installer, err := NewToolkitInstaller( WithRunner(runner), WithImage(imageName+":"+imageTag), WithTemplate(dockerInstallTemplate), ) Expect(err).ToNot(HaveOccurred()) err = installer.Install() Expect(err).ToNot(HaveOccurred()) } }) When("running nvidia-smi -L", Ordered, func() { var hostOutput string var err error BeforeAll(func(ctx context.Context) { hostOutput, _, err = runner.Run("nvidia-smi -L") Expect(err).ToNot(HaveOccurred()) _, _, err := runner.Run("docker pull ubuntu") Expect(err).ToNot(HaveOccurred()) }) AfterAll(func(ctx context.Context) { _, _, err := runner.Run("docker rm -f test-nvidia-container-cli") Expect(err).ToNot(HaveOccurred()) }) It("should support NVIDIA_VISIBLE_DEVICES and NVIDIA_DRIVER_CAPABILITIES", func(ctx context.Context) { // 1. Create the test script on the remote host at $HOME/test.sh using a here-document testScriptPath := "$HOME/libnvidia-container-cli.sh" testScript := libnvidiaContainerCliTestTemplate createScriptCmd := fmt.Sprintf("cat > %s <<'EOF'\n%s\nEOF\nchmod +x %s", testScriptPath, testScript, testScriptPath) _, _, err := runner.Run(createScriptCmd) Expect(err).ToNot(HaveOccurred()) // 2. Discover the symlink targets for the libraries on the remote host getTargetCmd := func(lib string) string { return fmt.Sprintf("readlink -f /usr/lib/x86_64-linux-gnu/%s.1", lib) } libNvidiaContainerTarget, _, err := runner.Run(getTargetCmd("libnvidia-container.so")) Expect(err).ToNot(HaveOccurred()) libNvidiaContainerTarget = strings.TrimSpace(libNvidiaContainerTarget) libNvidiaContainerTarget = strings.TrimPrefix(libNvidiaContainerTarget, "/usr/lib/x86_64-linux-gnu/") libNvidiaContainerGoTarget, _, err := runner.Run(getTargetCmd("libnvidia-container-go.so")) Expect(err).ToNot(HaveOccurred()) libNvidiaContainerGoTarget = strings.TrimSpace(libNvidiaContainerGoTarget) libNvidiaContainerGoTarget = strings.TrimPrefix(libNvidiaContainerGoTarget, "/usr/lib/x86_64-linux-gnu/") // 3. Get toolkit layout info and source path helper toolkitDir, _, srcPath := getToolkitLayout(runner) // 4. Render the docker run template with the discovered targets and computed source paths tmpl, err := template.New("dockerRun").Parse(libnvidiaContainerCliDockerRunTemplate) Expect(err).ToNot(HaveOccurred()) var dockerRunCmdBuilder strings.Builder err = tmpl.Execute(&dockerRunCmdBuilder, map[string]string{ "ToolkitDir": toolkitDir, "NvidiaContainerCliSrc": srcPath("nvidia-container-cli"), "NvidiaContainerCliRealSrc": srcPath("nvidia-container-cli.real"), "NvidiaContainerCliRealTarget": getRealTargetPath("nvidia-container-cli.real", toolkitDir), "NvidiaCtkSrc": srcPath("nvidia-ctk"), "NvidiaCtkRealSrc": srcPath("nvidia-ctk.real"), "NvidiaCtkRealTarget": getRealTargetPath("nvidia-ctk.real", toolkitDir), "NvidiaContainerRuntimeSrc": srcPath("nvidia-container-runtime"), "NvidiaContainerRuntimeRealSrc": srcPath("nvidia-container-runtime.real"), "NvidiaContainerRuntimeRealTarget": getRealTargetPath("nvidia-container-runtime.real", toolkitDir), "NvidiaContainerRuntimeHookSrc": srcPath("nvidia-container-runtime-hook"), "NvidiaContainerRuntimeHookRealSrc": srcPath("nvidia-container-runtime-hook.real"), "NvidiaContainerRuntimeHookRealTarget": getRealTargetPath("nvidia-container-runtime-hook.real", toolkitDir), "NvidiaContainerToolkitSrc": srcPath("nvidia-container-toolkit"), "NvidiaCdiHookSrc": srcPath("nvidia-cdi-hook"), "NvidiaCdiHookRealSrc": srcPath("nvidia-cdi-hook.real"), "NvidiaCdiHookRealTarget": getRealTargetPath("nvidia-cdi-hook.real", toolkitDir), "NvidiaContainerRuntimeCdiSrc": srcPath("nvidia-container-runtime.cdi"), "NvidiaContainerRuntimeCdiRealSrc": srcPath("nvidia-container-runtime.cdi.real"), "NvidiaContainerRuntimeCdiRealTarget": getRealTargetPath("nvidia-container-runtime.cdi.real", toolkitDir), "NvidiaContainerRuntimeLegacySrc": srcPath("nvidia-container-runtime.legacy"), "NvidiaContainerRuntimeLegacyRealSrc": srcPath("nvidia-container-runtime.legacy.real"), "NvidiaContainerRuntimeLegacyRealTarget": getRealTargetPath("nvidia-container-runtime.legacy.real", toolkitDir), "LibNvidiaContainerSo1Src": srcPath("libnvidia-container.so.1"), "LibNvidiaContainerTargetSrc": srcPath(libNvidiaContainerTarget), "LibNvidiaContainerGoSo1Src": srcPath("libnvidia-container-go.so.1"), "LibNvidiaContainerGoTargetSrc": srcPath(libNvidiaContainerGoTarget), "LibNvidiaContainerTarget": libNvidiaContainerTarget, "LibNvidiaContainerGoTarget": libNvidiaContainerGoTarget, }) Expect(err).ToNot(HaveOccurred()) dockerRunCmd := dockerRunCmdBuilder.String() // 5. Start the container using the rendered docker run command _, _, err = runner.Run(dockerRunCmd) Expect(err).ToNot(HaveOccurred()) // 6. Use Eventually to check the container logs contain hostOutput expected := strings.TrimSpace(strings.ReplaceAll(hostOutput, "\r", "")) Eventually(func() string { logs, _, err := runner.Run("docker logs test-nvidia-container-cli | tail -n 20") if err != nil { return "" } logLines := strings.Split(strings.TrimSpace(logs), "\n") if len(logLines) == 0 { return "" } lastLine := strings.TrimSpace(strings.ReplaceAll(logLines[len(logLines)-1], "\r", "")) return lastLine }, "5m", "5s").Should(Equal(expected)) }) }) })