mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-14 02:20:52 +00:00
[no-relnote] Add E2E for libnvidia-container
Some checks failed
Some checks failed
Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
This commit is contained in:
parent
890db82b46
commit
9674787e7e
@ -20,8 +20,14 @@ LOG_ARTIFACTS_DIR ?= $(CURDIR)/e2e_logs
|
|||||||
|
|
||||||
GINKGO_BIN := $(CURDIR)/bin/ginkgo
|
GINKGO_BIN := $(CURDIR)/bin/ginkgo
|
||||||
|
|
||||||
|
# If GINKGO_FOCUS is not set, run all tests
|
||||||
|
# currently available tests:
|
||||||
|
# - nvidia-container-cli
|
||||||
|
# - docker
|
||||||
|
GINKGO_FOCUS ?=
|
||||||
|
|
||||||
test: $(GINKGO_BIN)
|
test: $(GINKGO_BIN)
|
||||||
$(GINKGO_BIN) $(GINKGO_ARGS) -v --json-report ginkgo.json ./tests/e2e/...
|
$(GINKGO_BIN) $(GINKGO_ARGS) -v --json-report ginkgo.json --focus="$(GINKGO_FOCUS)" ./tests/e2e/...
|
||||||
|
|
||||||
$(GINKGO_BIN):
|
$(GINKGO_BIN):
|
||||||
mkdir -p $(CURDIR)/bin
|
mkdir -p $(CURDIR)/bin
|
||||||
|
@ -28,11 +28,20 @@ var dockerInstallTemplate = `
|
|||||||
#! /usr/bin/env bash
|
#! /usr/bin/env bash
|
||||||
set -xe
|
set -xe
|
||||||
|
|
||||||
: ${IMAGE:={{.Image}}}
|
# if the TEMP_DIR is already set, use it
|
||||||
|
if [ -f /tmp/ctk_e2e_temp_dir.txt ]; then
|
||||||
|
TEMP_DIR=$(cat /tmp/ctk_e2e_temp_dir.txt)
|
||||||
|
else
|
||||||
|
TEMP_DIR="/tmp/ctk_e2e.$(date +%s)_$RANDOM"
|
||||||
|
echo "$TEMP_DIR" > /tmp/ctk_e2e_temp_dir.txt
|
||||||
|
fi
|
||||||
|
|
||||||
# Create a temporary directory
|
# if TEMP_DIR does not exist, create it
|
||||||
TEMP_DIR="/tmp/ctk_e2e.$(date +%s)_$RANDOM"
|
if [ ! -d "$TEMP_DIR" ]; then
|
||||||
mkdir -p "$TEMP_DIR"
|
mkdir -p "$TEMP_DIR"
|
||||||
|
fi
|
||||||
|
|
||||||
|
: ${IMAGE:={{.Image}}}
|
||||||
|
|
||||||
# Given that docker has an init function that checks for the existence of the
|
# Given that docker has an init function that checks for the existence of the
|
||||||
# nvidia-container-toolkit, we need to create a symlink to the nvidia-container-runtime-hook
|
# nvidia-container-toolkit, we need to create a symlink to the nvidia-container-runtime-hook
|
||||||
|
208
tests/e2e/nvidia-container-cli_test.go
Normal file
208
tests/e2e/nvidia-container-cli_test.go
Normal file
@ -0,0 +1,208 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package e2e
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"text/template"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// dockerDindTemplate is the shell command that starts a privileged
	// Docker-in-Docker daemon. Three sub-directories of {{.SharedDir}} are
	// bind-mounted into it and the daemon listens on {{.DockerSocket}}.
	dockerDindTemplate = `docker run -d --rm --privileged \
	-v {{.SharedDir}}/etc/docker:/etc/docker \
	-v {{.SharedDir}}/run/nvidia:/run/nvidia \
	-v {{.SharedDir}}/usr/local/nvidia:/usr/local/nvidia \
	--name {{.ContainerName}} \
	docker:dind -H unix://{{.DockerSocket}}`

	// dockerToolkitTemplate is the shell command that starts the toolkit
	// container. It reuses the volumes and PID namespace of the
	// Docker-in-Docker container and runs the mounted test script as its
	// command.
	dockerToolkitTemplate = `docker run -d --rm --privileged \
	--volumes-from {{.DindContainerName}} \
	--pid "container:{{.DindContainerName}}" \
	-e RUNTIME_ARGS="--socket {{.DockerSocket}}" \
	-v {{.TestScriptPath}}:/usr/local/bin/libnvidia-container-cli.sh \
	--name {{.ContainerName}} \
	{{.ToolkitImage}} /usr/local/bin/libnvidia-container-cli.sh`

	// dockerDefaultConfigTemplate is the daemon.json written for the
	// Docker-in-Docker daemon; it only configures a registry mirror.
	dockerDefaultConfigTemplate = `
{
	"registry-mirrors": ["https://mirror.gcr.io"]
}`

	// libnvidiaContainerCliTestTemplate is the bash script executed inside the
	// toolkit container. It unpacks an Ubuntu base rootfs, enters fresh mount
	// and PID namespaces, lets nvidia-container-cli configure the rootfs for
	// device 0, pivots into it and finally runs `nvidia-smi -L`.
	//
	// The rootfs is downloaded over HTTPS with `curl --fail`, and the inner
	// shell enables pipefail, so that a failed or tampered download aborts the
	// script at the download step instead of surfacing as an opaque tar error.
	// The heredoc terminator IN_NS must stay at the start of its line.
	libnvidiaContainerCliTestTemplate = `#!/usr/bin/env bash
set -euo pipefail

apt-get update -y && apt-get install -y ca-certificates curl gnupg2

WORKDIR="$(mktemp -d)"
ROOTFS="${WORKDIR}/rootfs"
mkdir -p "${ROOTFS}"

export WORKDIR ROOTFS # make them visible in the child shell

unshare --mount --pid --fork --propagation private -- bash -euxo pipefail <<'IN_NS'
: "${ROOTFS:?}" "${WORKDIR:?}" # abort if either is empty

# 1 Populate minimal Ubuntu base
curl -fsSL https://cdimage.ubuntu.com/ubuntu-base/releases/22.04/release/ubuntu-base-22.04-base-amd64.tar.gz \
  | tar -C "$ROOTFS" -xz

# 2 Add non-root user
useradd -R "$ROOTFS" -U -u 1000 -s /bin/bash nvidia

# 3 Bind-mount new root and unshare mounts
mount --bind "$ROOTFS" "$ROOTFS"
mount --make-private "$ROOTFS"
cd "$ROOTFS"

# 4 Minimal virtual filesystems
mount -t proc proc proc
mount -t sysfs sys sys
mount -t tmpfs tmp tmp
mount -t tmpfs run run

# 5 GPU setup
nvidia-container-cli --load-kmods --debug=container-cli.log \
  configure --ldconfig=@/sbin/ldconfig.real \
  --no-cgroups --utility --device=0 "$(pwd)"

# 6 Switch root
mkdir -p mnt
pivot_root . mnt
umount -l /mnt

exec nvidia-smi -L
IN_NS
`
)
|
||||||
|
|
||||||
|
// Integration tests for nvidia-container-cli (libnvidia-container)
|
||||||
|
var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, func() {
|
||||||
|
var runner Runner
|
||||||
|
var sharedDir string
|
||||||
|
var dindContainerName string
|
||||||
|
var toolkitContainerName string
|
||||||
|
var dockerSocket string
|
||||||
|
var hostOutput string
|
||||||
|
|
||||||
|
// Install the NVIDIA Container Toolkit
|
||||||
|
BeforeAll(func(ctx context.Context) {
|
||||||
|
runner = NewRunner(
|
||||||
|
WithHost(sshHost),
|
||||||
|
WithPort(sshPort),
|
||||||
|
WithSshKey(sshKey),
|
||||||
|
WithSshUser(sshUser),
|
||||||
|
)
|
||||||
|
|
||||||
|
// Setup shared directory and container names
|
||||||
|
sharedDir = "/tmp/nvidia-container-toolkit-test"
|
||||||
|
dindContainerName = "nvidia-container-toolkit-dind"
|
||||||
|
toolkitContainerName = "nvidia-container-toolkit-test"
|
||||||
|
dockerSocket = "/run/nvidia/docker.sock"
|
||||||
|
|
||||||
|
// Get host nvidia-smi output
|
||||||
|
var err error
|
||||||
|
hostOutput, _, err = runner.Run("nvidia-smi -L")
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
// Pull ubuntu image
|
||||||
|
_, _, err = runner.Run("docker pull ubuntu")
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
// Create shared directory structure
|
||||||
|
_, _, err = runner.Run(fmt.Sprintf("mkdir -p %s/{etc/docker,run/nvidia,usr/local/nvidia}", sharedDir))
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
// Copy docker default config
|
||||||
|
createDockerConfigCmd := fmt.Sprintf("cat > %s/etc/docker/daemon.json <<'EOF'\n%s\nEOF",
|
||||||
|
sharedDir, dockerDefaultConfigTemplate)
|
||||||
|
_, _, err = runner.Run(createDockerConfigCmd)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
// Start Docker-in-Docker container
|
||||||
|
tmpl, err := template.New("dockerDind").Parse(dockerDindTemplate)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
var dindCmdBuilder strings.Builder
|
||||||
|
err = tmpl.Execute(&dindCmdBuilder, map[string]string{
|
||||||
|
"SharedDir": sharedDir,
|
||||||
|
"ContainerName": dindContainerName,
|
||||||
|
"DockerSocket": dockerSocket,
|
||||||
|
})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
_, _, err = runner.Run(dindCmdBuilder.String())
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
})
|
||||||
|
|
||||||
|
AfterAll(func(ctx context.Context) {
|
||||||
|
// Cleanup containers
|
||||||
|
runner.Run(fmt.Sprintf("docker rm -f %s", toolkitContainerName))
|
||||||
|
runner.Run(fmt.Sprintf("docker rm -f %s", dindContainerName))
|
||||||
|
|
||||||
|
// Cleanup shared directory
|
||||||
|
_, _, err := runner.Run(fmt.Sprintf("rm -rf %s", sharedDir))
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
})
|
||||||
|
|
||||||
|
When("running nvidia-smi -L", Ordered, func() {
|
||||||
|
It("should support NVIDIA_VISIBLE_DEVICES and NVIDIA_DRIVER_CAPABILITIES", func(ctx context.Context) {
|
||||||
|
// 1. Create the test script
|
||||||
|
testScriptPath := fmt.Sprintf("%s/libnvidia-container-cli.sh", sharedDir)
|
||||||
|
createScriptCmd := fmt.Sprintf("cat > %s <<'EOF'\n%s\nEOF\nchmod +x %s",
|
||||||
|
testScriptPath, libnvidiaContainerCliTestTemplate, testScriptPath)
|
||||||
|
_, _, err := runner.Run(createScriptCmd)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
// 2. Start the toolkit container
|
||||||
|
tmpl, err := template.New("dockerToolkit").Parse(dockerToolkitTemplate)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
var toolkitCmdBuilder strings.Builder
|
||||||
|
err = tmpl.Execute(&toolkitCmdBuilder, map[string]string{
|
||||||
|
"DindContainerName": dindContainerName,
|
||||||
|
"ContainerName": toolkitContainerName,
|
||||||
|
"DockerSocket": dockerSocket,
|
||||||
|
"TestScriptPath": testScriptPath,
|
||||||
|
"ToolkitImage": imageName + ":" + imageTag,
|
||||||
|
})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
_, _, err = runner.Run(toolkitCmdBuilder.String())
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
// 3. Wait for and verify the output
|
||||||
|
expected := strings.TrimSpace(strings.ReplaceAll(hostOutput, "\r", ""))
|
||||||
|
Eventually(func() string {
|
||||||
|
logs, _, err := runner.Run(fmt.Sprintf("docker logs %s | tail -n 20", toolkitContainerName))
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
logLines := strings.Split(strings.TrimSpace(logs), "\n")
|
||||||
|
if len(logLines) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(strings.ReplaceAll(logLines[len(logLines)-1], "\r", ""))
|
||||||
|
}, "5m", "5s").Should(Equal(expected))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
Loading…
Reference in New Issue
Block a user