This change adds a create-soname-symlinks hook that ensures the soname symlinks for injected libraries exist in a container. It does so by calling ldconfig -n -N on the folders containing the injected libraries, which also ensures that libcuda.so is present in the ldcache when the update-ldcache hook runs.
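
In essence (a minimal sketch of the behaviour described above, not the hook's actual implementation; the helper name is illustrative), the hook amounts to invoking ldconfig in link-only mode for each affected directory:

// Sketch: run "ldconfig -n -N <dir>" so that soname symlinks
// (e.g. libcuda.so.1 -> libcuda.so.<version>) are created without
// rebuilding the ld cache.
package main

import (
	"log"
	"os/exec"
)

// createSonameSymlinks is a hypothetical helper, not the toolkit's API.
func createSonameSymlinks(libDirs ...string) error {
	// -n: process only the directories listed on the command line.
	// -N: do not rebuild the cache; only update the symbolic links.
	args := append([]string{"-n", "-N"}, libDirs...)
	return exec.Command("ldconfig", args...).Run()
}

func main() {
	if err := createSonameSymlinks("/usr/lib/x86_64-linux-gnu"); err != nil {
		log.Fatal(err)
	}
}
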
/*
 * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package e2e

import (
	"context"
	"path/filepath"
	"strconv"
	"strings"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

// Integration tests for the Docker runtime
var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
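	// r executes commands on the remote test host over SSH, using the
	// suite-level host, sshPort, sshKey, and sshUser settings.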
	var r Runner

	// Install the NVIDIA Container Toolkit
	BeforeAll(func(ctx context.Context) {
		r = NewRunner(
			WithHost(host),
			WithPort(sshPort),
			WithSshKey(sshKey),
			WithSshUser(sshUser),
		)
		if installCTK {
			installer, err := NewToolkitInstaller(
				WithRunner(r),
				WithImage(image),
				WithTemplate(dockerInstallTemplate),
			)
			Expect(err).ToNot(HaveOccurred())
			err = installer.Install()
			Expect(err).ToNot(HaveOccurred())
		}
	})

	// GPUs are accessible in a container: running nvidia-smi -L inside the
	// container produces the same output as running it on the host, so all
	// of the following invocations must produce the same output.
	When("running nvidia-smi -L", Ordered, func() {
		var hostOutput string

		BeforeAll(func(ctx context.Context) {
			_, _, err := r.Run("docker pull ubuntu")
			Expect(err).ToNot(HaveOccurred())

			hostOutput, _, err = r.Run("nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())
		})

		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
			containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())
			Expect(containerOutput).To(Equal(hostOutput))
		})
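
		// NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all requests devices
		// by their CDI names, which relies on the runtime generating a CDI
		// specification automatically.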
		It("should support automatic CDI spec generation", func(ctx context.Context) {
			containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())
			Expect(containerOutput).To(Equal(hostOutput))
		})

		It("should support automatic CDI spec generation with the --gpus flag", func(ctx context.Context) {
			containerOutput, _, err := r.Run("docker run --rm -i --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())
			Expect(containerOutput).To(Equal(hostOutput))
		})

		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
			containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())
			Expect(containerOutput).To(Equal(hostOutput))
		})
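
		// With --gpus alone (no --runtime=nvidia), Docker's built-in GPU
		// support injects the nvidia-container-runtime-hook, exercising the
		// legacy injection path instead of the full NVIDIA runtime.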
		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
			containerOutput, _, err := r.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())
			Expect(containerOutput).To(Equal(hostOutput))
		})
	})

	// The cuda-vectorAdd sample runs in a container with access to all GPUs.
	// The following invocations should all produce the same result.
	When("Running the cuda-vectorAdd sample", Ordered, func() {
		BeforeAll(func(ctx context.Context) {
			_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
		})

		var referenceOutput string

		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
			var err error
			referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())

			Expect(referenceOutput).To(ContainSubstring("Test PASSED"))
		})

		It("should support automatic CDI spec generation", func(ctx context.Context) {
			out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
			Expect(out2).To(Equal(referenceOutput))
		})

		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
			out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
			Expect(out3).To(Equal(referenceOutput))
		})

		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
			out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
			Expect(out4).To(Equal(referenceOutput))
		})
	})

	// The cuda-deviceQuery sample runs in a container with access to all GPUs.
	// The following invocations should all produce the same result.
	When("Running the cuda-deviceQuery sample", Ordered, func() {
		BeforeAll(func(ctx context.Context) {
			_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
		})

		var referenceOutput string

		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
			var err error
			referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())

			Expect(referenceOutput).To(ContainSubstring("Result = PASS"))
		})

		It("should support automatic CDI spec generation", func(ctx context.Context) {
			out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
			Expect(out2).To(Equal(referenceOutput))
		})

		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
			out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
			Expect(out3).To(Equal(referenceOutput))
		})

		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
			out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
			Expect(out4).To(Equal(referenceOutput))
		})
	})

	Describe("CUDA Forward compatibility", Ordered, func() {
		BeforeAll(func(ctx context.Context) {
			_, _, err := r.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
			Expect(err).ToNot(HaveOccurred())
		})

		BeforeAll(func(ctx context.Context) {
			compatOutput, _, err := r.Run("docker run --rm -i -e NVIDIA_VISIBLE_DEVICES=void nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ls /usr/local/cuda/compat/libcuda.*.*\"")
			Expect(err).ToNot(HaveOccurred())
			Expect(compatOutput).ToNot(BeEmpty())
			// Extract the version from a path such as /usr/local/cuda/compat/libcuda.so.<version>.
			compatDriverVersion := strings.TrimPrefix(filepath.Base(strings.TrimSpace(compatOutput)), "libcuda.so.")
			compatMajor := strings.SplitN(compatDriverVersion, ".", 2)[0]

			driverOutput, _, err := r.Run("nvidia-smi -q | grep \"Driver Version\"")
			Expect(err).ToNot(HaveOccurred())
			parts := strings.SplitN(driverOutput, ":", 2)
			Expect(parts).To(HaveLen(2))

			hostDriverVersion := strings.TrimSpace(parts[1])
			Expect(hostDriverVersion).ToNot(BeEmpty())
			driverMajor := strings.SplitN(hostDriverVersion, ".", 2)[0]

			// Compare the major versions numerically: a lexicographic string
			// comparison would, for example, order "9" after "10".
			compatMajorInt, err := strconv.Atoi(compatMajor)
			Expect(err).ToNot(HaveOccurred())
			driverMajorInt, err := strconv.Atoi(driverMajor)
			Expect(err).ToNot(HaveOccurred())

			if driverMajorInt >= compatMajorInt {
				GinkgoLogr.Info("CUDA Forward Compatibility tests require an older driver version", "hostDriverVersion", hostDriverVersion, "compatDriverVersion", compatDriverVersion)
				Skip("CUDA Forward Compatibility tests require an older driver version")
			}
		})
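
		// Forward compatibility means that the newer user-space driver stack
		// shipped in /usr/local/cuda/compat is used on top of an older
		// kernel-mode driver; the tests below check which libcuda.so.1 the
		// container's ldcache resolves to.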
		It("should work with the nvidia runtime in legacy mode", func(ctx context.Context) {
			ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
			Expect(err).ToNot(HaveOccurred())
			Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
		})

		It("should work with the nvidia runtime in CDI mode", func(ctx context.Context) {
			ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
			Expect(err).ToNot(HaveOccurred())
			Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
		})

		It("should NOT work with nvidia-container-runtime-hook", func(ctx context.Context) {
			ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=runc --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
			Expect(err).ToNot(HaveOccurred())
			Expect(ldconfigOut).To(ContainSubstring("/usr/lib64"))
		})
	})

	When("A container is run using CDI", Ordered, func() {
		BeforeAll(func(ctx context.Context) {
			_, _, err := r.Run("docker pull ubuntu")
			Expect(err).ToNot(HaveOccurred())
		})

		It("should include libcuda.so in the ldcache", func(ctx context.Context) {
			ldcacheOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu bash -c \"ldconfig -p | grep 'libcuda.so'\"")
			Expect(err).ToNot(HaveOccurred())
			Expect(ldcacheOutput).ToNot(BeEmpty())
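
			// Each "ldconfig -p" entry has the form
			//   libcuda.so.1 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libcuda.so.1
			// so splitting on " (" and trimming whitespace yields the library name.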
			ldcacheLines := strings.Split(ldcacheOutput, "\n")
			var libs []string
			for _, line := range ldcacheLines {
				parts := strings.SplitN(line, " (", 2)
				libs = append(libs, strings.TrimSpace(parts[0]))
			}

			Expect(libs).To(ContainElements([]string{"libcuda.so", "libcuda.so.1"}))
		})
	})
})