Merge pull request #1048 from ArangoGutierrez/updated_e2e

[no-relnote] Update E2E test suite
Carlos Eduardo Arango Gutierrez 2025-05-14 12:27:01 +02:00 committed by GitHub
commit 241881f12f
5 changed files with 148 additions and 102 deletions


@@ -70,20 +70,25 @@ jobs:
      - name: Run e2e tests
        env:
-          IMAGE_NAME: ghcr.io/nvidia/container-toolkit
-          VERSION: ${{ inputs.version }}
-          SSH_KEY: ${{ secrets.AWS_SSH_KEY }}
+          E2E_INSTALL_CTK: "true"
+          E2E_IMAGE_NAME: ghcr.io/nvidia/container-toolkit
+          E2E_IMAGE_TAG: ${{ inputs.version }}-ubuntu20.04
           E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }}
           E2E_SSH_HOST: ${{ steps.holodeck_public_dns_name.outputs.result }}
-          E2E_INSTALL_CTK: "true"
        run: |
          e2e_ssh_key=$(mktemp)
-          echo "$SSH_KEY" > "$e2e_ssh_key"
+          echo "${{ secrets.AWS_SSH_KEY }}" > "$e2e_ssh_key"
          chmod 600 "$e2e_ssh_key"
          export E2E_SSH_KEY="$e2e_ssh_key"
          make -f tests/e2e/Makefile test
+      - name: Archive Ginkgo logs
+        uses: actions/upload-artifact@v4
+        with:
+          name: ginkgo-logs
+          path: ginkgo.json
+          retention-days: 15
      - name: Send Slack alert notification
        if: ${{ failure() }}
        uses: slackapi/slack-github-action@v2.1.0
@@ -94,5 +99,5 @@ jobs:
          channel: ${{ secrets.SLACK_CHANNEL_ID }}
          text: |
            :x: On repository ${{ github.repository }}, the Workflow *${{ github.workflow }}* has failed.
            Details: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
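
Note: the ginkgo.json archived by the new "Archive Ginkgo logs" step is the JSON report that the updated tests/e2e/Makefile asks Ginkgo to emit via its --json-report flag (see the Makefile changes below).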

.gitignore

@@ -11,3 +11,4 @@
 /nvidia-ctk
 /shared-*
 /release-*
+/bin


@@ -1,4 +1,5 @@
-# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,34 +13,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-GO_CMD ?= go
-include $(CURDIR)/versions.mk
-
-E2E_RUNTIME ?= docker
-E2E_INSTALL_CTK ?= false
-
-ifeq ($($(DIST)),)
-DIST ?= ubuntu20.04
-endif
-IMAGE_TAG ?= $(VERSION)-$(DIST)
-IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
-
-E2E_SSH_KEY ?=
-E2E_SSH_USER ?=
-E2E_SSH_HOST ?=
-E2E_SSH_PORT ?= 22
-
-.PHONY: test
-test:
-	cd $(CURDIR)/tests/e2e && $(GO_CMD) test -v . -args \
-		-ginkgo.focus="$(E2E_RUNTIME)" \
-		-test.timeout=1h \
-		-ginkgo.v \
-		-install-ctk=$(E2E_INSTALL_CTK) \
-		-toolkit-image=$(IMAGE) \
-		-ssh-key=$(E2E_SSH_KEY) \
-		-ssh-user=$(E2E_SSH_USER) \
-		-remote-host=$(E2E_SSH_HOST) \
-		-remote-port=$(E2E_SSH_PORT)
+.PHONY: test $(GINKGO_BIN)
+
+GINKGO_ARGS ?=
+LOG_ARTIFACTS_DIR ?= $(CURDIR)/e2e_logs
+
+GINKGO_BIN := $(CURDIR)/bin/ginkgo
+
+test: $(GINKGO_BIN)
+	$(GINKGO_BIN) $(GINKGO_ARGS) -v --json-report ginkgo.json ./tests/e2e/...
+
+$(GINKGO_BIN):
+	mkdir -p $(CURDIR)/bin
+	GOBIN=$(CURDIR)/bin go install github.com/onsi/ginkgo/v2/ginkgo@latest
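
With this change a local run mirrors CI: export the E2E_* variables and run make -f tests/e2e/Makefile test; the pinned ginkgo binary is installed into ./bin on first use. Extra Ginkgo flags can be passed through GINKGO_ARGS, for example GINKGO_ARGS="--focus=docker" to run only the Docker specs.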


@@ -1,24 +1,27 @@
-/*
- * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+/**
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
 
 package e2e
 
 import (
 	"context"
-	"flag"
+	"errors"
+	"os"
+	"strconv"
 	"testing"
 
 	. "github.com/onsi/ginkgo/v2"
@@ -31,33 +34,86 @@ var (
 	installCTK bool
 
-	image   string
+	imageName string
+	imageTag  string
 
 	sshKey  string
 	sshUser string
-	host    string
+	sshHost string
 	sshPort string
 )
 
-func init() {
-	flag.BoolVar(&installCTK, "install-ctk", false, "Install the NVIDIA Container Toolkit")
-	flag.StringVar(&image, "toolkit-image", "", "Repository of the image to test")
-	flag.StringVar(&sshKey, "ssh-key", "", "SSH key to use for remote login")
-	flag.StringVar(&sshUser, "ssh-user", "", "SSH user to use for remote login")
-	flag.StringVar(&host, "remote-host", "", "Hostname of the remote machine")
-	flag.StringVar(&sshPort, "remote-port", "22", "SSH port to use for remote login")
-}
-
 func TestMain(t *testing.T) {
-	suiteName := "NVIDIA Container Toolkit E2E"
+	suiteName := "E2E NVIDIA Container Toolkit"
 
 	RegisterFailHandler(Fail)
+
+	ctx = context.Background()
+	getTestEnv()
+
 	RunSpecs(t,
 		suiteName,
 	)
 }
 
-// BeforeSuite runs before the test suite
-var _ = BeforeSuite(func() {
-	ctx = context.Background()
-})
+// getTestEnv gets the test environment variables
+func getTestEnv() {
+	defer GinkgoRecover()
+
+	installCTK = getEnvVarOrDefault("E2E_INSTALL_CTK", false)
+
+	if installCTK {
+		imageName = getRequiredEnvvar[string]("E2E_IMAGE_NAME")
+		imageTag = getRequiredEnvvar[string]("E2E_IMAGE_TAG")
+	}
+
+	sshKey = getRequiredEnvvar[string]("E2E_SSH_KEY")
+	sshUser = getRequiredEnvvar[string]("E2E_SSH_USER")
+	sshHost = getRequiredEnvvar[string]("E2E_SSH_HOST")
+	sshPort = getEnvVarOrDefault("E2E_SSH_PORT", "22")
+}
+
+// getRequiredEnvvar returns the specified envvar if set or raises an error.
+func getRequiredEnvvar[T any](key string) T {
+	v, err := getEnvVarAs[T](key)
+	Expect(err).To(BeNil(), "required environment variable not set", key)
+	return v
+}
+
+func getEnvVarAs[T any](key string) (T, error) {
+	var zero T
+	value := os.Getenv(key)
+	if value == "" {
+		return zero, errors.New("env var not set")
+	}
+
+	switch any(zero).(type) {
+	case bool:
+		v, err := strconv.ParseBool(value)
+		if err != nil {
+			return zero, err
+		}
+		return any(v).(T), nil
+	case int:
+		v, err := strconv.Atoi(value)
+		if err != nil {
+			return zero, err
+		}
+		return any(v).(T), nil
+	case string:
+		return any(value).(T), nil
+	default:
+		return zero, errors.New("unsupported type")
+	}
+}
+
+func getEnvVarOrDefault[T any](key string, defaultValue T) T {
+	val, err := getEnvVarAs[T](key)
+	if err != nil {
+		return defaultValue
+	}
+	return val
+}
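
The generic helpers above pick a parser from the instantiated type parameter. A minimal sketch of the resulting call-site behavior (hypothetical function, keys, and values, not part of the commit; assumes it lives in the same package so the helpers are in scope, with fmt imported):

    // exampleEnvLookups is illustrative only: the type parameter selects the parser.
    func exampleEnvLookups() {
        os.Setenv("E2E_INSTALL_CTK", "true")

        // T is inferred as bool from the default, so strconv.ParseBool is used.
        install := getEnvVarOrDefault("E2E_INSTALL_CTK", false) // -> true

        // An unset variable falls back to the supplied default.
        port := getEnvVarOrDefault("E2E_SSH_PORT", "22") // -> "22" if E2E_SSH_PORT is unset

        // Explicit instantiation; an unset key yields the zero value plus an error.
        retries, err := getEnvVarAs[int]("E2E_RETRIES") // E2E_RETRIES is a hypothetical key

        fmt.Println(install, port, retries, err)
    }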


@@ -27,23 +27,25 @@ import (
 
 // Integration tests for Docker runtime
 var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
-	var r Runner
+	var runner Runner
 
 	// Install the NVIDIA Container Toolkit
 	BeforeAll(func(ctx context.Context) {
-		r = NewRunner(
-			WithHost(host),
+		runner = NewRunner(
+			WithHost(sshHost),
 			WithPort(sshPort),
 			WithSshKey(sshKey),
 			WithSshUser(sshUser),
 		)
+
 		if installCTK {
 			installer, err := NewToolkitInstaller(
-				WithRunner(r),
-				WithImage(image),
+				WithRunner(runner),
+				WithImage(imageName+":"+imageTag),
 				WithTemplate(dockerInstallTemplate),
 			)
 			Expect(err).ToNot(HaveOccurred())
+
 			err = installer.Install()
 			Expect(err).ToNot(HaveOccurred())
 		}
@@ -55,41 +57,42 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 	// the same output
 	When("running nvidia-smi -L", Ordered, func() {
 		var hostOutput string
+		var err error
 
 		BeforeAll(func(ctx context.Context) {
-			_, _, err := r.Run("docker pull ubuntu")
+			hostOutput, _, err = runner.Run("nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
-			hostOutput, _, err = r.Run("nvidia-smi -L")
+			_, _, err := runner.Run("docker pull ubuntu")
 			Expect(err).ToNot(HaveOccurred())
 		})
 
 		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
-			containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
+			containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
 
 		It("should support automatic CDI spec generation", func(ctx context.Context) {
-			containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
+			containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
 
 		It("should support automatic CDI spec generation with the --gpus flag", func(ctx context.Context) {
-			containerOutput, _, err := r.Run("docker run --rm -i --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
+			containerOutput, _, err := runner.Run("docker run --rm -i --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
 
 		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
-			containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
+			containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
 
 		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
-			containerOutput, _, err := r.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
+			containerOutput, _, err := runner.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
@@ -98,35 +101,35 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 	// A vectorAdd sample runs in a container with access to all GPUs.
 	// The following should all produce the same result.
 	When("Running the cuda-vectorAdd sample", Ordered, func() {
+		var referenceOutput string
 		BeforeAll(func(ctx context.Context) {
-			_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
+			_, _, err := runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 		})
 
-		var referenceOutput string
 		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
 			var err error
-			referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
+			referenceOutput, _, err = runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(ContainSubstring("Test PASSED"))
 		})
 
 		It("should support automatic CDI spec generation", func(ctx context.Context) {
-			out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
+			out2, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out2))
 		})
 
 		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
-			out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
+			out3, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out3))
 		})
 
 		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
-			out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
+			out4, _, err := runner.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out4))
 		})
@@ -135,54 +138,52 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 	// A deviceQuery sample runs in a container with access to all GPUs
 	// The following should all produce the same result.
 	When("Running the cuda-deviceQuery sample", Ordered, func() {
+		var referenceOutput string
 		BeforeAll(func(ctx context.Context) {
-			_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
+			_, _, err := runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 		})
 
-		var referenceOutput string
 		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
 			var err error
-			referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
+			referenceOutput, _, err = runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(ContainSubstring("Result = PASS"))
 		})
 
 		It("should support automatic CDI spec generation", func(ctx context.Context) {
-			out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
+			out2, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out2))
 		})
 
 		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
-			out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
+			out3, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out3))
 		})
 
 		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
-			out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
+			out4, _, err := runner.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out4))
 		})
 	})
 
-	Describe("CUDA Forward compatibility", Ordered, func() {
+	When("Testing CUDA Forward compatibility", Ordered, func() {
 		BeforeAll(func(ctx context.Context) {
-			_, _, err := r.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
+			_, _, err := runner.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
 			Expect(err).ToNot(HaveOccurred())
-		})
 
-		BeforeAll(func(ctx context.Context) {
-			compatOutput, _, err := r.Run("docker run --rm -i -e NVIDIA_VISIBLE_DEVICES=void nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ls /usr/local/cuda/compat/libcuda.*.*\"")
+			compatOutput, _, err := runner.Run("docker run --rm -i -e NVIDIA_VISIBLE_DEVICES=void nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ls /usr/local/cuda/compat/libcuda.*.*\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(compatOutput).ToNot(BeEmpty())
 
 			compatDriverVersion := strings.TrimPrefix(filepath.Base(compatOutput), "libcuda.so.")
 			compatMajor := strings.SplitN(compatDriverVersion, ".", 2)[0]
 
-			driverOutput, _, err := r.Run("nvidia-smi -q | grep \"Driver Version\"")
+			driverOutput, _, err := runner.Run("nvidia-smi -q | grep \"Driver Version\"")
 			Expect(err).ToNot(HaveOccurred())
 
 			parts := strings.SplitN(driverOutput, ":", 2)
 			Expect(parts).To(HaveLen(2))
@@ -198,19 +199,19 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 		})
 
 		It("should work with the nvidia runtime in legacy mode", func(ctx context.Context) {
-			ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
+			ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
 		})
 
 		It("should work with the nvidia runtime in CDI mode", func(ctx context.Context) {
-			ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
+			ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
 		})
 
 		It("should NOT work with nvidia-container-runtime-hook", func(ctx context.Context) {
-			ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=runc --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
+			ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=runc --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(ldconfigOut).To(ContainSubstring("/usr/lib64"))
 		})
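
For orientation, a new case under this suite follows the same shape as the specs above: run a command through runner and assert on its stdout. A hypothetical spec (illustrative only, not part of this commit; nvidia-smi -L prints lines like "GPU 0: <name> (UUID: GPU-...)"):

    It("should list GPUs by UUID", func(ctx context.Context) {
        out, _, err := runner.Run("nvidia-smi -L")
        Expect(err).ToNot(HaveOccurred())
        Expect(out).To(ContainSubstring("UUID: GPU-"))
    })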