[no-relnote] Update Github Actions E2E
Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
parent 6df26cc7a5
commit 9defe37fa2

.github/workflows/e2e.yaml (vendored, 11 lines changed)
@@ -70,8 +70,8 @@ jobs:
       - name: Run e2e tests
         env:
-          IMAGE_NAME: ghcr.io/nvidia/container-toolkit
-          VERSION: ${{ inputs.version }}
+          E2E_IMAGE_REPO: ghcr.io/nvidia/container-toolkit
+          E2E_IMAGE_TAG: ${{ inputs.version }}-ubuntu20.04
           SSH_KEY: ${{ secrets.AWS_SSH_KEY }}
           E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }}
           E2E_SSH_HOST: ${{ steps.holodeck_public_dns_name.outputs.result }}
@@ -84,6 +84,13 @@ jobs:
           make -f tests/e2e/Makefile test

+      - name: Archive Ginkgo logs
+        uses: actions/upload-artifact@v4
+        with:
+          name: ginkgo-logs
+          path: ginkgo.json
+          retention-days: 15
+
       - name: Send Slack alert notification
         if: ${{ failure() }}
         uses: slackapi/slack-github-action@v2.0.0
.gitignore (vendored, 1 line changed)

@@ -11,3 +11,4 @@
 /nvidia-ctk
 /shared-*
 /release-*
+/bin
tests/e2e/Makefile

@@ -13,14 +13,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-.PHONY: test-e2e ginkgo
+.PHONY: test $(GINKGO_BIN)

 GINKGO_ARGS ?=
 LOG_ARTIFACTS_DIR ?= $(CURDIR)/e2e_logs

-ginkgo:
+GINKGO_BIN := $(CURDIR)/bin/ginkgo
+
+test: $(GINKGO_BIN)
+	$(GINKGO_BIN) $(GINKGO_ARGS) -v --json-report ginkgo.json ./tests/e2e/...
+
+$(GINKGO_BIN):
 	mkdir -p $(CURDIR)/bin
 	GOBIN=$(CURDIR)/bin go install github.com/onsi/ginkgo/v2/ginkgo@latest
-
-test-e2e: ginkgo
-	$(CURDIR)/bin/ginkgo $(GINKGO_ARGS) -v --json-report ginkgo.json ./tests/e2e/...
@@ -20,7 +20,7 @@ limitations under the License.
 ---

 ## 1 Scope & Goals
-This repository contains a **Ginkgo v2 / Gomega** test harness that exercises an
+This folder contains a **Ginkgo v2 / Gomega** test harness that exercises an
 NVIDIA Container Toolkit (CTK) installation on a **remote GPU‑enabled host** via
 SSH. The suite validates that:

@@ -58,12 +58,13 @@ compatibility runs, and pre‑release validation of new CTK builds.

 | Variable | Required | Example | Description |
 |----------|----------|---------|-------------|
-| `INSTALL_CTK` | ✖ | `true` | When `true` the test installs CTK on the remote host before running the image. When `false` it assumes CTK is already present. |
-| `TOOLKIT_IMAGE` | ✔ | `nvcr.io/nvidia/cuda:12.4.0-runtime-ubi9` | Image that will be pulled & executed. |
-| `SSH_KEY` | ✔ | `/home/ci/.ssh/id_rsa` | Private key used for authentication. |
-| `SSH_USER` | ✔ | `ubuntu` | Username on the remote host. |
-| `REMOTE_HOST` | ✔ | `gpurunner01.corp.local` | Hostname or IP address of the target node. |
-| `REMOTE_PORT` | ✔ | `22` | SSH port of the target node. |
+| `E2E_INSTALL_CTK` | ✖ | `true` | When `true` the test installs CTK on the remote host before running the image. When `false` it assumes CTK is already present. |
+| `E2E_IMAGE_REPO` | ✔ | `ghcr.io/nvidia/container-toolkit` | Container Toolkit Image |
+| `E2E_IMAGE_TAG` | ✔ | `latest` | Image tag |
+| `E2E_SSH_KEY` | ✔ | `/home/ci/.ssh/id_rsa` | Private key used for authentication. |
+| `E2E_SSH_USER` | ✔ | `ubuntu` | Username on the remote host. |
+| `E2E_SSH_HOST` | ✔ | `10.0.0.0` | Hostname or IP address of the target node. |
+| `E2E_SSH_PORT` | ✔ | `22` | SSH port of the target node. |

 > All variables are validated at start‑up; the suite aborts early with a clear
 > message if any are missing or ill‑formed.
@@ -92,12 +93,13 @@ bin/ginkgo:
 ### 6.1 Basic invocation
 ```bash
 INSTALL_CTK=true \
-TOOLKIT_IMAGE=nvcr.io/nvidia/cuda:12.4.0-runtime-ubi9 \
+E2E_IMAGE_REPO=ghcr.io/nvidia/container-toolkit \
+E2E_IMAGE_TAG=<image-tag> \
 SSH_KEY=$HOME/.ssh/id_rsa \
 SSH_USER=ubuntu \
 REMOTE_HOST=10.0.0.15 \
 REMOTE_PORT=22 \
-make test-e2e
+make test
 ```
 This downloads the image on the remote host, installs CTK (if requested), and
 executes a minimal CUDA‑based workload.
@@ -1,6 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -19,6 +18,7 @@ package e2e

 import (
 	"context"
+	"errors"
 	"os"
 	"path/filepath"
 	"runtime"
@@ -81,15 +81,6 @@ var _ = BeforeSuite(func() {
 		err = installer.Install()
 		Expect(err).ToNot(HaveOccurred())
 	}
-
-	_, _, err := runner.Run("docker pull ubuntu")
-	Expect(err).ToNot(HaveOccurred())
-
-	_, _, err = runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
-	Expect(err).ToNot(HaveOccurred())
-
-	_, _, err = runner.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
-	Expect(err).ToNot(HaveOccurred())
 })

 // getTestEnv gets the test environment variables
@@ -100,40 +91,63 @@ func getTestEnv() {
 	_, thisFile, _, _ := runtime.Caller(0)
 	packagePath = filepath.Dir(thisFile)

-	installCTK = getBoolEnvVar("INSTALL_CTK", false)
+	installCTK = getEnvVarOrDefault("E2E_INSTALL_CTK", true)

+	if installCTK {
 		ImageRepo = os.Getenv("E2E_IMAGE_REPO")
 		Expect(ImageRepo).NotTo(BeEmpty(), "E2E_IMAGE_REPO environment variable must be set")

 		ImageTag = os.Getenv("E2E_IMAGE_TAG")
 		Expect(ImageTag).NotTo(BeEmpty(), "E2E_IMAGE_TAG environment variable must be set")
+	}

-	sshKey = os.Getenv("SSH_KEY")
-	Expect(sshKey).NotTo(BeEmpty(), "SSH_KEY environment variable must be set")
+	sshKey = os.Getenv("E2E_SSH_KEY")
+	Expect(sshKey).NotTo(BeEmpty(), "E2E_SSH_KEY environment variable must be set")

-	sshUser = os.Getenv("SSH_USER")
-	Expect(sshUser).NotTo(BeEmpty(), "SSH_USER environment variable must be set")
+	sshUser = os.Getenv("E2E_SSH_USER")
+	Expect(sshUser).NotTo(BeEmpty(), "E2E_SSH_USER environment variable must be set")

-	host = os.Getenv("REMOTE_HOST")
-	Expect(host).NotTo(BeEmpty(), "REMOTE_HOST environment variable must be set")
+	host = os.Getenv("E2E_SSH_HOST")
+	Expect(host).NotTo(BeEmpty(), "E2E_SSH_HOST environment variable must be set")

-	sshPort = os.Getenv("REMOTE_PORT")
-	Expect(sshPort).NotTo(BeEmpty(), "REMOTE_PORT environment variable must be set")
+	sshPort = getEnvVarOrDefault("E2E_SSH_PORT", "22")

 	// Get current working directory
 	cwd, err = os.Getwd()
 	Expect(err).NotTo(HaveOccurred())
 }

-// getBoolEnvVar returns the boolean value of the environment variable or the default value if not set.
-func getBoolEnvVar(key string, defaultValue bool) bool {
+func getEnvVarAs[T any](key string) (T, error) {
+	var zero T
 	value := os.Getenv(key)
 	if value == "" {
-		return defaultValue
+		return zero, errors.New("env var not set")
 	}
-	boolValue, err := strconv.ParseBool(value)
+
+	switch any(zero).(type) {
+	case bool:
+		v, err := strconv.ParseBool(value)
+		if err != nil {
+			return zero, err
+		}
+		return any(v).(T), nil
+	case int:
+		v, err := strconv.Atoi(value)
+		if err != nil {
+			return zero, err
+		}
+		return any(v).(T), nil
+	case string:
+		return any(value).(T), nil
+	default:
+		return zero, errors.New("unsupported type")
+	}
+}
+
+func getEnvVarOrDefault[T any](key string, defaultValue T) T {
+	val, err := getEnvVarAs[T](key)
 	if err != nil {
 		return defaultValue
 	}
-	return boolValue
+	return val
 }
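The `getEnvVarAs` / `getEnvVarOrDefault` pair introduced above generalizes the old boolean-only `getBoolEnvVar` helper: the type switch on the zero value of `T` decides how the string from the environment is parsed. Below is a minimal standalone sketch of that behaviour, reusing the helper bodies and the `E2E_INSTALL_CTK` / `E2E_SSH_PORT` defaults from this diff; the `main` wrapper is illustrative only and is not part of the suite.

```go
// Standalone sketch of the generic env-var helpers added in this commit.
package main

import (
	"errors"
	"fmt"
	"os"
	"strconv"
)

// getEnvVarAs parses the named environment variable as T (bool, int, or string).
func getEnvVarAs[T any](key string) (T, error) {
	var zero T
	value := os.Getenv(key)
	if value == "" {
		return zero, errors.New("env var not set")
	}
	switch any(zero).(type) {
	case bool:
		v, err := strconv.ParseBool(value)
		if err != nil {
			return zero, err
		}
		return any(v).(T), nil
	case int:
		v, err := strconv.Atoi(value)
		if err != nil {
			return zero, err
		}
		return any(v).(T), nil
	case string:
		return any(value).(T), nil
	default:
		return zero, errors.New("unsupported type")
	}
}

// getEnvVarOrDefault falls back to defaultValue when the variable is unset or unparsable.
func getEnvVarOrDefault[T any](key string, defaultValue T) T {
	val, err := getEnvVarAs[T](key)
	if err != nil {
		return defaultValue
	}
	return val
}

func main() {
	// With neither variable set, both calls return their defaults,
	// mirroring the getTestEnv changes above.
	installCTK := getEnvVarOrDefault("E2E_INSTALL_CTK", true)
	sshPort := getEnvVarOrDefault("E2E_SSH_PORT", "22")
	fmt.Println(installCTK, sshPort)
}
```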
@@ -1,6 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,6 +13,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package e2e

 import (
@@ -1,6 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -39,38 +38,36 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 	BeforeAll(func(ctx context.Context) {
 		hostOutput, _, err = runner.Run("nvidia-smi -L")
 		Expect(err).ToNot(HaveOccurred())
+
+		_, _, err := runner.Run("docker pull ubuntu")
+		Expect(err).ToNot(HaveOccurred())
 	})

 	It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
-		By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
 		containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
 		Expect(err).ToNot(HaveOccurred())
 		Expect(containerOutput).To(Equal(hostOutput))
 	})

 	It("should support automatic CDI spec generation", func(ctx context.Context) {
-		By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
 		containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
 		Expect(err).ToNot(HaveOccurred())
 		Expect(containerOutput).To(Equal(hostOutput))
 	})

 	It("should support automatic CDI spec generation with the --gpus flag", func(ctx context.Context) {
-		By("Running docker run with --gpus=all --runtime=nvidia --gpus all")
 		containerOutput, _, err := runner.Run("docker run --rm -i --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
 		Expect(err).ToNot(HaveOccurred())
 		Expect(containerOutput).To(Equal(hostOutput))
 	})

 	It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
-		By("Running docker run with --runtime=nvidia --gpus all")
 		containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
 		Expect(err).ToNot(HaveOccurred())
 		Expect(containerOutput).To(Equal(hostOutput))
 	})

 	It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
-		By("Running docker run with --gpus all")
 		containerOutput, _, err := runner.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
 		Expect(err).ToNot(HaveOccurred())
 		Expect(containerOutput).To(Equal(hostOutput))
@@ -82,8 +79,12 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 	When("Running the cuda-vectorAdd sample", Ordered, func() {
 		var referenceOutput string

+		BeforeAll(func(ctx context.Context) {
+			_, _, err := runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
+			Expect(err).ToNot(HaveOccurred())
+		})
+
 		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
 			var err error
 			referenceOutput, _, err = runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
@@ -92,21 +93,18 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 		})

 		It("should support automatic CDI spec generation", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
 			out2, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out2))
 		})

 		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia --gpus all")
 			out3, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out3))
 		})

 		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
-			By("Running docker run with --gpus all")
 			out4, _, err := runner.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out4))
@@ -116,15 +114,14 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 	// A deviceQuery sample runs in a container with access to all GPUs
 	// The following should all produce the same result.
 	When("Running the cuda-deviceQuery sample", Ordered, func() {
+		var referenceOutput string
+
 		BeforeAll(func(ctx context.Context) {
 			_, _, err := runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 		})

-		var referenceOutput string
-
 		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
 			var err error
 			referenceOutput, _, err = runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
@@ -132,21 +129,18 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 		})

 		It("should support automatic CDI spec generation", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
 			out2, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out2))
 		})

 		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
-			By("Running docker run with --runtime=nvidia --gpus all")
 			out3, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out3))
 		})

 		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
-			By("Running docker run with --gpus all")
 			out4, _, err := runner.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out4))
@@ -155,6 +149,9 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {

 	When("Testing CUDA Forward compatibility", Ordered, func() {
 		BeforeAll(func(ctx context.Context) {
+			_, _, err := runner.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
+			Expect(err).ToNot(HaveOccurred())
+
 			compatOutput, _, err := runner.Run("docker run --rm -i -e NVIDIA_VISIBLE_DEVICES=void nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ls /usr/local/cuda/compat/libcuda.*.*\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(compatOutput).ToNot(BeEmpty())
@@ -178,21 +175,18 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 		})

 		It("should work with the nvidia runtime in legacy mode", func(ctx context.Context) {
-			By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
 			ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
 		})

 		It("should work with the nvidia runtime in CDI mode", func(ctx context.Context) {
-			By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
 			ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
 		})

 		It("should NOT work with nvidia-container-runtime-hook", func(ctx context.Context) {
-			By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --gpus all")
 			ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=runc --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(ldconfigOut).To(ContainSubstring("/usr/lib64"))
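The hunks above also show the structural change in this commit: image pulls move out of the suite-wide `BeforeSuite` and into the `BeforeAll` of the `Ordered` container that actually uses each image, so a missing image fails fast in the right block. Below is a minimal sketch of that pattern; the SSH-backed `runner` is stubbed with a local shell runner so the snippet is self-contained, and the simplified assertion is an assumption rather than the suite's real comparison logic.

```go
// Hypothetical standalone sketch (e.g. sketch_test.go); not part of the suite.
package e2e

import (
	"context"
	"os/exec"
	"testing"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

// localRunner stands in for the suite's SSH-backed runner; it runs the
// command in a local shell so this sketch compiles and runs on its own.
type localRunner struct{}

func (localRunner) Run(cmd string) (string, string, error) {
	out, err := exec.Command("bash", "-c", cmd).CombinedOutput()
	return string(out), "", err
}

var runner = localRunner{}

func TestSketch(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "docker pull-in-BeforeAll sketch")
}

// Each Ordered container pulls the image it needs exactly once in BeforeAll,
// instead of pulling every image up front in BeforeSuite.
var _ = Describe("docker (sketch)", Ordered, ContinueOnFailure, func() {
	When("Running the cuda-vectorAdd sample", Ordered, func() {
		BeforeAll(func(ctx context.Context) {
			_, _, err := runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
		})

		It("runs the sample with the nvidia runtime", func(ctx context.Context) {
			out, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
			Expect(out).ToNot(BeEmpty())
		})
	})
})
```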
@ -1,6 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
*
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
* you may not use this file except in compliance with the License.
|
* you may not use this file except in compliance with the License.
|
||||||
|
@@ -1,6 +1,8 @@
 module github.com/NVIDIA/nvidia-container-toolkit/tests

-go 1.24.1
+go 1.23.2
+
+toolchain go1.24.1

 require (
 	github.com/onsi/ginkgo/v2 v2.23.4