mirror of https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-05-06 13:05:19 +00:00

Merge b3728406e0 into ca061bb4f0
This commit is contained in: commit b378f08975

.github/workflows/e2e.yaml (vendored, 13 lines changed)
@@ -70,8 +70,8 @@ jobs:

      - name: Run e2e tests
        env:
          IMAGE_NAME: ghcr.io/nvidia/container-toolkit
          VERSION: ${{ inputs.version }}
          E2E_IMAGE_REPO: ghcr.io/nvidia/container-toolkit
          E2E_IMAGE_TAG: ${{ inputs.version }}-ubuntu20.04
          SSH_KEY: ${{ secrets.AWS_SSH_KEY }}
          E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }}
          E2E_SSH_HOST: ${{ steps.holodeck_public_dns_name.outputs.result }}
@@ -82,7 +82,14 @@ jobs:
          chmod 600 "$e2e_ssh_key"
          export E2E_SSH_KEY="$e2e_ssh_key"

          make -f tests/e2e/Makefile test
          make -f tests/e2e/Makefile test-e2e

      - name: Archive Ginkgo logs
        uses: actions/upload-artifact@v4
        with:
          name: ginkgo-logs
          path: ginkgo.json
          retention-days: 15

      - name: Send Slack alert notification
        if: ${{ failure() }}
tests/e2e/Makefile

@@ -1,4 +1,5 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,34 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

GO_CMD ?= go
.PHONY: test-e2e ginkgo

include $(CURDIR)/versions.mk
GINKGO_ARGS ?=
LOG_ARTIFACTS_DIR ?= $(CURDIR)/e2e_logs

E2E_RUNTIME ?= docker
ginkgo:
	mkdir -p $(CURDIR)/bin
	GOBIN=$(CURDIR)/bin go install github.com/onsi/ginkgo/v2/ginkgo@latest

E2E_INSTALL_CTK ?= false

ifeq ($(DIST),)
DIST ?= ubuntu20.04
endif
IMAGE_TAG ?= $(VERSION)-$(DIST)
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)

E2E_SSH_KEY ?=
E2E_SSH_USER ?=
E2E_SSH_HOST ?=
E2E_SSH_PORT ?= 22

.PHONY: test
test:
	cd $(CURDIR)/tests/e2e && $(GO_CMD) test -v . -args \
		-ginkgo.focus="$(E2E_RUNTIME)" \
		-test.timeout=1h \
		-ginkgo.v \
		-install-ctk=$(E2E_INSTALL_CTK) \
		-toolkit-image=$(IMAGE) \
		-ssh-key=$(E2E_SSH_KEY) \
		-ssh-user=$(E2E_SSH_USER) \
		-remote-host=$(E2E_SSH_HOST) \
		-remote-port=$(E2E_SSH_PORT)
test-e2e: ginkgo
	$(CURDIR)/bin/ginkgo $(GINKGO_ARGS) -v --json-report ginkgo.json ./tests/e2e/...
tests/e2e/README.md (new file, 141 lines)

@@ -0,0 +1,141 @@
<!--
SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

# NVIDIA Container Toolkit – End‑to‑End (E2E) Test Suite

---
## 1 Scope & Goals
This directory contains a **Ginkgo v2 / Gomega** test harness that exercises an
NVIDIA Container Toolkit (CTK) installation on a **remote GPU‑enabled host** via
SSH. The suite validates that:

1. CTK can be installed (or upgraded) unattended (`E2E_INSTALL_CTK=true`).
2. The specified **container image** runs successfully under `nvidia-container-runtime`.
3. Errors and diagnostics are captured for post‑mortem analysis.

The tests are intended for continuous‑integration pipelines, nightly
compatibility runs, and pre‑release validation of new CTK builds.

---
## 2 Execution model
* The framework **does not** spin up a Kubernetes cluster; it drives a single
  host reachable over SSH.
* All commands run in a Ginkgo‑managed context (`ctx`) so they abort cleanly on
  timeout or Ctrl‑C.
* Environment discovery happens once in `TestMain` → `getTestEnv()`; parameters
  are therefore immutable for the duration of the run.

---
## 3 Prerequisites

| Item | Version / requirement |
|------|-----------------------|
| **Go toolchain** | ≥ 1.24 (see `tests/go.mod`; used to build the Ginkgo helper binaries) |
| **GPU‑enabled Linux host** | Running a supported NVIDIA driver; reachable via SSH |
| **SSH connectivity** | Public‑key authentication *without* pass‑phrase for unattended CI |
| **Local OS** | Linux/macOS; POSIX shell required by the Makefile |

---
## 4 Environment variables

| Variable | Required | Example | Description |
|----------|----------|---------|-------------|
| `E2E_INSTALL_CTK` | ✖ | `true` | When `true` the test installs CTK on the remote host before running the image. When `false` (the default) it assumes CTK is already present. |
| `E2E_IMAGE_REPO` | ✔ | `ghcr.io/nvidia/container-toolkit` | Container Toolkit image repository. |
| `E2E_IMAGE_TAG` | ✔ | `latest` | Image tag. |
| `E2E_SSH_KEY` | ✔ | `/home/ci/.ssh/id_rsa` | Private key used for authentication. |
| `E2E_SSH_USER` | ✔ | `ubuntu` | Username on the remote host. |
| `E2E_SSH_HOST` | ✔ | `gpurunner01.corp.local` | Hostname or IP address of the target node. |
| `E2E_SSH_PORT` | ✖ | `22` | SSH port of the target node (defaults to `22`). |

> All required variables are validated at start‑up; the suite aborts early with a
> clear message if any are missing or ill‑formed.

---
## 5 Build helper binaries

Install the latest Ginkgo CLI locally so that the Makefile can invoke it:

```bash
make ginkgo        # installs ./bin/ginkgo
```

The Makefile target mirrors the pattern used in other NVIDIA E2E suites:

```make
ginkgo:
	mkdir -p $(CURDIR)/bin
	GOBIN=$(CURDIR)/bin go install github.com/onsi/ginkgo/v2/ginkgo@latest
```

---
## 6 Running the suite

### 6.1 Basic invocation
```bash
E2E_INSTALL_CTK=true \
E2E_IMAGE_REPO=ghcr.io/nvidia/container-toolkit \
E2E_IMAGE_TAG=latest \
E2E_SSH_KEY=$HOME/.ssh/id_rsa \
E2E_SSH_USER=ubuntu \
E2E_SSH_HOST=10.0.0.15 \
E2E_SSH_PORT=22 \
make -f tests/e2e/Makefile test-e2e
```
This pulls the test images on the remote host, installs CTK (if requested), and
executes a minimal CUDA‑based workload.

---
## 7 Internal test flow

| Phase | Key function(s) | Notes |
|-------|-----------------|-------|
| **Init** | `TestMain` → `getTestEnv` | Collects env vars, initializes `ctx`. |
| **Connection check** | `BeforeSuite` (not shown here) | Verifies SSH reachability using `ssh -o BatchMode=yes`. |
| **Optional CTK install** | `installCTK == true` path | Runs the distro‑specific install script on the remote host. |
| **Runtime validation** | Leaf `It` blocks | Pulls the test images, runs `nvidia-smi` inside the container, asserts exit code `0`. |
| **Failure diagnostics** | `AfterEach` | Copies `/var/log/nvidia-container-runtime.log` & dmesg to `${LOG_ARTIFACTS_DIR}` via `scp`. |
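The connection-check helper itself is not part of this change; the sketch below is only an illustration of what such a probe could look like. The `checkSSHReachable` name and the exact `ssh` options are assumptions, not code from this suite.

```go
// Hypothetical sketch: a BatchMode probe that fails fast instead of prompting
// when the key is missing or passphrase-protected.
package e2e

import (
	"context"
	"fmt"
	"os/exec"
)

func checkSSHReachable(ctx context.Context, user, host, port, keyPath string) error {
	cmd := exec.CommandContext(ctx, "ssh",
		"-o", "BatchMode=yes",
		"-o", "StrictHostKeyChecking=accept-new",
		"-i", keyPath,
		"-p", port,
		fmt.Sprintf("%s@%s", user, host),
		"true", // no-op remote command; success means the host is reachable
	)
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("ssh probe to %s failed: %w (output: %s)", host, err, out)
	}
	return nil
}
```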
---

## 8 Extending the suite

1. Create a new `_test.go` file under `tests/e2e`.
2. Use the Ginkgo DSL (`Describe`, `When`, `It` …). Each leaf node receives a
   `context.Context` so you can run remote commands with deadline control (a
   minimal sketch follows this list).
3. Helper utilities such as `runSSH`, `withSudo`, and `collectLogs` are already
   available from the shared test harness (see `ssh_helpers.go`).
4. Keep tests **idempotent** and clean up any artefacts you create on the host.
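A minimal sketch of such a spec file is shown below. The `Describe` label and the commands are illustrative only; `runner` is the suite-level `Runner` created in `BeforeSuite` (see the test setup later in this change).

```go
package e2e

import (
	"context"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

// Illustrative spec: the GPUs visible inside a container should match the
// GPUs visible on the host.
var _ = Describe("example feature", func() {
	When("running nvidia-smi -L", func() {
		It("matches the host output", func(ctx context.Context) {
			hostOut, _, err := runner.Run("nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())

			ctrOut, _, err := runner.Run(
				"docker run --rm --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())
			Expect(ctrOut).To(Equal(hostOut))
		})
	})
})
```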

---

## 9 Common issues & fixes

| Symptom | Likely cause | Fix |
|---------|--------------|-----|
| `Permission denied (publickey)` | Wrong `E2E_SSH_KEY` or `E2E_SSH_USER` | Check the variables; ensure the key is readable by the CI user. |
| `docker: Error response from daemon: could not select device driver` | CTK not installed or wrong runtime class | Verify `E2E_INSTALL_CTK=true` or confirm the CTK installation on the host. |
| Test hangs at image pull | No outbound internet on remote host | Pre‑load the image or use a local registry mirror. |

## 10 License
Distributed under the terms of the **Apache License 2.0** (see header).
@@ -1,24 +1,28 @@
/*
 * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package e2e

import (
	"context"
	"flag"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"testing"

	. "github.com/onsi/ginkgo/v2"
@@ -31,27 +35,27 @@ var (

	installCTK bool

	image string
	ImageRepo string
	ImageTag string

	sshKey string
	sshUser string
	host string
	sshPort string
	sshKey string
	sshUser string
	host string
	sshPort string
	cwd string
	packagePath string

	runner Runner
)

func init() {
	flag.BoolVar(&installCTK, "install-ctk", false, "Install the NVIDIA Container Toolkit")
	flag.StringVar(&image, "toolkit-image", "", "Repository of the image to test")
	flag.StringVar(&sshKey, "ssh-key", "", "SSH key to use for remote login")
	flag.StringVar(&sshUser, "ssh-user", "", "SSH user to use for remote login")
	flag.StringVar(&host, "remote-host", "", "Hostname of the remote machine")
	flag.StringVar(&sshPort, "remote-port", "22", "SSH port to use for remote login")
}

func TestMain(t *testing.T) {
	suiteName := "NVIDIA Container Toolkit E2E"
	suiteName := "E2E NVIDIA Container Toolkit"

	RegisterFailHandler(Fail)

	ctx = context.Background()
	getTestEnv()

	RunSpecs(t,
		suiteName,
	)
@@ -59,5 +63,89 @@ func TestMain(t *testing.T) {

// BeforeSuite runs before the test suite
var _ = BeforeSuite(func() {
	ctx = context.Background()
	runner = NewRunner(
		WithHost(host),
		WithPort(sshPort),
		WithSshKey(sshKey),
		WithSshUser(sshUser),
	)

	if installCTK {
		installer, err := NewToolkitInstaller(
			WithRunner(runner),
			WithImage(ImageRepo+":"+ImageTag),
			WithTemplate(dockerInstallTemplate),
		)
		Expect(err).ToNot(HaveOccurred())

		err = installer.Install()
		Expect(err).ToNot(HaveOccurred())
	}

	_, _, err := runner.Run("docker pull ubuntu")
	Expect(err).ToNot(HaveOccurred())

	_, _, err = runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
	Expect(err).ToNot(HaveOccurred())

	_, _, err = runner.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
	Expect(err).ToNot(HaveOccurred())
})

// getTestEnv gets the test environment variables
func getTestEnv() {
	defer GinkgoRecover()
	var err error

	_, thisFile, _, _ := runtime.Caller(0)
	packagePath = filepath.Dir(thisFile)

	installCTK = getBoolEnvVar("E2E_INSTALL_CTK", false)

	ImageRepo = os.Getenv("E2E_IMAGE_REPO")
	Expect(ImageRepo).NotTo(BeEmpty(), "E2E_IMAGE_REPO environment variable must be set")

	ImageTag = os.Getenv("E2E_IMAGE_TAG")
	Expect(ImageTag).NotTo(BeEmpty(), "E2E_IMAGE_TAG environment variable must be set")

	sshKey = os.Getenv("E2E_SSH_KEY")
	Expect(sshKey).NotTo(BeEmpty(), "E2E_SSH_KEY environment variable must be set")

	sshUser = os.Getenv("E2E_SSH_USER")
	Expect(sshUser).NotTo(BeEmpty(), "E2E_SSH_USER environment variable must be set")

	host = os.Getenv("E2E_SSH_HOST")
	Expect(host).NotTo(BeEmpty(), "E2E_SSH_HOST environment variable must be set")

	sshPort = getIntEnvVar("E2E_SSH_PORT", 22)

	// Get current working directory
	cwd, err = os.Getwd()
	Expect(err).NotTo(HaveOccurred())
}

// getBoolEnvVar returns the boolean value of the environment variable or the default value if not set.
func getBoolEnvVar(key string, defaultValue bool) bool {
	value := os.Getenv(key)
	if value == "" {
		return defaultValue
	}
	boolValue, err := strconv.ParseBool(value)
	if err != nil {
		return defaultValue
	}
	return boolValue
}

// getIntEnvVar returns the integer value of the environment variable (as a string) or the default value if not set.
func getIntEnvVar(key string, defaultValue int) string {
	value := os.Getenv(key)
	if value == "" {
		return strconv.Itoa(defaultValue)
	}
	intValue, err := strconv.Atoi(value)
	if err != nil {
		return strconv.Itoa(defaultValue)
	}
	return strconv.Itoa(intValue)
}
@@ -1,19 +1,19 @@
/*
 * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package e2e

import (
@@ -1,5 +1,6 @@
/*
 * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -27,69 +28,50 @@ import (

// Integration tests for Docker runtime
var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
	var r Runner

	// Install the NVIDIA Container Toolkit
	BeforeAll(func(ctx context.Context) {
		r = NewRunner(
			WithHost(host),
			WithPort(sshPort),
			WithSshKey(sshKey),
			WithSshUser(sshUser),
		)
		if installCTK {
			installer, err := NewToolkitInstaller(
				WithRunner(r),
				WithImage(image),
				WithTemplate(dockerInstallTemplate),
			)
			Expect(err).ToNot(HaveOccurred())
			err = installer.Install()
			Expect(err).ToNot(HaveOccurred())
		}
	})

	// GPUs are accessible in a container: Running nvidia-smi -L inside the
	// container shows the same output inside the container as outside the
	// container. This means that the following commands must all produce
	// the same output
	When("running nvidia-smi -L", Ordered, func() {
		var hostOutput string
		var err error

		BeforeAll(func(ctx context.Context) {
			_, _, err := r.Run("docker pull ubuntu")
			Expect(err).ToNot(HaveOccurred())

			hostOutput, _, err = r.Run("nvidia-smi -L")
			hostOutput, _, err = runner.Run("nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())
		})

		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
			containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
			containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())
			Expect(containerOutput).To(Equal(hostOutput))
		})

		It("should support automatic CDI spec generation", func(ctx context.Context) {
			containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
			containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())
			Expect(containerOutput).To(Equal(hostOutput))
		})

		It("should support automatic CDI spec generation with the --gpus flag", func(ctx context.Context) {
			containerOutput, _, err := r.Run("docker run --rm -i --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
			By("Running docker run with --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
			containerOutput, _, err := runner.Run("docker run --rm -i --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())
			Expect(containerOutput).To(Equal(hostOutput))
		})

		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
			containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
			By("Running docker run with --runtime=nvidia --gpus all")
			containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())
			Expect(containerOutput).To(Equal(hostOutput))
		})

		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
			containerOutput, _, err := r.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
			By("Running docker run with --gpus all")
			containerOutput, _, err := runner.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
			Expect(err).ToNot(HaveOccurred())
			Expect(containerOutput).To(Equal(hostOutput))
		})
@@ -98,35 +80,34 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
	// A vectorAdd sample runs in a container with access to all GPUs.
	// The following should all produce the same result.
	When("Running the cuda-vectorAdd sample", Ordered, func() {
		BeforeAll(func(ctx context.Context) {
			_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
		})

		var referenceOutput string

		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
			var err error
			referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			referenceOutput, _, err = runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())

			Expect(referenceOutput).To(ContainSubstring("Test PASSED"))
		})

		It("should support automatic CDI spec generation", func(ctx context.Context) {
			out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
			out2, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
			Expect(referenceOutput).To(Equal(out2))
		})

		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
			out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			By("Running docker run with --runtime=nvidia --gpus all")
			out3, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
			Expect(referenceOutput).To(Equal(out3))
		})

		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
			out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			By("Running docker run with --gpus all")
			out4, _, err := runner.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
			Expect(referenceOutput).To(Equal(out4))
		})
@@ -136,53 +117,52 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
	// The following should all produce the same result.
	When("Running the cuda-deviceQuery sample", Ordered, func() {
		BeforeAll(func(ctx context.Context) {
			_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			_, _, err := runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
		})

		var referenceOutput string

		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
			var err error
			referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			referenceOutput, _, err = runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())

			Expect(referenceOutput).To(ContainSubstring("Result = PASS"))
		})

		It("should support automatic CDI spec generation", func(ctx context.Context) {
			out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
			out2, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
			Expect(referenceOutput).To(Equal(out2))
		})

		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
			out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			By("Running docker run with --runtime=nvidia --gpus all")
			out3, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
			Expect(referenceOutput).To(Equal(out3))
		})

		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
			out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			By("Running docker run with --gpus all")
			out4, _, err := runner.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
			Expect(err).ToNot(HaveOccurred())
			Expect(referenceOutput).To(Equal(out4))
		})
	})

	Describe("CUDA Forward compatibility", Ordered, func() {
		When("Testing CUDA Forward compatibility", Ordered, func() {
			BeforeAll(func(ctx context.Context) {
				_, _, err := r.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
				Expect(err).ToNot(HaveOccurred())
			})

			BeforeAll(func(ctx context.Context) {
				compatOutput, _, err := r.Run("docker run --rm -i -e NVIDIA_VISIBLE_DEVICES=void nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ls /usr/local/cuda/compat/libcuda.*.*\"")
				compatOutput, _, err := runner.Run("docker run --rm -i -e NVIDIA_VISIBLE_DEVICES=void nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ls /usr/local/cuda/compat/libcuda.*.*\"")
				Expect(err).ToNot(HaveOccurred())
				Expect(compatOutput).ToNot(BeEmpty())

				compatDriverVersion := strings.TrimPrefix(filepath.Base(compatOutput), "libcuda.so.")
				compatMajor := strings.SplitN(compatDriverVersion, ".", 2)[0]

				driverOutput, _, err := r.Run("nvidia-smi -q | grep \"Driver Version\"")
				driverOutput, _, err := runner.Run("nvidia-smi -q | grep \"Driver Version\"")
				Expect(err).ToNot(HaveOccurred())
				parts := strings.SplitN(driverOutput, ":", 2)
				Expect(parts).To(HaveLen(2))
@@ -198,19 +178,22 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
			})

			It("should work with the nvidia runtime in legacy mode", func(ctx context.Context) {
				ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
				By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
				ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
				Expect(err).ToNot(HaveOccurred())
				Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
			})

			It("should work with the nvidia runtime in CDI mode", func(ctx context.Context) {
				ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
				By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
				ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
				Expect(err).ToNot(HaveOccurred())
				Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
			})

			It("should NOT work with nvidia-container-runtime-hook", func(ctx context.Context) {
				ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=runc --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
				By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --gpus all")
				ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=runc --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
				Expect(err).ToNot(HaveOccurred())
				Expect(ldconfigOut).To(ContainSubstring("/usr/lib64"))
			})
@@ -1,17 +1,18 @@
/*
 * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package e2e
tests/go.mod

@@ -1,8 +1,6 @@
module github.com/NVIDIA/nvidia-container-toolkit/tests

go 1.23.2

toolchain go1.24.1
go 1.24.1

require (
	github.com/onsi/ginkgo/v2 v2.23.4