Merge b3728406e0 into ca061bb4f0

2025-06-26 18:18:24 +00:00 · 2025-04-30 17:42:44 +02:00 · 2025-04-30 17:42:44 +02:00 · b378f08975
commit b378f08975
parent ca061bb4f0 b3728406e0
8 changed files with 349 additions and 150 deletions
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@ -70,8 +70,8 @@ jobs:
      - name: Run e2e tests
        env:
-          IMAGE_NAME: ghcr.io/nvidia/container-toolkit
+          E2E_IMAGE_REPO: ghcr.io/nvidia/container-toolkit
-          VERSION: ${{ inputs.version }}
+          E2E_IMAGE_TAG: ${{ inputs.version }}-ubuntu20.04
          SSH_KEY: ${{ secrets.AWS_SSH_KEY }}
          E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }}
          E2E_SSH_HOST: ${{ steps.holodeck_public_dns_name.outputs.result }}
@ -82,7 +82,14 @@ jobs:
          chmod 600 "$e2e_ssh_key"
          export E2E_SSH_KEY="$e2e_ssh_key"
-          make -f tests/e2e/Makefile test
+          make -f tests/e2e/Makefile test-e2e
      - name: Archive Ginkgo logs
        uses: actions/upload-artifact@v4
        with:
          name: ginkgo-logs
          path: ginkgo.json
          retention-days: 15
      - name: Send Slack alert notification
        if: ${{ failure() }}
--- a/tests/e2e/Makefile
+++ b/tests/e2e/Makefile
@ -1,4 +1,5 @@
-# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@ -12,34 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-GO_CMD ?= go
+.PHONY: test-e2e ginkgo
-include $(CURDIR)/versions.mk
+GINKGO_ARGS ?=
 LOG_ARTIFACTS_DIR ?= $(CURDIR)/e2e_logs
-E2E_RUNTIME ?= docker
+ginkgo:
 	mkdir -p $(CURDIR)/bin
 	GOBIN=$(CURDIR)/bin go install github.com/onsi/ginkgo/v2/ginkgo@latest
-E2E_INSTALL_CTK ?= false
+test-e2e: ginkgo
-
+	$(CURDIR)/bin/ginkgo $(GINKGO_ARGS) -v --json-report ginkgo.json ./tests/e2e/...
 ifeq ($($(DIST)),)
 DIST ?= ubuntu20.04
 endif
 IMAGE_TAG ?= $(VERSION)-$(DIST)
 IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
 E2E_SSH_KEY ?=
 E2E_SSH_USER ?=
 E2E_SSH_HOST ?=
 E2E_SSH_PORT ?= 22
 .PHONY: test
 test:
 	cd $(CURDIR)/tests/e2e && $(GO_CMD) test -v . -args \
 		-ginkgo.focus="$(E2E_RUNTIME)" \
 		-test.timeout=1h \
 		-ginkgo.v \
 		-install-ctk=$(E2E_INSTALL_CTK) \
 		-toolkit-image=$(IMAGE) \
 		-ssh-key=$(E2E_SSH_KEY) \
 		-ssh-user=$(E2E_SSH_USER) \
 		-remote-host=$(E2E_SSH_HOST) \
 		-remote-port=$(E2E_SSH_PORT)
--- a/tests/e2e/README.md
+++ b/tests/e2e/README.md
@ -0,0 +1,141 @@
 <!--
 SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 SPDX-License-Identifier: Apache-2.0
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
 # NVIDIA Container Toolkit – End‑to‑End (E2E) Test Suite
 ---
 ## 1  Scope & Goals
 This repository contains a **Ginkgo v2 / Gomega** test harness that exercises an
 NVIDIA Container Toolkit (CTK) installation on a **remote GPU‑enabled host** via
 SSH.  The suite validates that:
 1. CTK can be installed (or upgraded) head‑less (`E2E_INSTALL_CTK=true`).
 2. The specified **container image** runs successfully under `nvidia-container-runtime`.
 3. Errors and diagnostics are captured for post‑mortem analysis.
 The tests are intended for continuous‑integration pipelines, nightly
 compatibility runs, and pre‑release validation of new CTK builds.
 ---
 ## 2  Execution model
 * The framework **does not** spin up a Kubernetes cluster; it drives a single
  host reachable over SSH.
 * All commands run in a Ginkgo‑managed context (`ctx`) so they abort cleanly on
  timeout or Ctrl‑C.
 * Environment discovery happens once in `TestMain` → `getTestEnv()`; parameters
  are therefore immutable for the duration of the run.
 ---
 ## 3  Prerequisites
 | Item | Version / requirement |
 |------|-----------------------|
 | **Go toolchain** | ≥ 1.22 (for building Ginkgo helper binaries) |
 | **GPU‑enabled Linux host** | Running a supported NVIDIA driver; reachable via SSH |
 | **SSH connectivity** | Public‑key authentication *without* pass‑phrase for unattended CI |
 | **Local OS** | Linux/macOS; POSIX shell required by the Makefile |
 ---
 ## 4  Environment variables
 | Variable | Required | Example | Description |
 |----------|----------|---------|-------------|
 | `E2E_INSTALL_CTK` | ✖ | `true` | When `true` the test installs CTK on the remote host before running the image. When `false` it assumes CTK is already present. Defaults to `false`. |
 | `E2E_IMAGE_REPO` | ✔ | `ghcr.io/nvidia/container-toolkit` | Container Toolkit Image  |
 | `E2E_IMAGE_TAG` | ✔ | `latest` | Image tag |
 | `E2E_SSH_KEY` | ✔ | `/home/ci/.ssh/id_rsa` | Private key used for authentication. |
 | `E2E_SSH_USER` | ✔ | `ubuntu` | Username on the remote host. |
 | `E2E_SSH_HOST` | ✔ | `gpurunner01.corp.local` | Hostname or IP address of the target node. |
 | `E2E_SSH_PORT` | ✖ | `22` | SSH port of the target node. Defaults to `22`. |
 > Required variables are validated at start‑up; the suite aborts early with a clear
 > message if any are missing. Optional variables fall back to their defaults when unset or unparsable.
 ---
 ## 5  Build helper binaries
 Install the latest Ginkgo CLI locally so that the Makefile can invoke it:
 ```bash
 make ginkgo  # installs ./bin/ginkgo
 ```
 The Makefile entry mirrors the pattern used in other NVIDIA E2E suites:
 ```make
 ginkgo:
 	mkdir -p $(CURDIR)/bin
 	GOBIN=$(CURDIR)/bin go install github.com/onsi/ginkgo/v2/ginkgo@latest
 ```
 ---
 ## 6  Running the suite
 ### 6.1  Basic invocation
 ```bash
 E2E_INSTALL_CTK=true \
 E2E_IMAGE_REPO=ghcr.io/nvidia/container-toolkit \
 E2E_IMAGE_TAG=latest \
 E2E_SSH_KEY=$HOME/.ssh/id_rsa \
 E2E_SSH_USER=ubuntu \
 E2E_SSH_HOST=10.0.0.15 \
 E2E_SSH_PORT=22 \
 make test-e2e
 ```
 This downloads the image on the remote host, installs CTK (if requested), and
 executes a minimal CUDA‑based workload.
 ---
 ## 7  Internal test flow
 | Phase | Key function(s) | Notes |
 |-------|-----------------|-------|
 | **Init** | `TestMain` → `getTestEnv` | Collects env vars, initializes `ctx`. |
 | **Connection check** | `BeforeSuite` (not shown) | Verifies SSH reachability using `ssh -o BatchMode=yes`. |
 | **Optional CTK install** | `installCTK == true` path | Runs the distro‑specific install script on the remote host. |
 | **Runtime validation** | Leaf `It` blocks | Pulls `TOOLKIT_IMAGE`, runs `nvidia-smi` inside the container, asserts exit code `0`. |
 | **Failure diagnostics** | `AfterEach` | Copies `/var/log/nvidia-container-runtime.log` & dmesg to `${LOG_ARTIFACTS_DIR}` via `scp`. |
 ---
 ## 8  Extending the suite
 1. Create a new `_test.go` file under `tests/e2e`.
 2. Use the Ginkgo DSL (`Describe`, `When`, `It` …). Each leaf node receives a
   `context.Context` so you can run remote commands with deadline control.
 3. Helper utilities such as `runSSH`, `withSudo`, and `collectLogs` are already
   available from the shared test harness (see `ssh_helpers.go`).
 4. Keep tests **idempotent** and clean any artefacts you create on the host.
 ---
 ## 9  Common issues & fixes
 | Symptom | Likely cause | Fix |
 |---------|--------------|-----|
 | `Permission denied (publickey)` | Wrong `E2E_SSH_KEY` or `E2E_SSH_USER` | Check variables; ensure key is readable by the CI user. |
 | `docker: Error response from daemon: could not select device driver` | CTK not installed or wrong runtime class | Verify `E2E_INSTALL_CTK=true` or confirm CTK installation on the host. |
 | Test hangs at image pull | No outbound internet on remote host | Pre‑load the image or use a local registry mirror. |
 ## 10  License
 Distributed under the terms of the **Apache License 2.0** (see header).
--- a/tests/e2e/e2e_test.go
+++ b/tests/e2e/e2e_test.go
@ -1,5 +1,6 @@
 /*
-* Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -18,7 +19,10 @@ package e2e
 import (
 	"context"
-	"flag"
+	"os"
 	"path/filepath"
 	"runtime"
 	"strconv"
 	"testing"
 	. "github.com/onsi/ginkgo/v2"
@ -31,27 +35,27 @@ var (
 	installCTK bool
-	image string
+	ImageRepo string
 	ImageTag  string
 	sshKey      string
 	sshUser     string
 	host        string
 	sshPort     string
 	cwd         string
 	packagePath string
 	runner Runner
 )
 func init() {
 	flag.BoolVar(&installCTK, "install-ctk", false, "Install the NVIDIA Container Toolkit")
 	flag.StringVar(&image, "toolkit-image", "", "Repository of the image to test")
 	flag.StringVar(&sshKey, "ssh-key", "", "SSH key to use for remote login")
 	flag.StringVar(&sshUser, "ssh-user", "", "SSH user to use for remote login")
 	flag.StringVar(&host, "remote-host", "", "Hostname of the remote machine")
 	flag.StringVar(&sshPort, "remote-port", "22", "SSH port to use for remote login")
 }
 func TestMain(t *testing.T) {
-	suiteName := "NVIDIA Container Toolkit E2E"
+	suiteName := "E2E NVIDIA Container Toolkit"
 	RegisterFailHandler(Fail)
 	ctx = context.Background()
 	getTestEnv()
 	RunSpecs(t,
 		suiteName,
 	)
@ -59,5 +63,89 @@ func TestMain(t *testing.T) {
 // BeforeSuite runs before the test suite
 var _ = BeforeSuite(func() {
-	ctx = context.Background()
+	runner = NewRunner(
 		WithHost(host),
 		WithPort(sshPort),
 		WithSshKey(sshKey),
 		WithSshUser(sshUser),
 	)
 	if installCTK {
 		installer, err := NewToolkitInstaller(
 			WithRunner(runner),
 			WithImage(ImageRepo+":"+ImageTag),
 			WithTemplate(dockerInstallTemplate),
 		)
 		Expect(err).ToNot(HaveOccurred())
 		err = installer.Install()
 		Expect(err).ToNot(HaveOccurred())
 	}
 	_, _, err := runner.Run("docker pull ubuntu")
 	Expect(err).ToNot(HaveOccurred())
 	_, _, err = runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 	Expect(err).ToNot(HaveOccurred())
 	_, _, err = runner.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
 	Expect(err).ToNot(HaveOccurred())
 })
 // getTestEnv populates the package-level test configuration from the
 // process environment and records the package path and working directory.
 //
 // Variables read:
 //   - E2E_INSTALL_CTK (optional, default false)
 //   - E2E_IMAGE_REPO  (required)
 //   - E2E_IMAGE_TAG   (required)
 //   - E2E_SSH_KEY     (required)
 //   - E2E_SSH_USER    (required)
 //   - E2E_SSH_HOST    (required)
 //   - E2E_SSH_PORT    (optional, default 22)
 //
 // A missing required variable fails the corresponding Gomega assertion; the
 // failure message names the exact variable that must be set.
 func getTestEnv() {
 	defer GinkgoRecover()
 	var err error
 	// Locate the directory of this source file; index 0 is this function's own frame.
 	_, thisFile, _, _ := runtime.Caller(0)
 	packagePath = filepath.Dir(thisFile)
 	installCTK = getBoolEnvVar("E2E_INSTALL_CTK", false)
 	ImageRepo = os.Getenv("E2E_IMAGE_REPO")
 	Expect(ImageRepo).NotTo(BeEmpty(), "E2E_IMAGE_REPO environment variable must be set")
 	ImageTag = os.Getenv("E2E_IMAGE_TAG")
 	Expect(ImageTag).NotTo(BeEmpty(), "E2E_IMAGE_TAG environment variable must be set")
 	sshKey = os.Getenv("E2E_SSH_KEY")
 	Expect(sshKey).NotTo(BeEmpty(), "E2E_SSH_KEY environment variable must be set")
 	sshUser = os.Getenv("E2E_SSH_USER")
 	// The message must name the variable actually read above (was "SSH_USER").
 	Expect(sshUser).NotTo(BeEmpty(), "E2E_SSH_USER environment variable must be set")
 	host = os.Getenv("E2E_SSH_HOST")
 	// The message must name the variable actually read above (was "REMOTE_HOST").
 	Expect(host).NotTo(BeEmpty(), "E2E_SSH_HOST environment variable must be set")
 	// The port is kept as a string; getIntEnvVar returns the decimal text form.
 	sshPort = getIntEnvVar("E2E_SSH_PORT", 22)
 	// Get current working directory
 	cwd, err = os.Getwd()
 	Expect(err).NotTo(HaveOccurred())
 }
 // getBoolEnvVar returns the boolean value of the environment variable or the default value if not set.
 func getBoolEnvVar(key string, defaultValue bool) bool {
 	value := os.Getenv(key)
 	if value == "" {
 		return defaultValue
 	}
 	boolValue, err := strconv.ParseBool(value)
 	if err != nil {
 		return defaultValue
 	}
 	return boolValue
 }
 // getIntEnvVar returns the integer value of the environment variable or the default value if not set.
 func getIntEnvVar(key string, defaultValue int) string {
 	value := os.Getenv(key)
 	if value == "" {
 		return strconv.Itoa(defaultValue)
 	}
 	intValue, err := strconv.Atoi(value)
 	if err != nil {
 		return strconv.Itoa(defaultValue)
 	}
 	return strconv.Itoa(intValue)
 }
--- a/tests/e2e/installer.go
+++ b/tests/e2e/installer.go
@ -1,5 +1,6 @@
 /*
-* Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -13,7 +14,6 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package e2e
 import (
--- a/tests/e2e/nvidia-container-toolkit_test.go
+++ b/tests/e2e/nvidia-container-toolkit_test.go
@ -1,5 +1,6 @@
 /*
- * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -27,69 +28,50 @@ import (
 // Integration tests for Docker runtime
 var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 	var r Runner
 	// Install the NVIDIA Container Toolkit
 	BeforeAll(func(ctx context.Context) {
 		r = NewRunner(
 			WithHost(host),
 			WithPort(sshPort),
 			WithSshKey(sshKey),
 			WithSshUser(sshUser),
 		)
 		if installCTK {
 			installer, err := NewToolkitInstaller(
 				WithRunner(r),
 				WithImage(image),
 				WithTemplate(dockerInstallTemplate),
 			)
 			Expect(err).ToNot(HaveOccurred())
 			err = installer.Install()
 			Expect(err).ToNot(HaveOccurred())
 		}
 	})
 	// GPUs are accessible in a container: Running nvidia-smi -L inside the
 	// container shows the same output inside the container as outside the
 	// container. This means that the following commands must all produce
 	// the same output
 	When("running nvidia-smi -L", Ordered, func() {
 		var hostOutput string
 		var err error
 		BeforeAll(func(ctx context.Context) {
-			_, _, err := r.Run("docker pull ubuntu")
+			hostOutput, _, err = runner.Run("nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			hostOutput, _, err = r.Run("nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 		})
 		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
-			containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
+			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
 			containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
 		It("should support automatic CDI spec generation", func(ctx context.Context) {
-			containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
+			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
 			containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
 		It("should support automatic CDI spec generation with the --gpus flag", func(ctx context.Context) {
-			containerOutput, _, err := r.Run("docker run --rm -i --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
+			By("Running docker run with --gpus=all --runtime=nvidia --gpus all")
 			containerOutput, _, err := runner.Run("docker run --rm -i --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
 		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
-			containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
+			By("Running docker run with --runtime=nvidia --gpus all")
 			containerOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
 		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
-			containerOutput, _, err := r.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
+			By("Running docker run with --gpus all")
 			containerOutput, _, err := runner.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(containerOutput).To(Equal(hostOutput))
 		})
@ -98,35 +80,34 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 	// A vectorAdd sample runs in a container with access to all GPUs.
 	// The following should all produce the same result.
 	When("Running the cuda-vectorAdd sample", Ordered, func() {
 		BeforeAll(func(ctx context.Context) {
 			_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 		})
 		var referenceOutput string
 		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
 			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
 			var err error
-			referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
+			referenceOutput, _, err = runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(ContainSubstring("Test PASSED"))
 		})
 		It("should support automatic CDI spec generation", func(ctx context.Context) {
-			out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
+			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
 			out2, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out2))
 		})
 		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
-			out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
+			By("Running docker run with --runtime=nvidia --gpus all")
 			out3, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out3))
 		})
 		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
-			out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
+			By("Running docker run with --gpus all")
 			out4, _, err := runner.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out4))
 		})
@ -136,53 +117,52 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 	// The following should all produce the same result.
 	When("Running the cuda-deviceQuery sample", Ordered, func() {
 		BeforeAll(func(ctx context.Context) {
-			_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
+			_, _, err := runner.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 		})
 		var referenceOutput string
 		It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
 			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
 			var err error
-			referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
+			referenceOutput, _, err = runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(ContainSubstring("Result = PASS"))
 		})
 		It("should support automatic CDI spec generation", func(ctx context.Context) {
-			out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
+			By("Running docker run with --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
 			out2, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out2))
 		})
 		It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
-			out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
+			By("Running docker run with --runtime=nvidia --gpus all")
 			out3, _, err := runner.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out3))
 		})
 		It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
-			out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
+			By("Running docker run with --gpus all")
 			out4, _, err := runner.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(referenceOutput).To(Equal(out4))
 		})
 	})
-	Describe("CUDA Forward compatibility", Ordered, func() {
+	When("Testing CUDA Forward compatibility", Ordered, func() {
 		BeforeAll(func(ctx context.Context) {
-			_, _, err := r.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
+			compatOutput, _, err := runner.Run("docker run --rm -i -e NVIDIA_VISIBLE_DEVICES=void nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ls /usr/local/cuda/compat/libcuda.*.*\"")
 			Expect(err).ToNot(HaveOccurred())
 		})
 		BeforeAll(func(ctx context.Context) {
 			compatOutput, _, err := r.Run("docker run --rm -i -e NVIDIA_VISIBLE_DEVICES=void nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ls /usr/local/cuda/compat/libcuda.*.*\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(compatOutput).ToNot(BeEmpty())
 			compatDriverVersion := strings.TrimPrefix(filepath.Base(compatOutput), "libcuda.so.")
 			compatMajor := strings.SplitN(compatDriverVersion, ".", 2)[0]
-			driverOutput, _, err := r.Run("nvidia-smi -q | grep \"Driver Version\"")
+			driverOutput, _, err := runner.Run("nvidia-smi -q | grep \"Driver Version\"")
 			Expect(err).ToNot(HaveOccurred())
 			parts := strings.SplitN(driverOutput, ":", 2)
 			Expect(parts).To(HaveLen(2))
@ -198,19 +178,22 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
 		})
 		It("should work with the nvidia runtime in legacy mode", func(ctx context.Context) {
-			ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
+			By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all")
 			ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
 		})
 		It("should work with the nvidia runtime in CDI mode", func(ctx context.Context) {
-			ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true  --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
+			By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all")
 			ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true  --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
 		})
 		It("should NOT work with nvidia-container-runtime-hook", func(ctx context.Context) {
-			ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=runc --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
+			By("Running docker run with -e NVIDIA_DISABLE_REQUIRE=true --gpus all")
 			ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=runc --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(ldconfigOut).To(ContainSubstring("/usr/lib64"))
 		})
--- a/tests/e2e/runner.go
+++ b/tests/e2e/runner.go
@ -1,5 +1,6 @@
 /*
-* Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
--- a/tests/go.mod
+++ b/tests/go.mod
@ -1,8 +1,6 @@
 module github.com/NVIDIA/nvidia-container-toolkit/tests
-go 1.23.2
+go 1.24.1
 toolchain go1.24.1
 require (
 	github.com/onsi/ginkgo/v2 v2.23.4