diff --git a/cmd/nvidia-ctk/cdi/generate/generate_test.go b/cmd/nvidia-ctk/cdi/generate/generate_test.go index a08ab19c..6f762761 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate_test.go +++ b/cmd/nvidia-ctk/cdi/generate/generate_test.go @@ -111,6 +111,13 @@ containerEdits: - /lib/x86_64-linux-gnu env: - NVIDIA_CTK_DEBUG=false + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - disable-device-node-modification + env: + - NVIDIA_CTK_DEBUG=false mounts: - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 @@ -180,6 +187,13 @@ containerEdits: - /lib/x86_64-linux-gnu env: - NVIDIA_CTK_DEBUG=false + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - disable-device-node-modification + env: + - NVIDIA_CTK_DEBUG=false mounts: - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 @@ -240,6 +254,13 @@ containerEdits: - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so env: - NVIDIA_CTK_DEBUG=false + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - disable-device-node-modification + env: + - NVIDIA_CTK_DEBUG=false mounts: - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 diff --git a/internal/discover/hooks.go b/internal/discover/hooks.go index 5e2cdec4..3f2c9ebb 100644 --- a/internal/discover/hooks.go +++ b/internal/discover/hooks.go @@ -35,6 +35,11 @@ const ( ChmodHook = HookName("chmod") // A CreateSymlinksHook is used to create symlinks in the container. CreateSymlinksHook = HookName("create-symlinks") + // DisableDeviceNodeModificationHook refers to the hook used to ensure that + // device nodes are not created by libnvidia-ml.so or nvidia-smi in a + // container. + // Added in v1.17.8 + DisableDeviceNodeModificationHook = HookName("disable-device-node-modification") // An EnableCudaCompatHook is used to enabled CUDA Forward Compatibility. // Added in v1.17.5 EnableCudaCompatHook = HookName("enable-cuda-compat") diff --git a/pkg/nvcdi/api.go b/pkg/nvcdi/api.go index 4ff11e47..fa05029c 100644 --- a/pkg/nvcdi/api.go +++ b/pkg/nvcdi/api.go @@ -46,6 +46,11 @@ const ( // A CreateSymlinksHook is used to create symlinks in the container. CreateSymlinksHook = discover.CreateSymlinksHook + // DisableDeviceNodeModificationHook refers to the hook used to ensure that + // device nodes are not created by libnvidia-ml.so or nvidia-smi in a + // container. + // Added in v1.17.8 + DisableDeviceNodeModificationHook = discover.DisableDeviceNodeModificationHook // An EnableCudaCompatHook is used to enabled CUDA Forward Compatibility. // Added in v1.17.5 EnableCudaCompatHook = discover.EnableCudaCompatHook diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go index ff02ac72..764b648a 100644 --- a/pkg/nvcdi/driver-nvml.go +++ b/pkg/nvcdi/driver-nvml.go @@ -113,6 +113,9 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath) discoverers = append(discoverers, updateLDCache) + disableDeviceNodeModification := l.hookCreator.Create(DisableDeviceNodeModificationHook) + discoverers = append(discoverers, disableDeviceNodeModification) + d := discover.Merge(discoverers...) return d, nil diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index 409721ef..8d7177fc 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -130,7 +130,7 @@ func New(opts ...Option) (Interface, error) { l.vendor = "management.nvidia.com" } // Management containers in general do not require CUDA Forward compatibility. - l.disabledHooks = append(l.disabledHooks, HookEnableCudaCompat) + l.disabledHooks = append(l.disabledHooks, HookEnableCudaCompat, DisableDeviceNodeModificationHook) lib = (*managementlib)(l) case ModeNvml: lib = (*nvmllib)(l) diff --git a/tests/e2e/nvidia-container-toolkit_test.go b/tests/e2e/nvidia-container-toolkit_test.go index 2b1ef289..9fcee330 100644 --- a/tests/e2e/nvidia-container-toolkit_test.go +++ b/tests/e2e/nvidia-container-toolkit_test.go @@ -216,4 +216,23 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() { Expect(ldconfigOut).To(ContainSubstring("/usr/lib64")) }) }) + + Describe("Disabling device node creation", Ordered, func() { + BeforeAll(func(ctx context.Context) { + _, _, err := runner.Run("docker pull ubuntu") + Expect(err).ToNot(HaveOccurred()) + }) + + It("should work with nvidia-container-runtime-hook", func(ctx context.Context) { + output, _, err := runner.Run("docker run --rm -i --runtime=runc --gpus=all ubuntu bash -c \"grep ModifyDeviceFiles: /proc/driver/nvidia/params\"") + Expect(err).ToNot(HaveOccurred()) + Expect(output).To(Equal("ModifyDeviceFiles: 0\n")) + }) + + It("should work with automatic CDI spec generation", func(ctx context.Context) { + output, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu bash -c \"grep ModifyDeviceFiles: /proc/driver/nvidia/params\"") + Expect(err).ToNot(HaveOccurred()) + Expect(output).To(Equal("ModifyDeviceFiles: 0\n")) + }) + }) })