diff --git a/cmd/nvidia-ctk/cdi/generate/generate_test.go b/cmd/nvidia-ctk/cdi/generate/generate_test.go index 9e9a6a4e..02b9ffaa 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate_test.go +++ b/cmd/nvidia-ctk/cdi/generate/generate_test.go @@ -104,6 +104,11 @@ containerEdits: - update-ldcache - --folder - /lib/x86_64-linux-gnu + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - disable-device-node-modification mounts: - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 diff --git a/pkg/nvcdi/api.go b/pkg/nvcdi/api.go index 2988026f..86b45e25 100644 --- a/pkg/nvcdi/api.go +++ b/pkg/nvcdi/api.go @@ -44,4 +44,7 @@ const ( // HookEnableCudaCompat refers to the hook used to enable CUDA Forward Compatibility. // This was added with v1.17.5 of the NVIDIA Container Toolkit. HookEnableCudaCompat = HookName("enable-cuda-compat") + // HookDisableDeviceNodeModification refers to the hook used to ensure that device nodes + // are not created by nvidia-smi in a container. + HookDisableDeviceNodeModification = HookName("disable-device-node-modification") ) diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go index f49f1129..954483df 100644 --- a/pkg/nvcdi/driver-nvml.go +++ b/pkg/nvcdi/driver-nvml.go @@ -115,6 +115,14 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.nvidiaCDIHookPath, l.ldconfigPath) discoverers = append(discoverers, updateLDCache) + if l.HookIsSupported(HookDisableDeviceNodeModification) { + updateNvidiaParams := discover.CreateNvidiaCDIHook( + l.nvidiaCDIHookPath, + "disable-device-node-modification", + ) + discoverers = append(discoverers, updateNvidiaParams) + } + d := discover.Merge(discoverers...) return d, nil diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index 8e7653b4..94793a4a 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -146,6 +146,9 @@ func New(opts ...Option) (Interface, error) { } // Management containers in general do not require CUDA Forward compatibility. l.disabledHooks[HookEnableCudaCompat] = true + // For Management containers we allow device node creation: + l.disabledHooks[HookDisableDeviceNodeModification] = true + lib = (*managementlib)(l) case ModeNvml: lib = (*nvmllib)(l) diff --git a/tests/e2e/nvidia-container-toolkit_test.go b/tests/e2e/nvidia-container-toolkit_test.go index 5948014b..815aafee 100644 --- a/tests/e2e/nvidia-container-toolkit_test.go +++ b/tests/e2e/nvidia-container-toolkit_test.go @@ -215,4 +215,23 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() { Expect(ldconfigOut).To(ContainSubstring("/usr/lib64")) }) }) + + Describe("Disabling device node creation", Ordered, func() { + BeforeAll(func(ctx context.Context) { + _, _, err := r.Run("docker pull ubuntu") + Expect(err).ToNot(HaveOccurred()) + }) + + It("should work with nvidia-container-runtime-hook", func(ctx context.Context) { + output, _, err := r.Run("docker run --rm -i --runtime=runc --gpus=all ubuntu bash -c \"grep ModifyDeviceFiles: /proc/driver/nvidia/params\"") + Expect(err).ToNot(HaveOccurred()) + Expect(output).To(Equal("ModifyDeviceFiles: 0\n")) + }) + + It("should work with automatic CDI spec generation", func(ctx context.Context) { + output, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu bash -c \"grep ModifyDeviceFiles: /proc/driver/nvidia/params\"") + Expect(err).ToNot(HaveOccurred()) + Expect(output).To(Equal("ModifyDeviceFiles: 0\n")) + }) + }) })