Add update-nvidia-params hook to CDI spec
Some checks failed
CI Pipeline / code-scanning (push) Has been cancelled
CI Pipeline / variables (push) Has been cancelled
CI Pipeline / golang (push) Has been cancelled
CI Pipeline / image (push) Has been cancelled
CI Pipeline / e2e-test (push) Has been cancelled

This hook is not added to management specs.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2025-02-13 15:15:32 +01:00
parent 62483b5779
commit fd37ec2f1a
No known key found for this signature in database
6 changed files with 38 additions and 5 deletions

View File

@ -102,11 +102,6 @@ containerEdits:
- update-ldcache
- --folder
- /lib/x86_64-linux-gnu
- hookName: createContainer
path: {{ .toolkitRoot }}/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- update-nvidia-params
mounts:
- hostPath: /host/driver/root/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77

View File

@ -104,6 +104,11 @@ containerEdits:
- update-ldcache
- --folder
- /lib/x86_64-linux-gnu
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- update-nvidia-params
mounts:
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77

View File

@ -44,4 +44,7 @@ const (
// HookEnableCudaCompat refers to the hook used to enable CUDA Forward Compatibility.
// This was added with v1.17.5 of the NVIDIA Container Toolkit.
HookEnableCudaCompat = HookName("enable-cuda-compat")
// HookUpdateNvidiaParams refers to the hook used to ensure that device nodes
// are not created by nvidia-smi in a container.
HookUpdateNvidiaParams = HookName("update-nvidia-params")
)

View File

@ -115,6 +115,14 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover
updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.nvidiaCDIHookPath, l.ldconfigPath)
discoverers = append(discoverers, updateLDCache)
// Only add the update-nvidia-params hook when this library instance reports
// it as supported; otherwise the discoverer list is left unchanged.
// NOTE(review): presumably HookIsSupported consults l.disabledHooks, so modes
// that disable HookUpdateNvidiaParams never emit this hook — confirm.
if l.HookIsSupported(HookUpdateNvidiaParams) {
// Build the hook from the configured nvidia-cdi-hook path and the
// "update-nvidia-params" hook name.
updateNvidiaParams := discover.CreateNvidiaCDIHook(
l.nvidiaCDIHookPath,
"update-nvidia-params",
)
// The hook is merged with the other driver library discoverers below.
discoverers = append(discoverers, updateNvidiaParams)
}
d := discover.Merge(discoverers...)
return d, nil

View File

@ -146,6 +146,9 @@ func New(opts ...Option) (Interface, error) {
}
// Management containers in general do not require CUDA Forward compatibility.
l.disabledHooks[HookEnableCudaCompat] = true
// For Management containers we allow device node creation:
l.disabledHooks[HookUpdateNvidiaParams] = true
lib = (*managementlib)(l)
case ModeNvml:
lib = (*nvmllib)(l)

View File

@ -215,4 +215,23 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
Expect(ldconfigOut).To(ContainSubstring("/usr/lib64"))
})
})
// Verifies that containers started with GPU access see
// "ModifyDeviceFiles: 0" in /proc/driver/nvidia/params, for both the
// --gpus (runc) path and the nvidia runtime with a CDI device request.
Describe("Disabling device node creation", Ordered, func() {
// Pull the ubuntu image once before the specs in this container run.
BeforeAll(func(ctx context.Context) {
_, _, err := r.Run("docker pull ubuntu")
Expect(err).ToNot(HaveOccurred())
})
// Case 1: default runc runtime with --gpus=all.
It("should work with nvidia-container-runtime-hook", func(ctx context.Context) {
output, _, err := r.Run("docker run --rm -i --runtime=runc --gpus=all ubuntu bash -c \"grep ModifyDeviceFiles: /proc/driver/nvidia/params\"")
Expect(err).ToNot(HaveOccurred())
// The grep output must be exactly this single line, including the trailing newline.
Expect(output).To(Equal("ModifyDeviceFiles: 0\n"))
})
// Case 2: nvidia runtime with the device requested by its CDI name
// (runtime.nvidia.com/gpu=all) through NVIDIA_VISIBLE_DEVICES.
It("should work with automatic CDI spec generation", func(ctx context.Context) {
output, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu bash -c \"grep ModifyDeviceFiles: /proc/driver/nvidia/params\"")
Expect(err).ToNot(HaveOccurred())
// Same expectation as the runc case: the container must see ModifyDeviceFiles set to 0.
Expect(output).To(Equal("ModifyDeviceFiles: 0\n"))
})
})
})