This commit is contained in:
Carlos Eduardo Arango Gutierrez 2025-05-23 11:12:28 +02:00 committed by GitHub
commit d88b7db098
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 262 additions and 61 deletions

View File

@ -57,6 +57,7 @@ type options struct {
configSearchPaths cli.StringSlice
librarySearchPaths cli.StringSlice
disabledHooks cli.StringSlice
csv struct {
files cli.StringSlice
@ -176,6 +177,13 @@ func (m command) build() *cli.Command {
Usage: "Specify a pattern the CSV mount specifications.",
Destination: &opts.csv.ignorePatterns,
},
&cli.StringSliceFlag{
Name: "disable-hook",
Aliases: []string{"disable-hooks"},
Usage: "Hook to skip when generating the CDI specification. Can be specified multiple times. Can be a comma-separated list of hooks or a single hook name.",
Value: cli.NewStringSlice(),
Destination: &opts.disabledHooks,
},
}
return &c
@ -262,7 +270,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
deviceNamers = append(deviceNamers, deviceNamer)
}
cdilib, err := nvcdi.New(
initOpts := []nvcdi.Option{
nvcdi.WithLogger(m.logger),
nvcdi.WithDriverRoot(opts.driverRoot),
nvcdi.WithDevRoot(opts.devRoot),
@ -276,7 +284,13 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),
// We set the following to allow for dependency injection:
nvcdi.WithNvmlLib(opts.nvmllib),
)
}
for _, hook := range opts.disabledHooks.Value() {
initOpts = append(initOpts, nvcdi.WithDisabledHook(hook))
}
cdilib, err := nvcdi.New(initOpts...)
if err != nil {
return nil, fmt.Errorf("failed to create CDI library: %v", err)
}

View File

@ -26,6 +26,7 @@ import (
"github.com/NVIDIA/go-nvml/pkg/nvml/mock/dgxa100"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
)
@ -36,6 +37,9 @@ func TestGenerateSpec(t *testing.T) {
require.NoError(t, err)
driverRoot := filepath.Join(moduleRoot, "testdata", "lookup", "rootfs-1")
disableHook1 := cli.NewStringSlice("enable-cuda-compat")
disableHook2 := cli.NewStringSlice("enable-cuda-compat", "update-ldcache")
disableHook3 := cli.NewStringSlice("all")
logger, _ := testlog.NewNullLogger()
testCases := []struct {
@ -113,6 +117,179 @@ containerEdits:
- nodev
- rbind
- rprivate
`,
},
{
description: "disableHooks1",
options: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
driverRoot: driverRoot,
disabledHooks: *disableHook1,
},
expectedOptions: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
driverRoot: driverRoot,
disabledHooks: *disableHook1,
},
expectedSpec: `---
cdiVersion: 0.5.0
kind: example.com/device
devices:
- name: "0"
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
- name: all
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
containerEdits:
env:
- NVIDIA_VISIBLE_DEVICES=void
deviceNodes:
- path: /dev/nvidiactl
hostPath: {{ .driverRoot }}/dev/nvidiactl
hooks:
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- create-symlinks
- --link
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- update-ldcache
- --folder
- /lib/x86_64-linux-gnu
mounts:
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
options:
- ro
- nosuid
- nodev
- rbind
- rprivate
`,
},
{
description: "disableHooks2",
options: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
driverRoot: driverRoot,
disabledHooks: *disableHook2,
},
expectedOptions: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
driverRoot: driverRoot,
disabledHooks: *disableHook2,
},
expectedSpec: `---
cdiVersion: 0.5.0
kind: example.com/device
devices:
- name: "0"
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
- name: all
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
containerEdits:
env:
- NVIDIA_VISIBLE_DEVICES=void
deviceNodes:
- path: /dev/nvidiactl
hostPath: {{ .driverRoot }}/dev/nvidiactl
hooks:
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- create-symlinks
- --link
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
mounts:
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
options:
- ro
- nosuid
- nodev
- rbind
- rprivate
`,
},
{
description: "disableHooksAll",
options: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
driverRoot: driverRoot,
disabledHooks: *disableHook3,
},
expectedOptions: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
driverRoot: driverRoot,
disabledHooks: *disableHook3,
},
expectedSpec: `---
cdiVersion: 0.5.0
kind: example.com/device
devices:
- name: "0"
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
- name: all
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
containerEdits:
env:
- NVIDIA_VISIBLE_DEVICES=void
deviceNodes:
- path: /dev/nvidiactl
hostPath: {{ .driverRoot }}/dev/nvidiactl
mounts:
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
options:
- ro
- nosuid
- nodev
- rbind
- rprivate
`,
},
}

View File

@ -44,26 +44,66 @@ func (h *Hook) Hooks() ([]Hook, error) {
return []Hook{*h}, nil
}
// Option is a function that configures the nvcdilib
type HookName string
// disabledHooks allows individual hooks to be disabled.
type disabledHooks map[HookName]bool
const (
// HookEnableCudaCompat refers to the hook used to enable CUDA Forward Compatibility.
// This was added with v1.17.5 of the NVIDIA Container Toolkit.
HookEnableCudaCompat = HookName("enable-cuda-compat")
// directory path to be mounted into a container.
HookCreateSymlinks = HookName("create-symlinks")
// HookUpdateLDCache refers to the hook used to Update the dynamic linker
// cache inside the directory path to be mounted into a container.
HookUpdateLDCache = HookName("update-ldcache")
)
// AllHooks maintains a future-proof list of all defined hooks.
var AllHooks = []HookName{
HookEnableCudaCompat,
HookCreateSymlinks,
HookUpdateLDCache,
}
type Option func(*CDIHook)
type CDIHook struct {
nvidiaCDIHookPath string
disabledHooks disabledHooks
}
type HookCreator interface {
Create(string, ...string) *Hook
Create(HookName, ...string) *Hook
}
func NewHookCreator(nvidiaCDIHookPath string) HookCreator {
func WithDisabledHooks(hooks ...HookName) Option {
return func(c *CDIHook) {
for _, hook := range hooks {
c.disabledHooks[hook] = true
}
}
}
func NewHookCreator(nvidiaCDIHookPath string, opts ...Option) HookCreator {
CDIHook := &CDIHook{
nvidiaCDIHookPath: nvidiaCDIHookPath,
disabledHooks: disabledHooks{},
}
for _, opt := range opts {
opt(CDIHook)
}
return CDIHook
}
func (c CDIHook) Create(name string, args ...string) *Hook {
func (c CDIHook) Create(name HookName, args ...string) *Hook {
if c.disabledHooks[name] {
return nil
}
if name == "create-symlinks" {
if len(args) == 0 {
return nil
@ -79,7 +119,7 @@ func (c CDIHook) Create(name string, args ...string) *Hook {
return &Hook{
Lifecycle: cdi.CreateContainerHook,
Path: c.nvidiaCDIHookPath,
Args: append(c.requiredArgs(name), args...),
Args: append(c.requiredArgs(string(name)), args...),
}
}

View File

@ -72,7 +72,7 @@ func createLDCacheUpdateHook(hookCreator HookCreator, ldconfig string, libraries
args = append(args, "--folder", f)
}
return hookCreator.Create("update-ldcache", args...)
return hookCreator.Create(HookUpdateLDCache, args...)
}
// getLibraryPaths extracts the library dirs from the specified mounts

View File

@ -21,6 +21,7 @@ import (
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
)
@ -36,12 +37,12 @@ type Interface interface {
GetDeviceSpecsByID(...string) ([]specs.Device, error)
}
// A HookName refers to one of the predefined set of CDI hooks that may be
// included in the generated CDI specification.
type HookName string
// HookName is an alias for the discover.HookName type.
type HookName = discover.HookName
// Aliases for the discover.HookName constants.
const (
// HookEnableCudaCompat refers to the hook used to enable CUDA Forward Compatibility.
// This was added with v1.17.5 of the NVIDIA Container Toolkit.
HookEnableCudaCompat = HookName("enable-cuda-compat")
HookEnableCudaCompat = discover.HookEnableCudaCompat
HookCreateSymlinks = discover.HookCreateSymlinks
HookUpdateLDCache = discover.HookUpdateLDCache
)

View File

@ -106,11 +106,9 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover
)
discoverers = append(discoverers, driverDotSoSymlinksDiscoverer)
if l.HookIsSupported(HookEnableCudaCompat) {
// TODO: The following should use the version directly.
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, l.driver)
discoverers = append(discoverers, cudaCompatLibHookDiscoverer)
}
// TODO: The following should use the version directly.
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, l.driver)
discoverers = append(discoverers, cudaCompatLibHookDiscoverer)
updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath)
discoverers = append(discoverers, updateLDCache)

View File

@ -135,7 +135,7 @@ func (m nvidiaSMISimlinkHook) Hooks() ([]discover.Hook, error) {
}
link := "/usr/bin/nvidia-smi"
links := []string{fmt.Sprintf("%s::%s", target, link)}
symlinkHook := m.hookCreator.Create("create-symlinks", links...)
symlinkHook := m.hookCreator.Create(HookCreateSymlinks, links...)
return symlinkHook.Hooks()
}

View File

@ -1,30 +0,0 @@
/**
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package nvcdi
// disabledHooks allows individual hooks to be disabled.
type disabledHooks map[HookName]bool
// HookIsSupported checks whether a hook of the specified name is supported.
// Hooks must be explicitly disabled, meaning that if no disabled hooks are
// all hooks are supported.
func (l *nvcdilib) HookIsSupported(h HookName) bool {
if len(l.disabledHooks) == 0 {
return true
}
return !l.disabledHooks[h]
}

View File

@ -56,15 +56,13 @@ type nvcdilib struct {
mergedDeviceOptions []transform.MergedDeviceOption
disabledHooks disabledHooks
disabledHooks []discover.HookName
hookCreator discover.HookCreator
}
// New creates a new nvcdi library
func New(opts ...Option) (Interface, error) {
l := &nvcdilib{
disabledHooks: make(disabledHooks),
}
l := &nvcdilib{}
for _, opt := range opts {
opt(l)
}
@ -81,8 +79,6 @@ func New(opts ...Option) (Interface, error) {
if l.nvidiaCDIHookPath == "" {
l.nvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook"
}
// create hookCreator
l.hookCreator = discover.NewHookCreator(l.nvidiaCDIHookPath)
if l.driverRoot == "" {
l.driverRoot = "/"
@ -150,7 +146,7 @@ func New(opts ...Option) (Interface, error) {
l.vendor = "management.nvidia.com"
}
// Management containers in general do not require CUDA Forward compatibility.
l.disabledHooks[HookEnableCudaCompat] = true
l.disabledHooks = append(l.disabledHooks, HookEnableCudaCompat)
lib = (*managementlib)(l)
case ModeNvml:
lib = (*nvmllib)(l)
@ -175,6 +171,9 @@ func New(opts ...Option) (Interface, error) {
return nil, fmt.Errorf("unknown mode %q", l.mode)
}
// create hookCreator
l.hookCreator = discover.NewHookCreator(l.nvidiaCDIHookPath, discover.WithDisabledHooks(l.disabledHooks...))
w := wrapper{
Interface: lib,
vendor: l.vendor,

View File

@ -21,6 +21,7 @@ import (
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"github.com/NVIDIA/go-nvml/pkg/nvml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
)
@ -158,11 +159,12 @@ func WithLibrarySearchPaths(paths []string) Option {
// WithDisabledHook allows specific hooks to the disabled.
// This option can be specified multiple times for each hook.
func WithDisabledHook(hook HookName) Option {
func WithDisabledHook[T string | HookName](hook T) Option {
return func(o *nvcdilib) {
if o.disabledHooks == nil {
o.disabledHooks = make(map[HookName]bool)
if hook == "all" {
o.disabledHooks = discover.AllHooks
} else {
o.disabledHooks = append(o.disabledHooks, discover.HookName(hook))
}
o.disabledHooks[hook] = true
}
}