diff --git a/cmd/nvidia-ctk/cdi/generate/device-folder-permissions.go b/cmd/nvidia-ctk/cdi/generate/device-folder-permissions.go index 9359d116..4d180c80 100644 --- a/cmd/nvidia-ctk/cdi/generate/device-folder-permissions.go +++ b/cmd/nvidia-ctk/cdi/generate/device-folder-permissions.go @@ -17,10 +17,7 @@ package generate import ( - "fmt" - "os" "path/filepath" - "strings" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" @@ -29,9 +26,9 @@ import ( ) type deviceFolderPermissions struct { - logger *logrus.Logger - root string - foldersByMode map[string][]string + logger *logrus.Logger + root string + folders []string } var _ discover.Discover = (*deviceFolderPermissions)(nil) @@ -39,41 +36,38 @@ var _ discover.Discover = (*deviceFolderPermissions)(nil) // NewDeviceFolderPermissionHookDiscoverer creates a discoverer that can be used to update the permissions for the parent folders of nested device nodes from the specified set of device specs. // This works around an issue with rootless podman when using crun as a low-level runtime. // See https://github.com/containers/crun/issues/1047 -// TODO: This currently assumes `root == ""` +// The nested devices that are applicable to the NVIDIA GPU devices are: +// - DRM devices at /dev/dri/* +// - NVIDIA Caps devices at /dev/nvidia-caps/* func NewDeviceFolderPermissionHookDiscoverer(logger *logrus.Logger, root string, deviceSpecs []specs.Device) (discover.Discover, error) { - var paths []string + var folders []string seen := make(map[string]bool) - for _, device := range deviceSpecs { for _, dn := range device.ContainerEdits.DeviceNodes { - if !strings.HasPrefix(dn.Path, "/dev") { - logger.Warningf("Skipping unexpected device folder path for device %v", dn) + df := filepath.Dir(dn.Path) + if seen[df] { continue } - for df := filepath.Dir(dn.Path); df != "/dev"; df = filepath.Dir(df) { - if seen[df] { - continue - } - paths = append(paths, df) - seen[df] = true + // We only consider the special case paths + if df != "/dev/dri" && df != "/dev/nvidia-caps" { + continue } + folders = append(folders, df) + seen[df] = true + } + if len(folders) == 2 { + break } } - foldersByMode := make(map[string][]string) - for _, p := range paths { - info, err := os.Stat(p) - if err != nil { - return nil, fmt.Errorf("failed to get info for path %v: %v", p, err) - } - mode := fmt.Sprintf("%o", info.Mode().Perm()) - foldersByMode[mode] = append(foldersByMode[mode], p) + if len(folders) == 0 { + return discover.None{}, nil } d := &deviceFolderPermissions{ - logger: logger, - root: root, - foldersByMode: foldersByMode, + logger: logger, + root: root, + folders: folders, } return d, nil @@ -84,31 +78,26 @@ func (d *deviceFolderPermissions) Devices() ([]discover.Device, error) { return nil, nil } -// Hooks returns a set of hooks that sets the file modes of parent folders for device nodes. -// One hook is returned per mode. +// Hooks returns a set of hooks that sets the file mode to 755 of parent folders for nested device nodes. func (d *deviceFolderPermissions) Hooks() ([]discover.Hook, error) { - locator := lookup.NewExecutableLocator(d.logger, d.root) - - var hooks []discover.Hook - for mode, folders := range d.foldersByMode { - args := []string{"--mode", mode} - for _, folder := range folders { - args = append(args, "--path", folder) - } - - hook := discover.CreateNvidiaCTKHook( - d.logger, - locator, - nvidiaCTKExecutable, - nvidiaCTKDefaultFilePath, - "chmod", - args..., - ) - - hooks = append(hooks, hook) + if len(d.folders) == 0 { + return nil, nil } - return hooks, nil + args := []string{"--mode", "755"} + for _, folder := range d.folders { + args = append(args, "--path", folder) + } + hook := discover.CreateNvidiaCTKHook( + d.logger, + lookup.NewExecutableLocator(d.logger, d.root), + nvidiaCTKExecutable, + nvidiaCTKDefaultFilePath, + "chmod", + args..., + ) + + return []discover.Hook{hook}, nil } // Mounts are empty for this discoverer diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index 544f1219..cd4f52f0 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -49,6 +49,7 @@ type command struct { type config struct { output string format string + root string } // NewCommand constructs a generate-cdi command with the specified logger @@ -87,6 +88,11 @@ func (m command) build() *cli.Command { Value: formatYAML, Destination: &cfg.format, }, + &cli.StringFlag{ + Name: "root", + Usage: "Specify the root to use when discovering the entities that should be included in the CDI specification.", + Destination: &cfg.root, + }, } return &c @@ -105,7 +111,7 @@ func (m command) validateFlags(r *cli.Context, cfg *config) error { } func (m command) run(c *cli.Context, cfg *config) error { - spec, err := m.generateSpec() + spec, err := m.generateSpec(cfg.root) if err != nil { return fmt.Errorf("failed to generate CDI spec: %v", err) } @@ -184,7 +190,7 @@ func writeToOutput(format string, data []byte, output io.Writer) error { return nil } -func (m command) generateSpec() (*specs.Spec, error) { +func (m command) generateSpec(root string) (*specs.Spec, error) { nvmllib := nvml.New() if r := nvmllib.Init(); r != nvml.SUCCESS { return nil, r @@ -193,7 +199,7 @@ func (m command) generateSpec() (*specs.Spec, error) { devicelib := device.New(device.WithNvml(nvmllib)) - deviceSpecs, err := m.generateDeviceSpecs(devicelib) + deviceSpecs, err := m.generateDeviceSpecs(devicelib, root) if err != nil { return nil, fmt.Errorf("failed to create device CDI specs: %v", err) } @@ -204,7 +210,7 @@ func (m command) generateSpec() (*specs.Spec, error) { allEdits := cdi.ContainerEdits{} - ipcs, err := NewIPCDiscoverer(m.logger, "") + ipcs, err := NewIPCDiscoverer(m.logger, root) if err != nil { return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err) } @@ -220,12 +226,12 @@ func (m command) generateSpec() (*specs.Spec, error) { allEdits.Append(ipcEdits) - common, err := NewCommonDiscoverer(m.logger, "", nvmllib) + common, err := NewCommonDiscoverer(m.logger, root, nvmllib) if err != nil { return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err) } - deviceFolderPermissionHooks, err := NewDeviceFolderPermissionHookDiscoverer(m.logger, "", deviceSpecs) + deviceFolderPermissionHooks, err := NewDeviceFolderPermissionHookDiscoverer(m.logger, root, deviceSpecs) if err != nil { return nil, fmt.Errorf("failed to generated permission hooks for device nodes: %v", err) } @@ -249,7 +255,7 @@ func (m command) generateSpec() (*specs.Spec, error) { return &spec, nil } -func (m command) generateDeviceSpecs(devicelib device.Interface) ([]specs.Device, error) { +func (m command) generateDeviceSpecs(devicelib device.Interface, root string) ([]specs.Device, error) { var deviceSpecs []specs.Device err := devicelib.VisitDevices(func(i int, d device.Device) error { @@ -260,7 +266,7 @@ func (m command) generateDeviceSpecs(devicelib device.Interface) ([]specs.Device if isMigEnabled { return nil } - device, err := NewFullGPUDiscoverer(m.logger, "", d) + device, err := NewFullGPUDiscoverer(m.logger, root, d) if err != nil { return fmt.Errorf("failed to create device: %v", err) }