diff --git a/cmd/nvidia-ctk/cdi/generate/common.go b/cmd/nvidia-ctk/cdi/generate/common.go index 3e6cf944..7775acfb 100644 --- a/cmd/nvidia-ctk/cdi/generate/common.go +++ b/cmd/nvidia-ctk/cdi/generate/common.go @@ -27,7 +27,7 @@ import ( // NewCommonDiscoverer returns a discoverer for entities that are not associated with a specific CDI device. // This includes driver libraries and meta devices, for example. -func NewCommonDiscoverer(logger *logrus.Logger, root string, nvmllib nvml.Interface) (discover.Discover, error) { +func NewCommonDiscoverer(logger *logrus.Logger, root string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) { metaDevices := discover.NewDeviceDiscoverer( logger, lookup.NewCharDeviceLocator(logger, root), @@ -45,7 +45,7 @@ func NewCommonDiscoverer(logger *logrus.Logger, root string, nvmllib nvml.Interf return nil, fmt.Errorf("error constructing discoverer for graphics mounts: %v", err) } - driverFiles, err := NewDriverDiscoverer(logger, root, nvmllib) + driverFiles, err := NewDriverDiscoverer(logger, root, nvidiaCTKPath, nvmllib) if err != nil { return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err) } diff --git a/cmd/nvidia-ctk/cdi/generate/device-folder-permissions.go b/cmd/nvidia-ctk/cdi/generate/device-folder-permissions.go index 4d180c80..d4602544 100644 --- a/cmd/nvidia-ctk/cdi/generate/device-folder-permissions.go +++ b/cmd/nvidia-ctk/cdi/generate/device-folder-permissions.go @@ -20,15 +20,15 @@ import ( "path/filepath" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" - "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" "github.com/container-orchestrated-devices/container-device-interface/specs-go" "github.com/sirupsen/logrus" ) type deviceFolderPermissions struct { - logger *logrus.Logger - root string - folders []string + logger *logrus.Logger + root string + nvidiaCTKPath string + folders []string } var _ discover.Discover = (*deviceFolderPermissions)(nil) @@ -39,7 +39,7 @@ var _ discover.Discover = (*deviceFolderPermissions)(nil) // The nested devices that are applicable to the NVIDIA GPU devices are: // - DRM devices at /dev/dri/* // - NVIDIA Caps devices at /dev/nvidia-caps/* -func NewDeviceFolderPermissionHookDiscoverer(logger *logrus.Logger, root string, deviceSpecs []specs.Device) (discover.Discover, error) { +func NewDeviceFolderPermissionHookDiscoverer(logger *logrus.Logger, root string, nvidiaCTKPath string, deviceSpecs []specs.Device) (discover.Discover, error) { var folders []string seen := make(map[string]bool) for _, device := range deviceSpecs { @@ -65,9 +65,10 @@ func NewDeviceFolderPermissionHookDiscoverer(logger *logrus.Logger, root string, } d := &deviceFolderPermissions{ - logger: logger, - root: root, - folders: folders, + logger: logger, + root: root, + nvidiaCTKPath: nvidiaCTKPath, + folders: folders, } return d, nil @@ -88,11 +89,9 @@ func (d *deviceFolderPermissions) Hooks() ([]discover.Hook, error) { for _, folder := range d.folders { args = append(args, "--path", folder) } + hook := discover.CreateNvidiaCTKHook( - d.logger, - lookup.NewExecutableLocator(d.logger, d.root), - nvidiaCTKExecutable, - nvidiaCTKDefaultFilePath, + d.nvidiaCTKPath, "chmod", args..., ) diff --git a/cmd/nvidia-ctk/cdi/generate/driver.go b/cmd/nvidia-ctk/cdi/generate/driver.go index 0408514c..c89718cb 100644 --- a/cmd/nvidia-ctk/cdi/generate/driver.go +++ b/cmd/nvidia-ctk/cdi/generate/driver.go @@ -29,22 +29,23 @@ import ( ) type driverLibraries struct { - logger *logrus.Logger - root string - libraries []string + logger *logrus.Logger + root string + nvidiaCTKPath string + libraries []string } var _ discover.Discover = (*driverLibraries)(nil) // NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation. // The supplied NVML Library is used to query the expected driver version. -func NewDriverDiscoverer(logger *logrus.Logger, root string, nvmllib nvml.Interface) (discover.Discover, error) { +func NewDriverDiscoverer(logger *logrus.Logger, root string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) { version, r := nvmllib.SystemGetDriverVersion() if r != nvml.SUCCESS { return nil, fmt.Errorf("failed to determine driver version: %v", r) } - libraries, err := NewDriverLibraryDiscoverer(logger, root, version) + libraries, err := NewDriverLibraryDiscoverer(logger, root, nvidiaCTKPath, version) if err != nil { return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err) } @@ -63,16 +64,17 @@ func NewDriverDiscoverer(logger *logrus.Logger, root string, nvmllib nvml.Interf } // NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version. -func NewDriverLibraryDiscoverer(logger *logrus.Logger, root string, version string) (discover.Discover, error) { +func NewDriverLibraryDiscoverer(logger *logrus.Logger, root string, nvidiaCTKPath, version string) (discover.Discover, error) { libraries, err := findVersionLibs(logger, root, version) if err != nil { return nil, fmt.Errorf("failed to get libraries for driver version: %v", err) } d := driverLibraries{ - logger: logger, - root: root, - libraries: libraries, + logger: logger, + root: root, + nvidiaCTKPath: nvidiaCTKPath, + libraries: libraries, } return &d, nil @@ -129,13 +131,8 @@ func (d *driverLibraries) Mounts() ([]discover.Mount, error) { // Hooks returns a hook that updates the LDCache for the specified driver library paths. func (d *driverLibraries) Hooks() ([]discover.Hook, error) { - locator := lookup.NewExecutableLocator(d.logger, d.root) - hook := discover.CreateLDCacheUpdateHook( - d.logger, - locator, - nvidiaCTKExecutable, - nvidiaCTKDefaultFilePath, + d.nvidiaCTKPath, d.libraries, ) diff --git a/cmd/nvidia-ctk/cdi/generate/full-gpu.go b/cmd/nvidia-ctk/cdi/generate/full-gpu.go index 71fc7ce1..7b19f246 100644 --- a/cmd/nvidia-ctk/cdi/generate/full-gpu.go +++ b/cmd/nvidia-ctk/cdi/generate/full-gpu.go @@ -31,15 +31,16 @@ import ( // byPathHookDiscoverer discovers the entities required for injecting by-path DRM device links type byPathHookDiscoverer struct { - logger *logrus.Logger - root string - pciBusID string + logger *logrus.Logger + root string + nvidiaCTKPath string + pciBusID string } var _ discover.Discover = (*byPathHookDiscoverer)(nil) // NewFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device. -func NewFullGPUDiscoverer(logger *logrus.Logger, root string, d device.Device) (discover.Discover, error) { +func NewFullGPUDiscoverer(logger *logrus.Logger, root string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) { // TODO: The functionality to get device paths should be integrated into the go-nvlib/pkg/device.Device interface. // This will allow reuse here and in other code where the paths are queried such as the NVIDIA device plugin. minor, ret := d.GetMinorNumber() @@ -68,9 +69,10 @@ func NewFullGPUDiscoverer(logger *logrus.Logger, root string, d device.Device) ( ) byPathHooks := &byPathHookDiscoverer{ - logger: logger, - root: root, - pciBusID: pciBusID, + logger: logger, + root: root, + nvidiaCTKPath: nvidiaCTKPath, + pciBusID: pciBusID, } dd := discover.Merge( @@ -98,21 +100,18 @@ func (d *byPathHookDiscoverer) Hooks() ([]discover.Hook, error) { return nil, nil } - hookPath := "nvidia-ctk" - args := []string{hookPath, "hook", "create-symlinks"} + var args []string for _, l := range links { args = append(args, "--link", l) } - var hooks []discover.Hook - hook := discover.Hook{ - Lifecycle: "createContainer", - Path: hookPath, - Args: args, - } - hooks = append(hooks, hook) + hook := discover.CreateNvidiaCTKHook( + d.nvidiaCTKPath, + "create-symlinks", + args..., + ) - return hooks, nil + return []discover.Hook{hook}, nil } // Mounts returns an empty slice for a full GPU diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index 23922ba7..cb427b3d 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -25,6 +25,7 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" "github.com/NVIDIA/nvidia-container-toolkit/internal/edits" + "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" specs "github.com/container-orchestrated-devices/container-device-interface/specs-go" "github.com/sirupsen/logrus" @@ -47,9 +48,10 @@ type command struct { } type config struct { - output string - format string - root string + output string + format string + root string + nvidiaCTKPath string } // NewCommand constructs a generate-cdi command with the specified logger @@ -93,6 +95,11 @@ func (m command) build() *cli.Command { Usage: "Specify the root to use when discovering the entities that should be included in the CDI specification.", Destination: &cfg.root, }, + &cli.StringFlag{ + Name: "nvidia-ctk-path", + Usage: "Specify the path to use for the nvidia-ctk in the generated CDI specification. If this is left empty, the path will be searched.", + Destination: &cfg.nvidiaCTKPath, + }, } return &c @@ -111,7 +118,7 @@ func (m command) validateFlags(r *cli.Context, cfg *config) error { } func (m command) run(c *cli.Context, cfg *config) error { - spec, err := m.generateSpec(cfg.root) + spec, err := m.generateSpec(cfg.root, cfg.nvidiaCTKPath) if err != nil { return fmt.Errorf("failed to generate CDI spec: %v", err) } @@ -161,6 +168,30 @@ func (m command) run(c *cli.Context, cfg *config) error { return nil } +// findNvidiaCTK locates the nvidia-ctk executable to be used in hooks. +// If an override is specified, this is used instead. +func (m command) findNvidiaCTK(override string) string { + if override != "" { + m.logger.Debugf("Using specified NVIDIA Container Toolkit CLI path %v", override) + return override + } + + lookup := lookup.NewExecutableLocator(m.logger, "") + hookPath := nvidiaCTKDefaultFilePath + targets, err := lookup.Locate(nvidiaCTKExecutable) + if err != nil { + m.logger.Warnf("Failed to locate %v: %v", nvidiaCTKExecutable, err) + } else if len(targets) == 0 { + m.logger.Warnf("%v not found", nvidiaCTKExecutable) + } else { + m.logger.Debugf("Found %v candidates: %v", nvidiaCTKExecutable, targets) + hookPath = targets[0] + } + m.logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath) + + return hookPath +} + func formatFromFilename(filename string) string { ext := filepath.Ext(filename) switch strings.ToLower(ext) { @@ -190,7 +221,7 @@ func writeToOutput(format string, data []byte, output io.Writer) error { return nil } -func (m command) generateSpec(root string) (*specs.Spec, error) { +func (m command) generateSpec(root string, nvidiaCTKPath string) (*specs.Spec, error) { nvmllib := nvml.New() if r := nvmllib.Init(); r != nvml.SUCCESS { return nil, r @@ -199,7 +230,9 @@ func (m command) generateSpec(root string) (*specs.Spec, error) { devicelib := device.New(device.WithNvml(nvmllib)) - deviceSpecs, err := m.generateDeviceSpecs(devicelib, root) + useNvidiaCTKPath := m.findNvidiaCTK(nvidiaCTKPath) + + deviceSpecs, err := m.generateDeviceSpecs(devicelib, root, useNvidiaCTKPath) if err != nil { return nil, fmt.Errorf("failed to create device CDI specs: %v", err) } @@ -226,12 +259,12 @@ func (m command) generateSpec(root string) (*specs.Spec, error) { allEdits.Append(ipcEdits) - common, err := NewCommonDiscoverer(m.logger, root, nvmllib) + common, err := NewCommonDiscoverer(m.logger, root, nvidiaCTKPath, nvmllib) if err != nil { return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err) } - deviceFolderPermissionHooks, err := NewDeviceFolderPermissionHookDiscoverer(m.logger, root, deviceSpecs) + deviceFolderPermissionHooks, err := NewDeviceFolderPermissionHookDiscoverer(m.logger, root, nvidiaCTKPath, deviceSpecs) if err != nil { return nil, fmt.Errorf("failed to generated permission hooks for device nodes: %v", err) } @@ -255,7 +288,7 @@ func (m command) generateSpec(root string) (*specs.Spec, error) { return &spec, nil } -func (m command) generateDeviceSpecs(devicelib device.Interface, root string) ([]specs.Device, error) { +func (m command) generateDeviceSpecs(devicelib device.Interface, root string, nvidiaCTKPath string) ([]specs.Device, error) { var deviceSpecs []specs.Device err := devicelib.VisitDevices(func(i int, d device.Device) error { @@ -266,7 +299,7 @@ func (m command) generateDeviceSpecs(devicelib device.Interface, root string) ([ if isMigEnabled { return nil } - device, err := NewFullGPUDiscoverer(m.logger, root, d) + device, err := NewFullGPUDiscoverer(m.logger, root, nvidiaCTKPath, d) if err != nil { return fmt.Errorf("failed to create device: %v", err) } diff --git a/internal/discover/ldconfig.go b/internal/discover/ldconfig.go index d42fa4bf..19430041 100644 --- a/internal/discover/ldconfig.go +++ b/internal/discover/ldconfig.go @@ -57,27 +57,21 @@ func (d ldconfig) Hooks() ([]Hook, error) { return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err) } h := CreateLDCacheUpdateHook( - d.logger, - d.lookup, d.nvidiaCTKExecutablePath, - nvidiaCTKDefaultFilePath, getLibraryPaths(mounts), ) return []Hook{h}, nil } // CreateLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache -func CreateLDCacheUpdateHook(logger *logrus.Logger, lookup lookup.Locator, executable string, defaultPath string, libraries []string) Hook { +func CreateLDCacheUpdateHook(executable string, libraries []string) Hook { var args []string for _, f := range uniqueFolders(libraries) { args = append(args, "--folder", f) } hook := CreateNvidiaCTKHook( - logger, - lookup, executable, - defaultPath, "update-ldcache", args..., ) @@ -87,23 +81,11 @@ func CreateLDCacheUpdateHook(logger *logrus.Logger, lookup lookup.Locator, execu } // CreateNvidiaCTKHook creates a hook which invokes the NVIDIA Container CLI hook subcommand. -func CreateNvidiaCTKHook(logger *logrus.Logger, lookup lookup.Locator, executable string, defaultPath string, hookName string, additionalArgs ...string) Hook { - hookPath := defaultPath - targets, err := lookup.Locate(executable) - if err != nil { - logger.Warnf("Failed to locate %v: %v", executable, err) - } else if len(targets) == 0 { - logger.Warnf("%v not found", executable) - } else { - logger.Debugf("Found %v candidates: %v", executable, targets) - hookPath = targets[0] - } - logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath) - +func CreateNvidiaCTKHook(executable string, hookName string, additionalArgs ...string) Hook { return Hook{ Lifecycle: cdi.CreateContainerHook, - Path: hookPath, - Args: append([]string{filepath.Base(hookPath), "hook", hookName}, additionalArgs...), + Path: executable, + Args: append([]string{filepath.Base(executable), "hook", hookName}, additionalArgs...), } } diff --git a/internal/discover/ldconfig_test.go b/internal/discover/ldconfig_test.go index 763aa420..e160e437 100644 --- a/internal/discover/ldconfig_test.go +++ b/internal/discover/ldconfig_test.go @@ -24,12 +24,16 @@ import ( "github.com/stretchr/testify/require" ) +const ( + testNvidiaCTKPath = "/foo/bar/nvidia-ctk" +) + func TestLDCacheUpdateHook(t *testing.T) { logger, _ := testlog.NewNullLogger() cfg := Config{ Root: "/", - NVIDIAContainerToolkitCLIExecutablePath: "/foo/bar/nvidia-ctk", + NVIDIAContainerToolkitCLIExecutablePath: testNvidiaCTKPath, } testCases := []struct { @@ -86,7 +90,7 @@ func TestLDCacheUpdateHook(t *testing.T) { }, } expectedHook := Hook{ - Path: "/usr/bin/nvidia-ctk", + Path: testNvidiaCTKPath, Args: tc.expectedArgs, Lifecycle: "createContainer", } diff --git a/test/release/Makefile b/test/release/Makefile index 386468c1..18f0b156 100644 --- a/test/release/Makefile +++ b/test/release/Makefile @@ -14,7 +14,7 @@ WORKFLOW ?= nvidia-docker -DISTRIBUTIONS := ubuntu18.04 centos8 fedora35 +DISTRIBUTIONS := ubuntu20.04 ubuntu18.04 centos8 fedora35 IMAGE_TARGETS := $(patsubst %,image-%, $(DISTRIBUTIONS)) RUN_TARGETS := $(patsubst %,run-%, $(DISTRIBUTIONS)) @@ -33,6 +33,7 @@ $(IMAGE_TARGETS): image-%: $(DOCKERFILE) $(shell dirname $(DOCKERFILE)) +%-ubuntu20.04: ARCH ?= amd64 %-ubuntu18.04: ARCH ?= amd64 %-centos8: ARCH ?= x86_64 %-fedora35: ARCH ?= x86_64