From 179d8655f9b5fce634a66c0e574be83471e0ef0b Mon Sep 17 00:00:00 2001 From: Avi Deitcher Date: Wed, 24 Apr 2024 11:47:45 +0300 Subject: [PATCH] Move nvidia-ctk hook command into own binary This change creates an nvidia-cdi-hook binary for implementing CDI hooks. This allows for these hooks to be separated from the nvidia-ctk command which may, for example, require libnvidia-ml to support other functionality. The nvidia-ctk hook subcommand is maintained as an alias for the time being to allow for existing CDI specifications referring to this path to work as expected. Signed-off-by: Avi Deitcher --- CHANGELOG.md | 2 + cmd/nvidia-cdi-hook/README.md | 31 +++++++ .../hook => nvidia-cdi-hook}/chmod/chmod.go | 0 cmd/nvidia-cdi-hook/commands/commands.go | 36 +++++++ .../create-symlinks/create-symlinks.go | 0 cmd/nvidia-cdi-hook/main.go | 93 +++++++++++++++++++ .../update-ldcache/update-ldcache.go | 0 cmd/nvidia-ctk/cdi/generate/generate.go | 15 +-- cmd/nvidia-ctk/hook/hook.go | 11 +-- internal/config/config.go | 28 +++++- internal/discover/graphics.go | 50 +++++----- internal/discover/hooks.go | 27 ++++-- internal/discover/ldconfig.go | 22 ++--- internal/discover/ldconfig_test.go | 16 ++-- internal/modifier/cdi.go | 2 +- internal/modifier/csv.go | 2 +- internal/modifier/graphics.go | 6 +- internal/platform-support/tegra/csv_test.go | 12 +-- internal/platform-support/tegra/symlinks.go | 12 +-- internal/platform-support/tegra/tegra.go | 10 +- internal/runtime/runtime.go | 1 + .../nvidia-container-toolkit-base.install | 1 + .../rpm/SPECS/nvidia-container-toolkit.spec | 5 +- pkg/nvcdi/common-nvml.go | 4 +- pkg/nvcdi/driver-nvml.go | 12 +-- pkg/nvcdi/driver-wsl.go | 22 ++--- pkg/nvcdi/driver-wsl_test.go | 18 ++-- pkg/nvcdi/full-gpu-nvml.go | 30 +++--- pkg/nvcdi/lib-csv.go | 2 +- pkg/nvcdi/lib-wsl.go | 2 +- pkg/nvcdi/lib.go | 6 +- pkg/nvcdi/management.go | 4 +- pkg/nvcdi/options.go | 9 +- pkg/nvcdi/transform/deduplicate_test.go | 12 +-- pkg/nvcdi/transform/merged-device_test.go 
| 20 ++-- .../transform/root/container-root_test.go | 36 +++---- pkg/nvcdi/transform/simplify_test.go | 28 +++--- .../workarounds-device-folder-permissions.go | 22 ++--- tools/container/toolkit/toolkit.go | 28 +++++- 39 files changed, 431 insertions(+), 206 deletions(-) create mode 100644 cmd/nvidia-cdi-hook/README.md rename cmd/{nvidia-ctk/hook => nvidia-cdi-hook}/chmod/chmod.go (100%) create mode 100644 cmd/nvidia-cdi-hook/commands/commands.go rename cmd/{nvidia-ctk/hook => nvidia-cdi-hook}/create-symlinks/create-symlinks.go (100%) create mode 100644 cmd/nvidia-cdi-hook/main.go rename cmd/{nvidia-ctk/hook => nvidia-cdi-hook}/update-ldcache/update-ldcache.go (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b08deca..6250f595 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # NVIDIA Container Toolkit Changelog +* Move `nvidia-ctk hook` commands to a separate `nvidia-cdi-hook` binary. The same subcommands are supported. + ## v1.15.0 * Remove `nvidia-container-runtime` and `nvidia-docker2` packages. diff --git a/cmd/nvidia-cdi-hook/README.md b/cmd/nvidia-cdi-hook/README.md new file mode 100644 index 00000000..807cf6c7 --- /dev/null +++ b/cmd/nvidia-cdi-hook/README.md @@ -0,0 +1,31 @@ +# NVIDIA CDI Hook + +The CLI `nvidia-cdi-hook` provides container device runtime hook capabilities when +called by a container runtime, as specified in a +[Container Device Interface](https://tags.cncf.io/container-device-interface/blob/main/SPEC.md) +file. + +## Generating a CDI + +The CDI itself is created for an NVIDIA-capable device using the +[`nvidia-ctk cdi generate`](../nvidia-ctk/) command. + +When `nvidia-ctk cdi generate` is run, the CDI specification is generated as a yaml file. +The CDI specification provides instructions for a container runtime to set up devices, files and +other resources for the container prior to starting it. Those instructions +may include executing command-line tools to prepare the filesystem. 
The execution +of such command-line tools is called a hook. + +`nvidia-cdi-hook` is the CLI tool that is expected to be called by the container runtime, +when specified by the CDI file. + +See the [`nvidia-ctk` documentation](../nvidia-ctk/README.md) for more information +on generating a CDI file. + +## Functionality + +The `nvidia-cdi-hook` CLI provides the following functionality: + +* `chmod` - Change the permissions of a file or directory inside the directory path to be mounted into a container. +* `create-symlinks` - Create symlinks inside the directory path to be mounted into a container. +* `update-ldcache` - Update the dynamic linker cache inside the directory path to be mounted into a container. diff --git a/cmd/nvidia-ctk/hook/chmod/chmod.go b/cmd/nvidia-cdi-hook/chmod/chmod.go similarity index 100% rename from cmd/nvidia-ctk/hook/chmod/chmod.go rename to cmd/nvidia-cdi-hook/chmod/chmod.go diff --git a/cmd/nvidia-cdi-hook/commands/commands.go b/cmd/nvidia-cdi-hook/commands/commands.go new file mode 100644 index 00000000..a222acf2 --- /dev/null +++ b/cmd/nvidia-cdi-hook/commands/commands.go @@ -0,0 +1,36 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package commands + +import ( + "github.com/urfave/cli/v2" + + "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod" + symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks" + ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache" + "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" +) + +// New creates the commands associated with supported CDI hooks. +// These are shared by the nvidia-cdi-hook and nvidia-ctk hook commands. +func New(logger logger.Interface) []*cli.Command { + return []*cli.Command{ + ldcache.NewCommand(logger), + symlinks.NewCommand(logger), + chmod.NewCommand(logger), + } +} diff --git a/cmd/nvidia-ctk/hook/create-symlinks/create-symlinks.go b/cmd/nvidia-cdi-hook/create-symlinks/create-symlinks.go similarity index 100% rename from cmd/nvidia-ctk/hook/create-symlinks/create-symlinks.go rename to cmd/nvidia-cdi-hook/create-symlinks/create-symlinks.go diff --git a/cmd/nvidia-cdi-hook/main.go b/cmd/nvidia-cdi-hook/main.go new file mode 100644 index 00000000..d7b7c271 --- /dev/null +++ b/cmd/nvidia-cdi-hook/main.go @@ -0,0 +1,93 @@ +/** +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package main + +import ( + "os" + + "github.com/sirupsen/logrus" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/info" + + cli "github.com/urfave/cli/v2" + + "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/commands" +) + +// options defines the options that can be set for the CLI through config files, +// environment variables, or command line flags +type options struct { + // Debug indicates whether the CLI is started in "debug" mode + Debug bool + // Quiet indicates whether the CLI is started in "quiet" mode + Quiet bool +} + +func main() { + logger := logrus.New() + + // Create a options struct to hold the parsed environment variables or command line flags + opts := options{} + + // Create the top-level CLI + c := cli.NewApp() + c.Name = "NVIDIA CDI Hook" + c.UseShortOptionHandling = true + c.EnableBashCompletion = true + c.Usage = "Command to structure files for usage inside a container, called as hooks from a container runtime, defined in a CDI yaml file" + c.Version = info.GetVersionString() + + // Setup the flags for this command + c.Flags = []cli.Flag{ + &cli.BoolFlag{ + Name: "debug", + Aliases: []string{"d"}, + Usage: "Enable debug-level logging", + Destination: &opts.Debug, + EnvVars: []string{"NVIDIA_CDI_DEBUG"}, + }, + &cli.BoolFlag{ + Name: "quiet", + Usage: "Suppress all output except for errors; overrides --debug", + Destination: &opts.Quiet, + EnvVars: []string{"NVIDIA_CDI_QUIET"}, + }, + } + + // Set log-level for all subcommands + c.Before = func(c *cli.Context) error { + logLevel := logrus.InfoLevel + if opts.Debug { + logLevel = logrus.DebugLevel + } + if opts.Quiet { + logLevel = logrus.ErrorLevel + } + logger.SetLevel(logLevel) + return nil + } + + // Define the subcommands + c.Commands = commands.New(logger) + + // Run the CLI + err := c.Run(os.Args) + if err != nil { + logger.Errorf("%v", err) + os.Exit(1) + } +} diff --git a/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go 
b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go similarity index 100% rename from cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go rename to cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index 5653b342..9f9e994b 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -47,7 +47,7 @@ type options struct { deviceNameStrategies cli.StringSlice driverRoot string devRoot string - nvidiaCTKPath string + nvidiaCDIHookPath string ldconfigPath string mode string vendor string @@ -132,9 +132,12 @@ func (m command) build() *cli.Command { Destination: &opts.librarySearchPaths, }, &cli.StringFlag{ - Name: "nvidia-ctk-path", - Usage: "Specify the path to use for the nvidia-ctk in the generated CDI specification. If this is left empty, the path will be searched.", - Destination: &opts.nvidiaCTKPath, + Name: "nvidia-cdi-hook-path", + Aliases: []string{"nvidia-ctk-path"}, + Usage: "Specify the path to use for the nvidia-cdi-hook in the generated CDI specification. " + + "If not specified, the PATH will be searched for `nvidia-cdi-hook`. 
" + + "NOTE: That if this is specified as `nvidia-ctk`, the PATH will be searched for `nvidia-ctk` instead.", + Destination: &opts.nvidiaCDIHookPath, }, &cli.StringFlag{ Name: "ldconfig-path", @@ -198,7 +201,7 @@ func (m command) validateFlags(c *cli.Context, opts *options) error { } } - opts.nvidiaCTKPath = config.ResolveNVIDIACTKPath(m.logger, opts.nvidiaCTKPath) + opts.nvidiaCDIHookPath = config.ResolveNVIDIACDIHookPath(m.logger, opts.nvidiaCDIHookPath) if outputFileFormat := formatFromFilename(opts.output); outputFileFormat != "" { m.logger.Debugf("Inferred output format as %q from output file name", outputFileFormat) @@ -262,7 +265,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { nvcdi.WithLogger(m.logger), nvcdi.WithDriverRoot(opts.driverRoot), nvcdi.WithDevRoot(opts.devRoot), - nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath), + nvcdi.WithNVIDIACDIHookPath(opts.nvidiaCDIHookPath), nvcdi.WithLdconfigPath(opts.ldconfigPath), nvcdi.WithDeviceNamers(deviceNamers...), nvcdi.WithMode(opts.mode), diff --git a/cmd/nvidia-ctk/hook/hook.go b/cmd/nvidia-ctk/hook/hook.go index 54088988..a638d2a7 100644 --- a/cmd/nvidia-ctk/hook/hook.go +++ b/cmd/nvidia-ctk/hook/hook.go @@ -17,13 +17,10 @@ package hook import ( - chmod "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/chmod" + "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/commands" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/urfave/cli/v2" - - symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks" - ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache" ) type hookCommand struct { @@ -46,11 +43,7 @@ func (m hookCommand) build() *cli.Command { Usage: "A collection of hooks that may be injected into an OCI spec", } - hook.Subcommands = []*cli.Command{ - ldcache.NewCommand(m.logger), - symlinks.NewCommand(m.logger), - chmod.NewCommand(m.logger), - } + hook.Subcommands = 
commands.New(m.logger) return &hook } diff --git a/internal/config/config.go b/internal/config/config.go index f3114fba..33b8ba4d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -33,8 +33,9 @@ const ( configOverride = "XDG_CONFIG_HOME" configFilePath = "nvidia-container-runtime/config.toml" - nvidiaCTKExecutable = "nvidia-ctk" - nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk" + nvidiaCTKExecutable = "nvidia-ctk" + nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk" + nvidiaCDIHookDefaultFilePath = "/usr/bin/nvidia-cdi-hook" nvidiaContainerRuntimeHookExecutable = "nvidia-container-runtime-hook" nvidiaContainerRuntimeHookDefaultPath = "/usr/bin/nvidia-container-runtime-hook" @@ -177,6 +178,8 @@ var getDistIDLike = func() []string { // This executable is used in hooks and needs to be an absolute path. // If the path is specified as an absolute path, it is used directly // without checking for existence of an executable at that path. +// +// Deprecated: Use ResolveNVIDIACDIHookPath directly instead. func ResolveNVIDIACTKPath(logger logger.Interface, nvidiaCTKPath string) string { return resolveWithDefault( logger, @@ -186,6 +189,27 @@ func ResolveNVIDIACTKPath(logger logger.Interface, nvidiaCTKPath string) string ) } +// ResolveNVIDIACDIHookPath resolves the path to the nvidia-cdi-hook binary. +// This executable is used in hooks and needs to be an absolute path. +// If the path is specified as an absolute path, it is used directly +// without checking for existence of an executable at that path. 
+func ResolveNVIDIACDIHookPath(logger logger.Interface, nvidiaCDIHookPath string) string { + if filepath.Base(nvidiaCDIHookPath) == "nvidia-ctk" { + return resolveWithDefault( + logger, + "NVIDIA Container Toolkit CLI", + nvidiaCDIHookPath, + nvidiaCTKDefaultFilePath, + ) + } + return resolveWithDefault( + logger, + "NVIDIA CDI Hook CLI", + nvidiaCDIHookPath, + nvidiaCDIHookDefaultFilePath, + ) +} + // ResolveNVIDIAContainerRuntimeHookPath resolves the path the nvidia-container-runtime-hook binary. func ResolveNVIDIAContainerRuntimeHookPath(logger logger.Interface, nvidiaContainerRuntimeHookPath string) string { return resolveWithDefault( diff --git a/internal/discover/graphics.go b/internal/discover/graphics.go index 35b51982..b7efec8b 100644 --- a/internal/discover/graphics.go +++ b/internal/discover/graphics.go @@ -36,20 +36,20 @@ import ( // TODO: The logic for creating DRM devices should be consolidated between this // and the logic for generating CDI specs for a single device. This is only used // when applying OCI spec modifications to an incoming spec in "legacy" mode. -func NewDRMNodesDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string, nvidiaCTKPath string) (Discover, error) { +func NewDRMNodesDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string, nvidiaCDIHookPath string) (Discover, error) { drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, devRoot) if err != nil { return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err) } - drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, devRoot, nvidiaCTKPath) + drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, devRoot, nvidiaCDIHookPath) discover := Merge(drmDeviceNodes, drmByPathSymlinks) return discover, nil } // NewGraphicsMountsDiscoverer creates a discoverer for the mounts required by graphics tools such as vulkan. 
-func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) { +func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string) (Discover, error) { libraries := NewMounts( logger, driver.Libraries(), @@ -74,7 +74,7 @@ func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, n }, ) - xorg := optionalXorgDiscoverer(logger, driver, nvidiaCTKPath) + xorg := optionalXorgDiscoverer(logger, driver, nvidiaCDIHookPath) discover := Merge( libraries, @@ -87,19 +87,19 @@ func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, n type drmDevicesByPath struct { None - logger logger.Interface - nvidiaCTKPath string - devRoot string - devicesFrom Discover + logger logger.Interface + nvidiaCDIHookPath string + devRoot string + devicesFrom Discover } // newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer -func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, devRoot string, nvidiaCTKPath string) Discover { +func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, devRoot string, nvidiaCDIHookPath string) Discover { d := drmDevicesByPath{ - logger: logger, - nvidiaCTKPath: nvidiaCTKPath, - devRoot: devRoot, - devicesFrom: devices, + logger: logger, + nvidiaCDIHookPath: nvidiaCDIHookPath, + devRoot: devRoot, + devicesFrom: devices, } return &d @@ -127,8 +127,8 @@ func (d drmDevicesByPath) Hooks() ([]Hook, error) { args = append(args, "--link", l) } - hook := CreateNvidiaCTKHook( - d.nvidiaCTKPath, + hook := CreateNvidiaCDIHook( + d.nvidiaCDIHookPath, "create-symlinks", args..., ) @@ -233,17 +233,17 @@ func newDRMDeviceFilter(devices image.VisibleDevices, devRoot string) (Filter, e } type xorgHooks struct { - libraries Discover - driverVersion string - nvidiaCTKPath string + libraries Discover + 
driverVersion string + nvidiaCDIHookPath string } var _ Discover = (*xorgHooks)(nil) // optionalXorgDiscoverer creates a discoverer for Xorg libraries. // If the creation of the discoverer fails, a None discoverer is returned. -func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) Discover { - xorg, err := newXorgDiscoverer(logger, driver, nvidiaCTKPath) +func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string) Discover { + xorg, err := newXorgDiscoverer(logger, driver, nvidiaCDIHookPath) if err != nil { logger.Warningf("Failed to create Xorg discoverer: %v; skipping xorg libraries", err) return None{} @@ -251,7 +251,7 @@ func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidia return xorg } -func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) { +func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string) (Discover, error) { libCudaPaths, err := cuda.New( driver.Libraries(), ).Locate(".*.*") @@ -281,9 +281,9 @@ func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPa }, ) xorgHooks := xorgHooks{ - libraries: xorgLibs, - driverVersion: version, - nvidiaCTKPath: nvidiaCTKPath, + libraries: xorgLibs, + driverVersion: version, + nvidiaCDIHookPath: nvidiaCDIHookPath, } xorgConfig := NewMounts( @@ -332,7 +332,7 @@ func (m xorgHooks) Hooks() ([]Hook, error) { link := strings.TrimSuffix(target, "."+m.driverVersion) links := []string{fmt.Sprintf("%s::%s", filepath.Base(target), link)} symlinkHook := CreateCreateSymlinkHook( - m.nvidiaCTKPath, + m.nvidiaCDIHookPath, links, ) diff --git a/internal/discover/hooks.go b/internal/discover/hooks.go index d68ecdbd..4259ccf8 100644 --- a/internal/discover/hooks.go +++ b/internal/discover/hooks.go @@ -41,7 +41,7 @@ func (h Hook) Hooks() ([]Hook, error) { } // CreateCreateSymlinkHook creates a hook which creates a 
symlink from link -> target. -func CreateCreateSymlinkHook(nvidiaCTKPath string, links []string) Discover { +func CreateCreateSymlinkHook(nvidiaCDIHookPath string, links []string) Discover { if len(links) == 0 { return None{} } @@ -50,18 +50,31 @@ func CreateCreateSymlinkHook(nvidiaCTKPath string, links []string) Discover { for _, link := range links { args = append(args, "--link", link) } - return CreateNvidiaCTKHook( - nvidiaCTKPath, + return CreateNvidiaCDIHook( + nvidiaCDIHookPath, "create-symlinks", args..., ) } -// CreateNvidiaCTKHook creates a hook which invokes the NVIDIA Container CLI hook subcommand. -func CreateNvidiaCTKHook(nvidiaCTKPath string, hookName string, additionalArgs ...string) Hook { +// CreateNvidiaCDIHook creates a hook which invokes the NVIDIA Container CLI hook subcommand. +func CreateNvidiaCDIHook(nvidiaCDIHookPath string, hookName string, additionalArgs ...string) Hook { + return cdiHook(nvidiaCDIHookPath).Create(hookName, additionalArgs...) +} + +type cdiHook string + +func (c cdiHook) Create(name string, args ...string) Hook { return Hook{ Lifecycle: cdi.CreateContainerHook, - Path: nvidiaCTKPath, - Args: append([]string{filepath.Base(nvidiaCTKPath), "hook", hookName}, additionalArgs...), + Path: string(c), + Args: append(c.requiredArgs(name), args...), } } +func (c cdiHook) requiredArgs(name string) []string { + base := filepath.Base(string(c)) + if base == "nvidia-ctk" { + return []string{base, "hook", name} + } + return []string{base, name} +} diff --git a/internal/discover/ldconfig.go b/internal/discover/ldconfig.go index a1182ed2..b81b9be5 100644 --- a/internal/discover/ldconfig.go +++ b/internal/discover/ldconfig.go @@ -25,12 +25,12 @@ import ( ) // NewLDCacheUpdateHook creates a discoverer that updates the ldcache for the specified mounts. 
A logger can also be specified -func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCTKPath, ldconfigPath string) (Discover, error) { +func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCDIHookPath, ldconfigPath string) (Discover, error) { d := ldconfig{ - logger: logger, - nvidiaCTKPath: nvidiaCTKPath, - ldconfigPath: ldconfigPath, - mountsFrom: mounts, + logger: logger, + nvidiaCDIHookPath: nvidiaCDIHookPath, + ldconfigPath: ldconfigPath, + mountsFrom: mounts, } return &d, nil @@ -38,10 +38,10 @@ func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCTKPat type ldconfig struct { None - logger logger.Interface - nvidiaCTKPath string - ldconfigPath string - mountsFrom Discover + logger logger.Interface + nvidiaCDIHookPath string + ldconfigPath string + mountsFrom Discover } // Hooks checks the required mounts for libraries and returns a hook to update the LDcache for the discovered paths. @@ -51,7 +51,7 @@ func (d ldconfig) Hooks() ([]Hook, error) { return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err) } h := CreateLDCacheUpdateHook( - d.nvidiaCTKPath, + d.nvidiaCDIHookPath, d.ldconfigPath, getLibraryPaths(mounts), ) @@ -70,7 +70,7 @@ func CreateLDCacheUpdateHook(executable string, ldconfig string, libraries []str args = append(args, "--folder", f) } - hook := CreateNvidiaCTKHook( + hook := CreateNvidiaCDIHook( executable, "update-ldcache", args..., diff --git a/internal/discover/ldconfig_test.go b/internal/discover/ldconfig_test.go index 612c209e..0b214c77 100644 --- a/internal/discover/ldconfig_test.go +++ b/internal/discover/ldconfig_test.go @@ -25,8 +25,8 @@ import ( ) const ( - testNvidiaCTKPath = "/foo/bar/nvidia-ctk" - testLdconfigPath = "/bar/baz/ldconfig" + testNvidiaCDIHookPath = "/foo/bar/nvidia-cdi-hook" + testLdconfigPath = "/bar/baz/ldconfig" ) func TestLDCacheUpdateHook(t *testing.T) { @@ -42,7 +42,7 @@ func TestLDCacheUpdateHook(t *testing.T) { }{ { 
description: "empty mounts", - expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache"}, + expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache"}, }, { description: "mount error", @@ -65,7 +65,7 @@ func TestLDCacheUpdateHook(t *testing.T) { Path: "/usr/local/lib/libbar.so", }, }, - expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, + expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, }, { description: "host paths are ignored", @@ -75,12 +75,12 @@ func TestLDCacheUpdateHook(t *testing.T) { Path: "/usr/local/lib/libfoo.so", }, }, - expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib"}, + expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"}, }, { description: "explicit ldconfig path is passed", ldconfigPath: testLdconfigPath, - expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--ldconfig-path", testLdconfigPath}, + expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath}, }, } @@ -92,12 +92,12 @@ func TestLDCacheUpdateHook(t *testing.T) { }, } expectedHook := Hook{ - Path: testNvidiaCTKPath, + Path: testNvidiaCDIHookPath, Args: tc.expectedArgs, Lifecycle: "createContainer", } - d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCTKPath, tc.ldconfigPath) + d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCDIHookPath, tc.ldconfigPath) require.NoError(t, err) hooks, err := d.Hooks() diff --git a/internal/modifier/cdi.go b/internal/modifier/cdi.go index c53cb996..c5af4f88 100644 --- a/internal/modifier/cdi.go +++ b/internal/modifier/cdi.go @@ -185,7 +185,7 @@ func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, de func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devices []string) (spec.Interface, error) { cdilib, 
err := nvcdi.New( nvcdi.WithLogger(logger), - nvcdi.WithNVIDIACTKPath(cfg.NVIDIACTKConfig.Path), + nvcdi.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path), nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root), nvcdi.WithVendor("runtime.nvidia.com"), nvcdi.WithClass("gpu"), diff --git a/internal/modifier/csv.go b/internal/modifier/csv.go index 54304428..0905d5da 100644 --- a/internal/modifier/csv.go +++ b/internal/modifier/csv.go @@ -62,7 +62,7 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, image image.CUD cdilib, err := nvcdi.New( nvcdi.WithLogger(logger), nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root), - nvcdi.WithNVIDIACTKPath(cfg.NVIDIACTKConfig.Path), + nvcdi.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path), nvcdi.WithMode(nvcdi.ModeCSV), nvcdi.WithCSVFiles(csvFiles), ) diff --git a/internal/modifier/graphics.go b/internal/modifier/graphics.go index 1f3f2c48..d9784d5e 100644 --- a/internal/modifier/graphics.go +++ b/internal/modifier/graphics.go @@ -35,12 +35,12 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image imag return nil, nil } - nvidiaCTKPath := cfg.NVIDIACTKConfig.Path + nvidiaCDIHookPath := cfg.NVIDIACTKConfig.Path mounts, err := discover.NewGraphicsMountsDiscoverer( logger, driver, - nvidiaCTKPath, + nvidiaCDIHookPath, ) if err != nil { return nil, fmt.Errorf("failed to create mounts discoverer: %v", err) @@ -52,7 +52,7 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image imag logger, image.DevicesFromEnvvars(visibleDevicesEnvvar), devRoot, - nvidiaCTKPath, + nvidiaCDIHookPath, ) if err != nil { return nil, fmt.Errorf("failed to construct discoverer: %v", err) diff --git a/internal/platform-support/tegra/csv_test.go b/internal/platform-support/tegra/csv_test.go index 69ea1e53..2e8e42fe 100644 --- a/internal/platform-support/tegra/csv_test.go +++ b/internal/platform-support/tegra/csv_test.go @@ -90,10 +90,9 @@ func TestDiscovererFromCSVFiles(t *testing.T) { 
expectedHooks: []discover.Hook{ { Lifecycle: "createContainer", - Path: "/usr/bin/nvidia-ctk", + Path: "/usr/bin/nvidia-cdi-hook", Args: []string{ - "nvidia-ctk", - "hook", + "nvidia-cdi-hook", "create-symlinks", "--link", "/usr/lib/aarch64-linux-gnu/tegra/libv4l2_nvargus.so::/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvargus.so", @@ -147,10 +146,9 @@ func TestDiscovererFromCSVFiles(t *testing.T) { expectedHooks: []discover.Hook{ { Lifecycle: "createContainer", - Path: "/usr/bin/nvidia-ctk", + Path: "/usr/bin/nvidia-cdi-hook", Args: []string{ - "nvidia-ctk", - "hook", + "nvidia-cdi-hook", "create-symlinks", "--link", "/usr/lib/aarch64-linux-gnu/tegra/libv4l2_nvargus.so::/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvargus.so", @@ -189,7 +187,7 @@ func TestDiscovererFromCSVFiles(t *testing.T) { o := tegraOptions{ logger: logger, - nvidiaCTKPath: "/usr/bin/nvidia-ctk", + nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook", csvFiles: []string{"dummy"}, ignorePatterns: tc.ignorePatterns, symlinkLocator: tc.symlinkLocator, diff --git a/internal/platform-support/tegra/symlinks.go b/internal/platform-support/tegra/symlinks.go index 283b2f4d..37b07e6d 100644 --- a/internal/platform-support/tegra/symlinks.go +++ b/internal/platform-support/tegra/symlinks.go @@ -28,10 +28,10 @@ import ( type symlinkHook struct { discover.None - logger logger.Interface - nvidiaCTKPath string - targets []string - mountsFrom discover.Discover + logger logger.Interface + nvidiaCDIHookPath string + targets []string + mountsFrom discover.Discover // The following can be overridden for testing symlinkChainLocator lookup.Locator @@ -42,7 +42,7 @@ type symlinkHook struct { func (o tegraOptions) createCSVSymlinkHooks(targets []string, mounts discover.Discover) discover.Discover { return symlinkHook{ logger: o.logger, - nvidiaCTKPath: o.nvidiaCTKPath, + nvidiaCDIHookPath: o.nvidiaCDIHookPath, targets: targets, mountsFrom: mounts, symlinkChainLocator: o.symlinkChainLocator, @@ -60,7 +60,7 @@ 
func (d symlinkHook) Hooks() ([]discover.Hook, error) { csvSymlinks := d.getCSVFileSymlinks() return discover.CreateCreateSymlinkHook( - d.nvidiaCTKPath, + d.nvidiaCDIHookPath, append(csvSymlinks, specificLinks...), ).Hooks() } diff --git a/internal/platform-support/tegra/tegra.go b/internal/platform-support/tegra/tegra.go index 771b31f2..1031fc72 100644 --- a/internal/platform-support/tegra/tegra.go +++ b/internal/platform-support/tegra/tegra.go @@ -30,7 +30,7 @@ type tegraOptions struct { csvFiles []string driverRoot string devRoot string - nvidiaCTKPath string + nvidiaCDIHookPath string ldconfigPath string librarySearchPaths []string ignorePatterns ignoreMountSpecPatterns @@ -80,7 +80,7 @@ func New(opts ...Option) (discover.Discover, error) { return nil, fmt.Errorf("failed to create CSV discoverer: %v", err) } - ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(o.logger, csvDiscoverer, o.nvidiaCTKPath, o.ldconfigPath) + ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(o.logger, csvDiscoverer, o.nvidiaCDIHookPath, o.ldconfigPath) if err != nil { return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err) } @@ -133,10 +133,10 @@ func WithCSVFiles(csvFiles []string) Option { } } -// WithNVIDIACTKPath sets the path to the nvidia-container-toolkit binary. -func WithNVIDIACTKPath(nvidiaCTKPath string) Option { +// WithNVIDIACDIHookPath sets the path to the nvidia-cdi-hook binary. 
+func WithNVIDIACDIHookPath(nvidiaCDIHookPath string) Option { return func(o *tegraOptions) { - o.nvidiaCTKPath = nvidiaCTKPath + o.nvidiaCDIHookPath = nvidiaCDIHookPath } } diff --git a/internal/runtime/runtime.go b/internal/runtime/runtime.go index 4b00b772..842ca0dc 100644 --- a/internal/runtime/runtime.go +++ b/internal/runtime/runtime.go @@ -66,6 +66,7 @@ func (r rt) Run(argv []string) (rerr error) { if r.modeOverride != "" { cfg.NVIDIAContainerRuntimeConfig.Mode = r.modeOverride } + //nolint:staticcheck // TODO(elezar): We should switch the nvidia-container-runtime from using nvidia-ctk to using nvidia-cdi-hook. cfg.NVIDIACTKConfig.Path = config.ResolveNVIDIACTKPath(r.logger, cfg.NVIDIACTKConfig.Path) cfg.NVIDIAContainerRuntimeHookConfig.Path = config.ResolveNVIDIAContainerRuntimeHookPath(r.logger, cfg.NVIDIAContainerRuntimeHookConfig.Path) diff --git a/packaging/debian/nvidia-container-toolkit-base.install b/packaging/debian/nvidia-container-toolkit-base.install index af03bd06..62c0d095 100644 --- a/packaging/debian/nvidia-container-toolkit-base.install +++ b/packaging/debian/nvidia-container-toolkit-base.install @@ -1,2 +1,3 @@ nvidia-container-runtime /usr/bin nvidia-ctk /usr/bin +nvidia-cdi-hook /usr/bin diff --git a/packaging/rpm/SPECS/nvidia-container-toolkit.spec b/packaging/rpm/SPECS/nvidia-container-toolkit.spec index 79ef91b2..60552827 100644 --- a/packaging/rpm/SPECS/nvidia-container-toolkit.spec +++ b/packaging/rpm/SPECS/nvidia-container-toolkit.spec @@ -16,6 +16,7 @@ Source2: LICENSE Source3: nvidia-container-runtime Source4: nvidia-container-runtime.cdi Source5: nvidia-container-runtime.legacy +Source6: nvidia-cdi-hook Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2 Provides: nvidia-container-runtime @@ -27,7 +28,7 @@ Requires: nvidia-container-toolkit-base == %{version}-%{release} Provides tools and utilities to enable GPU support in containers. 
%prep -cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} . +cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} . %install mkdir -p %{buildroot}%{_bindir} @@ -36,6 +37,7 @@ install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk +install -m 755 -t %{buildroot}%{_bindir} nvidia-cdi-hook %post if [ $1 -gt 1 ]; then # only on package upgrade @@ -86,6 +88,7 @@ Provides tools such as the NVIDIA Container Runtime and NVIDIA Container Toolkit %license LICENSE %{_bindir}/nvidia-container-runtime %{_bindir}/nvidia-ctk +%{_bindir}/nvidia-cdi-hook # The OPERATOR EXTENSIONS package consists of components that are required to enable GPU support in Kubernetes. # This package is not distributed as part of the NVIDIA Container Toolkit RPMs. diff --git a/pkg/nvcdi/common-nvml.go b/pkg/nvcdi/common-nvml.go index f4bfe30a..4dd1bc35 100644 --- a/pkg/nvcdi/common-nvml.go +++ b/pkg/nvcdi/common-nvml.go @@ -36,12 +36,12 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) { }, ) - graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(l.logger, l.driver, l.nvidiaCTKPath) + graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath) if err != nil { l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err) } - driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCTKPath, l.ldconfigPath, l.nvmllib) + driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, l.nvmllib) if err != nil { return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err) } diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go index 99374003..8fb39888 100644 --- a/pkg/nvcdi/driver-nvml.go +++ 
b/pkg/nvcdi/driver-nvml.go @@ -34,7 +34,7 @@ import ( // NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation. // The supplied NVML Library is used to query the expected driver version. -func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) { +func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) { if r := nvmllib.Init(); r != nvml.SUCCESS { return nil, fmt.Errorf("failed to initialize NVML: %v", r) } @@ -49,11 +49,11 @@ func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTK return nil, fmt.Errorf("failed to determine driver version: %v", r) } - return newDriverVersionDiscoverer(logger, driver, nvidiaCTKPath, ldconfigPath, version) + return newDriverVersionDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version) } -func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath, ldconfigPath, version string) (discover.Discover, error) { - libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCTKPath, ldconfigPath, version) +func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) { + libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version) if err != nil { return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err) } @@ -81,7 +81,7 @@ func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nv } // NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version. 
-func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath, ldconfigPath, version string) (discover.Discover, error) { +func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) { libraryPaths, err := getVersionLibs(logger, driver, version) if err != nil { return nil, fmt.Errorf("failed to get libraries for driver version: %v", err) @@ -97,7 +97,7 @@ func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nv libraryPaths, ) - hooks, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCTKPath, ldconfigPath) + hooks, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCDIHookPath, ldconfigPath) d := discover.Merge( libraries, diff --git a/pkg/nvcdi/driver-wsl.go b/pkg/nvcdi/driver-wsl.go index e87bcb03..d184d777 100644 --- a/pkg/nvcdi/driver-wsl.go +++ b/pkg/nvcdi/driver-wsl.go @@ -39,7 +39,7 @@ var requiredDriverStoreFiles = []string{ } // newWSLDriverDiscoverer returns a Discoverer for WSL2 drivers. -func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath, ldconfigPath string) (discover.Discover, error) { +func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, nvidiaCDIHookPath, ldconfigPath string) (discover.Discover, error) { err := dxcore.Init() if err != nil { return nil, fmt.Errorf("failed to initialize dxcore: %v", err) @@ -56,11 +56,11 @@ func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, nvidiaCT } logger.Infof("Using WSL driver store paths: %v", driverStorePaths) - return newWSLDriverStoreDiscoverer(logger, driverRoot, nvidiaCTKPath, ldconfigPath, driverStorePaths) + return newWSLDriverStoreDiscoverer(logger, driverRoot, nvidiaCDIHookPath, ldconfigPath, driverStorePaths) } // newWSLDriverStoreDiscoverer returns a Discoverer for WSL2 drivers in the driver store associated with a dxcore adapter. 
-func newWSLDriverStoreDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, ldconfigPath string, driverStorePaths []string) (discover.Discover, error) { +func newWSLDriverStoreDiscoverer(logger logger.Interface, driverRoot string, nvidiaCDIHookPath string, ldconfigPath string, driverStorePaths []string) (discover.Discover, error) { var searchPaths []string seen := make(map[string]bool) for _, path := range driverStorePaths { @@ -88,12 +88,12 @@ func newWSLDriverStoreDiscoverer(logger logger.Interface, driverRoot string, nvi ) symlinkHook := nvidiaSMISimlinkHook{ - logger: logger, - mountsFrom: libraries, - nvidiaCTKPath: nvidiaCTKPath, + logger: logger, + mountsFrom: libraries, + nvidiaCDIHookPath: nvidiaCDIHookPath, } - ldcacheHook, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCTKPath, ldconfigPath) + ldcacheHook, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCDIHookPath, ldconfigPath) d := discover.Merge( libraries, @@ -106,9 +106,9 @@ func newWSLDriverStoreDiscoverer(logger logger.Interface, driverRoot string, nvi type nvidiaSMISimlinkHook struct { discover.None - logger logger.Interface - mountsFrom discover.Discover - nvidiaCTKPath string + logger logger.Interface + mountsFrom discover.Discover + nvidiaCDIHookPath string } // Hooks returns a hook that creates a symlink to nvidia-smi in the driver store. 
@@ -135,7 +135,7 @@ func (m nvidiaSMISimlinkHook) Hooks() ([]discover.Hook, error) { } link := "/usr/bin/nvidia-smi" links := []string{fmt.Sprintf("%s::%s", target, link)} - symlinkHook := discover.CreateCreateSymlinkHook(m.nvidiaCTKPath, links) + symlinkHook := discover.CreateCreateSymlinkHook(m.nvidiaCDIHookPath, links) return symlinkHook.Hooks() } diff --git a/pkg/nvcdi/driver-wsl_test.go b/pkg/nvcdi/driver-wsl_test.go index b579d75f..b9aac1a1 100644 --- a/pkg/nvcdi/driver-wsl_test.go +++ b/pkg/nvcdi/driver-wsl_test.go @@ -92,8 +92,8 @@ func TestNvidiaSMISymlinkHook(t *testing.T) { expectedHooks: []discover.Hook{ { Lifecycle: "createContainer", - Path: "nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "create-symlinks", + Path: "nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "nvidia-smi::/usr/bin/nvidia-smi"}, }, }, @@ -112,8 +112,8 @@ func TestNvidiaSMISymlinkHook(t *testing.T) { expectedHooks: []discover.Hook{ { Lifecycle: "createContainer", - Path: "nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "create-symlinks", + Path: "nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "/some/path/nvidia-smi::/usr/bin/nvidia-smi"}, }, }, @@ -132,8 +132,8 @@ func TestNvidiaSMISymlinkHook(t *testing.T) { expectedHooks: []discover.Hook{ { Lifecycle: "createContainer", - Path: "nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "create-symlinks", + Path: "nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "/some/path/nvidia-smi::/usr/bin/nvidia-smi"}, }, }, @@ -143,9 +143,9 @@ func TestNvidiaSMISymlinkHook(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { m := nvidiaSMISimlinkHook{ - logger: logger, - mountsFrom: tc.mounts, - nvidiaCTKPath: "nvidia-ctk", + logger: logger, + mountsFrom: tc.mounts, + nvidiaCDIHookPath: "nvidia-cdi-hook", } devices, err := m.Devices() diff --git a/pkg/nvcdi/full-gpu-nvml.go 
b/pkg/nvcdi/full-gpu-nvml.go index c7327757..aa46c4c6 100644 --- a/pkg/nvcdi/full-gpu-nvml.go +++ b/pkg/nvcdi/full-gpu-nvml.go @@ -58,7 +58,7 @@ func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, err // GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'. func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error) { - device, err := newFullGPUDiscoverer(l.logger, l.devRoot, l.nvidiaCTKPath, d) + device, err := newFullGPUDiscoverer(l.logger, l.devRoot, l.nvidiaCDIHookPath, d) if err != nil { return nil, fmt.Errorf("failed to create device discoverer: %v", err) } @@ -73,17 +73,17 @@ func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error // byPathHookDiscoverer discovers the entities required for injecting by-path DRM device links type byPathHookDiscoverer struct { - logger logger.Interface - devRoot string - nvidiaCTKPath string - pciBusID string - deviceNodes discover.Discover + logger logger.Interface + devRoot string + nvidiaCDIHookPath string + pciBusID string + deviceNodes discover.Discover } var _ discover.Discover = (*byPathHookDiscoverer)(nil) // newFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device. -func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) { +func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCDIHookPath string, d device.Device) (discover.Discover, error) { // TODO: The functionality to get device paths should be integrated into the go-nvlib/pkg/device.Device interface. // This will allow reuse here and in other code where the paths are queried such as the NVIDIA device plugin. 
minor, ret := d.GetMinorNumber() @@ -112,17 +112,17 @@ func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath ) byPathHooks := &byPathHookDiscoverer{ - logger: logger, - devRoot: devRoot, - nvidiaCTKPath: nvidiaCTKPath, - pciBusID: pciBusID, - deviceNodes: deviceNodes, + logger: logger, + devRoot: devRoot, + nvidiaCDIHookPath: nvidiaCDIHookPath, + pciBusID: pciBusID, + deviceNodes: deviceNodes, } deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer( logger, devRoot, - nvidiaCTKPath, + nvidiaCDIHookPath, deviceNodes, ) @@ -157,8 +157,8 @@ func (d *byPathHookDiscoverer) Hooks() ([]discover.Hook, error) { args = append(args, "--link", l) } - hook := discover.CreateNvidiaCTKHook( - d.nvidiaCTKPath, + hook := discover.CreateNvidiaCDIHook( + d.nvidiaCDIHookPath, "create-symlinks", args..., ) diff --git a/pkg/nvcdi/lib-csv.go b/pkg/nvcdi/lib-csv.go index 4acc5cd6..649b801a 100644 --- a/pkg/nvcdi/lib-csv.go +++ b/pkg/nvcdi/lib-csv.go @@ -44,7 +44,7 @@ func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) { tegra.WithLogger(l.logger), tegra.WithDriverRoot(l.driverRoot), tegra.WithDevRoot(l.devRoot), - tegra.WithNVIDIACTKPath(l.nvidiaCTKPath), + tegra.WithNVIDIACDIHookPath(l.nvidiaCDIHookPath), tegra.WithLdconfigPath(l.ldconfigPath), tegra.WithCSVFiles(l.csvFiles), tegra.WithLibrarySearchPaths(l.librarySearchPaths...), diff --git a/pkg/nvcdi/lib-wsl.go b/pkg/nvcdi/lib-wsl.go index 5e8ea97b..1c96c538 100644 --- a/pkg/nvcdi/lib-wsl.go +++ b/pkg/nvcdi/lib-wsl.go @@ -54,7 +54,7 @@ func (l *wsllib) GetAllDeviceSpecs() ([]specs.Device, error) { // GetCommonEdits generates a CDI specification that can be used for ANY devices func (l *wsllib) GetCommonEdits() (*cdi.ContainerEdits, error) { - driver, err := newWSLDriverDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.ldconfigPath) + driver, err := newWSLDriverDiscoverer(l.logger, l.driverRoot, l.nvidiaCDIHookPath, l.ldconfigPath) if err != nil { return nil, fmt.Errorf("failed to 
create discoverer for WSL driver: %v", err) } diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index 7f07fd7e..c56c18e1 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -48,7 +48,7 @@ type nvcdilib struct { deviceNamers DeviceNamers driverRoot string devRoot string - nvidiaCTKPath string + nvidiaCDIHookPath string ldconfigPath string configSearchPaths []string librarySearchPaths []string @@ -87,8 +87,8 @@ func New(opts ...Option) (Interface, error) { if l.devRoot == "" { l.devRoot = l.driverRoot } - if l.nvidiaCTKPath == "" { - l.nvidiaCTKPath = "/usr/bin/nvidia-ctk" + if l.nvidiaCDIHookPath == "" { + l.nvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook" } if l.infolib == nil { l.infolib = info.New() diff --git a/pkg/nvcdi/management.go b/pkg/nvcdi/management.go index c906db2c..4648e5bb 100644 --- a/pkg/nvcdi/management.go +++ b/pkg/nvcdi/management.go @@ -66,7 +66,7 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) { return nil, fmt.Errorf("failed to get CUDA version: %v", err) } - driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCTKPath, m.ldconfigPath, version) + driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCDIHookPath, m.ldconfigPath, version) if err != nil { return nil, fmt.Errorf("failed to create driver library discoverer: %v", err) } @@ -123,7 +123,7 @@ func (m *managementlib) newManagementDeviceDiscoverer() (discover.Discover, erro deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer( m.logger, m.devRoot, - m.nvidiaCTKPath, + m.nvidiaCDIHookPath, deviceNodes, ) diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go index 89992a54..5a490619 100644 --- a/pkg/nvcdi/options.go +++ b/pkg/nvcdi/options.go @@ -63,9 +63,16 @@ func WithLogger(logger logger.Interface) Option { } // WithNVIDIACTKPath sets the path to the NVIDIA Container Toolkit CLI path for the library +// +// Deprecated: Use WithNVIDIACDIHookPath instead. 
func WithNVIDIACTKPath(path string) Option { + return WithNVIDIACDIHookPath(path) +} + +// WithNVIDIACDIHookPath sets the path to the nvidia-cdi-hook binary for the library +func WithNVIDIACDIHookPath(path string) Option { return func(l *nvcdilib) { - l.nvidiaCTKPath = path + l.nvidiaCDIHookPath = path } } diff --git a/pkg/nvcdi/transform/deduplicate_test.go b/pkg/nvcdi/transform/deduplicate_test.go index 3a5e6ce9..2991233d 100644 --- a/pkg/nvcdi/transform/deduplicate_test.go +++ b/pkg/nvcdi/transform/deduplicate_test.go @@ -98,13 +98,13 @@ func TestDeduplicate(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, }, }, @@ -114,8 +114,8 @@ func TestDeduplicate(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, }, }, diff --git a/pkg/nvcdi/transform/merged-device_test.go b/pkg/nvcdi/transform/merged-device_test.go index 14b19fb7..44cee13a 100644 --- a/pkg/nvcdi/transform/merged-device_test.go +++ b/pkg/nvcdi/transform/merged-device_test.go @@ -141,8 +141,8 @@ func TestMergedDevice(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: 
"/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, }, }, @@ -153,8 +153,8 @@ func TestMergedDevice(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, }, }, @@ -169,8 +169,8 @@ func TestMergedDevice(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, }, }, @@ -181,8 +181,8 @@ func TestMergedDevice(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, }, }, @@ -193,8 +193,8 @@ func TestMergedDevice(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, }, }, diff --git a/pkg/nvcdi/transform/root/container-root_test.go b/pkg/nvcdi/transform/root/container-root_test.go index fd098cbc..1a420cf3 100644 --- a/pkg/nvcdi/transform/root/container-root_test.go +++ b/pkg/nvcdi/transform/root/container-root_test.go @@ -165,27 +165,27 @@ func TestContainerRootTransformer(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/root/usr/bin/nvidia-ctk", + Path: 
"/root/usr/bin/nvidia-cdi-hook", Args: []string{ - "nvidia-ctk", "hook", "update-ldcache", + "nvidia-cdi-hook", "update-ldcache", "--folder", "/root/path/to/target", }, }, { HookName: "createContainer", - Path: "/target-root/usr/bin/nvidia-ctk", + Path: "/target-root/usr/bin/nvidia-cdi-hook", Args: []string{ - "nvidia-ctk", "hook", "update-ldcache", + "nvidia-cdi-hook", "update-ldcache", "--folder", "/target-root/path/to/target", }, }, { HookName: "createContainer", - Path: "/different-root/usr/bin/nvidia-ctk", + Path: "/different-root/usr/bin/nvidia-cdi-hook", Args: []string{ - "nvidia-ctk", "hook", "update-ldcache", + "nvidia-cdi-hook", "update-ldcache", "--folder", "/different-root/path/to/target", }, @@ -198,27 +198,27 @@ func TestContainerRootTransformer(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/root/usr/bin/nvidia-ctk", + Path: "/root/usr/bin/nvidia-cdi-hook", Args: []string{ - "nvidia-ctk", "hook", "update-ldcache", + "nvidia-cdi-hook", "update-ldcache", "--folder", "/target-root/path/to/target", }, }, { HookName: "createContainer", - Path: "/target-root/usr/bin/nvidia-ctk", + Path: "/target-root/usr/bin/nvidia-cdi-hook", Args: []string{ - "nvidia-ctk", "hook", "update-ldcache", + "nvidia-cdi-hook", "update-ldcache", "--folder", "/target-root/path/to/target", }, }, { HookName: "createContainer", - Path: "/different-root/usr/bin/nvidia-ctk", + Path: "/different-root/usr/bin/nvidia-cdi-hook", Args: []string{ - "nvidia-ctk", "hook", "update-ldcache", + "nvidia-cdi-hook", "update-ldcache", "--folder", "/different-root/path/to/target", }, @@ -236,7 +236,7 @@ func TestContainerRootTransformer(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "startContainer", - Path: "/root/usr/bin/nvidia-ctk", + Path: "/root/usr/bin/nvidia-cdi-hook", Args: []string{ "--link", "/root/path/to/target::/root/path/to/link", @@ -250,7 +250,7 @@ func TestContainerRootTransformer(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "startContainer", - 
Path: "/target-root/usr/bin/nvidia-ctk", + Path: "/target-root/usr/bin/nvidia-cdi-hook", Args: []string{ "--link", "/target-root/path/to/target::/target-root/path/to/link", @@ -269,7 +269,7 @@ func TestContainerRootTransformer(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/root/usr/bin/nvidia-ctk", + Path: "/root/usr/bin/nvidia-cdi-hook", Args: []string{ "--link", "/root/path/to/target::/root/path/to/link", @@ -283,7 +283,7 @@ func TestContainerRootTransformer(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/root/usr/bin/nvidia-ctk", + Path: "/root/usr/bin/nvidia-cdi-hook", Args: []string{ "--link", "/target-root/path/to/target::/target-root/path/to/link", @@ -302,7 +302,7 @@ func TestContainerRootTransformer(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createRuntime", - Path: "/root/usr/bin/nvidia-ctk", + Path: "/root/usr/bin/nvidia-cdi-hook", Args: []string{ "--link", "/root/path/to/target::/root/path/to/link", @@ -316,7 +316,7 @@ func TestContainerRootTransformer(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createRuntime", - Path: "/root/usr/bin/nvidia-ctk", + Path: "/root/usr/bin/nvidia-cdi-hook", Args: []string{ "--link", "/root/path/to/target::/root/path/to/link", diff --git a/pkg/nvcdi/transform/simplify_test.go b/pkg/nvcdi/transform/simplify_test.go index 2e1c1527..32729154 100644 --- a/pkg/nvcdi/transform/simplify_test.go +++ b/pkg/nvcdi/transform/simplify_test.go @@ -115,8 +115,8 @@ func TestSimplify(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, }, }, @@ -127,8 +127,8 @@ func TestSimplify(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", 
"hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, }, }, @@ -139,13 +139,13 @@ func TestSimplify(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, }, }, @@ -160,8 +160,8 @@ func TestSimplify(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, }, }, @@ -172,8 +172,8 @@ func TestSimplify(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, }, }, @@ -184,8 +184,8 @@ func TestSimplify(t *testing.T) { Hooks: []*specs.Hook{ { HookName: "createContainer", - Path: "/usr/bin/nvidia-ctk", - Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + Path: "/usr/bin/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, }, }, }, diff --git a/pkg/nvcdi/workarounds-device-folder-permissions.go 
b/pkg/nvcdi/workarounds-device-folder-permissions.go index e183ed6e..511eb1fc 100644 --- a/pkg/nvcdi/workarounds-device-folder-permissions.go +++ b/pkg/nvcdi/workarounds-device-folder-permissions.go @@ -25,10 +25,10 @@ import ( ) type deviceFolderPermissions struct { - logger logger.Interface - devRoot string - nvidiaCTKPath string - devices discover.Discover + logger logger.Interface + devRoot string + nvidiaCDIHookPath string + devices discover.Discover } var _ discover.Discover = (*deviceFolderPermissions)(nil) @@ -39,12 +39,12 @@ var _ discover.Discover = (*deviceFolderPermissions)(nil) // The nested devices that are applicable to the NVIDIA GPU devices are: // - DRM devices at /dev/dri/* // - NVIDIA Caps devices at /dev/nvidia-caps/* -func newDeviceFolderPermissionHookDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath string, devices discover.Discover) discover.Discover { +func newDeviceFolderPermissionHookDiscoverer(logger logger.Interface, devRoot string, nvidiaCDIHookPath string, devices discover.Discover) discover.Discover { d := &deviceFolderPermissions{ - logger: logger, - devRoot: devRoot, - nvidiaCTKPath: nvidiaCTKPath, - devices: devices, + logger: logger, + devRoot: devRoot, + nvidiaCDIHookPath: nvidiaCDIHookPath, + devices: devices, } return d @@ -70,8 +70,8 @@ func (d *deviceFolderPermissions) Hooks() ([]discover.Hook, error) { args = append(args, "--path", folder) } - hook := discover.CreateNvidiaCTKHook( - d.nvidiaCTKPath, + hook := discover.CreateNvidiaCDIHook( + d.nvidiaCDIHookPath, "chmod", args..., ) diff --git a/tools/container/toolkit/toolkit.go b/tools/container/toolkit/toolkit.go index 0702386c..25ee9497 100644 --- a/tools/container/toolkit/toolkit.go +++ b/tools/container/toolkit/toolkit.go @@ -355,6 +355,13 @@ func Install(cli *cli.Context, opts *options) error { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)) } + nvidiaCDIHookPath, err := 
installContainerCDIHookCLI(opts.toolkitRoot) + if err != nil && !opts.ignoreErrors { + return fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err) + } else if err != nil { + log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err)) + } + err = installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, nvidiaContainerRuntimeHookPath, opts) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err) @@ -369,7 +376,7 @@ func Install(cli *cli.Context, opts *options) error { log.Errorf("Ignoring error: %v", fmt.Errorf("error creating device nodes: %v", err)) } - err = generateCDISpec(opts, nvidiaCTKPath) + err = generateCDISpec(opts, nvidiaCDIHookPath) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error generating CDI specification: %v", err) } else if err != nil { @@ -539,6 +546,19 @@ func installContainerToolkitCLI(toolkitDir string) (string, error) { return e.install(toolkitDir) } +// installContainerCDIHookCLI installs the nvidia-cdi-hook CLI executable and wrapper. 
+func installContainerCDIHookCLI(toolkitDir string) (string, error) { + e := executable{ + source: "/usr/bin/nvidia-cdi-hook", + target: executableTarget{ + dotfileName: "nvidia-cdi-hook.real", + wrapperName: "nvidia-cdi-hook", + }, + } + + return e.install(toolkitDir) +} + // installContainerCLI sets up the NVIDIA container CLI executable, copying the executable // and implementing the required wrapper func installContainerCLI(toolkitRoot string) (string, error) { @@ -749,8 +769,8 @@ func createDeviceNodes(opts *options) error { return nil } -// generateCDISpec generates a CDI spec for use in managemnt containers -func generateCDISpec(opts *options, nvidiaCTKPath string) error { +// generateCDISpec generates a CDI spec for use in management containers +func generateCDISpec(opts *options, nvidiaCDIHookPath string) error { if !opts.cdiEnabled { return nil } @@ -758,7 +778,7 @@ func generateCDISpec(opts *options, nvidiaCTKPath string) error { cdilib, err := nvcdi.New( nvcdi.WithMode(nvcdi.ModeManagement), nvcdi.WithDriverRoot(opts.DriverRootCtrPath), - nvcdi.WithNVIDIACTKPath(nvidiaCTKPath), + nvcdi.WithNVIDIACDIHookPath(nvidiaCDIHookPath), nvcdi.WithVendor(opts.cdiVendor), nvcdi.WithClass(opts.cdiClass), )