/** # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. */ package toolkit import ( "errors" "fmt" "io" "os" "path/filepath" "strings" log "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" "tags.cncf.io/container-device-interface/pkg/cdi" "tags.cncf.io/container-device-interface/pkg/parser" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi" transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root" ) const ( // DefaultNvidiaDriverRoot specifies the default NVIDIA driver run directory DefaultNvidiaDriverRoot = "/run/nvidia/driver" nvidiaContainerCliSource = "/usr/bin/nvidia-container-cli" nvidiaContainerRuntimeHookSource = "/usr/bin/nvidia-container-runtime-hook" nvidiaContainerToolkitConfigSource = "/etc/nvidia-container-runtime/config.toml" configFilename = "config.toml" toolkitPidFilename = "toolkit.pid" ) type cdiOptions struct { Enabled bool outputDir string kind string vendor string class string } type Options struct { DriverRoot string DevRoot string DriverRootCtrPath string DevRootCtrPath string ContainerRuntimeMode string ContainerRuntimeDebug string ContainerRuntimeLogLevel string ContainerRuntimeModesCdiDefaultKind string ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice ContainerRuntimeRuntimes cli.StringSlice ContainerRuntimeHookSkipModeDetection bool ContainerCLIDebug string // CDI stores the CDI options for the toolkit. CDI cdiOptions createDeviceNodes cli.StringSlice acceptNVIDIAVisibleDevicesWhenUnprivileged bool acceptNVIDIAVisibleDevicesAsVolumeMounts bool ignoreErrors bool optInFeatures cli.StringSlice } func Flags(opts *Options) []cli.Flag { flags := []cli.Flag{ &cli.StringFlag{ Name: "driver-root", Aliases: []string{"nvidia-driver-root"}, Value: DefaultNvidiaDriverRoot, Destination: &opts.DriverRoot, EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"}, }, &cli.StringFlag{ Name: "driver-root-ctr-path", Value: DefaultNvidiaDriverRoot, Destination: &opts.DriverRootCtrPath, EnvVars: []string{"DRIVER_ROOT_CTR_PATH"}, }, &cli.StringFlag{ Name: "dev-root", Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.", Destination: &opts.DevRoot, EnvVars: []string{"NVIDIA_DEV_ROOT", "DEV_ROOT"}, }, &cli.StringFlag{ Name: "dev-root-ctr-path", Usage: "Specify the root where `/dev` is located in the container. If this is not specified, the driver-root-ctr-path is assumed.", Destination: &opts.DevRootCtrPath, EnvVars: []string{"DEV_ROOT_CTR_PATH"}, }, &cli.StringFlag{ Name: "nvidia-container-runtime.debug", Aliases: []string{"nvidia-container-runtime-debug"}, Usage: "Specify the location of the debug log file for the NVIDIA Container Runtime", Destination: &opts.ContainerRuntimeDebug, EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_DEBUG"}, }, &cli.StringFlag{ Name: "nvidia-container-runtime.log-level", Aliases: []string{"nvidia-container-runtime-debug-log-level"}, Destination: &opts.ContainerRuntimeLogLevel, EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"}, }, &cli.StringFlag{ Name: "nvidia-container-runtime.mode", Aliases: []string{"nvidia-container-runtime-mode"}, Destination: &opts.ContainerRuntimeMode, EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODE"}, }, &cli.StringFlag{ Name: "nvidia-container-runtime.modes.cdi.default-kind", Destination: &opts.ContainerRuntimeModesCdiDefaultKind, EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_DEFAULT_KIND"}, }, &cli.StringSliceFlag{ Name: "nvidia-container-runtime.modes.cdi.annotation-prefixes", Destination: &opts.ContainerRuntimeModesCDIAnnotationPrefixes, EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"}, }, &cli.StringSliceFlag{ Name: "nvidia-container-runtime.runtimes", Destination: &opts.ContainerRuntimeRuntimes, EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_RUNTIMES"}, }, &cli.BoolFlag{ Name: "nvidia-container-runtime-hook.skip-mode-detection", Value: true, Destination: &opts.ContainerRuntimeHookSkipModeDetection, EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_HOOK_SKIP_MODE_DETECTION"}, }, &cli.StringFlag{ Name: "nvidia-container-cli.debug", Aliases: []string{"nvidia-container-cli-debug"}, Usage: "Specify the location of the debug log file for the NVIDIA Container CLI", Destination: &opts.ContainerCLIDebug, EnvVars: []string{"NVIDIA_CONTAINER_CLI_DEBUG"}, }, &cli.BoolFlag{ Name: "accept-nvidia-visible-devices-envvar-when-unprivileged", Usage: "Set the accept-nvidia-visible-devices-envvar-when-unprivileged config option", Value: true, Destination: &opts.acceptNVIDIAVisibleDevicesWhenUnprivileged, EnvVars: []string{"ACCEPT_NVIDIA_VISIBLE_DEVICES_ENVVAR_WHEN_UNPRIVILEGED"}, }, &cli.BoolFlag{ Name: "accept-nvidia-visible-devices-as-volume-mounts", Usage: "Set the accept-nvidia-visible-devices-as-volume-mounts config option", Destination: &opts.acceptNVIDIAVisibleDevicesAsVolumeMounts, EnvVars: []string{"ACCEPT_NVIDIA_VISIBLE_DEVICES_AS_VOLUME_MOUNTS"}, }, &cli.BoolFlag{ Name: "cdi-enabled", Aliases: []string{"enable-cdi"}, Usage: "enable the generation of a CDI specification", Destination: &opts.CDI.Enabled, EnvVars: []string{"CDI_ENABLED", "ENABLE_CDI"}, }, &cli.StringFlag{ Name: "cdi-output-dir", Usage: "the directory where the CDI output files are to be written. If this is set to '', no CDI specification is generated.", Value: "/var/run/cdi", Destination: &opts.CDI.outputDir, EnvVars: []string{"CDI_OUTPUT_DIR"}, }, &cli.StringFlag{ Name: "cdi-kind", Usage: "the vendor string to use for the generated CDI specification", Value: "management.nvidia.com/gpu", Destination: &opts.CDI.kind, EnvVars: []string{"CDI_KIND"}, }, &cli.BoolFlag{ Name: "ignore-errors", Usage: "ignore errors when installing the NVIDIA Container toolkit. This is used for testing purposes only.", Hidden: true, Destination: &opts.ignoreErrors, }, &cli.StringSliceFlag{ Name: "create-device-nodes", Usage: "(Only applicable with --cdi-enabled) specifies which device nodes should be created. If any one of the options is set to '' or 'none', no device nodes will be created.", Value: cli.NewStringSlice("control"), Destination: &opts.createDeviceNodes, EnvVars: []string{"CREATE_DEVICE_NODES"}, }, &cli.StringSliceFlag{ Name: "opt-in-features", Hidden: true, Destination: &opts.optInFeatures, EnvVars: []string{"NVIDIA_CONTAINER_TOOLKIT_OPT_IN_FEATURES"}, }, } return flags } // ValidateOptions checks whether the specified options are valid func ValidateOptions(opts *Options, toolkitRoot string) error { if toolkitRoot == "" { return fmt.Errorf("invalid --toolkit-root option: %v", toolkitRoot) } vendor, class := parser.ParseQualifier(opts.CDI.kind) if err := parser.ValidateVendorName(vendor); err != nil { return fmt.Errorf("invalid CDI vendor name: %v", err) } if err := parser.ValidateClassName(class); err != nil { return fmt.Errorf("invalid CDI class name: %v", err) } opts.CDI.vendor = vendor opts.CDI.class = class if opts.CDI.Enabled && opts.CDI.outputDir == "" { log.Warning("Skipping CDI spec generation (no output directory specified)") opts.CDI.Enabled = false } isDisabled := false for _, mode := range opts.createDeviceNodes.Value() { if mode != "" && mode != "none" && mode != "control" { return fmt.Errorf("invalid --create-device-nodes value: %v", mode) } if mode == "" || mode == "none" { isDisabled = true break } } if !opts.CDI.Enabled && !isDisabled { log.Info("disabling device node creation since --cdi-enabled=false") isDisabled = true } if isDisabled { opts.createDeviceNodes = *cli.NewStringSlice() } return nil } // TryDelete attempts to remove the specified toolkit folder. // A toolkit.pid file -- if present -- is skipped. func TryDelete(cli *cli.Context, toolkitRoot string) error { log.Infof("Attempting to delete NVIDIA container toolkit from '%v'", toolkitRoot) contents, err := os.ReadDir(toolkitRoot) if err != nil && errors.Is(err, os.ErrNotExist) { return nil } else if err != nil { return fmt.Errorf("failed to read the contents of %v: %w", toolkitRoot, err) } for _, content := range contents { if content.Name() == toolkitPidFilename { continue } name := filepath.Join(toolkitRoot, content.Name()) if err := os.RemoveAll(name); err != nil { log.Warningf("could not remove %v: %v", name, err) } } if err := os.RemoveAll(toolkitRoot); err != nil { log.Warningf("could not remove %v: %v", toolkitRoot, err) } return nil } // Install installs the components of the NVIDIA container toolkit. // Any existing installation is removed. func Install(cli *cli.Context, opts *Options, toolkitRoot string) error { log.Infof("Installing NVIDIA container toolkit to '%v'", toolkitRoot) log.Infof("Removing existing NVIDIA container toolkit installation") err := os.RemoveAll(toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error removing toolkit directory: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err)) } toolkitConfigDir := filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime") toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename) err = createDirectories(toolkitRoot, toolkitConfigDir) if err != nil && !opts.ignoreErrors { return fmt.Errorf("could not create required directories: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err)) } err = installContainerLibraries(toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container library: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err)) } err = installContainerRuntimes(toolkitRoot, opts.DriverRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container runtime: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime: %v", err)) } nvidiaContainerCliExecutable, err := installContainerCLI(toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container CLI: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err)) } nvidiaContainerRuntimeHookPath, err := installRuntimeHook(toolkitRoot, toolkitConfigPath) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)) } nvidiaCTKPath, err := installContainerToolkitCLI(toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)) } nvidiaCDIHookPath, err := installContainerCDIHookCLI(toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err)) } err = installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, nvidiaContainerRuntimeHookPath, opts) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)) } err = createDeviceNodes(opts) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error creating device nodes: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error creating device nodes: %v", err)) } err = generateCDISpec(opts, nvidiaCDIHookPath) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error generating CDI specification: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error generating CDI specification: %v", err)) } return nil } // installContainerLibraries locates and installs the libraries that are part of // the nvidia-container-toolkit. // A predefined set of library candidates are considered, with the first one // resulting in success being installed to the toolkit folder. The install process // resolves the symlink for the library and copies the versioned library itself. func installContainerLibraries(toolkitRoot string) error { log.Infof("Installing NVIDIA container library to '%v'", toolkitRoot) libs := []string{ "libnvidia-container.so.1", "libnvidia-container-go.so.1", } for _, l := range libs { err := installLibrary(l, toolkitRoot) if err != nil { return fmt.Errorf("failed to install %s: %v", l, err) } } return nil } // installLibrary installs the specified library to the toolkit directory. func installLibrary(libName string, toolkitRoot string) error { libraryPath, err := findLibrary("", libName) if err != nil { return fmt.Errorf("error locating NVIDIA container library: %v", err) } installedLibPath, err := installFileToFolder(toolkitRoot, libraryPath) if err != nil { return fmt.Errorf("error installing %v to %v: %v", libraryPath, toolkitRoot, err) } log.Infof("Installed '%v' to '%v'", libraryPath, installedLibPath) if filepath.Base(installedLibPath) == libName { return nil } err = installSymlink(toolkitRoot, libName, installedLibPath) if err != nil { return fmt.Errorf("error installing symlink for NVIDIA container library: %v", err) } return nil } // installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring // that the settings are updated to match the desired install and nvidia driver directories. func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, nvidaContainerRuntimeHookPath string, opts *Options) error { log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath) cfg, err := config.New( config.WithConfigFile(nvidiaContainerToolkitConfigSource), ) if err != nil { return fmt.Errorf("could not open source config file: %v", err) } targetConfig, err := os.Create(toolkitConfigPath) if err != nil { return fmt.Errorf("could not create target config file: %v", err) } defer targetConfig.Close() // Read the ldconfig path from the config as this may differ per platform // On ubuntu-based systems this ends in `.real` ldconfigPath := fmt.Sprintf("%s", cfg.GetDefault("nvidia-container-cli.ldconfig", "/sbin/ldconfig")) // Use the driver run root as the root: driverLdconfigPath := config.NormalizeLDConfigPath("@" + filepath.Join(opts.DriverRoot, strings.TrimPrefix(ldconfigPath, "@/"))) configValues := map[string]interface{}{ // Set the options in the root toml table "accept-nvidia-visible-devices-envvar-when-unprivileged": opts.acceptNVIDIAVisibleDevicesWhenUnprivileged, "accept-nvidia-visible-devices-as-volume-mounts": opts.acceptNVIDIAVisibleDevicesAsVolumeMounts, // Set the nvidia-container-cli options "nvidia-container-cli.root": opts.DriverRoot, "nvidia-container-cli.path": nvidiaContainerCliExecutablePath, "nvidia-container-cli.ldconfig": driverLdconfigPath, // Set nvidia-ctk options "nvidia-ctk.path": nvidiaCTKPath, // Set the nvidia-container-runtime-hook options "nvidia-container-runtime-hook.path": nvidaContainerRuntimeHookPath, "nvidia-container-runtime-hook.skip-mode-detection": opts.ContainerRuntimeHookSkipModeDetection, } toolkitRuntimeList := opts.ContainerRuntimeRuntimes.Value() if len(toolkitRuntimeList) > 0 { configValues["nvidia-container-runtime.runtimes"] = toolkitRuntimeList } for _, optInFeature := range opts.optInFeatures.Value() { configValues["features."+optInFeature] = true } for key, value := range configValues { cfg.Set(key, value) } // Set the optional config options optionalConfigValues := map[string]interface{}{ "nvidia-container-runtime.debug": opts.ContainerRuntimeDebug, "nvidia-container-runtime.log-level": opts.ContainerRuntimeLogLevel, "nvidia-container-runtime.mode": opts.ContainerRuntimeMode, "nvidia-container-runtime.modes.cdi.annotation-prefixes": opts.ContainerRuntimeModesCDIAnnotationPrefixes, "nvidia-container-runtime.modes.cdi.default-kind": opts.ContainerRuntimeModesCdiDefaultKind, "nvidia-container-runtime.runtimes": opts.ContainerRuntimeRuntimes, "nvidia-container-cli.debug": opts.ContainerCLIDebug, } for key, value := range optionalConfigValues { if !c.IsSet(key) { log.Infof("Skipping unset option: %v", key) continue } if value == nil { log.Infof("Skipping option with nil value: %v", key) continue } switch v := value.(type) { case string: if v == "" { continue } case cli.StringSlice: if len(v.Value()) == 0 { continue } value = v.Value() default: log.Warningf("Unexpected type for option %v=%v: %T", key, value, v) } cfg.Set(key, value) } if _, err := cfg.WriteTo(targetConfig); err != nil { return fmt.Errorf("error writing config: %v", err) } os.Stdout.WriteString("Using config:\n") if _, err = cfg.WriteTo(os.Stdout); err != nil { log.Warningf("Failed to output config to STDOUT: %v", err) } return nil } // installContainerToolkitCLI installs the nvidia-ctk CLI executable and wrapper. func installContainerToolkitCLI(toolkitDir string) (string, error) { e := executable{ source: "/usr/bin/nvidia-ctk", target: executableTarget{ dotfileName: "nvidia-ctk.real", wrapperName: "nvidia-ctk", }, } return e.install(toolkitDir) } // installContainerCDIHookCLI installs the nvidia-cdi-hook CLI executable and wrapper. func installContainerCDIHookCLI(toolkitDir string) (string, error) { e := executable{ source: "/usr/bin/nvidia-cdi-hook", target: executableTarget{ dotfileName: "nvidia-cdi-hook.real", wrapperName: "nvidia-cdi-hook", }, } return e.install(toolkitDir) } // installContainerCLI sets up the NVIDIA container CLI executable, copying the executable // and implementing the required wrapper func installContainerCLI(toolkitRoot string) (string, error) { log.Infof("Installing NVIDIA container CLI from '%v'", nvidiaContainerCliSource) env := map[string]string{ "LD_LIBRARY_PATH": toolkitRoot, } e := executable{ source: nvidiaContainerCliSource, target: executableTarget{ dotfileName: "nvidia-container-cli.real", wrapperName: "nvidia-container-cli", }, env: env, } installedPath, err := e.install(toolkitRoot) if err != nil { return "", fmt.Errorf("error installing NVIDIA container CLI: %v", err) } return installedPath, nil } // installRuntimeHook sets up the NVIDIA runtime hook, copying the executable // and implementing the required wrapper func installRuntimeHook(toolkitRoot string, configFilePath string) (string, error) { log.Infof("Installing NVIDIA container runtime hook from '%v'", nvidiaContainerRuntimeHookSource) argLines := []string{ fmt.Sprintf("-config \"%s\"", configFilePath), } e := executable{ source: nvidiaContainerRuntimeHookSource, target: executableTarget{ dotfileName: "nvidia-container-runtime-hook.real", wrapperName: "nvidia-container-runtime-hook", }, argLines: argLines, } installedPath, err := e.install(toolkitRoot) if err != nil { return "", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err) } err = installSymlink(toolkitRoot, "nvidia-container-toolkit", installedPath) if err != nil { return "", fmt.Errorf("error installing symlink to NVIDIA container runtime hook: %v", err) } return installedPath, nil } // installSymlink creates a symlink in the toolkitDirectory that points to the specified target. // Note: The target is assumed to be local to the toolkit directory func installSymlink(toolkitRoot string, link string, target string) error { symlinkPath := filepath.Join(toolkitRoot, link) targetPath := filepath.Base(target) log.Infof("Creating symlink '%v' -> '%v'", symlinkPath, targetPath) err := os.Symlink(targetPath, symlinkPath) if err != nil { return fmt.Errorf("error creating symlink '%v' => '%v': %v", symlinkPath, targetPath, err) } return nil } // installFileToFolder copies a source file to a destination folder. // The path of the input file is ignored. // e.g. installFileToFolder("/some/path/file.txt", "/output/path") // will result in a file "/output/path/file.txt" being generated func installFileToFolder(destFolder string, src string) (string, error) { name := filepath.Base(src) return installFileToFolderWithName(destFolder, name, src) } // cp src destFolder/name func installFileToFolderWithName(destFolder string, name, src string) (string, error) { dest := filepath.Join(destFolder, name) err := installFile(dest, src) if err != nil { return "", fmt.Errorf("error copying '%v' to '%v': %v", src, dest, err) } return dest, nil } // installFile copies a file from src to dest and maintains // file modes func installFile(dest string, src string) error { log.Infof("Installing '%v' to '%v'", src, dest) source, err := os.Open(src) if err != nil { return fmt.Errorf("error opening source: %v", err) } defer source.Close() destination, err := os.Create(dest) if err != nil { return fmt.Errorf("error creating destination: %v", err) } defer destination.Close() _, err = io.Copy(destination, source) if err != nil { return fmt.Errorf("error copying file: %v", err) } err = applyModeFromSource(dest, src) if err != nil { return fmt.Errorf("error setting destination file mode: %v", err) } return nil } // applyModeFromSource sets the file mode for a destination file // to match that of a specified source file func applyModeFromSource(dest string, src string) error { sourceInfo, err := os.Stat(src) if err != nil { return fmt.Errorf("error getting file info for '%v': %v", src, err) } err = os.Chmod(dest, sourceInfo.Mode()) if err != nil { return fmt.Errorf("error setting mode for '%v': %v", dest, err) } return nil } // findLibrary searches a set of candidate libraries in the specified root for // a given library name func findLibrary(root string, libName string) (string, error) { log.Infof("Finding library %v (root=%v)", libName, root) candidateDirs := []string{ "/usr/lib64", "/usr/lib/x86_64-linux-gnu", "/usr/lib/aarch64-linux-gnu", } for _, d := range candidateDirs { l := filepath.Join(root, d, libName) log.Infof("Checking library candidate '%v'", l) libraryCandidate, err := resolveLink(l) if err != nil { log.Infof("Skipping library candidate '%v': %v", l, err) continue } return libraryCandidate, nil } return "", fmt.Errorf("error locating library '%v'", libName) } // resolveLink finds the target of a symlink or the file itself in the // case of a regular file. // This is equivalent to running `readlink -f ${l}` func resolveLink(l string) (string, error) { resolved, err := filepath.EvalSymlinks(l) if err != nil { return "", fmt.Errorf("error resolving link '%v': %v", l, err) } if l != resolved { log.Infof("Resolved link: '%v' => '%v'", l, resolved) } return resolved, nil } func createDirectories(dir ...string) error { for _, d := range dir { log.Infof("Creating directory '%v'", d) err := os.MkdirAll(d, 0755) if err != nil { return fmt.Errorf("error creating directory: %v", err) } } return nil } func createDeviceNodes(opts *Options) error { modes := opts.createDeviceNodes.Value() if len(modes) == 0 { return nil } devices, err := nvdevices.New( nvdevices.WithDevRoot(opts.DevRootCtrPath), ) if err != nil { return fmt.Errorf("failed to create library: %v", err) } for _, mode := range modes { log.Infof("Creating %v device nodes at %v", mode, opts.DevRootCtrPath) if mode != "control" { log.Warningf("Unrecognised device mode: %v", mode) continue } if err := devices.CreateNVIDIAControlDevices(); err != nil { return fmt.Errorf("failed to create control device nodes: %v", err) } } return nil } // generateCDISpec generates a CDI spec for use in management containers func generateCDISpec(opts *Options, nvidiaCDIHookPath string) error { if !opts.CDI.Enabled { return nil } log.Info("Generating CDI spec for management containers") cdilib, err := nvcdi.New( nvcdi.WithMode(nvcdi.ModeManagement), nvcdi.WithDriverRoot(opts.DriverRootCtrPath), nvcdi.WithDevRoot(opts.DevRootCtrPath), nvcdi.WithNVIDIACDIHookPath(nvidiaCDIHookPath), nvcdi.WithVendor(opts.CDI.vendor), nvcdi.WithClass(opts.CDI.class), ) if err != nil { return fmt.Errorf("failed to create CDI library for management containers: %v", err) } spec, err := cdilib.GetSpec() if err != nil { return fmt.Errorf("failed to genereate CDI spec for management containers: %v", err) } transformer := transformroot.NewDriverTransformer( transformroot.WithDriverRoot(opts.DriverRootCtrPath), transformroot.WithTargetDriverRoot(opts.DriverRoot), transformroot.WithDevRoot(opts.DevRootCtrPath), transformroot.WithTargetDevRoot(opts.DevRoot), ) if err := transformer.Transform(spec.Raw()); err != nil { return fmt.Errorf("failed to transform driver root in CDI spec: %v", err) } name, err := cdi.GenerateNameForSpec(spec.Raw()) if err != nil { return fmt.Errorf("failed to generate CDI name for management containers: %v", err) } err = spec.Save(filepath.Join(opts.CDI.outputDir, name)) if err != nil { return fmt.Errorf("failed to save CDI spec for management containers: %v", err) } return nil }