2021-10-11 14:31:02 +00:00
/**
# Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
2024-09-19 11:35:42 +00:00
package toolkit
2021-10-11 14:31:02 +00:00
import (
2024-06-14 12:15:54 +00:00
"errors"
2021-10-11 14:31:02 +00:00
"fmt"
"os"
"path/filepath"
"strings"
log "github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
2023-11-01 11:40:51 +00:00
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/pkg/parser"
2023-12-01 01:10:10 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
2024-09-04 10:02:00 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/toolkit/installer"
2021-10-11 14:31:02 +00:00
)
const (
	// DefaultNvidiaDriverRoot specifies the default NVIDIA driver run directory
	DefaultNvidiaDriverRoot = "/run/nvidia/driver"

	// nvidiaContainerToolkitConfigSource is the path of the config file that
	// is loaded as the starting point for the installed toolkit config.
	nvidiaContainerToolkitConfigSource = "/etc/nvidia-container-runtime/config.toml"

	// configFilename is the name of the config file that is written to the
	// toolkit config directory.
	configFilename = "config.toml"

	// toolkitPidFilename is the name of the entry in the toolkit root that is
	// skipped when the contents of the toolkit root are deleted.
	toolkitPidFilename = "toolkit.pid"
)
2024-09-19 11:35:42 +00:00
type Options struct {
2023-03-07 14:17:49 +00:00
DriverRoot string
2024-02-14 09:53:38 +00:00
DevRoot string
2023-03-07 14:17:49 +00:00
DriverRootCtrPath string
2024-02-14 09:53:38 +00:00
DevRootCtrPath string
2023-03-07 14:17:49 +00:00
2023-03-23 18:40:19 +00:00
ContainerRuntimeMode string
ContainerRuntimeDebug string
ContainerRuntimeLogLevel string
ContainerRuntimeModesCdiDefaultKind string
ContainerRuntimeModesCDIAnnotationPrefixes cli . StringSlice
2023-03-07 14:17:49 +00:00
2023-03-28 15:39:17 +00:00
ContainerRuntimeRuntimes cli . StringSlice
2023-03-09 07:49:50 +00:00
ContainerRuntimeHookSkipModeDetection bool
2023-03-07 14:17:49 +00:00
ContainerCLIDebug string
2022-07-25 08:31:31 +00:00
2023-03-13 16:18:54 +00:00
cdiEnabled bool
2023-03-01 10:44:32 +00:00
cdiOutputDir string
cdiKind string
cdiVendor string
cdiClass string
2024-02-09 13:08:22 +00:00
createDeviceNodes cli . StringSlice
2022-07-25 08:31:31 +00:00
acceptNVIDIAVisibleDevicesWhenUnprivileged bool
acceptNVIDIAVisibleDevicesAsVolumeMounts bool
2023-03-28 14:20:27 +00:00
ignoreErrors bool
2024-09-18 20:20:56 +00:00
optInFeatures cli . StringSlice
2024-09-04 10:02:00 +00:00
packageType string
2022-07-25 08:01:33 +00:00
}
2021-10-11 14:31:02 +00:00
2024-09-19 11:35:42 +00:00
func Flags ( opts * Options ) [ ] cli . Flag {
2021-10-11 14:31:02 +00:00
flags := [ ] cli . Flag {
& cli . StringFlag {
2024-02-09 13:28:02 +00:00
Name : "driver-root" ,
Aliases : [ ] string { "nvidia-driver-root" } ,
2021-10-11 14:31:02 +00:00
Value : DefaultNvidiaDriverRoot ,
2022-07-25 08:01:33 +00:00
Destination : & opts . DriverRoot ,
2024-02-09 13:28:02 +00:00
EnvVars : [ ] string { "NVIDIA_DRIVER_ROOT" , "DRIVER_ROOT" } ,
2021-10-11 14:31:02 +00:00
} ,
2023-03-01 10:44:32 +00:00
& cli . StringFlag {
Name : "driver-root-ctr-path" ,
Value : DefaultNvidiaDriverRoot ,
Destination : & opts . DriverRootCtrPath ,
EnvVars : [ ] string { "DRIVER_ROOT_CTR_PATH" } ,
} ,
2024-02-14 09:53:38 +00:00
& cli . StringFlag {
Name : "dev-root" ,
Usage : "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed." ,
Destination : & opts . DevRoot ,
EnvVars : [ ] string { "NVIDIA_DEV_ROOT" , "DEV_ROOT" } ,
} ,
& cli . StringFlag {
Name : "dev-root-ctr-path" ,
Usage : "Specify the root where `/dev` is located in the container. If this is not specified, the driver-root-ctr-path is assumed." ,
Destination : & opts . DevRootCtrPath ,
EnvVars : [ ] string { "DEV_ROOT_CTR_PATH" } ,
} ,
2021-10-11 14:31:02 +00:00
& cli . StringFlag {
2023-03-23 18:40:19 +00:00
Name : "nvidia-container-runtime.debug" ,
Aliases : [ ] string { "nvidia-container-runtime-debug" } ,
2021-10-11 14:31:02 +00:00
Usage : "Specify the location of the debug log file for the NVIDIA Container Runtime" ,
2022-07-25 08:01:33 +00:00
Destination : & opts . ContainerRuntimeDebug ,
2021-10-11 14:31:02 +00:00
EnvVars : [ ] string { "NVIDIA_CONTAINER_RUNTIME_DEBUG" } ,
} ,
& cli . StringFlag {
2023-03-23 18:40:19 +00:00
Name : "nvidia-container-runtime.log-level" ,
Aliases : [ ] string { "nvidia-container-runtime-debug-log-level" } ,
2022-07-25 08:01:33 +00:00
Destination : & opts . ContainerRuntimeLogLevel ,
2021-10-11 14:31:02 +00:00
EnvVars : [ ] string { "NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL" } ,
} ,
2023-02-17 16:04:46 +00:00
& cli . StringFlag {
2023-03-23 18:40:19 +00:00
Name : "nvidia-container-runtime.mode" ,
Aliases : [ ] string { "nvidia-container-runtime-mode" } ,
2023-02-17 16:04:46 +00:00
Destination : & opts . ContainerRuntimeMode ,
EnvVars : [ ] string { "NVIDIA_CONTAINER_RUNTIME_MODE" } ,
} ,
2023-03-07 14:17:49 +00:00
& cli . StringFlag {
2023-03-23 18:40:19 +00:00
Name : "nvidia-container-runtime.modes.cdi.default-kind" ,
2023-03-07 14:17:49 +00:00
Destination : & opts . ContainerRuntimeModesCdiDefaultKind ,
EnvVars : [ ] string { "NVIDIA_CONTAINER_RUNTIME_MODES_CDI_DEFAULT_KIND" } ,
} ,
2023-03-23 18:40:19 +00:00
& cli . StringSliceFlag {
Name : "nvidia-container-runtime.modes.cdi.annotation-prefixes" ,
Destination : & opts . ContainerRuntimeModesCDIAnnotationPrefixes ,
EnvVars : [ ] string { "NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES" } ,
} ,
2023-03-28 15:39:17 +00:00
& cli . StringSliceFlag {
Name : "nvidia-container-runtime.runtimes" ,
Destination : & opts . ContainerRuntimeRuntimes ,
EnvVars : [ ] string { "NVIDIA_CONTAINER_RUNTIME_RUNTIMES" } ,
} ,
2023-03-09 07:49:50 +00:00
& cli . BoolFlag {
Name : "nvidia-container-runtime-hook.skip-mode-detection" ,
Value : true ,
Destination : & opts . ContainerRuntimeHookSkipModeDetection ,
EnvVars : [ ] string { "NVIDIA_CONTAINER_RUNTIME_HOOK_SKIP_MODE_DETECTION" } ,
} ,
2021-10-11 14:31:02 +00:00
& cli . StringFlag {
2023-03-23 18:40:19 +00:00
Name : "nvidia-container-cli.debug" ,
Aliases : [ ] string { "nvidia-container-cli-debug" } ,
2021-10-11 14:31:02 +00:00
Usage : "Specify the location of the debug log file for the NVIDIA Container CLI" ,
2022-07-25 08:01:33 +00:00
Destination : & opts . ContainerCLIDebug ,
2021-10-11 14:31:02 +00:00
EnvVars : [ ] string { "NVIDIA_CONTAINER_CLI_DEBUG" } ,
} ,
2022-07-25 08:31:31 +00:00
& cli . BoolFlag {
Name : "accept-nvidia-visible-devices-envvar-when-unprivileged" ,
Usage : "Set the accept-nvidia-visible-devices-envvar-when-unprivileged config option" ,
2022-08-09 08:50:51 +00:00
Value : true ,
2022-07-25 08:31:31 +00:00
Destination : & opts . acceptNVIDIAVisibleDevicesWhenUnprivileged ,
EnvVars : [ ] string { "ACCEPT_NVIDIA_VISIBLE_DEVICES_ENVVAR_WHEN_UNPRIVILEGED" } ,
} ,
& cli . BoolFlag {
Name : "accept-nvidia-visible-devices-as-volume-mounts" ,
Usage : "Set the accept-nvidia-visible-devices-as-volume-mounts config option" ,
2022-08-09 08:27:51 +00:00
Destination : & opts . acceptNVIDIAVisibleDevicesAsVolumeMounts ,
2022-07-25 08:31:31 +00:00
EnvVars : [ ] string { "ACCEPT_NVIDIA_VISIBLE_DEVICES_AS_VOLUME_MOUNTS" } ,
} ,
2023-03-13 16:18:54 +00:00
& cli . BoolFlag {
Name : "cdi-enabled" ,
Aliases : [ ] string { "enable-cdi" } ,
Usage : "enable the generation of a CDI specification" ,
Destination : & opts . cdiEnabled ,
EnvVars : [ ] string { "CDI_ENABLED" , "ENABLE_CDI" } ,
} ,
2023-03-01 10:44:32 +00:00
& cli . StringFlag {
Name : "cdi-output-dir" ,
Usage : "the directory where the CDI output files are to be written. If this is set to '', no CDI specification is generated." ,
Value : "/var/run/cdi" ,
Destination : & opts . cdiOutputDir ,
2023-03-09 10:25:05 +00:00
EnvVars : [ ] string { "CDI_OUTPUT_DIR" } ,
2023-03-01 10:44:32 +00:00
} ,
& cli . StringFlag {
Name : "cdi-kind" ,
Usage : "the vendor string to use for the generated CDI specification" ,
Value : "management.nvidia.com/gpu" ,
Destination : & opts . cdiKind ,
2023-03-09 10:25:05 +00:00
EnvVars : [ ] string { "CDI_KIND" } ,
2023-03-01 10:44:32 +00:00
} ,
2023-03-28 14:20:27 +00:00
& cli . BoolFlag {
Name : "ignore-errors" ,
Usage : "ignore errors when installing the NVIDIA Container toolkit. This is used for testing purposes only." ,
Hidden : true ,
Destination : & opts . ignoreErrors ,
} ,
2024-02-09 13:08:22 +00:00
& cli . StringSliceFlag {
Name : "create-device-nodes" ,
Usage : "(Only applicable with --cdi-enabled) specifies which device nodes should be created. If any one of the options is set to '' or 'none', no device nodes will be created." ,
Value : cli . NewStringSlice ( "control" ) ,
Destination : & opts . createDeviceNodes ,
EnvVars : [ ] string { "CREATE_DEVICE_NODES" } ,
} ,
2024-09-18 20:20:56 +00:00
& cli . StringSliceFlag {
Name : "opt-in-features" ,
Hidden : true ,
Destination : & opts . optInFeatures ,
EnvVars : [ ] string { "NVIDIA_CONTAINER_TOOLKIT_OPT_IN_FEATURES" } ,
} ,
2024-09-04 10:02:00 +00:00
& cli . StringFlag {
Name : "package-type" ,
Usage : "specify the package type to use. One of ['deb', 'rpm', 'auto', '']. If 'auto' or '' are used, the type is inferred automatically." ,
Value : "auto" ,
Destination : & opts . packageType ,
EnvVars : [ ] string { "PACKAGE_TYPE" } ,
} ,
2021-10-11 14:31:02 +00:00
}
2024-09-19 11:35:42 +00:00
return flags
2021-10-11 14:31:02 +00:00
}
2024-09-19 11:35:42 +00:00
// ValidateOptions checks whether the specified options are valid
func ValidateOptions ( opts * Options , toolkitRoot string ) error {
if toolkitRoot == "" {
return fmt . Errorf ( "invalid --toolkit-root option: %v" , toolkitRoot )
2021-10-11 14:31:02 +00:00
}
2023-08-25 14:15:30 +00:00
vendor , class := parser . ParseQualifier ( opts . cdiKind )
if err := parser . ValidateVendorName ( vendor ) ; err != nil {
2023-03-01 10:44:32 +00:00
return fmt . Errorf ( "invalid CDI vendor name: %v" , err )
}
2023-08-25 14:15:30 +00:00
if err := parser . ValidateClassName ( class ) ; err != nil {
2023-03-01 10:44:32 +00:00
return fmt . Errorf ( "invalid CDI class name: %v" , err )
}
opts . cdiVendor = vendor
opts . cdiClass = class
2024-02-09 13:08:22 +00:00
if opts . cdiEnabled && opts . cdiOutputDir == "" {
log . Warning ( "Skipping CDI spec generation (no output directory specified)" )
opts . cdiEnabled = false
}
isDisabled := false
for _ , mode := range opts . createDeviceNodes . Value ( ) {
if mode != "" && mode != "none" && mode != "control" {
return fmt . Errorf ( "invalid --create-device-nodes value: %v" , mode )
}
if mode == "" || mode == "none" {
isDisabled = true
break
}
}
if ! opts . cdiEnabled && ! isDisabled {
log . Info ( "disabling device node creation since --cdi-enabled=false" )
isDisabled = true
}
if isDisabled {
opts . createDeviceNodes = * cli . NewStringSlice ( )
}
2021-10-11 14:31:02 +00:00
return nil
}
2024-06-14 12:15:54 +00:00
// TryDelete attempts to remove the specified toolkit folder.
// A toolkit.pid file -- if present -- is skipped.
2024-09-19 11:35:42 +00:00
func TryDelete ( cli * cli . Context , toolkitRoot string ) error {
log . Infof ( "Attempting to delete NVIDIA container toolkit from '%v'" , toolkitRoot )
2024-06-14 12:15:54 +00:00
2024-09-19 11:35:42 +00:00
contents , err := os . ReadDir ( toolkitRoot )
2024-06-14 12:15:54 +00:00
if err != nil && errors . Is ( err , os . ErrNotExist ) {
return nil
} else if err != nil {
2024-09-19 11:35:42 +00:00
return fmt . Errorf ( "failed to read the contents of %v: %w" , toolkitRoot , err )
2024-06-14 12:15:54 +00:00
}
for _ , content := range contents {
if content . Name ( ) == toolkitPidFilename {
continue
}
2024-09-19 11:35:42 +00:00
name := filepath . Join ( toolkitRoot , content . Name ( ) )
2024-06-14 12:15:54 +00:00
if err := os . RemoveAll ( name ) ; err != nil {
log . Warningf ( "could not remove %v: %v" , name , err )
}
}
2024-09-19 11:35:42 +00:00
if err := os . RemoveAll ( toolkitRoot ) ; err != nil {
log . Warningf ( "could not remove %v: %v" , toolkitRoot , err )
2021-10-11 14:31:02 +00:00
}
return nil
}
// Install installs the components of the NVIDIA container toolkit.
// Any existing installation is removed.
2024-09-04 10:02:00 +00:00
func Install ( cli * cli . Context , opts * Options , toolkitRoot string , hostRoot string ) error {
2024-09-19 11:35:42 +00:00
log . Infof ( "Installing NVIDIA container toolkit to '%v'" , toolkitRoot )
2021-10-11 14:31:02 +00:00
log . Infof ( "Removing existing NVIDIA container toolkit installation" )
2024-09-19 11:35:42 +00:00
err := os . RemoveAll ( toolkitRoot )
2023-03-28 14:20:27 +00:00
if err != nil && ! opts . ignoreErrors {
2021-10-11 14:31:02 +00:00
return fmt . Errorf ( "error removing toolkit directory: %v" , err )
2023-03-28 14:20:27 +00:00
} else if err != nil {
log . Errorf ( "Ignoring error: %v" , fmt . Errorf ( "error removing toolkit directory: %v" , err ) )
2021-10-11 14:31:02 +00:00
}
2024-09-19 11:35:42 +00:00
toolkitConfigDir := filepath . Join ( toolkitRoot , ".config" , "nvidia-container-runtime" )
2021-10-11 14:31:02 +00:00
toolkitConfigPath := filepath . Join ( toolkitConfigDir , configFilename )
2024-09-19 11:35:42 +00:00
err = createDirectories ( toolkitRoot , toolkitConfigDir )
2023-03-28 14:20:27 +00:00
if err != nil && ! opts . ignoreErrors {
2021-10-11 14:31:02 +00:00
return fmt . Errorf ( "could not create required directories: %v" , err )
2023-03-28 14:20:27 +00:00
} else if err != nil {
log . Errorf ( "Ignoring error: %v" , fmt . Errorf ( "could not create required directories: %v" , err ) )
2021-10-11 14:31:02 +00:00
}
2024-09-04 10:02:00 +00:00
toolkit , err := installer . New (
installer . WithHostRoot ( hostRoot ) ,
installer . WithPackageType ( opts . packageType ) ,
)
if err != nil {
if ! opts . ignoreErrors {
return fmt . Errorf ( "could not create toolkit installer: %w" , err )
}
log . Errorf ( "Ignoring error: %v" , fmt . Errorf ( "could not create toolkit installer: %w" , err ) )
2021-10-11 14:31:02 +00:00
}
2024-09-04 10:02:00 +00:00
if err := toolkit . Install ( toolkitRoot ) ; err != nil {
if ! opts . ignoreErrors {
return fmt . Errorf ( "could not install toolkit components: %w" , err )
}
log . Errorf ( "Ignoring error: %v" , fmt . Errorf ( "could not install toolkit components: %w" , err ) )
2024-04-24 08:47:45 +00:00
}
2024-09-04 10:02:00 +00:00
// TODO: The toolkit config installation should also use the installer.
nvidiaContainerCliExecutable := filepath . Join ( toolkitRoot , "nvidia-container-cli" )
nvidiaCTKPath := filepath . Join ( toolkitRoot , "nvidia-ctk" )
nvidiaCDIHookPath := filepath . Join ( toolkitRoot , "nvidia-cdi-hook" )
nvidiaContainerRuntimeHookPath := filepath . Join ( toolkitRoot , "nvidia-container-runtime-hook" )
2023-05-24 08:34:01 +00:00
err = installToolkitConfig ( cli , toolkitConfigPath , nvidiaContainerCliExecutable , nvidiaCTKPath , nvidiaContainerRuntimeHookPath , opts )
2023-03-28 14:20:27 +00:00
if err != nil && ! opts . ignoreErrors {
2023-03-09 15:39:12 +00:00
return fmt . Errorf ( "error installing NVIDIA container toolkit config: %v" , err )
2023-03-28 14:20:27 +00:00
} else if err != nil {
log . Errorf ( "Ignoring error: %v" , fmt . Errorf ( "error installing NVIDIA container toolkit config: %v" , err ) )
2023-03-01 12:51:11 +00:00
}
2024-02-09 13:08:22 +00:00
err = createDeviceNodes ( opts )
if err != nil && ! opts . ignoreErrors {
return fmt . Errorf ( "error creating device nodes: %v" , err )
} else if err != nil {
log . Errorf ( "Ignoring error: %v" , fmt . Errorf ( "error creating device nodes: %v" , err ) )
}
2024-04-24 08:47:45 +00:00
err = generateCDISpec ( opts , nvidiaCDIHookPath )
2024-02-09 13:17:27 +00:00
if err != nil && ! opts . ignoreErrors {
return fmt . Errorf ( "error generating CDI specification: %v" , err )
} else if err != nil {
log . Errorf ( "Ignoring error: %v" , fmt . Errorf ( "error generating CDI specification: %v" , err ) )
}
return nil
2021-10-11 14:31:02 +00:00
}
// installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring
// that the settings are updated to match the desired install and nvidia driver directories.
2024-09-19 11:35:42 +00:00
func installToolkitConfig ( c * cli . Context , toolkitConfigPath string , nvidiaContainerCliExecutablePath string , nvidiaCTKPath string , nvidaContainerRuntimeHookPath string , opts * Options ) error {
2021-10-11 14:31:02 +00:00
log . Infof ( "Installing NVIDIA container toolkit config '%v'" , toolkitConfigPath )
2024-08-08 22:40:00 +00:00
cfg , err := config . New (
config . WithConfigFile ( nvidiaContainerToolkitConfigSource ) ,
)
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "could not open source config file: %v" , err )
}
targetConfig , err := os . Create ( toolkitConfigPath )
if err != nil {
return fmt . Errorf ( "could not create target config file: %v" , err )
}
defer targetConfig . Close ( )
// Read the ldconfig path from the config as this may differ per platform
// On ubuntu-based systems this ends in `.real`
2023-11-14 15:56:50 +00:00
ldconfigPath := fmt . Sprintf ( "%s" , cfg . GetDefault ( "nvidia-container-cli.ldconfig" , "/sbin/ldconfig" ) )
2021-10-11 14:31:02 +00:00
// Use the driver run root as the root:
2023-11-14 15:56:50 +00:00
driverLdconfigPath := config . NormalizeLDConfigPath ( "@" + filepath . Join ( opts . DriverRoot , strings . TrimPrefix ( ldconfigPath , "@/" ) ) )
2021-10-11 14:31:02 +00:00
2023-03-23 18:51:00 +00:00
configValues := map [ string ] interface { } {
// Set the options in the root toml table
"accept-nvidia-visible-devices-envvar-when-unprivileged" : opts . acceptNVIDIAVisibleDevicesWhenUnprivileged ,
"accept-nvidia-visible-devices-as-volume-mounts" : opts . acceptNVIDIAVisibleDevicesAsVolumeMounts ,
// Set the nvidia-container-cli options
"nvidia-container-cli.root" : opts . DriverRoot ,
"nvidia-container-cli.path" : nvidiaContainerCliExecutablePath ,
"nvidia-container-cli.ldconfig" : driverLdconfigPath ,
// Set nvidia-ctk options
"nvidia-ctk.path" : nvidiaCTKPath ,
// Set the nvidia-container-runtime-hook options
2023-05-24 08:34:01 +00:00
"nvidia-container-runtime-hook.path" : nvidaContainerRuntimeHookPath ,
2023-03-23 18:51:00 +00:00
"nvidia-container-runtime-hook.skip-mode-detection" : opts . ContainerRuntimeHookSkipModeDetection ,
}
2024-08-08 22:40:00 +00:00
toolkitRuntimeList := opts . ContainerRuntimeRuntimes . Value ( )
if len ( toolkitRuntimeList ) > 0 {
configValues [ "nvidia-container-runtime.runtimes" ] = toolkitRuntimeList
}
2024-09-18 20:20:56 +00:00
for _ , optInFeature := range opts . optInFeatures . Value ( ) {
configValues [ "features." + optInFeature ] = true
}
2023-03-23 18:51:00 +00:00
for key , value := range configValues {
2023-11-14 15:56:50 +00:00
cfg . Set ( key , value )
2023-03-23 18:51:00 +00:00
}
2021-10-11 14:31:02 +00:00
2023-03-23 18:40:19 +00:00
// Set the optional config options
optionalConfigValues := map [ string ] interface { } {
"nvidia-container-runtime.debug" : opts . ContainerRuntimeDebug ,
"nvidia-container-runtime.log-level" : opts . ContainerRuntimeLogLevel ,
"nvidia-container-runtime.mode" : opts . ContainerRuntimeMode ,
"nvidia-container-runtime.modes.cdi.annotation-prefixes" : opts . ContainerRuntimeModesCDIAnnotationPrefixes ,
"nvidia-container-runtime.modes.cdi.default-kind" : opts . ContainerRuntimeModesCdiDefaultKind ,
2023-03-28 15:39:17 +00:00
"nvidia-container-runtime.runtimes" : opts . ContainerRuntimeRuntimes ,
2023-03-23 18:40:19 +00:00
"nvidia-container-cli.debug" : opts . ContainerCLIDebug ,
2021-10-11 14:31:02 +00:00
}
2024-09-18 20:20:56 +00:00
2023-03-23 18:40:19 +00:00
for key , value := range optionalConfigValues {
if ! c . IsSet ( key ) {
log . Infof ( "Skipping unset option: %v" , key )
continue
}
if value == nil {
log . Infof ( "Skipping option with nil value: %v" , key )
2021-10-11 14:31:02 +00:00
continue
}
2023-03-23 18:40:19 +00:00
switch v := value . ( type ) {
case string :
if v == "" {
continue
}
case cli . StringSlice :
if len ( v . Value ( ) ) == 0 {
continue
}
value = v . Value ( )
default :
2023-06-06 19:46:38 +00:00
log . Warningf ( "Unexpected type for option %v=%v: %T" , key , value , v )
2023-03-23 18:40:19 +00:00
}
2023-03-09 15:39:12 +00:00
2023-11-14 15:56:50 +00:00
cfg . Set ( key , value )
2021-10-11 14:31:02 +00:00
}
2023-03-09 07:49:50 +00:00
2023-11-14 15:56:50 +00:00
if _ , err := cfg . WriteTo ( targetConfig ) ; err != nil {
2021-10-11 14:31:02 +00:00
return fmt . Errorf ( "error writing config: %v" , err )
}
2022-08-09 09:44:19 +00:00
os . Stdout . WriteString ( "Using config:\n" )
2023-11-14 15:56:50 +00:00
if _ , err = cfg . WriteTo ( os . Stdout ) ; err != nil {
2023-08-25 14:48:11 +00:00
log . Warningf ( "Failed to output config to STDOUT: %v" , err )
}
2022-08-09 09:44:19 +00:00
2021-10-11 14:31:02 +00:00
return nil
}
// createDirectories creates each of the specified directories, including any
// missing parents, with mode 0755. The directories are created in the order
// specified and the first failure is returned as an error.
func createDirectories(dir ...string) error {
	for i := range dir {
		path := dir[i]
		log.Infof("Creating directory '%v'", path)
		if err := os.MkdirAll(path, 0755); err != nil {
			return fmt.Errorf("error creating directory: %v", err)
		}
	}
	return nil
}
2023-03-01 10:44:32 +00:00
2024-09-19 11:35:42 +00:00
func createDeviceNodes ( opts * Options ) error {
2024-02-09 13:08:22 +00:00
modes := opts . createDeviceNodes . Value ( )
if len ( modes ) == 0 {
2023-03-01 10:44:32 +00:00
return nil
}
2023-06-12 18:46:56 +00:00
devices , err := nvdevices . New (
2024-02-14 09:53:38 +00:00
nvdevices . WithDevRoot ( opts . DevRootCtrPath ) ,
2023-06-12 18:46:56 +00:00
)
2023-03-27 21:02:24 +00:00
if err != nil {
return fmt . Errorf ( "failed to create library: %v" , err )
}
2024-02-09 13:08:22 +00:00
for _ , mode := range modes {
2024-02-14 09:53:38 +00:00
log . Infof ( "Creating %v device nodes at %v" , mode , opts . DevRootCtrPath )
2024-02-09 13:08:22 +00:00
if mode != "control" {
log . Warningf ( "Unrecognised device mode: %v" , mode )
continue
}
if err := devices . CreateNVIDIAControlDevices ( ) ; err != nil {
return fmt . Errorf ( "failed to create control device nodes: %v" , err )
}
2023-03-27 21:02:24 +00:00
}
2024-02-09 13:08:22 +00:00
return nil
}
2023-03-27 21:02:24 +00:00
2024-04-24 08:47:45 +00:00
// generateCDISpec generates a CDI spec for use in management containers
2024-09-19 11:35:42 +00:00
func generateCDISpec ( opts * Options , nvidiaCDIHookPath string ) error {
2024-02-09 13:08:22 +00:00
if ! opts . cdiEnabled {
return nil
}
2023-03-27 21:02:24 +00:00
log . Info ( "Generating CDI spec for management containers" )
2023-03-22 12:04:12 +00:00
cdilib , err := nvcdi . New (
2023-03-01 10:44:32 +00:00
nvcdi . WithMode ( nvcdi . ModeManagement ) ,
nvcdi . WithDriverRoot ( opts . DriverRootCtrPath ) ,
2024-02-14 09:53:38 +00:00
nvcdi . WithDevRoot ( opts . DevRootCtrPath ) ,
2024-04-24 08:47:45 +00:00
nvcdi . WithNVIDIACDIHookPath ( nvidiaCDIHookPath ) ,
2023-03-01 10:44:32 +00:00
nvcdi . WithVendor ( opts . cdiVendor ) ,
nvcdi . WithClass ( opts . cdiClass ) ,
)
2023-03-22 12:04:12 +00:00
if err != nil {
return fmt . Errorf ( "failed to create CDI library for management containers: %v" , err )
}
2023-03-01 10:44:32 +00:00
spec , err := cdilib . GetSpec ( )
if err != nil {
return fmt . Errorf ( "failed to genereate CDI spec for management containers: %v" , err )
}
2024-02-14 09:53:38 +00:00
transformer := transformroot . NewDriverTransformer (
transformroot . WithDriverRoot ( opts . DriverRootCtrPath ) ,
transformroot . WithTargetDriverRoot ( opts . DriverRoot ) ,
transformroot . WithDevRoot ( opts . DevRootCtrPath ) ,
transformroot . WithTargetDevRoot ( opts . DevRoot ) ,
)
if err := transformer . Transform ( spec . Raw ( ) ) ; err != nil {
2023-03-01 10:44:32 +00:00
return fmt . Errorf ( "failed to transform driver root in CDI spec: %v" , err )
}
name , err := cdi . GenerateNameForSpec ( spec . Raw ( ) )
if err != nil {
return fmt . Errorf ( "failed to generate CDI name for management containers: %v" , err )
}
err = spec . Save ( filepath . Join ( opts . cdiOutputDir , name ) )
if err != nil {
return fmt . Errorf ( "failed to save CDI spec for management containers: %v" , err )
}
return nil
}