diff --git a/cmd/nvidia-container-runtime-hook/container_config_test.go b/cmd/nvidia-container-runtime-hook/container_config_test.go index eb0aec61..a52adebc 100644 --- a/cmd/nvidia-container-runtime-hook/container_config_test.go +++ b/cmd/nvidia-container-runtime-hook/container_config_test.go @@ -496,7 +496,7 @@ func TestGetNvidiaConfig(t *testing.T) { getConfig := func() { hookConfig := tc.hookConfig if hookConfig == nil { - defaultConfig := getDefaultHookConfig() + defaultConfig, _ := getDefaultHookConfig() hookConfig = &defaultConfig } config = getNvidiaConfig(hookConfig, tc.env, nil, tc.privileged) @@ -708,7 +708,7 @@ func TestDeviceListSourcePriority(t *testing.T) { env := map[string]string{ envNVVisibleDevices: tc.envvarDevices, } - hookConfig := getDefaultHookConfig() + hookConfig, _ := getDefaultHookConfig() hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged) diff --git a/cmd/nvidia-container-runtime-hook/hook_config.go b/cmd/nvidia-container-runtime-hook/hook_config.go index 7e6bc7a5..cc66d3a4 100644 --- a/cmd/nvidia-container-runtime-hook/hook_config.go +++ b/cmd/nvidia-container-runtime-hook/hook_config.go @@ -1,6 +1,7 @@ package main import ( + "fmt" "log" "os" "path" @@ -48,8 +49,18 @@ type HookConfig struct { NVIDIAContainerRuntimeHook config.RuntimeHookConfig `toml:"nvidia-container-runtime-hook"` } -func getDefaultHookConfig() HookConfig { - return HookConfig{ +func getDefaultHookConfig() (HookConfig, error) { + rtConfig, err := config.GetDefaultRuntimeConfig() + if err != nil { + return HookConfig{}, err + } + + rtHookConfig, err := config.GetDefaultRuntimeHookConfig() + if err != nil { + return HookConfig{}, err + } + + c := HookConfig{ DisableRequire: false, SwarmResource: nil, AcceptEnvvarUnprivileged: true, @@ -67,28 +78,37 @@ func getDefaultHookConfig() HookConfig { User: nil, Ldconfig: nil, }, - NVIDIAContainerRuntime: *config.GetDefaultRuntimeConfig(), - NVIDIAContainerRuntimeHook: *config.GetDefaultRuntimeHookConfig(), + NVIDIAContainerRuntime: *rtConfig, + NVIDIAContainerRuntimeHook: *rtHookConfig, } + + return c, nil } -func getHookConfig() (config HookConfig) { +func getHookConfig() (*HookConfig, error) { var err error + var config HookConfig if len(*configflag) > 0 { - config = getDefaultHookConfig() + config, err = getDefaultHookConfig() + if err != nil { + return nil, fmt.Errorf("couldn't get default configuration: %v", err) + } _, err = toml.DecodeFile(*configflag, &config) if err != nil { - log.Panicln("couldn't open configuration file:", err) + return nil, fmt.Errorf("couldn't open configuration file: %v", err) } } else { for _, p := range defaultPaths { - config = getDefaultHookConfig() + config, err = getDefaultHookConfig() + if err != nil { + return nil, fmt.Errorf("couldn't get default configuration: %v", err) + } _, err = toml.DecodeFile(p, &config) if err == nil { break } else if !os.IsNotExist(err) { - log.Panicln("couldn't open default configuration file:", err) + return nil, fmt.Errorf("couldn't open default configuration file: %v", err) } } } @@ -102,7 +122,7 @@ func getHookConfig() (config HookConfig) { log.Panicf("Invalid value for config option '%v'; %v (supported: %v)\n", configName, config.SupportedDriverCapabilities, allDriverCapabilities) } - return config + return &config, nil } // getConfigOption returns the toml config option associated with the diff --git a/cmd/nvidia-container-runtime-hook/hook_config_test.go b/cmd/nvidia-container-runtime-hook/hook_config_test.go index 510e66a4..7cddafab 100644 --- a/cmd/nvidia-container-runtime-hook/hook_config_test.go +++ b/cmd/nvidia-container-runtime-hook/hook_config_test.go @@ -89,7 +89,8 @@ func TestGetHookConfig(t *testing.T) { var config HookConfig getHookConfig := func() { - config = getHookConfig() + c, _ := getHookConfig() + config = *c } if tc.expectedPanic { diff --git a/cmd/nvidia-container-runtime-hook/main.go b/cmd/nvidia-container-runtime-hook/main.go index 95b02364..ee8a0397 100644 --- a/cmd/nvidia-container-runtime-hook/main.go +++ b/cmd/nvidia-container-runtime-hook/main.go @@ -71,14 +71,17 @@ func doPrestart() { defer exit() log.SetFlags(0) - hook := getHookConfig() + hook, err := getHookConfig() + if err != nil || hook == nil { + log.Panicln("error getting hook config:", err) + } cli := hook.NvidiaContainerCLI if !hook.NVIDIAContainerRuntimeHook.SkipModeDetection && info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" { log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.") } - container := getContainerConfig(hook) + container := getContainerConfig(*hook) nvidia := container.Nvidia if nvidia == nil { // Not a GPU container, nothing to do. diff --git a/internal/config/cli.go b/internal/config/cli.go index 650f5321..7849e2f1 100644 --- a/internal/config/cli.go +++ b/internal/config/cli.go @@ -16,33 +16,7 @@ package config -import ( - "github.com/pelletier/go-toml" -) - // ContainerCLIConfig stores the options for the nvidia-container-cli type ContainerCLIConfig struct { Root string `toml:"root"` } - -// getContainerCLIConfigFrom reads the nvidia container runtime config from the specified toml Tree. -func getContainerCLIConfigFrom(toml *toml.Tree) *ContainerCLIConfig { - cfg := getDefaultContainerCLIConfig() - - if toml == nil { - return cfg - } - - cfg.Root = toml.GetDefault("nvidia-container-cli.root", cfg.Root).(string) - - return cfg -} - -// getDefaultContainerCLIConfig defines the default values for the config -func getDefaultContainerCLIConfig() *ContainerCLIConfig { - c := ContainerCLIConfig{ - Root: "", - } - - return &c -} diff --git a/internal/config/config.go b/internal/config/config.go index 9340bb6a..c9d8c5af 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -66,7 +66,7 @@ func GetConfig() (*Config, error) { tomlFile, err := os.Open(configFilePath) if err != nil { - return getDefaultConfig(), nil + return getDefaultConfig() } defer tomlFile.Close() @@ -90,41 +90,53 @@ func loadConfigFrom(reader io.Reader) (*Config, error) { // getConfigFrom reads the nvidia container runtime config from the specified toml Tree. func getConfigFrom(toml *toml.Tree) (*Config, error) { - cfg := getDefaultConfig() - - if toml == nil { - return cfg, nil - } - - cfg.AcceptEnvvarUnprivileged = toml.GetDefault("accept-nvidia-visible-devices-envvar-when-unprivileged", cfg.AcceptEnvvarUnprivileged).(bool) - - cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml) - cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml) - runtimeConfig, err := getRuntimeConfigFrom(toml) + cfg, err := getDefaultConfig() if err != nil { - return nil, fmt.Errorf("failed to load nvidia-container-runtime config: %v", err) + return nil, err } - cfg.NVIDIAContainerRuntimeConfig = *runtimeConfig - runtimeHookConfig, err := getRuntimeHookConfigFrom(toml) - if err != nil { - return nil, fmt.Errorf("failed to load nvidia-container-runtime-hook config: %v", err) + if err := toml.Unmarshal(cfg); err != nil { + return nil, fmt.Errorf("failed to unmarshal config: %v", err) } - cfg.NVIDIAContainerRuntimeHookConfig = *runtimeHookConfig return cfg, nil } // getDefaultConfig defines the default values for the config -func getDefaultConfig() *Config { - c := Config{ - AcceptEnvvarUnprivileged: true, - NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(), - NVIDIACTKConfig: *getDefaultCTKConfig(), - NVIDIAContainerRuntimeConfig: *GetDefaultRuntimeConfig(), +func getDefaultConfig() (*Config, error) { + tomlConfig, err := GetDefaultConfigToml() + if err != nil { + return nil, err } - return &c + // tomlConfig above includes information about the default values and comments. + // we need to marshal it back to a string and then unmarshal it to strip the comments. + contents, err := tomlConfig.ToTomlString() + if err != nil { + return nil, err + } + + reloaded, err := toml.Load(contents) + if err != nil { + return nil, err + } + + d := Config{} + if err := reloaded.Unmarshal(&d); err != nil { + return nil, fmt.Errorf("failed to unmarshal config: %v", err) + } + + // The default value for the accept-nvidia-visible-devices-envvar-when-unprivileged is non-standard. + // As such we explicitly handle it being set here. + if reloaded.Get("accept-nvidia-visible-devices-envvar-when-unprivileged") == nil { + d.AcceptEnvvarUnprivileged = true + } + // The default value for the nvidia-container-runtime.debug is non-standard. + // As such we explicitly handle it being set here. + if reloaded.Get("nvidia-container-runtime.debug") == nil { + d.NVIDIAContainerRuntimeConfig.DebugFilePath = "/dev/null" + } + return &d, nil } // GetDefaultConfigToml returns the default config as a toml Tree. diff --git a/internal/config/hook.go b/internal/config/hook.go index 4c35a1d6..5a3b27dc 100644 --- a/internal/config/hook.go +++ b/internal/config/hook.go @@ -16,47 +16,18 @@ package config -import ( - "fmt" - - "github.com/pelletier/go-toml" -) - // RuntimeHookConfig stores the config options for the NVIDIA Container Runtime type RuntimeHookConfig struct { // SkipModeDetection disables the mode check for the runtime hook. SkipModeDetection bool `toml:"skip-mode-detection"` } -// dummyHookConfig allows us to unmarshal only a RuntimeHookConfig from a *toml.Tree -type dummyHookConfig struct { - RuntimeHook RuntimeHookConfig `toml:"nvidia-container-runtime-hook"` -} - -// getRuntimeHookConfigFrom reads the nvidia container runtime config from the specified toml Tree. -func getRuntimeHookConfigFrom(toml *toml.Tree) (*RuntimeHookConfig, error) { - cfg := GetDefaultRuntimeHookConfig() - - if toml == nil { - return cfg, nil - } - - d := dummyHookConfig{ - RuntimeHook: *cfg, - } - - if err := toml.Unmarshal(&d); err != nil { - return nil, fmt.Errorf("failed to unmarshal runtime config: %v", err) - } - - return &d.RuntimeHook, nil -} - // GetDefaultRuntimeHookConfig defines the default values for the config -func GetDefaultRuntimeHookConfig() *RuntimeHookConfig { - c := RuntimeHookConfig{ - SkipModeDetection: false, +func GetDefaultRuntimeHookConfig() (*RuntimeHookConfig, error) { + cfg, err := getDefaultConfig() + if err != nil { + return nil, err } - return &c + return &cfg.NVIDIAContainerRuntimeHookConfig, nil } diff --git a/internal/config/runtime.go b/internal/config/runtime.go index 8ff8c6e8..4dc89e2d 100644 --- a/internal/config/runtime.go +++ b/internal/config/runtime.go @@ -16,21 +16,6 @@ package config -import ( - "fmt" - - "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" - "github.com/pelletier/go-toml" - "github.com/sirupsen/logrus" -) - -const ( - dockerRuncExecutableName = "docker-runc" - runcExecutableName = "runc" - - auto = "auto" -) - // RuntimeConfig stores the config options for the NVIDIA Container Runtime type RuntimeConfig struct { DebugFilePath string `toml:"debug"` @@ -61,52 +46,12 @@ type csvModeConfig struct { MountSpecPath string `toml:"mount-spec-path"` } -// dummy allows us to unmarshal only a RuntimeConfig from a *toml.Tree -type dummy struct { - Runtime RuntimeConfig `toml:"nvidia-container-runtime"` -} - -// getRuntimeConfigFrom reads the nvidia container runtime config from the specified toml Tree. -func getRuntimeConfigFrom(toml *toml.Tree) (*RuntimeConfig, error) { - cfg := GetDefaultRuntimeConfig() - - if toml == nil { - return cfg, nil - } - - d := dummy{ - Runtime: *cfg, - } - - if err := toml.Unmarshal(&d); err != nil { - return nil, fmt.Errorf("failed to unmarshal runtime config: %v", err) - } - - return &d.Runtime, nil -} - // GetDefaultRuntimeConfig defines the default values for the config -func GetDefaultRuntimeConfig() *RuntimeConfig { - c := RuntimeConfig{ - DebugFilePath: "/dev/null", - LogLevel: logrus.InfoLevel.String(), - Runtimes: []string{ - dockerRuncExecutableName, - runcExecutableName, - }, - Mode: auto, - Modes: modesConfig{ - CSV: csvModeConfig{ - MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d", - }, - CDI: cdiModeConfig{ - DefaultKind: "nvidia.com/gpu", - AnnotationPrefixes: []string{ - cdi.AnnotationPrefix, - }, - }, - }, +func GetDefaultRuntimeConfig() (*RuntimeConfig, error) { + cfg, err := getDefaultConfig() + if err != nil { + return nil, err } - return &c + return &cfg.NVIDIAContainerRuntimeConfig, nil } diff --git a/internal/config/toolkit-cli.go b/internal/config/toolkit-cli.go index 1fe89717..67ae974e 100644 --- a/internal/config/toolkit-cli.go +++ b/internal/config/toolkit-cli.go @@ -16,31 +16,7 @@ package config -import "github.com/pelletier/go-toml" - // CTKConfig stores the config options for the NVIDIA Container Toolkit CLI (nvidia-ctk) type CTKConfig struct { Path string `toml:"path"` } - -// getCTKConfigFrom reads the nvidia container runtime config from the specified toml Tree. -func getCTKConfigFrom(toml *toml.Tree) *CTKConfig { - cfg := getDefaultCTKConfig() - - if toml == nil { - return cfg - } - - cfg.Path = toml.GetDefault("nvidia-ctk.path", cfg.Path).(string) - - return cfg -} - -// getDefaultCTKConfig defines the default values for the config -func getDefaultCTKConfig() *CTKConfig { - c := CTKConfig{ - Path: "nvidia-ctk", - } - - return &c -}