diff --git a/cmd/nvidia-ctk-installer/container/container.go b/cmd/nvidia-ctk-installer/container/container.go index 68e5f1f8..5e838608 100644 --- a/cmd/nvidia-ctk-installer/container/container.go +++ b/cmd/nvidia-ctk-installer/container/container.go @@ -36,8 +36,10 @@ const ( // Options defines the shared options for the CLIs to configure containers runtimes. type Options struct { - Config string - Socket string + Config string + Socket string + // EnableCDI indicates whether CDI should be enabled. + EnableCDI bool RuntimeName string RuntimeDir string SetAsDefault bool @@ -111,6 +113,10 @@ func (o Options) UpdateConfig(cfg engine.Interface) error { } } + if o.EnableCDI { + cfg.EnableCDI() + } + return nil } diff --git a/cmd/nvidia-ctk-installer/container/runtime/containerd/config_v1_test.go b/cmd/nvidia-ctk-installer/container/runtime/containerd/config_v1_test.go index ea06b555..862d5992 100644 --- a/cmd/nvidia-ctk-installer/container/runtime/containerd/config_v1_test.go +++ b/cmd/nvidia-ctk-installer/container/runtime/containerd/config_v1_test.go @@ -410,6 +410,51 @@ func TestUpdateV1ConfigWithRuncPresent(t *testing.T) { } } +func TestUpdateV1EnableCDI(t *testing.T) { + logger, _ := testlog.NewNullLogger() + const runtimeDir = "/test/runtime/dir" + + testCases := []struct { + enableCDI bool + expectedEnableCDIValue interface{} + }{ + {}, + { + enableCDI: false, + expectedEnableCDIValue: nil, + }, + { + enableCDI: true, + expectedEnableCDIValue: true, + }, + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("%v", tc.enableCDI), func(t *testing.T) { + o := &container.Options{ + EnableCDI: tc.enableCDI, + RuntimeName: "nvidia", + RuntimeDir: runtimeDir, + } + + cfg, err := toml.Empty.Load() + require.NoError(t, err) + + v1 := &containerd.ConfigV1{ + Logger: logger, + Tree: cfg, + RuntimeType: runtimeType, + } + + err = o.UpdateConfig(v1) + require.NoError(t, err) + + enableCDIValue := v1.GetPath([]string{"plugins", "cri", "containerd", "enable_cdi"}) + 
require.EqualValues(t, tc.expectedEnableCDIValue, enableCDIValue) + }) + } +} + func TestRevertV1Config(t *testing.T) { logger, _ := testlog.NewNullLogger() testCases := []struct { diff --git a/cmd/nvidia-ctk-installer/container/runtime/containerd/config_v2_test.go b/cmd/nvidia-ctk-installer/container/runtime/containerd/config_v2_test.go index e206c59d..a6570e8f 100644 --- a/cmd/nvidia-ctk-installer/container/runtime/containerd/config_v2_test.go +++ b/cmd/nvidia-ctk-installer/container/runtime/containerd/config_v2_test.go @@ -366,6 +366,53 @@ func TestUpdateV2ConfigWithRuncPresent(t *testing.T) { } } +func TestUpdateV2ConfigEnableCDI(t *testing.T) { + logger, _ := testlog.NewNullLogger() + const runtimeDir = "/test/runtime/dir" + + testCases := []struct { + enableCDI bool + expectedEnableCDIValue interface{} + }{ + {}, + { + enableCDI: false, + expectedEnableCDIValue: nil, + }, + { + enableCDI: true, + expectedEnableCDIValue: true, + }, + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("%v", tc.enableCDI), func(t *testing.T) { + o := &container.Options{ + EnableCDI: tc.enableCDI, + RuntimeName: "nvidia", + RuntimeDir: runtimeDir, + SetAsDefault: false, + } + + cfg, err := toml.LoadMap(map[string]interface{}{}) + require.NoError(t, err) + + v2 := &containerd.Config{ + Logger: logger, + Tree: cfg, + RuntimeType: runtimeType, + CRIRuntimePluginName: "io.containerd.grpc.v1.cri", + } + + err = o.UpdateConfig(v2) + require.NoError(t, err) + + enableCDIValue := cfg.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "enable_cdi"}) + require.EqualValues(t, tc.expectedEnableCDIValue, enableCDIValue) + }) + } +} + func TestRevertV2Config(t *testing.T) { logger, _ := testlog.NewNullLogger() diff --git a/cmd/nvidia-ctk-installer/container/runtime/runtime.go b/cmd/nvidia-ctk-installer/container/runtime/runtime.go index cbe68830..2920262c 100644 --- a/cmd/nvidia-ctk-installer/container/runtime/runtime.go +++ b/cmd/nvidia-ctk-installer/container/runtime/runtime.go 
@@ -25,6 +25,7 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/containerd" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/crio" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/docker" + "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/toolkit" ) const ( @@ -66,6 +67,12 @@ func Flags(opts *Options) []cli.Flag { Destination: &opts.RestartMode, EnvVars: []string{"RUNTIME_RESTART_MODE"}, }, + &cli.BoolFlag{ + Name: "enable-cdi-in-runtime", + Usage: "Enable CDI in the configured runtime", + Destination: &opts.EnableCDI, + EnvVars: []string{"RUNTIME_ENABLE_CDI"}, + }, &cli.StringFlag{ Name: "host-root", Usage: "Specify the path to the host root to be used when restarting the runtime using systemd", @@ -98,10 +105,14 @@ func Flags(opts *Options) []cli.Flag { } // ValidateOptions checks whether the specified options are valid -func ValidateOptions(opts *Options, runtime string, toolkitRoot string) error { +func ValidateOptions(c *cli.Context, opts *Options, runtime string, toolkitRoot string, to *toolkit.Options) error { // We set this option here to ensure that it is available in future calls. opts.RuntimeDir = toolkitRoot + if !c.IsSet("enable-cdi-in-runtime") { + opts.EnableCDI = to.CDI.Enabled + } + // Apply the runtime-specific config changes. 
switch runtime { case containerd.Name: diff --git a/cmd/nvidia-ctk-installer/container/toolkit/toolkit.go b/cmd/nvidia-ctk-installer/container/toolkit/toolkit.go index a8d4e890..80e302db 100644 --- a/cmd/nvidia-ctk-installer/container/toolkit/toolkit.go +++ b/cmd/nvidia-ctk-installer/container/toolkit/toolkit.go @@ -44,6 +44,14 @@ const ( configFilename = "config.toml" ) +type cdiOptions struct { + Enabled bool + outputDir string + kind string + vendor string + class string +} + type Options struct { DriverRoot string DevRoot string @@ -63,11 +71,8 @@ type Options struct { ContainerCLIDebug string - cdiEnabled bool - cdiOutputDir string - cdiKind string - cdiVendor string - cdiClass string + // CDI stores the CDI options for the toolkit. + CDI cdiOptions createDeviceNodes cli.StringSlice @@ -170,21 +175,21 @@ func Flags(opts *Options) []cli.Flag { Name: "cdi-enabled", Aliases: []string{"enable-cdi"}, Usage: "enable the generation of a CDI specification", - Destination: &opts.cdiEnabled, + Destination: &opts.CDI.Enabled, EnvVars: []string{"CDI_ENABLED", "ENABLE_CDI"}, }, &cli.StringFlag{ Name: "cdi-output-dir", Usage: "the directory where the CDI output files are to be written. 
If this is set to '', no CDI specification is generated.", Value: "/var/run/cdi", - Destination: &opts.cdiOutputDir, + Destination: &opts.CDI.outputDir, EnvVars: []string{"CDI_OUTPUT_DIR"}, }, &cli.StringFlag{ Name: "cdi-kind", Usage: "the vendor string to use for the generated CDI specification", Value: "management.nvidia.com/gpu", - Destination: &opts.cdiKind, + Destination: &opts.CDI.kind, EnvVars: []string{"CDI_KIND"}, }, &cli.BoolFlag{ @@ -240,19 +245,19 @@ func (t *Installer) ValidateOptions(opts *Options) error { return fmt.Errorf("invalid --toolkit-root option: %v", t.toolkitRoot) } - vendor, class := parser.ParseQualifier(opts.cdiKind) + vendor, class := parser.ParseQualifier(opts.CDI.kind) if err := parser.ValidateVendorName(vendor); err != nil { return fmt.Errorf("invalid CDI vendor name: %v", err) } if err := parser.ValidateClassName(class); err != nil { return fmt.Errorf("invalid CDI class name: %v", err) } - opts.cdiVendor = vendor - opts.cdiClass = class + opts.CDI.vendor = vendor + opts.CDI.class = class - if opts.cdiEnabled && opts.cdiOutputDir == "" { + if opts.CDI.Enabled && opts.CDI.outputDir == "" { t.logger.Warning("Skipping CDI spec generation (no output directory specified)") - opts.cdiEnabled = false + opts.CDI.Enabled = false } isDisabled := false @@ -265,7 +270,7 @@ func (t *Installer) ValidateOptions(opts *Options) error { break } } - if !opts.cdiEnabled && !isDisabled { + if !opts.CDI.Enabled && !isDisabled { t.logger.Info("disabling device node creation since --cdi-enabled=false") isDisabled = true } @@ -698,7 +703,7 @@ func (t *Installer) createDeviceNodes(opts *Options) error { // generateCDISpec generates a CDI spec for use in management containers func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) error { - if !opts.cdiEnabled { + if !opts.CDI.Enabled { return nil } t.logger.Info("Generating CDI spec for management containers") @@ -708,8 +713,8 @@ func (t *Installer) generateCDISpec(opts *Options, 
nvidiaCDIHookPath string) err nvcdi.WithDriverRoot(opts.DriverRootCtrPath), nvcdi.WithDevRoot(opts.DevRootCtrPath), nvcdi.WithNVIDIACDIHookPath(nvidiaCDIHookPath), - nvcdi.WithVendor(opts.cdiVendor), - nvcdi.WithClass(opts.cdiClass), + nvcdi.WithVendor(opts.CDI.vendor), + nvcdi.WithClass(opts.CDI.class), ) if err != nil { return fmt.Errorf("failed to create CDI library for management containers: %v", err) @@ -734,7 +739,7 @@ func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) err if err != nil { return fmt.Errorf("failed to generate CDI name for management containers: %v", err) } - err = spec.Save(filepath.Join(opts.cdiOutputDir, name)) + err = spec.Save(filepath.Join(opts.CDI.outputDir, name)) if err != nil { return fmt.Errorf("failed to save CDI spec for management containers: %v", err) } diff --git a/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go b/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go index 9cfcda2a..855141ff 100644 --- a/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go +++ b/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go @@ -124,9 +124,11 @@ kind: example.com/class options := Options{ DriverRoot: "/host/driver/root", DriverRootCtrPath: filepath.Join(moduleRoot, "testdata", "lookup", tc.hostRoot), - cdiEnabled: tc.cdiEnabled, - cdiOutputDir: cdiOutputDir, - cdiKind: "example.com/class", + CDI: cdiOptions{ + Enabled: tc.cdiEnabled, + outputDir: cdiOutputDir, + kind: "example.com/class", + }, } ti := NewInstaller( diff --git a/cmd/nvidia-ctk-installer/main.go b/cmd/nvidia-ctk-installer/main.go index 835acbb0..0f2f1eb9 100644 --- a/cmd/nvidia-ctk-installer/main.go +++ b/cmd/nvidia-ctk-installer/main.go @@ -38,6 +38,7 @@ type options struct { runtimeArgs string root string pidFile string + sourceRoot string toolkitOptions toolkit.Options runtimeOptions runtime.Options @@ -141,6 +142,13 @@ func (a app) build() *cli.App { Destination: &options.root, EnvVars: []string{"ROOT"}, }, + 
&cli.StringFlag{ + Name: "source-root", + Value: "/", + Usage: "The folder where the required toolkit artifacts can be found", + Destination: &options.sourceRoot, + EnvVars: []string{"SOURCE_ROOT"}, + }, &cli.StringFlag{ Name: "pid-file", Value: defaultPidFile, @@ -159,12 +167,13 @@ func (a app) build() *cli.App { func (a *app) Before(c *cli.Context, o *options) error { a.toolkit = toolkit.NewInstaller( toolkit.WithLogger(a.logger), + toolkit.WithSourceRoot(o.sourceRoot), toolkit.WithToolkitRoot(o.toolkitRoot()), ) return a.validateFlags(c, o) } -func (a *app) validateFlags(_ *cli.Context, o *options) error { +func (a *app) validateFlags(c *cli.Context, o *options) error { if o.root == "" { return fmt.Errorf("the install root must be specified") } @@ -178,7 +187,7 @@ func (a *app) validateFlags(_ *cli.Context, o *options) error { if err := a.toolkit.ValidateOptions(&o.toolkitOptions); err != nil { return err } - if err := runtime.ValidateOptions(&o.runtimeOptions, o.runtime, o.toolkitRoot()); err != nil { + if err := runtime.ValidateOptions(c, &o.runtimeOptions, o.runtime, o.toolkitRoot(), &o.toolkitOptions); err != nil { return err } return nil diff --git a/cmd/nvidia-ctk-installer/main_test.go b/cmd/nvidia-ctk-installer/main_test.go index f7ba5866..759ae8c1 100644 --- a/cmd/nvidia-ctk-installer/main_test.go +++ b/cmd/nvidia-ctk-installer/main_test.go @@ -18,10 +18,15 @@ package main import ( "fmt" + "os" + "path/filepath" + "strings" "testing" testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/test" ) func TestParseArgs(t *testing.T) { @@ -84,3 +89,413 @@ func TestParseArgs(t *testing.T) { }) } } + +func TestApp(t *testing.T) { + t.Setenv("__NVCT_TESTING_DEVICES_ARE_FILES", "true") + logger, _ := testlog.NewNullLogger() + + moduleRoot, err := test.GetModuleRoot() + require.NoError(t, err) + + artifactRoot := filepath.Join(moduleRoot, "testdata", "installer", "artifacts") + 
hostRoot := filepath.Join(moduleRoot, "testdata", "lookup", "rootfs-1") + + testCases := []struct { + description string + args []string + expectedToolkitConfig string + expectedRuntimeConfig string + }{ + { + description: "no args", + expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false +accept-nvidia-visible-devices-envvar-when-unprivileged = true +disable-require = false +supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video" +swarm-resource = "" + +[nvidia-container-cli] + debug = "" + environment = [] + ldcache = "" + ldconfig = "@/run/nvidia/driver/sbin/ldconfig" + load-kmods = true + no-cgroups = false + path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli" + root = "/run/nvidia/driver" + user = "" + +[nvidia-container-runtime] + debug = "/dev/null" + log-level = "info" + mode = "auto" + runtimes = ["docker-runc", "runc", "crun"] + + [nvidia-container-runtime.modes] + + [nvidia-container-runtime.modes.cdi] + annotation-prefixes = ["cdi.k8s.io/"] + default-kind = "nvidia.com/gpu" + spec-dirs = ["/etc/cdi", "/var/run/cdi"] + + [nvidia-container-runtime.modes.csv] + mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d" + +[nvidia-container-runtime-hook] + path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook" + skip-mode-detection = true + +[nvidia-ctk] + path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk" +`, + expectedRuntimeConfig: `{ + "default-runtime": "nvidia", + "runtimes": { + "nvidia": { + "args": [], + "path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime" + }, + "nvidia-cdi": { + "args": [], + "path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi" + }, + "nvidia-legacy": { + "args": [], + "path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy" + } + } +}`, + }, + { + description: "CDI enabled enables CDI in docker", + args: []string{"--cdi-enabled", "--create-device-nodes=none"}, + expectedToolkitConfig: 
`accept-nvidia-visible-devices-as-volume-mounts = false +accept-nvidia-visible-devices-envvar-when-unprivileged = true +disable-require = false +supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video" +swarm-resource = "" + +[nvidia-container-cli] + debug = "" + environment = [] + ldcache = "" + ldconfig = "@/run/nvidia/driver/sbin/ldconfig" + load-kmods = true + no-cgroups = false + path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli" + root = "/run/nvidia/driver" + user = "" + +[nvidia-container-runtime] + debug = "/dev/null" + log-level = "info" + mode = "auto" + runtimes = ["docker-runc", "runc", "crun"] + + [nvidia-container-runtime.modes] + + [nvidia-container-runtime.modes.cdi] + annotation-prefixes = ["cdi.k8s.io/"] + default-kind = "nvidia.com/gpu" + spec-dirs = ["/etc/cdi", "/var/run/cdi"] + + [nvidia-container-runtime.modes.csv] + mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d" + +[nvidia-container-runtime-hook] + path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook" + skip-mode-detection = true + +[nvidia-ctk] + path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk" +`, + expectedRuntimeConfig: `{ + "default-runtime": "nvidia", + "features": { + "cdi": true + }, + "runtimes": { + "nvidia": { + "args": [], + "path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime" + }, + "nvidia-cdi": { + "args": [], + "path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi" + }, + "nvidia-legacy": { + "args": [], + "path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy" + } + } +}`, + }, + { + description: "--enable-cdi-in-runtime=false overrides --cdi-enabled in Docker", + args: []string{"--cdi-enabled", "--create-device-nodes=none", "--enable-cdi-in-runtime=false"}, + expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false +accept-nvidia-visible-devices-envvar-when-unprivileged = true +disable-require = false +supported-driver-capabilities = 
"compat32,compute,display,graphics,ngx,utility,video" +swarm-resource = "" + +[nvidia-container-cli] + debug = "" + environment = [] + ldcache = "" + ldconfig = "@/run/nvidia/driver/sbin/ldconfig" + load-kmods = true + no-cgroups = false + path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli" + root = "/run/nvidia/driver" + user = "" + +[nvidia-container-runtime] + debug = "/dev/null" + log-level = "info" + mode = "auto" + runtimes = ["docker-runc", "runc", "crun"] + + [nvidia-container-runtime.modes] + + [nvidia-container-runtime.modes.cdi] + annotation-prefixes = ["cdi.k8s.io/"] + default-kind = "nvidia.com/gpu" + spec-dirs = ["/etc/cdi", "/var/run/cdi"] + + [nvidia-container-runtime.modes.csv] + mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d" + +[nvidia-container-runtime-hook] + path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook" + skip-mode-detection = true + +[nvidia-ctk] + path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk" +`, + expectedRuntimeConfig: `{ + "default-runtime": "nvidia", + "runtimes": { + "nvidia": { + "args": [], + "path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime" + }, + "nvidia-cdi": { + "args": [], + "path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi" + }, + "nvidia-legacy": { + "args": [], + "path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy" + } + } +}`, + }, + { + description: "CDI enabled enables CDI in containerd", + args: []string{"--cdi-enabled", "--runtime=containerd"}, + expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false +accept-nvidia-visible-devices-envvar-when-unprivileged = true +disable-require = false +supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video" +swarm-resource = "" + +[nvidia-container-cli] + debug = "" + environment = [] + ldcache = "" + ldconfig = "@/run/nvidia/driver/sbin/ldconfig" + load-kmods = true + no-cgroups = false + path = "{{ .toolkitRoot 
}}/toolkit/nvidia-container-cli" + root = "/run/nvidia/driver" + user = "" + +[nvidia-container-runtime] + debug = "/dev/null" + log-level = "info" + mode = "auto" + runtimes = ["docker-runc", "runc", "crun"] + + [nvidia-container-runtime.modes] + + [nvidia-container-runtime.modes.cdi] + annotation-prefixes = ["cdi.k8s.io/"] + default-kind = "nvidia.com/gpu" + spec-dirs = ["/etc/cdi", "/var/run/cdi"] + + [nvidia-container-runtime.modes.csv] + mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d" + +[nvidia-container-runtime-hook] + path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook" + skip-mode-detection = true + +[nvidia-ctk] + path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk" +`, + expectedRuntimeConfig: `version = 2 + +[plugins] + + [plugins."io.containerd.grpc.v1.cri"] + enable_cdi = true + + [plugins."io.containerd.grpc.v1.cri".containerd] + default_runtime_name = "nvidia" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia] + privileged_without_host_devices = false + runtime_engine = "" + runtime_root = "" + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options] + BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi] + privileged_without_host_devices = false + runtime_engine = "" + runtime_root = "" + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi.options] + BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy] + privileged_without_host_devices = false + runtime_engine = "" + runtime_root = "" + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options] + BinaryName = "{{ .toolkitRoot 
}}/toolkit/nvidia-container-runtime.legacy" +`, + }, + { + description: "--enable-cdi-in-runtime=false overrides --cdi-enabled in containerd", + args: []string{"--cdi-enabled", "--create-device-nodes=none", "--enable-cdi-in-runtime=false", "--runtime=containerd"}, + expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false +accept-nvidia-visible-devices-envvar-when-unprivileged = true +disable-require = false +supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video" +swarm-resource = "" + +[nvidia-container-cli] + debug = "" + environment = [] + ldcache = "" + ldconfig = "@/run/nvidia/driver/sbin/ldconfig" + load-kmods = true + no-cgroups = false + path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli" + root = "/run/nvidia/driver" + user = "" + +[nvidia-container-runtime] + debug = "/dev/null" + log-level = "info" + mode = "auto" + runtimes = ["docker-runc", "runc", "crun"] + + [nvidia-container-runtime.modes] + + [nvidia-container-runtime.modes.cdi] + annotation-prefixes = ["cdi.k8s.io/"] + default-kind = "nvidia.com/gpu" + spec-dirs = ["/etc/cdi", "/var/run/cdi"] + + [nvidia-container-runtime.modes.csv] + mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d" + +[nvidia-container-runtime-hook] + path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook" + skip-mode-detection = true + +[nvidia-ctk] + path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk" +`, + expectedRuntimeConfig: `version = 2 + +[plugins] + + [plugins."io.containerd.grpc.v1.cri"] + + [plugins."io.containerd.grpc.v1.cri".containerd] + default_runtime_name = "nvidia" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia] + privileged_without_host_devices = false + runtime_engine = "" + runtime_root = "" + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options] + BinaryName = "{{ 
.toolkitRoot }}/toolkit/nvidia-container-runtime" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi] + privileged_without_host_devices = false + runtime_engine = "" + runtime_root = "" + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi.options] + BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy] + privileged_without_host_devices = false + runtime_engine = "" + runtime_root = "" + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options] + BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy" +`, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + testRoot := t.TempDir() + + cdiOutputDir := filepath.Join(testRoot, "/var/run/cdi") + runtimeConfigFile := filepath.Join(testRoot, "config.file") + + toolkitRoot := filepath.Join(testRoot, "toolkit-test") + toolkitConfigFile := filepath.Join(toolkitRoot, "toolkit/.config/nvidia-container-runtime/config.toml") + + app := NewApp(logger, toolkitRoot) + + testArgs := []string{ + "nvidia-ctk-installer", + "--no-daemon", + "--cdi-output-dir=" + cdiOutputDir, + "--config=" + runtimeConfigFile, + "--create-device-nodes=none", + "--driver-root-ctr-path=" + hostRoot, + "--pid-file=" + filepath.Join(testRoot, "toolkit.pid"), + "--restart-mode=none", + "--source-root=" + filepath.Join(artifactRoot, "deb"), + } + + err := app.Run(append(testArgs, tc.args...)) + + require.NoError(t, err) + + require.FileExists(t, toolkitConfigFile) + toolkitConfigFileContents, err := os.ReadFile(toolkitConfigFile) + require.NoError(t, err) + require.EqualValues(t, strings.ReplaceAll(tc.expectedToolkitConfig, "{{ .toolkitRoot }}", toolkitRoot), string(toolkitConfigFileContents)) + + require.FileExists(t, runtimeConfigFile) + runtimeConfigFileContents, err 
:= os.ReadFile(runtimeConfigFile) + require.NoError(t, err) + require.EqualValues(t, strings.ReplaceAll(tc.expectedRuntimeConfig, "{{ .toolkitRoot }}", toolkitRoot), string(runtimeConfigFileContents)) + }) + } + +} diff --git a/cmd/nvidia-ctk/runtime/configure/configure.go b/cmd/nvidia-ctk/runtime/configure/configure.go index d2528853..aa8a496c 100644 --- a/cmd/nvidia-ctk/runtime/configure/configure.go +++ b/cmd/nvidia-ctk/runtime/configure/configure.go @@ -163,7 +163,7 @@ func (m command) build() *cli.Command { }, &cli.BoolFlag{ Name: "cdi.enabled", - Aliases: []string{"cdi.enable"}, + Aliases: []string{"cdi.enable", "enable-cdi"}, Usage: "Enable CDI in the configured runtime", Destination: &config.cdi.enabled, }, @@ -292,9 +292,8 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error { return fmt.Errorf("unable to update config: %v", err) } - err = enableCDI(config, cfg) - if err != nil { - return fmt.Errorf("failed to enable CDI in %s: %w", config.runtime, err) + if config.cdi.enabled { + cfg.EnableCDI() } outputPath := config.getOutputConfigPath() @@ -354,19 +353,3 @@ func (m *command) configureOCIHook(c *cli.Context, config *config) error { } return nil } - -// enableCDI enables the use of CDI in the corresponding container engine -func enableCDI(config *config, cfg engine.Interface) error { - if !config.cdi.enabled { - return nil - } - switch config.runtime { - case "containerd": - cfg.Set("enable_cdi", true) - case "docker": - cfg.Set("features", map[string]bool{"cdi": true}) - default: - return fmt.Errorf("enabling CDI in %s is not supported", config.runtime) - } - return nil -} diff --git a/pkg/config/engine/api.go b/pkg/config/engine/api.go index 8c7d1b50..d27f09e9 100644 --- a/pkg/config/engine/api.go +++ b/pkg/config/engine/api.go @@ -20,10 +20,10 @@ package engine type Interface interface { AddRuntime(string, string, bool) error DefaultRuntime() string + EnableCDI() GetRuntimeConfig(string) (RuntimeConfig, error) 
RemoveRuntime(string) error Save(string) (int64, error) - Set(string, interface{}) String() string } diff --git a/pkg/config/engine/containerd/config.go b/pkg/config/engine/containerd/config.go index 52a336ba..62468fe5 100644 --- a/pkg/config/engine/containerd/config.go +++ b/pkg/config/engine/containerd/config.go @@ -96,13 +96,6 @@ func (c *Config) getRuntimeAnnotations(path []string) ([]string, error) { return annotations, nil } -// Set sets the specified containerd option. -func (c *Config) Set(key string, value interface{}) { - config := *c.Tree - config.SetPath([]string{"plugins", c.CRIRuntimePluginName, key}, value) - *c.Tree = config -} - // DefaultRuntime returns the default runtime for the cri-o config func (c Config) DefaultRuntime() string { if runtime, ok := c.GetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "default_runtime_name"}).(string); ok { @@ -111,6 +104,13 @@ func (c Config) DefaultRuntime() string { return "" } +// EnableCDI sets the enable_cdi field in the Containerd config to true. +func (c *Config) EnableCDI() { + config := *c.Tree + config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "enable_cdi"}, true) + *c.Tree = config +} + // RemoveRuntime removes a runtime from the docker config func (c *Config) RemoveRuntime(name string) error { if c == nil || c.Tree == nil { diff --git a/pkg/config/engine/containerd/config_v1.go b/pkg/config/engine/containerd/config_v1.go index 10b6d087..2189a8de 100644 --- a/pkg/config/engine/containerd/config_v1.go +++ b/pkg/config/engine/containerd/config_v1.go @@ -143,13 +143,6 @@ func (c *ConfigV1) RemoveRuntime(name string) error { return nil } -// Set sets the specified containerd option. 
-func (c *ConfigV1) Set(key string, value interface{}) { - config := *c.Tree - config.SetPath([]string{"plugins", "cri", "containerd", key}, value) - *c.Tree = config -} - // Save writes the config to a file func (c ConfigV1) Save(path string) (int64, error) { return (Config)(c).Save(path) @@ -165,3 +158,10 @@ func (c *ConfigV1) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) { tree: runtimeData, }, nil } + +// EnableCDI sets the enable_cdi field in the containerd v1 config to true. +func (c *ConfigV1) EnableCDI() { + config := *c.Tree + config.SetPath([]string{"plugins", "cri", "containerd", "enable_cdi"}, true) + *c.Tree = config +} diff --git a/pkg/config/engine/crio/crio.go b/pkg/config/engine/crio/crio.go index 3d5629d7..c0cc60be 100644 --- a/pkg/config/engine/crio/crio.go +++ b/pkg/config/engine/crio/crio.go @@ -153,6 +153,9 @@ func (c *Config) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) { }, nil } +// EnableCDI is a no-op for CRI-O since it is always enabled where supported. +func (c *Config) EnableCDI() {} + // CommandLineSource returns the CLI-based crio config loader func CommandLineSource(hostRoot string) toml.Loader { return toml.LoadFirst( diff --git a/pkg/config/engine/docker/docker.go b/pkg/config/engine/docker/docker.go index eda700d0..86512df9 100644 --- a/pkg/config/engine/docker/docker.go +++ b/pkg/config/engine/docker/docker.go @@ -103,6 +103,24 @@ func (c Config) DefaultRuntime() string { return r } +// EnableCDI sets features.cdi to true in the docker config. 
+func (c *Config) EnableCDI() { + if c == nil { + return + } + config := *c + + features, ok := config["features"].(map[string]bool) + if !ok { + features = make(map[string]bool) + } + features["cdi"] = true + + config["features"] = features + + *c = config +} + // RemoveRuntime removes a runtime from the docker config func (c *Config) RemoveRuntime(name string) error { if c == nil { @@ -132,11 +150,6 @@ func (c *Config) RemoveRuntime(name string) error { return nil } -// Set sets the specified docker option -func (c *Config) Set(key string, value interface{}) { - (*c)[key] = value -} - // Save writes the config to the specified path func (c Config) Save(path string) (int64, error) { output, err := json.MarshalIndent(c, "", " ")