From a7786d4d41a755a733de10015f9cd8258f5309a9 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 29 Jan 2025 11:03:10 +0100 Subject: [PATCH] Enable CDI in runtime if CDI_ENABLED is set This change also enables CDI in the configured runtime when the toolkit is installed with CDI enabled. Signed-off-by: Evan Lezar --- .../container/runtime/runtime.go | 7 +++- .../container/toolkit/toolkit.go | 41 +++++++++++-------- .../container/toolkit/toolkit_test.go | 8 ++-- cmd/nvidia-ctk-installer/main.go | 4 +- cmd/nvidia-ctk/runtime/configure/configure.go | 2 +- 5 files changed, 37 insertions(+), 25 deletions(-) diff --git a/cmd/nvidia-ctk-installer/container/runtime/runtime.go b/cmd/nvidia-ctk-installer/container/runtime/runtime.go index 480fdc61..2920262c 100644 --- a/cmd/nvidia-ctk-installer/container/runtime/runtime.go +++ b/cmd/nvidia-ctk-installer/container/runtime/runtime.go @@ -25,6 +25,7 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/containerd" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/crio" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/docker" + "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/toolkit" ) const ( @@ -104,10 +105,14 @@ func Flags(opts *Options) []cli.Flag { } // ValidateOptions checks whether the specified options are valid -func ValidateOptions(opts *Options, runtime string, toolkitRoot string) error { +func ValidateOptions(c *cli.Context, opts *Options, runtime string, toolkitRoot string, to *toolkit.Options) error { // We set this option here to ensure that it is available in future calls. opts.RuntimeDir = toolkitRoot + if !c.IsSet("enable-cdi-in-runtime") { + opts.EnableCDI = to.CDI.Enabled + } + // Apply the runtime-specific config changes. switch runtime { case containerd.Name: diff --git a/cmd/nvidia-ctk-installer/container/toolkit/toolkit.go b/cmd/nvidia-ctk-installer/container/toolkit/toolkit.go index a8d4e890..80e302db 100644 --- a/cmd/nvidia-ctk-installer/container/toolkit/toolkit.go +++ b/cmd/nvidia-ctk-installer/container/toolkit/toolkit.go @@ -44,6 +44,14 @@ const ( configFilename = "config.toml" ) +type cdiOptions struct { + Enabled bool + outputDir string + kind string + vendor string + class string +} + type Options struct { DriverRoot string DevRoot string @@ -63,11 +71,8 @@ type Options struct { ContainerCLIDebug string - cdiEnabled bool - cdiOutputDir string - cdiKind string - cdiVendor string - cdiClass string + // CDI stores the CDI options for the toolkit. + CDI cdiOptions createDeviceNodes cli.StringSlice @@ -170,21 +175,21 @@ func Flags(opts *Options) []cli.Flag { Name: "cdi-enabled", Aliases: []string{"enable-cdi"}, Usage: "enable the generation of a CDI specification", - Destination: &opts.cdiEnabled, + Destination: &opts.CDI.Enabled, EnvVars: []string{"CDI_ENABLED", "ENABLE_CDI"}, }, &cli.StringFlag{ Name: "cdi-output-dir", Usage: "the directory where the CDI output files are to be written. If this is set to '', no CDI specification is generated.", Value: "/var/run/cdi", - Destination: &opts.cdiOutputDir, + Destination: &opts.CDI.outputDir, EnvVars: []string{"CDI_OUTPUT_DIR"}, }, &cli.StringFlag{ Name: "cdi-kind", Usage: "the vendor string to use for the generated CDI specification", Value: "management.nvidia.com/gpu", - Destination: &opts.cdiKind, + Destination: &opts.CDI.kind, EnvVars: []string{"CDI_KIND"}, }, &cli.BoolFlag{ @@ -240,19 +245,19 @@ func (t *Installer) ValidateOptions(opts *Options) error { return fmt.Errorf("invalid --toolkit-root option: %v", t.toolkitRoot) } - vendor, class := parser.ParseQualifier(opts.cdiKind) + vendor, class := parser.ParseQualifier(opts.CDI.kind) if err := parser.ValidateVendorName(vendor); err != nil { return fmt.Errorf("invalid CDI vendor name: %v", err) } if err := parser.ValidateClassName(class); err != nil { return fmt.Errorf("invalid CDI class name: %v", err) } - opts.cdiVendor = vendor - opts.cdiClass = class + opts.CDI.vendor = vendor + opts.CDI.class = class - if opts.cdiEnabled && opts.cdiOutputDir == "" { + if opts.CDI.Enabled && opts.CDI.outputDir == "" { t.logger.Warning("Skipping CDI spec generation (no output directory specified)") - opts.cdiEnabled = false + opts.CDI.Enabled = false } isDisabled := false @@ -265,7 +270,7 @@ func (t *Installer) ValidateOptions(opts *Options) error { break } } - if !opts.cdiEnabled && !isDisabled { + if !opts.CDI.Enabled && !isDisabled { t.logger.Info("disabling device node creation since --cdi-enabled=false") isDisabled = true } @@ -698,7 +703,7 @@ func (t *Installer) createDeviceNodes(opts *Options) error { // generateCDISpec generates a CDI spec for use in management containers func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) error { - if !opts.cdiEnabled { + if !opts.CDI.Enabled { return nil } t.logger.Info("Generating CDI spec for management containers") @@ -708,8 +713,8 @@ func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) err nvcdi.WithDriverRoot(opts.DriverRootCtrPath), nvcdi.WithDevRoot(opts.DevRootCtrPath), nvcdi.WithNVIDIACDIHookPath(nvidiaCDIHookPath), - nvcdi.WithVendor(opts.cdiVendor), - nvcdi.WithClass(opts.cdiClass), + nvcdi.WithVendor(opts.CDI.vendor), + nvcdi.WithClass(opts.CDI.class), ) if err != nil { return fmt.Errorf("failed to create CDI library for management containers: %v", err) @@ -734,7 +739,7 @@ func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) err if err != nil { return fmt.Errorf("failed to generate CDI name for management containers: %v", err) } - err = spec.Save(filepath.Join(opts.cdiOutputDir, name)) + err = spec.Save(filepath.Join(opts.CDI.outputDir, name)) if err != nil { return fmt.Errorf("failed to save CDI spec for management containers: %v", err) } diff --git a/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go b/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go index 9cfcda2a..855141ff 100644 --- a/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go +++ b/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go @@ -124,9 +124,11 @@ kind: example.com/class options := Options{ DriverRoot: "/host/driver/root", DriverRootCtrPath: filepath.Join(moduleRoot, "testdata", "lookup", tc.hostRoot), - cdiEnabled: tc.cdiEnabled, - cdiOutputDir: cdiOutputDir, - cdiKind: "example.com/class", + CDI: cdiOptions{ + Enabled: tc.cdiEnabled, + outputDir: cdiOutputDir, + kind: "example.com/class", + }, } ti := NewInstaller( diff --git a/cmd/nvidia-ctk-installer/main.go b/cmd/nvidia-ctk-installer/main.go index 835acbb0..d42b524e 100644 --- a/cmd/nvidia-ctk-installer/main.go +++ b/cmd/nvidia-ctk-installer/main.go @@ -164,7 +164,7 @@ func (a *app) Before(c *cli.Context, o *options) error { return a.validateFlags(c, o) } -func (a *app) validateFlags(_ *cli.Context, o *options) error { +func (a *app) validateFlags(c *cli.Context, o *options) error { if o.root == "" { return fmt.Errorf("the install root must be specified") } @@ -178,7 +178,7 @@ func (a *app) validateFlags(_ *cli.Context, o *options) error { if err := a.toolkit.ValidateOptions(&o.toolkitOptions); err != nil { return err } - if err := runtime.ValidateOptions(&o.runtimeOptions, o.runtime, o.toolkitRoot()); err != nil { + if err := runtime.ValidateOptions(c, &o.runtimeOptions, o.runtime, o.toolkitRoot(), &o.toolkitOptions); err != nil { return err } return nil diff --git a/cmd/nvidia-ctk/runtime/configure/configure.go b/cmd/nvidia-ctk/runtime/configure/configure.go index 5a7c16a1..aa8a496c 100644 --- a/cmd/nvidia-ctk/runtime/configure/configure.go +++ b/cmd/nvidia-ctk/runtime/configure/configure.go @@ -163,7 +163,7 @@ func (m command) build() *cli.Command { }, &cli.BoolFlag{ Name: "cdi.enabled", - Aliases: []string{"cdi.enable"}, + Aliases: []string{"cdi.enable", "enable-cdi"}, Usage: "Enable CDI in the configured runtime", Destination: &config.cdi.enabled, },