Merge pull request #838 from cdesiniotis/enable-cdi-toolkit-container

Enable CDI in the container runtime if enabled in the toolkit
This commit is contained in:
Evan Lezar 2025-02-03 16:37:49 +01:00 committed by GitHub
commit df4c87b877
14 changed files with 604 additions and 66 deletions
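In short: the installer's container.Options gains an EnableCDI field, Options.UpdateConfig forwards it to the new engine.Interface method EnableCDI(), and each engine (containerd v1/v2, CRI-O, Docker) implements that method with its own config shape. A minimal end-to-end sketch, assuming the import paths implied by the diff headers below and the map-backed docker.Config shown at the end of this diff:

package main

import (
	"fmt"

	"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
	"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker"
)

func main() {
	// EnableCDI flows from the installer options into the engine config.
	opts := container.Options{
		EnableCDI:   true,
		RuntimeName: "nvidia",
		RuntimeDir:  "/usr/bin",
	}
	cfg := docker.Config{}
	if err := opts.UpdateConfig(&cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg["features"]) // map[cdi:true]
}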

View File

@ -38,6 +38,8 @@ const (
type Options struct {
Config string
Socket string
// EnableCDI indicates whether CDI should be enabled.
EnableCDI bool
RuntimeName string
RuntimeDir string
SetAsDefault bool
@ -111,6 +113,10 @@ func (o Options) UpdateConfig(cfg engine.Interface) error {
}
}
if o.EnableCDI {
cfg.EnableCDI()
}
return nil
}

View File

@ -410,6 +410,51 @@ func TestUpdateV1ConfigWithRuncPresent(t *testing.T) {
}
}
func TestUpdateV1EnableCDI(t *testing.T) {
logger, _ := testlog.NewNullLogger()
const runtimeDir = "/test/runtime/dir"
testCases := []struct {
enableCDI bool
expectedEnableCDIValue interface{}
}{
{},
{
enableCDI: false,
expectedEnableCDIValue: nil,
},
{
enableCDI: true,
expectedEnableCDIValue: true,
},
}
for _, tc := range testCases {
t.Run(fmt.Sprintf("%v", tc.enableCDI), func(t *testing.T) {
o := &container.Options{
EnableCDI: tc.enableCDI,
RuntimeName: "nvidia",
RuntimeDir: runtimeDir,
}
cfg, err := toml.Empty.Load()
require.NoError(t, err)
v1 := &containerd.ConfigV1{
Logger: logger,
Tree: cfg,
RuntimeType: runtimeType,
}
err = o.UpdateConfig(v1)
require.NoError(t, err)
enableCDIValue := v1.GetPath([]string{"plugins", "cri", "containerd", "enable_cdi"})
require.EqualValues(t, tc.expectedEnableCDIValue, enableCDIValue)
})
}
}
func TestRevertV1Config(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {

View File

@ -366,6 +366,53 @@ func TestUpdateV2ConfigWithRuncPresent(t *testing.T) {
}
}
func TestUpdateV2ConfigEnableCDI(t *testing.T) {
logger, _ := testlog.NewNullLogger()
const runtimeDir = "/test/runtime/dir"
testCases := []struct {
enableCDI bool
expectedEnableCDIValue interface{}
}{
{},
{
enableCDI: false,
expectedEnableCDIValue: nil,
},
{
enableCDI: true,
expectedEnableCDIValue: true,
},
}
for _, tc := range testCases {
t.Run(fmt.Sprintf("%v", tc.enableCDI), func(t *testing.T) {
o := &container.Options{
EnableCDI: tc.enableCDI,
RuntimeName: "nvidia",
RuntimeDir: runtimeDir,
SetAsDefault: false,
}
cfg, err := toml.LoadMap(map[string]interface{}{})
require.NoError(t, err)
v2 := &containerd.Config{
Logger: logger,
Tree: cfg,
RuntimeType: runtimeType,
CRIRuntimePluginName: "io.containerd.grpc.v1.cri",
}
err = o.UpdateConfig(v2)
require.NoError(t, err)
enableCDIValue := cfg.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "enable_cdi"})
require.EqualValues(t, tc.expectedEnableCDIValue, enableCDIValue)
})
}
}
func TestRevertV2Config(t *testing.T) {
logger, _ := testlog.NewNullLogger()

View File

@ -25,6 +25,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/containerd"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/crio"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/docker"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/toolkit"
)
const (
@ -66,6 +67,12 @@ func Flags(opts *Options) []cli.Flag {
Destination: &opts.RestartMode,
EnvVars: []string{"RUNTIME_RESTART_MODE"},
},
&cli.BoolFlag{
Name: "enable-cdi-in-runtime",
Usage: "Enable CDI in the configured runtime",
Destination: &opts.EnableCDI,
EnvVars: []string{"RUNTIME_ENABLE_CDI"},
},
&cli.StringFlag{
Name: "host-root",
Usage: "Specify the path to the host root to be used when restarting the runtime using systemd",
@ -98,10 +105,14 @@ func Flags(opts *Options) []cli.Flag {
}
// ValidateOptions checks whether the specified options are valid
func ValidateOptions(opts *Options, runtime string, toolkitRoot string) error {
func ValidateOptions(c *cli.Context, opts *Options, runtime string, toolkitRoot string, to *toolkit.Options) error {
// We set this option here to ensure that it is available in future calls.
opts.RuntimeDir = toolkitRoot
if !c.IsSet("enable-cdi-in-runtime") {
opts.EnableCDI = to.CDI.Enabled
}
// Apply the runtime-specific config changes.
switch runtime {
case containerd.Name:

View File

@ -44,6 +44,14 @@ const (
configFilename = "config.toml"
)
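// cdiOptions groups the CDI-specific toolkit options. Enabled is exported
// so that the runtime options introduced in this PR can default to it.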
type cdiOptions struct {
Enabled bool
outputDir string
kind string
vendor string
class string
}
type Options struct {
DriverRoot string
DevRoot string
@ -63,11 +71,8 @@ type Options struct {
ContainerCLIDebug string
cdiEnabled bool
cdiOutputDir string
cdiKind string
cdiVendor string
cdiClass string
// CDI stores the CDI options for the toolkit.
CDI cdiOptions
createDeviceNodes cli.StringSlice
@ -170,21 +175,21 @@ func Flags(opts *Options) []cli.Flag {
Name: "cdi-enabled",
Aliases: []string{"enable-cdi"},
Usage: "enable the generation of a CDI specification",
Destination: &opts.cdiEnabled,
Destination: &opts.CDI.Enabled,
EnvVars: []string{"CDI_ENABLED", "ENABLE_CDI"},
},
&cli.StringFlag{
Name: "cdi-output-dir",
Usage: "the directory where the CDI output files are to be written. If this is set to '', no CDI specification is generated.",
Value: "/var/run/cdi",
Destination: &opts.cdiOutputDir,
Destination: &opts.CDI.outputDir,
EnvVars: []string{"CDI_OUTPUT_DIR"},
},
&cli.StringFlag{
Name: "cdi-kind",
Usage: "the vendor string to use for the generated CDI specification",
Value: "management.nvidia.com/gpu",
Destination: &opts.cdiKind,
Destination: &opts.CDI.kind,
EnvVars: []string{"CDI_KIND"},
},
&cli.BoolFlag{
@ -240,19 +245,19 @@ func (t *Installer) ValidateOptions(opts *Options) error {
return fmt.Errorf("invalid --toolkit-root option: %v", t.toolkitRoot)
}
vendor, class := parser.ParseQualifier(opts.cdiKind)
vendor, class := parser.ParseQualifier(opts.CDI.kind)
if err := parser.ValidateVendorName(vendor); err != nil {
return fmt.Errorf("invalid CDI vendor name: %v", err)
}
if err := parser.ValidateClassName(class); err != nil {
return fmt.Errorf("invalid CDI class name: %v", err)
}
opts.cdiVendor = vendor
opts.cdiClass = class
opts.CDI.vendor = vendor
opts.CDI.class = class
if opts.cdiEnabled && opts.cdiOutputDir == "" {
if opts.CDI.Enabled && opts.CDI.outputDir == "" {
t.logger.Warning("Skipping CDI spec generation (no output directory specified)")
opts.cdiEnabled = false
opts.CDI.Enabled = false
}
isDisabled := false
@ -265,7 +270,7 @@ func (t *Installer) ValidateOptions(opts *Options) error {
break
}
}
if !opts.cdiEnabled && !isDisabled {
if !opts.CDI.Enabled && !isDisabled {
t.logger.Info("disabling device node creation since --cdi-enabled=false")
isDisabled = true
}
@ -698,7 +703,7 @@ func (t *Installer) createDeviceNodes(opts *Options) error {
// generateCDISpec generates a CDI spec for use in management containers
func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) error {
if !opts.cdiEnabled {
if !opts.CDI.Enabled {
return nil
}
t.logger.Info("Generating CDI spec for management containers")
@ -708,8 +713,8 @@ func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) err
nvcdi.WithDriverRoot(opts.DriverRootCtrPath),
nvcdi.WithDevRoot(opts.DevRootCtrPath),
nvcdi.WithNVIDIACDIHookPath(nvidiaCDIHookPath),
nvcdi.WithVendor(opts.cdiVendor),
nvcdi.WithClass(opts.cdiClass),
nvcdi.WithVendor(opts.CDI.vendor),
nvcdi.WithClass(opts.CDI.class),
)
if err != nil {
return fmt.Errorf("failed to create CDI library for management containers: %v", err)
@ -734,7 +739,7 @@ func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) err
if err != nil {
return fmt.Errorf("failed to generate CDI name for management containers: %v", err)
}
err = spec.Save(filepath.Join(opts.cdiOutputDir, name))
err = spec.Save(filepath.Join(opts.CDI.outputDir, name))
if err != nil {
return fmt.Errorf("failed to save CDI spec for management containers: %v", err)
}

View File

@ -124,9 +124,11 @@ kind: example.com/class
options := Options{
DriverRoot: "/host/driver/root",
DriverRootCtrPath: filepath.Join(moduleRoot, "testdata", "lookup", tc.hostRoot),
cdiEnabled: tc.cdiEnabled,
cdiOutputDir: cdiOutputDir,
cdiKind: "example.com/class",
CDI: cdiOptions{
Enabled: tc.cdiEnabled,
outputDir: cdiOutputDir,
kind: "example.com/class",
},
}
ti := NewInstaller(

View File

@ -38,6 +38,7 @@ type options struct {
runtimeArgs string
root string
pidFile string
sourceRoot string
toolkitOptions toolkit.Options
runtimeOptions runtime.Options
@ -141,6 +142,13 @@ func (a app) build() *cli.App {
Destination: &options.root,
EnvVars: []string{"ROOT"},
},
&cli.StringFlag{
Name: "source-root",
Value: "/",
Usage: "The folder where the required toolkit artifacts can be found",
Destination: &options.sourceRoot,
EnvVars: []string{"SOURCE_ROOT"},
},
&cli.StringFlag{
Name: "pid-file",
Value: defaultPidFile,
@ -159,12 +167,13 @@ func (a app) build() *cli.App {
func (a *app) Before(c *cli.Context, o *options) error {
a.toolkit = toolkit.NewInstaller(
toolkit.WithLogger(a.logger),
toolkit.WithSourceRoot(o.sourceRoot),
toolkit.WithToolkitRoot(o.toolkitRoot()),
)
return a.validateFlags(c, o)
}
func (a *app) validateFlags(_ *cli.Context, o *options) error {
func (a *app) validateFlags(c *cli.Context, o *options) error {
if o.root == "" {
return fmt.Errorf("the install root must be specified")
}
@ -178,7 +187,7 @@ func (a *app) validateFlags(_ *cli.Context, o *options) error {
if err := a.toolkit.ValidateOptions(&o.toolkitOptions); err != nil {
return err
}
if err := runtime.ValidateOptions(&o.runtimeOptions, o.runtime, o.toolkitRoot()); err != nil {
if err := runtime.ValidateOptions(c, &o.runtimeOptions, o.runtime, o.toolkitRoot(), &o.toolkitOptions); err != nil {
return err
}
return nil

View File

@ -18,10 +18,15 @@ package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
)
func TestParseArgs(t *testing.T) {
@ -84,3 +89,413 @@ func TestParseArgs(t *testing.T) {
})
}
}
func TestApp(t *testing.T) {
t.Setenv("__NVCT_TESTING_DEVICES_ARE_FILES", "true")
logger, _ := testlog.NewNullLogger()
moduleRoot, err := test.GetModuleRoot()
require.NoError(t, err)
artifactRoot := filepath.Join(moduleRoot, "testdata", "installer", "artifacts")
hostRoot := filepath.Join(moduleRoot, "testdata", "lookup", "rootfs-1")
testCases := []struct {
description string
args []string
expectedToolkitConfig string
expectedRuntimeConfig string
}{
{
description: "no args",
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
accept-nvidia-visible-devices-envvar-when-unprivileged = true
disable-require = false
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
swarm-resource = ""
[nvidia-container-cli]
debug = ""
environment = []
ldcache = ""
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
load-kmods = true
no-cgroups = false
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
root = "/run/nvidia/driver"
user = ""
[nvidia-container-runtime]
debug = "/dev/null"
log-level = "info"
mode = "auto"
runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes]
[nvidia-container-runtime.modes.cdi]
annotation-prefixes = ["cdi.k8s.io/"]
default-kind = "nvidia.com/gpu"
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
[nvidia-container-runtime-hook]
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
skip-mode-detection = true
[nvidia-ctk]
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
`,
expectedRuntimeConfig: `{
"default-runtime": "nvidia",
"runtimes": {
"nvidia": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
},
"nvidia-cdi": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
},
"nvidia-legacy": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
}
}
}`,
},
{
description: "CDI enabled enables CDI in docker",
args: []string{"--cdi-enabled", "--create-device-nodes=none"},
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
accept-nvidia-visible-devices-envvar-when-unprivileged = true
disable-require = false
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
swarm-resource = ""
[nvidia-container-cli]
debug = ""
environment = []
ldcache = ""
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
load-kmods = true
no-cgroups = false
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
root = "/run/nvidia/driver"
user = ""
[nvidia-container-runtime]
debug = "/dev/null"
log-level = "info"
mode = "auto"
runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes]
[nvidia-container-runtime.modes.cdi]
annotation-prefixes = ["cdi.k8s.io/"]
default-kind = "nvidia.com/gpu"
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
[nvidia-container-runtime-hook]
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
skip-mode-detection = true
[nvidia-ctk]
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
`,
expectedRuntimeConfig: `{
"default-runtime": "nvidia",
"features": {
"cdi": true
},
"runtimes": {
"nvidia": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
},
"nvidia-cdi": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
},
"nvidia-legacy": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
}
}
}`,
},
{
description: "--enable-cdi-in-runtime=false overrides --cdi-enabled in Docker",
args: []string{"--cdi-enabled", "--create-device-nodes=none", "--enable-cdi-in-runtime=false"},
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
accept-nvidia-visible-devices-envvar-when-unprivileged = true
disable-require = false
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
swarm-resource = ""
[nvidia-container-cli]
debug = ""
environment = []
ldcache = ""
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
load-kmods = true
no-cgroups = false
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
root = "/run/nvidia/driver"
user = ""
[nvidia-container-runtime]
debug = "/dev/null"
log-level = "info"
mode = "auto"
runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes]
[nvidia-container-runtime.modes.cdi]
annotation-prefixes = ["cdi.k8s.io/"]
default-kind = "nvidia.com/gpu"
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
[nvidia-container-runtime-hook]
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
skip-mode-detection = true
[nvidia-ctk]
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
`,
expectedRuntimeConfig: `{
"default-runtime": "nvidia",
"runtimes": {
"nvidia": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
},
"nvidia-cdi": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
},
"nvidia-legacy": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
}
}
}`,
},
{
description: "CDI enabled enables CDI in containerd",
args: []string{"--cdi-enabled", "--runtime=containerd"},
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
accept-nvidia-visible-devices-envvar-when-unprivileged = true
disable-require = false
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
swarm-resource = ""
[nvidia-container-cli]
debug = ""
environment = []
ldcache = ""
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
load-kmods = true
no-cgroups = false
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
root = "/run/nvidia/driver"
user = ""
[nvidia-container-runtime]
debug = "/dev/null"
log-level = "info"
mode = "auto"
runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes]
[nvidia-container-runtime.modes.cdi]
annotation-prefixes = ["cdi.k8s.io/"]
default-kind = "nvidia.com/gpu"
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
[nvidia-container-runtime-hook]
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
skip-mode-detection = true
[nvidia-ctk]
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
`,
expectedRuntimeConfig: `version = 2
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
enable_cdi = true
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "nvidia"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi.options]
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options]
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
`,
},
{
description: "--enable-cdi-in-runtime=false overrides --cdi-enabled in containerd",
args: []string{"--cdi-enabled", "--create-device-nodes=none", "--enable-cdi-in-runtime=false", "--runtime=containerd"},
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
accept-nvidia-visible-devices-envvar-when-unprivileged = true
disable-require = false
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
swarm-resource = ""
[nvidia-container-cli]
debug = ""
environment = []
ldcache = ""
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
load-kmods = true
no-cgroups = false
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
root = "/run/nvidia/driver"
user = ""
[nvidia-container-runtime]
debug = "/dev/null"
log-level = "info"
mode = "auto"
runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes]
[nvidia-container-runtime.modes.cdi]
annotation-prefixes = ["cdi.k8s.io/"]
default-kind = "nvidia.com/gpu"
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
[nvidia-container-runtime-hook]
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
skip-mode-detection = true
[nvidia-ctk]
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
`,
expectedRuntimeConfig: `version = 2
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "nvidia"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi.options]
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options]
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
`,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
testRoot := t.TempDir()
cdiOutputDir := filepath.Join(testRoot, "/var/run/cdi")
runtimeConfigFile := filepath.Join(testRoot, "config.file")
toolkitRoot := filepath.Join(testRoot, "toolkit-test")
toolkitConfigFile := filepath.Join(toolkitRoot, "toolkit/.config/nvidia-container-runtime/config.toml")
app := NewApp(logger, toolkitRoot)
testArgs := []string{
"nvidia-ctk-installer",
"--no-daemon",
"--cdi-output-dir=" + cdiOutputDir,
"--config=" + runtimeConfigFile,
"--create-device-nodes=none",
"--driver-root-ctr-path=" + hostRoot,
"--pid-file=" + filepath.Join(testRoot, "toolkit.pid"),
"--restart-mode=none",
"--source-root=" + filepath.Join(artifactRoot, "deb"),
}
err := app.Run(append(testArgs, tc.args...))
require.NoError(t, err)
require.FileExists(t, toolkitConfigFile)
toolkitConfigFileContents, err := os.ReadFile(toolkitConfigFile)
require.NoError(t, err)
require.EqualValues(t, strings.ReplaceAll(tc.expectedToolkitConfig, "{{ .toolkitRoot }}", toolkitRoot), string(toolkitConfigFileContents))
require.FileExists(t, runtimeConfigFile)
runtimeConfigFileContents, err := os.ReadFile(runtimeConfigFile)
require.NoError(t, err)
require.EqualValues(t, strings.ReplaceAll(tc.expectedRuntimeConfig, "{{ .toolkitRoot }}", toolkitRoot), string(runtimeConfigFileContents))
})
}
}

View File

@ -163,7 +163,7 @@ func (m command) build() *cli.Command {
},
&cli.BoolFlag{
Name: "cdi.enabled",
Aliases: []string{"cdi.enable"},
Aliases: []string{"cdi.enable", "enable-cdi"},
Usage: "Enable CDI in the configured runtime",
Destination: &config.cdi.enabled,
},
@ -292,9 +292,8 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error {
return fmt.Errorf("unable to update config: %v", err)
}
err = enableCDI(config, cfg)
if err != nil {
return fmt.Errorf("failed to enable CDI in %s: %w", config.runtime, err)
if config.cdi.enabled {
cfg.EnableCDI()
}
outputPath := config.getOutputConfigPath()
@ -354,19 +353,3 @@ func (m *command) configureOCIHook(c *cli.Context, config *config) error {
}
return nil
}
// enableCDI enables the use of CDI in the corresponding container engine
func enableCDI(config *config, cfg engine.Interface) error {
if !config.cdi.enabled {
return nil
}
switch config.runtime {
case "containerd":
cfg.Set("enable_cdi", true)
case "docker":
cfg.Set("features", map[string]bool{"cdi": true})
default:
return fmt.Errorf("enabling CDI in %s is not supported", config.runtime)
}
return nil
}

View File

@ -20,10 +20,10 @@ package engine
type Interface interface {
AddRuntime(string, string, bool) error
DefaultRuntime() string
EnableCDI()
GetRuntimeConfig(string) (RuntimeConfig, error)
RemoveRuntime(string) error
Save(string) (int64, error)
Set(string, interface{})
String() string
}
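Note that this rendered hunk shows both the line added (EnableCDI()) and the line removed: the Set(string, interface{}) implementations are deleted from the containerd and docker engines later in this diff, which suggests the post-merge interface drops Set in favor of the purpose-built EnableCDI. A compile-only sketch of the resulting surface area, using a hypothetical no-op implementation (import path assumed from the repository layout):

package main

import (
	"fmt"

	"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
)

// nullEngine is a hypothetical no-op engine, written only to show the
// method set an engine.Interface implementation now carries.
type nullEngine struct{}

func (nullEngine) AddRuntime(name, path string, setAsDefault bool) error { return nil }
func (nullEngine) DefaultRuntime() string                                { return "" }
func (nullEngine) EnableCDI()                                            {} // added in this PR
func (nullEngine) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) {
	return nil, fmt.Errorf("nullEngine has no runtime config")
}
func (nullEngine) RemoveRuntime(name string) error { return nil }
func (nullEngine) Save(path string) (int64, error) { return 0, nil }
func (nullEngine) String() string                  { return "" }

var _ engine.Interface = nullEngine{}

func main() {
	var e engine.Interface = nullEngine{}
	e.EnableCDI() // no-op here; the real engines write their config trees
}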

View File

@ -96,13 +96,6 @@ func (c *Config) getRuntimeAnnotations(path []string) ([]string, error) {
return annotations, nil
}
// Set sets the specified containerd option.
func (c *Config) Set(key string, value interface{}) {
config := *c.Tree
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, key}, value)
*c.Tree = config
}
// DefaultRuntime returns the default runtime for the cri-o config
func (c Config) DefaultRuntime() string {
if runtime, ok := c.GetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "default_runtime_name"}).(string); ok {
@ -111,6 +104,13 @@ func (c Config) DefaultRuntime() string {
return ""
}
// EnableCDI sets the enable_cdi field in the Containerd config to true.
func (c *Config) EnableCDI() {
config := *c.Tree
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "enable_cdi"}, true)
*c.Tree = config
}
// RemoveRuntime removes a runtime from the docker config
func (c *Config) RemoveRuntime(name string) error {
if c == nil || c.Tree == nil {

View File

@ -143,13 +143,6 @@ func (c *ConfigV1) RemoveRuntime(name string) error {
return nil
}
// Set sets the specified containerd option.
func (c *ConfigV1) Set(key string, value interface{}) {
config := *c.Tree
config.SetPath([]string{"plugins", "cri", "containerd", key}, value)
*c.Tree = config
}
// Save writes the config to a file
func (c ConfigV1) Save(path string) (int64, error) {
return (Config)(c).Save(path)
@ -165,3 +158,9 @@ func (c *ConfigV1) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) {
tree: runtimeData,
}, nil
}
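// EnableCDI sets the enable_cdi field in the containerd v1 config to true.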
func (c *ConfigV1) EnableCDI() {
config := *c.Tree
config.SetPath([]string{"plugins", "cri", "containerd", "enable_cdi"}, true)
*c.Tree = config
}

View File

@ -153,6 +153,9 @@ func (c *Config) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) {
}, nil
}
// EnableCDI is a no-op for CRI-O since CDI is always enabled where supported.
func (c *Config) EnableCDI() {}
// CommandLineSource returns the CLI-based crio config loader
func CommandLineSource(hostRoot string) toml.Loader {
return toml.LoadFirst(

View File

@ -103,6 +103,24 @@ func (c Config) DefaultRuntime() string {
return r
}
// EnableCDI sets features.cdi to true in the docker config.
func (c *Config) EnableCDI() {
if c == nil {
return
}
config := *c
features, ok := config["features"].(map[string]bool)
if !ok {
features = make(map[string]bool)
}
features["cdi"] = true
config["features"] = features
*c = config
}
// RemoveRuntime removes a runtime from the docker config
func (c *Config) RemoveRuntime(name string) error {
if c == nil {
@ -132,11 +150,6 @@ func (c *Config) RemoveRuntime(name string) error {
return nil
}
// Set sets the specified docker option
func (c *Config) Set(key string, value interface{}) {
(*c)[key] = value
}
// Save writes the config to the specified path
func (c Config) Save(path string) (int64, error) {
output, err := json.MarshalIndent(c, "", " ")
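To close the loop on the docker side, a usage sketch assuming docker.Config is the map-backed type implied by the indexing above (import path assumed from the repository layout); the output matches the "features": {"cdi": true} block in the expected daemon.json from the installer tests:

package main

import (
	"encoding/json"
	"fmt"

	"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker"
)

func main() {
	cfg := docker.Config{}
	cfg.EnableCDI()
	out, err := json.MarshalIndent(cfg, "", "    ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // {"features": {"cdi": true}}, indented
}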