mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-02-04 03:56:23 +00:00
Merge pull request #838 from cdesiniotis/enable-cdi-toolkit-container
Some checks are pending
CodeQL / Analyze Go code with CodeQL (push) Waiting to run
Golang / check (push) Waiting to run
Golang / Unit test (push) Waiting to run
Golang / Build (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, centos7-aarch64) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, centos7-x86_64) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, centos8-ppc64le) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-amd64) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-arm64) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-ppc64le) (push) Waiting to run
image / image (packaging, ${{github.event_name == 'pull_request'}}) (push) Blocked by required conditions
image / image (ubi8, ${{github.event_name == 'pull_request'}}) (push) Blocked by required conditions
image / image (ubuntu20.04, ${{github.event_name == 'pull_request'}}) (push) Blocked by required conditions
Some checks are pending
CodeQL / Analyze Go code with CodeQL (push) Waiting to run
Golang / check (push) Waiting to run
Golang / Unit test (push) Waiting to run
Golang / Build (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, centos7-aarch64) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, centos7-x86_64) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, centos8-ppc64le) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-amd64) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-arm64) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-ppc64le) (push) Waiting to run
image / image (packaging, ${{github.event_name == 'pull_request'}}) (push) Blocked by required conditions
image / image (ubi8, ${{github.event_name == 'pull_request'}}) (push) Blocked by required conditions
image / image (ubuntu20.04, ${{github.event_name == 'pull_request'}}) (push) Blocked by required conditions
Enable CDI in the container runtime if enabled in the toolkit
This commit is contained in:
commit
df4c87b877
@ -38,6 +38,8 @@ const (
|
||||
type Options struct {
|
||||
Config string
|
||||
Socket string
|
||||
// EnableCDI indicates whether CDI should be enabled in the configured runtime.
|
||||
EnableCDI bool
|
||||
RuntimeName string
|
||||
RuntimeDir string
|
||||
SetAsDefault bool
|
||||
@ -111,6 +113,10 @@ func (o Options) UpdateConfig(cfg engine.Interface) error {
|
||||
}
|
||||
}
|
||||
|
||||
if o.EnableCDI {
|
||||
cfg.EnableCDI()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -410,6 +410,51 @@ func TestUpdateV1ConfigWithRuncPresent(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateV1EnableCDI(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
const runtimeDir = "/test/runtime/dir"
|
||||
|
||||
testCases := []struct {
|
||||
enableCDI bool
|
||||
expectedEnableCDIValue interface{}
|
||||
}{
|
||||
{},
|
||||
{
|
||||
enableCDI: false,
|
||||
expectedEnableCDIValue: nil,
|
||||
},
|
||||
{
|
||||
enableCDI: true,
|
||||
expectedEnableCDIValue: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%v", tc.enableCDI), func(t *testing.T) {
|
||||
o := &container.Options{
|
||||
EnableCDI: tc.enableCDI,
|
||||
RuntimeName: "nvidia",
|
||||
RuntimeDir: runtimeDir,
|
||||
}
|
||||
|
||||
cfg, err := toml.Empty.Load()
|
||||
require.NoError(t, err)
|
||||
|
||||
v1 := &containerd.ConfigV1{
|
||||
Logger: logger,
|
||||
Tree: cfg,
|
||||
RuntimeType: runtimeType,
|
||||
}
|
||||
|
||||
err = o.UpdateConfig(v1)
|
||||
require.NoError(t, err)
|
||||
|
||||
enableCDIValue := v1.GetPath([]string{"plugins", "cri", "containerd", "enable_cdi"})
|
||||
require.EqualValues(t, tc.expectedEnableCDIValue, enableCDIValue)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRevertV1Config(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
testCases := []struct {
|
||||
|
@ -366,6 +366,53 @@ func TestUpdateV2ConfigWithRuncPresent(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateV2ConfigEnableCDI(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
const runtimeDir = "/test/runtime/dir"
|
||||
|
||||
testCases := []struct {
|
||||
enableCDI bool
|
||||
expectedEnableCDIValue interface{}
|
||||
}{
|
||||
{},
|
||||
{
|
||||
enableCDI: false,
|
||||
expectedEnableCDIValue: nil,
|
||||
},
|
||||
{
|
||||
enableCDI: true,
|
||||
expectedEnableCDIValue: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%v", tc.enableCDI), func(t *testing.T) {
|
||||
o := &container.Options{
|
||||
EnableCDI: tc.enableCDI,
|
||||
RuntimeName: "nvidia",
|
||||
RuntimeDir: runtimeDir,
|
||||
SetAsDefault: false,
|
||||
}
|
||||
|
||||
cfg, err := toml.LoadMap(map[string]interface{}{})
|
||||
require.NoError(t, err)
|
||||
|
||||
v2 := &containerd.Config{
|
||||
Logger: logger,
|
||||
Tree: cfg,
|
||||
RuntimeType: runtimeType,
|
||||
CRIRuntimePluginName: "io.containerd.grpc.v1.cri",
|
||||
}
|
||||
|
||||
err = o.UpdateConfig(v2)
|
||||
require.NoError(t, err)
|
||||
|
||||
enableCDIValue := cfg.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "enable_cdi"})
|
||||
require.EqualValues(t, tc.expectedEnableCDIValue, enableCDIValue)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRevertV2Config(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
|
@ -25,6 +25,7 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/containerd"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/crio"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/docker"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/toolkit"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -66,6 +67,12 @@ func Flags(opts *Options) []cli.Flag {
|
||||
Destination: &opts.RestartMode,
|
||||
EnvVars: []string{"RUNTIME_RESTART_MODE"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "enable-cdi-in-runtime",
|
||||
Usage: "Enable CDI in the configured runtime",
|
||||
Destination: &opts.EnableCDI,
|
||||
EnvVars: []string{"RUNTIME_ENABLE_CDI"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "host-root",
|
||||
Usage: "Specify the path to the host root to be used when restarting the runtime using systemd",
|
||||
@ -98,10 +105,14 @@ func Flags(opts *Options) []cli.Flag {
|
||||
}
|
||||
|
||||
// ValidateOptions checks whether the specified options are valid
|
||||
func ValidateOptions(opts *Options, runtime string, toolkitRoot string) error {
|
||||
func ValidateOptions(c *cli.Context, opts *Options, runtime string, toolkitRoot string, to *toolkit.Options) error {
|
||||
// We set this option here to ensure that it is available in future calls.
|
||||
opts.RuntimeDir = toolkitRoot
|
||||
|
||||
if !c.IsSet("enable-cdi-in-runtime") {
|
||||
opts.EnableCDI = to.CDI.Enabled
|
||||
}
|
||||
|
||||
// Apply the runtime-specific config changes.
|
||||
switch runtime {
|
||||
case containerd.Name:
|
||||
|
@ -44,6 +44,14 @@ const (
|
||||
configFilename = "config.toml"
|
||||
)
|
||||
|
||||
type cdiOptions struct {
|
||||
Enabled bool
|
||||
outputDir string
|
||||
kind string
|
||||
vendor string
|
||||
class string
|
||||
}
|
||||
|
||||
type Options struct {
|
||||
DriverRoot string
|
||||
DevRoot string
|
||||
@ -63,11 +71,8 @@ type Options struct {
|
||||
|
||||
ContainerCLIDebug string
|
||||
|
||||
cdiEnabled bool
|
||||
cdiOutputDir string
|
||||
cdiKind string
|
||||
cdiVendor string
|
||||
cdiClass string
|
||||
// CDI stores the CDI options for the toolkit.
|
||||
CDI cdiOptions
|
||||
|
||||
createDeviceNodes cli.StringSlice
|
||||
|
||||
@ -170,21 +175,21 @@ func Flags(opts *Options) []cli.Flag {
|
||||
Name: "cdi-enabled",
|
||||
Aliases: []string{"enable-cdi"},
|
||||
Usage: "enable the generation of a CDI specification",
|
||||
Destination: &opts.cdiEnabled,
|
||||
Destination: &opts.CDI.Enabled,
|
||||
EnvVars: []string{"CDI_ENABLED", "ENABLE_CDI"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "cdi-output-dir",
|
||||
Usage: "the directory where the CDI output files are to be written. If this is set to '', no CDI specification is generated.",
|
||||
Value: "/var/run/cdi",
|
||||
Destination: &opts.cdiOutputDir,
|
||||
Destination: &opts.CDI.outputDir,
|
||||
EnvVars: []string{"CDI_OUTPUT_DIR"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "cdi-kind",
|
||||
Usage: "the vendor string to use for the generated CDI specification",
|
||||
Value: "management.nvidia.com/gpu",
|
||||
Destination: &opts.cdiKind,
|
||||
Destination: &opts.CDI.kind,
|
||||
EnvVars: []string{"CDI_KIND"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
@ -240,19 +245,19 @@ func (t *Installer) ValidateOptions(opts *Options) error {
|
||||
return fmt.Errorf("invalid --toolkit-root option: %v", t.toolkitRoot)
|
||||
}
|
||||
|
||||
vendor, class := parser.ParseQualifier(opts.cdiKind)
|
||||
vendor, class := parser.ParseQualifier(opts.CDI.kind)
|
||||
if err := parser.ValidateVendorName(vendor); err != nil {
|
||||
return fmt.Errorf("invalid CDI vendor name: %v", err)
|
||||
}
|
||||
if err := parser.ValidateClassName(class); err != nil {
|
||||
return fmt.Errorf("invalid CDI class name: %v", err)
|
||||
}
|
||||
opts.cdiVendor = vendor
|
||||
opts.cdiClass = class
|
||||
opts.CDI.vendor = vendor
|
||||
opts.CDI.class = class
|
||||
|
||||
if opts.cdiEnabled && opts.cdiOutputDir == "" {
|
||||
if opts.CDI.Enabled && opts.CDI.outputDir == "" {
|
||||
t.logger.Warning("Skipping CDI spec generation (no output directory specified)")
|
||||
opts.cdiEnabled = false
|
||||
opts.CDI.Enabled = false
|
||||
}
|
||||
|
||||
isDisabled := false
|
||||
@ -265,7 +270,7 @@ func (t *Installer) ValidateOptions(opts *Options) error {
|
||||
break
|
||||
}
|
||||
}
|
||||
if !opts.cdiEnabled && !isDisabled {
|
||||
if !opts.CDI.Enabled && !isDisabled {
|
||||
t.logger.Info("disabling device node creation since --cdi-enabled=false")
|
||||
isDisabled = true
|
||||
}
|
||||
@ -698,7 +703,7 @@ func (t *Installer) createDeviceNodes(opts *Options) error {
|
||||
|
||||
// generateCDISpec generates a CDI spec for use in management containers
|
||||
func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) error {
|
||||
if !opts.cdiEnabled {
|
||||
if !opts.CDI.Enabled {
|
||||
return nil
|
||||
}
|
||||
t.logger.Info("Generating CDI spec for management containers")
|
||||
@ -708,8 +713,8 @@ func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) err
|
||||
nvcdi.WithDriverRoot(opts.DriverRootCtrPath),
|
||||
nvcdi.WithDevRoot(opts.DevRootCtrPath),
|
||||
nvcdi.WithNVIDIACDIHookPath(nvidiaCDIHookPath),
|
||||
nvcdi.WithVendor(opts.cdiVendor),
|
||||
nvcdi.WithClass(opts.cdiClass),
|
||||
nvcdi.WithVendor(opts.CDI.vendor),
|
||||
nvcdi.WithClass(opts.CDI.class),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create CDI library for management containers: %v", err)
|
||||
@ -734,7 +739,7 @@ func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) err
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI name for management containers: %v", err)
|
||||
}
|
||||
err = spec.Save(filepath.Join(opts.cdiOutputDir, name))
|
||||
err = spec.Save(filepath.Join(opts.CDI.outputDir, name))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to save CDI spec for management containers: %v", err)
|
||||
}
|
||||
|
@ -124,9 +124,11 @@ kind: example.com/class
|
||||
options := Options{
|
||||
DriverRoot: "/host/driver/root",
|
||||
DriverRootCtrPath: filepath.Join(moduleRoot, "testdata", "lookup", tc.hostRoot),
|
||||
cdiEnabled: tc.cdiEnabled,
|
||||
cdiOutputDir: cdiOutputDir,
|
||||
cdiKind: "example.com/class",
|
||||
CDI: cdiOptions{
|
||||
Enabled: tc.cdiEnabled,
|
||||
outputDir: cdiOutputDir,
|
||||
kind: "example.com/class",
|
||||
},
|
||||
}
|
||||
|
||||
ti := NewInstaller(
|
||||
|
@ -38,6 +38,7 @@ type options struct {
|
||||
runtimeArgs string
|
||||
root string
|
||||
pidFile string
|
||||
sourceRoot string
|
||||
|
||||
toolkitOptions toolkit.Options
|
||||
runtimeOptions runtime.Options
|
||||
@ -141,6 +142,13 @@ func (a app) build() *cli.App {
|
||||
Destination: &options.root,
|
||||
EnvVars: []string{"ROOT"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "source-root",
|
||||
Value: "/",
|
||||
Usage: "The folder where the required toolkit artifacts can be found",
|
||||
Destination: &options.sourceRoot,
|
||||
EnvVars: []string{"SOURCE_ROOT"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "pid-file",
|
||||
Value: defaultPidFile,
|
||||
@ -159,12 +167,13 @@ func (a app) build() *cli.App {
|
||||
func (a *app) Before(c *cli.Context, o *options) error {
|
||||
a.toolkit = toolkit.NewInstaller(
|
||||
toolkit.WithLogger(a.logger),
|
||||
toolkit.WithSourceRoot(o.sourceRoot),
|
||||
toolkit.WithToolkitRoot(o.toolkitRoot()),
|
||||
)
|
||||
return a.validateFlags(c, o)
|
||||
}
|
||||
|
||||
func (a *app) validateFlags(_ *cli.Context, o *options) error {
|
||||
func (a *app) validateFlags(c *cli.Context, o *options) error {
|
||||
if o.root == "" {
|
||||
return fmt.Errorf("the install root must be specified")
|
||||
}
|
||||
@ -178,7 +187,7 @@ func (a *app) validateFlags(_ *cli.Context, o *options) error {
|
||||
if err := a.toolkit.ValidateOptions(&o.toolkitOptions); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := runtime.ValidateOptions(&o.runtimeOptions, o.runtime, o.toolkitRoot()); err != nil {
|
||||
if err := runtime.ValidateOptions(c, &o.runtimeOptions, o.runtime, o.toolkitRoot(), &o.toolkitOptions); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
|
@ -18,10 +18,15 @@ package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
)
|
||||
|
||||
func TestParseArgs(t *testing.T) {
|
||||
@ -84,3 +89,413 @@ func TestParseArgs(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestApp(t *testing.T) {
|
||||
t.Setenv("__NVCT_TESTING_DEVICES_ARE_FILES", "true")
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
moduleRoot, err := test.GetModuleRoot()
|
||||
require.NoError(t, err)
|
||||
|
||||
artifactRoot := filepath.Join(moduleRoot, "testdata", "installer", "artifacts")
|
||||
hostRoot := filepath.Join(moduleRoot, "testdata", "lookup", "rootfs-1")
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
args []string
|
||||
expectedToolkitConfig string
|
||||
expectedRuntimeConfig string
|
||||
}{
|
||||
{
|
||||
description: "no args",
|
||||
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
disable-require = false
|
||||
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
|
||||
load-kmods = true
|
||||
no-cgroups = false
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
|
||||
root = "/run/nvidia/driver"
|
||||
user = ""
|
||||
|
||||
[nvidia-container-runtime]
|
||||
debug = "/dev/null"
|
||||
log-level = "info"
|
||||
mode = "auto"
|
||||
runtimes = ["docker-runc", "runc", "crun"]
|
||||
|
||||
[nvidia-container-runtime.modes]
|
||||
|
||||
[nvidia-container-runtime.modes.cdi]
|
||||
annotation-prefixes = ["cdi.k8s.io/"]
|
||||
default-kind = "nvidia.com/gpu"
|
||||
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
[nvidia-container-runtime-hook]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
|
||||
skip-mode-detection = true
|
||||
|
||||
[nvidia-ctk]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
|
||||
`,
|
||||
expectedRuntimeConfig: `{
|
||||
"default-runtime": "nvidia",
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
|
||||
},
|
||||
"nvidia-cdi": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
|
||||
},
|
||||
"nvidia-legacy": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
|
||||
}
|
||||
}
|
||||
}`,
|
||||
},
|
||||
{
|
||||
description: "CDI enabled enables CDI in docker",
|
||||
args: []string{"--cdi-enabled", "--create-device-nodes=none"},
|
||||
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
disable-require = false
|
||||
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
|
||||
load-kmods = true
|
||||
no-cgroups = false
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
|
||||
root = "/run/nvidia/driver"
|
||||
user = ""
|
||||
|
||||
[nvidia-container-runtime]
|
||||
debug = "/dev/null"
|
||||
log-level = "info"
|
||||
mode = "auto"
|
||||
runtimes = ["docker-runc", "runc", "crun"]
|
||||
|
||||
[nvidia-container-runtime.modes]
|
||||
|
||||
[nvidia-container-runtime.modes.cdi]
|
||||
annotation-prefixes = ["cdi.k8s.io/"]
|
||||
default-kind = "nvidia.com/gpu"
|
||||
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
[nvidia-container-runtime-hook]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
|
||||
skip-mode-detection = true
|
||||
|
||||
[nvidia-ctk]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
|
||||
`,
|
||||
expectedRuntimeConfig: `{
|
||||
"default-runtime": "nvidia",
|
||||
"features": {
|
||||
"cdi": true
|
||||
},
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
|
||||
},
|
||||
"nvidia-cdi": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
|
||||
},
|
||||
"nvidia-legacy": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
|
||||
}
|
||||
}
|
||||
}`,
|
||||
},
|
||||
{
|
||||
description: "--enable-cdi-in-runtime=false overrides --cdi-enabled in Docker",
|
||||
args: []string{"--cdi-enabled", "--create-device-nodes=none", "--enable-cdi-in-runtime=false"},
|
||||
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
disable-require = false
|
||||
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
|
||||
load-kmods = true
|
||||
no-cgroups = false
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
|
||||
root = "/run/nvidia/driver"
|
||||
user = ""
|
||||
|
||||
[nvidia-container-runtime]
|
||||
debug = "/dev/null"
|
||||
log-level = "info"
|
||||
mode = "auto"
|
||||
runtimes = ["docker-runc", "runc", "crun"]
|
||||
|
||||
[nvidia-container-runtime.modes]
|
||||
|
||||
[nvidia-container-runtime.modes.cdi]
|
||||
annotation-prefixes = ["cdi.k8s.io/"]
|
||||
default-kind = "nvidia.com/gpu"
|
||||
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
[nvidia-container-runtime-hook]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
|
||||
skip-mode-detection = true
|
||||
|
||||
[nvidia-ctk]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
|
||||
`,
|
||||
expectedRuntimeConfig: `{
|
||||
"default-runtime": "nvidia",
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
|
||||
},
|
||||
"nvidia-cdi": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
|
||||
},
|
||||
"nvidia-legacy": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
|
||||
}
|
||||
}
|
||||
}`,
|
||||
},
|
||||
{
|
||||
description: "CDI enabled enables CDI in containerd",
|
||||
args: []string{"--cdi-enabled", "--runtime=containerd"},
|
||||
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
disable-require = false
|
||||
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
|
||||
load-kmods = true
|
||||
no-cgroups = false
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
|
||||
root = "/run/nvidia/driver"
|
||||
user = ""
|
||||
|
||||
[nvidia-container-runtime]
|
||||
debug = "/dev/null"
|
||||
log-level = "info"
|
||||
mode = "auto"
|
||||
runtimes = ["docker-runc", "runc", "crun"]
|
||||
|
||||
[nvidia-container-runtime.modes]
|
||||
|
||||
[nvidia-container-runtime.modes.cdi]
|
||||
annotation-prefixes = ["cdi.k8s.io/"]
|
||||
default-kind = "nvidia.com/gpu"
|
||||
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
[nvidia-container-runtime-hook]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
|
||||
skip-mode-detection = true
|
||||
|
||||
[nvidia-ctk]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
|
||||
`,
|
||||
expectedRuntimeConfig: `version = 2
|
||||
|
||||
[plugins]
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri"]
|
||||
enable_cdi = true
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd]
|
||||
default_runtime_name = "nvidia"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
|
||||
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi]
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi.options]
|
||||
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy]
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options]
|
||||
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
|
||||
`,
|
||||
},
|
||||
{
|
||||
description: "--enable-cdi-in-runtime=false overrides --cdi-enabled in containerd",
|
||||
args: []string{"--cdi-enabled", "--create-device-nodes=none", "--enable-cdi-in-runtime=false", "--runtime=containerd"},
|
||||
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
disable-require = false
|
||||
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
|
||||
load-kmods = true
|
||||
no-cgroups = false
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
|
||||
root = "/run/nvidia/driver"
|
||||
user = ""
|
||||
|
||||
[nvidia-container-runtime]
|
||||
debug = "/dev/null"
|
||||
log-level = "info"
|
||||
mode = "auto"
|
||||
runtimes = ["docker-runc", "runc", "crun"]
|
||||
|
||||
[nvidia-container-runtime.modes]
|
||||
|
||||
[nvidia-container-runtime.modes.cdi]
|
||||
annotation-prefixes = ["cdi.k8s.io/"]
|
||||
default-kind = "nvidia.com/gpu"
|
||||
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
[nvidia-container-runtime-hook]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
|
||||
skip-mode-detection = true
|
||||
|
||||
[nvidia-ctk]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
|
||||
`,
|
||||
expectedRuntimeConfig: `version = 2
|
||||
|
||||
[plugins]
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri"]
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd]
|
||||
default_runtime_name = "nvidia"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
|
||||
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi]
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi.options]
|
||||
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy]
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options]
|
||||
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
testRoot := t.TempDir()
|
||||
|
||||
cdiOutputDir := filepath.Join(testRoot, "/var/run/cdi")
|
||||
runtimeConfigFile := filepath.Join(testRoot, "config.file")
|
||||
|
||||
toolkitRoot := filepath.Join(testRoot, "toolkit-test")
|
||||
toolkitConfigFile := filepath.Join(toolkitRoot, "toolkit/.config/nvidia-container-runtime/config.toml")
|
||||
|
||||
app := NewApp(logger, toolkitRoot)
|
||||
|
||||
testArgs := []string{
|
||||
"nvidia-ctk-installer",
|
||||
"--no-daemon",
|
||||
"--cdi-output-dir=" + cdiOutputDir,
|
||||
"--config=" + runtimeConfigFile,
|
||||
"--create-device-nodes=none",
|
||||
"--driver-root-ctr-path=" + hostRoot,
|
||||
"--pid-file=" + filepath.Join(testRoot, "toolkit.pid"),
|
||||
"--restart-mode=none",
|
||||
"--source-root=" + filepath.Join(artifactRoot, "deb"),
|
||||
}
|
||||
|
||||
err := app.Run(append(testArgs, tc.args...))
|
||||
|
||||
require.NoError(t, err)
|
||||
|
||||
require.FileExists(t, toolkitConfigFile)
|
||||
toolkitConfigFileContents, err := os.ReadFile(toolkitConfigFile)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, strings.ReplaceAll(tc.expectedToolkitConfig, "{{ .toolkitRoot }}", toolkitRoot), string(toolkitConfigFileContents))
|
||||
|
||||
require.FileExists(t, runtimeConfigFile)
|
||||
runtimeConfigFileContents, err := os.ReadFile(runtimeConfigFile)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, strings.ReplaceAll(tc.expectedRuntimeConfig, "{{ .toolkitRoot }}", toolkitRoot), string(runtimeConfigFileContents))
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -163,7 +163,7 @@ func (m command) build() *cli.Command {
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "cdi.enabled",
|
||||
Aliases: []string{"cdi.enable"},
|
||||
Aliases: []string{"cdi.enable", "enable-cdi"},
|
||||
Usage: "Enable CDI in the configured runtime",
|
||||
Destination: &config.cdi.enabled,
|
||||
},
|
||||
@ -292,9 +292,8 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error {
|
||||
return fmt.Errorf("unable to update config: %v", err)
|
||||
}
|
||||
|
||||
err = enableCDI(config, cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to enable CDI in %s: %w", config.runtime, err)
|
||||
if config.cdi.enabled {
|
||||
cfg.EnableCDI()
|
||||
}
|
||||
|
||||
outputPath := config.getOutputConfigPath()
|
||||
@ -354,19 +353,3 @@ func (m *command) configureOCIHook(c *cli.Context, config *config) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// enableCDI enables the use of CDI in the corresponding container engine
|
||||
func enableCDI(config *config, cfg engine.Interface) error {
|
||||
if !config.cdi.enabled {
|
||||
return nil
|
||||
}
|
||||
switch config.runtime {
|
||||
case "containerd":
|
||||
cfg.Set("enable_cdi", true)
|
||||
case "docker":
|
||||
cfg.Set("features", map[string]bool{"cdi": true})
|
||||
default:
|
||||
return fmt.Errorf("enabling CDI in %s is not supported", config.runtime)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -20,10 +20,10 @@ package engine
|
||||
type Interface interface {
|
||||
AddRuntime(string, string, bool) error
|
||||
DefaultRuntime() string
|
||||
EnableCDI()
|
||||
GetRuntimeConfig(string) (RuntimeConfig, error)
|
||||
RemoveRuntime(string) error
|
||||
Save(string) (int64, error)
|
||||
Set(string, interface{})
|
||||
String() string
|
||||
}
|
||||
|
||||
|
@ -96,13 +96,6 @@ func (c *Config) getRuntimeAnnotations(path []string) ([]string, error) {
|
||||
return annotations, nil
|
||||
}
|
||||
|
||||
// Set sets the specified containerd option.
|
||||
func (c *Config) Set(key string, value interface{}) {
|
||||
config := *c.Tree
|
||||
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, key}, value)
|
||||
*c.Tree = config
|
||||
}
|
||||
|
||||
// DefaultRuntime returns the default runtime for the containerd config
|
||||
func (c Config) DefaultRuntime() string {
|
||||
if runtime, ok := c.GetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "default_runtime_name"}).(string); ok {
|
||||
@ -111,6 +104,13 @@ func (c Config) DefaultRuntime() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// EnableCDI sets the enable_cdi field in the Containerd config to true.
|
||||
func (c *Config) EnableCDI() {
|
||||
config := *c.Tree
|
||||
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "enable_cdi"}, true)
|
||||
*c.Tree = config
|
||||
}
|
||||
|
||||
// RemoveRuntime removes a runtime from the containerd config
|
||||
func (c *Config) RemoveRuntime(name string) error {
|
||||
if c == nil || c.Tree == nil {
|
||||
|
@ -143,13 +143,6 @@ func (c *ConfigV1) RemoveRuntime(name string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Set sets the specified containerd option.
|
||||
func (c *ConfigV1) Set(key string, value interface{}) {
|
||||
config := *c.Tree
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", key}, value)
|
||||
*c.Tree = config
|
||||
}
|
||||
|
||||
// Save writes the config to a file
|
||||
func (c ConfigV1) Save(path string) (int64, error) {
|
||||
return (Config)(c).Save(path)
|
||||
@ -165,3 +158,9 @@ func (c *ConfigV1) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) {
|
||||
tree: runtimeData,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *ConfigV1) EnableCDI() {
|
||||
config := *c.Tree
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "enable_cdi"}, true)
|
||||
*c.Tree = config
|
||||
}
|
||||
|
@ -153,6 +153,9 @@ func (c *Config) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) {
|
||||
}, nil
|
||||
}
|
||||
|
||||
// EnableCDI is a no-op for CRI-O since CDI is always enabled where supported.
|
||||
func (c *Config) EnableCDI() {}
|
||||
|
||||
// CommandLineSource returns the CLI-based crio config loader
|
||||
func CommandLineSource(hostRoot string) toml.Loader {
|
||||
return toml.LoadFirst(
|
||||
|
@ -103,6 +103,24 @@ func (c Config) DefaultRuntime() string {
|
||||
return r
|
||||
}
|
||||
|
||||
// EnableCDI sets features.cdi to true in the docker config.
|
||||
func (c *Config) EnableCDI() {
|
||||
if c == nil {
|
||||
return
|
||||
}
|
||||
config := *c
|
||||
|
||||
features, ok := config["features"].(map[string]bool)
|
||||
if !ok {
|
||||
features = make(map[string]bool)
|
||||
}
|
||||
features["cdi"] = true
|
||||
|
||||
config["features"] = features
|
||||
|
||||
*c = config
|
||||
}
|
||||
|
||||
// RemoveRuntime removes a runtime from the docker config
|
||||
func (c *Config) RemoveRuntime(name string) error {
|
||||
if c == nil {
|
||||
@ -132,11 +150,6 @@ func (c *Config) RemoveRuntime(name string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Set sets the specified docker option
|
||||
func (c *Config) Set(key string, value interface{}) {
|
||||
(*c)[key] = value
|
||||
}
|
||||
|
||||
// Save writes the config to the specified path
|
||||
func (c Config) Save(path string) (int64, error) {
|
||||
output, err := json.MarshalIndent(c, "", " ")
|
||||
|
Loading…
Reference in New Issue
Block a user