From 1c05a463bdcadd6e5036f7e989126638623625cc Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 25 Feb 2022 10:54:04 +0200 Subject: [PATCH] Add csv discovery mode to experimental runtime This change adds support for a "csv" discovery mode to the experimental runtime. If this is set with experimental = true, a CSV-based discovery of devices and mounts is used to define the modifications required to the OCI spec. The edits are expressed as CDI ContainerEdits. Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime/README.md | 1 + .../modifier/experimental.go | 27 ++++++- .../modifier/experimental_test.go | 72 +++++++++++++++++-- .../runtime_factory.go | 10 ++- .../runtime_factory_test.go | 32 ++++++++- internal/edits/device.go | 45 ++++++++++++ internal/edits/edits.go | 27 +++++++ internal/edits/hook.go | 1 + internal/edits/mount.go | 53 ++++++++++++++ 9 files changed, 255 insertions(+), 13 deletions(-) create mode 100644 internal/edits/device.go create mode 100644 internal/edits/mount.go diff --git a/cmd/nvidia-container-runtime/README.md b/cmd/nvidia-container-runtime/README.md index af4e1040..705701ee 100644 --- a/cmd/nvidia-container-runtime/README.md +++ b/cmd/nvidia-container-runtime/README.md @@ -18,6 +18,7 @@ experimental = true When this setting is enabled, the modifications made to the OCI specification are controlled by the `nvidia-container-runtime.discover-mode` option, with the following mode supported: * `"legacy"`: This mode mirrors the behaviour of the standard mode, inserting the NVIDIA Container Runtime Hook as a `prestart` hook into the container's OCI specification. +* `"csv"`: This mode uses CSV files at `/etc/nvidia-container-runtime/host-files-for-container.d` to define the devices and mounts that are to be injected into a container when it is created. 
### Notes on using the docker CLI diff --git a/cmd/nvidia-container-runtime/modifier/experimental.go b/cmd/nvidia-container-runtime/modifier/experimental.go index d5a42c05..f169e3b7 100644 --- a/cmd/nvidia-container-runtime/modifier/experimental.go +++ b/cmd/nvidia-container-runtime/modifier/experimental.go @@ -21,6 +21,7 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv" "github.com/NVIDIA/nvidia-container-toolkit/internal/edits" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" "github.com/opencontainers/runtime-spec/specs-go" @@ -33,9 +34,27 @@ type experimental struct { discoverer discover.Discover } +const ( + visibleDevicesEnvvar = "NVIDIA_VISIBLE_DEVICES" + visibleDevicesVoid = "void" + + nvidiaRequireJetpackEnvvar = "NVIDIA_REQUIRE_JETPACK" +) + // NewExperimentalModifier creates a modifier that applies the experimental // modications to an OCI spec if required by the runtime wrapper. -func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config) (oci.SpecModifier, error) { +func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) { + if err := ociSpec.Load(); err != nil { + return nil, fmt.Errorf("failed to load OCI spec: %v", err) + } + + // In experimental mode, we check whether a modification is required at all and return the lowlevelRuntime directly + // if no modification is required. 
+ visibleDevices, exists := ociSpec.LookupEnv(visibleDevicesEnvvar) + if !exists || visibleDevices == "" || visibleDevices == visibleDevicesVoid { + logger.Infof("No modification required: %v=%v (exists=%v)", visibleDevicesEnvvar, visibleDevices, exists) + return nil, nil + } logger.Infof("Constructing modifier from config: %+v", cfg) root := cfg.NVIDIAContainerCLIConfig.Root @@ -48,6 +67,12 @@ func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config) (oci.Spe return nil, fmt.Errorf("failed to create legacy discoverer: %v", err) } d = legacyDiscoverer + case "csv": + csvDiscoverer, err := discover.NewFromCSV(logger, csv.DefaultRoot, "") + if err != nil { + return nil, fmt.Errorf("failed to create CSV discoverer: %v", err) + } + d = csvDiscoverer default: return nil, fmt.Errorf("invalid discover mode: %v", cfg.NVIDIAContainerRuntimeConfig.DiscoverMode) } diff --git a/cmd/nvidia-container-runtime/modifier/experimental_test.go b/cmd/nvidia-container-runtime/modifier/experimental_test.go index aefcff17..271a2a87 100644 --- a/cmd/nvidia-container-runtime/modifier/experimental_test.go +++ b/cmd/nvidia-container-runtime/modifier/experimental_test.go @@ -22,25 +22,54 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" + "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" "github.com/opencontainers/runtime-spec/specs-go" testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" ) -func TestConstructor(t *testing.T) { +func TestNewExperimentalModifier(t *testing.T) { logger, _ := testlog.NewNullLogger() testCases := []struct { - description string - cfg *config.Config - expectedError error + description string + cfg *config.Config + spec oci.Spec + visibleDevices string + expectedError error + expectedNil bool }{ + { + description: "spec load error returns error", + spec: &oci.SpecMock{ + LoadFunc: func() error { + return fmt.Errorf("load 
failed") + }, + }, + expectedError: fmt.Errorf("load failed"), + }, + { + description: "visible devices not set returns nil", + visibleDevices: "NOT_SET", + expectedNil: true, + }, + { + description: "visible devices empty returns nil", + visibleDevices: "", + expectedNil: true, + }, + { + description: "visible devices 'void' returns nil", + visibleDevices: "void", + expectedNil: true, + }, { description: "empty config raises error", cfg: &config.Config{ NVIDIAContainerRuntimeConfig: config.RuntimeConfig{}, }, - expectedError: fmt.Errorf("invalid discover mode"), + visibleDevices: "all", + expectedError: fmt.Errorf("invalid discover mode"), }, { description: "non-legacy discover mode raises error", @@ -49,7 +78,8 @@ func TestConstructor(t *testing.T) { DiscoverMode: "non-legacy", }, }, - expectedError: fmt.Errorf("invalid discover mode"), + visibleDevices: "all", + expectedError: fmt.Errorf("invalid discover mode"), }, { description: "legacy discover mode returns modifier", @@ -58,17 +88,45 @@ func TestConstructor(t *testing.T) { DiscoverMode: "legacy", }, }, + visibleDevices: "all", + }, + { + description: "csv discover mode returns modifier", + cfg: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + DiscoverMode: "csv", + }, + }, + visibleDevices: "all", }, } for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - _, err := NewExperimentalModifier(logger, tc.cfg) + spec := tc.spec + if spec == nil { + spec = &oci.SpecMock{ + LookupEnvFunc: func(s string) (string, bool) { + if tc.visibleDevices != "NOT_SET" && s == visibleDevicesEnvvar { + return tc.visibleDevices, true + } + return "", false + }, + } + } + + m, err := NewExperimentalModifier(logger, tc.cfg, spec) if tc.expectedError != nil { require.Error(t, err) } else { require.NoError(t, err) } + + if tc.expectedNil || tc.expectedError != nil { + require.Nil(t, m) + } else { + require.NotNil(t, m) + } }) } } diff --git a/cmd/nvidia-container-runtime/runtime_factory.go 
b/cmd/nvidia-container-runtime/runtime_factory.go index 8eb23302..20025e45 100644 --- a/cmd/nvidia-container-runtime/runtime_factory.go +++ b/cmd/nvidia-container-runtime/runtime_factory.go @@ -44,10 +44,14 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [ return nil, fmt.Errorf("error constructing low-level runtime: %v", err) } - specModifier, err := newSpecModifier(logger, cfg) + specModifier, err := newSpecModifier(logger, cfg, ociSpec) if err != nil { return nil, fmt.Errorf("failed to construct OCI spec modifier: %v", err) } + if specModifier == nil { + logger.Infof("Using low-level runtime with no modification") + return lowLevelRuntime, nil + } // Create the wrapping runtime with the specified modifier r := runtime.NewModifyingRuntimeWrapper( @@ -61,10 +65,10 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [ } // newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config. 
-func newSpecModifier(logger *logrus.Logger, cfg *config.Config) (oci.SpecModifier, error) { +func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) { if !cfg.NVIDIAContainerRuntimeConfig.Experimental { return modifier.NewStableRuntimeModifier(logger), nil } - return modifier.NewExperimentalModifier(logger, cfg) + return modifier.NewExperimentalModifier(logger, cfg, ociSpec) } diff --git a/cmd/nvidia-container-runtime/runtime_factory_test.go b/cmd/nvidia-container-runtime/runtime_factory_test.go index 145970d7..a1bbef26 100644 --- a/cmd/nvidia-container-runtime/runtime_factory_test.go +++ b/cmd/nvidia-container-runtime/runtime_factory_test.go @@ -17,9 +17,13 @@ package main import ( + "encoding/json" + "os" + "path/filepath" "testing" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/opencontainers/runtime-spec/specs-go" testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" ) @@ -30,7 +34,7 @@ func TestFactoryMethod(t *testing.T) { testCases := []struct { description string cfg *config.Config - argv []string + spec *specs.Spec expectedError bool }{ { @@ -39,11 +43,35 @@ func TestFactoryMethod(t *testing.T) { NVIDIAContainerRuntimeConfig: config.RuntimeConfig{}, }, }, + { + description: "experimental flag supported", + cfg: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + Experimental: true, + DiscoverMode: "legacy", + }, + }, + spec: &specs.Spec{ + Process: &specs.Process{ + Env: []string{ + "NVIDIA_VISIBLE_DEVICES=all", + }, + }, + }, + }, } for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - _, err := newNVIDIAContainerRuntime(logger, tc.cfg, tc.argv) + bundleDir := t.TempDir() + + specFile, err := os.Create(filepath.Join(bundleDir, "config.json")) + require.NoError(t, err) + require.NoError(t, json.NewEncoder(specFile).Encode(tc.spec)) + + argv := []string{"--bundle", bundleDir} + + _, err = 
newNVIDIAContainerRuntime(logger, tc.cfg, argv) if tc.expectedError { require.Error(t, err) } else { diff --git a/internal/edits/device.go b/internal/edits/device.go new file mode 100644 index 00000000..5e11f41d --- /dev/null +++ b/internal/edits/device.go @@ -0,0 +1,45 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package edits + +import ( + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" + "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" + "github.com/container-orchestrated-devices/container-device-interface/specs-go" +) + +type device discover.Device + +// toEdits converts a discovered device to CDI Container Edits. +func (d device) toEdits() *cdi.ContainerEdits { + e := cdi.ContainerEdits{ + ContainerEdits: &specs.ContainerEdits{ + DeviceNodes: []*specs.DeviceNode{d.toSpec()}, + }, + } + return &e +} + +// toSpec converts a discovered Device to a CDI Spec Device. Note +// that missing info is filled in when edits are applied by querying the Device node. 
+func (d device) toSpec() *specs.DeviceNode { + s := specs.DeviceNode{ + Path: d.Path, + } + + return &s +} diff --git a/internal/edits/edits.go b/internal/edits/edits.go index 36fecd81..4f4ee150 100644 --- a/internal/edits/edits.go +++ b/internal/edits/edits.go @@ -34,12 +34,30 @@ type edits struct { // NewSpecEdits creates a SpecModifier that defines the required OCI spec edits (as CDI ContainerEdits) from the specified // discoverer. func NewSpecEdits(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) { + devices, err := d.Devices() + if err != nil { + return nil, fmt.Errorf("failed to discover devices: %v", err) + } + + mounts, err := d.Mounts() + if err != nil { + return nil, fmt.Errorf("failed to discover mounts: %v", err) + } + hooks, err := d.Hooks() if err != nil { return nil, fmt.Errorf("failed to discover hooks: %v", err) } c := cdi.ContainerEdits{} + for _, d := range devices { + c.Append(device(d).toEdits()) + } + + for _, m := range mounts { + c.Append(mount(m).toEdits()) + } + for _, h := range hooks { c.Append(hook(h).toEdits()) } @@ -58,9 +76,18 @@ func (e *edits) Modify(spec *ociSpecs.Spec) error { return nil } + e.logger.Info("Mounts:") + for _, mount := range e.Mounts { + e.logger.Infof("Mounting %v at %v", mount.HostPath, mount.ContainerPath) + } + e.logger.Infof("Devices:") + for _, device := range e.DeviceNodes { + e.logger.Infof("Injecting %v", device.Path) + } e.logger.Infof("Hooks:") for _, hook := range e.Hooks { e.logger.Infof("Injecting %v", hook.Args) } + return e.Apply(spec) } diff --git a/internal/edits/hook.go b/internal/edits/hook.go index 990d8565..a0e56a85 100644 --- a/internal/edits/hook.go +++ b/internal/edits/hook.go @@ -42,5 +42,6 @@ func (d hook) toSpec() *specs.Hook { Path: d.Path, Args: d.Args, } + return &s } diff --git a/internal/edits/mount.go b/internal/edits/mount.go new file mode 100644 index 00000000..3b85034e --- /dev/null +++ b/internal/edits/mount.go @@ -0,0 +1,53 @@ +/** +# Copyright (c) 2022, 
NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package edits + +import ( + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" + "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" + "github.com/container-orchestrated-devices/container-device-interface/specs-go" +) + +type mount discover.Mount + +// toEdits converts a discovered mount to CDI Container Edits. +func (d mount) toEdits() *cdi.ContainerEdits { + e := cdi.ContainerEdits{ + ContainerEdits: &specs.ContainerEdits{ + Mounts: []*specs.Mount{d.toSpec()}, + }, + } + return &e +} + +// toSpec converts a discovered Mount to a CDI Spec Mount. Note +// that missing info is filled in when edits are applied by querying the Mount node. +func (d mount) toSpec() *specs.Mount { + s := specs.Mount{ + HostPath: d.Path, + // TODO: We need to update the container path + ContainerPath: d.Path, + Options: []string{ + "ro", + "nosuid", + "nodev", + "bind", + }, + } + + return &s +}