Add csv discovery mode to experimental runtime

This change adds support for a "csv" discovery mode to the experimental runtime.
If this is set with experimental = true, a CSV-based discovery of devices and
mounts are used to define the modifications required to the OCI spec. The edits
are expressed as CDI ContainerEdits.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2022-02-25 10:54:04 +02:00
parent 14f9e986c9
commit 1c05a463bd
9 changed files with 255 additions and 13 deletions

View File

@ -18,6 +18,7 @@ experimental = true
When this setting is enabled, the modifications made to the OCI specification are controlled by the `nvidia-container-runtime.discover-mode` option, with the following mode supported:
* `"legacy"`: This mode mirrors the behaviour of the standard mode, inserting the NVIDIA Container Runtime Hook as a `prestart` hook into the container's OCI specification.
* `"csv"`: This mode uses CSV files at `/etc/nvidia-container-runtime/host-files-for-container.d` to define the devices and mounts that are to be injected into a container when it is created.
### Notes on using the docker CLI

View File

@ -21,6 +21,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/opencontainers/runtime-spec/specs-go"
@ -33,9 +34,27 @@ type experimental struct {
discoverer discover.Discover
}
const (
visibleDevicesEnvvar = "NVIDIA_VISIBLE_DEVICES"
visibleDevicesVoid = "void"
nvidiaRequireJetpackEnvvar = "NVIDIA_REQUIRE_JETPACK"
)
// NewExperimentalModifier creates a modifier that applies the experimental
// modications to an OCI spec if required by the runtime wrapper.
func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config) (oci.SpecModifier, error) {
func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
if err := ociSpec.Load(); err != nil {
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
}
// In experimental mode, we check whether a modification is required at all and return the lowlevelRuntime directly
// if no modification is required.
visibleDevices, exists := ociSpec.LookupEnv(visibleDevicesEnvvar)
if !exists || visibleDevices == "" || visibleDevices == visibleDevicesVoid {
logger.Infof("No modification required: %v=%v (exists=%v)", visibleDevicesEnvvar, visibleDevices, exists)
return nil, nil
}
logger.Infof("Constructing modifier from config: %+v", cfg)
root := cfg.NVIDIAContainerCLIConfig.Root
@ -48,6 +67,12 @@ func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config) (oci.Spe
return nil, fmt.Errorf("failed to create legacy discoverer: %v", err)
}
d = legacyDiscoverer
case "csv":
csvDiscoverer, err := discover.NewFromCSV(logger, csv.DefaultRoot, "")
if err != nil {
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
}
d = csvDiscoverer
default:
return nil, fmt.Errorf("invalid discover mode: %v", cfg.NVIDIAContainerRuntimeConfig.DiscoverMode)
}

View File

@ -22,25 +22,54 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestConstructor(t *testing.T) {
func TestNewExperimentalModifier(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
description string
cfg *config.Config
expectedError error
description string
cfg *config.Config
spec oci.Spec
visibleDevices string
expectedError error
expectedNil bool
}{
{
description: "spec load error returns error",
spec: &oci.SpecMock{
LoadFunc: func() error {
return fmt.Errorf("load failed")
},
},
expectedError: fmt.Errorf("load failed"),
},
{
description: "visible devices not set returns nil",
visibleDevices: "NOT_SET",
expectedNil: true,
},
{
description: "visible devices empty returns nil",
visibleDevices: "",
expectedNil: true,
},
{
description: "visible devices 'void' returns nil",
visibleDevices: "void",
expectedNil: true,
},
{
description: "empty config raises error",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{},
},
expectedError: fmt.Errorf("invalid discover mode"),
visibleDevices: "all",
expectedError: fmt.Errorf("invalid discover mode"),
},
{
description: "non-legacy discover mode raises error",
@ -49,7 +78,8 @@ func TestConstructor(t *testing.T) {
DiscoverMode: "non-legacy",
},
},
expectedError: fmt.Errorf("invalid discover mode"),
visibleDevices: "all",
expectedError: fmt.Errorf("invalid discover mode"),
},
{
description: "legacy discover mode returns modifier",
@ -58,17 +88,45 @@ func TestConstructor(t *testing.T) {
DiscoverMode: "legacy",
},
},
visibleDevices: "all",
},
{
description: "csv discover mode returns modifier",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
DiscoverMode: "csv",
},
},
visibleDevices: "all",
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
_, err := NewExperimentalModifier(logger, tc.cfg)
spec := tc.spec
if spec == nil {
spec = &oci.SpecMock{
LookupEnvFunc: func(s string) (string, bool) {
if tc.visibleDevices != "NOT_SET" && s == visibleDevicesEnvvar {
return tc.visibleDevices, true
}
return "", false
},
}
}
m, err := NewExperimentalModifier(logger, tc.cfg, spec)
if tc.expectedError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
if tc.expectedNil || tc.expectedError != nil {
require.Nil(t, m)
} else {
require.NotNil(t, m)
}
})
}
}

View File

@ -44,10 +44,14 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [
return nil, fmt.Errorf("error constructing low-level runtime: %v", err)
}
specModifier, err := newSpecModifier(logger, cfg)
specModifier, err := newSpecModifier(logger, cfg, ociSpec)
if err != nil {
return nil, fmt.Errorf("failed to construct OCI spec modifier: %v", err)
}
if specModifier == nil {
logger.Infof("Using low-level runtime with no modification")
return lowLevelRuntime, nil
}
// Create the wrapping runtime with the specified modifier
r := runtime.NewModifyingRuntimeWrapper(
@ -61,10 +65,10 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [
}
// newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config.
func newSpecModifier(logger *logrus.Logger, cfg *config.Config) (oci.SpecModifier, error) {
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
if !cfg.NVIDIAContainerRuntimeConfig.Experimental {
return modifier.NewStableRuntimeModifier(logger), nil
}
return modifier.NewExperimentalModifier(logger, cfg)
return modifier.NewExperimentalModifier(logger, cfg, ociSpec)
}

View File

@ -17,9 +17,13 @@
package main
import (
"encoding/json"
"os"
"path/filepath"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
@ -30,7 +34,7 @@ func TestFactoryMethod(t *testing.T) {
testCases := []struct {
description string
cfg *config.Config
argv []string
spec *specs.Spec
expectedError bool
}{
{
@ -39,11 +43,35 @@ func TestFactoryMethod(t *testing.T) {
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{},
},
},
{
description: "experimental flag supported",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Experimental: true,
DiscoverMode: "legacy",
},
},
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{
"NVIDIA_VISIBLE_DEVICES=all",
},
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
_, err := newNVIDIAContainerRuntime(logger, tc.cfg, tc.argv)
bundleDir := t.TempDir()
specFile, err := os.Create(filepath.Join(bundleDir, "config.json"))
require.NoError(t, err)
require.NoError(t, json.NewEncoder(specFile).Encode(tc.spec))
argv := []string{"--bundle", bundleDir}
_, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv)
if tc.expectedError {
require.Error(t, err)
} else {

45
internal/edits/device.go Normal file
View File

@ -0,0 +1,45 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package edits
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
type device discover.Device
// toEdits converts a discovered device to CDI Container Edits.
func (d device) toEdits() *cdi.ContainerEdits {
e := cdi.ContainerEdits{
ContainerEdits: &specs.ContainerEdits{
DeviceNodes: []*specs.DeviceNode{d.toSpec()},
},
}
return &e
}
// toSpec converts a discovered Device to a CDI Spec Device. Note
// that missing info is filled in when edits are applied by querying the Device node.
func (d device) toSpec() *specs.DeviceNode {
s := specs.DeviceNode{
Path: d.Path,
}
return &s
}

View File

@ -34,12 +34,30 @@ type edits struct {
// NewSpecEdits creates a SpecModifier that defines the required OCI spec edits (as CDI ContainerEdits) from the specified
// discoverer.
func NewSpecEdits(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
devices, err := d.Devices()
if err != nil {
return nil, fmt.Errorf("failed to discover devices: %v", err)
}
mounts, err := d.Mounts()
if err != nil {
return nil, fmt.Errorf("failed to discover mounts: %v", err)
}
hooks, err := d.Hooks()
if err != nil {
return nil, fmt.Errorf("failed to discover hooks: %v", err)
}
c := cdi.ContainerEdits{}
for _, d := range devices {
c.Append(device(d).toEdits())
}
for _, m := range mounts {
c.Append(mount(m).toEdits())
}
for _, h := range hooks {
c.Append(hook(h).toEdits())
}
@ -58,9 +76,18 @@ func (e *edits) Modify(spec *ociSpecs.Spec) error {
return nil
}
e.logger.Info("Mounts:")
for _, mount := range e.Mounts {
e.logger.Infof("Mounting %v at %v", mount.HostPath, mount.ContainerPath)
}
e.logger.Infof("Devices:")
for _, device := range e.DeviceNodes {
e.logger.Infof("Injecting %v", device.Path)
}
e.logger.Infof("Hooks:")
for _, hook := range e.Hooks {
e.logger.Infof("Injecting %v", hook.Args)
}
return e.Apply(spec)
}

View File

@ -42,5 +42,6 @@ func (d hook) toSpec() *specs.Hook {
Path: d.Path,
Args: d.Args,
}
return &s
}

53
internal/edits/mount.go Normal file
View File

@ -0,0 +1,53 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package edits
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
type mount discover.Mount
// toEdits converts a discovered mount to CDI Container Edits.
func (d mount) toEdits() *cdi.ContainerEdits {
e := cdi.ContainerEdits{
ContainerEdits: &specs.ContainerEdits{
Mounts: []*specs.Mount{d.toSpec()},
},
}
return &e
}
// toSpec converts a discovered Mount to a CDI Spec Mount. Note
// that missing info is filled in when edits are applied by querying the Mount node.
func (d mount) toSpec() *specs.Mount {
s := specs.Mount{
HostPath: d.Path,
// TODO: We need to update the container path
ContainerPath: d.Path,
Options: []string{
"ro",
"nosuid",
"nodev",
"bind",
},
}
return &s
}