Implement experimental modifier for NVIDIA Container Runtime

This change enables the experimental mode of the NVIDIA Container Runtime. If
enabled, the nvidia-container-runtime.discover-mode config option is
queried to determine how required OCI spec modifications should be defined.
If "legacy" is selected, the existing NVIDIA Container Runtime hook is
discovered and injected into the OCI spec.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2022-03-15 14:29:14 +02:00
parent 9dfe60b8b7
commit 239b6d3739
10 changed files with 531 additions and 20 deletions

View File

@ -39,7 +39,7 @@ func run(argv []string) (rerr error) {
runtime, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv)
if err != nil {
return fmt.Errorf("error creating runtime: %v", err)
return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err)
}
return runtime.Exec(argv)

View File

@ -0,0 +1,119 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package modifier
import (
"fmt"
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
// experimental represents the modifications required by the experimental runtime.
// It bundles the logger and the discoverer that Modify passes on when building
// the edits for an OCI spec.
type experimental struct {
// logger is handed to the spec-edit construction when Modify runs.
logger *logrus.Logger
// discoverer is the source from which the required spec edits are built.
discoverer discover.Discover
}
// NewExperimentalModifier creates a modifier that applies the experimental
// modifications to an OCI spec if required by the runtime wrapper.
//
// The discoverer backing the modifier is selected by cfg.DiscoverMode. Only
// "legacy" is handled; any other value (including the empty string) results in
// an "invalid discover mode" error. A nil cfg is reported as an error rather
// than causing a nil-pointer dereference.
func NewExperimentalModifier(logger *logrus.Logger, cfg *config.RuntimeConfig) (oci.SpecModifier, error) {
	if cfg == nil {
		return nil, fmt.Errorf("invalid config: nil")
	}

	logger.Infof("Constructing modifier from config: %+v", cfg)

	// TODO: We need to specify the root
	root := ""

	var d discover.Discover
	switch cfg.DiscoverMode {
	case "legacy":
		legacyDiscoverer, err := discover.NewLegacyDiscoverer(logger, root)
		if err != nil {
			// Wrap with %w so the underlying cause stays reachable through
			// errors.Is / errors.As; the rendered message is unchanged.
			return nil, fmt.Errorf("failed to create legacy discoverer: %w", err)
		}
		d = legacyDiscoverer
	default:
		return nil, fmt.Errorf("invalid discover mode: %v", cfg.DiscoverMode)
	}

	return newExperimentalModifierFromDiscoverer(logger, d)
}
// newExperimentalModifierFromDiscoverer creates a modifier that applies the
// discovered modifications to an OCI spec if required by the runtime wrapper.
// The returned error is always nil.
func newExperimentalModifierFromDiscoverer(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
	return &experimental{logger: logger, discoverer: d}, nil
}
// Modify applies the required modifications to the incoming OCI spec. These
// modifications are applied in-place.
//
// The spec is first checked for compatibility; if that check fails, an error is
// returned before any edits are built or applied. Otherwise the edits are built
// from the discoverer and applied to spec. Returned errors wrap their cause
// with %w so callers can inspect it with errors.Is / errors.As.
func (m experimental) Modify(spec *specs.Spec) error {
	if err := m.assertSpecIsCompatible(spec); err != nil {
		return fmt.Errorf("OCI specification cannot be modified: %w", err)
	}

	specEdits, err := edits.NewSpecEdits(m.logger, m.discoverer)
	if err != nil {
		return fmt.Errorf("failed to get required container edits: %w", err)
	}

	return specEdits.Modify(spec)
}
// assertSpecIsCompatible returns an error if the prestart hooks of spec already
// include one of the hooks matched by findStableHook. A nil spec, or a spec
// without any hooks, is treated as compatible.
func (m experimental) assertSpecIsCompatible(spec *specs.Spec) error {
	if spec == nil || spec.Hooks == nil {
		return nil
	}

	existing := findStableHook(spec.Hooks.Prestart)
	if existing == "" {
		return nil
	}

	return fmt.Errorf("spec already contains required 'prestart' hook: %v", existing)
}
// findStableHook scans the given OCI hooks for the nvidia-container-runtime-hook
// or the nvidia-container-toolkit executable, comparing on the base name of each
// hook path. Such hooks are included, for example, by the non-experimental
// nvidia-container-runtime or by docker when the --gpus flag is specified.
//
// The full path of the first matching hook is returned; the empty string means
// that no such hook is present.
func findStableHook(hooks []specs.Hook) string {
	for i := range hooks {
		candidate := hooks[i].Path
		switch filepath.Base(candidate) {
		case nvidiaContainerRuntimeHookExecuable, nvidiaContainerToolkitExecutable:
			return candidate
		}
	}

	return ""
}

View File

@ -0,0 +1,258 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package modifier
import (
"fmt"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
// TestConstructor exercises NewExperimentalModifier with different discover
// modes. Only the presence or absence of an error is asserted; the message of
// expectedError is not compared.
func TestConstructor(t *testing.T) {
	nullLogger, _ := testlog.NewNullLogger()

	type constructorCase struct {
		description   string
		cfg           *config.RuntimeConfig
		expectedError error
	}

	cases := []constructorCase{
		{
			description:   "empty config raises error",
			cfg:           &config.RuntimeConfig{},
			expectedError: fmt.Errorf("invalid discover mode"),
		},
		{
			description:   "non-legacy discover mode raises error",
			cfg:           &config.RuntimeConfig{DiscoverMode: "non-legacy"},
			expectedError: fmt.Errorf("invalid discover mode"),
		},
		{
			description: "legacy discover mode returns modifier",
			cfg:         &config.RuntimeConfig{DiscoverMode: "legacy"},
		},
	}

	for _, c := range cases {
		t.Run(c.description, func(t *testing.T) {
			_, err := NewExperimentalModifier(nullLogger, c.cfg)
			if c.expectedError == nil {
				require.NoError(t, err)
				return
			}
			require.Error(t, err)
		})
	}
}
// TestExperimentalModifier runs the experimental modifier against a table of
// mocked discoverers and input specs. For every case it checks whether Modify
// returned an error (the error message itself is not compared) and that the
// spec, after the call, equals expectedSpec.
func TestExperimentalModifier(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
description string
discover *discover.DiscoverMock
spec *specs.Spec
expectedError error
expectedSpec *specs.Spec
}{
{
// Both spec and expectedSpec are left nil in this case.
description: "empty discoverer does not modify spec",
discover: &discover.DiscoverMock{},
},
{
// The mocked Hooks call fails, so Modify is expected to return an error.
description: "failed hooks discoverer returns error",
discover: &discover.DiscoverMock{
HooksFunc: func() ([]discover.Hook, error) {
return nil, fmt.Errorf("discover.Hooks error")
},
},
expectedError: fmt.Errorf("discover.Hooks error"),
},
{
// Starting from an empty spec, each discovered hook is expected under
// the spec field matching its Lifecycle.
description: "discovered hooks are injected into spec",
spec: &specs.Spec{},
discover: &discover.DiscoverMock{
HooksFunc: func() ([]discover.Hook, error) {
hooks := []discover.Hook{
{
Lifecycle: "prestart",
Path: "/hook/a",
Args: []string{"/hook/a", "arga"},
},
{
Lifecycle: "createContainer",
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
}
return hooks, nil
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/hook/a",
Args: []string{"/hook/a", "arga"},
},
},
CreateContainer: []specs.Hook{
{
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
},
},
},
},
{
// A pre-existing prestart hook is expected to be kept, with the
// discovered hook listed after it.
description: "existing hooks are maintained",
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/hook/a",
Args: []string{"/hook/a", "arga"},
},
},
},
},
discover: &discover.DiscoverMock{
HooksFunc: func() ([]discover.Hook, error) {
hooks := []discover.Hook{
{
Lifecycle: "prestart",
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
}
return hooks, nil
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/hook/a",
Args: []string{"/hook/a", "arga"},
},
{
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
},
},
},
},
{
// The input spec already has an nvidia-container-runtime-hook prestart
// hook: an error is expected and the spec is expected to stay as it was.
description: "modification fails for existing nvidia-container-runtime-hook",
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
discover: &discover.DiscoverMock{
HooksFunc: func() ([]discover.Hook, error) {
hooks := []discover.Hook{
{
Lifecycle: "prestart",
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
}
return hooks, nil
},
},
expectedError: fmt.Errorf("nvidia-container-runtime-hook already exists"),
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
},
{
// Same as the previous case, but for an existing nvidia-container-toolkit
// prestart hook.
description: "modification fails for existing nvidia-container-toolkit",
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
discover: &discover.DiscoverMock{
HooksFunc: func() ([]discover.Hook, error) {
hooks := []discover.Hook{
{
Lifecycle: "prestart",
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
}
return hooks, nil
},
},
expectedError: fmt.Errorf("nvidia-container-toolkit already exists"),
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
// Build the modifier directly from the mocked discoverer.
m, err := newExperimentalModifierFromDiscoverer(logger, tc.discover)
require.NoError(t, err)
err = m.Modify(tc.spec)
// Only whether an error occurred is asserted, not its message.
if tc.expectedError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
// The spec is compared in every case, including the failing ones.
require.EqualValues(t, tc.expectedSpec, tc.spec)
})
}
}

View File

@ -27,7 +27,10 @@ import (
)
const (
hookDefaultFilePath = "/usr/bin/nvidia-container-runtime-hook"
nvidiaContainerRuntimeHookExecuable = "nvidia-container-runtime-hook"
nvidiaContainerRuntimeHookDefaultPath = "/usr/bin/nvidia-container-runtime-hook"
nvidiaContainerToolkitExecutable = "nvidia-container-toolkit"
)
// NewStableRuntimeModifier creates an OCI spec modifier that inserts the NVIDIA Container Runtime Hook into an OCI
@ -47,9 +50,9 @@ type stableRuntimeModifier struct {
// Modify applies the required modification to the incoming OCI spec, inserting the nvidia-container-runtime-hook
// as a prestart hook.
func (m stableRuntimeModifier) Modify(spec *specs.Spec) error {
path, err := exec.LookPath("nvidia-container-runtime-hook")
path, err := exec.LookPath(nvidiaContainerRuntimeHookExecuable)
if err != nil {
path = hookDefaultFilePath
path = nvidiaContainerRuntimeHookDefaultPath
_, err = os.Stat(path)
if err != nil {
return err
@ -63,7 +66,7 @@ func (m stableRuntimeModifier) Modify(spec *specs.Spec) error {
spec.Hooks = &specs.Hooks{}
} else if len(spec.Hooks.Prestart) != 0 {
for _, hook := range spec.Hooks.Prestart {
if strings.Contains(hook.Path, "nvidia-container-runtime-hook") {
if strings.Contains(hook.Path, nvidiaContainerRuntimeHookExecuable) {
m.logger.Infof("existing nvidia prestart hook found in OCI spec")
return nil
}

View File

@ -46,9 +46,13 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.RuntimeConfig,
var specModifier oci.SpecModifier
if cfg.Experimental {
return nil, fmt.Errorf("experimental mode is not supported")
specModifier, err = modifier.NewExperimentalModifier(logger, cfg)
if err != nil {
return nil, fmt.Errorf("failed to construct experimental modifier: %v", err)
}
} else {
specModifier = modifier.NewStableRuntimeModifier(logger)
}
specModifier = modifier.NewStableRuntimeModifier(logger)
// Create the wrapping runtime with the specified modifier
r := runtime.NewModifyingRuntimeWrapper(

View File

@ -38,6 +38,7 @@ var (
type RuntimeConfig struct {
DebugFilePath string
Experimental bool
DiscoverMode string
}
// GetRuntimeConfig sets up the config struct. Values are read from a toml file
@ -74,6 +75,7 @@ func getRuntimeConfigFrom(reader io.Reader) (*RuntimeConfig, error) {
cfg.DebugFilePath = toml.GetDefault("nvidia-container-runtime.debug", cfg.DebugFilePath).(string)
cfg.Experimental = toml.GetDefault("nvidia-container-runtime.experimental", cfg.Experimental).(bool)
cfg.DiscoverMode = toml.GetDefault("nvidia-container-runtime.discover-mode", cfg.DiscoverMode).(string)
return cfg, nil
}
@ -83,6 +85,7 @@ func getDefaultRuntimeConfig() *RuntimeConfig {
c := RuntimeConfig{
DebugFilePath: "/dev/null",
Experimental: false,
DiscoverMode: "legacy",
}
return &c

View File

@ -58,15 +58,21 @@ func TestGerRuntimeConfig(t *testing.T) {
description: "empty config is default",
expectedConfig: &RuntimeConfig{
DebugFilePath: "/dev/null",
Experimental: false,
DiscoverMode: "legacy",
},
},
{
description: "config options set inline",
contents: []string{
"nvidia-container-runtime.debug = \"/foo/bar\"",
"nvidia-container-runtime.experimental = true",
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
},
expectedConfig: &RuntimeConfig{
DebugFilePath: "/foo/bar",
Experimental: true,
DiscoverMode: "not-legacy",
},
},
{
@ -74,9 +80,13 @@ func TestGerRuntimeConfig(t *testing.T) {
contents: []string{
"[nvidia-container-runtime]",
"debug = \"/foo/bar\"",
"experimental = true",
"discover-mode = \"not-legacy\"",
},
expectedConfig: &RuntimeConfig{
DebugFilePath: "/foo/bar",
Experimental: true,
DiscoverMode: "not-legacy",
},
},
}

View File

@ -22,21 +22,21 @@ import (
"github.com/sirupsen/logrus"
)
type stable struct {
type legacy struct {
logger *logrus.Logger
lookup lookup.Locator
}
const (
nvidiaContainerRuntimeHookExecuable = "nvidia-container-runtime-hook"
hookDefaultFilePath = "/usr/bin/nvidia-container-runtime-hook"
nvidiaContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
hookDefaultFilePath = "/usr/bin/nvidia-container-runtime-hook"
)
var _ Discover = (*stable)(nil)
var _ Discover = (*legacy)(nil)
// NewStableDiscoverer creates a discoverer for the stable runtime
func NewStableDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
d := stable{
// NewLegacyDiscoverer creates a discoverer for the legacy runtime
func NewLegacyDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
d := legacy{
logger: logger,
lookup: lookup.NewPathLocator(logger, root),
}
@ -44,18 +44,20 @@ func NewStableDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
return &d, nil
}
// Hooks returns the "stable" NVIDIA Container Runtime hook
func (d stable) Hooks() ([]Hook, error) {
// Hooks returns the "legacy" NVIDIA Container Runtime hook. This hook calls out
// to the nvidia-container-cli to make modifications to the container as defined
// in libnvidia-container.
func (d legacy) Hooks() ([]Hook, error) {
var hooks []Hook
hookPath := hookDefaultFilePath
targets, err := d.lookup.Locate(nvidiaContainerRuntimeHookExecuable)
targets, err := d.lookup.Locate(nvidiaContainerRuntimeHookExecutable)
if err != nil {
d.logger.Warnf("Failed to locate %v: %v", nvidiaContainerRuntimeHookExecuable, err)
d.logger.Warnf("Failed to locate %v: %v", nvidiaContainerRuntimeHookExecutable, err)
} else if len(targets) == 0 {
d.logger.Warnf("%v not found", nvidiaContainerRuntimeHookExecuable)
d.logger.Warnf("%v not found", nvidiaContainerRuntimeHookExecutable)
} else {
d.logger.Debugf("Found %v candidates: %v", nvidiaContainerRuntimeHookExecuable, targets)
d.logger.Debugf("Found %v candidates: %v", nvidiaContainerRuntimeHookExecutable, targets)
hookPath = targets[0]
}
d.logger.Debugf("Using NVIDIA Container Runtime Hook path %v", hookPath)

66
internal/edits/edits.go Normal file
View File

@ -0,0 +1,66 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package edits
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
ociSpecs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
// edits pairs a set of CDI container edits with a logger. It implements Modify,
// which applies the embedded edits to an OCI spec.
type edits struct {
// ContainerEdits holds the CDI edits that Modify applies.
cdi.ContainerEdits
// logger is used by Modify to report the hooks that are injected.
logger *logrus.Logger
}
// NewSpecEdits creates a SpecModifier that defines the required OCI spec edits
// (as CDI ContainerEdits) from the specified discoverer.
//
// Every hook returned by d.Hooks() is converted to CDI container edits and
// appended to a single set of edits. If hook discovery fails, the error is
// returned wrapped with %w so callers can inspect the cause with errors.Is /
// errors.As.
func NewSpecEdits(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
	hooks, err := d.Hooks()
	if err != nil {
		return nil, fmt.Errorf("failed to discover hooks: %w", err)
	}

	c := cdi.ContainerEdits{}
	for _, h := range hooks {
		c.Append(hook(h).toEdits())
	}

	e := edits{
		ContainerEdits: c,
		logger:         logger,
	}

	return &e, nil
}
// Modify applies the defined edits to the incoming OCI spec. A nil receiver, or
// a receiver without any underlying container edits, leaves the spec untouched
// and returns nil. Otherwise each hook is logged before the edits are applied.
func (e *edits) Modify(spec *ociSpecs.Spec) error {
	if e == nil {
		return nil
	}
	if e.ContainerEdits.ContainerEdits == nil {
		return nil
	}

	e.logger.Infof("Hooks:")
	for i := range e.Hooks {
		e.logger.Infof("Injecting %v", e.Hooks[i].Args)
	}

	return e.Apply(spec)
}

46
internal/edits/hook.go Normal file
View File

@ -0,0 +1,46 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package edits
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
// hook is a local type over discover.Hook so that the CDI conversion methods
// (toEdits, toSpec) can be defined on it in this package.
type hook discover.Hook
// toEdits wraps the discovered hook in CDI Container Edits whose only content
// is that single hook.
func (h hook) toEdits() *cdi.ContainerEdits {
	hooks := []*specs.Hook{h.toSpec()}

	return &cdi.ContainerEdits{
		ContainerEdits: &specs.ContainerEdits{Hooks: hooks},
	}
}
// toSpec builds the CDI Spec Hook for a discovered Hook, copying its lifecycle
// (as the hook name), path, and arguments. Note that missing info is filled in
// when edits are applied by querying the Hook node.
func (h hook) toSpec() *specs.Hook {
	return &specs.Hook{
		HookName: h.Lifecycle,
		Path:     h.Path,
		Args:     h.Args,
	}
}