Replace experimental and discover-mode

These changes replace the nvidia-container-runtime config options
experimental and discover-mode with a single mode config option.

Note that mode is now a string with a default value of "auto"
and a mode value of "legacy" is equivalent to experimental == false.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2022-05-09 15:42:59 +02:00
parent e0ad82e467
commit e591f3f26b
7 changed files with 108 additions and 117 deletions

View File

@ -25,15 +25,14 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/requirements"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
// experiemental represents the modifications required by the experimental runtime
type experimental struct {
// csvMode represents the modifications as performed by the csv runtime mode
type csvMode struct {
logger *logrus.Logger
discoverer discover.Discover
}
@ -45,9 +44,9 @@ const (
nvidiaRequireJetpackEnvvar = "NVIDIA_REQUIRE_JETPACK"
)
// NewExperimentalModifier creates a modifier that applies the experimental
// modications to an OCI spec if required by the runtime wrapper.
func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
// NewCSVModifier creates a modifier that applies modifications to an OCI spec if required by the runtime wrapper.
// The modifications are defined by CSV MountSpecs.
func NewCSVModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
rawSpec, err := ociSpec.Load()
if err != nil {
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
@ -67,64 +66,51 @@ func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec
NVIDIAContainerToolkitCLIExecutablePath: cfg.NVIDIACTKConfig.Path,
}
var d discover.Discover
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.DiscoverMode) {
case "legacy":
legacyDiscoverer, err := discover.NewLegacyDiscoverer(logger, config)
if err != nil {
return nil, fmt.Errorf("failed to create legacy discoverer: %v", err)
}
d = legacyDiscoverer
case "csv":
// TODO: Once the devices have been encapsulated in the CUDA image, this can be moved to before the
// visible devices are checked.
image, err := image.NewCUDAImageFromSpec(rawSpec)
if err != nil {
return nil, err
}
if err := checkRequirements(logger, &image); err != nil {
return nil, fmt.Errorf("requirements not met: %v", err)
}
csvFiles, err := csv.GetFileList(csv.DefaultMountSpecPath)
if err != nil {
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
}
nvidiaRequireJetpack, _ := ociSpec.LookupEnv(nvidiaRequireJetpackEnvvar)
if nvidiaRequireJetpack != "csv-mounts=all" {
csvFiles = csv.BaseFilesOnly(csvFiles)
}
csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.Root)
if err != nil {
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
}
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, config)
if err != nil {
return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err)
}
createSymlinksHook, err := discover.NewCreateSymlinksHook(logger, csvFiles, csvDiscoverer, config)
if err != nil {
return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err)
}
d = discover.NewList(csvDiscoverer, ldcacheUpdateHook, createSymlinksHook)
default:
return nil, fmt.Errorf("invalid discover mode: %v", cfg.NVIDIAContainerRuntimeConfig.DiscoverMode)
// TODO: Once the devices have been encapsulated in the CUDA image, this can be moved to before the
// visible devices are checked.
image, err := image.NewCUDAImageFromSpec(rawSpec)
if err != nil {
return nil, err
}
return newExperimentalModifierFromDiscoverer(logger, d)
if err := checkRequirements(logger, &image); err != nil {
return nil, fmt.Errorf("requirements not met: %v", err)
}
csvFiles, err := csv.GetFileList(cfg.NVIDIAContainerRuntimeConfig.Modes.CSV.MountSpecPath)
if err != nil {
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
}
nvidiaRequireJetpack, _ := ociSpec.LookupEnv(nvidiaRequireJetpackEnvvar)
if nvidiaRequireJetpack != "csv-mounts=all" {
csvFiles = csv.BaseFilesOnly(csvFiles)
}
csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.Root)
if err != nil {
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
}
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, config)
if err != nil {
return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err)
}
createSymlinksHook, err := discover.NewCreateSymlinksHook(logger, csvFiles, csvDiscoverer, config)
if err != nil {
return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err)
}
d := discover.NewList(csvDiscoverer, ldcacheUpdateHook, createSymlinksHook)
return newModifierFromDiscoverer(logger, d)
}
// newExperimentalModifierFromDiscoverer created a modifier that aplies the discovered
// newModifierFromDiscoverer creates a modifier that applies the discovered
// modifications to an OCI spec if required by the runtime wrapper.
func newExperimentalModifierFromDiscoverer(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
m := experimental{
func newModifierFromDiscoverer(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
m := csvMode{
logger: logger,
discoverer: d,
}
@ -133,7 +119,7 @@ func newExperimentalModifierFromDiscoverer(logger *logrus.Logger, d discover.Dis
// Modify applies the required modifications to the incoming OCI spec. These modifications
// are applied in-place.
func (m experimental) Modify(spec *specs.Spec) error {
func (m csvMode) Modify(spec *specs.Spec) error {
err := nvidiaContainerRuntimeHookRemover{m.logger}.Modify(spec)
if err != nil {
return fmt.Errorf("failed to remove existing hooks: %v", err)

View File

@ -28,7 +28,7 @@ import (
"github.com/stretchr/testify/require"
)
func TestNewExperimentalModifier(t *testing.T) {
func TestNewCSVModifier(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
@ -63,42 +63,6 @@ func TestNewExperimentalModifier(t *testing.T) {
visibleDevices: "void",
expectedNil: true,
},
{
description: "empty config raises error",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{},
},
visibleDevices: "all",
expectedError: fmt.Errorf("invalid discover mode"),
},
{
description: "non-legacy discover mode raises error",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
DiscoverMode: "non-legacy",
},
},
visibleDevices: "all",
expectedError: fmt.Errorf("invalid discover mode"),
},
{
description: "legacy discover mode returns modifier",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
DiscoverMode: "legacy",
},
},
visibleDevices: "all",
},
{
description: "csv discover mode returns modifier",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
DiscoverMode: "csv",
},
},
visibleDevices: "all",
},
}
for _, tc := range testCases {
@ -115,7 +79,7 @@ func TestNewExperimentalModifier(t *testing.T) {
}
}
m, err := NewExperimentalModifier(logger, tc.cfg, spec)
m, err := NewCSVModifier(logger, tc.cfg, spec)
if tc.expectedError != nil {
require.Error(t, err)
} else {
@ -304,7 +268,7 @@ func TestExperimentalModifier(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
m, err := newExperimentalModifierFromDiscoverer(logger, tc.discover)
m, err := newModifierFromDiscoverer(logger, tc.discover)
require.NoError(t, err)
err = m.Modify(tc.spec)

View File

@ -21,6 +21,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
"github.com/sirupsen/logrus"
@ -61,9 +62,12 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [
// newSpecModifier is a factory method that constructs an OCI spec modifier based on the provided config.
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
if !cfg.NVIDIAContainerRuntimeConfig.Experimental {
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) {
case "legacy":
return modifier.NewStableRuntimeModifier(logger), nil
case "csv":
return modifier.NewCSVModifier(logger, cfg, ociSpec)
}
return modifier.NewExperimentalModifier(logger, cfg, ociSpec)
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
}

View File

@ -49,16 +49,16 @@ func TestFactoryMethod(t *testing.T) {
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "legacy",
},
},
},
{
description: "experimental flag supported",
description: "csv mode is supported",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Experimental: true,
DiscoverMode: "legacy",
Runtimes: []string{"runc"},
Runtimes: []string{"runc"},
Mode: "csv",
},
},
spec: &specs.Spec{
@ -69,6 +69,43 @@ func TestFactoryMethod(t *testing.T) {
},
},
},
{
description: "non-legacy discover mode raises error",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "non-legacy",
},
},
expectedError: true,
},
{
description: "legacy discover mode returns modifier",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "legacy",
},
},
},
{
description: "csv discover mode returns modifier",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "csv",
},
},
},
{
description: "empty mode raises error",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
},
},
expectedError: true,
},
}
for _, tc := range testCases {
@ -79,7 +116,7 @@ func TestFactoryMethod(t *testing.T) {
require.NoError(t, err)
require.NoError(t, json.NewEncoder(specFile).Encode(tc.spec))
argv := []string{"--bundle", bundleDir}
argv := []string{"--bundle", bundleDir, "create"}
_, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv)
if tc.expectedError {

View File

@ -13,6 +13,7 @@ import (
"strings"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
)
@ -73,7 +74,7 @@ func doPrestart() {
hook := getHookConfig()
cli := hook.NvidiaContainerCLI
if hook.NVIDIAContainerRuntime.Experimental && !*forceflag {
if !*forceflag && info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.")
}
@ -178,3 +179,12 @@ func main() {
os.Exit(2)
}
}
// logInterceptor is a minimal implementation of the info.Logger interface
// backed by the standard library log package. An instance is handed to
// info.ResolveAutoMode so that mode resolution can emit log output.
type logInterceptor struct{}

// Infof formats the message according to format and args and writes it
// through log.Printf.
func (li *logInterceptor) Infof(format string, args ...interface{}) {
	log.Printf(format, args...)
}

// Debugf is deliberately a no-op: debug-level messages are discarded.
func (li *logInterceptor) Debugf(format string, args ...interface{}) {
}

View File

@ -62,8 +62,6 @@ func TestGetConfig(t *testing.T) {
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
Experimental: false,
DiscoverMode: "auto",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc"},
Mode: "auto",
@ -97,8 +95,6 @@ func TestGetConfig(t *testing.T) {
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
Experimental: true,
DiscoverMode: "not-legacy",
LogLevel: "debug",
Runtimes: []string{"/some/runtime"},
Mode: "not-auto",
@ -136,8 +132,6 @@ func TestGetConfig(t *testing.T) {
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
Experimental: true,
DiscoverMode: "not-legacy",
LogLevel: "debug",
Runtimes: []string{"/some/runtime"},
Mode: "not-auto",

View File

@ -33,8 +33,6 @@ const (
// RuntimeConfig stores the config options for the NVIDIA Container Runtime
type RuntimeConfig struct {
DebugFilePath string `toml:"debug"`
Experimental bool `toml:"experimental"`
DiscoverMode string `toml:"discover-mode"`
// LogLevel defines the logging level for the application
LogLevel string `toml:"log-level"`
// Runtimes defines the candidates for the low-level runtime
@ -80,8 +78,6 @@ func getRuntimeConfigFrom(toml *toml.Tree) (*RuntimeConfig, error) {
func GetDefaultRuntimeConfig() *RuntimeConfig {
c := RuntimeConfig{
DebugFilePath: "/dev/null",
Experimental: false,
DiscoverMode: auto,
LogLevel: logrus.InfoLevel.String(),
Runtimes: []string{
dockerRuncExecutableName,