mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-03-12 07:03:01 +00:00
Merge branch 'CNT-2953/new-options' into 'main'
Update config options to control OCI Spec modification See merge request nvidia/container-toolkit/container-toolkit!145
This commit is contained in:
commit
d62cce3c75
@ -18,8 +18,6 @@ package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
@ -33,8 +31,8 @@ import (
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// experimental represents the modifications required by the experimental runtime
|
||||
type experimental struct {
|
||||
// csvMode represents the modifications as performed by the csv runtime mode
|
||||
type csvMode struct {
|
||||
logger *logrus.Logger
|
||||
discoverer discover.Discover
|
||||
}
|
||||
@ -46,9 +44,9 @@ const (
|
||||
nvidiaRequireJetpackEnvvar = "NVIDIA_REQUIRE_JETPACK"
|
||||
)
|
||||
|
||||
// NewExperimentalModifier creates a modifier that applies the experimental
|
||||
// modifications to an OCI spec if required by the runtime wrapper.
|
||||
func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
|
||||
// NewCSVModifier creates a modifier that applies modifications to an OCI spec if required by the runtime wrapper.
|
||||
// The modifications are defined by CSV MountSpecs.
|
||||
func NewCSVModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
|
||||
rawSpec, err := ociSpec.Load()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
||||
@ -68,64 +66,51 @@ func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec
|
||||
NVIDIAContainerToolkitCLIExecutablePath: cfg.NVIDIACTKConfig.Path,
|
||||
}
|
||||
|
||||
var d discover.Discover
|
||||
|
||||
switch resolveAutoDiscoverMode(logger, cfg.NVIDIAContainerRuntimeConfig.DiscoverMode) {
|
||||
case "legacy":
|
||||
legacyDiscoverer, err := discover.NewLegacyDiscoverer(logger, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create legacy discoverer: %v", err)
|
||||
}
|
||||
d = legacyDiscoverer
|
||||
case "csv":
|
||||
// TODO: Once the devices have been encapsulated in the CUDA image, this can be moved to before the
|
||||
// visible devices are checked.
|
||||
image, err := image.NewCUDAImageFromSpec(rawSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := checkRequirements(logger, &image); err != nil {
|
||||
return nil, fmt.Errorf("requirements not met: %v", err)
|
||||
}
|
||||
|
||||
csvFiles, err := csv.GetFileList(csv.DefaultMountSpecPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
|
||||
}
|
||||
|
||||
nvidiaRequireJetpack, _ := ociSpec.LookupEnv(nvidiaRequireJetpackEnvvar)
|
||||
if nvidiaRequireJetpack != "csv-mounts=all" {
|
||||
csvFiles = csv.BaseFilesOnly(csvFiles)
|
||||
}
|
||||
|
||||
csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.Root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
|
||||
}
|
||||
|
||||
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err)
|
||||
}
|
||||
|
||||
createSymlinksHook, err := discover.NewCreateSymlinksHook(logger, csvFiles, csvDiscoverer, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err)
|
||||
}
|
||||
|
||||
d = discover.NewList(csvDiscoverer, ldcacheUpdateHook, createSymlinksHook)
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid discover mode: %v", cfg.NVIDIAContainerRuntimeConfig.DiscoverMode)
|
||||
// TODO: Once the devices have been encapsulated in the CUDA image, this can be moved to before the
|
||||
// visible devices are checked.
|
||||
image, err := image.NewCUDAImageFromSpec(rawSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newExperimentalModifierFromDiscoverer(logger, d)
|
||||
if err := checkRequirements(logger, &image); err != nil {
|
||||
return nil, fmt.Errorf("requirements not met: %v", err)
|
||||
}
|
||||
|
||||
csvFiles, err := csv.GetFileList(cfg.NVIDIAContainerRuntimeConfig.Modes.CSV.MountSpecPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
|
||||
}
|
||||
|
||||
nvidiaRequireJetpack, _ := ociSpec.LookupEnv(nvidiaRequireJetpackEnvvar)
|
||||
if nvidiaRequireJetpack != "csv-mounts=all" {
|
||||
csvFiles = csv.BaseFilesOnly(csvFiles)
|
||||
}
|
||||
|
||||
csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.Root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
|
||||
}
|
||||
|
||||
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err)
|
||||
}
|
||||
|
||||
createSymlinksHook, err := discover.NewCreateSymlinksHook(logger, csvFiles, csvDiscoverer, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err)
|
||||
}
|
||||
|
||||
d := discover.NewList(csvDiscoverer, ldcacheUpdateHook, createSymlinksHook)
|
||||
|
||||
return newModifierFromDiscoverer(logger, d)
|
||||
}
|
||||
|
||||
// newExperimentalModifierFromDiscoverer creates a modifier that applies the discovered
|
||||
// newModifierFromDiscoverer creates a modifier that applies the discovered
|
||||
// modifications to an OCI spec if required by the runtime wrapper.
|
||||
func newExperimentalModifierFromDiscoverer(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
|
||||
m := experimental{
|
||||
func newModifierFromDiscoverer(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
|
||||
m := csvMode{
|
||||
logger: logger,
|
||||
discoverer: d,
|
||||
}
|
||||
@ -134,7 +119,7 @@ func newExperimentalModifierFromDiscoverer(logger *logrus.Logger, d discover.Dis
|
||||
|
||||
// Modify applies the required modifications to the incoming OCI spec. These modifications
|
||||
// are applied in-place.
|
||||
func (m experimental) Modify(spec *specs.Spec) error {
|
||||
func (m csvMode) Modify(spec *specs.Spec) error {
|
||||
err := nvidiaContainerRuntimeHookRemover{m.logger}.Modify(spec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to remove existing hooks: %v", err)
|
||||
@ -179,47 +164,3 @@ func checkRequirements(logger *logrus.Logger, image *image.CUDA) error {
|
||||
|
||||
return r.Assert()
|
||||
}
|
||||
|
||||
// resolveAutoDiscoverMode determines the correct discover mode for the specified platform if set to "auto"
|
||||
func resolveAutoDiscoverMode(logger *logrus.Logger, mode string) (rmode string) {
|
||||
if mode != "auto" {
|
||||
return mode
|
||||
}
|
||||
defer func() {
|
||||
logger.Infof("Auto-detected discover mode as '%v'", rmode)
|
||||
}()
|
||||
|
||||
isTegra, reason := isTegraSystem()
|
||||
logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason)
|
||||
|
||||
if isTegra {
|
||||
return "csv"
|
||||
}
|
||||
|
||||
return "legacy"
|
||||
}
|
||||
|
||||
// isTegraSystem reports whether the host is detected as a Tegra-based system.
// The second return value is a human-readable reason for the verdict.
//
// Detection proceeds in two steps:
//  1. If /etc/nv_tegra_release exists and is not a directory, the system is
//     considered Tegra-based.
//  2. Otherwise /sys/devices/soc0/family is read and the system is considered
//     Tegra-based if its contents start with "tegra" (case-insensitive).
func isTegraSystem() (bool, string) {
	const tegraReleaseFile = "/etc/nv_tegra_release"
	const tegraFamilyFile = "/sys/devices/soc0/family"

	if info, err := os.Stat(tegraReleaseFile); err == nil && !info.IsDir() {
		return true, fmt.Sprintf("%v found", tegraReleaseFile)
	}

	// The family entry must be a readable file. Only a missing path or a
	// directory counts as "not found"; the previous `!info.IsDir()` check
	// rejected every regular file and made the read below unreachable.
	if info, err := os.Stat(tegraFamilyFile); err != nil || info.IsDir() {
		return false, fmt.Sprintf("%v not found", tegraFamilyFile)
	}

	contents, err := os.ReadFile(tegraFamilyFile)
	if err != nil {
		return false, fmt.Sprintf("could not read %v", tegraFamilyFile)
	}

	if strings.HasPrefix(strings.ToLower(string(contents)), "tegra") {
		return true, fmt.Sprintf("%v has 'tegra' prefix", tegraFamilyFile)
	}

	return false, fmt.Sprintf("%v has no 'tegra' prefix", tegraFamilyFile)
}
|
@ -28,7 +28,7 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNewExperimentalModifier(t *testing.T) {
|
||||
func TestNewCSVModifier(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
@ -63,42 +63,6 @@ func TestNewExperimentalModifier(t *testing.T) {
|
||||
visibleDevices: "void",
|
||||
expectedNil: true,
|
||||
},
|
||||
{
|
||||
description: "empty config raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{},
|
||||
},
|
||||
visibleDevices: "all",
|
||||
expectedError: fmt.Errorf("invalid discover mode"),
|
||||
},
|
||||
{
|
||||
description: "non-legacy discover mode raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
DiscoverMode: "non-legacy",
|
||||
},
|
||||
},
|
||||
visibleDevices: "all",
|
||||
expectedError: fmt.Errorf("invalid discover mode"),
|
||||
},
|
||||
{
|
||||
description: "legacy discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
DiscoverMode: "legacy",
|
||||
},
|
||||
},
|
||||
visibleDevices: "all",
|
||||
},
|
||||
{
|
||||
description: "csv discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
DiscoverMode: "csv",
|
||||
},
|
||||
},
|
||||
visibleDevices: "all",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
@ -115,7 +79,7 @@ func TestNewExperimentalModifier(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
m, err := NewExperimentalModifier(logger, tc.cfg, spec)
|
||||
m, err := NewCSVModifier(logger, tc.cfg, spec)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
@ -304,7 +268,7 @@ func TestExperimentalModifier(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
m, err := newExperimentalModifierFromDiscoverer(logger, tc.discover)
|
||||
m, err := newModifierFromDiscoverer(logger, tc.discover)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = m.Modify(tc.spec)
|
||||
@ -318,32 +282,3 @@ func TestExperimentalModifier(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveDiscoverMode(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
mode string
|
||||
expectedMode string
|
||||
}{
|
||||
{
|
||||
description: "non-auto resolves to input",
|
||||
mode: "not-auto",
|
||||
expectedMode: "not-auto",
|
||||
},
|
||||
// TODO: The following test is brittle in that it will break on Tegra-based systems.
|
||||
// {
|
||||
// description: "auto resolves to legacy",
|
||||
// mode: "auto",
|
||||
// expectedMode: "legacy",
|
||||
// },
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
mode := resolveAutoDiscoverMode(logger, tc.mode)
|
||||
require.EqualValues(t, tc.expectedMode, mode)
|
||||
})
|
||||
}
|
||||
}
|
@ -21,6 +21,7 @@ import (
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
|
||||
"github.com/sirupsen/logrus"
|
||||
@ -61,9 +62,12 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [
|
||||
|
||||
// newSpecModifier is a factory method that constructs an OCI spec modifier based on the provided config.
|
||||
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
|
||||
if !cfg.NVIDIAContainerRuntimeConfig.Experimental {
|
||||
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) {
|
||||
case "legacy":
|
||||
return modifier.NewStableRuntimeModifier(logger), nil
|
||||
case "csv":
|
||||
return modifier.NewCSVModifier(logger, cfg, ociSpec)
|
||||
}
|
||||
|
||||
return modifier.NewExperimentalModifier(logger, cfg, ociSpec)
|
||||
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
|
||||
}
|
||||
|
@ -49,16 +49,16 @@ func TestFactoryMethod(t *testing.T) {
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "legacy",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "experimental flag supported",
|
||||
description: "csv mode is supported",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Experimental: true,
|
||||
DiscoverMode: "legacy",
|
||||
Runtimes: []string{"runc"},
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "csv",
|
||||
},
|
||||
},
|
||||
spec: &specs.Spec{
|
||||
@ -69,6 +69,43 @@ func TestFactoryMethod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "non-legacy discover mode raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "non-legacy",
|
||||
},
|
||||
},
|
||||
expectedError: true,
|
||||
},
|
||||
{
|
||||
description: "legacy discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "legacy",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "csv discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "csv",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "empty mode raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
},
|
||||
},
|
||||
expectedError: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
@ -79,7 +116,7 @@ func TestFactoryMethod(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, json.NewEncoder(specFile).Encode(tc.spec))
|
||||
|
||||
argv := []string{"--bundle", bundleDir}
|
||||
argv := []string{"--bundle", bundleDir, "create"}
|
||||
|
||||
_, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv)
|
||||
if tc.expectedError {
|
||||
|
@ -13,12 +13,12 @@ import (
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
var (
|
||||
debugflag = flag.Bool("debug", false, "enable debug output")
|
||||
forceflag = flag.Bool("force", false, "force execution of prestart hook in experimental mode")
|
||||
configflag = flag.String("config", "", "configuration file")
|
||||
)
|
||||
|
||||
@ -73,7 +73,7 @@ func doPrestart() {
|
||||
hook := getHookConfig()
|
||||
cli := hook.NvidiaContainerCLI
|
||||
|
||||
if hook.NVIDIAContainerRuntime.Experimental && !*forceflag {
|
||||
if info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
|
||||
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.")
|
||||
}
|
||||
|
||||
@ -178,3 +178,12 @@ func main() {
|
||||
os.Exit(2)
|
||||
}
|
||||
}
|
||||
|
||||
// logInterceptor is a minimal logger exposing Infof and Debugf so that it can
// be handed to info.ResolveAutoMode from code that uses the standard library
// log package.
type logInterceptor struct{}

// Debugf discards debug-level messages.
func (*logInterceptor) Debugf(string, ...interface{}) {}

// Infof forwards info-level messages to the standard library logger.
func (*logInterceptor) Infof(msg string, values ...interface{}) {
	log.Printf(msg, values...)
}
|
||||
|
@ -80,22 +80,26 @@ func loadConfigFrom(reader io.Reader) (*Config, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return getConfigFrom(toml), nil
|
||||
return getConfigFrom(toml)
|
||||
}
|
||||
|
||||
// getConfigFrom reads the nvidia container runtime config from the specified toml Tree.
|
||||
func getConfigFrom(toml *toml.Tree) *Config {
|
||||
func getConfigFrom(toml *toml.Tree) (*Config, error) {
|
||||
cfg := getDefaultConfig()
|
||||
|
||||
if toml == nil {
|
||||
return cfg
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml)
|
||||
cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml)
|
||||
cfg.NVIDIAContainerRuntimeConfig = *getRuntimeConfigFrom(toml)
|
||||
runtimeConfig, err := getRuntimeConfigFrom(toml)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load nvidia-container-runtime config: %v", err)
|
||||
}
|
||||
cfg.NVIDIAContainerRuntimeConfig = *runtimeConfig
|
||||
|
||||
return cfg
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// getDefaultConfig defines the default values for the config
|
||||
|
@ -62,10 +62,14 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
Experimental: false,
|
||||
DiscoverMode: "auto",
|
||||
LogLevel: "info",
|
||||
Runtimes: []string{"docker-runc", "runc"},
|
||||
Mode: "auto",
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: "nvidia-ctk",
|
||||
@ -81,6 +85,8 @@ func TestGetConfig(t *testing.T) {
|
||||
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
|
||||
"nvidia-container-runtime.log-level = \"debug\"",
|
||||
"nvidia-container-runtime.runtimes = [\"/some/runtime\",]",
|
||||
"nvidia-container-runtime.mode = \"not-auto\"",
|
||||
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
|
||||
},
|
||||
expectedConfig: &Config{
|
||||
@ -89,10 +95,14 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/foo/bar",
|
||||
Experimental: true,
|
||||
DiscoverMode: "not-legacy",
|
||||
LogLevel: "debug",
|
||||
Runtimes: []string{"/some/runtime"},
|
||||
Mode: "not-auto",
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: "/foo/bar/nvidia-ctk",
|
||||
@ -110,6 +120,9 @@ func TestGetConfig(t *testing.T) {
|
||||
"discover-mode = \"not-legacy\"",
|
||||
"log-level = \"debug\"",
|
||||
"runtimes = [\"/some/runtime\",]",
|
||||
"mode = \"not-auto\"",
|
||||
"[nvidia-container-runtime.modes.csv]",
|
||||
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"[nvidia-ctk]",
|
||||
"path = \"/foo/bar/nvidia-ctk\"",
|
||||
},
|
||||
@ -119,10 +132,14 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/foo/bar",
|
||||
Experimental: true,
|
||||
DiscoverMode: "not-legacy",
|
||||
LogLevel: "debug",
|
||||
Runtimes: []string{"/some/runtime"},
|
||||
Mode: "not-auto",
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: "/foo/bar/nvidia-ctk",
|
||||
|
@ -17,6 +17,8 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/pelletier/go-toml"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
@ -24,55 +26,69 @@ import (
|
||||
const (
|
||||
dockerRuncExecutableName = "docker-runc"
|
||||
runcExecutableName = "runc"
|
||||
|
||||
auto = "auto"
|
||||
)
|
||||
|
||||
// RuntimeConfig stores the config options for the NVIDIA Container Runtime
|
||||
type RuntimeConfig struct {
|
||||
DebugFilePath string
|
||||
Experimental bool
|
||||
DiscoverMode string
|
||||
DebugFilePath string `toml:"debug"`
|
||||
// LogLevel defines the logging level for the application
|
||||
LogLevel string
|
||||
LogLevel string `toml:"log-level"`
|
||||
// Runtimes defines the candidates for the low-level runtime
|
||||
Runtimes []string
|
||||
Runtimes []string `toml:"runtimes"`
|
||||
Mode string `toml:"mode"`
|
||||
Modes modesConfig `toml:"modes"`
|
||||
}
|
||||
|
||||
type (
	// modesConfig groups the (optional) per-mode settings; it is read from
	// the "modes" table of the runtime config.
	modesConfig struct {
		CSV csvModeConfig `toml:"csv"`
	}

	// csvModeConfig holds the settings specific to the csv mode.
	csvModeConfig struct {
		// MountSpecPath is read from the "mount-spec-path" key.
		MountSpecPath string `toml:"mount-spec-path"`
	}
)
|
||||
|
||||
// dummy allows us to unmarshal only a RuntimeConfig from a *toml.Tree
|
||||
type dummy struct {
|
||||
Runtime RuntimeConfig `toml:"nvidia-container-runtime"`
|
||||
}
|
||||
|
||||
// getRuntimeConfigFrom reads the nvidia container runtime config from the specified toml Tree.
|
||||
func getRuntimeConfigFrom(toml *toml.Tree) *RuntimeConfig {
|
||||
func getRuntimeConfigFrom(toml *toml.Tree) (*RuntimeConfig, error) {
|
||||
cfg := GetDefaultRuntimeConfig()
|
||||
|
||||
if toml == nil {
|
||||
return cfg
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
cfg.DebugFilePath = toml.GetDefault("nvidia-container-runtime.debug", cfg.DebugFilePath).(string)
|
||||
cfg.Experimental = toml.GetDefault("nvidia-container-runtime.experimental", cfg.Experimental).(bool)
|
||||
cfg.DiscoverMode = toml.GetDefault("nvidia-container-runtime.discover-mode", cfg.DiscoverMode).(string)
|
||||
cfg.LogLevel = toml.GetDefault("nvidia-container-runtime.log-level", cfg.LogLevel).(string)
|
||||
|
||||
configRuntimes := toml.Get("nvidia-container-runtime.runtimes")
|
||||
if configRuntimes != nil {
|
||||
var runtimes []string
|
||||
for _, r := range configRuntimes.([]interface{}) {
|
||||
runtimes = append(runtimes, r.(string))
|
||||
}
|
||||
cfg.Runtimes = runtimes
|
||||
d := dummy{
|
||||
Runtime: *cfg,
|
||||
}
|
||||
|
||||
return cfg
|
||||
if err := toml.Unmarshal(&d); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal runtime config: %v", err)
|
||||
}
|
||||
|
||||
return &d.Runtime, nil
|
||||
}
|
||||
|
||||
// GetDefaultRuntimeConfig defines the default values for the config
|
||||
func GetDefaultRuntimeConfig() *RuntimeConfig {
|
||||
c := RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
Experimental: false,
|
||||
DiscoverMode: "auto",
|
||||
LogLevel: logrus.InfoLevel.String(),
|
||||
Runtimes: []string{
|
||||
dockerRuncExecutableName,
|
||||
runcExecutableName,
|
||||
},
|
||||
Mode: auto,
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
|
@ -1,70 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewLegacyDiscoverer creates a discoverer for the experimental runtime
|
||||
func NewLegacyDiscoverer(logger *logrus.Logger, cfg *Config) (Discover, error) {
|
||||
d := legacy{
|
||||
logger: logger,
|
||||
lookup: lookup.NewExecutableLocator(logger, cfg.Root),
|
||||
}
|
||||
|
||||
return &d, nil
|
||||
}
|
||||
|
||||
type legacy struct {
|
||||
None
|
||||
logger *logrus.Logger
|
||||
lookup lookup.Locator
|
||||
}
|
||||
|
||||
var _ Discover = (*legacy)(nil)
|
||||
|
||||
// Hooks returns the "legacy" NVIDIA Container Runtime hook. This hook calls out
|
||||
// to the nvidia-container-cli to make modifications to the container as defined
|
||||
// in libnvidia-container.
|
||||
func (d legacy) Hooks() ([]Hook, error) {
|
||||
hookPath := filepath.Join(config.DefaultExecutableDir, config.NVIDIAContainerRuntimeHookExecutable)
|
||||
targets, err := d.lookup.Locate(config.NVIDIAContainerRuntimeHookExecutable)
|
||||
if err != nil {
|
||||
d.logger.Warnf("Failed to locate %v: %v", config.NVIDIAContainerRuntimeHookExecutable, err)
|
||||
} else if len(targets) == 0 {
|
||||
d.logger.Warnf("%v not found", config.NVIDIAContainerRuntimeHookExecutable)
|
||||
} else {
|
||||
d.logger.Debugf("Found %v candidates: %v", config.NVIDIAContainerRuntimeHookExecutable, targets)
|
||||
hookPath = targets[0]
|
||||
}
|
||||
d.logger.Debugf("Using NVIDIA Container Runtime Hook path %v", hookPath)
|
||||
|
||||
args := []string{hookPath, "--force", "prestart"}
|
||||
h := Hook{
|
||||
Lifecycle: cdi.PrestartHook,
|
||||
Path: hookPath,
|
||||
Args: args,
|
||||
}
|
||||
|
||||
return []Hook{h}, nil
|
||||
}
|
43
internal/info/auto.go
Normal file
43
internal/info/auto.go
Normal file
@ -0,0 +1,43 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package info
|
||||
|
||||
// Logger is the small logging surface required by this package. Keeping it to
// two printf-style methods allows callers that do not use logrus to supply
// their own implementation.
type Logger interface {
	// Debugf logs a formatted debug-level message.
	Debugf(format string, args ...interface{})
	// Infof logs a formatted info-level message.
	Infof(format string, args ...interface{})
}
|
||||
|
||||
// ResolveAutoMode determines the correct mode for the platform if set to "auto"
|
||||
func ResolveAutoMode(logger Logger, mode string) (rmode string) {
|
||||
if mode != "auto" {
|
||||
return mode
|
||||
}
|
||||
defer func() {
|
||||
logger.Infof("Auto-detected mode as '%v'", rmode)
|
||||
}()
|
||||
|
||||
isTegra, reason := IsTegraSystem()
|
||||
logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason)
|
||||
|
||||
if isTegra {
|
||||
return "csv"
|
||||
}
|
||||
|
||||
return "legacy"
|
||||
}
|
53
internal/info/auto_test.go
Normal file
53
internal/info/auto_test.go
Normal file
@ -0,0 +1,53 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package info
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestResolveAutoMode(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
mode string
|
||||
expectedMode string
|
||||
}{
|
||||
{
|
||||
description: "non-auto resolves to input",
|
||||
mode: "not-auto",
|
||||
expectedMode: "not-auto",
|
||||
},
|
||||
// TODO: The following test is brittle in that it will break on Tegra-based systems.
|
||||
// {
|
||||
// description: "auto resolves to legacy",
|
||||
// mode: "auto",
|
||||
// expectedMode: "legacy",
|
||||
// },
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
mode := ResolveAutoMode(logger, tc.mode)
|
||||
require.EqualValues(t, tc.expectedMode, mode)
|
||||
})
|
||||
}
|
||||
}
|
48
internal/info/tegra.go
Normal file
48
internal/info/tegra.go
Normal file
@ -0,0 +1,48 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package info
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// IsTegraSystem reports whether the host is detected as a Tegra-based system.
// The second return value is a human-readable reason for the verdict.
//
// Detection proceeds in two steps:
//  1. If /etc/nv_tegra_release exists and is not a directory, the system is
//     considered Tegra-based.
//  2. Otherwise /sys/devices/soc0/family is read and the system is considered
//     Tegra-based if its contents start with "tegra" (case-insensitive).
func IsTegraSystem() (bool, string) {
	const tegraReleaseFile = "/etc/nv_tegra_release"
	const tegraFamilyFile = "/sys/devices/soc0/family"

	return isTegraSystemFrom(tegraReleaseFile, tegraFamilyFile)
}

// isTegraSystemFrom runs the Tegra detection against the given release and
// family file paths, returning the verdict and the reason for it.
func isTegraSystemFrom(releaseFile, familyFile string) (bool, string) {
	if info, err := os.Stat(releaseFile); err == nil && !info.IsDir() {
		return true, fmt.Sprintf("%v found", releaseFile)
	}

	// The family entry must be a readable file. Only a missing path or a
	// directory counts as "not found"; the previous `!info.IsDir()` check
	// rejected every regular file and made the read below unreachable.
	if info, err := os.Stat(familyFile); err != nil || info.IsDir() {
		return false, fmt.Sprintf("%v not found", familyFile)
	}

	contents, err := os.ReadFile(familyFile)
	if err != nil {
		return false, fmt.Sprintf("could not read %v", familyFile)
	}

	if strings.HasPrefix(strings.ToLower(string(contents)), "tegra") {
		return true, fmt.Sprintf("%v has 'tegra' prefix", familyFile)
	}

	return false, fmt.Sprintf("%v has no 'tegra' prefix", familyFile)
}
|
Loading…
Reference in New Issue
Block a user