From 2d7bb636b9d9c911ae96f748a97e464fb84f87cd Mon Sep 17 00:00:00 2001 From: Evan Lezar <7723350-elezar@users.noreply.gitlab.com> Date: Fri, 26 May 2023 08:29:52 +0000 Subject: [PATCH] Merge branch 'CNT-4285/add-runtime-hook-path' into 'main' Add nvidia-contianer-runtime-hook.path config option See merge request nvidia/container-toolkit/container-toolkit!401 --- CHANGELOG.md | 2 ++ cmd/nvidia-container-runtime/main_test.go | 2 +- internal/config/config.go | 44 +++++++++++++++++++++++ internal/config/config_test.go | 12 +++++++ internal/config/hook.go | 4 +++ internal/modifier/stable.go | 27 ++++++-------- internal/modifier/stable_test.go | 8 ++--- internal/runtime/runtime.go | 4 +++ internal/runtime/runtime_factory.go | 2 +- third_party/nvidia-container-runtime | 2 +- third_party/nvidia-docker | 2 +- tools/container/toolkit/toolkit.go | 7 ++-- 12 files changed, 88 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea6f298d..4fda2c9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## v1.13.2 +* Add `nvidia-container-runtime-hook.path` config option to specify NVIDIA Container Runtime Hook path explicitly. + ## v1.13.1 * Update `update-ldcache` hook to only update ldcache if it exists. diff --git a/cmd/nvidia-container-runtime/main_test.go b/cmd/nvidia-container-runtime/main_test.go index 05809aee..b8037751 100644 --- a/cmd/nvidia-container-runtime/main_test.go +++ b/cmd/nvidia-container-runtime/main_test.go @@ -172,7 +172,7 @@ func TestDuplicateHook(t *testing.T) { // addNVIDIAHook is a basic wrapper for an addHookModifier that is used for // testing. func addNVIDIAHook(spec *specs.Spec) error { - m := modifier.NewStableRuntimeModifier(logrus.StandardLogger()) + m := modifier.NewStableRuntimeModifier(logrus.StandardLogger(), nvidiaHook) return m.Modify(spec) } diff --git a/internal/config/config.go b/internal/config/config.go index b4f773a6..eb504c0c 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -21,13 +21,19 @@ import ( "io" "os" "path" + "path/filepath" + "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" "github.com/pelletier/go-toml" + "github.com/sirupsen/logrus" ) const ( configOverride = "XDG_CONFIG_HOME" configFilePath = "nvidia-container-runtime/config.toml" + + nvidiaContainerRuntimeHookExecutable = "nvidia-container-runtime-hook" + nvidiaContainerRuntimeHookDefaultPath = "/usr/bin/nvidia-container-runtime-hook" ) var ( @@ -124,3 +130,41 @@ func getDefaultConfig() *Config { return &c } + +// ResolveNVIDIAContainerRuntimeHookPath resolves the path the nvidia-container-runtime-hook binary. +func ResolveNVIDIAContainerRuntimeHookPath(logger *logrus.Logger, nvidiaContainerRuntimeHookPath string) string { + return resolveWithDefault( + logger, + "NVIDIA Container Runtime Hook", + nvidiaContainerRuntimeHookPath, + nvidiaContainerRuntimeHookDefaultPath, + ) +} + +// resolveWithDefault resolves the path to the specified binary. +// If an absolute path is specified, it is used directly without searching for the binary. +// If the binary cannot be found in the path, the specified default is used instead. +func resolveWithDefault(logger *logrus.Logger, label string, path string, defaultPath string) string { + if filepath.IsAbs(path) { + logger.Debugf("Using specified %v path %v", label, path) + return path + } + + if path == "" { + path = filepath.Base(defaultPath) + } + logger.Debugf("Locating %v as %v", label, path) + lookup := lookup.NewExecutableLocator(logger, "") + + resolvedPath := defaultPath + targets, err := lookup.Locate(path) + if err != nil { + logger.Warnf("Failed to locate %v: %v", path, err) + } else { + logger.Debugf("Found %v candidates: %v", path, targets) + resolvedPath = targets[0] + } + logger.Debugf("Using %v path %v", label, path) + + return resolvedPath +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 72f03336..867edf79 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -76,6 +76,9 @@ func TestGetConfig(t *testing.T) { }, }, }, + NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{ + Path: "nvidia-container-runtime-hook", + }, NVIDIACTKConfig: CTKConfig{ Path: "nvidia-ctk", }, @@ -95,6 +98,7 @@ func TestGetConfig(t *testing.T) { "nvidia-container-runtime.modes.cdi.default-kind = \"example.vendor.com/device\"", "nvidia-container-runtime.modes.cdi.annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]", "nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"", + "nvidia-container-runtime-hook.path = \"/foo/bar/nvidia-container-runtime-hook\"", "nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"", }, expectedConfig: &Config{ @@ -120,6 +124,9 @@ func TestGetConfig(t *testing.T) { }, }, }, + NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{ + Path: "/foo/bar/nvidia-container-runtime-hook", + }, NVIDIACTKConfig: CTKConfig{ Path: "/foo/bar/nvidia-ctk", }, @@ -143,6 +150,8 @@ func TestGetConfig(t *testing.T) { "annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]", "[nvidia-container-runtime.modes.csv]", "mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"", + "[nvidia-container-runtime-hook]", + "path = \"/foo/bar/nvidia-container-runtime-hook\"", "[nvidia-ctk]", "path = \"/foo/bar/nvidia-ctk\"", }, @@ -169,6 +178,9 @@ func TestGetConfig(t *testing.T) { }, }, }, + NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{ + Path: "/foo/bar/nvidia-container-runtime-hook", + }, NVIDIACTKConfig: CTKConfig{ Path: "/foo/bar/nvidia-ctk", }, diff --git a/internal/config/hook.go b/internal/config/hook.go index 4c35a1d6..a3861dc6 100644 --- a/internal/config/hook.go +++ b/internal/config/hook.go @@ -24,6 +24,9 @@ import ( // RuntimeHookConfig stores the config options for the NVIDIA Container Runtime type RuntimeHookConfig struct { + // Path specifies the path to the NVIDIA Container Runtime hook binary. + // If an executable name is specified, this will be resolved in the path. + Path string `toml:"path"` // SkipModeDetection disables the mode check for the runtime hook. SkipModeDetection bool `toml:"skip-mode-detection"` } @@ -55,6 +58,7 @@ func getRuntimeHookConfigFrom(toml *toml.Tree) (*RuntimeHookConfig, error) { // GetDefaultRuntimeHookConfig defines the default values for the config func GetDefaultRuntimeHookConfig() *RuntimeHookConfig { c := RuntimeHookConfig{ + Path: NVIDIAContainerRuntimeHookExecutable, SkipModeDetection: false, } diff --git a/internal/modifier/stable.go b/internal/modifier/stable.go index 1b4d8401..8281ce89 100644 --- a/internal/modifier/stable.go +++ b/internal/modifier/stable.go @@ -17,10 +17,8 @@ package modifier import ( - "fmt" + "path/filepath" - "github.com/NVIDIA/nvidia-container-toolkit/internal/config" - "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" @@ -28,8 +26,11 @@ import ( // NewStableRuntimeModifier creates an OCI spec modifier that inserts the NVIDIA Container Runtime Hook into an OCI // spec. The specified logger is used to capture log output. -func NewStableRuntimeModifier(logger *logrus.Logger) oci.SpecModifier { - m := stableRuntimeModifier{logger: logger} +func NewStableRuntimeModifier(logger *logrus.Logger, nvidiaContainerRuntimeHookPath string) oci.SpecModifier { + m := stableRuntimeModifier{ + logger: logger, + nvidiaContainerRuntimeHookPath: nvidiaContainerRuntimeHookPath, + } return &m } @@ -37,7 +38,8 @@ func NewStableRuntimeModifier(logger *logrus.Logger) oci.SpecModifier { // stableRuntimeModifier modifies an OCI spec inplace, inserting the nvidia-container-runtime-hook as a // prestart hook. If the hook is already present, no modification is made. type stableRuntimeModifier struct { - logger *logrus.Logger + logger *logrus.Logger + nvidiaContainerRuntimeHookPath string } // Modify applies the required modification to the incoming OCI spec, inserting the nvidia-container-runtime-hook @@ -53,18 +55,9 @@ func (m stableRuntimeModifier) Modify(spec *specs.Spec) error { } } - // We create a locator and look for the NVIDIA Container Runtime Hook in the path. - candidates, err := lookup.NewExecutableLocator(m.logger, "").Locate(config.NVIDIAContainerRuntimeHookExecutable) - if err != nil { - return fmt.Errorf("failed to locate NVIDIA Container Runtime Hook: %v", err) - } - path := candidates[0] - if len(candidates) > 1 { - m.logger.Debugf("Using %v from multiple NVIDIA Container Runtime Hook candidates: %v", path, candidates) - } - + path := m.nvidiaContainerRuntimeHookPath m.logger.Infof("Using prestart hook path: %v", path) - args := []string{path} + args := []string{filepath.Base(path)} if spec.Hooks == nil { spec.Hooks = &specs.Hooks{} } diff --git a/internal/modifier/stable_test.go b/internal/modifier/stable_test.go index ac05c199..0b8eaff9 100644 --- a/internal/modifier/stable_test.go +++ b/internal/modifier/stable_test.go @@ -79,7 +79,7 @@ func TestAddHookModifier(t *testing.T) { Prestart: []specs.Hook{ { Path: testHookPath, - Args: []string{testHookPath, "prestart"}, + Args: []string{"nvidia-container-runtime-hook", "prestart"}, }, }, }, @@ -95,7 +95,7 @@ func TestAddHookModifier(t *testing.T) { Prestart: []specs.Hook{ { Path: testHookPath, - Args: []string{testHookPath, "prestart"}, + Args: []string{"nvidia-container-runtime-hook", "prestart"}, }, }, }, @@ -141,7 +141,7 @@ func TestAddHookModifier(t *testing.T) { }, { Path: testHookPath, - Args: []string{testHookPath, "prestart"}, + Args: []string{"nvidia-container-runtime-hook", "prestart"}, }, }, }, @@ -154,7 +154,7 @@ func TestAddHookModifier(t *testing.T) { t.Run(tc.description, func(t *testing.T) { - m := NewStableRuntimeModifier(logger) + m := NewStableRuntimeModifier(logger, testHookPath) err := m.Modify(&tc.spec) if tc.expectedError != nil { diff --git a/internal/runtime/runtime.go b/internal/runtime/runtime.go index 5634e75a..62838ec1 100644 --- a/internal/runtime/runtime.go +++ b/internal/runtime/runtime.go @@ -63,6 +63,10 @@ func (r rt) Run(argv []string) (rerr error) { r.logger.Reset() }() + // We apply some config updates here to ensure that the config is valid in + // all cases. + cfg.NVIDIAContainerRuntimeHookConfig.Path = config.ResolveNVIDIAContainerRuntimeHookPath(r.logger.Logger, cfg.NVIDIAContainerRuntimeHookConfig.Path) + // Print the config to the output. configJSON, err := json.MarshalIndent(cfg, "", " ") if err == nil { diff --git a/internal/runtime/runtime_factory.go b/internal/runtime/runtime_factory.go index 29219ab8..0547d3ad 100644 --- a/internal/runtime/runtime_factory.go +++ b/internal/runtime/runtime_factory.go @@ -99,7 +99,7 @@ func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec func newModeModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) { switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) { case "legacy": - return modifier.NewStableRuntimeModifier(logger), nil + return modifier.NewStableRuntimeModifier(logger, cfg.NVIDIAContainerRuntimeHookConfig.Path), nil case "csv": return modifier.NewCSVModifier(logger, cfg, ociSpec) case "cdi": diff --git a/third_party/nvidia-container-runtime b/third_party/nvidia-container-runtime index 68b81a20..6f328ae6 160000 --- a/third_party/nvidia-container-runtime +++ b/third_party/nvidia-container-runtime @@ -1 +1 @@ -Subproject commit 68b81a207bc936328955621708862ec72c6225d4 +Subproject commit 6f328ae638853b5b295deabfa08becc74f7273ef diff --git a/third_party/nvidia-docker b/third_party/nvidia-docker index 80902fe3..8bef6b0a 160000 --- a/third_party/nvidia-docker +++ b/third_party/nvidia-docker @@ -1 +1 @@ -Subproject commit 80902fe3afab0b08502a47b2f0e134869f813aa2 +Subproject commit 8bef6b0a49951bfde3426ae80dba5e5810b15ee8 diff --git a/tools/container/toolkit/toolkit.go b/tools/container/toolkit/toolkit.go index 95ff0f8c..e86d2375 100644 --- a/tools/container/toolkit/toolkit.go +++ b/tools/container/toolkit/toolkit.go @@ -305,7 +305,7 @@ func Install(cli *cli.Context, opts *options) error { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err)) } - _, err = installRuntimeHook(opts.toolkitRoot, toolkitConfigPath) + nvidiaContainerRuntimeHookPath, err := installRuntimeHook(opts.toolkitRoot, toolkitConfigPath) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err) } else if err != nil { @@ -319,7 +319,7 @@ func Install(cli *cli.Context, opts *options) error { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)) } - err = installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, opts) + err = installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, nvidiaContainerRuntimeHookPath, opts) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err) } else if err != nil { @@ -379,7 +379,7 @@ func installLibrary(libName string, toolkitRoot string) error { // installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring // that the settings are updated to match the desired install and nvidia driver directories. -func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, opts *options) error { +func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, nvidaContainerRuntimeHookPath string, opts *options) error { log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath) config, err := loadConfig(nvidiaContainerToolkitConfigSource) @@ -410,6 +410,7 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai // Set nvidia-ctk options "nvidia-ctk.path": nvidiaCTKPath, // Set the nvidia-container-runtime-hook options + "nvidia-container-runtime-hook.path": nvidaContainerRuntimeHookPath, "nvidia-container-runtime-hook.skip-mode-detection": opts.ContainerRuntimeHookSkipModeDetection, } for key, value := range configValues {