Add nvidia-container-runtime-hook.path config option

This change adds an nvidia-container-runtime-hook.path config option
to allow the path used for the prestart hook to be overridden. This
is useful in cases where multiple NVIDIA Container Toolkit installations
are present.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2023-05-24 10:21:50 +02:00
parent 3bd5baa3c5
commit ac11727ec5
9 changed files with 83 additions and 46 deletions

View File

@ -9,6 +9,7 @@
* Add support for generating merged devices (e.g. `all` device) to the nvcdi API. * Add support for generating merged devices (e.g. `all` device) to the nvcdi API.
* Use *.* pattern to locate libcuda.so when generating a CDI specification to support platforms where a patch version is not specified. * Use *.* pattern to locate libcuda.so when generating a CDI specification to support platforms where a patch version is not specified.
* Update go-nvlib to skip devices that are not MIG capable when generating CDI specifications. * Update go-nvlib to skip devices that are not MIG capable when generating CDI specifications.
* Add `nvidia-container-runtime-hook.path` config option to specify NVIDIA Container Runtime Hook path explicitly.
## v1.13.1 ## v1.13.1

View File

@ -172,7 +172,7 @@ func TestDuplicateHook(t *testing.T) {
// addNVIDIAHook is a basic wrapper for an addHookModifier that is used for // addNVIDIAHook is a basic wrapper for an addHookModifier that is used for
// testing. // testing.
func addNVIDIAHook(spec *specs.Spec) error { func addNVIDIAHook(spec *specs.Spec) error {
m := modifier.NewStableRuntimeModifier(logrus.StandardLogger()) m := modifier.NewStableRuntimeModifier(logrus.StandardLogger(), nvidiaHook)
return m.Modify(spec) return m.Modify(spec)
} }

View File

@ -37,6 +37,9 @@ const (
nvidiaCTKExecutable = "nvidia-ctk" nvidiaCTKExecutable = "nvidia-ctk"
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk" nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
nvidiaContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
nvidiaContainerRuntimeHookDefaultPath = "/usr/bin/nvidia-container-runtime-hook"
) )
var ( var (
@ -189,6 +192,9 @@ func GetDefaultConfigToml() (*toml.Tree, error) {
// nvidia-ctk // nvidia-ctk
tree.Set("nvidia-ctk.path", nvidiaCTKExecutable) tree.Set("nvidia-ctk.path", nvidiaCTKExecutable)
// nvidia-container-runtime-hook
tree.Set("nvidia-container-runtime-hook.path", nvidiaContainerRuntimeHookExecutable)
return tree, nil return tree, nil
} }
@ -244,27 +250,48 @@ func getDistIDLike() []string {
// If the path is specified as an absolute path, it is used directly // If the path is specified as an absolute path, it is used directly
// without checking for existence of an executable at that path. // without checking for existence of an executable at that path.
func ResolveNVIDIACTKPath(logger *logrus.Logger, nvidiaCTKPath string) string { func ResolveNVIDIACTKPath(logger *logrus.Logger, nvidiaCTKPath string) string {
if filepath.IsAbs(nvidiaCTKPath) { return resolveWithDefault(
logger.Debugf("Using specified NVIDIA Container Toolkit CLI path %v", nvidiaCTKPath) logger,
return nvidiaCTKPath "NVIDIA Container Toolkit CLI",
} nvidiaCTKPath,
nvidiaCTKDefaultFilePath,
if nvidiaCTKPath == "" { )
nvidiaCTKPath = nvidiaCTKExecutable }
}
logger.Debugf("Locating NVIDIA Container Toolkit CLI as %v", nvidiaCTKPath) // ResolveNVIDIAContainerRuntimeHookPath resolves the path to the nvidia-container-runtime-hook binary.
lookup := lookup.NewExecutableLocator(logger, "") func ResolveNVIDIAContainerRuntimeHookPath(logger *logrus.Logger, nvidiaContainerRuntimeHookPath string) string {
hookPath := nvidiaCTKDefaultFilePath return resolveWithDefault(
targets, err := lookup.Locate(nvidiaCTKPath) logger,
if err != nil { "NVIDIA Container Runtime Hook",
logger.Warnf("Failed to locate %v: %v", nvidiaCTKPath, err) nvidiaContainerRuntimeHookPath,
} else if len(targets) == 0 { nvidiaContainerRuntimeHookDefaultPath,
logger.Warnf("%v not found", nvidiaCTKPath) )
} else { }
logger.Debugf("Found %v candidates: %v", nvidiaCTKPath, targets)
hookPath = targets[0] // resolveWithDefault resolves the path to the specified binary.
} // If an absolute path is specified, it is used directly without searching for the binary.
logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath) // If the binary cannot be found in the path, the specified default is used instead.
func resolveWithDefault(logger *logrus.Logger, label string, path string, defaultPath string) string {
return hookPath if filepath.IsAbs(path) {
logger.Debugf("Using specified %v path %v", label, path)
return path
}
if path == "" {
path = filepath.Base(defaultPath)
}
logger.Debugf("Locating %v as %v", label, path)
lookup := lookup.NewExecutableLocator(logger, "")
resolvedPath := defaultPath
targets, err := lookup.Locate(path)
if err != nil {
logger.Warnf("Failed to locate %v: %v", path, err)
} else {
logger.Debugf("Found %v candidates: %v", path, targets)
resolvedPath = targets[0]
}
logger.Debugf("Using %v path %v", label, path)
return resolvedPath
} }

View File

@ -76,6 +76,9 @@ func TestGetConfig(t *testing.T) {
}, },
}, },
}, },
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{ NVIDIACTKConfig: CTKConfig{
Path: "nvidia-ctk", Path: "nvidia-ctk",
}, },
@ -95,6 +98,7 @@ func TestGetConfig(t *testing.T) {
"nvidia-container-runtime.modes.cdi.default-kind = \"example.vendor.com/device\"", "nvidia-container-runtime.modes.cdi.default-kind = \"example.vendor.com/device\"",
"nvidia-container-runtime.modes.cdi.annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]", "nvidia-container-runtime.modes.cdi.annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"", "nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"nvidia-container-runtime-hook.path = \"/foo/bar/nvidia-container-runtime-hook\"",
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"", "nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
}, },
expectedConfig: &Config{ expectedConfig: &Config{
@ -120,6 +124,9 @@ func TestGetConfig(t *testing.T) {
}, },
}, },
}, },
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "/foo/bar/nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{ NVIDIACTKConfig: CTKConfig{
Path: "/foo/bar/nvidia-ctk", Path: "/foo/bar/nvidia-ctk",
}, },
@ -143,6 +150,8 @@ func TestGetConfig(t *testing.T) {
"annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]", "annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
"[nvidia-container-runtime.modes.csv]", "[nvidia-container-runtime.modes.csv]",
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"", "mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"[nvidia-container-runtime-hook]",
"path = \"/foo/bar/nvidia-container-runtime-hook\"",
"[nvidia-ctk]", "[nvidia-ctk]",
"path = \"/foo/bar/nvidia-ctk\"", "path = \"/foo/bar/nvidia-ctk\"",
}, },
@ -169,6 +178,9 @@ func TestGetConfig(t *testing.T) {
}, },
}, },
}, },
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "/foo/bar/nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{ NVIDIACTKConfig: CTKConfig{
Path: "/foo/bar/nvidia-ctk", Path: "/foo/bar/nvidia-ctk",
}, },

View File

@ -18,6 +18,9 @@ package config
// RuntimeHookConfig stores the config options for the NVIDIA Container Runtime // RuntimeHookConfig stores the config options for the NVIDIA Container Runtime
type RuntimeHookConfig struct { type RuntimeHookConfig struct {
// Path specifies the path to the NVIDIA Container Runtime hook binary.
// If an executable name is specified, this will be resolved in the path.
Path string `toml:"path"`
// SkipModeDetection disables the mode check for the runtime hook. // SkipModeDetection disables the mode check for the runtime hook.
SkipModeDetection bool `toml:"skip-mode-detection"` SkipModeDetection bool `toml:"skip-mode-detection"`
} }

View File

@ -17,10 +17,8 @@
package modifier package modifier
import ( import (
"fmt" "path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
@ -28,8 +26,11 @@ import (
// NewStableRuntimeModifier creates an OCI spec modifier that inserts the NVIDIA Container Runtime Hook into an OCI // NewStableRuntimeModifier creates an OCI spec modifier that inserts the NVIDIA Container Runtime Hook into an OCI
// spec. The specified logger is used to capture log output. // spec. The specified logger is used to capture log output.
func NewStableRuntimeModifier(logger *logrus.Logger) oci.SpecModifier { func NewStableRuntimeModifier(logger *logrus.Logger, nvidiaContainerRuntimeHookPath string) oci.SpecModifier {
m := stableRuntimeModifier{logger: logger} m := stableRuntimeModifier{
logger: logger,
nvidiaContainerRuntimeHookPath: nvidiaContainerRuntimeHookPath,
}
return &m return &m
} }
@ -37,7 +38,8 @@ func NewStableRuntimeModifier(logger *logrus.Logger) oci.SpecModifier {
// stableRuntimeModifier modifies an OCI spec inplace, inserting the nvidia-container-runtime-hook as a // stableRuntimeModifier modifies an OCI spec inplace, inserting the nvidia-container-runtime-hook as a
// prestart hook. If the hook is already present, no modification is made. // prestart hook. If the hook is already present, no modification is made.
type stableRuntimeModifier struct { type stableRuntimeModifier struct {
logger *logrus.Logger logger *logrus.Logger
nvidiaContainerRuntimeHookPath string
} }
// Modify applies the required modification to the incoming OCI spec, inserting the nvidia-container-runtime-hook // Modify applies the required modification to the incoming OCI spec, inserting the nvidia-container-runtime-hook
@ -53,18 +55,9 @@ func (m stableRuntimeModifier) Modify(spec *specs.Spec) error {
} }
} }
// We create a locator and look for the NVIDIA Container Runtime Hook in the path. path := m.nvidiaContainerRuntimeHookPath
candidates, err := lookup.NewExecutableLocator(m.logger, "").Locate(config.NVIDIAContainerRuntimeHookExecutable)
if err != nil {
return fmt.Errorf("failed to locate NVIDIA Container Runtime Hook: %v", err)
}
path := candidates[0]
if len(candidates) > 1 {
m.logger.Debugf("Using %v from multiple NVIDIA Container Runtime Hook candidates: %v", path, candidates)
}
m.logger.Infof("Using prestart hook path: %v", path) m.logger.Infof("Using prestart hook path: %v", path)
args := []string{path} args := []string{filepath.Base(path)}
if spec.Hooks == nil { if spec.Hooks == nil {
spec.Hooks = &specs.Hooks{} spec.Hooks = &specs.Hooks{}
} }

View File

@ -79,7 +79,7 @@ func TestAddHookModifier(t *testing.T) {
Prestart: []specs.Hook{ Prestart: []specs.Hook{
{ {
Path: testHookPath, Path: testHookPath,
Args: []string{testHookPath, "prestart"}, Args: []string{"nvidia-container-runtime-hook", "prestart"},
}, },
}, },
}, },
@ -95,7 +95,7 @@ func TestAddHookModifier(t *testing.T) {
Prestart: []specs.Hook{ Prestart: []specs.Hook{
{ {
Path: testHookPath, Path: testHookPath,
Args: []string{testHookPath, "prestart"}, Args: []string{"nvidia-container-runtime-hook", "prestart"},
}, },
}, },
}, },
@ -141,7 +141,7 @@ func TestAddHookModifier(t *testing.T) {
}, },
{ {
Path: testHookPath, Path: testHookPath,
Args: []string{testHookPath, "prestart"}, Args: []string{"nvidia-container-runtime-hook", "prestart"},
}, },
}, },
}, },
@ -154,7 +154,7 @@ func TestAddHookModifier(t *testing.T) {
t.Run(tc.description, func(t *testing.T) { t.Run(tc.description, func(t *testing.T) {
m := NewStableRuntimeModifier(logger) m := NewStableRuntimeModifier(logger, testHookPath)
err := m.Modify(&tc.spec) err := m.Modify(&tc.spec)
if tc.expectedError != nil { if tc.expectedError != nil {

View File

@ -65,6 +65,7 @@ func (r rt) Run(argv []string) (rerr error) {
cfg.NVIDIAContainerRuntimeConfig.Mode = r.modeOverride cfg.NVIDIAContainerRuntimeConfig.Mode = r.modeOverride
} }
cfg.NVIDIACTKConfig.Path = config.ResolveNVIDIACTKPath(r.logger.Logger, cfg.NVIDIACTKConfig.Path) cfg.NVIDIACTKConfig.Path = config.ResolveNVIDIACTKPath(r.logger.Logger, cfg.NVIDIACTKConfig.Path)
cfg.NVIDIAContainerRuntimeHookConfig.Path = config.ResolveNVIDIAContainerRuntimeHookPath(r.logger.Logger, cfg.NVIDIAContainerRuntimeHookConfig.Path)
// Print the config to the output. // Print the config to the output.
configJSON, err := json.MarshalIndent(cfg, "", " ") configJSON, err := json.MarshalIndent(cfg, "", " ")

View File

@ -93,7 +93,7 @@ func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec
func newModeModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) { func newModeModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) { switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) {
case "legacy": case "legacy":
return modifier.NewStableRuntimeModifier(logger), nil return modifier.NewStableRuntimeModifier(logger, cfg.NVIDIAContainerRuntimeHookConfig.Path), nil
case "csv": case "csv":
return modifier.NewCSVModifier(logger, cfg, ociSpec) return modifier.NewCSVModifier(logger, cfg, ociSpec)
case "cdi": case "cdi":