From 00f1d5a62755ce3ddcf992521f3ad28ea0363b07 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 11 Sep 2024 16:47:16 +0200 Subject: [PATCH] Only allow host-relative LDConfig paths This change only allows host-relative LDConfig paths. An allow-ldconfig-from-container feature flag is added to allow for this the default behaviour to be changed. Signed-off-by: Evan Lezar --- internal/config/cli.go | 60 +++++++-- internal/config/cli_test.go | 6 +- internal/config/config.go | 19 ++- internal/config/config_test.go | 163 +++++++++++++++++++----- internal/config/features.go | 7 +- internal/config/toml.go | 13 ++ internal/config/toml_test.go | 31 ++++- tools/container/toolkit/toolkit_test.go | 2 +- 8 files changed, 247 insertions(+), 54 deletions(-) diff --git a/internal/config/cli.go b/internal/config/cli.go index 4a1e199d..3621df25 100644 --- a/internal/config/cli.go +++ b/internal/config/cli.go @@ -17,6 +17,7 @@ package config import ( + "fmt" "os" "strings" ) @@ -34,29 +35,62 @@ type ContainerCLIConfig struct { NoPivot bool `toml:"no-pivot,omitempty"` NoCgroups bool `toml:"no-cgroups"` User string `toml:"user"` - Ldconfig string `toml:"ldconfig"` + // Ldconfig represents the path to the ldconfig binary to be used to update + // the ldcache in a container as it is being created. + // If this path starts with a '@' the path is relative to the host and if + // not it is treated as a container path. + // + // Note that the use of container paths are disabled by default and if this + // is required, the features.allow-ldconfig-from-container feature gate must + // be enabled explicitly. + Ldconfig ldconfigPath `toml:"ldconfig"` } // NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary. // This is only done for host LDConfigs and is required to handle systems where // /sbin/ldconfig is a wrapper around /sbin/ldconfig.real. func (c *ContainerCLIConfig) NormalizeLDConfigPath() string { - return NormalizeLDConfigPath(c.Ldconfig) + return string(c.Ldconfig.normalize()) +} + +// An ldconfigPath is used to represent the path to ldconfig. +type ldconfigPath string + +func (p ldconfigPath) assertValid(allowContainerRelativePath bool) error { + if p.isHostRelative() { + return nil + } + if allowContainerRelativePath { + return nil + } + return fmt.Errorf("nvidia-container-cli.ldconfig value %q is not host-relative (does not start with a '@')", p) +} + +func (p ldconfigPath) isHostRelative() bool { + return strings.HasPrefix(string(p), "@") +} + +// normalize returns the resolved path of the configured LDConfig binary. +// This is only done for host LDConfigs and is required to handle systems where +// /sbin/ldconfig is a wrapper around /sbin/ldconfig.real. +func (p ldconfigPath) normalize() ldconfigPath { + if !p.isHostRelative() { + return p + } + + path := string(p) + trimmedPath := strings.TrimSuffix(strings.TrimPrefix(path, "@"), ".real") + // If the .real path exists, we return that. + if _, err := os.Stat(trimmedPath + ".real"); err == nil { + return ldconfigPath("@" + trimmedPath + ".real") + } + // If the .real path does not exists (or cannot be read) we return the non-.real path. + return ldconfigPath("@" + trimmedPath) } // NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary. // This is only done for host LDConfigs and is required to handle systems where // /sbin/ldconfig is a wrapper around /sbin/ldconfig.real. func NormalizeLDConfigPath(path string) string { - if !strings.HasPrefix(path, "@") { - return path - } - - trimmedPath := strings.TrimSuffix(strings.TrimPrefix(path, "@"), ".real") - // If the .real path exists, we return that. - if _, err := os.Stat(trimmedPath + ".real"); err == nil { - return "@" + trimmedPath + ".real" - } - // If the .real path does not exists (or cannot be read) we return the non-.real path. - return "@" + trimmedPath + return string(ldconfigPath(path).normalize()) } diff --git a/internal/config/cli_test.go b/internal/config/cli_test.go index 100bea95..0d7bf27b 100644 --- a/internal/config/cli_test.go +++ b/internal/config/cli_test.go @@ -33,7 +33,7 @@ func TestNormalizeLDConfigPath(t *testing.T) { testCases := []struct { description string - ldconfig string + ldconfig ldconfigPath expected string }{ { @@ -51,12 +51,12 @@ func TestNormalizeLDConfigPath(t *testing.T) { }, { description: "host .real file exists is returned", - ldconfig: "@" + filepath.Join(testDir, "exists.real"), + ldconfig: ldconfigPath("@" + filepath.Join(testDir, "exists.real")), expected: "@" + filepath.Join(testDir, "exists.real"), }, { description: "host resolves .real file", - ldconfig: "@" + filepath.Join(testDir, "exists"), + ldconfig: ldconfigPath("@" + filepath.Join(testDir, "exists")), expected: "@" + filepath.Join(testDir, "exists.real"), }, { diff --git a/internal/config/config.go b/internal/config/config.go index 33b8ba4d..652cc83a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -18,6 +18,7 @@ package config import ( "bufio" + "errors" "os" "path/filepath" "strings" @@ -51,6 +52,8 @@ var ( NVIDIAContainerToolkitExecutable = "nvidia-container-toolkit" ) +var errInvalidConfig = errors.New("invalid config value") + // Config represents the contents of the config.toml file for the NVIDIA Container Toolkit // Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go type Config struct { @@ -127,8 +130,20 @@ func GetDefault() (*Config, error) { return &d, nil } -func getLdConfigPath() string { - return NormalizeLDConfigPath("@/sbin/ldconfig") +// assertValid checks for a valid config. +func (c *Config) assertValid() error { + err := c.NVIDIAContainerCLIConfig.Ldconfig.assertValid(c.Features.AllowLDConfigFromContainer.IsEnabled()) + if err != nil { + return errors.Join(err, errInvalidConfig) + } + return nil +} + +// getLdConfigPath allows us to override this function for testing. +var getLdConfigPath = getLdConfigPathStub + +func getLdConfigPathStub() ldconfigPath { + return ldconfigPath("@/sbin/ldconfig").normalize() } func getUserGroup() string { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 0873ebd2..963058e1 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -44,23 +44,21 @@ func TestGetConfigWithCustomConfig(t *testing.T) { func TestGetConfig(t *testing.T) { testCases := []struct { - description string - contents []string - expectedError error - inspectLdconfig bool - distIdsLike []string - expectedConfig *Config + description string + contents []string + expectedError error + distIdsLike []string + expectedConfig *Config }{ { - description: "empty config is default", - inspectLdconfig: true, + description: "empty config is default", expectedConfig: &Config{ AcceptEnvvarUnprivileged: true, SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video", NVIDIAContainerCLIConfig: ContainerCLIConfig{ Root: "", LoadKmods: true, - Ldconfig: "WAS_CHECKED", + Ldconfig: "@/test/ld/config/path", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/dev/null", @@ -93,7 +91,7 @@ func TestGetConfig(t *testing.T) { "supported-driver-capabilities = \"compute,utility\"", "nvidia-container-cli.root = \"/bar/baz\"", "nvidia-container-cli.load-kmods = false", - "nvidia-container-cli.ldconfig = \"/foo/bar/ldconfig\"", + "nvidia-container-cli.ldconfig = \"@/foo/bar/ldconfig\"", "nvidia-container-cli.user = \"foo:bar\"", "nvidia-container-runtime.debug = \"/foo/bar\"", "nvidia-container-runtime.discover-mode = \"not-legacy\"", @@ -113,7 +111,7 @@ func TestGetConfig(t *testing.T) { NVIDIAContainerCLIConfig: ContainerCLIConfig{ Root: "/bar/baz", LoadKmods: false, - Ldconfig: "/foo/bar/ldconfig", + Ldconfig: "@/foo/bar/ldconfig", User: "foo:bar", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ @@ -146,6 +144,53 @@ func TestGetConfig(t *testing.T) { }, }, }, + { + description: "feature allows ldconfig to be overridden", + contents: []string{ + "[nvidia-container-cli]", + "ldconfig = \"/foo/bar/ldconfig\"", + "[features]", + "allow-ldconfig-from-container = true", + }, + expectedConfig: &Config{ + AcceptEnvvarUnprivileged: true, + SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video", + NVIDIAContainerCLIConfig: ContainerCLIConfig{ + Ldconfig: "/foo/bar/ldconfig", + LoadKmods: true, + }, + NVIDIAContainerRuntimeConfig: RuntimeConfig{ + DebugFilePath: "/dev/null", + LogLevel: "info", + Runtimes: []string{"docker-runc", "runc", "crun"}, + Mode: "auto", + Modes: modesConfig{ + CSV: csvModeConfig{ + MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d", + }, + CDI: cdiModeConfig{ + DefaultKind: "nvidia.com/gpu", + AnnotationPrefixes: []string{ + "cdi.k8s.io/", + }, + SpecDirs: []string{ + "/etc/cdi", + "/var/run/cdi", + }, + }, + }, + }, + NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{ + Path: "nvidia-container-runtime-hook", + }, + NVIDIACTKConfig: CTKConfig{ + Path: "nvidia-ctk", + }, + Features: features{ + AllowLDConfigFromContainer: ptr(feature(true)), + }, + }, + }, { description: "config options set in section", contents: []string{ @@ -154,7 +199,7 @@ func TestGetConfig(t *testing.T) { "[nvidia-container-cli]", "root = \"/bar/baz\"", "load-kmods = false", - "ldconfig = \"/foo/bar/ldconfig\"", + "ldconfig = \"@/foo/bar/ldconfig\"", "user = \"foo:bar\"", "[nvidia-container-runtime]", "debug = \"/foo/bar\"", @@ -179,7 +224,7 @@ func TestGetConfig(t *testing.T) { NVIDIAContainerCLIConfig: ContainerCLIConfig{ Root: "/bar/baz", LoadKmods: false, - Ldconfig: "/foo/bar/ldconfig", + Ldconfig: "@/foo/bar/ldconfig", User: "foo:bar", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ @@ -213,16 +258,15 @@ func TestGetConfig(t *testing.T) { }, }, { - description: "suse config", - distIdsLike: []string{"suse", "opensuse"}, - inspectLdconfig: true, + description: "suse config", + distIdsLike: []string{"suse", "opensuse"}, expectedConfig: &Config{ AcceptEnvvarUnprivileged: true, SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video", NVIDIAContainerCLIConfig: ContainerCLIConfig{ Root: "", LoadKmods: true, - Ldconfig: "WAS_CHECKED", + Ldconfig: "@/test/ld/config/path", User: "root:video", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ @@ -250,9 +294,8 @@ func TestGetConfig(t *testing.T) { }, }, { - description: "suse config overrides user", - distIdsLike: []string{"suse", "opensuse"}, - inspectLdconfig: true, + description: "suse config overrides user", + distIdsLike: []string{"suse", "opensuse"}, contents: []string{ "nvidia-container-cli.user = \"foo:bar\"", }, @@ -262,7 +305,7 @@ func TestGetConfig(t *testing.T) { NVIDIAContainerCLIConfig: ContainerCLIConfig{ Root: "", LoadKmods: true, - Ldconfig: "WAS_CHECKED", + Ldconfig: "@/test/ld/config/path", User: "foo:bar", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ @@ -293,6 +336,7 @@ func TestGetConfig(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { + defer setGetLdConfigPathForTest()() defer setGetDistIDLikeForTest(tc.distIdsLike)() reader := strings.NewReader(strings.Join(tc.contents, "\n")) @@ -305,21 +349,63 @@ func TestGetConfig(t *testing.T) { cfg, err := tomlCfg.Config() require.NoError(t, err) - // We first handle the ldconfig path since this is currently system-dependent. - if tc.inspectLdconfig { - ldconfig := cfg.NVIDIAContainerCLIConfig.Ldconfig - require.True(t, strings.HasPrefix(ldconfig, "@/sbin/ldconfig")) - remaining := strings.TrimPrefix(ldconfig, "@/sbin/ldconfig") - require.True(t, remaining == ".real" || remaining == "") - - cfg.NVIDIAContainerCLIConfig.Ldconfig = "WAS_CHECKED" - } - require.EqualValues(t, tc.expectedConfig, cfg) }) } } +func TestAssertValid(t *testing.T) { + defer setGetLdConfigPathForTest()() + + testCases := []struct { + description string + config *Config + expectedError error + }{ + { + description: "default is valid", + config: func() *Config { + config, _ := GetDefault() + return config + }(), + }, + { + description: "alternative host ldconfig path is valid", + config: &Config{ + NVIDIAContainerCLIConfig: ContainerCLIConfig{ + Ldconfig: "@/some/host/path", + }, + }, + }, + { + description: "non-host path is invalid", + config: &Config{ + NVIDIAContainerCLIConfig: ContainerCLIConfig{ + Ldconfig: "/non/host/path", + }, + }, + expectedError: errInvalidConfig, + }, + { + description: "feature flag allows non-host path", + config: &Config{ + NVIDIAContainerCLIConfig: ContainerCLIConfig{ + Ldconfig: "/non/host/path", + }, + Features: features{ + AllowLDConfigFromContainer: ptr(feature(true)), + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + require.ErrorIs(t, tc.config.assertValid(), tc.expectedError) + }) + } +} + // setGetDistIDsLikeForTest overrides the distribution IDs that would normally be read from the /etc/os-release file. func setGetDistIDLikeForTest(ids []string) func() { if ids == nil { @@ -335,3 +421,18 @@ func setGetDistIDLikeForTest(ids []string) func() { getDistIDLike = original } } + +// prt returns a reference to whatever type is passed into it +func ptr[T any](x T) *T { + return &x +} + +func setGetLdConfigPathForTest() func() { + previous := getLdConfigPath + getLdConfigPath = func() ldconfigPath { + return "@/test/ld/config/path" + } + return func() { + getLdConfigPath = previous + } +} diff --git a/internal/config/features.go b/internal/config/features.go index 80d3c95a..b418daef 100644 --- a/internal/config/features.go +++ b/internal/config/features.go @@ -21,14 +21,15 @@ type features struct { // DisableImexChannelCreation ensures that the implicit creation of // requested IMEX channels is skipped when invoking the nvidia-container-cli. DisableImexChannelCreation *feature `toml:"disable-imex-channel-creation,omitempty"` + // AllowLDConfigFromContainer allows non-host ldconfig paths to be used. + // If this feature flag is not set to 'true' only host-rooted config paths + // (i.e. paths starting with an '@' are considered valid) + AllowLDConfigFromContainer *feature `toml:"allow-ldconfig-from-container,omitempty"` } -//nolint:unused type feature bool // IsEnabled checks whether a feature is explicitly enabled. -// -//nolint:unused func (f *feature) IsEnabled() bool { if f != nil { return bool(*f) diff --git a/internal/config/toml.go b/internal/config/toml.go index 4c39e6b4..fbda458f 100644 --- a/internal/config/toml.go +++ b/internal/config/toml.go @@ -108,6 +108,19 @@ func loadConfigTomlFrom(reader io.Reader) (*Toml, error) { // Config returns the typed config associated with the toml tree. func (t *Toml) Config() (*Config, error) { + cfg, err := t.configNoOverrides() + if err != nil { + return nil, err + } + if err := cfg.assertValid(); err != nil { + return nil, err + } + return cfg, nil +} + +// configNoOverrides returns the typed config associated with the toml tree. +// This config does not include feature-specific overrides. +func (t *Toml) configNoOverrides() (*Config, error) { cfg, err := GetDefault() if err != nil { return nil, err diff --git a/internal/config/toml_test.go b/internal/config/toml_test.go index e017db15..f7c649f7 100644 --- a/internal/config/toml_test.go +++ b/internal/config/toml_test.go @@ -198,9 +198,12 @@ func TestTomlContents(t *testing.T) { } func TestConfigFromToml(t *testing.T) { + defer setGetLdConfigPathForTest()() + testCases := []struct { description string contents map[string]interface{} + expectedError error expectedConfig *Config }{ { @@ -226,13 +229,39 @@ func TestConfigFromToml(t *testing.T) { return c }(), }, + { + description: "invalid ldconfig value raises error", + contents: map[string]interface{}{ + "nvidia-container-cli": map[string]interface{}{ + "ldconfig": "/some/ldconfig/path", + }, + }, + expectedError: errInvalidConfig, + }, + { + description: "feature allows ldconfig override", + contents: map[string]interface{}{ + "nvidia-container-cli": map[string]interface{}{ + "ldconfig": "/some/ldconfig/path", + }, + "features": map[string]interface{}{ + "allow-ldconfig-from-container": true, + }, + }, + expectedConfig: func() *Config { + c, _ := GetDefault() + c.NVIDIAContainerCLIConfig.Ldconfig = "/some/ldconfig/path" + c.Features.AllowLDConfigFromContainer = ptr(feature(true)) + return c + }(), + }, } for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { tomlCfg := fromMap(tc.contents) config, err := tomlCfg.Config() - require.NoError(t, err) + require.ErrorIs(t, err, tc.expectedError) require.EqualValues(t, tc.expectedConfig, config) }) } diff --git a/tools/container/toolkit/toolkit_test.go b/tools/container/toolkit/toolkit_test.go index bab94c4a..358d9166 100644 --- a/tools/container/toolkit/toolkit_test.go +++ b/tools/container/toolkit/toolkit_test.go @@ -161,7 +161,7 @@ kind: example.com/class // Ensure that the config file has the required contents. // TODO: Add checks for additional config options. require.Equal(t, "/host/driver/root", cfg.NVIDIAContainerCLIConfig.Root) - require.Equal(t, "@/host/driver/root/sbin/ldconfig", cfg.NVIDIAContainerCLIConfig.Ldconfig) + require.Equal(t, "@/host/driver/root/sbin/ldconfig", string(cfg.NVIDIAContainerCLIConfig.Ldconfig)) require.EqualValues(t, filepath.Join(toolkitRoot, "nvidia-container-cli"), cfg.NVIDIAContainerCLIConfig.Path) require.EqualValues(t, filepath.Join(toolkitRoot, "nvidia-ctk"), cfg.NVIDIACTKConfig.Path)