From 838493b8b93ea21d3b77c1317074d2550c6a06d8 Mon Sep 17 00:00:00 2001 From: Jared Baur Date: Thu, 14 Dec 2023 16:46:00 -0800 Subject: [PATCH 1/2] Allow for customizing the path to ldconfig Since the `createContainer` `runc` hook runs with the environment that the container's config.json specifies, the path to `ldconfig` may not be easily resolvable if the host environment differs enough from the container (e.g. on a NixOS host where all binaries are under hashed paths in /nix/store with an Ubuntu container whose PATH contains FHS-style paths such as /bin and /usr/bin). This change allows for specifying exactly where ldconfig comes from. Signed-off-by: Jared Baur --- CHANGELOG.md | 1 + cmd/nvidia-ctk/cdi/generate/generate.go | 7 +++++++ .../hook/update-ldcache/update-ldcache.go | 9 ++++++++- internal/discover/ldconfig.go | 14 ++++++++++---- internal/discover/ldconfig_test.go | 11 ++++++++--- internal/platform-support/tegra/tegra.go | 10 +++++++++- pkg/nvcdi/common-nvml.go | 2 +- pkg/nvcdi/driver-nvml.go | 12 ++++++------ pkg/nvcdi/driver-wsl.go | 8 ++++---- pkg/nvcdi/lib-csv.go | 1 + pkg/nvcdi/lib-wsl.go | 2 +- pkg/nvcdi/lib.go | 1 + pkg/nvcdi/management.go | 2 +- pkg/nvcdi/options.go | 7 +++++++ 14 files changed, 65 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e758ce1..eb819582 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ * Use devRoot to resolve MIG device nodes. * Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems. * Add `crun` to the list of configured low-level runtimes. +* Added support for `--ldconfig-path` to `nvidia-ctk cdi generate` command. * [toolkit-container] Bump CUDA base image version to 12.3.1. 
diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index 4a7ae18d..4eed5d5d 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -48,6 +48,7 @@ type options struct { driverRoot string devRoot string nvidiaCTKPath string + ldconfigPath string mode string vendor string class string @@ -129,6 +130,11 @@ func (m command) build() *cli.Command { Usage: "Specify the path to use for the nvidia-ctk in the generated CDI specification. If this is left empty, the path will be searched.", Destination: &opts.nvidiaCTKPath, }, + &cli.StringFlag{ + Name: "ldconfig-path", + Usage: "Specify the path to use for ldconfig in the generated CDI specification", + Destination: &opts.ldconfigPath, + }, &cli.StringFlag{ Name: "vendor", Aliases: []string{"cdi-vendor"}, @@ -245,6 +251,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { nvcdi.WithDriverRoot(opts.driverRoot), nvcdi.WithDevRoot(opts.devRoot), nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath), + nvcdi.WithLdconfigPath(opts.ldconfigPath), nvcdi.WithDeviceNamer(deviceNamer), nvcdi.WithMode(opts.mode), nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()), diff --git a/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go b/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go index 10fd8398..b65a01c5 100644 --- a/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go +++ b/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go @@ -36,6 +36,7 @@ type command struct { type options struct { folders cli.StringSlice + ldconfigPath string containerSpec string } @@ -66,6 +67,12 @@ func (m command) build() *cli.Command { Usage: "Specify a folder to add to /etc/ld.so.conf before updating the ld cache", Destination: &cfg.folders, }, + &cli.StringFlag{ + Name: "ldconfig-path", + Usage: "Specify the path to the ldconfig program", + Destination: &cfg.ldconfigPath, + DefaultText: "/sbin/ldconfig", + }, &cli.StringFlag{ Name: 
"container-spec", Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN", @@ -87,7 +94,7 @@ func (m command) run(c *cli.Context, cfg *options) error { return fmt.Errorf("failed to determined container root: %v", err) } - ldconfigPath := m.resolveLDConfigPath("/sbin/ldconfig") + ldconfigPath := m.resolveLDConfigPath(cfg.ldconfigPath) args := []string{filepath.Base(ldconfigPath)} if containerRoot != "" { args = append(args, "-r", containerRoot) diff --git a/internal/discover/ldconfig.go b/internal/discover/ldconfig.go index 1a4c5955..a1182ed2 100644 --- a/internal/discover/ldconfig.go +++ b/internal/discover/ldconfig.go @@ -25,10 +25,11 @@ import ( ) // NewLDCacheUpdateHook creates a discoverer that updates the ldcache for the specified mounts. A logger can also be specified -func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCTKPath string) (Discover, error) { +func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCTKPath, ldconfigPath string) (Discover, error) { d := ldconfig{ logger: logger, nvidiaCTKPath: nvidiaCTKPath, + ldconfigPath: ldconfigPath, mountsFrom: mounts, } @@ -39,6 +40,7 @@ type ldconfig struct { None logger logger.Interface nvidiaCTKPath string + ldconfigPath string mountsFrom Discover } @@ -50,14 +52,20 @@ func (d ldconfig) Hooks() ([]Hook, error) { } h := CreateLDCacheUpdateHook( d.nvidiaCTKPath, + d.ldconfigPath, getLibraryPaths(mounts), ) return []Hook{h}, nil } // CreateLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache -func CreateLDCacheUpdateHook(executable string, libraries []string) Hook { +func CreateLDCacheUpdateHook(executable string, ldconfig string, libraries []string) Hook { var args []string + + if ldconfig != "" { + args = append(args, "--ldconfig-path", ldconfig) + } + for _, f := range uniqueFolders(libraries) { args = append(args, "--folder", f) } @@ -69,7 +77,6 @@ func 
CreateLDCacheUpdateHook(executable string, libraries []string) Hook { ) return hook - } // getLibraryPaths extracts the library dirs from the specified mounts @@ -86,7 +93,6 @@ func getLibraryPaths(mounts []Mount) []string { // isLibName checks if the specified filename is a library (i.e. ends in `.so*`) func isLibName(filename string) bool { - base := filepath.Base(filename) isLib, err := filepath.Match("lib?*.so*", base) diff --git a/internal/discover/ldconfig_test.go b/internal/discover/ldconfig_test.go index 8d72dde6..612c209e 100644 --- a/internal/discover/ldconfig_test.go +++ b/internal/discover/ldconfig_test.go @@ -26,6 +26,7 @@ import ( const ( testNvidiaCTKPath = "/foo/bar/nvidia-ctk" + testLdconfigPath = "/bar/baz/ldconfig" ) func TestLDCacheUpdateHook(t *testing.T) { @@ -33,6 +34,7 @@ func TestLDCacheUpdateHook(t *testing.T) { testCases := []struct { description string + ldconfigPath string mounts []Mount mountError error expectedError error @@ -75,6 +77,11 @@ func TestLDCacheUpdateHook(t *testing.T) { }, expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib"}, }, + { + description: "explicit ldconfig path is passed", + ldconfigPath: testLdconfigPath, + expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--ldconfig-path", testLdconfigPath}, + }, } for _, tc := range testCases { @@ -90,7 +97,7 @@ func TestLDCacheUpdateHook(t *testing.T) { Lifecycle: "createContainer", } - d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCTKPath) + d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCTKPath, tc.ldconfigPath) require.NoError(t, err) hooks, err := d.Hooks() @@ -114,10 +121,8 @@ func TestLDCacheUpdateHook(t *testing.T) { mounts, err := d.Mounts() require.NoError(t, err) require.Empty(t, mounts) - }) } - } func TestIsLibName(t *testing.T) { diff --git a/internal/platform-support/tegra/tegra.go b/internal/platform-support/tegra/tegra.go index 37a8af72..771b31f2 100644 --- 
a/internal/platform-support/tegra/tegra.go +++ b/internal/platform-support/tegra/tegra.go @@ -31,6 +31,7 @@ type tegraOptions struct { driverRoot string devRoot string nvidiaCTKPath string + ldconfigPath string librarySearchPaths []string ignorePatterns ignoreMountSpecPatterns @@ -79,7 +80,7 @@ func New(opts ...Option) (discover.Discover, error) { return nil, fmt.Errorf("failed to create CSV discoverer: %v", err) } - ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(o.logger, csvDiscoverer, o.nvidiaCTKPath) + ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(o.logger, csvDiscoverer, o.nvidiaCTKPath, o.ldconfigPath) if err != nil { return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err) } @@ -139,6 +140,13 @@ func WithNVIDIACTKPath(nvidiaCTKPath string) Option { } } +// WithLdconfigPath sets the path to the ldconfig program +func WithLdconfigPath(ldconfigPath string) Option { + return func(o *tegraOptions) { + o.ldconfigPath = ldconfigPath + } +} + // WithLibrarySearchPaths sets the library search paths for the discoverer. 
func WithLibrarySearchPaths(librarySearchPaths ...string) Option { return func(o *tegraOptions) { diff --git a/pkg/nvcdi/common-nvml.go b/pkg/nvcdi/common-nvml.go index 4c634a72..f4bfe30a 100644 --- a/pkg/nvcdi/common-nvml.go +++ b/pkg/nvcdi/common-nvml.go @@ -41,7 +41,7 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) { l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err) } - driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCTKPath, l.nvmllib) + driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCTKPath, l.ldconfigPath, l.nvmllib) if err != nil { return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err) } diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go index 052e3241..10d154d6 100644 --- a/pkg/nvcdi/driver-nvml.go +++ b/pkg/nvcdi/driver-nvml.go @@ -34,7 +34,7 @@ import ( // NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation. // The supplied NVML Library is used to query the expected driver version. 
-func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) { +func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) { if r := nvmllib.Init(); r != nvml.SUCCESS { return nil, fmt.Errorf("failed to initialize NVML: %v", r) } @@ -49,11 +49,11 @@ func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTK return nil, fmt.Errorf("failed to determine driver version: %v", r) } - return newDriverVersionDiscoverer(logger, driver, nvidiaCTKPath, version) + return newDriverVersionDiscoverer(logger, driver, nvidiaCTKPath, ldconfigPath, version) } -func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string, version string) (discover.Discover, error) { - libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCTKPath, version) +func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath, ldconfigPath, version string) (discover.Discover, error) { + libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCTKPath, ldconfigPath, version) if err != nil { return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err) } @@ -81,7 +81,7 @@ func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nv } // NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version. 
-func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string, version string) (discover.Discover, error) { +func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath, ldconfigPath, version string) (discover.Discover, error) { libraryPaths, err := getVersionLibs(logger, driver, version) if err != nil { return nil, fmt.Errorf("failed to get libraries for driver version: %v", err) @@ -97,7 +97,7 @@ func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nv libraryPaths, ) - hooks, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCTKPath) + hooks, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCTKPath, ldconfigPath) d := discover.Merge( libraries, diff --git a/pkg/nvcdi/driver-wsl.go b/pkg/nvcdi/driver-wsl.go index 00c9968b..e87bcb03 100644 --- a/pkg/nvcdi/driver-wsl.go +++ b/pkg/nvcdi/driver-wsl.go @@ -39,7 +39,7 @@ var requiredDriverStoreFiles = []string{ } // newWSLDriverDiscoverer returns a Discoverer for WSL2 drivers. -func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string) (discover.Discover, error) { +func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath, ldconfigPath string) (discover.Discover, error) { err := dxcore.Init() if err != nil { return nil, fmt.Errorf("failed to initialize dxcore: %v", err) @@ -56,11 +56,11 @@ func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, nvidiaCT } logger.Infof("Using WSL driver store paths: %v", driverStorePaths) - return newWSLDriverStoreDiscoverer(logger, driverRoot, nvidiaCTKPath, driverStorePaths) + return newWSLDriverStoreDiscoverer(logger, driverRoot, nvidiaCTKPath, ldconfigPath, driverStorePaths) } // newWSLDriverStoreDiscoverer returns a Discoverer for WSL2 drivers in the driver store associated with a dxcore adapter. 
-func newWSLDriverStoreDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, driverStorePaths []string) (discover.Discover, error) { +func newWSLDriverStoreDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, ldconfigPath string, driverStorePaths []string) (discover.Discover, error) { var searchPaths []string seen := make(map[string]bool) for _, path := range driverStorePaths { @@ -93,7 +93,7 @@ func newWSLDriverStoreDiscoverer(logger logger.Interface, driverRoot string, nvi nvidiaCTKPath: nvidiaCTKPath, } - ldcacheHook, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCTKPath) + ldcacheHook, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCTKPath, ldconfigPath) d := discover.Merge( libraries, diff --git a/pkg/nvcdi/lib-csv.go b/pkg/nvcdi/lib-csv.go index 31604345..77b5a6d4 100644 --- a/pkg/nvcdi/lib-csv.go +++ b/pkg/nvcdi/lib-csv.go @@ -45,6 +45,7 @@ func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) { tegra.WithDriverRoot(l.driverRoot), tegra.WithDevRoot(l.devRoot), tegra.WithNVIDIACTKPath(l.nvidiaCTKPath), + tegra.WithLdconfigPath(l.ldconfigPath), tegra.WithCSVFiles(l.csvFiles), tegra.WithLibrarySearchPaths(l.librarySearchPaths...), tegra.WithIngorePatterns(l.csvIgnorePatterns...), diff --git a/pkg/nvcdi/lib-wsl.go b/pkg/nvcdi/lib-wsl.go index 385007cf..620aa75d 100644 --- a/pkg/nvcdi/lib-wsl.go +++ b/pkg/nvcdi/lib-wsl.go @@ -54,7 +54,7 @@ func (l *wsllib) GetAllDeviceSpecs() ([]specs.Device, error) { // GetCommonEdits generates a CDI specification that can be used for ANY devices func (l *wsllib) GetCommonEdits() (*cdi.ContainerEdits, error) { - driver, err := newWSLDriverDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath) + driver, err := newWSLDriverDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.ldconfigPath) if err != nil { return nil, fmt.Errorf("failed to create discoverer for WSL driver: %v", err) } diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index 
2424c84b..80916540 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -48,6 +48,7 @@ type nvcdilib struct { driverRoot string devRoot string nvidiaCTKPath string + ldconfigPath string librarySearchPaths []string csvFiles []string diff --git a/pkg/nvcdi/management.go b/pkg/nvcdi/management.go index 8c3d4b32..f21ac34b 100644 --- a/pkg/nvcdi/management.go +++ b/pkg/nvcdi/management.go @@ -66,7 +66,7 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) { return nil, fmt.Errorf("failed to get CUDA version: %v", err) } - driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCTKPath, version) + driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCTKPath, m.ldconfigPath, version) if err != nil { return nil, fmt.Errorf("failed to create driver library discoverer: %v", err) } diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go index e1c7cf52..731e2ee6 100644 --- a/pkg/nvcdi/options.go +++ b/pkg/nvcdi/options.go @@ -69,6 +69,13 @@ func WithNVIDIACTKPath(path string) Option { } } +// WithLdconfigPath sets the path to the ldconfig program +func WithLdconfigPath(path string) Option { + return func(l *nvcdilib) { + l.ldconfigPath = path + } +} + // WithNvmlLib sets the nvml library for the library func WithNvmlLib(nvmllib nvml.Interface) Option { return func(l *nvcdilib) { From d80657dd0a9d1826ad884aa53ad8631f349b7597 Mon Sep 17 00:00:00 2001 From: Jared Baur Date: Thu, 14 Dec 2023 16:49:55 -0800 Subject: [PATCH 2/2] Explicitly set ldconfig cache and config file Since the `update-ldcache` hook uses the host's `ldconfig`, the default cache and config files configured on the host will be used. If those defaults differ from what nvidia-ctk expects them to be (/etc/ld.so.cache and /etc/ld.so.conf, respectively), then the hook will fail. This change makes the call to ldconfig explicit about which cache and config files are used. 
Signed-off-by: Jared Baur --- cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go b/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go index b65a01c5..55ab7116 100644 --- a/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go +++ b/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go @@ -100,21 +100,27 @@ func (m command) run(c *cli.Context, cfg *options) error { args = append(args, "-r", containerRoot) } - if !root(containerRoot).hasPath("/etc/ld.so.cache") { + if root(containerRoot).hasPath("/etc/ld.so.cache") { + args = append(args, "-C", "/etc/ld.so.cache") + } else { m.logger.Debugf("No ld.so.cache found, skipping update") args = append(args, "-N") } folders := cfg.folders.Value() if root(containerRoot).hasPath("/etc/ld.so.conf.d") { - err = m.createConfig(containerRoot, folders) + err := m.createConfig(containerRoot, folders) if err != nil { - return fmt.Errorf("failed to update ld.so.conf: %v", err) + return fmt.Errorf("failed to update ld.so.conf.d: %v", err) } } else { args = append(args, folders...) } + // Explicitly specify using /etc/ld.so.conf since the host's ldconfig may + // be configured to use a different config file by default. + args = append(args, "-f", "/etc/ld.so.conf") + //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection return syscall.Exec(ldconfigPath, args, nil) }