From 79149571056e752bb2431461f783fca90688b7c5 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 16 Mar 2022 11:22:31 +0200 Subject: [PATCH 1/8] Refactor hook creation Signed-off-by: Evan Lezar --- internal/discover/legacy.go | 43 +++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/internal/discover/legacy.go b/internal/discover/legacy.go index 0d0d8e38..0f7a425b 100644 --- a/internal/discover/legacy.go +++ b/internal/discover/legacy.go @@ -22,20 +22,7 @@ import ( "github.com/sirupsen/logrus" ) -type legacy struct { - None - logger *logrus.Logger - lookup lookup.Locator -} - -const ( - nvidiaContainerRuntimeHookExecutable = "nvidia-container-runtime-hook" - hookDefaultFilePath = "/usr/bin/nvidia-container-runtime-hook" -) - -var _ Discover = (*legacy)(nil) - -// NewLegacyDiscoverer creates a discoverer for the legacy runtime +// NewLegacyDiscoverer creates a discoverer for the experimental runtime func NewLegacyDiscoverer(logger *logrus.Logger, root string) (Discover, error) { d := legacy{ logger: logger, @@ -45,13 +32,23 @@ func NewLegacyDiscoverer(logger *logrus.Logger, root string) (Discover, error) { return &d, nil } -// Hooks returns the "legacy" NVIDIA Container Runtime hook. This hook calls out -// to the nvidia-container-cli to make modifications to the container as defined -// in libnvidia-container. -func (d legacy) Hooks() ([]Hook, error) { - var hooks []Hook +type legacy struct { + None + logger *logrus.Logger + lookup lookup.Locator +} - hookPath := hookDefaultFilePath +var _ Discover = (*legacy)(nil) + +const ( + nvidiaContainerRuntimeHookExecutable = "nvidia-container-runtime-hook" + nvidiaContainerRuntimeHookDefaultFilePath = "/usr/bin/nvidia-container-runtime-hook" +) + +// Hooks returns the "legacy" NVIDIA Container Runtime hook. This mirrors the behaviour of the stable +// modifier. 
+func (d legacy) Hooks() ([]Hook, error) { + hookPath := nvidiaContainerRuntimeHookDefaultFilePath targets, err := d.lookup.Locate(nvidiaContainerRuntimeHookExecutable) if err != nil { d.logger.Warnf("Failed to locate %v: %v", nvidiaContainerRuntimeHookExecutable, err) @@ -64,11 +61,11 @@ func (d legacy) Hooks() ([]Hook, error) { d.logger.Debugf("Using NVIDIA Container Runtime Hook path %v", hookPath) args := []string{hookPath, "prestart"} - legacyHook := Hook{ + h := Hook{ Lifecycle: cdi.PrestartHook, Path: hookPath, Args: args, } - hooks = append(hooks, legacyHook) - return hooks, nil + + return []Hook{h}, nil } From c945cc714d307d614de9a2cad0211be5e822cc03 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 13 Jan 2022 13:29:00 +0100 Subject: [PATCH 2/8] Add stub nvidia-ctk CLI This change adds an nvidia-ctk CLI that is used as the basis for utilities related to the NVIDIA Container Toolkit. Signed-off-by: Evan Lezar --- .gitignore | 1 + cmd/nvidia-ctk/README.md | 3 ++ cmd/nvidia-ctk/main.go | 79 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 cmd/nvidia-ctk/README.md create mode 100644 cmd/nvidia-ctk/main.go diff --git a/.gitignore b/.gitignore index b40619c2..588f7091 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ dist /test/output/ /nvidia-container-runtime /nvidia-container-toolkit +/nvidia-ctk /shared-* diff --git a/cmd/nvidia-ctk/README.md b/cmd/nvidia-ctk/README.md new file mode 100644 index 00000000..1080e2f5 --- /dev/null +++ b/cmd/nvidia-ctk/README.md @@ -0,0 +1,3 @@ +# NVIDIA Container Toolkit CLI + +The NVIDIA Container Toolkit CLI `nvidia-ctk` provides a number of utilities that are useful for working with the NVIDIA Container Toolkit. diff --git a/cmd/nvidia-ctk/main.go b/cmd/nvidia-ctk/main.go new file mode 100644 index 00000000..a2b77bb7 --- /dev/null +++ b/cmd/nvidia-ctk/main.go @@ -0,0 +1,79 @@ +/** +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package main + +import ( + "os" + + log "github.com/sirupsen/logrus" + cli "github.com/urfave/cli/v2" +) + +var version string + +var logger = log.New() + +// config defines the options that can be set for the CLI through config files, +// environment variables, or command line flags +type config struct { + // Debug indicates whether the CLI is started in "debug" mode + Debug bool +} + +func main() { + // Create a config struct to hold the parsed environment variables or command line flags + config := config{} + + // Create the top-level CLI + c := cli.NewApp() + c.UseShortOptionHandling = true + c.EnableBashCompletion = true + c.Usage = "Tools to configure the NVIDIA Container Toolkit" + c.Version = version + + // Setup the flags for this command + c.Flags = []cli.Flag{ + &cli.BoolFlag{ + Name: "debug", + Aliases: []string{"d"}, + Usage: "Enable debug-level logging", + Destination: &config.Debug, + EnvVars: []string{"NVIDIA_CTK_DEBUG"}, + }, + } + + // Set log-level for all subcommands + c.Before = func(c *cli.Context) error { + logLevel := log.InfoLevel + if config.Debug { + logLevel = log.DebugLevel + } + + logger.SetLevel(logLevel) + return nil + } + + // Define the subcommands + c.Commands = []*cli.Command{} + + // Run the CLI + err := c.Run(os.Args) + if err != nil { + log.Errorf("%v", err) + log.Exit(1) + } +} From 1c892af2159a565b1eaec73292596b3c1e39996f Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 
14 Mar 2022 13:53:22 +0200 Subject: [PATCH 3/8] Add hook command to nvidia-ctk with update-ldcache subcommand Signed-off-by: Evan Lezar --- cmd/nvidia-ctk/hook/hook.go | 50 ++++++ .../hook/update-ldcache/update-ldcache.go | 155 ++++++++++++++++++ cmd/nvidia-ctk/main.go | 5 +- internal/oci/spec_file.go | 6 +- internal/oci/spec_file_test.go | 2 +- 5 files changed, 213 insertions(+), 5 deletions(-) create mode 100644 cmd/nvidia-ctk/hook/hook.go create mode 100644 cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go diff --git a/cmd/nvidia-ctk/hook/hook.go b/cmd/nvidia-ctk/hook/hook.go new file mode 100644 index 00000000..4d85dcd5 --- /dev/null +++ b/cmd/nvidia-ctk/hook/hook.go @@ -0,0 +1,50 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package hook + +import ( + ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache" + "github.com/sirupsen/logrus" + "github.com/urfave/cli/v2" +) + +type hookCommand struct { + logger *logrus.Logger +} + +// NewCommand constructs a hook command with the specified logger +func NewCommand(logger *logrus.Logger) *cli.Command { + c := hookCommand{ + logger: logger, + } + return c.build() +} + +// build +func (m hookCommand) build() *cli.Command { + // Create the 'hook' command + hook := cli.Command{ + Name: "hook", + Usage: "A collection of hooks that may be injected into an OCI spec", + } + + hook.Subcommands = []*cli.Command{ + ldcache.NewCommand(m.logger), + } + + return &hook +} diff --git a/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go b/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go new file mode 100644 index 00000000..c0132b33 --- /dev/null +++ b/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go @@ -0,0 +1,155 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package ldcache + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "syscall" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" + "github.com/urfave/cli/v2" +) + +type command struct { + logger *logrus.Logger +} + +type config struct { + folders cli.StringSlice + containerSpec string +} + +// NewCommand constructs an update-ldcache command with the specified logger +func NewCommand(logger *logrus.Logger) *cli.Command { + c := command{ + logger: logger, + } + return c.build() +} + +// build the update-ldcache command +func (m command) build() *cli.Command { + cfg := config{} + + // Create the 'update-ldcache' command + c := cli.Command{ + Name: "update-ldcache", + Usage: "Update ldcache in a container by running ldconfig", + Action: func(c *cli.Context) error { + return m.run(c, &cfg) + }, + } + + c.Flags = []cli.Flag{ + &cli.StringSliceFlag{ + Name: "folders", + Usage: "Specifiy the additional folders to add to /etc/ld.so.conf before updating the ld cache", + Destination: &cfg.folders, + }, + &cli.StringFlag{ + Name: "containerSpec", + Usage: "Specify the path to the OCI container spec. 
If empty or '-' the spec will be read from STDIN", + Destination: &cfg.containerSpec, + }, + } + + return &c +} + +func (m command) run(c *cli.Context, cfg *config) error { + var s specs.State + + inputReader := os.Stdin + if cfg.containerSpec != "" && cfg.containerSpec != "-" { + inputFile, err := os.Open(cfg.containerSpec) + if err != nil { + return fmt.Errorf("failed to open intput: %v", err) + } + defer inputFile.Close() + inputReader = inputFile + } + + d := json.NewDecoder(inputReader) + if err := d.Decode(&s); err != nil { + return fmt.Errorf("failed to decode container state: %v", err) + } + + specFilePath := oci.GetSpecFilePath(s.Bundle) + specFile, err := os.Open(specFilePath) + if err != nil { + return fmt.Errorf("failed to open OCI spec file: %v", err) + } + defer specFile.Close() + + spec, err := oci.LoadFrom(specFile) + if err != nil { + return fmt.Errorf("failed to load OCI spec: %v", err) + } + + var containerRoot string + if spec.Root != nil { + containerRoot = spec.Root.Path + } + + err = m.createConfig(containerRoot, cfg.folders.Value()) + if err != nil { + return fmt.Errorf("failed to update ld.so.conf: %v", err) + } + + args := []string{"/sbin/ldconfig"} + if containerRoot != "" { + args = append(args, "-r", containerRoot) + } + + return syscall.Exec(args[0], args, nil) +} + +// createConfig creates (or updates) /etc/ld.so.conf.d/nvcr-*.conf in the container +// to include the required paths. 
+func (m command) createConfig(root string, folders []string) error { + if len(folders) == 0 { + m.logger.Debugf("No folders to add to /etc/ld.so.conf") + return nil + } + + configFile, err := os.CreateTemp(filepath.Join(root, "/etc/ld.so.conf.d"), "nvcr-*.conf") + if err != nil { + return fmt.Errorf("failed to create config file: %v", err) + } + defer configFile.Close() + + m.logger.Debugf("Adding folders %v to %v", folders, configFile.Name()) + + configured := make(map[string]bool) + for _, folder := range folders { + if configured[folder] { + continue + } + _, err = configFile.WriteString(fmt.Sprintf("%s\n", folder)) + if err != nil { + return fmt.Errorf("failed to update ld.so.conf.d: %v", err) + } + configured[folder] = true + } + + return nil +} diff --git a/cmd/nvidia-ctk/main.go b/cmd/nvidia-ctk/main.go index a2b77bb7..08374ed6 100644 --- a/cmd/nvidia-ctk/main.go +++ b/cmd/nvidia-ctk/main.go @@ -19,6 +19,7 @@ package main import ( "os" + "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook" log "github.com/sirupsen/logrus" cli "github.com/urfave/cli/v2" ) @@ -68,7 +69,9 @@ func main() { } // Define the subcommands - c.Commands = []*cli.Command{} + c.Commands = []*cli.Command{ + hook.NewCommand(logger), + } // Run the CLI err := c.Run(os.Args) diff --git a/internal/oci/spec_file.go b/internal/oci/spec_file.go index ff0cbb46..3465652d 100644 --- a/internal/oci/spec_file.go +++ b/internal/oci/spec_file.go @@ -52,7 +52,7 @@ func (s *fileSpec) Load() error { } defer specFile.Close() - spec, err := loadFrom(specFile) + spec, err := LoadFrom(specFile) if err != nil { return fmt.Errorf("error loading OCI specification from file: %v", err) } @@ -60,8 +60,8 @@ func (s *fileSpec) Load() error { return nil } -// loadFrom reads the contents of the OCI spec from the specified io.Reader. -func loadFrom(reader io.Reader) (*specs.Spec, error) { +// LoadFrom reads the contents of the OCI spec from the specified io.Reader. 
+func LoadFrom(reader io.Reader) (*specs.Spec, error) { decoder := json.NewDecoder(reader) var spec specs.Spec diff --git a/internal/oci/spec_file_test.go b/internal/oci/spec_file_test.go index 94dfb3b3..e1c1fe0f 100644 --- a/internal/oci/spec_file_test.go +++ b/internal/oci/spec_file_test.go @@ -44,7 +44,7 @@ func TestLoadFrom(t *testing.T) { for i, tc := range testCases { var spec *specs.Spec - spec, err := loadFrom(bytes.NewReader(tc.contents)) + spec, err := LoadFrom(bytes.NewReader(tc.contents)) if tc.isError { require.Error(t, err, "%d: %v", i, tc) From 740bd3fb9df28e378bb4d70e5b3a5d22518a4f56 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 29 Mar 2022 14:02:40 +0200 Subject: [PATCH 4/8] Add nvidia-ctk config section Signed-off-by: Evan Lezar --- internal/config/config.go | 3 ++ .../{runtime_test.go => config_test.go} | 19 +++++++- internal/config/toolkit-cli.go | 46 +++++++++++++++++++ 3 files changed, 66 insertions(+), 2 deletions(-) rename internal/config/{runtime_test.go => config_test.go} (87%) create mode 100644 internal/config/toolkit-cli.go diff --git a/internal/config/config.go b/internal/config/config.go index 8e328487..9f4f16f3 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -38,6 +38,7 @@ var ( // Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go type Config struct { NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"` + NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"` NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"` } @@ -83,6 +84,7 @@ func getConfigFrom(toml *toml.Tree) *Config { } cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml) + cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml) cfg.NVIDIAContainerRuntimeConfig = *getRuntimeConfigFrom(toml) return cfg @@ -92,6 +94,7 @@ func getConfigFrom(toml *toml.Tree) *Config { func getDefaultConfig() *Config { c := Config{ NVIDIAContainerCLIConfig: 
*getDefaultContainerCLIConfig(), + NVIDIACTKConfig: *getDefaultCTKConfig(), NVIDIAContainerRuntimeConfig: *getDefaultRuntimeConfig(), } diff --git a/internal/config/runtime_test.go b/internal/config/config_test.go similarity index 87% rename from internal/config/runtime_test.go rename to internal/config/config_test.go index fe78b529..cf50c4ed 100644 --- a/internal/config/runtime_test.go +++ b/internal/config/config_test.go @@ -65,43 +65,58 @@ func TestGetConfig(t *testing.T) { Experimental: false, DiscoverMode: "auto", }, + NVIDIACTKConfig: CTKConfig{ + Path: "nvidia-ctk", + }, }, }, { description: "config options set inline", contents: []string{ + "nvidia-container-cli.root = \"/bar/baz\"", "nvidia-container-runtime.debug = \"/foo/bar\"", "nvidia-container-runtime.experimental = true", "nvidia-container-runtime.discover-mode = \"not-legacy\"", + "nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"", }, expectedConfig: &Config{ NVIDIAContainerCLIConfig: ContainerCLIConfig{ - Root: "", + Root: "/bar/baz", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/foo/bar", Experimental: true, DiscoverMode: "not-legacy", }, + NVIDIACTKConfig: CTKConfig{ + Path: "/foo/bar/nvidia-ctk", + }, }, }, { description: "config options set in section", contents: []string{ + "[nvidia-container-cli]", + "root = \"/bar/baz\"", "[nvidia-container-runtime]", "debug = \"/foo/bar\"", "experimental = true", "discover-mode = \"not-legacy\"", + "[nvidia-ctk]", + "path = \"/foo/bar/nvidia-ctk\"", }, expectedConfig: &Config{ NVIDIAContainerCLIConfig: ContainerCLIConfig{ - Root: "", + Root: "/bar/baz", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/foo/bar", Experimental: true, DiscoverMode: "not-legacy", }, + NVIDIACTKConfig: CTKConfig{ + Path: "/foo/bar/nvidia-ctk", + }, }, }, } diff --git a/internal/config/toolkit-cli.go b/internal/config/toolkit-cli.go new file mode 100644 index 00000000..1fe89717 --- /dev/null +++ b/internal/config/toolkit-cli.go @@ -0,0 +1,46 @@ +/** +# 
Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package config + +import "github.com/pelletier/go-toml" + +// CTKConfig stores the config options for the NVIDIA Container Toolkit CLI (nvidia-ctk) +type CTKConfig struct { + Path string `toml:"path"` +} + +// getCTKConfigFrom reads the nvidia-ctk config from the specified toml Tree. +func getCTKConfigFrom(toml *toml.Tree) *CTKConfig { + cfg := getDefaultCTKConfig() + + if toml == nil { + return cfg + } + + cfg.Path = toml.GetDefault("nvidia-ctk.path", cfg.Path).(string) + + return cfg +} + +// getDefaultCTKConfig defines the default values for the config +func getDefaultCTKConfig() *CTKConfig { + c := CTKConfig{ + Path: "nvidia-ctk", + } + + return &c +} From d970d0a627b92befc7ab0219b5f0a153fe875c24 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 14 Mar 2022 14:16:46 +0200 Subject: [PATCH 5/8] Add discovery for ldconfig hook that updates the LDCache This change adds a discovered hook for updating the ldcache as a container-create hook. The mounts from a discoverer are inspected to determine the folders that must be added to the cache using the nvidia-ctk hook update-ldcache command. This is added to the "csv" discovery mode for the experimental runtime. 
Signed-off-by: Evan Lezar --- .../modifier/experimental.go | 17 ++- internal/discover/discover.go | 6 + internal/discover/ldconfig.go | 126 ++++++++++++++++++ internal/discover/legacy.go | 4 +- internal/discover/list.go | 9 ++ internal/discover/none.go | 2 +- 6 files changed, 157 insertions(+), 7 deletions(-) create mode 100644 internal/discover/ldconfig.go diff --git a/cmd/nvidia-container-runtime/modifier/experimental.go b/cmd/nvidia-container-runtime/modifier/experimental.go index ead4ce19..b669de40 100644 --- a/cmd/nvidia-container-runtime/modifier/experimental.go +++ b/cmd/nvidia-container-runtime/modifier/experimental.go @@ -59,13 +59,16 @@ func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec } logger.Infof("Constructing modifier from config: %+v", cfg) - root := cfg.NVIDIAContainerCLIConfig.Root + config := &discover.Config{ + Root: cfg.NVIDIAContainerCLIConfig.Root, + NVIDIAContainerToolkitCLIExecutablePath: cfg.NVIDIACTKConfig.Path, + } var d discover.Discover switch resolveAutoDiscoverMode(logger, cfg.NVIDIAContainerRuntimeConfig.DiscoverMode) { case "legacy": - legacyDiscoverer, err := discover.NewLegacyDiscoverer(logger, root) + legacyDiscoverer, err := discover.NewLegacyDiscoverer(logger, config) if err != nil { return nil, fmt.Errorf("failed to create legacy discoverer: %v", err) } @@ -81,11 +84,17 @@ func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec csvFiles = csv.BaseFilesOnly(csvFiles) } - csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, root) + csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.Root) if err != nil { return nil, fmt.Errorf("failed to create CSV discoverer: %v", err) } - d = csvDiscoverer + + hooks, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, config) + if err != nil { + return nil, fmt.Errorf("failed to create hook discoverer: %v", err) + } + + d = discover.NewList(csvDiscoverer, hooks) default: return nil, 
fmt.Errorf("invalid discover mode: %v", cfg.NVIDIAContainerRuntimeConfig.DiscoverMode) } diff --git a/internal/discover/discover.go b/internal/discover/discover.go index 77a8fdd5..64dbef85 100644 --- a/internal/discover/discover.go +++ b/internal/discover/discover.go @@ -16,6 +16,12 @@ package discover +// Config represents the configuration options for discovery +type Config struct { + Root string + NVIDIAContainerToolkitCLIExecutablePath string +} + // Device represents a discovered character device. type Device struct { Path string diff --git a/internal/discover/ldconfig.go b/internal/discover/ldconfig.go new file mode 100644 index 00000000..f40befb7 --- /dev/null +++ b/internal/discover/ldconfig.go @@ -0,0 +1,126 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package discover + +import ( + "fmt" + "path/filepath" + "sort" + "strings" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" + "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" + "github.com/sirupsen/logrus" +) + +// NewLDCacheUpdateHook creates a discoverer that updates the ldcache for the specified mounts. 
A logger can also be specified +func NewLDCacheUpdateHook(logger *logrus.Logger, mounts Discover, cfg *Config) (Discover, error) { + d := ldconfig{ + logger: logger, + mountsFrom: mounts, + lookup: lookup.NewExecutableLocator(logger, cfg.Root), + nvidiaCTKExecutablePath: cfg.NVIDIAContainerToolkitCLIExecutablePath, + } + + return &d, nil +} + +const ( + nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk" +) + +type ldconfig struct { + None + logger *logrus.Logger + mountsFrom Discover + lookup lookup.Locator + nvidiaCTKExecutablePath string +} + +// Hooks checks the required mounts for libraries and returns a hook to update the LDcache for the discovered paths. +func (d ldconfig) Hooks() ([]Hook, error) { + mounts, err := d.mountsFrom.Mounts() + if err != nil { + return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err) + } + + libDirs := getLibDirs(mounts) + + hookPath := nvidiaCTKDefaultFilePath + targets, err := d.lookup.Locate(d.nvidiaCTKExecutablePath) + if err != nil { + d.logger.Warnf("Failed to locate %v: %v", d.nvidiaCTKExecutablePath, err) + } else if len(targets) == 0 { + d.logger.Warnf("%v not found", d.nvidiaCTKExecutablePath) + } else { + d.logger.Debugf("Found %v candidates: %v", d.nvidiaCTKExecutablePath, targets) + hookPath = targets[0] + } + d.logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath) + + args := []string{hookPath, "hook", "update-ldcache"} + for _, f := range libDirs { + args = append(args, "--folders", f) + } + h := Hook{ + Lifecycle: cdi.CreateContainerHook, + Path: hookPath, + Args: args, + } + + return []Hook{h}, nil +} + +// getLibDirs extracts the library dirs from the specified mounts +func getLibDirs(mounts []Mount) []string { + var paths []string + checked := make(map[string]bool) + + for _, m := range mounts { + dir := filepath.Dir(m.Path) + if dir == "" { + continue + } + + _, exists := checked[dir] + if exists { + continue + } + checked[dir] = isLibName(filepath.Base(m.Path)) + + if 
checked[dir] { + paths = append(paths, dir) + } + } + + sort.Strings(paths) + + return paths +} + +// isLibName checks if the specified filename is a library (i.e. ends in `.so*`) +func isLibName(filename string) bool { + parts := strings.Split(filename, ".") + + for _, p := range parts { + if p == "so" { + return true + } + } + + return false +} diff --git a/internal/discover/legacy.go b/internal/discover/legacy.go index 0f7a425b..adc4c0c5 100644 --- a/internal/discover/legacy.go +++ b/internal/discover/legacy.go @@ -23,10 +23,10 @@ import ( ) // NewLegacyDiscoverer creates a discoverer for the experimental runtime -func NewLegacyDiscoverer(logger *logrus.Logger, root string) (Discover, error) { +func NewLegacyDiscoverer(logger *logrus.Logger, cfg *Config) (Discover, error) { d := legacy{ logger: logger, - lookup: lookup.NewExecutableLocator(logger, root), + lookup: lookup.NewExecutableLocator(logger, cfg.Root), } return &d, nil diff --git a/internal/discover/list.go b/internal/discover/list.go index f19c8c15..a30cf0fc 100644 --- a/internal/discover/list.go +++ b/internal/discover/list.go @@ -27,6 +27,15 @@ type list struct { var _ Discover = (*list)(nil) +// NewList creates a discoverer that is the composite of a list of discoverers. 
+func NewList(d ...Discover) Discover { + l := list{ + discoverers: d, + } + + return &l +} + // Devices returns all devices from the included discoverers func (d list) Devices() ([]Device, error) { var allDevices []Device diff --git a/internal/discover/none.go b/internal/discover/none.go index 989a2e16..2a1d2c57 100644 --- a/internal/discover/none.go +++ b/internal/discover/none.go @@ -32,7 +32,7 @@ func (e None) Mounts() ([]Mount, error) { return []Mount{}, nil } -// Hooks returns and empty list of hooks +// Hooks returns an empty list of hooks func (e None) Hooks() ([]Hook, error) { return []Hook{}, nil } From 9c3c8e038a76426a8c00dfdab4d6c1a4d781443d Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 5 Apr 2022 09:35:27 +0200 Subject: [PATCH 6/8] Add cache for mounts This change adds a cache to the mounts type. This means that if called to get a list of folders, for example, the result is reused instead of recalculated. This also avoids duplicate logging. Signed-off-by: Evan Lezar --- internal/discover/csv.go | 4 ++-- internal/discover/csv_test.go | 10 +++++----- internal/discover/mounts.go | 15 ++++++++++++++- internal/discover/mounts_test.go | 15 ++++++++------- 4 files changed, 29 insertions(+), 15 deletions(-) diff --git a/internal/discover/csv.go b/internal/discover/csv.go index 59a1eed4..2a6cf6c9 100644 --- a/internal/discover/csv.go +++ b/internal/discover/csv.go @@ -119,7 +119,7 @@ func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]loo // Mounts returns the discovered mounts for the csvDiscoverer. // Note that if the discoverer is for the device MountSpecType, the list of mounts is empty. -func (d csvDiscoverer) Mounts() ([]Mount, error) { +func (d *csvDiscoverer) Mounts() ([]Mount, error) { if d.mountType == csv.MountSpecDev { return d.None.Mounts() } @@ -129,7 +129,7 @@ func (d csvDiscoverer) Mounts() ([]Mount, error) { // Devices returns the discovered devices for the csvDiscoverer. 
// Note that if the discoverer is not for the device MountSpecType, the list of devices is empty. -func (d csvDiscoverer) Devices() ([]Device, error) { +func (d *csvDiscoverer) Devices() ([]Device, error) { if d.mountType != csv.MountSpecDev { return d.None.Devices() } diff --git a/internal/discover/csv_test.go b/internal/discover/csv_test.go index f11d7af0..ac0ef482 100644 --- a/internal/discover/csv_test.go +++ b/internal/discover/csv_test.go @@ -31,7 +31,7 @@ func TestCSVDiscoverer(t *testing.T) { testCases := []struct { description string - input csvDiscoverer + input *csvDiscoverer expectedMounts []Mount expectedMountsError error expectedDevicesError error @@ -39,7 +39,7 @@ func TestCSVDiscoverer(t *testing.T) { }{ { description: "dev mounts are empty", - input: csvDiscoverer{ + input: &csvDiscoverer{ mounts: mounts{ lookup: &lookup.LocatorMock{ LocateFunc: func(string) ([]string, error) { @@ -54,14 +54,14 @@ func TestCSVDiscoverer(t *testing.T) { }, { description: "dev devices returns error for nil lookup", - input: csvDiscoverer{ + input: &csvDiscoverer{ mountType: "dev", }, expectedDevicesError: fmt.Errorf("no lookup defined"), }, { description: "lib devices are empty", - input: csvDiscoverer{ + input: &csvDiscoverer{ mounts: mounts{ lookup: &lookup.LocatorMock{ LocateFunc: func(string) ([]string, error) { @@ -76,7 +76,7 @@ func TestCSVDiscoverer(t *testing.T) { }, { description: "lib mounts returns error for nil lookup", - input: csvDiscoverer{ + input: &csvDiscoverer{ mountType: "lib", }, expectedMountsError: fmt.Errorf("no lookup defined"), diff --git a/internal/discover/mounts.go b/internal/discover/mounts.go index f294e522..07650509 100644 --- a/internal/discover/mounts.go +++ b/internal/discover/mounts.go @@ -18,6 +18,7 @@ package discover import ( "fmt" + "sync" "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" "github.com/sirupsen/logrus" @@ -31,15 +32,25 @@ type mounts struct { logger *logrus.Logger lookup lookup.Locator required 
[]string + sync.Mutex + cache []Mount } var _ Discover = (*mounts)(nil) -func (d mounts) Mounts() ([]Mount, error) { +func (d *mounts) Mounts() ([]Mount, error) { if d.lookup == nil { return nil, fmt.Errorf("no lookup defined") } + if d.cache != nil { + d.logger.Debugf("returning cached mounts") + return d.cache, nil + } + + d.Lock() + defer d.Unlock() + paths := make(map[string]bool) for _, candidate := range d.required { @@ -68,5 +79,7 @@ func (d mounts) Mounts() ([]Mount, error) { mounts = append(mounts, mount) } + d.cache = mounts + return mounts, nil } diff --git a/internal/discover/mounts_test.go b/internal/discover/mounts_test.go index 35e68e76..b443d18c 100644 --- a/internal/discover/mounts_test.go +++ b/internal/discover/mounts_test.go @@ -41,16 +41,17 @@ func TestMounts(t *testing.T) { description string expectedError error expectedMounts []Mount - input mounts + input *mounts }{ { description: "nill lookup returns error", expectedError: fmt.Errorf("no lookup defined"), + input: &mounts{}, }, { description: "empty required returns no mounts", expectedError: nil, - input: mounts{ + input: &mounts{ lookup: &lookup.LocatorMock{ LocateFunc: func(string) ([]string, error) { return []string{"located"}, nil @@ -61,7 +62,7 @@ func TestMounts(t *testing.T) { { description: "required returns located", expectedError: nil, - input: mounts{ + input: &mounts{ lookup: &lookup.LocatorMock{ LocateFunc: func(string) ([]string, error) { return []string{"located"}, nil @@ -74,7 +75,7 @@ func TestMounts(t *testing.T) { { description: "mounts removes located duplicates", expectedError: nil, - input: mounts{ + input: &mounts{ lookup: &lookup.LocatorMock{ LocateFunc: func(string) ([]string, error) { return []string{"located"}, nil @@ -86,7 +87,7 @@ func TestMounts(t *testing.T) { }, { description: "mounts skips located errors", - input: mounts{ + input: &mounts{ lookup: &lookup.LocatorMock{ LocateFunc: func(s string) ([]string, error) { if s == "error" { @@ -101,7 +102,7 @@ func 
TestMounts(t *testing.T) { }, { description: "mounts skips unlocated", - input: mounts{ + input: &mounts{ lookup: &lookup.LocatorMock{ LocateFunc: func(s string) ([]string, error) { if s == "empty" { @@ -116,7 +117,7 @@ func TestMounts(t *testing.T) { }, { description: "mounts skips unlocated", - input: mounts{ + input: &mounts{ lookup: &lookup.LocatorMock{ LocateFunc: func(s string) ([]string, error) { if s == "multiple" { From 7c10762768a62ec589ea87f485b10dea05bebdfe Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 6 Apr 2022 20:53:51 +0200 Subject: [PATCH 7/8] Include nvidia-ctk in deb and rpm packages Signed-off-by: Evan Lezar --- packaging/debian/changelog | 1 + packaging/debian/nvidia-container-toolkit.install | 3 ++- packaging/rpm/SPECS/nvidia-container-toolkit.spec | 14 +++++++++----- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/packaging/debian/changelog b/packaging/debian/changelog index 67ad3690..73e93a8a 100644 --- a/packaging/debian/changelog +++ b/packaging/debian/changelog @@ -1,5 +1,6 @@ nvidia-container-toolkit (1.10.0~rc.1-1) experimental; urgency=medium + * Include nvidia-ctk CLI in installed binaries * Add experimental option to NVIDIA Container Runtime -- NVIDIA CORPORATION Thu, 24 Mar 2022 13:22:24 +0200 diff --git a/packaging/debian/nvidia-container-toolkit.install b/packaging/debian/nvidia-container-toolkit.install index 9547c0c9..0faa1ede 100644 --- a/packaging/debian/nvidia-container-toolkit.install +++ b/packaging/debian/nvidia-container-toolkit.install @@ -1,3 +1,4 @@ config.toml /etc/nvidia-container-runtime nvidia-container-toolkit /usr/bin -nvidia-container-runtime /usr/bin \ No newline at end of file +nvidia-container-runtime /usr/bin +nvidia-ctk /usr/bin diff --git a/packaging/rpm/SPECS/nvidia-container-toolkit.spec b/packaging/rpm/SPECS/nvidia-container-toolkit.spec index 678c3012..f940ec97 100644 --- a/packaging/rpm/SPECS/nvidia-container-toolkit.spec +++ b/packaging/rpm/SPECS/nvidia-container-toolkit.spec @@ 
-12,10 +12,11 @@ License: Apache-2.0 Source0: nvidia-container-toolkit Source1: nvidia-container-runtime -Source2: config.toml -Source3: oci-nvidia-hook -Source4: oci-nvidia-hook.json -Source5: LICENSE +Source2: nvidia-ctk +Source3: config.toml +Source4: oci-nvidia-hook +Source5: oci-nvidia-hook.json +Source6: LICENSE Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook Provides: nvidia-container-runtime @@ -33,12 +34,13 @@ Requires: libseccomp Provides a OCI hook to enable GPU support in containers. %prep -cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} . +cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} . %install mkdir -p %{buildroot}%{_bindir} install -m 755 -t %{buildroot}%{_bindir} nvidia-container-toolkit install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime +install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk mkdir -p %{buildroot}/etc/nvidia-container-runtime install -m 644 -t %{buildroot}/etc/nvidia-container-runtime config.toml @@ -59,12 +61,14 @@ rm -f %{_bindir}/nvidia-container-runtime-hook %license LICENSE %{_bindir}/nvidia-container-toolkit %{_bindir}/nvidia-container-runtime +%{_bindir}/nvidia-ctk %config /etc/nvidia-container-runtime/config.toml /usr/libexec/oci/hooks.d/oci-nvidia-hook /usr/share/containers/oci/hooks.d/oci-nvidia-hook.json %changelog * Thu Mar 24 2022 NVIDIA CORPORATION 1.10.0-0.1.rc.1 +- Include nvidia-ctk CLI in installed binaries - Add experimental option to NVIDIA Container Runtime * Fri Mar 18 2022 NVIDIA CORPORATION 1.9.0-1 From c87ae586d4c2b4537b1c14cbd24c087105cc4949 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 7 Apr 2022 16:45:43 +0200 Subject: [PATCH 8/8] FIX: Rename containerSpec flag to container-spec Signed-off-by: Evan Lezar --- cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go 
b/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go index c0132b33..56f9f229 100644 --- a/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go +++ b/cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go @@ -66,7 +66,7 @@ func (m command) build() *cli.Command { Destination: &cfg.folders, }, &cli.StringFlag{ - Name: "containerSpec", + Name: "container-spec", Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN", Destination: &cfg.containerSpec, },