From 3bb539a5f780590891b14354a769793d279d4adc Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 4 Apr 2022 13:52:12 +0200 Subject: [PATCH 01/20] Update libnvidia-container Signed-off-by: Evan Lezar --- third_party/libnvidia-container | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/libnvidia-container b/third_party/libnvidia-container index a9aa8238..a0c0a43e 160000 --- a/third_party/libnvidia-container +++ b/third_party/libnvidia-container @@ -1 +1 @@ -Subproject commit a9aa8238398d2c1706bd0dbfca48f7690b5e0e70 +Subproject commit a0c0a43e6a6d8c9cc584d15497230a2ecd0d65c7 From 0c7eb93d622b6a06a91b6d5d467075c70cfe8f3e Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 24 Mar 2022 13:45:03 +0200 Subject: [PATCH 02/20] Add experimental option to NVIDIA Container Runtime config This change adds an experimental option to the NVIDIA Container Runtime config. To simplify the extension of this experimental mode in future an error is raised if this is enabled. Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime/main.go | 6 ++++-- cmd/nvidia-container-runtime/runtime_factory.go | 6 +++++- packaging/debian/changelog | 2 +- packaging/rpm/SPECS/nvidia-container-toolkit.spec | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cmd/nvidia-container-runtime/main.go b/cmd/nvidia-container-runtime/main.go index 69f822bf..6d103983 100644 --- a/cmd/nvidia-container-runtime/main.go +++ b/cmd/nvidia-container-runtime/main.go @@ -47,16 +47,17 @@ func run(argv []string) (rerr error) { logger.CloseFile() }() - r, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv) + runtime, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv) if err != nil { return fmt.Errorf("error creating runtime: %v", err) } - return r.Exec(argv) + return runtime.Exec(argv) } type config struct { debugFilePath string + Experimental bool } // getConfig sets up the config struct. Values are read from a toml file @@ -81,6 +82,7 @@ func getConfig() (*config, error) { } cfg.debugFilePath = toml.GetDefault("nvidia-container-runtime.debug", "/dev/null").(string) + cfg.Experimental = toml.GetDefault("nvidia-container-runtime.experimental", false).(bool) return cfg, nil } diff --git a/cmd/nvidia-container-runtime/runtime_factory.go b/cmd/nvidia-container-runtime/runtime_factory.go index 8b547935..3b6dbef7 100644 --- a/cmd/nvidia-container-runtime/runtime_factory.go +++ b/cmd/nvidia-container-runtime/runtime_factory.go @@ -43,7 +43,11 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config, argv []string return nil, fmt.Errorf("error constructing low-level runtime: %v", err) } - specModifier := modifier.NewStableRuntimeModifier(logger) + var specModifier oci.SpecModifier + if cfg.Experimental { + return nil, fmt.Errorf("experimental mode is not supported") + } + specModifier = modifier.NewStableRuntimeModifier(logger) // Create the wrapping runtime with the specified modifier r := runtime.NewModifyingRuntimeWrapper( diff --git a/packaging/debian/changelog b/packaging/debian/changelog index 5b0ec14d..67ad3690 100644 --- a/packaging/debian/changelog +++ b/packaging/debian/changelog @@ -1,6 +1,6 @@ nvidia-container-toolkit (1.10.0~rc.1-1) experimental; urgency=medium - * Dummy entry + * Add experimental option to NVIDIA Container Runtime -- NVIDIA CORPORATION Thu, 24 Mar 2022 13:22:24 +0200 diff --git a/packaging/rpm/SPECS/nvidia-container-toolkit.spec b/packaging/rpm/SPECS/nvidia-container-toolkit.spec index 72533802..678c3012 100644 --- a/packaging/rpm/SPECS/nvidia-container-toolkit.spec +++ b/packaging/rpm/SPECS/nvidia-container-toolkit.spec @@ -65,7 +65,7 @@ rm -f %{_bindir}/nvidia-container-runtime-hook %changelog * Thu Mar 24 2022 NVIDIA CORPORATION 1.10.0-0.1.rc.1 -- Dummy entry +- Add experimental option to NVIDIA Container Runtime * Fri Mar 18 2022 NVIDIA CORPORATION 1.9.0-1 - [libnvidia-container] Add additional check for Tegra in /sys/.../family file in CLI From 9be6cca6db0f132c49938fb4e666a7676143db10 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 11 Mar 2022 13:26:23 +0200 Subject: [PATCH 03/20] Don't skip internal packages for linting Signed-off-by: Evan Lezar --- Makefile | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 18b05684..bc1a4e5f 100644 --- a/Makefile +++ b/Makefile @@ -90,11 +90,7 @@ ineffassign: lint: # We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder. - go list -f '{{.Dir}}' $(MODULE)/... | grep -v /internal/ | xargs golint -set_exit_status - -lint-internal: -# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder. - go list -f '{{.Dir}}' $(MODULE)/internal/... | xargs golint -set_exit_status + go list -f '{{.Dir}}' $(MODULE)/... | xargs golint -set_exit_status misspell: misspell $(MODULE)/... From 7137f4b05ba25ecc5465c738955a29b57b5c653b Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 15 Mar 2022 17:27:34 +0200 Subject: [PATCH 04/20] Move runtime config to internal package Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime/main.go | 48 +-------- cmd/nvidia-container-runtime/main_test.go | 21 ---- .../runtime_factory.go | 3 +- .../runtime_factory_test.go | 5 +- internal/config/runtime.go | 89 +++++++++++++++++ internal/config/runtime_test.go | 98 +++++++++++++++++++ 6 files changed, 195 insertions(+), 69 deletions(-) create mode 100644 internal/config/runtime.go create mode 100644 internal/config/runtime_test.go diff --git a/cmd/nvidia-container-runtime/main.go b/cmd/nvidia-container-runtime/main.go index 6d103983..3bc6d36f 100644 --- a/cmd/nvidia-container-runtime/main.go +++ b/cmd/nvidia-container-runtime/main.go @@ -3,18 +3,8 @@ package main import ( "fmt" "os" - "path" - "github.com/pelletier/go-toml" -) - -const ( - configOverride = "XDG_CONFIG_HOME" - configFilePath = "nvidia-container-runtime/config.toml" -) - -var ( - configDir = "/etc/" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" ) var logger = NewLogger() @@ -30,12 +20,12 @@ func main() { // run is an entry point that allows for idiomatic handling of errors // when calling from the main function. func run(argv []string) (rerr error) { - cfg, err := getConfig() + cfg, err := config.GetRuntimeConfig() if err != nil { return fmt.Errorf("error loading config: %v", err) } - err = logger.LogToFile(cfg.debugFilePath) + err = logger.LogToFile(cfg.DebugFilePath) if err != nil { return fmt.Errorf("error opening debug log file: %v", err) } @@ -54,35 +44,3 @@ func run(argv []string) (rerr error) { return runtime.Exec(argv) } - -type config struct { - debugFilePath string - Experimental bool -} - -// getConfig sets up the config struct. Values are read from a toml file -// or set via the environment. -func getConfig() (*config, error) { - cfg := &config{} - - if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 { - configDir = XDGConfigDir - } - - configFilePath := path.Join(configDir, configFilePath) - - tomlContent, err := os.ReadFile(configFilePath) - if err != nil { - return nil, err - } - - toml, err := toml.Load(string(tomlContent)) - if err != nil { - return nil, err - } - - cfg.debugFilePath = toml.GetDefault("nvidia-container-runtime.debug", "/dev/null").(string) - cfg.Experimental = toml.GetDefault("nvidia-container-runtime.experimental", false).(bool) - - return cfg, nil -} diff --git a/cmd/nvidia-container-runtime/main_test.go b/cmd/nvidia-container-runtime/main_test.go index 0b576661..d6ba14ae 100644 --- a/cmd/nvidia-container-runtime/main_test.go +++ b/cmd/nvidia-container-runtime/main_test.go @@ -245,24 +245,3 @@ func nvidiaHookCount(hooks *specs.Hooks) int { } return count } - -func TestGetConfigWithCustomConfig(t *testing.T) { - wd, err := os.Getwd() - require.NoError(t, err) - - // By default debug is disabled - contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"") - testDir := filepath.Join(wd, "test") - filename := filepath.Join(testDir, configFilePath) - - os.Setenv(configOverride, testDir) - - require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766)) - require.NoError(t, ioutil.WriteFile(filename, contents, 0766)) - - defer func() { require.NoError(t, os.RemoveAll(testDir)) }() - - cfg, err := getConfig() - require.NoError(t, err) - require.Equal(t, cfg.debugFilePath, "/nvidia-container-toolkit.log") -} diff --git a/cmd/nvidia-container-runtime/runtime_factory.go b/cmd/nvidia-container-runtime/runtime_factory.go index 3b6dbef7..fd4c0960 100644 --- a/cmd/nvidia-container-runtime/runtime_factory.go +++ b/cmd/nvidia-container-runtime/runtime_factory.go @@ -20,6 +20,7 @@ import ( "fmt" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime/modifier" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" "github.com/NVIDIA/nvidia-container-toolkit/internal/runtime" "github.com/sirupsen/logrus" @@ -31,7 +32,7 @@ const ( ) // newNVIDIAContainerRuntime is a factory method that constructs a runtime based on the selected configuration and specified logger -func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config, argv []string) (oci.Runtime, error) { +func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.RuntimeConfig, argv []string) (oci.Runtime, error) { ociSpec, err := oci.NewSpec(logger, argv) if err != nil { return nil, fmt.Errorf("error constructing OCI specification: %v", err) diff --git a/cmd/nvidia-container-runtime/runtime_factory_test.go b/cmd/nvidia-container-runtime/runtime_factory_test.go index b29e07bf..467e5efc 100644 --- a/cmd/nvidia-container-runtime/runtime_factory_test.go +++ b/cmd/nvidia-container-runtime/runtime_factory_test.go @@ -19,6 +19,7 @@ package main import ( "testing" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" ) @@ -28,13 +29,13 @@ func TestFactoryMethod(t *testing.T) { testCases := []struct { description string - config config + config config.RuntimeConfig argv []string expectedError bool }{ { description: "empty config no error", - config: config{}, + config: config.RuntimeConfig{}, }, } diff --git a/internal/config/runtime.go b/internal/config/runtime.go new file mode 100644 index 00000000..ca2d1565 --- /dev/null +++ b/internal/config/runtime.go @@ -0,0 +1,89 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package config + +import ( + "fmt" + "io" + "os" + "path" + + "github.com/pelletier/go-toml" +) + +const ( + configOverride = "XDG_CONFIG_HOME" + configFilePath = "nvidia-container-runtime/config.toml" +) + +var ( + configDir = "/etc/" +) + +// RuntimeConfig stores the config options for the NVIDIA Container Runtime +type RuntimeConfig struct { + DebugFilePath string + Experimental bool +} + +// GetRuntimeConfig sets up the config struct. Values are read from a toml file +// or set via the environment. +func GetRuntimeConfig() (*RuntimeConfig, error) { + if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 { + configDir = XDGConfigDir + } + + configFilePath := path.Join(configDir, configFilePath) + + tomlFile, err := os.Open(configFilePath) + if err != nil { + return nil, fmt.Errorf("failed to open config file %v: %v", configFilePath, err) + } + defer tomlFile.Close() + + cfg, err := getRuntimeConfigFrom(tomlFile) + if err != nil { + return nil, fmt.Errorf("failed to read config values: %v", err) + } + + return cfg, nil +} + +// getRuntimeConfigFrom reads the config from the specified Reader +func getRuntimeConfigFrom(reader io.Reader) (*RuntimeConfig, error) { + toml, err := toml.LoadReader(reader) + if err != nil { + return nil, err + } + + cfg := getDefaultRuntimeConfig() + + cfg.DebugFilePath = toml.GetDefault("nvidia-container-runtime.debug", cfg.DebugFilePath).(string) + cfg.Experimental = toml.GetDefault("nvidia-container-runtime.experimental", cfg.Experimental).(bool) + + return cfg, nil +} + +// getDefaultRuntimeConfig defines the default values for the config +func getDefaultRuntimeConfig() *RuntimeConfig { + c := RuntimeConfig{ + DebugFilePath: "/dev/null", + Experimental: false, + } + + return &c +} diff --git a/internal/config/runtime_test.go b/internal/config/runtime_test.go new file mode 100644 index 00000000..91f1b3a5 --- /dev/null +++ b/internal/config/runtime_test.go @@ -0,0 +1,98 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package config + +import ( + "io/ioutil" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestGerRuntimeConfigWithCustomConfig(t *testing.T) { + wd, err := os.Getwd() + require.NoError(t, err) + + // By default debug is disabled + contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"") + testDir := filepath.Join(wd, "test") + filename := filepath.Join(testDir, configFilePath) + + os.Setenv(configOverride, testDir) + + require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766)) + require.NoError(t, ioutil.WriteFile(filename, contents, 0766)) + + defer func() { require.NoError(t, os.RemoveAll(testDir)) }() + + cfg, err := GetRuntimeConfig() + require.NoError(t, err) + require.Equal(t, cfg.DebugFilePath, "/nvidia-container-toolkit.log") +} + +func TestGerRuntimeConfig(t *testing.T) { + testCases := []struct { + description string + contents []string + expectedError error + expectedConfig *RuntimeConfig + }{ + { + description: "empty config is default", + expectedConfig: &RuntimeConfig{ + DebugFilePath: "/dev/null", + }, + }, + { + description: "config options set inline", + contents: []string{ + "nvidia-container-runtime.debug = \"/foo/bar\"", + }, + expectedConfig: &RuntimeConfig{ + DebugFilePath: "/foo/bar", + }, + }, + { + description: "config options set in section", + contents: []string{ + "[nvidia-container-runtime]", + "debug = \"/foo/bar\"", + }, + expectedConfig: &RuntimeConfig{ + DebugFilePath: "/foo/bar", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + reader := strings.NewReader(strings.Join(tc.contents, "\n")) + + cfg, err := getRuntimeConfigFrom(reader) + if tc.expectedError != nil { + require.Error(t, err) + } else { + require.NoError(t, err) + } + + require.EqualValues(t, tc.expectedConfig, cfg) + }) + } +} From 390e5747ea5fa388505fdb1024c99505f5cafc0d Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 9 Mar 2022 17:51:51 +0200 Subject: [PATCH 05/20] Add lookup abstraction for locating executable files Signed-off-by: Evan Lezar --- internal/lookup/file.go | 85 ++++++++++++++++++++++++++++++ internal/lookup/locator.go | 24 +++++++++ internal/lookup/locator_mock.go | 77 +++++++++++++++++++++++++++ internal/lookup/path.go | 93 +++++++++++++++++++++++++++++++++ 4 files changed, 279 insertions(+) create mode 100644 internal/lookup/file.go create mode 100644 internal/lookup/locator.go create mode 100644 internal/lookup/locator_mock.go create mode 100644 internal/lookup/path.go diff --git a/internal/lookup/file.go b/internal/lookup/file.go new file mode 100644 index 00000000..c9008f93 --- /dev/null +++ b/internal/lookup/file.go @@ -0,0 +1,85 @@ +/* +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/ + +package lookup + +import ( + "fmt" + "os" + "path/filepath" + + log "github.com/sirupsen/logrus" +) + +// file can be used to locate file (or file-like elements) at a specified set of +// prefixes. The validity of a file is determined by a filter function. +type file struct { + logger *log.Logger + prefixes []string + filter func(string) error +} + +// NewFileLocator creates a Locator that can be used to find files at the specified root. A logger +// can also be specified. +func NewFileLocator(logger *log.Logger, root string) Locator { + l := newFileLocator(logger, root) + + return &l +} + +func newFileLocator(logger *log.Logger, root string) file { + return file{ + logger: logger, + prefixes: []string{root}, + filter: assertFile, + } +} + +var _ Locator = (*file)(nil) + +// Locate attempts to find the specified file. All prefixes are searched and any matching +// candidates are returned. If no matches are found, an error is returned. +func (p file) Locate(filename string) ([]string, error) { + var filenames []string + for _, prefix := range p.prefixes { + candidate := filepath.Join(prefix, filename) + p.logger.Debugf("Checking candidate '%v'", candidate) + err := p.filter(candidate) + if err != nil { + p.logger.Debugf("Candidate '%v' does not meet requirements: %v", candidate, err) + continue + } + filenames = append(filenames, candidate) + } + if len(filename) == 0 { + return nil, fmt.Errorf("file %v not found", filename) + } + return filenames, nil +} + +// assertFile checks whether the specified path is a regular file +func assertFile(filename string) error { + info, err := os.Stat(filename) + if err != nil { + return fmt.Errorf("error getting info for %v: %v", filename, err) + } + + if info.IsDir() { + return fmt.Errorf("specified path '%v' is a directory", filename) + } + + return nil +} diff --git a/internal/lookup/locator.go b/internal/lookup/locator.go new file mode 100644 index 00000000..871e1b02 --- /dev/null +++ b/internal/lookup/locator.go @@ -0,0 +1,24 @@ +/* +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/ + +package lookup + +//go:generate moq -stub -out locator_mock.go . Locator + +// Locator defines the interface for locating files on a system. +type Locator interface { + Locate(string) ([]string, error) +} diff --git a/internal/lookup/locator_mock.go b/internal/lookup/locator_mock.go new file mode 100644 index 00000000..0c50f345 --- /dev/null +++ b/internal/lookup/locator_mock.go @@ -0,0 +1,77 @@ +// Code generated by moq; DO NOT EDIT. +// github.com/matryer/moq + +package lookup + +import ( + "sync" +) + +// Ensure, that LocatorMock does implement Locator. +// If this is not the case, regenerate this file with moq. +var _ Locator = &LocatorMock{} + +// LocatorMock is a mock implementation of Locator. +// +// func TestSomethingThatUsesLocator(t *testing.T) { +// +// // make and configure a mocked Locator +// mockedLocator := &LocatorMock{ +// LocateFunc: func(s string) ([]string, error) { +// panic("mock out the Locate method") +// }, +// } +// +// // use mockedLocator in code that requires Locator +// // and then make assertions. +// +// } +type LocatorMock struct { + // LocateFunc mocks the Locate method. + LocateFunc func(s string) ([]string, error) + + // calls tracks calls to the methods. + calls struct { + // Locate holds details about calls to the Locate method. + Locate []struct { + // S is the s argument value. + S string + } + } + lockLocate sync.RWMutex +} + +// Locate calls LocateFunc. +func (mock *LocatorMock) Locate(s string) ([]string, error) { + callInfo := struct { + S string + }{ + S: s, + } + mock.lockLocate.Lock() + mock.calls.Locate = append(mock.calls.Locate, callInfo) + mock.lockLocate.Unlock() + if mock.LocateFunc == nil { + var ( + stringsOut []string + errOut error + ) + return stringsOut, errOut + } + return mock.LocateFunc(s) +} + +// LocateCalls gets all the calls that were made to Locate. +// Check the length with: +// len(mockedLocator.LocateCalls()) +func (mock *LocatorMock) LocateCalls() []struct { + S string +} { + var calls []struct { + S string + } + mock.lockLocate.RLock() + calls = mock.calls.Locate + mock.lockLocate.RUnlock() + return calls +} diff --git a/internal/lookup/path.go b/internal/lookup/path.go new file mode 100644 index 00000000..9dffa4c5 --- /dev/null +++ b/internal/lookup/path.go @@ -0,0 +1,93 @@ +/* +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/ + +package lookup + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + log "github.com/sirupsen/logrus" +) + +const ( + envPath = "PATH" +) + +var defaultPaths = []string{"/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"} + +type path struct { + file +} + +// NewPathLocator creates a locator to fine executable files in the path. A logger can also be specified. +func NewPathLocator(logger *log.Logger, root string) Locator { + pathEnv := os.Getenv(envPath) + paths := filepath.SplitList(pathEnv) + + if root != "" { + paths = append(paths, defaultPaths...) + } + + var prefixes []string + for _, dir := range paths { + prefixes = append(prefixes, filepath.Join(root, dir)) + } + l := path{ + file: file{ + logger: logger, + prefixes: prefixes, + filter: assertExecutable, + }, + } + return &l +} + +var _ Locator = (*path)(nil) + +// Locate finds executable files in the path. If a relative or absolute path is specified, the prefix paths are not considered. +func (p path) Locate(filename string) ([]string, error) { + // For absolute paths we ensure that it is executable + if strings.Contains(filename, "/") { + err := assertExecutable(filename) + if err != nil { + return nil, fmt.Errorf("absolute path %v is not an executable file: %v", filename, err) + } + return []string{filename}, nil + } + + return p.file.Locate(filename) +} + +// assertExecutable checks whether the specified path is an execuable file. +func assertExecutable(filename string) error { + err := assertFile(filename) + if err != nil { + return err + } + info, err := os.Stat(filename) + if err != nil { + return err + } + + if info.Mode()&0111 == 0 { + return fmt.Errorf("specified file '%v' is not executable", filename) + } + + return nil +} From 9dfe60b8b75806e21243ff1260e3d470ce142cc1 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 15 Mar 2022 17:28:42 +0200 Subject: [PATCH 06/20] Add stable discoverer for nvidia-container-runtime hook Signed-off-by: Evan Lezar --- internal/discover/discover.go | 30 +++++++++++++ internal/discover/discover_mock.go | 70 +++++++++++++++++++++++++++++ internal/discover/stable.go | 71 ++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+) create mode 100644 internal/discover/discover.go create mode 100644 internal/discover/discover_mock.go create mode 100644 internal/discover/stable.go diff --git a/internal/discover/discover.go b/internal/discover/discover.go new file mode 100644 index 00000000..4858b282 --- /dev/null +++ b/internal/discover/discover.go @@ -0,0 +1,30 @@ +/* +# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/ + +package discover + +// Hook represents a discovered hook. +type Hook struct { + Lifecycle string + Path string + Args []string +} + +//go:generate moq -stub -out discover_mock.go . Discover +// Discover defines an interface for discovering the devices, mounts, and hooks available on a system +type Discover interface { + Hooks() ([]Hook, error) +} diff --git a/internal/discover/discover_mock.go b/internal/discover/discover_mock.go new file mode 100644 index 00000000..0a0f211f --- /dev/null +++ b/internal/discover/discover_mock.go @@ -0,0 +1,70 @@ +// Code generated by moq; DO NOT EDIT. +// github.com/matryer/moq + +package discover + +import ( + "sync" +) + +// Ensure, that DiscoverMock does implement Discover. +// If this is not the case, regenerate this file with moq. +var _ Discover = &DiscoverMock{} + +// DiscoverMock is a mock implementation of Discover. +// +// func TestSomethingThatUsesDiscover(t *testing.T) { +// +// // make and configure a mocked Discover +// mockedDiscover := &DiscoverMock{ +// HooksFunc: func() ([]Hook, error) { +// panic("mock out the Hooks method") +// }, +// } +// +// // use mockedDiscover in code that requires Discover +// // and then make assertions. +// +// } +type DiscoverMock struct { + // HooksFunc mocks the Hooks method. + HooksFunc func() ([]Hook, error) + + // calls tracks calls to the methods. + calls struct { + // Hooks holds details about calls to the Hooks method. + Hooks []struct { + } + } + lockHooks sync.RWMutex +} + +// Hooks calls HooksFunc. +func (mock *DiscoverMock) Hooks() ([]Hook, error) { + callInfo := struct { + }{} + mock.lockHooks.Lock() + mock.calls.Hooks = append(mock.calls.Hooks, callInfo) + mock.lockHooks.Unlock() + if mock.HooksFunc == nil { + var ( + hooksOut []Hook + errOut error + ) + return hooksOut, errOut + } + return mock.HooksFunc() +} + +// HooksCalls gets all the calls that were made to Hooks. +// Check the length with: +// len(mockedDiscover.HooksCalls()) +func (mock *DiscoverMock) HooksCalls() []struct { +} { + var calls []struct { + } + mock.lockHooks.RLock() + calls = mock.calls.Hooks + mock.lockHooks.RUnlock() + return calls +} diff --git a/internal/discover/stable.go b/internal/discover/stable.go new file mode 100644 index 00000000..d5273002 --- /dev/null +++ b/internal/discover/stable.go @@ -0,0 +1,71 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package discover + +import ( + "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" + "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" + "github.com/sirupsen/logrus" +) + +type stable struct { + logger *logrus.Logger + lookup lookup.Locator +} + +const ( + nvidiaContainerRuntimeHookExecuable = "nvidia-container-runtime-hook" + hookDefaultFilePath = "/usr/bin/nvidia-container-runtime-hook" +) + +var _ Discover = (*stable)(nil) + +// NewStableDiscoverer creates a discoverer for the stable runtime +func NewStableDiscoverer(logger *logrus.Logger, root string) (Discover, error) { + d := stable{ + logger: logger, + lookup: lookup.NewPathLocator(logger, root), + } + + return &d, nil +} + +// Hooks returns the "stable" NVIDIA Container Runtime hook +func (d stable) Hooks() ([]Hook, error) { + var hooks []Hook + + hookPath := hookDefaultFilePath + targets, err := d.lookup.Locate(nvidiaContainerRuntimeHookExecuable) + if err != nil { + d.logger.Warnf("Failed to locate %v: %v", nvidiaContainerRuntimeHookExecuable, err) + } else if len(targets) == 0 { + d.logger.Warnf("%v not found", nvidiaContainerRuntimeHookExecuable) + } else { + d.logger.Debugf("Found %v candidates: %v", nvidiaContainerRuntimeHookExecuable, targets) + hookPath = targets[0] + } + d.logger.Debugf("Using NVIDIA Container Runtime Hook path %v", hookPath) + + args := []string{hookPath, "prestart"} + legacyHook := Hook{ + Lifecycle: cdi.PrestartHook, + Path: hookPath, + Args: args, + } + hooks = append(hooks, legacyHook) + return hooks, nil +} From 239b6d3739d7ffeca34f0cb832fd570e8203a440 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 15 Mar 2022 14:29:14 +0200 Subject: [PATCH 07/20] Implement experimental modifier for NVIDIA Container Runtime This change enables the experimental mode of the NVIDIA Container Runtime. If enabled, the nvidia-container-runtime.discover-mode config option is queried to determine how required OCI spec modifications should be defined. If "legacy" is selected, the existing NVIDIA Container Runtime hooks is discovered and injected into the OCI spec. Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime/main.go | 2 +- .../modifier/experimental.go | 119 ++++++++ .../modifier/experimental_test.go | 258 ++++++++++++++++++ .../modifier/stable.go | 11 +- .../runtime_factory.go | 8 +- internal/config/runtime.go | 3 + internal/config/runtime_test.go | 10 + internal/discover/{stable.go => legacy.go} | 28 +- internal/edits/edits.go | 66 +++++ internal/edits/hook.go | 46 ++++ 10 files changed, 531 insertions(+), 20 deletions(-) create mode 100644 cmd/nvidia-container-runtime/modifier/experimental.go create mode 100644 cmd/nvidia-container-runtime/modifier/experimental_test.go rename internal/discover/{stable.go => legacy.go} (71%) create mode 100644 internal/edits/edits.go create mode 100644 internal/edits/hook.go diff --git a/cmd/nvidia-container-runtime/main.go b/cmd/nvidia-container-runtime/main.go index 3bc6d36f..ac2f0f84 100644 --- a/cmd/nvidia-container-runtime/main.go +++ b/cmd/nvidia-container-runtime/main.go @@ -39,7 +39,7 @@ func run(argv []string) (rerr error) { runtime, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv) if err != nil { - return fmt.Errorf("error creating runtime: %v", err) + return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err) } return runtime.Exec(argv) diff --git a/cmd/nvidia-container-runtime/modifier/experimental.go b/cmd/nvidia-container-runtime/modifier/experimental.go new file mode 100644 index 00000000..bc39cd6a --- /dev/null +++ b/cmd/nvidia-container-runtime/modifier/experimental.go @@ -0,0 +1,119 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package modifier + +import ( + "fmt" + "path/filepath" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" + "github.com/NVIDIA/nvidia-container-toolkit/internal/edits" + "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" +) + +// experiemental represents the modifications required by the experimental runtime +type experimental struct { + logger *logrus.Logger + discoverer discover.Discover +} + +// NewExperimentalModifier creates a modifier that applied the experimental +// modications to an OCI spec if required by the runtime wrapper. +func NewExperimentalModifier(logger *logrus.Logger, cfg *config.RuntimeConfig) (oci.SpecModifier, error) { + logger.Infof("Constructing modifier from config: %+v", cfg) + + // TODO: We need to specify the root + root := "" + + var d discover.Discover + switch cfg.DiscoverMode { + case "legacy": + legacyDiscoverer, err := discover.NewLegacyDiscoverer(logger, root) + if err != nil { + return nil, fmt.Errorf("failed to create legacy discoverer: %v", err) + } + d = legacyDiscoverer + default: + return nil, fmt.Errorf("invalid discover mode: %v", cfg.DiscoverMode) + } + + return newExperimentalModifierFromDiscoverer(logger, d) +} + +// newExperimentalModifierFromDiscoverer created a modifier that aplies the discovered +// modifications to an OCI spec if require by the runtime wrapper. +func newExperimentalModifierFromDiscoverer(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) { + m := experimental{ + logger: logger, + discoverer: d, + } + return &m, nil +} + +// Modify applies the required modifications to the incomming OCI spec. These modifications +// are applied in-place. +func (m experimental) Modify(spec *specs.Spec) error { + err := m.assertSpecIsCompatible(spec) + if err != nil { + return fmt.Errorf("OCI specification cannot be modified: %v", err) + } + + specEdits, err := edits.NewSpecEdits(m.logger, m.discoverer) + if err != nil { + return fmt.Errorf("failed to get required container edits: %v", err) + } + + return specEdits.Modify(spec) +} + +func (m experimental) assertSpecIsCompatible(spec *specs.Spec) error { + if spec == nil { + return nil + } + + if spec.Hooks == nil { + return nil + } + + if hookPath := findStableHook(spec.Hooks.Prestart); hookPath != "" { + return fmt.Errorf("spec already contains required 'prestart' hook: %v", hookPath) + } + + return nil +} + +// findStableHook checks the list of OCI hooks for the nvidia-container-runtime-hook +// or nvidia-container-toolkit hook. These are included, for example, by the non-experimental +// nvidia-container-runtime or docker when specifying the --gpus flag. +func findStableHook(hooks []specs.Hook) string { + lookFor := map[string]bool{ + nvidiaContainerRuntimeHookExecuable: true, + nvidiaContainerToolkitExecutable: true, + } + + for _, h := range hooks { + base := filepath.Base(h.Path) + if lookFor[base] { + return h.Path + } + } + + return "" +} diff --git a/cmd/nvidia-container-runtime/modifier/experimental_test.go b/cmd/nvidia-container-runtime/modifier/experimental_test.go new file mode 100644 index 00000000..07b67e8a --- /dev/null +++ b/cmd/nvidia-container-runtime/modifier/experimental_test.go @@ -0,0 +1,258 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package modifier + +import ( + "fmt" + "testing" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" + "github.com/opencontainers/runtime-spec/specs-go" + testlog "github.com/sirupsen/logrus/hooks/test" + "github.com/stretchr/testify/require" +) + +func TestConstructor(t *testing.T) { + logger, _ := testlog.NewNullLogger() + + testCases := []struct { + description string + cfg *config.RuntimeConfig + expectedError error + }{ + { + description: "empty config raises error", + cfg: &config.RuntimeConfig{}, + expectedError: fmt.Errorf("invalid discover mode"), + }, + { + description: "non-legacy discover mode raises error", + cfg: &config.RuntimeConfig{ + DiscoverMode: "non-legacy", + }, + expectedError: fmt.Errorf("invalid discover mode"), + }, + { + description: "legacy discover mode returns modifier", + cfg: &config.RuntimeConfig{ + DiscoverMode: "legacy", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + _, err := NewExperimentalModifier(logger, tc.cfg) + if tc.expectedError != nil { + require.Error(t, err) + } else { + require.NoError(t, err) + } + }) + } +} + +func TestExperimentalModifier(t *testing.T) { + logger, _ := testlog.NewNullLogger() + + testCases := []struct { + description string + discover *discover.DiscoverMock + spec *specs.Spec + expectedError error + expectedSpec *specs.Spec + }{ + { + description: "empty discoverer does not modify spec", + discover: &discover.DiscoverMock{}, + }, + { + description: "failed hooks discoverer returns error", + discover: &discover.DiscoverMock{ + HooksFunc: func() ([]discover.Hook, error) { + return nil, fmt.Errorf("discover.Hooks error") + }, + }, + expectedError: fmt.Errorf("discover.Hooks error"), + }, + { + description: "discovered hooks are injected into spec", + spec: &specs.Spec{}, + discover: &discover.DiscoverMock{ + HooksFunc: func() ([]discover.Hook, error) { + hooks := []discover.Hook{ + { + Lifecycle: "prestart", + Path: "/hook/a", + Args: []string{"/hook/a", "arga"}, + }, + { + Lifecycle: "createContainer", + Path: "/hook/b", + Args: []string{"/hook/b", "argb"}, + }, + } + return hooks, nil + }, + }, + expectedSpec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/hook/a", + Args: []string{"/hook/a", "arga"}, + }, + }, + CreateContainer: []specs.Hook{ + { + Path: "/hook/b", + Args: []string{"/hook/b", "argb"}, + }, + }, + }, + }, + }, + { + description: "existing hooks are maintained", + spec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/hook/a", + Args: []string{"/hook/a", "arga"}, + }, + }, + }, + }, + discover: &discover.DiscoverMock{ + HooksFunc: func() ([]discover.Hook, error) { + hooks := []discover.Hook{ + { + Lifecycle: "prestart", + Path: "/hook/b", + Args: []string{"/hook/b", "argb"}, + }, + } + return hooks, nil + }, + }, + expectedSpec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/hook/a", + Args: []string{"/hook/a", "arga"}, + }, + { + Path: "/hook/b", + Args: []string{"/hook/b", "argb"}, + }, + }, + }, + }, + }, + { + description: "modification fails for existing nvidia-container-runtime-hook", + spec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/path/to/nvidia-container-runtime-hook", + Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"}, + }, + }, + }, + }, + discover: &discover.DiscoverMock{ + HooksFunc: func() ([]discover.Hook, error) { + hooks := []discover.Hook{ + { + Lifecycle: "prestart", + Path: "/hook/b", + Args: []string{"/hook/b", "argb"}, + }, + } + return hooks, nil + }, + }, + expectedError: fmt.Errorf("nvidia-container-runtime-hook already exists"), + expectedSpec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/path/to/nvidia-container-runtime-hook", + Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"}, + }, + }, + }, + }, + }, + { + description: "modification fails for existing nvidia-container-toolkit", + spec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/path/to/nvidia-container-toolkit", + Args: []string{"/path/to/nvidia-container-toolkit", "prestart"}, + }, + }, + }, + }, + discover: &discover.DiscoverMock{ + HooksFunc: func() ([]discover.Hook, error) { + hooks := []discover.Hook{ + { + Lifecycle: "prestart", + Path: "/hook/b", + Args: []string{"/hook/b", "argb"}, + }, + } + return hooks, nil + }, + }, + expectedError: fmt.Errorf("nvidia-container-toolkit already exists"), + expectedSpec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/path/to/nvidia-container-toolkit", + Args: []string{"/path/to/nvidia-container-toolkit", "prestart"}, + }, + }, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + m, err := newExperimentalModifierFromDiscoverer(logger, tc.discover) + require.NoError(t, err) + + err = m.Modify(tc.spec) + if tc.expectedError != nil { + require.Error(t, err) + } else { + require.NoError(t, err) + } + + require.EqualValues(t, tc.expectedSpec, tc.spec) + }) + } +} diff --git a/cmd/nvidia-container-runtime/modifier/stable.go b/cmd/nvidia-container-runtime/modifier/stable.go index 37dfa1e7..9d955632 100644 --- a/cmd/nvidia-container-runtime/modifier/stable.go +++ b/cmd/nvidia-container-runtime/modifier/stable.go @@ -27,7 +27,10 @@ import ( ) const ( - hookDefaultFilePath = "/usr/bin/nvidia-container-runtime-hook" + nvidiaContainerRuntimeHookExecuable = "nvidia-container-runtime-hook" + nvidiaContainerRuntimeHookDefaultPath = "/usr/bin/nvidia-container-runtime-hook" + + nvidiaContainerToolkitExecutable = "nvidia-container-toolkit" ) // NewStableRuntimeModifier creates an OCI spec modifier that inserts the NVIDIA Container Runtime Hook into an OCI @@ -47,9 +50,9 @@ type stableRuntimeModifier struct { // Modify applies the required modification to the incoming OCI spec, inserting the nvidia-container-runtime-hook // as a prestart hook. func (m stableRuntimeModifier) Modify(spec *specs.Spec) error { - path, err := exec.LookPath("nvidia-container-runtime-hook") + path, err := exec.LookPath(nvidiaContainerRuntimeHookExecuable) if err != nil { - path = hookDefaultFilePath + path = nvidiaContainerRuntimeHookDefaultPath _, err = os.Stat(path) if err != nil { return err @@ -63,7 +66,7 @@ func (m stableRuntimeModifier) Modify(spec *specs.Spec) error { spec.Hooks = &specs.Hooks{} } else if len(spec.Hooks.Prestart) != 0 { for _, hook := range spec.Hooks.Prestart { - if strings.Contains(hook.Path, "nvidia-container-runtime-hook") { + if strings.Contains(hook.Path, nvidiaContainerRuntimeHookExecuable) { m.logger.Infof("existing nvidia prestart hook found in OCI spec") return nil } diff --git a/cmd/nvidia-container-runtime/runtime_factory.go b/cmd/nvidia-container-runtime/runtime_factory.go index fd4c0960..c40f873e 100644 --- a/cmd/nvidia-container-runtime/runtime_factory.go +++ b/cmd/nvidia-container-runtime/runtime_factory.go @@ -46,9 +46,13 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.RuntimeConfig, var specModifier oci.SpecModifier if cfg.Experimental { - return nil, fmt.Errorf("experimental mode is not supported") + specModifier, err = modifier.NewExperimentalModifier(logger, cfg) + if err != nil { + return nil, fmt.Errorf("failed to construct experimental modifier: %v", err) + } + } else { + specModifier = modifier.NewStableRuntimeModifier(logger) } - specModifier = modifier.NewStableRuntimeModifier(logger) // Create the wrapping runtime with the specified modifier r := runtime.NewModifyingRuntimeWrapper( diff --git a/internal/config/runtime.go b/internal/config/runtime.go index ca2d1565..4b197b74 100644 --- a/internal/config/runtime.go +++ b/internal/config/runtime.go @@ -38,6 +38,7 @@ var ( type RuntimeConfig struct { DebugFilePath string Experimental bool + DiscoverMode string } // GetRuntimeConfig sets up the config struct. Values are read from a toml file @@ -74,6 +75,7 @@ func getRuntimeConfigFrom(reader io.Reader) (*RuntimeConfig, error) { cfg.DebugFilePath = toml.GetDefault("nvidia-container-runtime.debug", cfg.DebugFilePath).(string) cfg.Experimental = toml.GetDefault("nvidia-container-runtime.experimental", cfg.Experimental).(bool) + cfg.DiscoverMode = toml.GetDefault("nvidia-container-runtime.discover-mode", cfg.DiscoverMode).(string) return cfg, nil } @@ -83,6 +85,7 @@ func getDefaultRuntimeConfig() *RuntimeConfig { c := RuntimeConfig{ DebugFilePath: "/dev/null", Experimental: false, + DiscoverMode: "legacy", } return &c diff --git a/internal/config/runtime_test.go b/internal/config/runtime_test.go index 91f1b3a5..244d0f66 100644 --- a/internal/config/runtime_test.go +++ b/internal/config/runtime_test.go @@ -58,15 +58,21 @@ func TestGerRuntimeConfig(t *testing.T) { description: "empty config is default", expectedConfig: &RuntimeConfig{ DebugFilePath: "/dev/null", + Experimental: false, + DiscoverMode: "legacy", }, }, { description: "config options set inline", contents: []string{ "nvidia-container-runtime.debug = \"/foo/bar\"", + "nvidia-container-runtime.experimental = true", + "nvidia-container-runtime.discover-mode = \"not-legacy\"", }, expectedConfig: &RuntimeConfig{ DebugFilePath: "/foo/bar", + Experimental: true, + DiscoverMode: "not-legacy", }, }, { @@ -74,9 +80,13 @@ func TestGerRuntimeConfig(t *testing.T) { contents: []string{ "[nvidia-container-runtime]", "debug = \"/foo/bar\"", + "experimental = true", + "discover-mode = \"not-legacy\"", }, expectedConfig: &RuntimeConfig{ DebugFilePath: "/foo/bar", + Experimental: true, + DiscoverMode: "not-legacy", }, }, } diff --git a/internal/discover/stable.go b/internal/discover/legacy.go similarity index 71% rename from internal/discover/stable.go rename to internal/discover/legacy.go index d5273002..6fb74139 100644 --- a/internal/discover/stable.go +++ b/internal/discover/legacy.go @@ -22,21 +22,21 @@ import ( "github.com/sirupsen/logrus" ) -type stable struct { +type legacy struct { logger *logrus.Logger lookup lookup.Locator } const ( - nvidiaContainerRuntimeHookExecuable = "nvidia-container-runtime-hook" - hookDefaultFilePath = "/usr/bin/nvidia-container-runtime-hook" + nvidiaContainerRuntimeHookExecutable = "nvidia-container-runtime-hook" + hookDefaultFilePath = "/usr/bin/nvidia-container-runtime-hook" ) -var _ Discover = (*stable)(nil) +var _ Discover = (*legacy)(nil) -// NewStableDiscoverer creates a discoverer for the stable runtime -func NewStableDiscoverer(logger *logrus.Logger, root string) (Discover, error) { - d := stable{ +// NewLegacyDiscoverer creates a discoverer for the legacy runtime +func NewLegacyDiscoverer(logger *logrus.Logger, root string) (Discover, error) { + d := legacy{ logger: logger, lookup: lookup.NewPathLocator(logger, root), } @@ -44,18 +44,20 @@ func NewStableDiscoverer(logger *logrus.Logger, root string) (Discover, error) { return &d, nil } -// Hooks returns the "stable" NVIDIA Container Runtime hook -func (d stable) Hooks() ([]Hook, error) { +// Hooks returns the "legacy" NVIDIA Container Runtime hook. This hook calls out +// to the nvidia-container-cli to make modifications to the container as defined +// in libnvidia-container. +func (d legacy) Hooks() ([]Hook, error) { var hooks []Hook hookPath := hookDefaultFilePath - targets, err := d.lookup.Locate(nvidiaContainerRuntimeHookExecuable) + targets, err := d.lookup.Locate(nvidiaContainerRuntimeHookExecutable) if err != nil { - d.logger.Warnf("Failed to locate %v: %v", nvidiaContainerRuntimeHookExecuable, err) + d.logger.Warnf("Failed to locate %v: %v", nvidiaContainerRuntimeHookExecutable, err) } else if len(targets) == 0 { - d.logger.Warnf("%v not found", nvidiaContainerRuntimeHookExecuable) + d.logger.Warnf("%v not found", nvidiaContainerRuntimeHookExecutable) } else { - d.logger.Debugf("Found %v candidates: %v", nvidiaContainerRuntimeHookExecuable, targets) + d.logger.Debugf("Found %v candidates: %v", nvidiaContainerRuntimeHookExecutable, targets) hookPath = targets[0] } d.logger.Debugf("Using NVIDIA Container Runtime Hook path %v", hookPath) diff --git a/internal/edits/edits.go b/internal/edits/edits.go new file mode 100644 index 00000000..36fecd81 --- /dev/null +++ b/internal/edits/edits.go @@ -0,0 +1,66 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package edits + +import ( + "fmt" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" + "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" + "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" + ociSpecs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" +) + +type edits struct { + cdi.ContainerEdits + logger *logrus.Logger +} + +// NewSpecEdits creates a SpecModifier that defines the required OCI spec edits (as CDI ContainerEdits) from the specified +// discoverer. +func NewSpecEdits(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) { + hooks, err := d.Hooks() + if err != nil { + return nil, fmt.Errorf("failed to discover hooks: %v", err) + } + + c := cdi.ContainerEdits{} + for _, h := range hooks { + c.Append(hook(h).toEdits()) + } + + e := edits{ + ContainerEdits: c, + logger: logger, + } + + return &e, nil +} + +// Modify applies the defined edits to the incoming OCI spec +func (e *edits) Modify(spec *ociSpecs.Spec) error { + if e == nil || e.ContainerEdits.ContainerEdits == nil { + return nil + } + + e.logger.Infof("Hooks:") + for _, hook := range e.Hooks { + e.logger.Infof("Injecting %v", hook.Args) + } + return e.Apply(spec) +} diff --git a/internal/edits/hook.go b/internal/edits/hook.go new file mode 100644 index 00000000..990d8565 --- /dev/null +++ b/internal/edits/hook.go @@ -0,0 +1,46 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package edits + +import ( + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" + "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" + "github.com/container-orchestrated-devices/container-device-interface/specs-go" +) + +type hook discover.Hook + +// toEdits converts a discovered hook to CDI Container Edits. +func (d hook) toEdits() *cdi.ContainerEdits { + e := cdi.ContainerEdits{ + ContainerEdits: &specs.ContainerEdits{ + Hooks: []*specs.Hook{d.toSpec()}, + }, + } + return &e +} + +// toSpec converts a discovered Hook to a CDI Spec Hook. Note +// that missing info is filled in when edits are applied by querying the Hook node. +func (d hook) toSpec() *specs.Hook { + s := specs.Hook{ + HookName: d.Lifecycle, + Path: d.Path, + Args: d.Args, + } + return &s +} From 33d9c1dd571d731c23b3f818258c0e9a89638f1e Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 29 Mar 2022 11:52:38 +0200 Subject: [PATCH 08/20] Split loading config from reader and getting config from toml.Tree Signed-off-by: Evan Lezar --- internal/config/runtime.go | 17 +++++++++++++---- internal/config/runtime_test.go | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/internal/config/runtime.go b/internal/config/runtime.go index 4b197b74..5132c6d3 100644 --- a/internal/config/runtime.go +++ b/internal/config/runtime.go @@ -56,7 +56,7 @@ func GetRuntimeConfig() (*RuntimeConfig, error) { } defer tomlFile.Close() - cfg, err := getRuntimeConfigFrom(tomlFile) + cfg, err := loadRuntimeConfigFrom(tomlFile) if err != nil { return nil, fmt.Errorf("failed to read config values: %v", err) } @@ -64,20 +64,29 @@ func GetRuntimeConfig() (*RuntimeConfig, error) { return cfg, nil } -// getRuntimeConfigFrom reads the config from the specified Reader -func getRuntimeConfigFrom(reader io.Reader) (*RuntimeConfig, error) { +// loadRuntimeConfigFrom reads the config from the specified Reader +func loadRuntimeConfigFrom(reader io.Reader) (*RuntimeConfig, error) { toml, err := toml.LoadReader(reader) if err != nil { return nil, err } + return getRuntimeConfigFrom(toml), nil +} + +// getRuntimeConfigFrom reads the nvidia container runtime config from the specified toml Tree. +func getRuntimeConfigFrom(toml *toml.Tree) *RuntimeConfig { cfg := getDefaultRuntimeConfig() + if toml == nil { + return cfg + } + cfg.DebugFilePath = toml.GetDefault("nvidia-container-runtime.debug", cfg.DebugFilePath).(string) cfg.Experimental = toml.GetDefault("nvidia-container-runtime.experimental", cfg.Experimental).(bool) cfg.DiscoverMode = toml.GetDefault("nvidia-container-runtime.discover-mode", cfg.DiscoverMode).(string) - return cfg, nil + return cfg } // getDefaultRuntimeConfig defines the default values for the config diff --git a/internal/config/runtime_test.go b/internal/config/runtime_test.go index 244d0f66..1601f392 100644 --- a/internal/config/runtime_test.go +++ b/internal/config/runtime_test.go @@ -95,7 +95,7 @@ func TestGerRuntimeConfig(t *testing.T) { t.Run(tc.description, func(t *testing.T) { reader := strings.NewReader(strings.Join(tc.contents, "\n")) - cfg, err := getRuntimeConfigFrom(reader) + cfg, err := loadRuntimeConfigFrom(reader) if tc.expectedError != nil { require.Error(t, err) } else { From d12dbd1befce786d0acd49beb86542f943988ed7 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 29 Mar 2022 12:01:12 +0200 Subject: [PATCH 09/20] Read top-level config to propagate Root to experimental runtime Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime/main.go | 4 +- .../modifier/experimental.go | 9 +- .../modifier/experimental_test.go | 20 ++-- .../runtime_factory.go | 4 +- .../runtime_factory_test.go | 8 +- internal/config/cli.go | 48 +++++++++ internal/config/config.go | 99 +++++++++++++++++++ internal/config/runtime.go | 47 --------- internal/config/runtime_test.go | 51 ++++++---- 9 files changed, 206 insertions(+), 84 deletions(-) create mode 100644 internal/config/cli.go create mode 100644 internal/config/config.go diff --git a/cmd/nvidia-container-runtime/main.go b/cmd/nvidia-container-runtime/main.go index ac2f0f84..787d9361 100644 --- a/cmd/nvidia-container-runtime/main.go +++ b/cmd/nvidia-container-runtime/main.go @@ -20,12 +20,12 @@ func main() { // run is an entry point that allows for idiomatic handling of errors // when calling from the main function. func run(argv []string) (rerr error) { - cfg, err := config.GetRuntimeConfig() + cfg, err := config.GetConfig() if err != nil { return fmt.Errorf("error loading config: %v", err) } - err = logger.LogToFile(cfg.DebugFilePath) + err = logger.LogToFile(cfg.NVIDIAContainerRuntimeConfig.DebugFilePath) if err != nil { return fmt.Errorf("error opening debug log file: %v", err) } diff --git a/cmd/nvidia-container-runtime/modifier/experimental.go b/cmd/nvidia-container-runtime/modifier/experimental.go index bc39cd6a..9693d05b 100644 --- a/cmd/nvidia-container-runtime/modifier/experimental.go +++ b/cmd/nvidia-container-runtime/modifier/experimental.go @@ -36,14 +36,13 @@ type experimental struct { // NewExperimentalModifier creates a modifier that applied the experimental // modications to an OCI spec if required by the runtime wrapper. -func NewExperimentalModifier(logger *logrus.Logger, cfg *config.RuntimeConfig) (oci.SpecModifier, error) { +func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config) (oci.SpecModifier, error) { logger.Infof("Constructing modifier from config: %+v", cfg) - // TODO: We need to specify the root - root := "" + root := cfg.NVIDIAContainerCLIConfig.Root var d discover.Discover - switch cfg.DiscoverMode { + switch cfg.NVIDIAContainerRuntimeConfig.DiscoverMode { case "legacy": legacyDiscoverer, err := discover.NewLegacyDiscoverer(logger, root) if err != nil { @@ -51,7 +50,7 @@ func NewExperimentalModifier(logger *logrus.Logger, cfg *config.RuntimeConfig) ( } d = legacyDiscoverer default: - return nil, fmt.Errorf("invalid discover mode: %v", cfg.DiscoverMode) + return nil, fmt.Errorf("invalid discover mode: %v", cfg.NVIDIAContainerRuntimeConfig.DiscoverMode) } return newExperimentalModifierFromDiscoverer(logger, d) diff --git a/cmd/nvidia-container-runtime/modifier/experimental_test.go b/cmd/nvidia-container-runtime/modifier/experimental_test.go index 07b67e8a..aefcff17 100644 --- a/cmd/nvidia-container-runtime/modifier/experimental_test.go +++ b/cmd/nvidia-container-runtime/modifier/experimental_test.go @@ -32,25 +32,31 @@ func TestConstructor(t *testing.T) { testCases := []struct { description string - cfg *config.RuntimeConfig + cfg *config.Config expectedError error }{ { - description: "empty config raises error", - cfg: &config.RuntimeConfig{}, + description: "empty config raises error", + cfg: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{}, + }, expectedError: fmt.Errorf("invalid discover mode"), }, { description: "non-legacy discover mode raises error", - cfg: &config.RuntimeConfig{ - DiscoverMode: "non-legacy", + cfg: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + DiscoverMode: "non-legacy", + }, }, expectedError: fmt.Errorf("invalid discover mode"), }, { description: "legacy discover mode returns modifier", - cfg: &config.RuntimeConfig{ - DiscoverMode: "legacy", + cfg: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + DiscoverMode: "legacy", + }, }, }, } diff --git a/cmd/nvidia-container-runtime/runtime_factory.go b/cmd/nvidia-container-runtime/runtime_factory.go index c40f873e..9ae9a572 100644 --- a/cmd/nvidia-container-runtime/runtime_factory.go +++ b/cmd/nvidia-container-runtime/runtime_factory.go @@ -32,7 +32,7 @@ const ( ) // newNVIDIAContainerRuntime is a factory method that constructs a runtime based on the selected configuration and specified logger -func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.RuntimeConfig, argv []string) (oci.Runtime, error) { +func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv []string) (oci.Runtime, error) { ociSpec, err := oci.NewSpec(logger, argv) if err != nil { return nil, fmt.Errorf("error constructing OCI specification: %v", err) @@ -45,7 +45,7 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.RuntimeConfig, } var specModifier oci.SpecModifier - if cfg.Experimental { + if cfg.NVIDIAContainerRuntimeConfig.Experimental { specModifier, err = modifier.NewExperimentalModifier(logger, cfg) if err != nil { return nil, fmt.Errorf("failed to construct experimental modifier: %v", err) diff --git a/cmd/nvidia-container-runtime/runtime_factory_test.go b/cmd/nvidia-container-runtime/runtime_factory_test.go index 467e5efc..145970d7 100644 --- a/cmd/nvidia-container-runtime/runtime_factory_test.go +++ b/cmd/nvidia-container-runtime/runtime_factory_test.go @@ -29,19 +29,21 @@ func TestFactoryMethod(t *testing.T) { testCases := []struct { description string - config config.RuntimeConfig + cfg *config.Config argv []string expectedError bool }{ { description: "empty config no error", - config: config.RuntimeConfig{}, + cfg: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{}, + }, }, } for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - _, err := newNVIDIAContainerRuntime(logger, &tc.config, tc.argv) + _, err := newNVIDIAContainerRuntime(logger, tc.cfg, tc.argv) if tc.expectedError { require.Error(t, err) } else { diff --git a/internal/config/cli.go b/internal/config/cli.go new file mode 100644 index 00000000..8eca29de --- /dev/null +++ b/internal/config/cli.go @@ -0,0 +1,48 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package config + +import ( + "github.com/pelletier/go-toml" +) + +// CLIConfig stores the options for the nvidia-container-cli +type CLIConfig struct { + Root string +} + +// getCLIConfigFrom reads the nvidia container runtime config from the specified toml Tree. +func getCLIConfigFrom(toml *toml.Tree) *CLIConfig { + cfg := getDefaultCLIConfig() + + if toml == nil { + return cfg + } + + cfg.Root = toml.GetDefault("nvidia-container-cli.root", cfg.Root).(string) + + return cfg +} + +// getDefaultCLIConfig defines the default values for the config +func getDefaultCLIConfig() *CLIConfig { + c := CLIConfig{ + Root: "", + } + + return &c +} diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 00000000..df880954 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,99 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package config + +import ( + "fmt" + "io" + "os" + "path" + + "github.com/pelletier/go-toml" +) + +const ( + configOverride = "XDG_CONFIG_HOME" + configFilePath = "nvidia-container-runtime/config.toml" +) + +var ( + configDir = "/etc/" +) + +// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit +// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go +type Config struct { + NVIDIAContainerCLIConfig CLIConfig `toml:"nvidia-container-cli"` + NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"` +} + +// GetConfig sets up the config struct. Values are read from a toml file +// or set via the environment. +func GetConfig() (*Config, error) { + if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 { + configDir = XDGConfigDir + } + + configFilePath := path.Join(configDir, configFilePath) + + tomlFile, err := os.Open(configFilePath) + if err != nil { + return nil, fmt.Errorf("failed to open config file %v: %v", configFilePath, err) + } + defer tomlFile.Close() + + cfg, err := loadConfigFrom(tomlFile) + if err != nil { + return nil, fmt.Errorf("failed to read config values: %v", err) + } + + return cfg, nil +} + +// loadRuntimeConfigFrom reads the config from the specified Reader +func loadConfigFrom(reader io.Reader) (*Config, error) { + toml, err := toml.LoadReader(reader) + if err != nil { + return nil, err + } + + return getConfigFrom(toml), nil +} + +// getConfigFrom reads the nvidia container runtime config from the specified toml Tree. +func getConfigFrom(toml *toml.Tree) *Config { + cfg := getDefaultConfig() + + if toml == nil { + return cfg + } + + cfg.NVIDIAContainerCLIConfig = *getCLIConfigFrom(toml) + cfg.NVIDIAContainerRuntimeConfig = *getRuntimeConfigFrom(toml) + + return cfg +} + +// getDefaultConfig defines the default values for the config +func getDefaultConfig() *Config { + c := Config{ + NVIDIAContainerCLIConfig: *getDefaultCLIConfig(), + NVIDIAContainerRuntimeConfig: *getDefaultRuntimeConfig(), + } + + return &c +} diff --git a/internal/config/runtime.go b/internal/config/runtime.go index 5132c6d3..c0c43011 100644 --- a/internal/config/runtime.go +++ b/internal/config/runtime.go @@ -17,23 +17,9 @@ package config import ( - "fmt" - "io" - "os" - "path" - "github.com/pelletier/go-toml" ) -const ( - configOverride = "XDG_CONFIG_HOME" - configFilePath = "nvidia-container-runtime/config.toml" -) - -var ( - configDir = "/etc/" -) - // RuntimeConfig stores the config options for the NVIDIA Container Runtime type RuntimeConfig struct { DebugFilePath string @@ -41,39 +27,6 @@ type RuntimeConfig struct { DiscoverMode string } -// GetRuntimeConfig sets up the config struct. Values are read from a toml file -// or set via the environment. -func GetRuntimeConfig() (*RuntimeConfig, error) { - if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 { - configDir = XDGConfigDir - } - - configFilePath := path.Join(configDir, configFilePath) - - tomlFile, err := os.Open(configFilePath) - if err != nil { - return nil, fmt.Errorf("failed to open config file %v: %v", configFilePath, err) - } - defer tomlFile.Close() - - cfg, err := loadRuntimeConfigFrom(tomlFile) - if err != nil { - return nil, fmt.Errorf("failed to read config values: %v", err) - } - - return cfg, nil -} - -// loadRuntimeConfigFrom reads the config from the specified Reader -func loadRuntimeConfigFrom(reader io.Reader) (*RuntimeConfig, error) { - toml, err := toml.LoadReader(reader) - if err != nil { - return nil, err - } - - return getRuntimeConfigFrom(toml), nil -} - // getRuntimeConfigFrom reads the nvidia container runtime config from the specified toml Tree. func getRuntimeConfigFrom(toml *toml.Tree) *RuntimeConfig { cfg := getDefaultRuntimeConfig() diff --git a/internal/config/runtime_test.go b/internal/config/runtime_test.go index 1601f392..82495719 100644 --- a/internal/config/runtime_test.go +++ b/internal/config/runtime_test.go @@ -26,7 +26,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestGerRuntimeConfigWithCustomConfig(t *testing.T) { +func TestGetConfigWithCustomConfig(t *testing.T) { wd, err := os.Getwd() require.NoError(t, err) @@ -42,24 +42,29 @@ func TestGerRuntimeConfigWithCustomConfig(t *testing.T) { defer func() { require.NoError(t, os.RemoveAll(testDir)) }() - cfg, err := GetRuntimeConfig() + cfg, err := GetConfig() require.NoError(t, err) - require.Equal(t, cfg.DebugFilePath, "/nvidia-container-toolkit.log") + require.Equal(t, cfg.NVIDIAContainerRuntimeConfig.DebugFilePath, "/nvidia-container-toolkit.log") } -func TestGerRuntimeConfig(t *testing.T) { +func TestGetConfig(t *testing.T) { testCases := []struct { description string contents []string expectedError error - expectedConfig *RuntimeConfig + expectedConfig *Config }{ { description: "empty config is default", - expectedConfig: &RuntimeConfig{ - DebugFilePath: "/dev/null", - Experimental: false, - DiscoverMode: "legacy", + expectedConfig: &Config{ + NVIDIAContainerCLIConfig: CLIConfig{ + Root: "", + }, + NVIDIAContainerRuntimeConfig: RuntimeConfig{ + DebugFilePath: "/dev/null", + Experimental: false, + DiscoverMode: "legacy", + }, }, }, { @@ -69,10 +74,15 @@ func TestGerRuntimeConfig(t *testing.T) { "nvidia-container-runtime.experimental = true", "nvidia-container-runtime.discover-mode = \"not-legacy\"", }, - expectedConfig: &RuntimeConfig{ - DebugFilePath: "/foo/bar", - Experimental: true, - DiscoverMode: "not-legacy", + expectedConfig: &Config{ + NVIDIAContainerCLIConfig: CLIConfig{ + Root: "", + }, + NVIDIAContainerRuntimeConfig: RuntimeConfig{ + DebugFilePath: "/foo/bar", + Experimental: true, + DiscoverMode: "not-legacy", + }, }, }, { @@ -83,10 +93,15 @@ func TestGerRuntimeConfig(t *testing.T) { "experimental = true", "discover-mode = \"not-legacy\"", }, - expectedConfig: &RuntimeConfig{ - DebugFilePath: "/foo/bar", - Experimental: true, - DiscoverMode: "not-legacy", + expectedConfig: &Config{ + NVIDIAContainerCLIConfig: CLIConfig{ + Root: "", + }, + NVIDIAContainerRuntimeConfig: RuntimeConfig{ + DebugFilePath: "/foo/bar", + Experimental: true, + DiscoverMode: "not-legacy", + }, }, }, } @@ -95,7 +110,7 @@ func TestGerRuntimeConfig(t *testing.T) { t.Run(tc.description, func(t *testing.T) { reader := strings.NewReader(strings.Join(tc.contents, "\n")) - cfg, err := loadRuntimeConfigFrom(reader) + cfg, err := loadConfigFrom(reader) if tc.expectedError != nil { require.Error(t, err) } else { From 14fe35c3f4c562479e2f501f5d7d46588b72f582 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 30 Mar 2022 12:14:52 +0200 Subject: [PATCH 10/20] Implement hook remover for existing nvidia-container-runtime-hooks Signed-off-by: Evan Lezar --- .../modifier/experimental.go | 40 +--------- .../modifier/hook_remover.go | 79 +++++++++++++++++++ 2 files changed, 81 insertions(+), 38 deletions(-) create mode 100644 cmd/nvidia-container-runtime/modifier/hook_remover.go diff --git a/cmd/nvidia-container-runtime/modifier/experimental.go b/cmd/nvidia-container-runtime/modifier/experimental.go index 9693d05b..399f9534 100644 --- a/cmd/nvidia-container-runtime/modifier/experimental.go +++ b/cmd/nvidia-container-runtime/modifier/experimental.go @@ -18,7 +18,6 @@ package modifier import ( "fmt" - "path/filepath" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" @@ -69,9 +68,9 @@ func newExperimentalModifierFromDiscoverer(logger *logrus.Logger, d discover.Dis // Modify applies the required modifications to the incomming OCI spec. These modifications // are applied in-place. func (m experimental) Modify(spec *specs.Spec) error { - err := m.assertSpecIsCompatible(spec) + err := nvidiaContainerRuntimeHookRemover{m.logger}.Modify(spec) if err != nil { - return fmt.Errorf("OCI specification cannot be modified: %v", err) + return fmt.Errorf("failed to remove existing hooks: %v", err) } specEdits, err := edits.NewSpecEdits(m.logger, m.discoverer) @@ -81,38 +80,3 @@ func (m experimental) Modify(spec *specs.Spec) error { return specEdits.Modify(spec) } - -func (m experimental) assertSpecIsCompatible(spec *specs.Spec) error { - if spec == nil { - return nil - } - - if spec.Hooks == nil { - return nil - } - - if hookPath := findStableHook(spec.Hooks.Prestart); hookPath != "" { - return fmt.Errorf("spec already contains required 'prestart' hook: %v", hookPath) - } - - return nil -} - -// findStableHook checks the list of OCI hooks for the nvidia-container-runtime-hook -// or nvidia-container-toolkit hook. These are included, for example, by the non-experimental -// nvidia-container-runtime or docker when specifying the --gpus flag. -func findStableHook(hooks []specs.Hook) string { - lookFor := map[string]bool{ - nvidiaContainerRuntimeHookExecuable: true, - nvidiaContainerToolkitExecutable: true, - } - - for _, h := range hooks { - base := filepath.Base(h.Path) - if lookFor[base] { - return h.Path - } - } - - return "" -} diff --git a/cmd/nvidia-container-runtime/modifier/hook_remover.go b/cmd/nvidia-container-runtime/modifier/hook_remover.go new file mode 100644 index 00000000..ac3354d7 --- /dev/null +++ b/cmd/nvidia-container-runtime/modifier/hook_remover.go @@ -0,0 +1,79 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package modifier + +import ( + "fmt" + "path/filepath" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" +) + +// nvidiaContainerRuntimeHookRemover is a spec modifer that detects and removes inserted nvidia-container-runtime hooks +type nvidiaContainerRuntimeHookRemover struct { + logger *logrus.Logger +} + +var _ oci.SpecModifier = (*nvidiaContainerRuntimeHookRemover)(nil) + +// Modify removes any NVIDIA Container Runtime hooks from the provided spec +func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error { + if spec == nil { + return nil + } + + if spec.Hooks == nil { + return nil + } + + var updateRequired bool + newPrestart := make([]specs.Hook, 0, len(spec.Hooks.Prestart)) + + for _, hook := range spec.Hooks.Prestart { + if isNVIDIAContainerRuntimeHook(&hook) { + m.logger.Infof("Removing hook %v", hook) + updateRequired = true + continue + } + newPrestart = append(newPrestart, hook) + } + + if updateRequired { + // TODO: Once we have updated the hook implementation to give an error if invoked incorrectly, we will update the spec hooks here instead of just logging. + // We can then also use a boolean to track whether this is required instead of storing the removed hooks + // spec.Hooks.Prestart = newPrestart + m.logger.Debugf("Updating 'prestart' hooks to %v", newPrestart) + return fmt.Errorf("spec already contains required 'prestart' hook") + } + + return nil +} + +// isNVIDIAContainerRuntimeHook checks if the provided hook is an nvidia-container-runtime-hook +// or nvidia-container-toolkit hook. These are included, for example, by the non-experimental +// nvidia-container-runtime or docker when specifying the --gpus flag. +func isNVIDIAContainerRuntimeHook(hook *specs.Hook) bool { + lookFor := map[string]bool{ + nvidiaContainerRuntimeHookExecuable: true, + nvidiaContainerToolkitExecutable: true, + } + base := filepath.Base(hook.Path) + + return lookFor[base] +} From a26d02890f88ad6f7dba9a2a77cb1877d8226600 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 30 Mar 2022 13:18:57 +0200 Subject: [PATCH 11/20] Make error logging less verbose by default Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime/main.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmd/nvidia-container-runtime/main.go b/cmd/nvidia-container-runtime/main.go index 787d9361..b0df4f87 100644 --- a/cmd/nvidia-container-runtime/main.go +++ b/cmd/nvidia-container-runtime/main.go @@ -12,7 +12,7 @@ var logger = NewLogger() func main() { err := run(os.Args) if err != nil { - logger.Errorf("Error running %v: %v", os.Args, err) + logger.Errorf("%v", err) os.Exit(1) } } @@ -20,6 +20,7 @@ func main() { // run is an entry point that allows for idiomatic handling of errors // when calling from the main function. func run(argv []string) (rerr error) { + logger.Debugf("Running %v", argv) cfg, err := config.GetConfig() if err != nil { return fmt.Errorf("error loading config: %v", err) @@ -32,7 +33,7 @@ func run(argv []string) (rerr error) { defer func() { // We capture and log a returning error before closing the log file. if rerr != nil { - logger.Errorf("Error running %v: %v", argv, rerr) + logger.Errorf("%v", rerr) } logger.CloseFile() }() From d0608844dc6007d455d8e1e6fca0de52f315fdce Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 30 Mar 2022 15:31:25 +0200 Subject: [PATCH 12/20] Add basic README for nvidia-container-runtime Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime/README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 cmd/nvidia-container-runtime/README.md diff --git a/cmd/nvidia-container-runtime/README.md b/cmd/nvidia-container-runtime/README.md new file mode 100644 index 00000000..af4e1040 --- /dev/null +++ b/cmd/nvidia-container-runtime/README.md @@ -0,0 +1,24 @@ +# The NVIDIA Container Runtime + +The NVIDIA Container Runtime is a shim for OCI-compliant low-level runtimes such as [runc](https://github.com/opencontainers/runc). When a `create` command is detected, the incoming [OCI runtime specification](https://github.com/opencontainers/runtime-spec) is modified in place and the command is forwarded to the low-level runtime. + +## Standard Mode + +In the standard mode configuration, the NVIDIA Container Runtime adds a [`prestart` hook](https://github.com/opencontainers/runtime-spec/blob/master/config.md#prestart) to the incomming OCI specification that invokes the NVIDIA Container Runtime Hook for all containers created. This hook checks whether NVIDIA devices are requested and ensures GPU access is configured using the `nvidia-container-cli` from project [libnvidia-container](https://github.com/NVIDIA/libnvidia-container). + +## Experimental Mode + +The NVIDIA Container Runtime can be configured in an experimental mode by setting the following options in the runtime's `config.toml` file: + +```toml +[nvidia-container-runtime] +experimental = true +``` + +When this setting is enabled, the modifications made to the OCI specification are controlled by the `nvidia-container-runtime.discover-mode` option, with the following mode supported: + +* `"legacy"`: This mode mirrors the behaviour of the standard mode, inserting the NVIDIA Container Runtime Hook as a `prestart` hook into the container's OCI specification. + +### Notes on using the docker CLI + +The `docker` CLI supports the `--gpus` flag to select GPUs for inclusion in a container. Since specifying this flag inserts the same NVIDIA Container Runtime Hook into the OCI runtime specification. When experimental mode is activated, the NVIDIA Container Runtime detects the presence of the hook and raises an error. This requirement will be relaxed in the near future. From 282a2c145ea5981726ad98b242d896ec241440d4 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 4 Apr 2022 07:38:03 +0200 Subject: [PATCH 13/20] Fix typo in variable name Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime/modifier/hook_remover.go | 4 ++-- cmd/nvidia-container-runtime/modifier/stable.go | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cmd/nvidia-container-runtime/modifier/hook_remover.go b/cmd/nvidia-container-runtime/modifier/hook_remover.go index ac3354d7..e8743352 100644 --- a/cmd/nvidia-container-runtime/modifier/hook_remover.go +++ b/cmd/nvidia-container-runtime/modifier/hook_remover.go @@ -70,8 +70,8 @@ func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error { // nvidia-container-runtime or docker when specifying the --gpus flag. func isNVIDIAContainerRuntimeHook(hook *specs.Hook) bool { lookFor := map[string]bool{ - nvidiaContainerRuntimeHookExecuable: true, - nvidiaContainerToolkitExecutable: true, + nvidiaContainerRuntimeHookExecutable: true, + nvidiaContainerToolkitExecutable: true, } base := filepath.Base(hook.Path) diff --git a/cmd/nvidia-container-runtime/modifier/stable.go b/cmd/nvidia-container-runtime/modifier/stable.go index 9d955632..fa0fb8f1 100644 --- a/cmd/nvidia-container-runtime/modifier/stable.go +++ b/cmd/nvidia-container-runtime/modifier/stable.go @@ -27,7 +27,7 @@ import ( ) const ( - nvidiaContainerRuntimeHookExecuable = "nvidia-container-runtime-hook" + nvidiaContainerRuntimeHookExecutable = "nvidia-container-runtime-hook" nvidiaContainerRuntimeHookDefaultPath = "/usr/bin/nvidia-container-runtime-hook" nvidiaContainerToolkitExecutable = "nvidia-container-toolkit" @@ -50,7 +50,7 @@ type stableRuntimeModifier struct { // Modify applies the required modification to the incoming OCI spec, inserting the nvidia-container-runtime-hook // as a prestart hook. func (m stableRuntimeModifier) Modify(spec *specs.Spec) error { - path, err := exec.LookPath(nvidiaContainerRuntimeHookExecuable) + path, err := exec.LookPath(nvidiaContainerRuntimeHookExecutable) if err != nil { path = nvidiaContainerRuntimeHookDefaultPath _, err = os.Stat(path) @@ -66,7 +66,7 @@ func (m stableRuntimeModifier) Modify(spec *specs.Spec) error { spec.Hooks = &specs.Hooks{} } else if len(spec.Hooks.Prestart) != 0 { for _, hook := range spec.Hooks.Prestart { - if strings.Contains(hook.Path, nvidiaContainerRuntimeHookExecuable) { + if strings.Contains(hook.Path, nvidiaContainerRuntimeHookExecutable) { m.logger.Infof("existing nvidia prestart hook found in OCI spec") return nil } From 8db287af8b1b5002c213ae9d7656c4bc1c9fe7fc Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 6 Apr 2022 14:46:27 +0200 Subject: [PATCH 14/20] FIX: Fix typo in comment Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime/modifier/experimental.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/nvidia-container-runtime/modifier/experimental.go b/cmd/nvidia-container-runtime/modifier/experimental.go index 399f9534..d5a42c05 100644 --- a/cmd/nvidia-container-runtime/modifier/experimental.go +++ b/cmd/nvidia-container-runtime/modifier/experimental.go @@ -33,7 +33,7 @@ type experimental struct { discoverer discover.Discover } -// NewExperimentalModifier creates a modifier that applied the experimental +// NewExperimentalModifier creates a modifier that applies the experimental // modications to an OCI spec if required by the runtime wrapper. func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config) (oci.SpecModifier, error) { logger.Infof("Constructing modifier from config: %+v", cfg) From e6730fd0f08a77ee943c6f00b8d425b92f186208 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 6 Apr 2022 15:13:32 +0200 Subject: [PATCH 15/20] FIX: Don't log that hooks is being removed if it is not Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime/modifier/hook_remover.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmd/nvidia-container-runtime/modifier/hook_remover.go b/cmd/nvidia-container-runtime/modifier/hook_remover.go index e8743352..69249730 100644 --- a/cmd/nvidia-container-runtime/modifier/hook_remover.go +++ b/cmd/nvidia-container-runtime/modifier/hook_remover.go @@ -42,12 +42,16 @@ func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error { return nil } + if len(spec.Hooks.Prestart) == 0 { + return nil + } + var updateRequired bool newPrestart := make([]specs.Hook, 0, len(spec.Hooks.Prestart)) for _, hook := range spec.Hooks.Prestart { if isNVIDIAContainerRuntimeHook(&hook) { - m.logger.Infof("Removing hook %v", hook) + m.logger.Warnf("Found existing NVIDIA Container Runtime Hook: %v", hook) updateRequired = true continue } From 11aa1d2a7d5f725f79eb12318e06aadcc6d2ad2a Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 6 Apr 2022 15:18:12 +0200 Subject: [PATCH 16/20] FIX: Factor out specModifier construction into function Signed-off-by: Evan Lezar --- .../runtime_factory.go | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/cmd/nvidia-container-runtime/runtime_factory.go b/cmd/nvidia-container-runtime/runtime_factory.go index 9ae9a572..8eb23302 100644 --- a/cmd/nvidia-container-runtime/runtime_factory.go +++ b/cmd/nvidia-container-runtime/runtime_factory.go @@ -44,14 +44,9 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [ return nil, fmt.Errorf("error constructing low-level runtime: %v", err) } - var specModifier oci.SpecModifier - if cfg.NVIDIAContainerRuntimeConfig.Experimental { - specModifier, err = modifier.NewExperimentalModifier(logger, cfg) - if err != nil { - return nil, fmt.Errorf("failed to construct experimental modifier: %v", err) - } - } else { - specModifier = modifier.NewStableRuntimeModifier(logger) + specModifier, err := newSpecModifier(logger, cfg) + if err != nil { + return nil, fmt.Errorf("failed to construct OCI spec modifier: %v", err) } // Create the wrapping runtime with the specified modifier @@ -64,3 +59,12 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [ return r, nil } + +// newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config. +func newSpecModifier(logger *logrus.Logger, cfg *config.Config) (oci.SpecModifier, error) { + if !cfg.NVIDIAContainerRuntimeConfig.Experimental { + return modifier.NewStableRuntimeModifier(logger), nil + } + + return modifier.NewExperimentalModifier(logger, cfg) +} From 0054481e1539ae719b3bc3cead05ac47c2e67a35 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 6 Apr 2022 15:21:57 +0200 Subject: [PATCH 17/20] FIX: Rename CLIConfig to ContainerCLIConfig Signed-off-by: Evan Lezar --- internal/config/cli.go | 16 ++++++++-------- internal/config/config.go | 8 ++++---- internal/config/runtime_test.go | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/internal/config/cli.go b/internal/config/cli.go index 8eca29de..5cd95751 100644 --- a/internal/config/cli.go +++ b/internal/config/cli.go @@ -20,14 +20,14 @@ import ( "github.com/pelletier/go-toml" ) -// CLIConfig stores the options for the nvidia-container-cli -type CLIConfig struct { +// ContainerCLIConfig stores the options for the nvidia-container-cli +type ContainerCLIConfig struct { Root string } -// getCLIConfigFrom reads the nvidia container runtime config from the specified toml Tree. -func getCLIConfigFrom(toml *toml.Tree) *CLIConfig { - cfg := getDefaultCLIConfig() +// getContainerCLIConfigFrom reads the nvidia container runtime config from the specified toml Tree. +func getContainerCLIConfigFrom(toml *toml.Tree) *ContainerCLIConfig { + cfg := getDefaultContainerCLIConfig() if toml == nil { return cfg @@ -38,9 +38,9 @@ func getCLIConfigFrom(toml *toml.Tree) *CLIConfig { return cfg } -// getDefaultCLIConfig defines the default values for the config -func getDefaultCLIConfig() *CLIConfig { - c := CLIConfig{ +// getDefaultContainerCLIConfig defines the default values for the config +func getDefaultContainerCLIConfig() *ContainerCLIConfig { + c := ContainerCLIConfig{ Root: "", } diff --git a/internal/config/config.go b/internal/config/config.go index df880954..8e328487 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -37,8 +37,8 @@ var ( // Config represents the contents of the config.toml file for the NVIDIA Container Toolkit // Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go type Config struct { - NVIDIAContainerCLIConfig CLIConfig `toml:"nvidia-container-cli"` - NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"` + NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"` + NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"` } // GetConfig sets up the config struct. Values are read from a toml file @@ -82,7 +82,7 @@ func getConfigFrom(toml *toml.Tree) *Config { return cfg } - cfg.NVIDIAContainerCLIConfig = *getCLIConfigFrom(toml) + cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml) cfg.NVIDIAContainerRuntimeConfig = *getRuntimeConfigFrom(toml) return cfg @@ -91,7 +91,7 @@ func getConfigFrom(toml *toml.Tree) *Config { // getDefaultConfig defines the default values for the config func getDefaultConfig() *Config { c := Config{ - NVIDIAContainerCLIConfig: *getDefaultCLIConfig(), + NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(), NVIDIAContainerRuntimeConfig: *getDefaultRuntimeConfig(), } diff --git a/internal/config/runtime_test.go b/internal/config/runtime_test.go index 82495719..32ea7a0b 100644 --- a/internal/config/runtime_test.go +++ b/internal/config/runtime_test.go @@ -57,7 +57,7 @@ func TestGetConfig(t *testing.T) { { description: "empty config is default", expectedConfig: &Config{ - NVIDIAContainerCLIConfig: CLIConfig{ + NVIDIAContainerCLIConfig: ContainerCLIConfig{ Root: "", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ @@ -75,7 +75,7 @@ func TestGetConfig(t *testing.T) { "nvidia-container-runtime.discover-mode = \"not-legacy\"", }, expectedConfig: &Config{ - NVIDIAContainerCLIConfig: CLIConfig{ + NVIDIAContainerCLIConfig: ContainerCLIConfig{ Root: "", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ @@ -94,7 +94,7 @@ func TestGetConfig(t *testing.T) { "discover-mode = \"not-legacy\"", }, expectedConfig: &Config{ - NVIDIAContainerCLIConfig: CLIConfig{ + NVIDIAContainerCLIConfig: ContainerCLIConfig{ Root: "", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ From 96e8eb3dde6717ee5e16cb220a232b67638d5404 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 6 Apr 2022 15:24:02 +0200 Subject: [PATCH 18/20] FIX: Rename path locator as executable locator Signed-off-by: Evan Lezar --- internal/discover/legacy.go | 2 +- internal/lookup/{path.go => executable.go} | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) rename internal/lookup/{path.go => executable.go} (86%) diff --git a/internal/discover/legacy.go b/internal/discover/legacy.go index 6fb74139..53f6ffc1 100644 --- a/internal/discover/legacy.go +++ b/internal/discover/legacy.go @@ -38,7 +38,7 @@ var _ Discover = (*legacy)(nil) func NewLegacyDiscoverer(logger *logrus.Logger, root string) (Discover, error) { d := legacy{ logger: logger, - lookup: lookup.NewPathLocator(logger, root), + lookup: lookup.NewExecutaleLocator(logger, root), } return &d, nil diff --git a/internal/lookup/path.go b/internal/lookup/executable.go similarity index 86% rename from internal/lookup/path.go rename to internal/lookup/executable.go index 9dffa4c5..be7577ff 100644 --- a/internal/lookup/path.go +++ b/internal/lookup/executable.go @@ -31,12 +31,12 @@ const ( var defaultPaths = []string{"/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"} -type path struct { +type executable struct { file } -// NewPathLocator creates a locator to fine executable files in the path. A logger can also be specified. -func NewPathLocator(logger *log.Logger, root string) Locator { +// NewExecutaleLocator creates a locator to fine executable files in the path. A logger can also be specified. +func NewExecutaleLocator(logger *log.Logger, root string) Locator { pathEnv := os.Getenv(envPath) paths := filepath.SplitList(pathEnv) @@ -48,7 +48,7 @@ func NewPathLocator(logger *log.Logger, root string) Locator { for _, dir := range paths { prefixes = append(prefixes, filepath.Join(root, dir)) } - l := path{ + l := executable{ file: file{ logger: logger, prefixes: prefixes, @@ -58,10 +58,10 @@ func NewPathLocator(logger *log.Logger, root string) Locator { return &l } -var _ Locator = (*path)(nil) +var _ Locator = (*executable)(nil) // Locate finds executable files in the path. If a relative or absolute path is specified, the prefix paths are not considered. -func (p path) Locate(filename string) ([]string, error) { +func (p executable) Locate(filename string) ([]string, error) { // For absolute paths we ensure that it is executable if strings.Contains(filename, "/") { err := assertExecutable(filename) From b8dd473343ae521c83325b973012c1fd897d6b9d Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 6 Apr 2022 17:15:57 +0200 Subject: [PATCH 19/20] FIX: Simplify hook remover Signed-off-by: Evan Lezar --- .../modifier/hook_remover.go | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/cmd/nvidia-container-runtime/modifier/hook_remover.go b/cmd/nvidia-container-runtime/modifier/hook_remover.go index 69249730..70067e2b 100644 --- a/cmd/nvidia-container-runtime/modifier/hook_remover.go +++ b/cmd/nvidia-container-runtime/modifier/hook_remover.go @@ -46,24 +46,10 @@ func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error { return nil } - var updateRequired bool - newPrestart := make([]specs.Hook, 0, len(spec.Hooks.Prestart)) - for _, hook := range spec.Hooks.Prestart { if isNVIDIAContainerRuntimeHook(&hook) { - m.logger.Warnf("Found existing NVIDIA Container Runtime Hook: %v", hook) - updateRequired = true - continue + return fmt.Errorf("spec already contains required 'prestart' hook") } - newPrestart = append(newPrestart, hook) - } - - if updateRequired { - // TODO: Once we have updated the hook implementation to give an error if invoked incorrectly, we will update the spec hooks here instead of just logging. - // We can then also use a boolean to track whether this is required instead of storing the removed hooks - // spec.Hooks.Prestart = newPrestart - m.logger.Debugf("Updating 'prestart' hooks to %v", newPrestart) - return fmt.Errorf("spec already contains required 'prestart' hook") } return nil From 9ce690093d3b3dc0d583b6d94e54a29e55d212c6 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 6 Apr 2022 17:18:06 +0200 Subject: [PATCH 20/20] FIX: Make isNVIDIAContainerRuntimeHook mode idiomatic Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime/modifier/hook_remover.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cmd/nvidia-container-runtime/modifier/hook_remover.go b/cmd/nvidia-container-runtime/modifier/hook_remover.go index 70067e2b..7df4b2f1 100644 --- a/cmd/nvidia-container-runtime/modifier/hook_remover.go +++ b/cmd/nvidia-container-runtime/modifier/hook_remover.go @@ -59,11 +59,12 @@ func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error { // or nvidia-container-toolkit hook. These are included, for example, by the non-experimental // nvidia-container-runtime or docker when specifying the --gpus flag. func isNVIDIAContainerRuntimeHook(hook *specs.Hook) bool { - lookFor := map[string]bool{ - nvidiaContainerRuntimeHookExecutable: true, - nvidiaContainerToolkitExecutable: true, + bins := map[string]struct{}{ + nvidiaContainerRuntimeHookExecutable: {}, + nvidiaContainerToolkitExecutable: {}, } - base := filepath.Base(hook.Path) - return lookFor[base] + _, exists := bins[filepath.Base(hook.Path)] + + return exists }