From ec8a92c17f08030a9bc32e24095f213b3df07033 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 24 Feb 2023 17:16:27 +0200 Subject: [PATCH 1/9] Use nvidia-container-runtime.experimental as wrapper This change switches to using nvidia-container-runtime.experimental as the wrapper name over nvidia-container-runtime-experimental. This is consistent with upcoming mode-specific binaries. The wrapper is created at nvidia-container-runtime.experimental.real. Signed-off-by: Evan Lezar --- .../nvidia/driver/usr/lib64/libnvidia-ml.so | 2 +- test/container/toolkit_test.sh | 8 ++++---- tools/container/README.md | 4 ++-- tools/container/containerd/config_v1_test.go | 10 +++++----- tools/container/containerd/config_v2_test.go | 8 ++++---- tools/container/containerd/containerd.go | 2 +- tools/container/containerd/containerd_test.go | 14 +++++++------- tools/container/docker/docker.go | 4 ++-- tools/container/docker/docker_test.go | 18 +++++++++--------- tools/container/toolkit/runtime.go | 13 +++++++------ 10 files changed, 42 insertions(+), 41 deletions(-) diff --git a/test/container/shared/run/nvidia/driver/usr/lib64/libnvidia-ml.so b/test/container/shared/run/nvidia/driver/usr/lib64/libnvidia-ml.so index 619a146a..5f0613d7 100644 --- a/test/container/shared/run/nvidia/driver/usr/lib64/libnvidia-ml.so +++ b/test/container/shared/run/nvidia/driver/usr/lib64/libnvidia-ml.so @@ -1 +1 @@ -# This is a dummy lib file to test nvidia-runtime-experimental \ No newline at end of file +# This is a dummy lib file to test nvidia-runtime.experimental diff --git a/test/container/toolkit_test.sh b/test/container/toolkit_test.sh index 3495a1ee..f92bb0ff 100644 --- a/test/container/toolkit_test.sh +++ b/test/container/toolkit_test.sh @@ -47,11 +47,11 @@ testing::toolkit::install() { test -e "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime.real" test -e "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime.experimental" - test -e "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime-experimental" + test -e "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime.experimental.real" - grep -q -E "nvidia driver modules are not yet loaded, invoking runc directly" "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime-experimental" - grep -q -E "exec runc \".@\"" "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime-experimental" - grep -q -E "LD_LIBRARY_PATH=/run/nvidia/driver/usr/lib64:\\\$LD_LIBRARY_PATH " "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime-experimental" + grep -q -E "nvidia driver modules are not yet loaded, invoking runc directly" "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime.experimental" + grep -q -E "exec runc \".@\"" "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime.experimental" + grep -q -E "LD_LIBRARY_PATH=/run/nvidia/driver/usr/lib64:\\\$LD_LIBRARY_PATH " "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime.experimental" test -e "${shared_dir}/usr/local/nvidia/toolkit/.config/nvidia-container-runtime/config.toml" diff --git a/tools/container/README.md b/tools/container/README.md index eb95f107..16e4c055 100644 --- a/tools/container/README.md +++ b/tools/container/README.md @@ -15,7 +15,7 @@ docker setup \ /run/nvidia/toolkit ``` -Configure the `nvidia-container-runtime` as a docker runtime named `NAME`. If the `--runtime-name` flag is not specified, this runtime would be called `nvidia`. A runtime named `nvidia-experimental` will also be configured using the `nvidia-container-runtime-experimental` OCI-compliant runtime shim. +Configure the `nvidia-container-runtime` as a docker runtime named `NAME`. If the `--runtime-name` flag is not specified, this runtime would be called `nvidia`. A runtime named `nvidia-experimental` will also be configured using the `nvidia-container-runtime.experimental` OCI-compliant runtime shim. Since `--set-as-default` is enabled by default, the specified runtime name will also be set as the default docker runtime. This can be disabled by explicityly specifying `--set-as-default=false`. @@ -48,7 +48,7 @@ containerd setup \ /run/nvidia/toolkit ``` -Configure the `nvidia-container-runtime` as a runtime class named `NAME`. If the `--runtime-class` flag is not specified, this runtime would be called `nvidia`. A runtime class named `nvidia-experimental` will also be configured using the `nvidia-container-runtime-experimental` OCI-compliant runtime shim. +Configure the `nvidia-container-runtime` as a runtime class named `NAME`. If the `--runtime-class` flag is not specified, this runtime would be called `nvidia`. A runtime class named `nvidia-experimental` will also be configured using the `nvidia-container-runtime.experimental` OCI-compliant runtime shim. Adding the `--set-as-default` flag as follows: ```bash diff --git a/tools/container/containerd/config_v1_test.go b/tools/container/containerd/config_v1_test.go index 993d4f1d..6c51467b 100644 --- a/tools/container/containerd/config_v1_test.go +++ b/tools/container/containerd/config_v1_test.go @@ -58,7 +58,7 @@ func TestUpdateV1ConfigDefaultRuntime(t *testing.T) { setAsDefault: true, runtimeClass: "nvidia-experimental", expectedDefaultRuntimeName: nil, - expectedDefaultRuntimeBinary: "/test/runtime/dir/nvidia-container-runtime-experimental", + expectedDefaultRuntimeBinary: "/test/runtime/dir/nvidia-container-runtime.experimental", }, { legacyConfig: false, @@ -128,7 +128,7 @@ func TestUpdateV1Config(t *testing.T) { expectedBinaries := []string{ "/test/runtime/dir/nvidia-container-runtime", - "/test/runtime/dir/nvidia-container-runtime-experimental", + "/test/runtime/dir/nvidia-container-runtime.experimental", } testCases := []struct { @@ -195,7 +195,7 @@ func TestUpdateV1ConfigWithRuncPresent(t *testing.T) { expectedBinaries := []string{ runcBinary, "/test/runtime/dir/nvidia-container-runtime", - "/test/runtime/dir/nvidia-container-runtime-experimental", + "/test/runtime/dir/nvidia-container-runtime.experimental", } testCases := []struct { @@ -274,7 +274,7 @@ func TestRevertV1Config(t *testing.T) { "containerd": map[string]interface{}{ "runtimes": map[string]interface{}{ "nvidia": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime"), - "nvidia-experimental": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime-experimental"), + "nvidia-experimental": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime.experimental"), }, }, }, @@ -289,7 +289,7 @@ func TestRevertV1Config(t *testing.T) { "containerd": map[string]interface{}{ "runtimes": map[string]interface{}{ "nvidia": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime"), - "nvidia-experimental": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime-experimental"), + "nvidia-experimental": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime.experimental"), }, "default_runtime": defaultRuntimeV1("/test/runtime/dir/nvidia-container-runtime"), "default_runtime_name": "nvidia", diff --git a/tools/container/containerd/config_v2_test.go b/tools/container/containerd/config_v2_test.go index 9e2342d6..68798446 100644 --- a/tools/container/containerd/config_v2_test.go +++ b/tools/container/containerd/config_v2_test.go @@ -92,7 +92,7 @@ func TestUpdateV2Config(t *testing.T) { expectedBinaries := []string{ "/test/runtime/dir/nvidia-container-runtime", - "/test/runtime/dir/nvidia-container-runtime-experimental", + "/test/runtime/dir/nvidia-container-runtime.experimental", } testCases := []struct { @@ -160,7 +160,7 @@ func TestUpdateV2ConfigWithRuncPresent(t *testing.T) { expectedBinaries := []string{ runcBinary, "/test/runtime/dir/nvidia-container-runtime", - "/test/runtime/dir/nvidia-container-runtime-experimental", + "/test/runtime/dir/nvidia-container-runtime.experimental", } testCases := []struct { @@ -239,7 +239,7 @@ func TestRevertV2Config(t *testing.T) { "containerd": map[string]interface{}{ "runtimes": map[string]interface{}{ "nvidia": runtimeMapV2("/test/runtime/dir/nvidia-container-runtime"), - "nvidia-experimental": runtimeMapV2("/test/runtime/dir/nvidia-container-runtime-experimental"), + "nvidia-experimental": runtimeMapV2("/test/runtime/dir/nvidia-container-runtime.experimental"), }, }, }, @@ -254,7 +254,7 @@ func TestRevertV2Config(t *testing.T) { "containerd": map[string]interface{}{ "runtimes": map[string]interface{}{ "nvidia": runtimeMapV2("/test/runtime/dir/nvidia-container-runtime"), - "nvidia-experimental": runtimeMapV2("/test/runtime/dir/nvidia-container-runtime-experimental"), + "nvidia-experimental": runtimeMapV2("/test/runtime/dir/nvidia-container-runtime.experimental"), }, "default_runtime_name": "nvidia", }, diff --git a/tools/container/containerd/containerd.go b/tools/container/containerd/containerd.go index 23566f67..b8067d1a 100644 --- a/tools/container/containerd/containerd.go +++ b/tools/container/containerd/containerd.go @@ -38,7 +38,7 @@ const ( nvidiaRuntimeName = "nvidia" nvidiaRuntimeBinary = "nvidia-container-runtime" nvidiaExperimentalRuntimeName = "nvidia-experimental" - nvidiaExperimentalRuntimeBinary = "nvidia-container-runtime-experimental" + nvidiaExperimentalRuntimeBinary = "nvidia-container-runtime.experimental" defaultConfig = "/etc/containerd/config.toml" defaultSocket = "/run/containerd/containerd.sock" diff --git a/tools/container/containerd/containerd_test.go b/tools/container/containerd/containerd_test.go index d26cd45b..748219b3 100644 --- a/tools/container/containerd/containerd_test.go +++ b/tools/container/containerd/containerd_test.go @@ -31,7 +31,7 @@ func TestOptions(t *testing.T) { { expectedRuntimeBinaries: map[string]string{ "nvidia": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime-experimental", + "nvidia-experimental": "nvidia-container-runtime.experimental", }, }, { @@ -41,7 +41,7 @@ func TestOptions(t *testing.T) { expectedDefaultRuntime: "nvidia", expectedRuntimeBinaries: map[string]string{ "nvidia": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime-experimental", + "nvidia-experimental": "nvidia-container-runtime.experimental", }, }, { @@ -52,7 +52,7 @@ func TestOptions(t *testing.T) { expectedDefaultRuntime: "nvidia", expectedRuntimeBinaries: map[string]string{ "nvidia": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime-experimental", + "nvidia-experimental": "nvidia-container-runtime.experimental", }, }, { @@ -63,7 +63,7 @@ func TestOptions(t *testing.T) { expectedDefaultRuntime: "NAME", expectedRuntimeBinaries: map[string]string{ "NAME": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime-experimental", + "nvidia-experimental": "nvidia-container-runtime.experimental", }, }, { @@ -73,7 +73,7 @@ func TestOptions(t *testing.T) { }, expectedRuntimeBinaries: map[string]string{ "NAME": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime-experimental", + "nvidia-experimental": "nvidia-container-runtime.experimental", }, }, { @@ -84,7 +84,7 @@ func TestOptions(t *testing.T) { expectedDefaultRuntime: "nvidia-experimental", expectedRuntimeBinaries: map[string]string{ "nvidia": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime-experimental", + "nvidia-experimental": "nvidia-container-runtime.experimental", }, }, { @@ -94,7 +94,7 @@ func TestOptions(t *testing.T) { }, expectedRuntimeBinaries: map[string]string{ "nvidia": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime-experimental", + "nvidia-experimental": "nvidia-container-runtime.experimental", }, }, } diff --git a/tools/container/docker/docker.go b/tools/container/docker/docker.go index e64b8ba9..92949b20 100644 --- a/tools/container/docker/docker.go +++ b/tools/container/docker/docker.go @@ -36,7 +36,7 @@ const ( nvidiaRuntimeName = "nvidia" nvidiaRuntimeBinary = "nvidia-container-runtime" nvidiaExperimentalRuntimeName = "nvidia-experimental" - nvidiaExperimentalRuntimeBinary = "nvidia-container-runtime-experimental" + nvidiaExperimentalRuntimeBinary = "nvidia-container-runtime.experimental" defaultConfig = "/etc/docker/daemon.json" defaultSocket = "/var/run/docker.sock" @@ -261,7 +261,7 @@ func UpdateConfig(config map[string]interface{}, o *options) error { return nil } -//RevertConfig reverts the docker config to remove the nvidia runtime +// RevertConfig reverts the docker config to remove the nvidia runtime func RevertConfig(config map[string]interface{}) error { if _, exists := config["default-runtime"]; exists { defaultRuntime := config["default-runtime"].(string) diff --git a/tools/container/docker/docker_test.go b/tools/container/docker/docker_test.go index c43ffa83..02f58ff4 100644 --- a/tools/container/docker/docker_test.go +++ b/tools/container/docker/docker_test.go @@ -89,7 +89,7 @@ func TestUpdateConfig(t *testing.T) { "args": []string{}, }, "nvidia-experimental": map[string]interface{}{ - "path": "/test/runtime/dir/nvidia-container-runtime-experimental", + "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, }, @@ -106,7 +106,7 @@ func TestUpdateConfig(t *testing.T) { "args": []string{}, }, "nvidia-experimental": map[string]interface{}{ - "path": "/test/runtime/dir/nvidia-container-runtime-experimental", + "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, }, @@ -123,7 +123,7 @@ func TestUpdateConfig(t *testing.T) { "args": []string{}, }, "nvidia-experimental": map[string]interface{}{ - "path": "/test/runtime/dir/nvidia-container-runtime-experimental", + "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, }, @@ -146,7 +146,7 @@ func TestUpdateConfig(t *testing.T) { "args": []string{}, }, "nvidia-experimental": map[string]interface{}{ - "path": "/test/runtime/dir/nvidia-container-runtime-experimental", + "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, }, @@ -172,7 +172,7 @@ func TestUpdateConfig(t *testing.T) { "args": []string{}, }, "nvidia-experimental": map[string]interface{}{ - "path": "/test/runtime/dir/nvidia-container-runtime-experimental", + "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, }, @@ -192,7 +192,7 @@ func TestUpdateConfig(t *testing.T) { "args": []string{}, }, "nvidia-experimental": map[string]interface{}{ - "path": "/test/runtime/dir/nvidia-container-runtime-experimental", + "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, }, @@ -212,7 +212,7 @@ func TestUpdateConfig(t *testing.T) { "args": []string{}, }, "nvidia-experimental": map[string]interface{}{ - "path": "/test/runtime/dir/nvidia-container-runtime-experimental", + "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, }, @@ -240,7 +240,7 @@ func TestUpdateConfig(t *testing.T) { "args": []string{}, }, "nvidia-experimental": map[string]interface{}{ - "path": "/test/runtime/dir/nvidia-container-runtime-experimental", + "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, }, @@ -306,7 +306,7 @@ func TestRevertConfig(t *testing.T) { "args": []string{}, }, "nvidia-experimental": map[string]interface{}{ - "path": "/test/runtime/dir/nvidia-container-runtime-experimental", + "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, }, diff --git a/tools/container/toolkit/runtime.go b/tools/container/toolkit/runtime.go index a464f530..bd39649a 100644 --- a/tools/container/toolkit/runtime.go +++ b/tools/container/toolkit/runtime.go @@ -29,9 +29,7 @@ const ( nvidiaContainerRuntimeTarget = "nvidia-container-runtime.real" nvidiaContainerRuntimeWrapper = "nvidia-container-runtime" - nvidiaExperimentalContainerRuntimeSource = "nvidia-container-runtime.experimental" - nvidiaExperimentalContainerRuntimeTarget = nvidiaExperimentalContainerRuntimeSource - nvidiaExperimentalContainerRuntimeWrapper = "nvidia-container-runtime-experimental" + nvidiaExperimentalContainerRuntimeSource = "nvidia-container-runtime.experimental" ) // installContainerRuntimes sets up the NVIDIA container runtimes, copying the executables @@ -79,16 +77,19 @@ func newNvidiaContainerRuntimeInstaller() *executable { } func newNvidiaContainerRuntimeExperimentalInstaller(libraryRoot string) *executable { + source := nvidiaExperimentalContainerRuntimeSource + wrapperName := filepath.Base(source) + dotfileName := wrapperName + ".real" target := executableTarget{ - dotfileName: nvidiaExperimentalContainerRuntimeTarget, - wrapperName: nvidiaExperimentalContainerRuntimeWrapper, + dotfileName: dotfileName, + wrapperName: wrapperName, } env := make(map[string]string) if libraryRoot != "" { env["LD_LIBRARY_PATH"] = strings.Join([]string{libraryRoot, "$LD_LIBRARY_PATH"}, ":") } - return newRuntimeInstaller(nvidiaExperimentalContainerRuntimeSource, target, env) + return newRuntimeInstaller(source, target, env) } func newRuntimeInstaller(source string, target executableTarget, env map[string]string) *executable { From ece5b29d97bdaa498c2ee491c59c32b06b26d33c Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 24 Feb 2023 10:48:27 +0200 Subject: [PATCH 2/9] Add tools/container/operator package to handle runtime naming Signed-off-by: Evan Lezar --- tools/container/containerd/containerd_test.go | 106 ------------ tools/container/operator/operator.go | 130 +++++++++++++++ tools/container/operator/operator_test.go | 151 ++++++++++++++++++ 3 files changed, 281 insertions(+), 106 deletions(-) delete mode 100644 tools/container/containerd/containerd_test.go create mode 100644 tools/container/operator/operator.go create mode 100644 tools/container/operator/operator_test.go diff --git a/tools/container/containerd/containerd_test.go b/tools/container/containerd/containerd_test.go deleted file mode 100644 index 748219b3..00000000 --- a/tools/container/containerd/containerd_test.go +++ /dev/null @@ -1,106 +0,0 @@ -/** -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -*/ - -package main - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func TestOptions(t *testing.T) { - testCases := []struct { - options options - expectedDefaultRuntime string - expectedRuntimeBinaries map[string]string - }{ - { - expectedRuntimeBinaries: map[string]string{ - "nvidia": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime.experimental", - }, - }, - { - options: options{ - setAsDefault: true, - }, - expectedDefaultRuntime: "nvidia", - expectedRuntimeBinaries: map[string]string{ - "nvidia": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime.experimental", - }, - }, - { - options: options{ - setAsDefault: true, - runtimeClass: "nvidia", - }, - expectedDefaultRuntime: "nvidia", - expectedRuntimeBinaries: map[string]string{ - "nvidia": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime.experimental", - }, - }, - { - options: options{ - setAsDefault: true, - runtimeClass: "NAME", - }, - expectedDefaultRuntime: "NAME", - expectedRuntimeBinaries: map[string]string{ - "NAME": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime.experimental", - }, - }, - { - options: options{ - setAsDefault: false, - runtimeClass: "NAME", - }, - expectedRuntimeBinaries: map[string]string{ - "NAME": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime.experimental", - }, - }, - { - options: options{ - setAsDefault: true, - runtimeClass: "nvidia-experimental", - }, - expectedDefaultRuntime: "nvidia-experimental", - expectedRuntimeBinaries: map[string]string{ - "nvidia": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime.experimental", - }, - }, - { - options: options{ - setAsDefault: false, - runtimeClass: "nvidia-experimental", - }, - expectedRuntimeBinaries: map[string]string{ - "nvidia": "nvidia-container-runtime", - "nvidia-experimental": "nvidia-container-runtime.experimental", - }, - }, - } - - for i, tc := range testCases { - require.Equal(t, tc.expectedDefaultRuntime, tc.options.getDefaultRuntime(), "%d: %v", i, tc) - require.EqualValues(t, tc.expectedRuntimeBinaries, tc.options.getRuntimeBinaries(), "%d: %v", i, tc) - } -} diff --git a/tools/container/operator/operator.go b/tools/container/operator/operator.go new file mode 100644 index 00000000..b52e97ad --- /dev/null +++ b/tools/container/operator/operator.go @@ -0,0 +1,130 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package operator + +import "path/filepath" + +const ( + defaultRuntimeName = "nvidia" + experimentalRuntimeName = "nvidia-experimental" + + defaultRoot = "/usr/bin" +) + +// Runtime defines a runtime to be configured. +// The path and whether the runtime is the default runtime can be specfied +type Runtime struct { + name string + Path string + SetAsDefault bool +} + +// Runtimes defines a set of runtimes to be configure for use in the GPU Operator +type Runtimes map[string]Runtime + +type config struct { + root string + nvidiaRuntimeName string + setAsDefault bool +} + +// GetRuntimes returns the set of runtimes to be configured for use with the GPU Operator. +func GetRuntimes(opts ...Option) Runtimes { + c := &config{} + for _, opt := range opts { + opt(c) + } + + if c.root == "" { + c.root = defaultRoot + } + if c.nvidiaRuntimeName == "" { + c.nvidiaRuntimeName = defaultRuntimeName + } + + runtimes := make(Runtimes) + runtimes.add(c.nvidiaRuntime()) + + modes := []string{"experimental"} + for _, mode := range modes { + runtimes.add(c.modeRuntime(mode)) + } + return runtimes +} + +// DefaultRuntimeName returns the name of the default runtime. +func (r Runtimes) DefaultRuntimeName() string { + for _, runtime := range r { + if runtime.SetAsDefault { + return runtime.name + } + } + return "" +} + +// Add a runtime to the set of runtimes. +func (r *Runtimes) add(runtime Runtime) { + (*r)[runtime.name] = runtime +} + +// nvidiaRuntime creates a runtime that corresponds to the nvidia runtime. +// If the nvidiaRuntimeName is specified, this name us used unless this is exactly equal to nvidia-experimental. +func (c config) nvidiaRuntime() Runtime { + name := c.nvidiaRuntimeName + if name == experimentalRuntimeName { + name = defaultRuntimeName + } + return c.newRuntime(name, "nvidia-container-runtime") +} + +// modeRuntime creates a runtime for the specified mode. +func (c config) modeRuntime(mode string) Runtime { + return c.newRuntime("nvidia-"+mode, "nvidia-container-runtime."+mode) +} + +// newRuntime creates a runtime based on the configuration +func (c config) newRuntime(name string, binary string) Runtime { + return Runtime{ + name: name, + Path: filepath.Join(c.root, binary), + SetAsDefault: c.setAsDefault && name == c.nvidiaRuntimeName, + } +} + +// Option is a functional option for configuring set of runtimes. +type Option func(*config) + +// WithRoot sets the root directory for the runtime binaries. +func WithRoot(root string) Option { + return func(c *config) { + c.root = root + } +} + +// WithNvidiaRuntimeName sets the name of the nvidia runtime. +func WithNvidiaRuntimeName(name string) Option { + return func(c *config) { + c.nvidiaRuntimeName = name + } +} + +// WithSetAsDefault sets the default runtime to the nvidia runtime. +func WithSetAsDefault(set bool) Option { + return func(c *config) { + c.setAsDefault = set + } +} diff --git a/tools/container/operator/operator_test.go b/tools/container/operator/operator_test.go new file mode 100644 index 00000000..07417fb9 --- /dev/null +++ b/tools/container/operator/operator_test.go @@ -0,0 +1,151 @@ +/** +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/ + +package operator + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestOptions(t *testing.T) { + testCases := []struct { + setAsDefault bool + nvidiaRuntimeName string + root string + expectedDefaultRuntime string + expectedRuntimes Runtimes + }{ + { + expectedRuntimes: Runtimes{ + "nvidia": Runtime{ + name: "nvidia", + Path: "/usr/bin/nvidia-container-runtime", + }, + "nvidia-experimental": Runtime{ + name: "nvidia-experimental", + Path: "/usr/bin/nvidia-container-runtime.experimental", + }, + }, + }, + { + setAsDefault: true, + expectedDefaultRuntime: "nvidia", + expectedRuntimes: Runtimes{ + "nvidia": Runtime{ + name: "nvidia", + Path: "/usr/bin/nvidia-container-runtime", + SetAsDefault: true, + }, + "nvidia-experimental": Runtime{ + name: "nvidia-experimental", + Path: "/usr/bin/nvidia-container-runtime.experimental", + }, + }, + }, + { + setAsDefault: true, + nvidiaRuntimeName: "nvidia", + expectedDefaultRuntime: "nvidia", + expectedRuntimes: Runtimes{ + "nvidia": Runtime{ + name: "nvidia", + Path: "/usr/bin/nvidia-container-runtime", + SetAsDefault: true, + }, + "nvidia-experimental": Runtime{ + name: "nvidia-experimental", + Path: "/usr/bin/nvidia-container-runtime.experimental", + }, + }, + }, + { + setAsDefault: true, + nvidiaRuntimeName: "NAME", + expectedDefaultRuntime: "NAME", + expectedRuntimes: Runtimes{ + "NAME": Runtime{ + name: "NAME", + Path: "/usr/bin/nvidia-container-runtime", + SetAsDefault: true, + }, + "nvidia-experimental": Runtime{ + name: "nvidia-experimental", + Path: "/usr/bin/nvidia-container-runtime.experimental", + }, + }, + }, + { + setAsDefault: false, + nvidiaRuntimeName: "NAME", + expectedRuntimes: Runtimes{ + "NAME": Runtime{ + name: "NAME", + Path: "/usr/bin/nvidia-container-runtime", + }, + "nvidia-experimental": Runtime{ + name: "nvidia-experimental", + Path: "/usr/bin/nvidia-container-runtime.experimental", + }, + }, + }, + { + setAsDefault: true, + nvidiaRuntimeName: "nvidia-experimental", + expectedDefaultRuntime: "nvidia-experimental", + expectedRuntimes: Runtimes{ + "nvidia": Runtime{ + name: "nvidia", + Path: "/usr/bin/nvidia-container-runtime", + }, + "nvidia-experimental": Runtime{ + name: "nvidia-experimental", + Path: "/usr/bin/nvidia-container-runtime.experimental", + SetAsDefault: true, + }, + }, + }, + { + setAsDefault: false, + nvidiaRuntimeName: "nvidia-experimental", + expectedRuntimes: Runtimes{ + "nvidia": Runtime{ + name: "nvidia", + Path: "/usr/bin/nvidia-container-runtime", + }, + "nvidia-experimental": Runtime{ + name: "nvidia-experimental", + Path: "/usr/bin/nvidia-container-runtime.experimental", + }, + }, + }, + } + + for i, tc := range testCases { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + runtimes := GetRuntimes( + WithNvidiaRuntimeName(tc.nvidiaRuntimeName), + WithSetAsDefault(tc.setAsDefault), + WithRoot(tc.root), + ) + + require.EqualValues(t, tc.expectedRuntimes, runtimes) + require.Equal(t, tc.expectedDefaultRuntime, runtimes.DefaultRuntimeName()) + }) + } +} From 5bfb51f80177845ff9a4bc1ba2cfd7b4b87593dc Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 24 Feb 2023 17:44:17 +0200 Subject: [PATCH 3/9] Add API for interacting with runtime engine configs Signed-off-by: Evan Lezar --- internal/config/engine/api.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 internal/config/engine/api.go diff --git a/internal/config/engine/api.go b/internal/config/engine/api.go new file mode 100644 index 00000000..5cd5fb52 --- /dev/null +++ b/internal/config/engine/api.go @@ -0,0 +1,25 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package engine + +// Interface defines the API for a runtime config updater. +type Interface interface { + DefaultRuntime() string + AddRuntime(string, string, bool) error + RemoveRuntime(string) error + Save(string) (int64, error) +} From e5bb4d27183cf379fa6add30753101af6c0d52f9 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 27 Feb 2023 13:52:11 +0200 Subject: [PATCH 4/9] Move runtime config code from config to config/engine Signed-off-by: Evan Lezar --- cmd/nvidia-ctk/runtime/configure/configure.go | 4 ++-- internal/config/{ => engine}/crio/crio.go | 0 internal/config/{ => engine}/docker/docker.go | 0 internal/config/{ => engine}/docker/docker_test.go | 0 tools/container/crio/crio.go | 2 +- tools/container/docker/docker.go | 2 +- 6 files changed, 4 insertions(+), 4 deletions(-) rename internal/config/{ => engine}/crio/crio.go (100%) rename internal/config/{ => engine}/docker/docker.go (100%) rename internal/config/{ => engine}/docker/docker_test.go (100%) diff --git a/cmd/nvidia-ctk/runtime/configure/configure.go b/cmd/nvidia-ctk/runtime/configure/configure.go index a0d66afc..1b2decd8 100644 --- a/cmd/nvidia-ctk/runtime/configure/configure.go +++ b/cmd/nvidia-ctk/runtime/configure/configure.go @@ -22,8 +22,8 @@ import ( "os" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/nvidia" - "github.com/NVIDIA/nvidia-container-toolkit/internal/config/crio" - "github.com/NVIDIA/nvidia-container-toolkit/internal/config/docker" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine/crio" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine/docker" "github.com/pelletier/go-toml" "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" diff --git a/internal/config/crio/crio.go b/internal/config/engine/crio/crio.go similarity index 100% rename from internal/config/crio/crio.go rename to internal/config/engine/crio/crio.go diff --git a/internal/config/docker/docker.go b/internal/config/engine/docker/docker.go similarity index 100% rename from internal/config/docker/docker.go rename to internal/config/engine/docker/docker.go diff --git a/internal/config/docker/docker_test.go b/internal/config/engine/docker/docker_test.go similarity index 100% rename from internal/config/docker/docker_test.go rename to internal/config/engine/docker/docker_test.go diff --git a/tools/container/crio/crio.go b/tools/container/crio/crio.go index daf5ec39..d415388c 100644 --- a/tools/container/crio/crio.go +++ b/tools/container/crio/crio.go @@ -24,7 +24,7 @@ import ( "path/filepath" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" - "github.com/NVIDIA/nvidia-container-toolkit/internal/config/crio" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine/crio" "github.com/pelletier/go-toml" log "github.com/sirupsen/logrus" cli "github.com/urfave/cli/v2" diff --git a/tools/container/docker/docker.go b/tools/container/docker/docker.go index 92949b20..89621a18 100644 --- a/tools/container/docker/docker.go +++ b/tools/container/docker/docker.go @@ -24,7 +24,7 @@ import ( "syscall" "time" - "github.com/NVIDIA/nvidia-container-toolkit/internal/config/docker" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine/docker" log "github.com/sirupsen/logrus" cli "github.com/urfave/cli/v2" ) From 9fff19da23ee9d277df004267e5fb1a16a95a921 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 23 Feb 2023 15:43:15 +0200 Subject: [PATCH 5/9] Migrate docker config to engine.Interface Signed-off-by: Evan Lezar --- cmd/nvidia-ctk/runtime/configure/configure.go | 15 +- internal/config/engine/docker/docker.go | 145 ++++++++++-------- internal/config/engine/docker/docker_test.go | 8 +- internal/config/engine/docker/option.go | 80 ++++++++++ tools/container/docker/docker.go | 100 +++++------- tools/container/docker/docker_test.go | 54 +------ 6 files changed, 221 insertions(+), 181 deletions(-) create mode 100644 internal/config/engine/docker/option.go diff --git a/cmd/nvidia-ctk/runtime/configure/configure.go b/cmd/nvidia-ctk/runtime/configure/configure.go index 1b2decd8..4df63e8d 100644 --- a/cmd/nvidia-ctk/runtime/configure/configure.go +++ b/cmd/nvidia-ctk/runtime/configure/configure.go @@ -127,13 +127,14 @@ func (m command) configureDocker(c *cli.Context, config *config) error { configFilePath = defaultDockerConfigFilePath } - cfg, err := docker.LoadConfig(configFilePath) + cfg, err := docker.New( + docker.WithPath(configFilePath), + ) if err != nil { return fmt.Errorf("unable to load config: %v", err) } - err = docker.UpdateConfig( - cfg, + err = cfg.AddRuntime( config.nvidiaOptions.RuntimeName, config.nvidiaOptions.RuntimePath, config.nvidiaOptions.SetAsDefault, @@ -150,12 +151,16 @@ func (m command) configureDocker(c *cli.Context, config *config) error { os.Stdout.WriteString(fmt.Sprintf("%s\n", output)) return nil } - err = docker.FlushConfig(cfg, configFilePath) + n, err := cfg.Save(configFilePath) if err != nil { return fmt.Errorf("unable to flush config: %v", err) } - m.logger.Infof("Wrote updated config to %v", configFilePath) + if n == 0 { + m.logger.Infof("Removed empty config from %v", configFilePath) + } else { + m.logger.Infof("Wrote updated config to %v", configFilePath) + } m.logger.Infof("It is recommended that the docker daemon be restarted.") return nil diff --git a/internal/config/engine/docker/docker.go b/internal/config/engine/docker/docker.go index 707e7923..3c998375 100644 --- a/internal/config/engine/docker/docker.go +++ b/internal/config/engine/docker/docker.go @@ -17,47 +17,39 @@ package docker import ( - "bytes" "encoding/json" "fmt" - "io/ioutil" "os" - log "github.com/sirupsen/logrus" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine" ) -// LoadConfig loads the docker config from disk -func LoadConfig(configFilePath string) (map[string]interface{}, error) { - log.Infof("Loading docker config from %v", configFilePath) +const ( + defaultDockerRuntime = "runc" +) - info, err := os.Stat(configFilePath) - if os.IsExist(err) && info.IsDir() { - return nil, fmt.Errorf("config file is a directory") +// Config defines a docker config file. +// TODO: This should not be public, but we need to access it from the tests in tools/container/docker +type Config map[string]interface{} + +// New creates a docker config with the specified options +func New(opts ...Option) (engine.Interface, error) { + b := &builder{} + for _, opt := range opts { + opt(b) } - cfg := make(map[string]interface{}) - - if os.IsNotExist(err) { - log.Infof("Config file does not exist, creating new one") - return cfg, nil - } - - readBytes, err := ioutil.ReadFile(configFilePath) - if err != nil { - return nil, fmt.Errorf("unable to read config: %v", err) - } - - reader := bytes.NewReader(readBytes) - if err := json.NewDecoder(reader).Decode(&cfg); err != nil { - return nil, err - } - - log.Infof("Successfully loaded config") - return cfg, nil + return b.build() } -// UpdateConfig updates the docker config to include the nvidia runtimes -func UpdateConfig(config map[string]interface{}, runtimeName string, runtimePath string, setAsDefault bool) error { +// AddRuntime adds a new runtime to the docker config +func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error { + if c == nil { + return fmt.Errorf("config is nil") + } + + config := *c + // Read the existing runtimes runtimes := make(map[string]interface{}) if _, exists := config["runtimes"]; exists { @@ -65,53 +57,84 @@ func UpdateConfig(config map[string]interface{}, runtimeName string, runtimePath } // Add / update the runtime definitions - runtimes[runtimeName] = map[string]interface{}{ - "path": runtimePath, + runtimes[name] = map[string]interface{}{ + "path": path, "args": []string{}, } - // Update the runtimes definition - if len(runtimes) > 0 { - config["runtimes"] = runtimes - } + config["runtimes"] = runtimes if setAsDefault { - config["default-runtime"] = runtimeName + config["default-runtime"] = name } + *c = config + return nil +} + +// DefaultRuntime returns the default runtime for the docker config +func (c Config) DefaultRuntime() string { + r, ok := c["default-runtime"].(string) + if !ok { + return "" + } + return r +} + +// RemoveRuntime removes a runtime from the docker config +func (c *Config) RemoveRuntime(name string) error { + if c == nil { + return nil + } + config := *c + + if _, exists := config["default-runtime"]; exists { + defaultRuntime := config["default-runtime"].(string) + if defaultRuntime == name { + config["default-runtime"] = defaultDockerRuntime + } + } + + if _, exists := config["runtimes"]; exists { + runtimes := config["runtimes"].(map[string]interface{}) + + delete(runtimes, name) + + if len(runtimes) == 0 { + delete(config, "runtimes") + } + } + + *c = config + return nil } -// FlushConfig flushes the updated/reverted config out to disk -func FlushConfig(cfg map[string]interface{}, configFilePath string) error { - log.Infof("Flushing docker config to %v", configFilePath) - - output, err := json.MarshalIndent(cfg, "", " ") +// Save writes the config to the specified path +func (c Config) Save(path string) (int64, error) { + output, err := json.MarshalIndent(c, "", " ") if err != nil { - return fmt.Errorf("unable to convert to JSON: %v", err) + return 0, fmt.Errorf("unable to convert to JSON: %v", err) } - switch len(output) { - case 0: - err := os.Remove(configFilePath) + if len(output) == 0 { + err := os.Remove(path) if err != nil { - return fmt.Errorf("unable to remove empty file: %v", err) - } - log.Infof("Config empty, removing file") - default: - f, err := os.Create(configFilePath) - if err != nil { - return fmt.Errorf("unable to open %v for writing: %v", configFilePath, err) - } - defer f.Close() - - _, err = f.WriteString(string(output)) - if err != nil { - return fmt.Errorf("unable to write output: %v", err) + return 0, fmt.Errorf("unable to remove empty file: %v", err) } + return 0, nil } - log.Infof("Successfully flushed config") + f, err := os.Create(path) + if err != nil { + return 0, fmt.Errorf("unable to open %v for writing: %v", path, err) + } + defer f.Close() - return nil + n, err := f.WriteString(string(output)) + if err != nil { + return 0, fmt.Errorf("unable to write output: %v", err) + } + + return int64(n), nil } diff --git a/internal/config/engine/docker/docker_test.go b/internal/config/engine/docker/docker_test.go index c5b7128f..115e189e 100644 --- a/internal/config/engine/docker/docker_test.go +++ b/internal/config/engine/docker/docker_test.go @@ -26,7 +26,7 @@ import ( func TestUpdateConfigDefaultRuntime(t *testing.T) { testCases := []struct { - config map[string]interface{} + config Config runtimeName string setAsDefault bool expectedDefaultRuntimeName interface{} @@ -63,7 +63,7 @@ func TestUpdateConfigDefaultRuntime(t *testing.T) { if tc.config == nil { tc.config = make(map[string]interface{}) } - err := UpdateConfig(tc.config, tc.runtimeName, "", tc.setAsDefault) + err := tc.config.AddRuntime(tc.runtimeName, "", tc.setAsDefault) require.NoError(t, err) defaultRuntimeName := tc.config["default-runtime"] @@ -74,7 +74,7 @@ func TestUpdateConfigDefaultRuntime(t *testing.T) { func TestUpdateConfigRuntimes(t *testing.T) { testCases := []struct { - config map[string]interface{} + config Config runtimes map[string]string expectedConfig map[string]interface{} }{ @@ -198,7 +198,7 @@ func TestUpdateConfigRuntimes(t *testing.T) { for i, tc := range testCases { t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) { for runtimeName, runtimePath := range tc.runtimes { - err := UpdateConfig(tc.config, runtimeName, runtimePath, false) + err := tc.config.AddRuntime(runtimeName, runtimePath, false) require.NoError(t, err) } diff --git a/internal/config/engine/docker/option.go b/internal/config/engine/docker/option.go new file mode 100644 index 00000000..238cc3f1 --- /dev/null +++ b/internal/config/engine/docker/option.go @@ -0,0 +1,80 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package docker + +import ( + "bytes" + "encoding/json" + "fmt" + "io/ioutil" + "os" + + log "github.com/sirupsen/logrus" +) + +type builder struct { + path string +} + +// Option defines a function that can be used to configure the config builder +type Option func(*builder) + +// WithPath sets the path for the config builder +func WithPath(path string) Option { + return func(b *builder) { + b.path = path + } +} + +func (b *builder) build() (*Config, error) { + if b.path == "" { + empty := make(Config) + return &empty, nil + } + + return loadConfig(b.path) +} + +// loadConfig loads the docker config from disk +func loadConfig(configFilePath string) (*Config, error) { + log.Infof("Loading docker config from %v", configFilePath) + + info, err := os.Stat(configFilePath) + if os.IsExist(err) && info.IsDir() { + return nil, fmt.Errorf("config file is a directory") + } + + cfg := make(Config) + + if os.IsNotExist(err) { + log.Infof("Config file does not exist, creating new one") + return &cfg, nil + } + + readBytes, err := ioutil.ReadFile(configFilePath) + if err != nil { + return nil, fmt.Errorf("unable to read config: %v", err) + } + + reader := bytes.NewReader(readBytes) + if err := json.NewDecoder(reader).Decode(&cfg); err != nil { + return nil, err + } + + log.Infof("Successfully loaded config") + return &cfg, nil +} diff --git a/tools/container/docker/docker.go b/tools/container/docker/docker.go index 89621a18..57704e3e 100644 --- a/tools/container/docker/docker.go +++ b/tools/container/docker/docker.go @@ -20,11 +20,12 @@ import ( "fmt" "net" "os" - "path/filepath" "syscall" "time" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine/docker" + "github.com/NVIDIA/nvidia-container-toolkit/tools/container/operator" log "github.com/sirupsen/logrus" cli "github.com/urfave/cli/v2" ) @@ -170,7 +171,9 @@ func Setup(c *cli.Context, o *options) error { } o.runtimeDir = runtimeDir - cfg, err := LoadConfig(o.config) + cfg, err := docker.New( + docker.WithPath(o.config), + ) if err != nil { return fmt.Errorf("unable to load config: %v", err) } @@ -180,7 +183,8 @@ func Setup(c *cli.Context, o *options) error { return fmt.Errorf("unable to update config: %v", err) } - err = FlushConfig(cfg, o.config) + log.Infof("Flushing docker config to %v", o.config) + _, err = cfg.Save(o.config) if err != nil { return fmt.Errorf("unable to flush config: %v", err) } @@ -204,20 +208,26 @@ func Cleanup(c *cli.Context, o *options) error { return fmt.Errorf("unable to parse args: %v", err) } - cfg, err := LoadConfig(o.config) + cfg, err := docker.New( + docker.WithPath(o.config), + ) if err != nil { return fmt.Errorf("unable to load config: %v", err) } - err = RevertConfig(cfg) + err = RevertConfig(cfg, o) if err != nil { return fmt.Errorf("unable to update config: %v", err) } - err = FlushConfig(cfg, o.config) + log.Infof("Flushing docker config to %v", o.config) + n, err := cfg.Save(o.config) if err != nil { return fmt.Errorf("unable to flush config: %v", err) } + if n == 0 { + log.Infof("Config file is empty, removed") + } err = RestartDocker(o) if err != nil { @@ -243,18 +253,17 @@ func ParseArgs(c *cli.Context) (string, error) { return runtimeDir, nil } -// LoadConfig loads the docker config from disk -func LoadConfig(config string) (map[string]interface{}, error) { - return docker.LoadConfig(config) -} - // UpdateConfig updates the docker config to include the nvidia runtimes -func UpdateConfig(config map[string]interface{}, o *options) error { - for runtimeName, runtimePath := range o.getRuntimeBinaries() { - setAsDefault := runtimeName == o.getDefaultRuntime() - err := docker.UpdateConfig(config, runtimeName, runtimePath, setAsDefault) +func UpdateConfig(cfg engine.Interface, o *options) error { + runtimes := operator.GetRuntimes( + operator.WithNvidiaRuntimeName(o.runtimeName), + operator.WithSetAsDefault(o.setAsDefault), + operator.WithRoot(o.runtimeDir), + ) + for name, runtime := range runtimes { + err := cfg.AddRuntime(name, runtime.Path, runtime.SetAsDefault) if err != nil { - return fmt.Errorf("failed to update runtime %q: %v", runtimeName, err) + return fmt.Errorf("failed to update runtime %q: %v", name, err) } } @@ -262,33 +271,22 @@ func UpdateConfig(config map[string]interface{}, o *options) error { } // RevertConfig reverts the docker config to remove the nvidia runtime -func RevertConfig(config map[string]interface{}) error { - if _, exists := config["default-runtime"]; exists { - defaultRuntime := config["default-runtime"].(string) - if _, exists := nvidiaRuntimeBinaries[defaultRuntime]; exists { - config["default-runtime"] = defaultDockerRuntime +func RevertConfig(cfg engine.Interface, o *options) error { + runtimes := operator.GetRuntimes( + operator.WithNvidiaRuntimeName(o.runtimeName), + operator.WithSetAsDefault(o.setAsDefault), + operator.WithRoot(o.runtimeDir), + ) + for name := range runtimes { + err := cfg.RemoveRuntime(name) + if err != nil { + return fmt.Errorf("failed to remove runtime %q: %v", name, err) } } - if _, exists := config["runtimes"]; exists { - runtimes := config["runtimes"].(map[string]interface{}) - - for name := range nvidiaRuntimeBinaries { - delete(runtimes, name) - } - - if len(runtimes) == 0 { - delete(config, "runtimes") - } - } return nil } -// FlushConfig flushes the updated/reverted config out to disk -func FlushConfig(cfg map[string]interface{}, config string) error { - return docker.FlushConfig(cfg, config) -} - // RestartDocker restarts docker depending on the value of restartModeFlag func RestartDocker(o *options) error { switch o.restartMode { @@ -385,31 +383,3 @@ func SignalDocker(socket string) error { return nil } - -// getDefaultRuntime returns the default runtime for the configured options. -// If the configuration is invalid or the default runtimes should not be set -// the empty string is returned. -func (o options) getDefaultRuntime() string { - if o.setAsDefault == false { - return "" - } - - return o.runtimeName -} - -// getRuntimeBinaries returns a map of runtime names to binary paths. This includes the -// renaming of the `nvidia` runtime as per the --runtime-class command line flag. -func (o options) getRuntimeBinaries() map[string]string { - runtimeBinaries := make(map[string]string) - - for rt, bin := range nvidiaRuntimeBinaries { - runtime := rt - if o.runtimeName != "" && o.runtimeName != nvidiaExperimentalRuntimeName && runtime == defaultRuntimeName { - runtime = o.runtimeName - } - - runtimeBinaries[runtime] = filepath.Join(o.runtimeDir, bin) - } - - return runtimeBinaries -} diff --git a/tools/container/docker/docker_test.go b/tools/container/docker/docker_test.go index 02f58ff4..462192ed 100644 --- a/tools/container/docker/docker_test.go +++ b/tools/container/docker/docker_test.go @@ -20,6 +20,7 @@ import ( "encoding/json" "testing" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine/docker" "github.com/stretchr/testify/require" ) @@ -60,9 +61,9 @@ func TestUpdateConfigDefaultRuntime(t *testing.T) { runtimeDir: runtimeDir, } - config := map[string]interface{}{} + config := docker.Config(map[string]interface{}{}) - err := UpdateConfig(config, o) + err := UpdateConfig(&config, o) require.NoError(t, err, "%d: %v", i, tc) defaultRuntimeName := config["default-runtime"] @@ -74,7 +75,7 @@ func TestUpdateConfig(t *testing.T) { const runtimeDir = "/test/runtime/dir" testCases := []struct { - config map[string]interface{} + config docker.Config setAsDefault bool runtimeName string expectedConfig map[string]interface{} @@ -254,7 +255,8 @@ func TestUpdateConfig(t *testing.T) { runtimeName: tc.runtimeName, runtimeDir: runtimeDir, } - err := UpdateConfig(tc.config, options) + + err := UpdateConfig(&tc.config, options) require.NoError(t, err, "%d: %v", i, tc) configContent, err := json.MarshalIndent(tc.config, "", " ") @@ -269,7 +271,7 @@ func TestUpdateConfig(t *testing.T) { func TestRevertConfig(t *testing.T) { testCases := []struct { - config map[string]interface{} + config docker.Config expectedConfig map[string]interface{} }{ { @@ -368,7 +370,7 @@ func TestRevertConfig(t *testing.T) { } for i, tc := range testCases { - err := RevertConfig(tc.config) + err := RevertConfig(&tc.config, &options{}) require.NoError(t, err, "%d: %v", i, tc) @@ -381,43 +383,3 @@ func TestRevertConfig(t *testing.T) { require.EqualValues(t, string(expectedContent), string(configContent), "%d: %v", i, tc) } } - -func TestFlagsDefaultRuntime(t *testing.T) { - testCases := []struct { - setAsDefault bool - runtimeName string - expected string - }{ - { - expected: "", - }, - { - runtimeName: "not-bool", - expected: "", - }, - { - setAsDefault: false, - runtimeName: "nvidia", - expected: "", - }, - { - setAsDefault: true, - runtimeName: "nvidia", - expected: "nvidia", - }, - { - setAsDefault: true, - runtimeName: "nvidia-experimental", - expected: "nvidia-experimental", - }, - } - - for i, tc := range testCases { - f := options{ - setAsDefault: tc.setAsDefault, - runtimeName: tc.runtimeName, - } - - require.Equal(t, tc.expected, f.getDefaultRuntime(), "%d: %v", i, tc) - } -} From 3bac4fad09fdc818237482f72d77cdc15c62512a Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 23 Feb 2023 14:49:54 +0200 Subject: [PATCH 6/9] Migrate cri-o config update to engine.Interface Signed-off-by: Evan Lezar --- cmd/nvidia-ctk/runtime/configure/configure.go | 15 ++- internal/config/engine/crio/crio.go | 116 +++++++++--------- internal/config/engine/crio/option.go | 73 +++++++++++ tools/container/crio/crio.go | 55 +++++++-- 4 files changed, 189 insertions(+), 70 deletions(-) create mode 100644 internal/config/engine/crio/option.go diff --git a/cmd/nvidia-ctk/runtime/configure/configure.go b/cmd/nvidia-ctk/runtime/configure/configure.go index 4df63e8d..21954b30 100644 --- a/cmd/nvidia-ctk/runtime/configure/configure.go +++ b/cmd/nvidia-ctk/runtime/configure/configure.go @@ -173,13 +173,14 @@ func (m command) configureCrio(c *cli.Context, config *config) error { configFilePath = defaultCrioConfigFilePath } - cfg, err := crio.LoadConfig(configFilePath) + cfg, err := crio.New( + crio.WithPath(configFilePath), + ) if err != nil { return fmt.Errorf("unable to load config: %v", err) } - err = crio.UpdateConfig( - cfg, + err = cfg.AddRuntime( config.nvidiaOptions.RuntimeName, config.nvidiaOptions.RuntimePath, config.nvidiaOptions.SetAsDefault, @@ -196,12 +197,16 @@ func (m command) configureCrio(c *cli.Context, config *config) error { os.Stdout.WriteString(fmt.Sprintf("%s\n", output)) return nil } - err = crio.FlushConfig(configFilePath, cfg) + n, err := cfg.Save(configFilePath) if err != nil { return fmt.Errorf("unable to flush config: %v", err) } - m.logger.Infof("Wrote updated config to %v", configFilePath) + if n == 0 { + m.logger.Infof("Removed empty config from %v", configFilePath) + } else { + m.logger.Infof("Wrote updated config to %v", configFilePath) + } m.logger.Infof("It is recommended that the cri-o daemon be restarted.") return nil diff --git a/internal/config/engine/crio/crio.go b/internal/config/engine/crio/crio.go index 89338a40..59b066a0 100644 --- a/internal/config/engine/crio/crio.go +++ b/internal/config/engine/crio/crio.go @@ -20,62 +20,71 @@ import ( "fmt" "os" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine" "github.com/pelletier/go-toml" - log "github.com/sirupsen/logrus" ) -// LoadConfig loads the cri-o config from disk -func LoadConfig(config string) (*toml.Tree, error) { - log.Infof("Loading config: %v", config) +// Config represents the cri-o config +type Config toml.Tree - info, err := os.Stat(config) - if os.IsExist(err) && info.IsDir() { - return nil, fmt.Errorf("config file is a directory") +// New creates a cri-o config with the specified options +func New(opts ...Option) (engine.Interface, error) { + b := &builder{} + for _, opt := range opts { + opt(b) } - configFile := config - if os.IsNotExist(err) { - configFile = "/dev/null" - log.Infof("Config file does not exist, creating new one") - } - - cfg, err := toml.LoadFile(configFile) - if err != nil { - return nil, err - } - - log.Infof("Successfully loaded config") - - return cfg, nil + return b.build() } -// UpdateConfig updates the cri-o config to include the NVIDIA Container Runtime -func UpdateConfig(config *toml.Tree, runtimeClass string, runtimePath string, setAsDefault bool) error { +// AddRuntime adds a new runtime to the crio config +func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error { + if c == nil { + return fmt.Errorf("config is nil") + } + + config := (toml.Tree)(*c) + switch runc := config.Get("crio.runtime.runtimes.runc").(type) { case *toml.Tree: runc, _ = toml.Load(runc.String()) - config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass}, runc) + config.SetPath([]string{"crio", "runtime", "runtimes", name}, runc) } - config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_path"}, runtimePath) - config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_type"}, "oci") + config.SetPath([]string{"crio", "runtime", "runtimes", name, "runtime_path"}, path) + config.SetPath([]string{"crio", "runtime", "runtimes", name, "runtime_type"}, "oci") if setAsDefault { - config.SetPath([]string{"crio", "runtime", "default_runtime"}, runtimeClass) + config.SetPath([]string{"crio", "runtime", "default_runtime"}, name) } + *c = (Config)(config) return nil } -// RevertConfig reverts the cri-o config to remove the NVIDIA Container Runtime -func RevertConfig(config *toml.Tree, runtimeClass string) error { +// DefaultRuntime returns the default runtime for the cri-o config +func (c Config) DefaultRuntime() string { + config := (toml.Tree)(c) if runtime, ok := config.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok { - if runtimeClass == runtime { + return runtime + } + return "" +} + +// RemoveRuntime removes a runtime from the cri-o config +func (c *Config) RemoveRuntime(name string) error { + if c == nil { + return nil + } + + config := (toml.Tree)(*c) + if runtime, ok := config.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok { + if runtime == name { config.DeletePath([]string{"crio", "runtime", "default_runtime"}) } } - runtimeClassPath := []string{"crio", "runtime", "runtimes", runtimeClass} + runtimeClassPath := []string{"crio", "runtime", "runtimes", name} config.DeletePath(runtimeClassPath) for i := 0; i < len(runtimeClassPath); i++ { remainingPath := runtimeClassPath[:len(runtimeClassPath)-i] @@ -87,39 +96,36 @@ func RevertConfig(config *toml.Tree, runtimeClass string) error { } } + *c = (Config)(config) return nil } -// FlushConfig flushes the updated/reverted config out to disk -func FlushConfig(config string, cfg *toml.Tree) error { - log.Infof("Flushing config") - - output, err := cfg.ToTomlString() +// Save writes the config to the specified path +func (c Config) Save(path string) (int64, error) { + config := (toml.Tree)(c) + output, err := config.ToTomlString() if err != nil { - return fmt.Errorf("unable to convert to TOML: %v", err) + return 0, fmt.Errorf("unable to convert to TOML: %v", err) } - switch len(output) { - case 0: - err := os.Remove(config) + if len(output) == 0 { + err := os.Remove(path) if err != nil { - return fmt.Errorf("unable to remove empty file: %v", err) - } - log.Infof("Config empty, removing file") - default: - f, err := os.Create(config) - if err != nil { - return fmt.Errorf("unable to open '%v' for writing: %v", config, err) - } - defer f.Close() - - _, err = f.WriteString(output) - if err != nil { - return fmt.Errorf("unable to write output: %v", err) + return 0, fmt.Errorf("unable to remove empty file: %v", err) } + return 0, nil } - log.Infof("Successfully flushed config") + f, err := os.Create(path) + if err != nil { + return 0, fmt.Errorf("unable to open '%v' for writing: %v", path, err) + } + defer f.Close() - return nil + n, err := f.WriteString(output) + if err != nil { + return 0, fmt.Errorf("unable to write output: %v", err) + } + + return int64(n), err } diff --git a/internal/config/engine/crio/option.go b/internal/config/engine/crio/option.go new file mode 100644 index 00000000..af4e7ef7 --- /dev/null +++ b/internal/config/engine/crio/option.go @@ -0,0 +1,73 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package crio + +import ( + "fmt" + "os" + + "github.com/pelletier/go-toml" + log "github.com/sirupsen/logrus" +) + +type builder struct { + path string +} + +// Option defines a function that can be used to configure the config builder +type Option func(*builder) + +// WithPath sets the path for the config builder +func WithPath(path string) Option { + return func(b *builder) { + b.path = path + } +} + +func (b *builder) build() (*Config, error) { + if b.path == "" { + empty := toml.Tree{} + return (*Config)(&empty), nil + } + + return loadConfig(b.path) +} + +// loadConfig loads the cri-o config from disk +func loadConfig(config string) (*Config, error) { + log.Infof("Loading config: %v", config) + + info, err := os.Stat(config) + if os.IsExist(err) && info.IsDir() { + return nil, fmt.Errorf("config file is a directory") + } + + configFile := config + if os.IsNotExist(err) { + configFile = "/dev/null" + log.Infof("Config file does not exist, creating new one") + } + + cfg, err := toml.LoadFile(configFile) + if err != nil { + return nil, err + } + + log.Infof("Successfully loaded config") + + return (*Config)(cfg), nil +} diff --git a/tools/container/crio/crio.go b/tools/container/crio/crio.go index d415388c..233a26fc 100644 --- a/tools/container/crio/crio.go +++ b/tools/container/crio/crio.go @@ -24,8 +24,9 @@ import ( "path/filepath" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine/crio" - "github.com/pelletier/go-toml" + "github.com/NVIDIA/nvidia-container-toolkit/tools/container/operator" log "github.com/sirupsen/logrus" cli "github.com/urfave/cli/v2" ) @@ -213,7 +214,9 @@ func setupHook(o *options) error { func setupConfig(o *options) error { log.Infof("Updating config file") - cfg, err := crio.LoadConfig(o.config) + cfg, err := crio.New( + crio.WithPath(o.config), + ) if err != nil { return fmt.Errorf("unable to load config: %v", err) } @@ -223,10 +226,14 @@ func setupConfig(o *options) error { return fmt.Errorf("unable to update config: %v", err) } - err = crio.FlushConfig(o.config, cfg) + log.Infof("Flushing cri-o config to %v", o.config) + n, err := cfg.Save(o.config) if err != nil { return fmt.Errorf("unable to flush config: %v", err) } + if n == 0 { + log.Infof("Config file is empty, removed") + } err = RestartCrio(o) if err != nil { @@ -267,7 +274,9 @@ func cleanupHook(o *options) error { func cleanupConfig(o *options) error { log.Infof("Reverting config file modifications") - cfg, err := crio.LoadConfig(o.config) + cfg, err := crio.New( + crio.WithPath(o.config), + ) if err != nil { return fmt.Errorf("unable to load config: %v", err) } @@ -277,10 +286,14 @@ func cleanupConfig(o *options) error { return fmt.Errorf("unable to update config: %v", err) } - err = crio.FlushConfig(o.config, cfg) + log.Infof("Flushing cri-o config to %v", o.config) + n, err := cfg.Save(o.config) if err != nil { return fmt.Errorf("unable to flush config: %v", err) } + if n == 0 { + log.Infof("Config file is empty, removed") + } err = RestartCrio(o) if err != nil { @@ -345,14 +358,36 @@ func generateOciHook(toolkitDir string) podmanHook { } // UpdateConfig updates the cri-o config to include the NVIDIA Container Runtime -func UpdateConfig(config *toml.Tree, o *options) error { - runtimePath := filepath.Join(o.runtimeDir, "nvidia-container-runtime") - return crio.UpdateConfig(config, o.runtimeClass, runtimePath, o.setAsDefault) +func UpdateConfig(cfg engine.Interface, o *options) error { + runtimes := operator.GetRuntimes( + operator.WithNvidiaRuntimeName(o.runtimeClass), + operator.WithSetAsDefault(o.setAsDefault), + operator.WithRoot(o.runtimeDir), + ) + for class, runtime := range runtimes { + err := cfg.AddRuntime(class, runtime.Path, runtime.SetAsDefault) + if err != nil { + return fmt.Errorf("unable to update config for runtime class '%v': %v", class, err) + } + } + + return nil } // RevertConfig reverts the cri-o config to remove the NVIDIA Container Runtime -func RevertConfig(config *toml.Tree, o *options) error { - return crio.RevertConfig(config, o.runtimeClass) +func RevertConfig(cfg engine.Interface, o *options) error { + runtimes := operator.GetRuntimes( + operator.WithNvidiaRuntimeName(o.runtimeClass), + operator.WithSetAsDefault(o.setAsDefault), + operator.WithRoot(o.runtimeDir), + ) + for class := range runtimes { + err := cfg.RemoveRuntime(class) + if err != nil { + return fmt.Errorf("unable to revert config for runtime class '%v': %v", class, err) + } + } + return nil } // RestartCrio restarts crio depending on the value of restartModeFlag From dca8e3123fe82dd801e31c3338ffa16c5fe25a3e Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 23 Feb 2023 22:01:24 +0200 Subject: [PATCH 7/9] Migrate containerd config to engine.Interface Signed-off-by: Evan Lezar --- .../config/engine/containerd/config_v1.go | 130 ++++++++++ .../config/engine/containerd/config_v2.go | 125 +++++++++ .../config/engine/containerd/containerd.go | 39 +++ internal/config/engine/containerd/option.go | 140 ++++++++++ tools/container/containerd/config.go | 114 --------- tools/container/containerd/config_v1.go | 134 ---------- tools/container/containerd/config_v1_test.go | 226 ++++++++++------- tools/container/containerd/config_v2.go | 58 ----- tools/container/containerd/config_v2_test.go | 88 ++++--- tools/container/containerd/containerd.go | 240 ++++-------------- 10 files changed, 666 insertions(+), 628 deletions(-) create mode 100644 internal/config/engine/containerd/config_v1.go create mode 100644 internal/config/engine/containerd/config_v2.go create mode 100644 internal/config/engine/containerd/containerd.go create mode 100644 internal/config/engine/containerd/option.go delete mode 100644 tools/container/containerd/config.go delete mode 100644 tools/container/containerd/config_v1.go delete mode 100644 tools/container/containerd/config_v2.go diff --git a/internal/config/engine/containerd/config_v1.go b/internal/config/engine/containerd/config_v1.go new file mode 100644 index 00000000..eb524fd8 --- /dev/null +++ b/internal/config/engine/containerd/config_v1.go @@ -0,0 +1,130 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package containerd + +import ( + "fmt" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine" + "github.com/pelletier/go-toml" +) + +// ConfigV1 represents a version 1 containerd config +type ConfigV1 Config + +var _ engine.Interface = (*ConfigV1)(nil) + +// AddRuntime adds a runtime to the containerd config +func (c *ConfigV1) AddRuntime(name string, path string, setAsDefault bool) error { + if c == nil || c.Tree == nil { + return fmt.Errorf("config is nil") + } + + config := *c.Tree + + config.Set("version", int64(1)) + + switch runc := config.GetPath([]string{"plugins", "cri", "containerd", "runtimes", "runc"}).(type) { + case *toml.Tree: + runc, _ = toml.Load(runc.String()) + config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name}, runc) + } + + if config.GetPath([]string{"plugins", "cri", "containerd", "runtimes", name}) == nil { + config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "runtime_type"}, c.RuntimeType) + config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "runtime_root"}, "") + config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "runtime_engine"}, "") + config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false) + } + config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "BinaryName"}, path) + config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "Runtime"}, path) + + if setAsDefault && c.UseDefaultRuntimeName { + config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime_name"}, name) + } else if setAsDefault { + // Note: This is deprecated in containerd 1.4.0 and will be removed in 1.5.0 + if config.GetPath([]string{"plugins", "cri", "containerd", "default_runtime"}) == nil { + config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "runtime_type"}, c.RuntimeType) + config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "runtime_root"}, "") + config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "runtime_engine"}, "") + config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "privileged_without_host_devices"}, false) + } + config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "BinaryName"}, path) + config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "Runtime"}, path) + } + + *c.Tree = config + return nil +} + +// DefaultRuntime returns the default runtime for the cri-o config +func (c ConfigV1) DefaultRuntime() string { + if runtime, ok := c.GetPath([]string{"plugins", "cri", "containerd", "default_runtime_name"}).(string); ok { + return runtime + } + return "" +} + +// RemoveRuntime removes a runtime from the docker config +func (c *ConfigV1) RemoveRuntime(name string) error { + if c == nil || c.Tree == nil { + return nil + } + + config := *c.Tree + + // If the specified runtime was set as the default runtime we need to remove the default runtime too. + runtimePath, ok := config.GetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "BinaryName"}).(string) + if !ok || runtimePath == "" { + runtimePath, _ = config.GetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "Runtime"}).(string) + } + defaultRuntimePath, ok := config.GetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "BinaryName"}).(string) + if !ok || defaultRuntimePath == "" { + defaultRuntimePath, _ = config.GetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "Runtime"}).(string) + } + if runtimePath != "" && defaultRuntimePath != "" && runtimePath == defaultRuntimePath { + config.DeletePath([]string{"plugins", "cri", "containerd", "default_runtime"}) + } + + config.DeletePath([]string{"plugins", "cri", "containerd", "runtimes", name}) + if runtime, ok := config.GetPath([]string{"plugins", "cri", "containerd", "default_runtime_name"}).(string); ok { + if runtime == name { + config.DeletePath([]string{"plugins", "cri", "containerd", "default_runtime_name"}) + } + } + + runtimeConfigPath := []string{"plugins", "cri", "containerd", "runtimes", name} + for i := 0; i < len(runtimeConfigPath); i++ { + if runtimes, ok := config.GetPath(runtimeConfigPath[:len(runtimeConfigPath)-i]).(*toml.Tree); ok { + if len(runtimes.Keys()) == 0 { + config.DeletePath(runtimeConfigPath[:len(runtimeConfigPath)-i]) + } + } + } + + if len(config.Keys()) == 1 && config.Keys()[0] == "version" { + config.Delete("version") + } + + *c.Tree = config + return nil +} + +// Save wrotes the config to a file +func (c ConfigV1) Save(path string) (int64, error) { + return (Config)(c).Save(path) +} diff --git a/internal/config/engine/containerd/config_v2.go b/internal/config/engine/containerd/config_v2.go new file mode 100644 index 00000000..786b8b4a --- /dev/null +++ b/internal/config/engine/containerd/config_v2.go @@ -0,0 +1,125 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package containerd + +import ( + "fmt" + "os" + + "github.com/pelletier/go-toml" +) + +// AddRuntime adds a runtime to the containerd config +func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error { + if c == nil || c.Tree == nil { + return fmt.Errorf("config is nil") + } + config := *c.Tree + + config.Set("version", int64(2)) + + switch runc := config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", "runc"}).(type) { + case *toml.Tree: + runc, _ = toml.Load(runc.String()) + config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name}, runc) + } + + if config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name}) == nil { + config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "runtime_type"}, c.RuntimeType) + config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "runtime_root"}, "") + config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "runtime_engine"}, "") + config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false) + } + config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "options", "BinaryName"}, path) + + if setAsDefault { + config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}, name) + } + + *c.Tree = config + return nil +} + +// DefaultRuntime returns the default runtime for the cri-o config +func (c Config) DefaultRuntime() string { + if runtime, ok := c.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}).(string); ok { + return runtime + } + return "" +} + +// RemoveRuntime removes a runtime from the docker config +func (c *Config) RemoveRuntime(name string) error { + if c == nil || c.Tree == nil { + return nil + } + + config := *c.Tree + + config.DeletePath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name}) + if runtime, ok := config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}).(string); ok { + if runtime == name { + config.DeletePath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}) + } + } + + runtimePath := []string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name} + for i := 0; i < len(runtimePath); i++ { + if runtimes, ok := config.GetPath(runtimePath[:len(runtimePath)-i]).(*toml.Tree); ok { + if len(runtimes.Keys()) == 0 { + config.DeletePath(runtimePath[:len(runtimePath)-i]) + } + } + } + + if len(config.Keys()) == 1 && config.Keys()[0] == "version" { + config.Delete("version") + } + + *c.Tree = config + return nil +} + +// Save writes the config to the specified path +func (c Config) Save(path string) (int64, error) { + config := c.Tree + output, err := config.ToTomlString() + if err != nil { + return 0, fmt.Errorf("unable to convert to TOML: %v", err) + } + + if len(output) == 0 { + err := os.Remove(path) + if err != nil { + return 0, fmt.Errorf("unable to remove empty file: %v", err) + } + return 0, nil + } + + f, err := os.Create(path) + if err != nil { + return 0, fmt.Errorf("unable to open '%v' for writing: %v", path, err) + } + defer f.Close() + + n, err := f.WriteString(output) + if err != nil { + return 0, fmt.Errorf("unable to write output: %v", err) + } + + return int64(n), err +} diff --git a/internal/config/engine/containerd/containerd.go b/internal/config/engine/containerd/containerd.go new file mode 100644 index 00000000..3ea1c65e --- /dev/null +++ b/internal/config/engine/containerd/containerd.go @@ -0,0 +1,39 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package containerd + +import ( + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine" + "github.com/pelletier/go-toml" +) + +// Config represents the containerd config +type Config struct { + *toml.Tree + RuntimeType string + UseDefaultRuntimeName bool +} + +// New creates a containerd config with the specified options +func New(opts ...Option) (engine.Interface, error) { + b := &builder{} + for _, opt := range opts { + opt(b) + } + + return b.build() +} diff --git a/internal/config/engine/containerd/option.go b/internal/config/engine/containerd/option.go new file mode 100644 index 00000000..1c0497df --- /dev/null +++ b/internal/config/engine/containerd/option.go @@ -0,0 +1,140 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package containerd + +import ( + "fmt" + "os" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine" + "github.com/pelletier/go-toml" + log "github.com/sirupsen/logrus" +) + +const ( + defaultRuntimeType = "io.containerd.runc.v2" +) + +type builder struct { + path string + runtimeType string + useLegacyConfig bool +} + +// Option defines a function that can be used to configure the config builder +type Option func(*builder) + +// WithPath sets the path for the config builder +func WithPath(path string) Option { + return func(b *builder) { + b.path = path + } +} + +// WithRuntimeType sets the runtime type for the config builder +func WithRuntimeType(runtimeType string) Option { + return func(b *builder) { + b.runtimeType = runtimeType + } +} + +// WithUseLegacyConfig sets the useLegacyConfig flag for the config builder +func WithUseLegacyConfig(useLegacyConfig bool) Option { + return func(b *builder) { + b.useLegacyConfig = useLegacyConfig + } +} + +func (b *builder) build() (engine.Interface, error) { + if b.path == "" { + return nil, fmt.Errorf("config path is empty") + } + + if b.runtimeType == "" { + b.runtimeType = defaultRuntimeType + } + + config, err := loadConfig(b.path) + if err != nil { + return nil, fmt.Errorf("failed to load config: %v", err) + } + config.RuntimeType = b.runtimeType + config.UseDefaultRuntimeName = !b.useLegacyConfig + + version, err := config.parseVersion(b.useLegacyConfig) + if err != nil { + return nil, fmt.Errorf("failed to parse config version: %v", err) + } + switch version { + case 1: + return (*ConfigV1)(config), nil + case 2: + return config, nil + } + + return nil, fmt.Errorf("unsupported config version: %v", version) +} + +// loadConfig loads the containerd config from disk +func loadConfig(config string) (*Config, error) { + log.Infof("Loading config: %v", config) + + info, err := os.Stat(config) + if os.IsExist(err) && info.IsDir() { + return nil, fmt.Errorf("config file is a directory") + } + + configFile := config + if os.IsNotExist(err) { + configFile = "/dev/null" + log.Infof("Config file does not exist, creating new one") + } + + tomlConfig, err := toml.LoadFile(configFile) + if err != nil { + return nil, err + } + + log.Infof("Successfully loaded config") + + cfg := Config{ + Tree: tomlConfig, + } + return &cfg, nil +} + +// parseVersion returns the version of the config +func (c *Config) parseVersion(useLegacyConfig bool) (int, error) { + defaultVersion := 2 + if useLegacyConfig { + defaultVersion = 1 + } + + switch v := c.Get("version").(type) { + case nil: + switch len(c.Keys()) { + case 0: // No config exists, or the config file is empty, use version inferred from containerd + return defaultVersion, nil + default: // A config file exists, has content, and no version is set + return 1, nil + } + case int64: + return int(v), nil + default: + return -1, fmt.Errorf("unsupported type for version field: %v", v) + } +} diff --git a/tools/container/containerd/config.go b/tools/container/containerd/config.go deleted file mode 100644 index 2e61be5d..00000000 --- a/tools/container/containerd/config.go +++ /dev/null @@ -1,114 +0,0 @@ -/** -# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -*/ - -package main - -import ( - "github.com/pelletier/go-toml" -) - -// UpdateReverter defines the interface for applying and reverting configurations -type UpdateReverter interface { - Update(o *options) error - Revert(o *options) error -} - -type config struct { - *toml.Tree - version int64 - cri string -} - -// update adds the specified runtime class to the the containerd config. -// if set-as default is specified, the runtime class is also set as the -// default runtime. -func (config *config) update(runtimeClass string, runtimeType string, runtimeBinary string, setAsDefault bool) { - config.Set("version", config.version) - - runcPath := config.runcPath() - runtimeClassPath := config.runtimeClassPath(runtimeClass) - - switch runc := config.GetPath(runcPath).(type) { - case *toml.Tree: - runc, _ = toml.Load(runc.String()) - config.SetPath(runtimeClassPath, runc) - } - - config.initRuntime(runtimeClassPath, runtimeType, "BinaryName", runtimeBinary) - if config.version == 1 { - config.initRuntime(runtimeClassPath, runtimeType, "Runtime", runtimeBinary) - } - - if setAsDefault { - defaultRuntimeNamePath := config.defaultRuntimeNamePath() - config.SetPath(defaultRuntimeNamePath, runtimeClass) - } -} - -// revert removes the configuration applied in an update call. -func (config *config) revert(runtimeClass string) { - runtimeClassPath := config.runtimeClassPath(runtimeClass) - defaultRuntimeNamePath := config.defaultRuntimeNamePath() - - config.DeletePath(runtimeClassPath) - if runtime, ok := config.GetPath(defaultRuntimeNamePath).(string); ok { - if runtimeClass == runtime { - config.DeletePath(defaultRuntimeNamePath) - } - } - - for i := 0; i < len(runtimeClassPath); i++ { - if runtimes, ok := config.GetPath(runtimeClassPath[:len(runtimeClassPath)-i]).(*toml.Tree); ok { - if len(runtimes.Keys()) == 0 { - config.DeletePath(runtimeClassPath[:len(runtimeClassPath)-i]) - } - } - } - - if len(config.Keys()) == 1 && config.Keys()[0] == "version" { - config.Delete("version") - } -} - -// initRuntime creates a runtime config if it does not exist and ensures that the -// runtimes binary path is specified. -func (config *config) initRuntime(path []string, runtimeType string, binaryKey string, binary string) { - if config.GetPath(path) == nil { - config.SetPath(append(path, "runtime_type"), runtimeType) - config.SetPath(append(path, "runtime_root"), "") - config.SetPath(append(path, "runtime_engine"), "") - config.SetPath(append(path, "privileged_without_host_devices"), false) - } - - binaryPath := append(path, "options", binaryKey) - config.SetPath(binaryPath, binary) -} - -func (config config) runcPath() []string { - return config.runtimeClassPath("runc") -} - -func (config config) runtimeClassPath(runtimeClass string) []string { - return append(config.containerdPath(), "runtimes", runtimeClass) -} - -func (config config) defaultRuntimeNamePath() []string { - return append(config.containerdPath(), "default_runtime_name") -} - -func (config config) containerdPath() []string { - return []string{"plugins", config.cri, "containerd"} -} diff --git a/tools/container/containerd/config_v1.go b/tools/container/containerd/config_v1.go deleted file mode 100644 index e744cd7f..00000000 --- a/tools/container/containerd/config_v1.go +++ /dev/null @@ -1,134 +0,0 @@ -/** -# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -*/ - -package main - -import ( - "path" - - "github.com/pelletier/go-toml" - log "github.com/sirupsen/logrus" -) - -// configV1 represents a V1 containerd config -type configV1 struct { - config -} - -func newConfigV1(cfg *toml.Tree) UpdateReverter { - c := configV1{ - config: config{ - Tree: cfg, - version: 1, - cri: "cri", - }, - } - - return &c -} - -// Update performs an update specific to v1 of the containerd config -func (config *configV1) Update(o *options) error { - - // For v1 config, the `default_runtime_name` setting is only supported - // for containerd version at least v1.3 - supportsDefaultRuntimeName := !o.useLegacyConfig - - defaultRuntime := o.getDefaultRuntime() - - for runtimeClass, runtimeBinary := range o.getRuntimeBinaries() { - isDefaultRuntime := runtimeClass == defaultRuntime - config.update(runtimeClass, o.runtimeType, runtimeBinary, isDefaultRuntime && supportsDefaultRuntimeName) - - if !isDefaultRuntime { - continue - } - - if supportsDefaultRuntimeName { - defaultRuntimePath := append(config.containerdPath(), "default_runtime") - if config.GetPath(defaultRuntimePath) != nil { - log.Warnf("The setting of default_runtime (%v) in containerd is deprecated", defaultRuntimePath) - } - continue - } - - log.Warnf("Setting default_runtime is deprecated") - defaultRuntimePath := append(config.containerdPath(), "default_runtime") - config.initRuntime(defaultRuntimePath, o.runtimeType, "Runtime", runtimeBinary) - config.initRuntime(defaultRuntimePath, o.runtimeType, "BinaryName", runtimeBinary) - } - return nil -} - -// Revert performs a revert specific to v1 of the containerd config -func (config *configV1) Revert(o *options) error { - defaultRuntimePath := append(config.containerdPath(), "default_runtime") - defaultRuntimeOptionsPath := append(defaultRuntimePath, "options") - if runtime, ok := config.GetPath(append(defaultRuntimeOptionsPath, "Runtime")).(string); ok { - for _, runtimeBinary := range o.getRuntimeBinaries() { - if path.Base(runtimeBinary) == path.Base(runtime) { - config.DeletePath(append(defaultRuntimeOptionsPath, "Runtime")) - break - } - } - } - if runtime, ok := config.GetPath(append(defaultRuntimeOptionsPath, "BinaryName")).(string); ok { - for _, runtimeBinary := range o.getRuntimeBinaries() { - if path.Base(runtimeBinary) == path.Base(runtime) { - config.DeletePath(append(defaultRuntimeOptionsPath, "BinaryName")) - break - } - } - } - - if options, ok := config.GetPath(defaultRuntimeOptionsPath).(*toml.Tree); ok { - if len(options.Keys()) == 0 { - config.DeletePath(defaultRuntimeOptionsPath) - } - } - - if runtime, ok := config.GetPath(defaultRuntimePath).(*toml.Tree); ok { - fields := []string{"runtime_type", "runtime_root", "runtime_engine", "privileged_without_host_devices"} - if len(runtime.Keys()) <= len(fields) { - matches := []string{} - for _, f := range fields { - e := runtime.Get(f) - if e != nil { - matches = append(matches, f) - } - } - if len(matches) == len(runtime.Keys()) { - for _, m := range matches { - runtime.Delete(m) - } - } - } - } - - for i := 0; i < len(defaultRuntimePath); i++ { - if runtimes, ok := config.GetPath(defaultRuntimePath[:len(defaultRuntimePath)-i]).(*toml.Tree); ok { - if len(runtimes.Keys()) == 0 { - config.DeletePath(defaultRuntimePath[:len(defaultRuntimePath)-i]) - } - } - } - - for runtimeClass := range nvidiaRuntimeBinaries { - config.revert(runtimeClass) - } - - return nil -} diff --git a/tools/container/containerd/config_v1_test.go b/tools/container/containerd/config_v1_test.go index 6c51467b..dd8bf54c 100644 --- a/tools/container/containerd/config_v1_test.go +++ b/tools/container/containerd/config_v1_test.go @@ -17,8 +17,10 @@ package main import ( + "fmt" "testing" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine/containerd" "github.com/pelletier/go-toml" "github.com/stretchr/testify/require" ) @@ -89,36 +91,46 @@ func TestUpdateV1ConfigDefaultRuntime(t *testing.T) { } for i, tc := range testCases { - o := &options{ - useLegacyConfig: tc.legacyConfig, - setAsDefault: tc.setAsDefault, - runtimeClass: tc.runtimeClass, - runtimeType: runtimeType, - runtimeDir: runtimeDir, - } + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + o := &options{ + useLegacyConfig: tc.legacyConfig, + setAsDefault: tc.setAsDefault, + runtimeClass: tc.runtimeClass, + runtimeType: runtimeType, + runtimeDir: runtimeDir, + } - config, err := toml.TreeFromMap(map[string]interface{}{}) - require.NoError(t, err, "%d: %v", i, tc) - - err = UpdateV1Config(config, o) - require.NoError(t, err, "%d: %v", i, tc) - - defaultRuntimeName := config.GetPath([]string{"plugins", "cri", "containerd", "default_runtime_name"}) - require.EqualValues(t, tc.expectedDefaultRuntimeName, defaultRuntimeName, "%d: %v", i, tc) - - defaultRuntime := config.GetPath([]string{"plugins", "cri", "containerd", "default_runtime"}) - if tc.expectedDefaultRuntimeBinary == nil { - require.Nil(t, defaultRuntime, "%d: %v", i, tc) - } else { - expected, err := defaultRuntimeTomlConfigV1(tc.expectedDefaultRuntimeBinary.(string)) + config, err := toml.TreeFromMap(map[string]interface{}{}) require.NoError(t, err, "%d: %v", i, tc) - configContents, _ := toml.Marshal(defaultRuntime.(*toml.Tree)) - expectedContents, _ := toml.Marshal(expected) + v1 := &containerd.ConfigV1{ + Tree: config, + UseDefaultRuntimeName: !tc.legacyConfig, + RuntimeType: runtimeType, + } - require.Equal(t, string(expectedContents), string(configContents), "%d: %v: %v", i, tc) - } + err = UpdateConfig(v1, o) + require.NoError(t, err, "%d: %v", i, tc) + defaultRuntimeName := v1.GetPath([]string{"plugins", "cri", "containerd", "default_runtime_name"}) + require.EqualValues(t, tc.expectedDefaultRuntimeName, defaultRuntimeName, "%d: %v", i, tc) + + defaultRuntime := v1.GetPath([]string{"plugins", "cri", "containerd", "default_runtime"}) + if tc.expectedDefaultRuntimeBinary == nil { + require.Nil(t, defaultRuntime, "%d: %v", i, tc) + } else { + require.NotNil(t, defaultRuntime) + + expected, err := defaultRuntimeTomlConfigV1(tc.expectedDefaultRuntimeBinary.(string)) + require.NoError(t, err, "%d: %v", i, tc) + + configContents, _ := toml.Marshal(defaultRuntime.(*toml.Tree)) + expectedContents, _ := toml.Marshal(expected) + + require.Equal(t, string(expectedContents), string(configContents), "%d: %v: %v", i, tc) + } + + }) } } @@ -150,40 +162,48 @@ func TestUpdateV1Config(t *testing.T) { } for i, tc := range testCases { - o := &options{ - runtimeClass: tc.runtimeClass, - runtimeType: runtimeType, - runtimeDir: runtimeDir, - } + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + o := &options{ + runtimeClass: tc.runtimeClass, + runtimeType: runtimeType, + runtimeDir: runtimeDir, + } - config, err := toml.TreeFromMap(map[string]interface{}{}) - require.NoError(t, err, "%d: %v", i, tc) - - err = UpdateV1Config(config, o) - require.NoError(t, err, "%d: %v", i, tc) - - version, ok := config.Get("version").(int64) - require.True(t, ok) - require.EqualValues(t, expectedVersion, version) - - runtimes, ok := config.GetPath([]string{"plugins", "cri", "containerd", "runtimes"}).(*toml.Tree) - require.True(t, ok) - - runtimeClasses := runtimes.Keys() - require.ElementsMatch(t, tc.expectedRuntimes, runtimeClasses, "%d: %v", i, tc) - - for i, r := range tc.expectedRuntimes { - runtimeConfig := runtimes.Get(r) - - expected, err := runtimeTomlConfigV1(expectedBinaries[i]) + config, err := toml.TreeFromMap(map[string]interface{}{}) require.NoError(t, err, "%d: %v", i, tc) - configContents, _ := toml.Marshal(runtimeConfig) - expectedContents, _ := toml.Marshal(expected) + v1 := &containerd.ConfigV1{ + Tree: config, + UseDefaultRuntimeName: true, + RuntimeType: runtimeType, + } - require.Equal(t, string(expectedContents), string(configContents), "%d: %v: %v", i, r, tc) + err = UpdateConfig(v1, o) + require.NoError(t, err, "%d: %v", i, tc) - } + version, ok := config.Get("version").(int64) + require.True(t, ok) + require.EqualValues(t, expectedVersion, version) + + runtimes, ok := config.GetPath([]string{"plugins", "cri", "containerd", "runtimes"}).(*toml.Tree) + require.True(t, ok) + + runtimeClasses := runtimes.Keys() + require.ElementsMatch(t, tc.expectedRuntimes, runtimeClasses, "%d: %v", i, tc) + + for i, r := range tc.expectedRuntimes { + runtimeConfig := runtimes.Get(r) + + expected, err := runtimeTomlConfigV1(expectedBinaries[i]) + require.NoError(t, err, "%d: %v", i, tc) + + configContents, _ := toml.Marshal(runtimeConfig) + expectedContents, _ := toml.Marshal(expected) + + require.Equal(t, string(expectedContents), string(configContents), "%d: %v: %v", i, r, tc) + + } + }) } } @@ -217,40 +237,48 @@ func TestUpdateV1ConfigWithRuncPresent(t *testing.T) { } for i, tc := range testCases { - o := &options{ - runtimeClass: tc.runtimeClass, - runtimeType: runtimeType, - runtimeDir: runtimeDir, - } + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + o := &options{ + runtimeClass: tc.runtimeClass, + runtimeType: runtimeType, + runtimeDir: runtimeDir, + } - config, err := toml.TreeFromMap(runcConfigMapV1("/runc-binary")) - require.NoError(t, err, "%d: %v", i, tc) - - err = UpdateV1Config(config, o) - require.NoError(t, err, "%d: %v", i, tc) - - version, ok := config.Get("version").(int64) - require.True(t, ok) - require.EqualValues(t, expectedVersion, version) - - runtimes, ok := config.GetPath([]string{"plugins", "cri", "containerd", "runtimes"}).(*toml.Tree) - require.True(t, ok) - - runtimeClasses := runtimes.Keys() - require.ElementsMatch(t, tc.expectedRuntimes, runtimeClasses, "%d: %v", i, tc) - - for i, r := range tc.expectedRuntimes { - runtimeConfig := runtimes.Get(r) - - expected, err := toml.TreeFromMap(runcRuntimeConfigMapV1(expectedBinaries[i])) + config, err := toml.TreeFromMap(runcConfigMapV1("/runc-binary")) require.NoError(t, err, "%d: %v", i, tc) - configContents, _ := toml.Marshal(runtimeConfig) - expectedContents, _ := toml.Marshal(expected) + v1 := &containerd.ConfigV1{ + Tree: config, + UseDefaultRuntimeName: true, + RuntimeType: runtimeType, + } - require.Equal(t, string(expectedContents), string(configContents), "%d: %v: %v", i, r, tc) + err = UpdateConfig(v1, o) + require.NoError(t, err, "%d: %v", i, tc) - } + version, ok := v1.Get("version").(int64) + require.True(t, ok) + require.EqualValues(t, expectedVersion, version) + + runtimes, ok := v1.GetPath([]string{"plugins", "cri", "containerd", "runtimes"}).(*toml.Tree) + require.True(t, ok) + + runtimeClasses := runtimes.Keys() + require.ElementsMatch(t, tc.expectedRuntimes, runtimeClasses, "%d: %v", i, tc) + + for i, r := range tc.expectedRuntimes { + runtimeConfig := runtimes.Get(r) + + expected, err := toml.TreeFromMap(runcRuntimeConfigMapV1(expectedBinaries[i])) + require.NoError(t, err, "%d: %v", i, tc) + + configContents, _ := toml.Marshal(runtimeConfig) + expectedContents, _ := toml.Marshal(expected) + + require.Equal(t, string(expectedContents), string(configContents), "%d: %v: %v", i, r, tc) + + } + }) } } @@ -301,23 +329,31 @@ func TestRevertV1Config(t *testing.T) { } for i, tc := range testCases { - o := &options{ - runtimeClass: "nvidia", - } + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + o := &options{ + runtimeClass: "nvidia", + } - config, err := toml.TreeFromMap(tc.config) - require.NoError(t, err, "%d: %v", i, tc) + config, err := toml.TreeFromMap(tc.config) + require.NoError(t, err, "%d: %v", i, tc) - expected, err := toml.TreeFromMap(tc.expected) - require.NoError(t, err, "%d: %v", i, tc) + expected, err := toml.TreeFromMap(tc.expected) + require.NoError(t, err, "%d: %v", i, tc) - err = RevertV1Config(config, o) - require.NoError(t, err, "%d: %v", i, tc) + v1 := &containerd.ConfigV1{ + Tree: config, + UseDefaultRuntimeName: true, + RuntimeType: runtimeType, + } - configContents, _ := toml.Marshal(config) - expectedContents, _ := toml.Marshal(expected) + err = RevertConfig(v1, o) + require.NoError(t, err, "%d: %v", i, tc) - require.Equal(t, string(expectedContents), string(configContents), "%d: %v", i, tc) + configContents, _ := toml.Marshal(config) + expectedContents, _ := toml.Marshal(expected) + + require.Equal(t, string(expectedContents), string(configContents), "%d: %v", i, tc) + }) } } diff --git a/tools/container/containerd/config_v2.go b/tools/container/containerd/config_v2.go deleted file mode 100644 index fac96b0f..00000000 --- a/tools/container/containerd/config_v2.go +++ /dev/null @@ -1,58 +0,0 @@ -/** -# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -*/ - -package main - -import ( - "github.com/pelletier/go-toml" -) - -// configV2 represents a V2 containerd config -type configV2 struct { - config -} - -func newConfigV2(cfg *toml.Tree) UpdateReverter { - c := configV2{ - config: config{ - Tree: cfg, - version: 2, - cri: "io.containerd.grpc.v1.cri", - }, - } - - return &c -} - -// Update performs an update specific to v2 of the containerd config -func (config *configV2) Update(o *options) error { - defaultRuntime := o.getDefaultRuntime() - for runtimeClass, runtimeBinary := range o.getRuntimeBinaries() { - setAsDefault := defaultRuntime == runtimeClass - config.update(runtimeClass, o.runtimeType, runtimeBinary, setAsDefault) - } - - return nil -} - -// Revert performs a revert specific to v2 of the containerd config -func (config *configV2) Revert(o *options) error { - for runtimeClass := range o.getRuntimeBinaries() { - config.revert(runtimeClass) - } - - return nil -} diff --git a/tools/container/containerd/config_v2_test.go b/tools/container/containerd/config_v2_test.go index 68798446..6df8e69c 100644 --- a/tools/container/containerd/config_v2_test.go +++ b/tools/container/containerd/config_v2_test.go @@ -17,8 +17,10 @@ package main import ( + "fmt" "testing" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine/containerd" "github.com/pelletier/go-toml" "github.com/stretchr/testify/require" ) @@ -78,7 +80,12 @@ func TestUpdateV2ConfigDefaultRuntime(t *testing.T) { config, err := toml.TreeFromMap(map[string]interface{}{}) require.NoError(t, err, "%d: %v", i, tc) - err = UpdateV2Config(config, o) + v2 := &containerd.Config{ + Tree: config, + RuntimeType: runtimeType, + } + + err = UpdateConfig(v2, o) require.NoError(t, err, "%d: %v", i, tc) defaultRuntimeName := config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}) @@ -123,7 +130,12 @@ func TestUpdateV2Config(t *testing.T) { config, err := toml.TreeFromMap(map[string]interface{}{}) require.NoError(t, err, "%d: %v", i, tc) - err = UpdateV2Config(config, o) + v2 := &containerd.Config{ + Tree: config, + RuntimeType: runtimeType, + } + + err = UpdateConfig(v2, o) require.NoError(t, err, "%d: %v", i, tc) version, ok := config.Get("version").(int64) @@ -182,40 +194,47 @@ func TestUpdateV2ConfigWithRuncPresent(t *testing.T) { } for i, tc := range testCases { - o := &options{ - runtimeClass: tc.runtimeClass, - runtimeType: runtimeType, - runtimeDir: runtimeDir, - } + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + o := &options{ + runtimeClass: tc.runtimeClass, + runtimeType: runtimeType, + runtimeDir: runtimeDir, + } - config, err := toml.TreeFromMap(runcConfigMapV2("/runc-binary")) - require.NoError(t, err, "%d: %v", i, tc) - - err = UpdateV2Config(config, o) - require.NoError(t, err, "%d: %v", i, tc) - - version, ok := config.Get("version").(int64) - require.True(t, ok) - require.EqualValues(t, expectedVersion, version) - - runtimes, ok := config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes"}).(*toml.Tree) - require.True(t, ok, "%d: %v", i, tc) - - runtimeClasses := runtimes.Keys() - require.ElementsMatch(t, tc.expectedRuntimes, runtimeClasses, "%d: %v", i, tc) - - for i, r := range tc.expectedRuntimes { - runtimeConfig := runtimes.Get(r) - - expected, err := toml.TreeFromMap(runcRuntimeConfigMapV2(expectedBinaries[i])) + config, err := toml.TreeFromMap(runcConfigMapV2("/runc-binary")) require.NoError(t, err, "%d: %v", i, tc) - configContents, _ := toml.Marshal(runtimeConfig) - expectedContents, _ := toml.Marshal(expected) + v2 := &containerd.Config{ + Tree: config, + RuntimeType: runtimeType, + } - require.Equal(t, string(expectedContents), string(configContents), "%d: %v: %v", i, r, tc) + err = UpdateConfig(v2, o) + require.NoError(t, err, "%d: %v", i, tc) - } + version, ok := v2.Get("version").(int64) + require.True(t, ok) + require.EqualValues(t, expectedVersion, version) + + runtimes, ok := v2.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes"}).(*toml.Tree) + require.True(t, ok, "%d: %v", i, tc) + + runtimeClasses := runtimes.Keys() + require.ElementsMatch(t, tc.expectedRuntimes, runtimeClasses, "%d: %v", i, tc) + + for i, r := range tc.expectedRuntimes { + runtimeConfig := runtimes.Get(r) + + expected, err := toml.TreeFromMap(runcRuntimeConfigMapV2(expectedBinaries[i])) + require.NoError(t, err, "%d: %v", i, tc) + + configContents, _ := toml.Marshal(runtimeConfig) + expectedContents, _ := toml.Marshal(expected) + + require.Equal(t, string(expectedContents), string(configContents), "%d: %v: %v", i, r, tc) + + } + }) } } @@ -275,7 +294,12 @@ func TestRevertV2Config(t *testing.T) { expected, err := toml.TreeFromMap(tc.expected) require.NoError(t, err, "%d: %v", i, tc) - err = RevertV2Config(config, o) + v2 := &containerd.Config{ + Tree: config, + RuntimeType: runtimeType, + } + + err = RevertConfig(v2, o) require.NoError(t, err, "%d: %v", i, tc) configContents, _ := toml.Marshal(config) diff --git a/tools/container/containerd/containerd.go b/tools/container/containerd/containerd.go index b8067d1a..b87769a3 100644 --- a/tools/container/containerd/containerd.go +++ b/tools/container/containerd/containerd.go @@ -21,11 +21,12 @@ import ( "net" "os" "os/exec" - "path/filepath" "syscall" "time" - toml "github.com/pelletier/go-toml" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine/containerd" + "github.com/NVIDIA/nvidia-container-toolkit/tools/container/operator" log "github.com/sirupsen/logrus" cli "github.com/urfave/cli/v2" ) @@ -194,25 +195,28 @@ func Setup(c *cli.Context, o *options) error { } o.runtimeDir = runtimeDir - cfg, err := LoadConfig(o.config) + cfg, err := containerd.New( + containerd.WithPath(o.config), + containerd.WithRuntimeType(o.runtimeType), + containerd.WithUseLegacyConfig(o.useLegacyConfig), + ) if err != nil { return fmt.Errorf("unable to load config: %v", err) } - version, err := ParseVersion(cfg, o.useLegacyConfig) - if err != nil { - return fmt.Errorf("unable to parse version: %v", err) - } - - err = UpdateConfig(cfg, o, version) + err = UpdateConfig(cfg, o) if err != nil { return fmt.Errorf("unable to update config: %v", err) } - err = FlushConfig(o.config, cfg) + log.Infof("Flushing containerd config to %v", o.config) + n, err := cfg.Save(o.config) if err != nil { return fmt.Errorf("unable to flush config: %v", err) } + if n == 0 { + log.Infof("Config file is empty, removed") + } err = RestartContainerd(o) if err != nil { @@ -233,25 +237,28 @@ func Cleanup(c *cli.Context, o *options) error { return fmt.Errorf("unable to parse args: %v", err) } - cfg, err := LoadConfig(o.config) + cfg, err := containerd.New( + containerd.WithPath(o.config), + containerd.WithRuntimeType(o.runtimeType), + containerd.WithUseLegacyConfig(o.useLegacyConfig), + ) if err != nil { return fmt.Errorf("unable to load config: %v", err) } - version, err := ParseVersion(cfg, o.useLegacyConfig) - if err != nil { - return fmt.Errorf("unable to parse version: %v", err) - } - - err = RevertConfig(cfg, o, version) + err = RevertConfig(cfg, o) if err != nil { return fmt.Errorf("unable to update config: %v", err) } - err = FlushConfig(o.config, cfg) + log.Infof("Flushing containerd config to %v", o.config) + n, err := cfg.Save(o.config) if err != nil { return fmt.Errorf("unable to flush config: %v", err) } + if n == 0 { + log.Infof("Config file is empty, removed") + } err = RestartContainerd(o) if err != nil { @@ -277,160 +284,36 @@ func ParseArgs(c *cli.Context) (string, error) { return runtimeDir, nil } -// LoadConfig loads the containerd config from disk -func LoadConfig(config string) (*toml.Tree, error) { - log.Infof("Loading config: %v", config) - - info, err := os.Stat(config) - if os.IsExist(err) && info.IsDir() { - return nil, fmt.Errorf("config file is a directory") - } - - configFile := config - if os.IsNotExist(err) { - configFile = "/dev/null" - log.Infof("Config file does not exist, creating new one") - } - - cfg, err := toml.LoadFile(configFile) - if err != nil { - return nil, err - } - - log.Infof("Successfully loaded config") - - return cfg, nil -} - -// ParseVersion parses the version field out of the containerd config -func ParseVersion(config *toml.Tree, useLegacyConfig bool) (int, error) { - var defaultVersion int - if !useLegacyConfig { - defaultVersion = 2 - } else { - defaultVersion = 1 - } - - var version int - switch v := config.Get("version").(type) { - case nil: - switch len(config.Keys()) { - case 0: // No config exists, or the config file is empty, use version inferred from containerd - version = defaultVersion - default: // A config file exists, has content, and no version is set - version = 1 - } - case int64: - version = int(v) - default: - return -1, fmt.Errorf("unsupported type for version field: %v", v) - } - log.Infof("Config version: %v", version) - - if version == 1 { - log.Warnf("Support for containerd config version 1 is deprecated") - } - - return version, nil -} - // UpdateConfig updates the containerd config to include the nvidia-container-runtime -func UpdateConfig(config *toml.Tree, o *options, version int) error { - var err error - - log.Infof("Updating config") - switch version { - case 1: - err = UpdateV1Config(config, o) - case 2: - err = UpdateV2Config(config, o) - default: - err = fmt.Errorf("unsupported containerd config version: %v", version) +func UpdateConfig(cfg engine.Interface, o *options) error { + runtimes := operator.GetRuntimes( + operator.WithNvidiaRuntimeName(o.runtimeClass), + operator.WithSetAsDefault(o.setAsDefault), + operator.WithRoot(o.runtimeDir), + ) + for class, runtime := range runtimes { + err := cfg.AddRuntime(class, runtime.Path, runtime.SetAsDefault) + if err != nil { + return fmt.Errorf("unable to update config for runtime class '%v': %v", class, err) + } } - if err != nil { - return err - } - log.Infof("Successfully updated config") return nil } // RevertConfig reverts the containerd config to remove the nvidia-container-runtime -func RevertConfig(config *toml.Tree, o *options, version int) error { - var err error - - log.Infof("Reverting config") - switch version { - case 1: - err = RevertV1Config(config, o) - case 2: - err = RevertV2Config(config, o) - default: - err = fmt.Errorf("unsupported containerd config version: %v", version) - } - if err != nil { - return err - } - log.Infof("Successfully reverted config") - - return nil -} - -// UpdateV1Config performs an update specific to v1 of the containerd config -func UpdateV1Config(config *toml.Tree, o *options) error { - c := newConfigV1(config) - return c.Update(o) -} - -// RevertV1Config performs a revert specific to v1 of the containerd config -func RevertV1Config(config *toml.Tree, o *options) error { - c := newConfigV1(config) - return c.Revert(o) -} - -// UpdateV2Config performs an update specific to v2 of the containerd config -func UpdateV2Config(config *toml.Tree, o *options) error { - c := newConfigV2(config) - return c.Update(o) -} - -// RevertV2Config performs a revert specific to v2 of the containerd config -func RevertV2Config(config *toml.Tree, o *options) error { - c := newConfigV2(config) - return c.Revert(o) -} - -// FlushConfig flushes the updated/reverted config out to disk -func FlushConfig(config string, cfg *toml.Tree) error { - log.Infof("Flushing config") - - output, err := cfg.ToTomlString() - if err != nil { - return fmt.Errorf("unable to convert to TOML: %v", err) - } - - switch len(output) { - case 0: - err := os.Remove(config) +func RevertConfig(cfg engine.Interface, o *options) error { + runtimes := operator.GetRuntimes( + operator.WithNvidiaRuntimeName(o.runtimeClass), + operator.WithSetAsDefault(o.setAsDefault), + operator.WithRoot(o.runtimeDir), + ) + for class := range runtimes { + err := cfg.RemoveRuntime(class) if err != nil { - return fmt.Errorf("unable to remove empty file: %v", err) - } - log.Infof("Config empty, removing file") - default: - f, err := os.Create(config) - if err != nil { - return fmt.Errorf("unable to open '%v' for writing: %v", config, err) - } - defer f.Close() - - _, err = f.WriteString(output) - if err != nil { - return fmt.Errorf("unable to write output: %v", err) + return fmt.Errorf("unable to revert config for runtime class '%v': %v", class, err) } } - - log.Infof("Successfully flushed config") - return nil } @@ -551,36 +434,3 @@ func RestartContainerdSystemd(hostRootMount string) error { return nil } - -// getDefaultRuntime returns the default runtime for the configured options. -// If the configuration is invalid or the default runtimes should not be set -// the empty string is returned. -func (o options) getDefaultRuntime() string { - if o.setAsDefault { - if o.runtimeClass == nvidiaExperimentalRuntimeName { - return nvidiaExperimentalRuntimeName - } - if o.runtimeClass == "" { - return defaultRuntimeClass - } - return o.runtimeClass - } - return "" -} - -// getRuntimeBinaries returns a map of runtime names to binary paths. This includes the -// renaming of the `nvidia` runtime as per the --runtime-class command line flag. -func (o options) getRuntimeBinaries() map[string]string { - runtimeBinaries := make(map[string]string) - - for rt, bin := range nvidiaRuntimeBinaries { - runtime := rt - if o.runtimeClass != "" && o.runtimeClass != nvidiaExperimentalRuntimeName && runtime == defaultRuntimeClass { - runtime = o.runtimeClass - } - - runtimeBinaries[runtime] = filepath.Join(o.runtimeDir, bin) - } - - return runtimeBinaries -} From 62d88e7c95b5740ba576cc32f9ee79279af849a2 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 24 Feb 2023 11:06:59 +0200 Subject: [PATCH 8/9] Add cdi and legacy mode runtimes This change adds .cdi and .legacy mode-specific runtimes the list of runtimes supported by the operator. These are also installed as part of the NVIDIA Container Toolkit. Signed-off-by: Evan Lezar --- tools/container/containerd/config_v1_test.go | 20 +++-- tools/container/containerd/config_v2_test.go | 82 +++++++++--------- tools/container/docker/docker_test.go | 87 ++++++++++++++++++++ tools/container/operator/operator.go | 2 +- tools/container/operator/operator_test.go | 56 +++++++++++++ tools/container/toolkit/runtime.go | 35 +++++--- tools/container/toolkit/runtime_test.go | 2 +- 7 files changed, 227 insertions(+), 57 deletions(-) diff --git a/tools/container/containerd/config_v1_test.go b/tools/container/containerd/config_v1_test.go index dd8bf54c..9b94bba2 100644 --- a/tools/container/containerd/config_v1_test.go +++ b/tools/container/containerd/config_v1_test.go @@ -141,6 +141,8 @@ func TestUpdateV1Config(t *testing.T) { expectedBinaries := []string{ "/test/runtime/dir/nvidia-container-runtime", "/test/runtime/dir/nvidia-container-runtime.experimental", + "/test/runtime/dir/nvidia-container-runtime.cdi", + "/test/runtime/dir/nvidia-container-runtime.legacy", } testCases := []struct { @@ -149,15 +151,15 @@ func TestUpdateV1Config(t *testing.T) { }{ { runtimeClass: "nvidia", - expectedRuntimes: []string{"nvidia", "nvidia-experimental"}, + expectedRuntimes: []string{"nvidia", "nvidia-experimental", "nvidia-cdi", "nvidia-legacy"}, }, { runtimeClass: "NAME", - expectedRuntimes: []string{"NAME", "nvidia-experimental"}, + expectedRuntimes: []string{"NAME", "nvidia-experimental", "nvidia-cdi", "nvidia-legacy"}, }, { runtimeClass: "nvidia-experimental", - expectedRuntimes: []string{"nvidia", "nvidia-experimental"}, + expectedRuntimes: []string{"nvidia", "nvidia-experimental", "nvidia-cdi", "nvidia-legacy"}, }, } @@ -216,6 +218,8 @@ func TestUpdateV1ConfigWithRuncPresent(t *testing.T) { runcBinary, "/test/runtime/dir/nvidia-container-runtime", "/test/runtime/dir/nvidia-container-runtime.experimental", + "/test/runtime/dir/nvidia-container-runtime.cdi", + "/test/runtime/dir/nvidia-container-runtime.legacy", } testCases := []struct { @@ -224,15 +228,15 @@ func TestUpdateV1ConfigWithRuncPresent(t *testing.T) { }{ { runtimeClass: "nvidia", - expectedRuntimes: []string{"runc", "nvidia", "nvidia-experimental"}, + expectedRuntimes: []string{"runc", "nvidia", "nvidia-experimental", "nvidia-cdi", "nvidia-legacy"}, }, { runtimeClass: "NAME", - expectedRuntimes: []string{"runc", "NAME", "nvidia-experimental"}, + expectedRuntimes: []string{"runc", "NAME", "nvidia-experimental", "nvidia-cdi", "nvidia-legacy"}, }, { runtimeClass: "nvidia-experimental", - expectedRuntimes: []string{"runc", "nvidia", "nvidia-experimental"}, + expectedRuntimes: []string{"runc", "nvidia", "nvidia-experimental", "nvidia-cdi", "nvidia-legacy"}, }, } @@ -303,6 +307,8 @@ func TestRevertV1Config(t *testing.T) { "runtimes": map[string]interface{}{ "nvidia": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime"), "nvidia-experimental": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime.experimental"), + "nvidia-cdi": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime.cdi"), + "nvidia-legacy": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime.legacy"), }, }, }, @@ -318,6 +324,8 @@ func TestRevertV1Config(t *testing.T) { "runtimes": map[string]interface{}{ "nvidia": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime"), "nvidia-experimental": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime.experimental"), + "nvidia-cdi": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime.cdi"), + "nvidia-legacy": runtimeMapV1("/test/runtime/dir/nvidia-container-runtime.legacy"), }, "default_runtime": defaultRuntimeV1("/test/runtime/dir/nvidia-container-runtime"), "default_runtime_name": "nvidia", diff --git a/tools/container/containerd/config_v2_test.go b/tools/container/containerd/config_v2_test.go index 6df8e69c..d5e5597e 100644 --- a/tools/container/containerd/config_v2_test.go +++ b/tools/container/containerd/config_v2_test.go @@ -71,25 +71,27 @@ func TestUpdateV2ConfigDefaultRuntime(t *testing.T) { } for i, tc := range testCases { - o := &options{ - setAsDefault: tc.setAsDefault, - runtimeClass: tc.runtimeClass, - runtimeDir: runtimeDir, - } + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + o := &options{ + setAsDefault: tc.setAsDefault, + runtimeClass: tc.runtimeClass, + runtimeDir: runtimeDir, + } - config, err := toml.TreeFromMap(map[string]interface{}{}) - require.NoError(t, err, "%d: %v", i, tc) + config, err := toml.TreeFromMap(map[string]interface{}{}) + require.NoError(t, err, "%d: %v", i, tc) - v2 := &containerd.Config{ - Tree: config, - RuntimeType: runtimeType, - } + v2 := &containerd.Config{ + Tree: config, + RuntimeType: runtimeType, + } - err = UpdateConfig(v2, o) - require.NoError(t, err, "%d: %v", i, tc) + err = UpdateConfig(v2, o) + require.NoError(t, err, "%d: %v", i, tc) - defaultRuntimeName := config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}) - require.EqualValues(t, tc.expectedDefaultRuntimeName, defaultRuntimeName, "%d: %v", i, tc) + defaultRuntimeName := config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}) + require.EqualValues(t, tc.expectedDefaultRuntimeName, defaultRuntimeName, "%d: %v", i, tc) + }) } } @@ -100,6 +102,8 @@ func TestUpdateV2Config(t *testing.T) { expectedBinaries := []string{ "/test/runtime/dir/nvidia-container-runtime", "/test/runtime/dir/nvidia-container-runtime.experimental", + "/test/runtime/dir/nvidia-container-runtime.cdi", + "/test/runtime/dir/nvidia-container-runtime.legacy", } testCases := []struct { @@ -108,15 +112,15 @@ func TestUpdateV2Config(t *testing.T) { }{ { runtimeClass: "nvidia", - expectedRuntimes: []string{"nvidia", "nvidia-experimental"}, + expectedRuntimes: []string{"nvidia", "nvidia-experimental", "nvidia-cdi", "nvidia-legacy"}, }, { runtimeClass: "NAME", - expectedRuntimes: []string{"NAME", "nvidia-experimental"}, + expectedRuntimes: []string{"NAME", "nvidia-experimental", "nvidia-cdi", "nvidia-legacy"}, }, { runtimeClass: "nvidia-experimental", - expectedRuntimes: []string{"nvidia", "nvidia-experimental"}, + expectedRuntimes: []string{"nvidia", "nvidia-experimental", "nvidia-cdi", "nvidia-legacy"}, }, } @@ -173,6 +177,8 @@ func TestUpdateV2ConfigWithRuncPresent(t *testing.T) { runcBinary, "/test/runtime/dir/nvidia-container-runtime", "/test/runtime/dir/nvidia-container-runtime.experimental", + "/test/runtime/dir/nvidia-container-runtime.cdi", + "/test/runtime/dir/nvidia-container-runtime.legacy", } testCases := []struct { @@ -181,15 +187,15 @@ func TestUpdateV2ConfigWithRuncPresent(t *testing.T) { }{ { runtimeClass: "nvidia", - expectedRuntimes: []string{"runc", "nvidia", "nvidia-experimental"}, + expectedRuntimes: []string{"runc", "nvidia", "nvidia-experimental", "nvidia-cdi", "nvidia-legacy"}, }, { runtimeClass: "NAME", - expectedRuntimes: []string{"runc", "NAME", "nvidia-experimental"}, + expectedRuntimes: []string{"runc", "NAME", "nvidia-experimental", "nvidia-cdi", "nvidia-legacy"}, }, { runtimeClass: "nvidia-experimental", - expectedRuntimes: []string{"runc", "nvidia", "nvidia-experimental"}, + expectedRuntimes: []string{"runc", "nvidia", "nvidia-experimental", "nvidia-cdi", "nvidia-legacy"}, }, } @@ -284,28 +290,30 @@ func TestRevertV2Config(t *testing.T) { } for i, tc := range testCases { - o := &options{ - runtimeClass: "nvidia", - } + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + o := &options{ + runtimeClass: "nvidia", + } - config, err := toml.TreeFromMap(tc.config) - require.NoError(t, err, "%d: %v", i, tc) + config, err := toml.TreeFromMap(tc.config) + require.NoError(t, err, "%d: %v", i, tc) - expected, err := toml.TreeFromMap(tc.expected) - require.NoError(t, err, "%d: %v", i, tc) + expected, err := toml.TreeFromMap(tc.expected) + require.NoError(t, err, "%d: %v", i, tc) - v2 := &containerd.Config{ - Tree: config, - RuntimeType: runtimeType, - } + v2 := &containerd.Config{ + Tree: config, + RuntimeType: runtimeType, + } - err = RevertConfig(v2, o) - require.NoError(t, err, "%d: %v", i, tc) + err = RevertConfig(v2, o) + require.NoError(t, err, "%d: %v", i, tc) - configContents, _ := toml.Marshal(config) - expectedContents, _ := toml.Marshal(expected) + configContents, _ := toml.Marshal(config) + expectedContents, _ := toml.Marshal(expected) - require.Equal(t, string(expectedContents), string(configContents), "%d: %v", i, tc) + require.Equal(t, string(expectedContents), string(configContents), "%d: %v", i, tc) + }) } } diff --git a/tools/container/docker/docker_test.go b/tools/container/docker/docker_test.go index 462192ed..2bba2116 100644 --- a/tools/container/docker/docker_test.go +++ b/tools/container/docker/docker_test.go @@ -93,6 +93,14 @@ func TestUpdateConfig(t *testing.T) { "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, + "nvidia-cdi": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.cdi", + "args": []string{}, + }, + "nvidia-legacy": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.legacy", + "args": []string{}, + }, }, }, }, @@ -110,6 +118,14 @@ func TestUpdateConfig(t *testing.T) { "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, + "nvidia-cdi": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.cdi", + "args": []string{}, + }, + "nvidia-legacy": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.legacy", + "args": []string{}, + }, }, }, }, @@ -127,6 +143,14 @@ func TestUpdateConfig(t *testing.T) { "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, + "nvidia-cdi": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.cdi", + "args": []string{}, + }, + "nvidia-legacy": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.legacy", + "args": []string{}, + }, }, }, }, @@ -150,6 +174,14 @@ func TestUpdateConfig(t *testing.T) { "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, + "nvidia-cdi": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.cdi", + "args": []string{}, + }, + "nvidia-legacy": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.legacy", + "args": []string{}, + }, }, }, }, @@ -176,6 +208,14 @@ func TestUpdateConfig(t *testing.T) { "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, + "nvidia-cdi": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.cdi", + "args": []string{}, + }, + "nvidia-legacy": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.legacy", + "args": []string{}, + }, }, }, }, @@ -196,6 +236,14 @@ func TestUpdateConfig(t *testing.T) { "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, + "nvidia-cdi": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.cdi", + "args": []string{}, + }, + "nvidia-legacy": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.legacy", + "args": []string{}, + }, }, }, }, @@ -216,6 +264,14 @@ func TestUpdateConfig(t *testing.T) { "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, + "nvidia-cdi": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.cdi", + "args": []string{}, + }, + "nvidia-legacy": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.legacy", + "args": []string{}, + }, }, }, }, @@ -244,6 +300,14 @@ func TestUpdateConfig(t *testing.T) { "path": "/test/runtime/dir/nvidia-container-runtime.experimental", "args": []string{}, }, + "nvidia-cdi": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.cdi", + "args": []string{}, + }, + "nvidia-legacy": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.legacy", + "args": []string{}, + }, }, }, }, @@ -315,6 +379,29 @@ func TestRevertConfig(t *testing.T) { }, expectedConfig: map[string]interface{}{}, }, + { + config: map[string]interface{}{ + "runtimes": map[string]interface{}{ + "nvidia": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime", + "args": []string{}, + }, + "nvidia-experimental": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.experimental", + "args": []string{}, + }, + "nvidia-cdi": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.cdi", + "args": []string{}, + }, + "nvidia-legacy": map[string]interface{}{ + "path": "/test/runtime/dir/nvidia-container-runtime.legacy", + "args": []string{}, + }, + }, + }, + expectedConfig: map[string]interface{}{}, + }, { config: map[string]interface{}{ "default-runtime": "nvidia", diff --git a/tools/container/operator/operator.go b/tools/container/operator/operator.go index b52e97ad..fe90f715 100644 --- a/tools/container/operator/operator.go +++ b/tools/container/operator/operator.go @@ -59,7 +59,7 @@ func GetRuntimes(opts ...Option) Runtimes { runtimes := make(Runtimes) runtimes.add(c.nvidiaRuntime()) - modes := []string{"experimental"} + modes := []string{"experimental", "cdi", "legacy"} for _, mode := range modes { runtimes.add(c.modeRuntime(mode)) } diff --git a/tools/container/operator/operator_test.go b/tools/container/operator/operator_test.go index 07417fb9..e2f4abf7 100644 --- a/tools/container/operator/operator_test.go +++ b/tools/container/operator/operator_test.go @@ -41,6 +41,14 @@ func TestOptions(t *testing.T) { name: "nvidia-experimental", Path: "/usr/bin/nvidia-container-runtime.experimental", }, + "nvidia-cdi": Runtime{ + name: "nvidia-cdi", + Path: "/usr/bin/nvidia-container-runtime.cdi", + }, + "nvidia-legacy": Runtime{ + name: "nvidia-legacy", + Path: "/usr/bin/nvidia-container-runtime.legacy", + }, }, }, { @@ -56,6 +64,14 @@ func TestOptions(t *testing.T) { name: "nvidia-experimental", Path: "/usr/bin/nvidia-container-runtime.experimental", }, + "nvidia-cdi": Runtime{ + name: "nvidia-cdi", + Path: "/usr/bin/nvidia-container-runtime.cdi", + }, + "nvidia-legacy": Runtime{ + name: "nvidia-legacy", + Path: "/usr/bin/nvidia-container-runtime.legacy", + }, }, }, { @@ -72,6 +88,14 @@ func TestOptions(t *testing.T) { name: "nvidia-experimental", Path: "/usr/bin/nvidia-container-runtime.experimental", }, + "nvidia-cdi": Runtime{ + name: "nvidia-cdi", + Path: "/usr/bin/nvidia-container-runtime.cdi", + }, + "nvidia-legacy": Runtime{ + name: "nvidia-legacy", + Path: "/usr/bin/nvidia-container-runtime.legacy", + }, }, }, { @@ -88,6 +112,14 @@ func TestOptions(t *testing.T) { name: "nvidia-experimental", Path: "/usr/bin/nvidia-container-runtime.experimental", }, + "nvidia-cdi": Runtime{ + name: "nvidia-cdi", + Path: "/usr/bin/nvidia-container-runtime.cdi", + }, + "nvidia-legacy": Runtime{ + name: "nvidia-legacy", + Path: "/usr/bin/nvidia-container-runtime.legacy", + }, }, }, { @@ -102,6 +134,14 @@ func TestOptions(t *testing.T) { name: "nvidia-experimental", Path: "/usr/bin/nvidia-container-runtime.experimental", }, + "nvidia-cdi": Runtime{ + name: "nvidia-cdi", + Path: "/usr/bin/nvidia-container-runtime.cdi", + }, + "nvidia-legacy": Runtime{ + name: "nvidia-legacy", + Path: "/usr/bin/nvidia-container-runtime.legacy", + }, }, }, { @@ -118,6 +158,14 @@ func TestOptions(t *testing.T) { Path: "/usr/bin/nvidia-container-runtime.experimental", SetAsDefault: true, }, + "nvidia-cdi": Runtime{ + name: "nvidia-cdi", + Path: "/usr/bin/nvidia-container-runtime.cdi", + }, + "nvidia-legacy": Runtime{ + name: "nvidia-legacy", + Path: "/usr/bin/nvidia-container-runtime.legacy", + }, }, }, { @@ -132,6 +180,14 @@ func TestOptions(t *testing.T) { name: "nvidia-experimental", Path: "/usr/bin/nvidia-container-runtime.experimental", }, + "nvidia-cdi": Runtime{ + name: "nvidia-cdi", + Path: "/usr/bin/nvidia-container-runtime.cdi", + }, + "nvidia-legacy": Runtime{ + name: "nvidia-legacy", + Path: "/usr/bin/nvidia-container-runtime.legacy", + }, }, }, } diff --git a/tools/container/toolkit/runtime.go b/tools/container/toolkit/runtime.go index bd39649a..d8d7abad 100644 --- a/tools/container/toolkit/runtime.go +++ b/tools/container/toolkit/runtime.go @@ -21,13 +21,12 @@ import ( "path/filepath" "strings" + "github.com/NVIDIA/nvidia-container-toolkit/tools/container/operator" log "github.com/sirupsen/logrus" ) const ( - nvidiaContainerRuntimeSource = "/usr/bin/nvidia-container-runtime" - nvidiaContainerRuntimeTarget = "nvidia-container-runtime.real" - nvidiaContainerRuntimeWrapper = "nvidia-container-runtime" + nvidiaContainerRuntimeSource = "/usr/bin/nvidia-container-runtime" nvidiaExperimentalContainerRuntimeSource = "nvidia-container-runtime.experimental" ) @@ -35,15 +34,21 @@ const ( // installContainerRuntimes sets up the NVIDIA container runtimes, copying the executables // and implementing the required wrapper func installContainerRuntimes(toolkitDir string, driverRoot string) error { - r := newNvidiaContainerRuntimeInstaller() + runtimes := operator.GetRuntimes() + for _, runtime := range runtimes { + if filepath.Base(runtime.Path) == nvidiaExperimentalContainerRuntimeSource { + continue + } + r := newNvidiaContainerRuntimeInstaller(runtime.Path) - _, err := r.install(toolkitDir) - if err != nil { - return fmt.Errorf("error installing NVIDIA container runtime: %v", err) + _, err := r.install(toolkitDir) + if err != nil { + return fmt.Errorf("error installing NVIDIA container runtime: %v", err) + } } // Install the experimental runtime and treat failures as non-fatal. - err = installExperimentalRuntime(toolkitDir, driverRoot) + err := installExperimentalRuntime(toolkitDir, driverRoot) if err != nil { log.Warnf("Could not install experimental runtime: %v", err) } @@ -68,12 +73,18 @@ func installExperimentalRuntime(toolkitDir string, driverRoot string) error { return nil } -func newNvidiaContainerRuntimeInstaller() *executable { +// newNVidiaContainerRuntimeInstaller returns a new executable installer for the NVIDIA container runtime. +// This installer will copy the specified source exectuable to the toolkit directory. +// The executable is copied to a file with the same name as the source, but with a ".real" suffix and a wrapper is +// created to allow for the configuration of the runtime environment. +func newNvidiaContainerRuntimeInstaller(source string) *executable { + wrapperName := filepath.Base(source) + dotfileName := wrapperName + ".real" target := executableTarget{ - dotfileName: nvidiaContainerRuntimeTarget, - wrapperName: nvidiaContainerRuntimeWrapper, + dotfileName: dotfileName, + wrapperName: wrapperName, } - return newRuntimeInstaller(nvidiaContainerRuntimeSource, target, nil) + return newRuntimeInstaller(source, target, nil) } func newNvidiaContainerRuntimeExperimentalInstaller(libraryRoot string) *executable { diff --git a/tools/container/toolkit/runtime_test.go b/tools/container/toolkit/runtime_test.go index 06b1b3c2..14afb14b 100644 --- a/tools/container/toolkit/runtime_test.go +++ b/tools/container/toolkit/runtime_test.go @@ -25,7 +25,7 @@ import ( ) func TestNvidiaContainerRuntimeInstallerWrapper(t *testing.T) { - r := newNvidiaContainerRuntimeInstaller() + r := newNvidiaContainerRuntimeInstaller(nvidiaContainerRuntimeSource) const shebang = "#! /bin/sh" const destFolder = "/dest/folder" From cc7a6f166ba756fc4b91e4cd6695b6ba52da21bd Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 7 Mar 2023 20:59:22 +0200 Subject: [PATCH 9/9] Handle case were runtime name is set to predefined name Signed-off-by: Evan Lezar --- tools/container/operator/operator.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/container/operator/operator.go b/tools/container/operator/operator.go index fe90f715..07cb5ad4 100644 --- a/tools/container/operator/operator.go +++ b/tools/container/operator/operator.go @@ -84,8 +84,13 @@ func (r *Runtimes) add(runtime Runtime) { // nvidiaRuntime creates a runtime that corresponds to the nvidia runtime. // If the nvidiaRuntimeName is specified, this name us used unless this is exactly equal to nvidia-experimental. func (c config) nvidiaRuntime() Runtime { + predefinedRuntimes := map[string]struct{}{ + "nvidia-experimental": {}, + "nvidia-cdi": {}, + "nvidia-legacy": {}, + } name := c.nvidiaRuntimeName - if name == experimentalRuntimeName { + if _, isPredefinedRuntime := predefinedRuntimes[name]; isPredefinedRuntime { name = defaultRuntimeName } return c.newRuntime(name, "nvidia-container-runtime")