mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-22 16:29:18 +00:00
65ae6f1dab
This change ensures that the nvidia-ctk config default command generates a config file that is compatible with the official documentation to, for example, disable cgroups in the NVIDIA Container CLI. This requires that whitespace around comments is stripped before outputing the contets. This also adds an option to load a config and modify it in-place instead. This can be triggered as a post-install step, for example. Signed-off-by: Evan Lezar <elezar@nvidia.com>
314 lines
10 KiB
Go
314 lines
10 KiB
Go
/**
|
|
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
**/
|
|
|
|
package config
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
|
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
|
"github.com/pelletier/go-toml"
|
|
)
|
|
|
|
const (
|
|
configOverride = "XDG_CONFIG_HOME"
|
|
configFilePath = "nvidia-container-runtime/config.toml"
|
|
|
|
nvidiaCTKExecutable = "nvidia-ctk"
|
|
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
|
|
|
|
nvidiaContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
|
|
nvidiaContainerRuntimeHookDefaultPath = "/usr/bin/nvidia-container-runtime-hook"
|
|
)
|
|
|
|
var (
|
|
// DefaultExecutableDir specifies the default path to use for executables if they cannot be located in the path.
|
|
DefaultExecutableDir = "/usr/bin"
|
|
|
|
// NVIDIAContainerRuntimeHookExecutable is the executable name for the NVIDIA Container Runtime Hook
|
|
NVIDIAContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
|
|
// NVIDIAContainerToolkitExecutable is the executable name for the NVIDIA Container Toolkit (an alias for the NVIDIA Container Runtime Hook)
|
|
NVIDIAContainerToolkitExecutable = "nvidia-container-toolkit"
|
|
|
|
configDir = "/etc/"
|
|
)
|
|
|
|
// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit
|
|
// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go
|
|
type Config struct {
|
|
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
|
|
|
|
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
|
|
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
|
|
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
|
|
NVIDIAContainerRuntimeHookConfig RuntimeHookConfig `toml:"nvidia-container-runtime-hook"`
|
|
}
|
|
|
|
// GetConfig sets up the config struct. Values are read from a toml file
|
|
// or set via the environment.
|
|
func GetConfig() (*Config, error) {
|
|
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
|
|
configDir = XDGConfigDir
|
|
}
|
|
|
|
configFilePath := path.Join(configDir, configFilePath)
|
|
|
|
return Load(configFilePath)
|
|
}
|
|
|
|
// Load loads the config from the specified file path.
|
|
func Load(configFilePath string) (*Config, error) {
|
|
if configFilePath == "" {
|
|
return getDefault()
|
|
}
|
|
|
|
tomlFile, err := os.Open(configFilePath)
|
|
if err != nil {
|
|
return getDefault()
|
|
}
|
|
defer tomlFile.Close()
|
|
|
|
cfg, err := LoadFrom(tomlFile)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to read config values: %v", err)
|
|
}
|
|
|
|
return cfg, nil
|
|
}
|
|
|
|
// LoadFrom reads the config from the specified Reader
|
|
func LoadFrom(reader io.Reader) (*Config, error) {
|
|
var tree *toml.Tree
|
|
if reader != nil {
|
|
toml, err := toml.LoadReader(reader)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
tree = toml
|
|
}
|
|
|
|
return getFromTree(tree)
|
|
}
|
|
|
|
// getFromTree reads the nvidia container runtime config from the specified toml Tree.
|
|
func getFromTree(toml *toml.Tree) (*Config, error) {
|
|
cfg, err := getDefault()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if toml == nil {
|
|
return cfg, nil
|
|
}
|
|
|
|
if err := toml.Unmarshal(cfg); err != nil {
|
|
return nil, fmt.Errorf("failed to unmarshal config: %v", err)
|
|
}
|
|
|
|
return cfg, nil
|
|
}
|
|
|
|
// getDefault defines the default values for the config
|
|
func getDefault() (*Config, error) {
|
|
tomlConfig, err := GetDefaultToml()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// tomlConfig above includes information about the default values and comments.
|
|
// we need to marshal it back to a string and then unmarshal it to strip the comments.
|
|
contents, err := tomlConfig.ToTomlString()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
reloaded, err := toml.Load(contents)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
d := Config{}
|
|
if err := reloaded.Unmarshal(&d); err != nil {
|
|
return nil, fmt.Errorf("failed to unmarshal config: %v", err)
|
|
}
|
|
|
|
// The default value for the accept-nvidia-visible-devices-envvar-when-unprivileged is non-standard.
|
|
// As such we explicitly handle it being set here.
|
|
if reloaded.Get("accept-nvidia-visible-devices-envvar-when-unprivileged") == nil {
|
|
d.AcceptEnvvarUnprivileged = true
|
|
}
|
|
// The default value for the nvidia-container-runtime.debug is non-standard.
|
|
// As such we explicitly handle it being set here.
|
|
if reloaded.Get("nvidia-container-runtime.debug") == nil {
|
|
d.NVIDIAContainerRuntimeConfig.DebugFilePath = "/dev/null"
|
|
}
|
|
return &d, nil
|
|
}
|
|
|
|
// GetDefaultToml returns the default config as a toml Tree.
|
|
func GetDefaultToml() (*toml.Tree, error) {
|
|
tree, err := toml.TreeFromMap(nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
tree.Set("disable-require", false)
|
|
tree.SetWithComment("swarm-resource", "", true, "DOCKER_RESOURCE_GPU")
|
|
tree.SetWithComment("accept-nvidia-visible-devices-envvar-when-unprivileged", "", true, true)
|
|
tree.SetWithComment("accept-nvidia-visible-devices-as-volume-mounts", "", true, false)
|
|
|
|
// nvidia-container-cli
|
|
tree.SetWithComment("nvidia-container-cli.root", "", true, "/run/nvidia/driver")
|
|
tree.SetWithComment("nvidia-container-cli.path", "", true, "/usr/bin/nvidia-container-cli")
|
|
tree.Set("nvidia-container-cli.environment", []string{})
|
|
tree.SetWithComment("nvidia-container-cli.debug", "", true, "/var/log/nvidia-container-toolkit.log")
|
|
tree.SetWithComment("nvidia-container-cli.ldcache", "", true, "/etc/ld.so.cache")
|
|
tree.Set("nvidia-container-cli.load-kmods", true)
|
|
tree.SetWithComment("nvidia-container-cli.no-cgroups", "", true, false)
|
|
|
|
tree.SetWithComment("nvidia-container-cli.user", "", getCommentedUserGroup(), getUserGroup())
|
|
tree.Set("nvidia-container-cli.ldconfig", getLdConfigPath())
|
|
|
|
// nvidia-container-runtime
|
|
tree.SetWithComment("nvidia-container-runtime.debug", "", true, "/var/log/nvidia-container-runtime.log")
|
|
tree.Set("nvidia-container-runtime.log-level", "info")
|
|
|
|
commentLines := []string{
|
|
"Specify the runtimes to consider. This list is processed in order and the PATH",
|
|
"searched for matching executables unless the entry is an absolute path.",
|
|
}
|
|
tree.SetWithComment("nvidia-container-runtime.runtimes", strings.Join(commentLines, "\n "), false, []string{"docker-runc", "runc"})
|
|
|
|
tree.Set("nvidia-container-runtime.mode", "auto")
|
|
|
|
tree.Set("nvidia-container-runtime.modes.csv.mount-spec-path", "/etc/nvidia-container-runtime/host-files-for-container.d")
|
|
tree.Set("nvidia-container-runtime.modes.cdi.default-kind", "nvidia.com/gpu")
|
|
tree.Set("nvidia-container-runtime.modes.cdi.annotation-prefixes", []string{cdi.AnnotationPrefix})
|
|
|
|
// nvidia-ctk
|
|
tree.Set("nvidia-ctk.path", nvidiaCTKExecutable)
|
|
|
|
// nvidia-container-runtime-hook
|
|
tree.Set("nvidia-container-runtime-hook.path", nvidiaContainerRuntimeHookExecutable)
|
|
|
|
return tree, nil
|
|
}
|
|
|
|
func getLdConfigPath() string {
|
|
if _, err := os.Stat("/sbin/ldconfig.real"); err == nil {
|
|
return "@/sbin/ldconfig.real"
|
|
}
|
|
return "@/sbin/ldconfig"
|
|
}
|
|
|
|
// getUserGroup returns the user and group to use for the nvidia-container-cli and whether the config option should be commented.
|
|
func getUserGroup() string {
|
|
return "root:video"
|
|
}
|
|
|
|
// getCommentedUserGroup returns whether the nvidia-container-cli user and group config option should be commented.
|
|
func getCommentedUserGroup() bool {
|
|
uncommentIf := map[string]bool{
|
|
"suse": true,
|
|
"opensuse": true,
|
|
}
|
|
|
|
idsLike := getDistIDLike()
|
|
for _, id := range idsLike {
|
|
if uncommentIf[id] {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// getDistIDLike returns the ID_LIKE field from /etc/os-release.
|
|
func getDistIDLike() []string {
|
|
releaseFile, err := os.Open("/etc/os-release")
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
defer releaseFile.Close()
|
|
|
|
scanner := bufio.NewScanner(releaseFile)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
if strings.HasPrefix(line, "ID_LIKE=") {
|
|
value := strings.Trim(strings.TrimPrefix(line, "ID_LIKE="), "\"")
|
|
return strings.Split(value, " ")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ResolveNVIDIACTKPath resolves the path to the nvidia-ctk binary.
|
|
// This executable is used in hooks and needs to be an absolute path.
|
|
// If the path is specified as an absolute path, it is used directly
|
|
// without checking for existence of an executable at that path.
|
|
func ResolveNVIDIACTKPath(logger logger.Interface, nvidiaCTKPath string) string {
|
|
return resolveWithDefault(
|
|
logger,
|
|
"NVIDIA Container Toolkit CLI",
|
|
nvidiaCTKPath,
|
|
nvidiaCTKDefaultFilePath,
|
|
)
|
|
}
|
|
|
|
// ResolveNVIDIAContainerRuntimeHookPath resolves the path the nvidia-container-runtime-hook binary.
|
|
func ResolveNVIDIAContainerRuntimeHookPath(logger logger.Interface, nvidiaContainerRuntimeHookPath string) string {
|
|
return resolveWithDefault(
|
|
logger,
|
|
"NVIDIA Container Runtime Hook",
|
|
nvidiaContainerRuntimeHookPath,
|
|
nvidiaContainerRuntimeHookDefaultPath,
|
|
)
|
|
}
|
|
|
|
// resolveWithDefault resolves the path to the specified binary.
|
|
// If an absolute path is specified, it is used directly without searching for the binary.
|
|
// If the binary cannot be found in the path, the specified default is used instead.
|
|
func resolveWithDefault(logger logger.Interface, label string, path string, defaultPath string) string {
|
|
if filepath.IsAbs(path) {
|
|
logger.Debugf("Using specified %v path %v", label, path)
|
|
return path
|
|
}
|
|
|
|
if path == "" {
|
|
path = filepath.Base(defaultPath)
|
|
}
|
|
logger.Debugf("Locating %v as %v", label, path)
|
|
lookup := lookup.NewExecutableLocator(logger, "")
|
|
|
|
resolvedPath := defaultPath
|
|
targets, err := lookup.Locate(path)
|
|
if err != nil {
|
|
logger.Warningf("Failed to locate %v: %v", path, err)
|
|
} else {
|
|
logger.Debugf("Found %v candidates: %v", path, targets)
|
|
resolvedPath = targets[0]
|
|
}
|
|
logger.Debugf("Using %v path %v", label, path)
|
|
|
|
return resolvedPath
|
|
}
|