This commit is contained in:
Evan Lezar 2024-05-13 17:08:20 +02:00 committed by GitHub
commit d60610424a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 290 additions and 34 deletions

View File

@ -2,6 +2,7 @@
## v1.15.0
* Add a hook to create `.so` symlinks for driver libraries in a container.
* Remove `nvidia-container-runtime` and `nvidia-docker2` packages.
* Use `XDG_DATA_DIRS` environment variable when locating config files such as graphics config files.
* Add support for v0.7.0 Container Device Interface (CDI) specification.

View File

@ -60,6 +60,8 @@ type options struct {
files cli.StringSlice
ignorePatterns cli.StringSlice
}
noDotSoSymlinks bool
}
// NewCommand constructs a generate-cdi command with the specified logger
@ -166,6 +168,11 @@ func (m command) build() *cli.Command {
Usage: "Specify a pattern the CSV mount specifications.",
Destination: &opts.csv.ignorePatterns,
},
&cli.BoolFlag{
Name: "no-dot-so-symlinks",
Usage: "Skip the generation of a hook for creating .so symlinks to driver files in the container",
Destination: &opts.noDotSoSymlinks,
},
}
return &c
@ -270,6 +277,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
nvcdi.WithCSVFiles(opts.csv.files.Value()),
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),
nvcdi.WithNoDotSoSymlinks(opts.noDotSoSymlinks),
)
if err != nil {
return nil, fmt.Errorf("failed to create CDI library: %v", err)

View File

@ -0,0 +1,115 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package dotsosymlinks
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
// command holds the state shared by the create-dot-so-symlinks hook command.
type command struct {
// logger is the logger supplied to NewCommand; run passes it on to the
// library locator.
logger logger.Interface
}
// config collects the values of the command line flags defined in build.
type config struct {
// containerSpec is the value of the --container-spec flag and is handed to
// oci.LoadContainerState when the hook runs.
containerSpec string
// driverVersion is the value of the required --driver-version flag. It is
// used as the version suffix (.so.<driverVersion>) when locating libraries.
driverVersion string
}
// NewCommand returns the create-dot-so-symlinks hook command. The supplied
// logger is stored on the command and used when the hook is run.
func NewCommand(logger logger.Interface) *cli.Command {
	return command{logger: logger}.build()
}
// build assembles the cli.Command for the create-dot-so-symlinks hook. The
// flags are bound to a config value that is passed to run when the command is
// invoked.
func (m command) build() *cli.Command {
	cfg := config{}

	flags := []cli.Flag{
		&cli.StringFlag{
			Name:        "container-spec",
			Usage:       "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
			Destination: &cfg.containerSpec,
		},
		&cli.StringFlag{
			Name:        "driver-version",
			Usage:       "specify the driver version for which the symlinks are to be created. This assumes driver libraries have the .so.`VERSION` suffix.",
			Destination: &cfg.driverVersion,
			Required:    true,
		},
	}

	// Create the 'create-dot-so-symlinks' command
	return &cli.Command{
		Name:  "create-dot-so-symlinks",
		Usage: "A hook to create .so symlinks in the container.",
		Flags: flags,
		Action: func(ctx *cli.Context) error {
			return m.run(ctx, &cfg)
		},
	}
}
// run creates unversioned .so symlinks for driver libraries in the container.
//
// The container state is loaded from cfg.containerSpec and used to determine
// the container root. The libraries matching *.so.<cfg.driverVersion> are then
// located with a locator rooted at the container root. For each of these, if
// exactly one sibling with a single-digit suffix (<lib>.so.[0-9]) exists, a
// symlink <lib>.so pointing at the base name of that sibling is created.
//
// Symlink creation is best-effort: a failure for an individual library is
// logged as a warning and does not abort the hook. An error is only returned
// if the container state or container root cannot be determined, or if
// locating the libraries fails.
func (m command) run(c *cli.Context, cfg *config) error {
	s, err := oci.LoadContainerState(cfg.containerSpec)
	if err != nil {
		return fmt.Errorf("failed to load container state: %w", err)
	}

	containerRoot, err := s.GetContainerRoot()
	if err != nil {
		return fmt.Errorf("failed to determine container root: %w", err)
	}

	locator := lookup.NewLibraryLocator(
		lookup.WithLogger(m.logger),
		lookup.WithRoot(containerRoot),
		lookup.WithOptional(true),
	)
	libs, err := locator.Locate("*.so." + cfg.driverVersion)
	if err != nil {
		return fmt.Errorf("failed to locate libraries for driver version %v: %w", cfg.driverVersion, err)
	}

	versionSuffix := "." + cfg.driverVersion
	for _, lib := range libs {
		// Guard against matches that do not end in the exact .so.<VERSION> suffix.
		if !strings.HasSuffix(lib, ".so"+versionSuffix) {
			continue
		}
		libSoPath := strings.TrimSuffix(lib, versionSuffix)

		libSoXPaths, err := filepath.Glob(libSoPath + ".[0-9]")
		if err != nil {
			m.logger.Warningf("Failed to find .so.[0-9] candidates for %v: %v", lib, err)
			continue
		}
		// Only create the link when the target is unambiguous: no candidate
		// means there is nothing to link to, and several candidates give no
		// way to choose between them.
		if len(libSoXPaths) != 1 {
			continue
		}

		// The link target is the base name only, so the symlink is relative to
		// the directory containing the library.
		target := filepath.Base(libSoXPaths[0])
		if err := os.Symlink(target, libSoPath); err != nil {
			m.logger.Warningf("Failed to create symlink %v -> %v: %v", libSoPath, target, err)
			continue
		}
	}
	return nil
}

View File

@ -22,6 +22,7 @@ import (
"github.com/urfave/cli/v2"
createdotsosymlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-dot-so-symlinks"
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
)
@ -50,6 +51,7 @@ func (m hookCommand) build() *cli.Command {
ldcache.NewCommand(m.logger),
symlinks.NewCommand(m.logger),
chmod.NewCommand(m.logger),
createdotsosymlinks.NewCommand(m.logger),
}
return &hook

View File

@ -19,10 +19,11 @@ package config
type featureName string
const (
FeatureGDS = featureName("gds")
FeatureMOFED = featureName("mofed")
FeatureNVSWITCH = featureName("nvswitch")
FeatureGDRCopy = featureName("gdrcopy")
FeatureGDS = featureName("gds")
FeatureMOFED = featureName("mofed")
FeatureNVSWITCH = featureName("nvswitch")
FeatureGDRCopy = featureName("gdrcopy")
FeatureDotSoSymlinks = featureName("dot-so-symlinks")
)
// features specifies a set of named features.
@ -31,6 +32,9 @@ type features struct {
MOFED *feature `toml:"mofed,omitempty"`
NVSWITCH *feature `toml:"nvswitch,omitempty"`
GDRCopy *feature `toml:"gdrcopy,omitempty"`
// DotSoSymlinks allows for the creation of .so symlinks to .so.1 driver
// files to be opted in to.
DotSoSymlinks *feature `toml:"dot-so-symlinks,omitempty"`
}
type feature bool
@ -40,10 +44,11 @@ type feature bool
// variables can also be supplied.
func (fs features) IsEnabled(n featureName, in ...getenver) bool {
featureEnvvars := map[featureName]string{
FeatureGDS: "NVIDIA_GDS",
FeatureMOFED: "NVIDIA_MOFED",
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
FeatureGDRCopy: "NVIDIA_GDRCOPY",
FeatureGDS: "NVIDIA_GDS",
FeatureMOFED: "NVIDIA_MOFED",
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
FeatureGDRCopy: "NVIDIA_GDRCOPY",
FeatureDotSoSymlinks: "NVIDIA_DOT_SO_SYMLINKS",
}
envvar := featureEnvvars[n]
@ -56,6 +61,8 @@ func (fs features) IsEnabled(n featureName, in ...getenver) bool {
return fs.NVSWITCH.isEnabled(envvar, in...)
case FeatureGDRCopy:
return fs.GDRCopy.isEnabled(envvar, in...)
case FeatureDotSoSymlinks:
return fs.DotSoSymlinks.isEnabled(envvar, in...)
default:
return false
}

View File

@ -0,0 +1,27 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
// NewDotSoSymlinksDiscoverer returns a discoverer for an nvidia-ctk hook that
// runs the create-dot-so-symlinks command with --driver-version set to the
// supplied version.
func NewDotSoSymlinksDiscoverer(nvidiaCTKPath string, version string) Discover {
	const hookName = "create-dot-so-symlinks"
	return CreateNvidiaCTKHook(nvidiaCTKPath, hookName, "--driver-version", version)
}

View File

@ -27,7 +27,6 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
)
@ -252,20 +251,16 @@ func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidia
}
func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) {
libCudaPaths, err := cuda.New(
driver.Libraries(),
).Locate(".*.*")
libRoot, err := driver.LibraryRoot()
if err != nil {
return nil, fmt.Errorf("failed to locate libcuda.so: %v", err)
}
libcudaPath := libCudaPaths[0]
version := strings.TrimPrefix(filepath.Base(libcudaPath), "libcuda.so.")
if version == "" {
return nil, fmt.Errorf("failed to determine libcuda.so version from path: %q", libcudaPath)
return nil, fmt.Errorf("failed to determine driver library root: %w", err)
}
version, err := driver.Version()
if err != nil {
return nil, fmt.Errorf("failed to determine driver version: %w", err)
}
libRoot := filepath.Dir(libcudaPath)
xorgLibs := NewMounts(
logger,
lookup.NewFileLocator(

View File

@ -43,3 +43,9 @@ func WithConfigSearchPaths(paths ...string) Option {
d.configSearchPaths = paths
}
}
// WithVersion returns an Option that sets the driver version on the Driver
// being constructed.
func WithVersion(version string) Option {
	return func(driver *Driver) {
		driver.version = version
	}
}

View File

@ -17,8 +17,11 @@
package root
import (
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
@ -26,6 +29,7 @@ import (
// Driver represents a filesystem in which a set of drivers or devices is defined.
type Driver struct {
sync.Mutex
logger logger.Interface
// Root represents the root from the perspective of the driver libraries and binaries.
Root string
@ -33,6 +37,10 @@ type Driver struct {
librarySearchPaths []string
// configSearchPaths specified explicit search paths for discovering driver config files.
configSearchPaths []string
// version stores the driver version. This can be specified at construction or cached on subsequent calls.
version string
// libraryRoot stores the absolute path where the driver libraries (libcuda.so.<VERSION>) can be found.
libraryRoot string
}
// New creates a new Driver root using the specified options.
@ -80,6 +88,62 @@ func (r *Driver) configSearchOptions() []lookup.Option {
}
}
// Version returns the driver version as a string.
//
// A version that was set at construction or cached by an earlier call is
// returned as-is. Otherwise the version is taken from the file name of the
// located libcuda.so.* library (everything after the "libcuda.so." prefix) and
// cached for subsequent calls. The driver lock is held for the duration of the
// call.
func (r *Driver) Version() (string, error) {
	r.Lock()
	defer r.Unlock()

	if cached := r.version; cached != "" {
		return cached, nil
	}

	path, err := r.libcudaPath()
	if err != nil {
		return "", fmt.Errorf("failed to locate libcuda.so: %v", err)
	}

	const prefix = "libcuda.so."
	detected := strings.TrimPrefix(filepath.Base(path), prefix)
	if len(detected) == 0 {
		return "", fmt.Errorf("failed to determine libcuda.so version from path: %q", path)
	}

	r.version = detected
	return detected, nil
}
// LibraryRoot returns the folder in which the driver libraries can be found.
//
// The result is the directory containing the located libcuda.so.* library. It
// is computed on the first successful call and cached on the Driver; the
// driver lock is held for the duration of the call.
func (r *Driver) LibraryRoot() (string, error) {
	r.Lock()
	defer r.Unlock()

	if cached := r.libraryRoot; cached != "" {
		return cached, nil
	}

	path, err := r.libcudaPath()
	if err != nil {
		return "", fmt.Errorf("failed to locate libcuda.so: %v", err)
	}

	dir := filepath.Dir(path)
	r.libraryRoot = dir
	return dir, nil
}
// libcudaPath returns the path to libcuda.so.*.* in the driver root.
//
// The library is looked up using the locator returned by r.Libraries(). If
// more than one path matches, the first one is selected and a warning listing
// all candidates is logged. An error is returned if the lookup fails or yields
// no paths.
func (r *Driver) libcudaPath() (string, error) {
	pattern := "libcuda.so.*.*"

	locator := r.Libraries()
	paths, err := locator.Locate(pattern)
	if err != nil {
		return "", fmt.Errorf("failed to locate %v: %w", pattern, err)
	}
	// Guard against a locator that reports success without any matches;
	// indexing paths[0] would otherwise panic.
	if len(paths) == 0 {
		return "", fmt.Errorf("failed to locate %v: no matching paths found", pattern)
	}

	libcudaPath := paths[0]
	if len(paths) > 1 {
		r.logger.Warningf("Selecting %v out of multiple libcuda.so paths: %v", libcudaPath, paths)
	}
	return libcudaPath, nil
}
// normalizeSearchPaths takes a list of paths and normalizes these.
// Each of the elements in the list is expanded if it is a path list and the
// resultant list is returned.

View File

@ -23,31 +23,31 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
// NewFeatureGatedModifier creates the modifiers for optional features.
// These include:
//
// NVIDIA_DOT_SO_SYMLINKS=enabled
// NVIDIA_GDS=enabled
// NVIDIA_MOFED=enabled
// NVIDIA_NVSWITCH=enabled
// NVIDIA_GDRCOPY=enabled
//
// If no devices are selected, no changes are made.
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) {
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, driver *root.Driver) (oci.SpecModifier, error) {
if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 {
logger.Infof("No modification required; no devices requested")
return nil, nil
}
var discoverers []discover.Discover
driverRoot := cfg.NVIDIAContainerCLIConfig.Root
devRoot := cfg.NVIDIAContainerCLIConfig.Root
var discoverers []discover.Discover
if cfg.Features.IsEnabled(config.FeatureGDS, image) {
d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot)
d, err := discover.NewGDSDiscoverer(logger, driver.Root, devRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %w", err)
}
@ -78,5 +78,15 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
discoverers = append(discoverers, d)
}
if cfg.Features.IsEnabled(config.FeatureDotSoSymlinks, image) {
version, err := driver.Version()
if err != nil {
return nil, fmt.Errorf("failed to get driver version required for .so symlinks: %w", err)
}
d := discover.NewDotSoSymlinksDiscoverer(cfg.NVIDIACTKConfig.Path, version)
discoverers = append(discoverers, d)
}
return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...))
}

View File

@ -88,7 +88,7 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
return nil, err
}
featureModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, image)
featureModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, image, driver)
if err != nil {
return nil, err
}

View File

@ -66,6 +66,7 @@ func TestFactoryMethod(t *testing.T) {
logger, _ := testlog.NewNullLogger()
driver := root.New(
root.WithDriverRoot("/nvidia/driver/root"),
root.WithVersion("999.88.77"),
)
testCases := []struct {

View File

@ -41,7 +41,7 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
}
driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCTKPath, l.ldconfigPath, l.nvmllib)
driverFiles, err := l.newDriverDiscoverer()
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
}

View File

@ -32,24 +32,35 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
)
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
// newDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
// The supplied NVML Library is used to query the expected driver version.
func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) {
if r := nvmllib.Init(); r != nvml.SUCCESS {
func (l *nvmllib) newDriverDiscoverer() (discover.Discover, error) {
if r := l.nvmllib.Init(); r != nvml.SUCCESS {
return nil, fmt.Errorf("failed to initialize NVML: %v", r)
}
defer func() {
if r := nvmllib.Shutdown(); r != nvml.SUCCESS {
logger.Warningf("failed to shutdown NVML: %v", r)
if r := l.nvmllib.Shutdown(); r != nvml.SUCCESS {
l.logger.Warningf("failed to shutdown NVML: %v", r)
}
}()
version, r := nvmllib.SystemGetDriverVersion()
version, r := l.nvmllib.SystemGetDriverVersion()
if r != nvml.SUCCESS {
return nil, fmt.Errorf("failed to determine driver version: %v", r)
}
return newDriverVersionDiscoverer(logger, driver, nvidiaCTKPath, ldconfigPath, version)
driver, err := newDriverVersionDiscoverer(l.logger, l.driver, l.nvidiaCTKPath, l.ldconfigPath, version)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer: %w", err)
}
discoverers := []discover.Discover{driver}
if !l.noDotSoSymlinks {
createDotSoSymlinksHook := discover.NewDotSoSymlinksDiscoverer(l.nvidiaCTKPath, version)
discoverers = append(discoverers, createDotSoSymlinksHook)
}
return discover.Merge(discoverers...), nil
}
func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath, ldconfigPath, version string) (discover.Discover, error) {

View File

@ -63,6 +63,8 @@ type nvcdilib struct {
infolib info.Interface
mergedDeviceOptions []transform.MergedDeviceOption
noDotSoSymlinks bool
}
// New creates a new nvcdi library

View File

@ -140,3 +140,10 @@ func WithLibrarySearchPaths(paths []string) Option {
o.librarySearchPaths = paths
}
}
// WithNoDotSoSymlinks returns an Option that records whether the
// no-dot-so-symlinks setting is enabled on the nvcdi library being
// constructed.
func WithNoDotSoSymlinks(noDotSoSymlinks bool) Option {
	return func(lib *nvcdilib) {
		lib.noDotSoSymlinks = noDotSoSymlinks
	}
}