mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-04-22 07:05:06 +00:00
Merge pull request #948 from elezar/add-compat-lib-hook
Add CUDA forward compatibility hook
This commit is contained in:
commit
f5680dd0cd
@ -21,6 +21,7 @@ import (
|
|||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod"
|
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod"
|
||||||
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks"
|
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks"
|
||||||
|
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat"
|
||||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache"
|
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
)
|
)
|
||||||
@ -32,5 +33,6 @@ func New(logger logger.Interface) []*cli.Command {
|
|||||||
ldcache.NewCommand(logger),
|
ldcache.NewCommand(logger),
|
||||||
symlinks.NewCommand(logger),
|
symlinks.NewCommand(logger),
|
||||||
chmod.NewCommand(logger),
|
chmod.NewCommand(logger),
|
||||||
|
cudacompat.NewCommand(logger),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
76
cmd/nvidia-cdi-hook/cudacompat/container-root.go
Normal file
76
cmd/nvidia-cdi-hook/cudacompat/container-root.go
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
/**
|
||||||
|
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
**/
|
||||||
|
|
||||||
|
package cudacompat
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/moby/sys/symlink"
|
||||||
|
)
|
||||||
|
|
||||||
|
// containerRoot is the path to the root filesystem of a container.
// Its methods interpret paths relative to this root.
type containerRoot string
|
||||||
|
|
||||||
|
// hasPath checks whether the specified path exists in the root.
|
||||||
|
func (r containerRoot) hasPath(path string) bool {
|
||||||
|
resolved, err := r.resolve(path)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// globFiles matches the specified pattern in the root.
|
||||||
|
// The files that match must be regular files.
|
||||||
|
func (r containerRoot) globFiles(pattern string) ([]string, error) {
|
||||||
|
patternPath, err := r.resolve(pattern)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
matches, err := filepath.Glob(patternPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var files []string
|
||||||
|
for _, match := range matches {
|
||||||
|
info, err := os.Lstat(match)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
// Ignore symlinks.
|
||||||
|
if info.Mode()&os.ModeSymlink != 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Ignore directories.
|
||||||
|
if info.IsDir() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
files = append(files, match)
|
||||||
|
}
|
||||||
|
return files, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolve returns the absolute path including root path.
|
||||||
|
// Symlinks are resolved, but are guaranteed to resolve in the root.
|
||||||
|
func (r containerRoot) resolve(path string) (string, error) {
|
||||||
|
absolute := filepath.Clean(filepath.Join(string(r), path))
|
||||||
|
return symlink.FollowSymlinkInScope(absolute, string(r))
|
||||||
|
}
|
221
cmd/nvidia-cdi-hook/cudacompat/cudacompat.go
Normal file
221
cmd/nvidia-cdi-hook/cudacompat/cudacompat.go
Normal file
@ -0,0 +1,221 @@
|
|||||||
|
/**
|
||||||
|
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
**/
|
||||||
|
|
||||||
|
package cudacompat
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/urfave/cli/v2"
|
||||||
|
|
||||||
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// cudaCompatPath is the path in the container that is checked for CUDA
	// forward compatibility libraries.
	cudaCompatPath = "/usr/local/cuda/compat"

	// cudaCompatLdsoconfdFilenamePattern is the filename pattern used for the
	// file in ld.so.conf.d that references the CUDA compat path.
	// The 00-compat prefix is chosen to ensure that these libraries have a
	// higher precedence than other libraries on the system.
	cudaCompatLdsoconfdFilenamePattern = "00-compat-*.conf"
)
|
||||||
|
|
||||||
|
type command struct {
|
||||||
|
logger logger.Interface
|
||||||
|
}
|
||||||
|
|
||||||
|
// options holds the values of the command line flags of the hook.
type options struct {
	// hostDriverVersion is set by the --host-driver-version flag.
	hostDriverVersion string
	// containerSpec is set by the hidden --container-spec flag.
	containerSpec string
}
|
||||||
|
|
||||||
|
// NewCommand constructs a cuda-compat command with the specified logger
|
||||||
|
func NewCommand(logger logger.Interface) *cli.Command {
|
||||||
|
c := command{
|
||||||
|
logger: logger,
|
||||||
|
}
|
||||||
|
return c.build()
|
||||||
|
}
|
||||||
|
|
||||||
|
// build the enable-cuda-compat command
|
||||||
|
func (m command) build() *cli.Command {
|
||||||
|
cfg := options{}
|
||||||
|
|
||||||
|
// Create the 'enable-cuda-compat' command
|
||||||
|
c := cli.Command{
|
||||||
|
Name: "enable-cuda-compat",
|
||||||
|
Usage: "This hook ensures that the folder containing the CUDA compat libraries is added to the ldconfig search path if required.",
|
||||||
|
Before: func(c *cli.Context) error {
|
||||||
|
return m.validateFlags(c, &cfg)
|
||||||
|
},
|
||||||
|
Action: func(c *cli.Context) error {
|
||||||
|
return m.run(c, &cfg)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Flags = []cli.Flag{
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "host-driver-version",
|
||||||
|
Usage: "Specify the host driver version. If the CUDA compat libraries detected in the container do not have a higher MAJOR version, the hook is a no-op.",
|
||||||
|
Destination: &cfg.hostDriverVersion,
|
||||||
|
},
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "container-spec",
|
||||||
|
Hidden: true,
|
||||||
|
Category: "testing-only",
|
||||||
|
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||||
|
Destination: &cfg.containerSpec,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
return &c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m command) validateFlags(_ *cli.Context, cfg *options) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m command) run(_ *cli.Context, cfg *options) error {
|
||||||
|
if cfg.hostDriverVersion == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to load container state: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
containerRootDir, err := s.GetContainerRoot()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to determined container root: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
containerForwardCompatDir, err := m.getContainerForwardCompatDir(containerRoot(containerRootDir), cfg.hostDriverVersion)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get container forward compat directory: %w", err)
|
||||||
|
}
|
||||||
|
if containerForwardCompatDir == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return m.createLdsoconfdFile(containerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m command) getContainerForwardCompatDir(containerRoot containerRoot, hostDriverVersion string) (string, error) {
|
||||||
|
if hostDriverVersion == "" {
|
||||||
|
m.logger.Debugf("Host driver version not specified")
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
if !containerRoot.hasPath(cudaCompatPath) {
|
||||||
|
m.logger.Debugf("No CUDA forward compatibility libraries directory in container")
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
if !containerRoot.hasPath("/etc/ld.so.cache") {
|
||||||
|
m.logger.Debugf("The container does not have an LDCache")
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
libs, err := containerRoot.globFiles(filepath.Join(cudaCompatPath, "libcuda.so.*.*"))
|
||||||
|
if err != nil {
|
||||||
|
m.logger.Warningf("Failed to find CUDA compat library: %w", err)
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(libs) == 0 {
|
||||||
|
m.logger.Debugf("No CUDA forward compatibility libraries container")
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(libs) != 1 {
|
||||||
|
m.logger.Warningf("Unexpected number of CUDA compat libraries in container: %v", libs)
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
compatDriverVersion := strings.TrimPrefix(filepath.Base(libs[0]), "libcuda.so.")
|
||||||
|
compatMajor, err := extractMajorVersion(compatDriverVersion)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to extract major version from %q: %v", compatDriverVersion, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
driverMajor, err := extractMajorVersion(hostDriverVersion)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to extract major version from %q: %v", hostDriverVersion, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if driverMajor >= compatMajor {
|
||||||
|
m.logger.Debugf("Compat major version is not greater than the host driver major version (%v >= %v)", hostDriverVersion, compatDriverVersion)
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
resolvedCompatDir := strings.TrimPrefix(filepath.Dir(libs[0]), string(containerRoot))
|
||||||
|
return resolvedCompatDir, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/ in the specified root.
|
||||||
|
// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and
|
||||||
|
// contains the specified directories on each line.
|
||||||
|
func (m command) createLdsoconfdFile(in containerRoot, pattern string, dirs ...string) error {
|
||||||
|
if len(dirs) == 0 {
|
||||||
|
m.logger.Debugf("No directories to add to /etc/ld.so.conf")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
ldsoconfdDir, err := in.resolve("/etc/ld.so.conf.d")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil {
|
||||||
|
return fmt.Errorf("failed to create ld.so.conf.d: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
configFile, err := os.CreateTemp(ldsoconfdDir, pattern)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create config file: %w", err)
|
||||||
|
}
|
||||||
|
defer configFile.Close()
|
||||||
|
|
||||||
|
m.logger.Debugf("Adding directories %v to %v", dirs, configFile.Name())
|
||||||
|
|
||||||
|
added := make(map[string]bool)
|
||||||
|
for _, dir := range dirs {
|
||||||
|
if added[dir] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_, err = configFile.WriteString(fmt.Sprintf("%s\n", dir))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to update config file: %w", err)
|
||||||
|
}
|
||||||
|
added[dir] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// The created file needs to be world readable for the cases where the container is run as a non-root user.
|
||||||
|
if err := configFile.Chmod(0644); err != nil {
|
||||||
|
return fmt.Errorf("failed to chmod config file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractMajorVersion returns the major version of the specified version
// string as an int. The major version is the part of the string before the
// first '.', or the entire string if it contains no '.'.
// An error is returned if this part is not a valid integer.
func extractMajorVersion(version string) (int, error) {
	major := version
	if dot := strings.IndexByte(version, '.'); dot >= 0 {
		major = version[:dot]
	}
	return strconv.Atoi(major)
}
|
182
cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go
Normal file
182
cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
/*
|
||||||
|
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package cudacompat
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCompatLibs(t *testing.T) {
|
||||||
|
logger, _ := testlog.NewNullLogger()
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
contents map[string]string
|
||||||
|
hostDriverVersion string
|
||||||
|
expectedContainerForwardCompatDir string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "empty root",
|
||||||
|
hostDriverVersion: "222.55.66",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "compat lib is newer; no ldcache",
|
||||||
|
contents: map[string]string{
|
||||||
|
"/usr/local/cuda/compat/libcuda.so.333.88.99": "",
|
||||||
|
},
|
||||||
|
hostDriverVersion: "222.55.66",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "compat lib is newer; ldcache",
|
||||||
|
contents: map[string]string{
|
||||||
|
"/etc/ld.so.cache": "",
|
||||||
|
"/usr/local/cuda/compat/libcuda.so.333.88.99": "",
|
||||||
|
},
|
||||||
|
hostDriverVersion: "222.55.66",
|
||||||
|
expectedContainerForwardCompatDir: "/usr/local/cuda/compat",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "compat lib is older; ldcache",
|
||||||
|
contents: map[string]string{
|
||||||
|
"/etc/ld.so.cache": "",
|
||||||
|
"/usr/local/cuda/compat/libcuda.so.111.88.99": "",
|
||||||
|
},
|
||||||
|
hostDriverVersion: "222.55.66",
|
||||||
|
expectedContainerForwardCompatDir: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "compat lib has same major version; ldcache",
|
||||||
|
contents: map[string]string{
|
||||||
|
"/etc/ld.so.cache": "",
|
||||||
|
"/usr/local/cuda/compat/libcuda.so.222.88.99": "",
|
||||||
|
},
|
||||||
|
hostDriverVersion: "222.55.66",
|
||||||
|
expectedContainerForwardCompatDir: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "numeric comparison is used; ldcache",
|
||||||
|
contents: map[string]string{
|
||||||
|
"/etc/ld.so.cache": "",
|
||||||
|
"/usr/local/cuda/compat/libcuda.so.222.88.99": "",
|
||||||
|
},
|
||||||
|
hostDriverVersion: "99.55.66",
|
||||||
|
expectedContainerForwardCompatDir: "/usr/local/cuda/compat",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "driver version empty; ldcache",
|
||||||
|
contents: map[string]string{
|
||||||
|
"/etc/ld.so.cache": "",
|
||||||
|
"/usr/local/cuda/compat/libcuda.so.222.88.99": "",
|
||||||
|
},
|
||||||
|
hostDriverVersion: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "symlinks are followed",
|
||||||
|
contents: map[string]string{
|
||||||
|
"/etc/ld.so.cache": "",
|
||||||
|
"/etc/alternatives/cuda/compat/libcuda.so.333.88.99": "",
|
||||||
|
"/usr/local/cuda": "symlink=/etc/alternatives/cuda",
|
||||||
|
},
|
||||||
|
hostDriverVersion: "222.55.66",
|
||||||
|
expectedContainerForwardCompatDir: "/etc/alternatives/cuda/compat",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "symlinks stay in container",
|
||||||
|
contents: map[string]string{
|
||||||
|
"/etc/ld.so.cache": "",
|
||||||
|
"/compat/libcuda.so.333.88.99": "",
|
||||||
|
"/usr/local/cuda": "symlink=../../../../../../",
|
||||||
|
},
|
||||||
|
hostDriverVersion: "222.55.66",
|
||||||
|
expectedContainerForwardCompatDir: "/compat",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
containerRootDir := t.TempDir()
|
||||||
|
for name, contents := range tc.contents {
|
||||||
|
target := filepath.Join(containerRootDir, name)
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(target), 0755))
|
||||||
|
|
||||||
|
if strings.HasPrefix(contents, "symlink=") {
|
||||||
|
require.NoError(t, os.Symlink(strings.TrimPrefix(contents, "symlink="), target))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
require.NoError(t, os.WriteFile(target, []byte(contents), 0600))
|
||||||
|
}
|
||||||
|
|
||||||
|
c := command{
|
||||||
|
logger: logger,
|
||||||
|
}
|
||||||
|
containerForwardCompatDir, err := c.getContainerForwardCompatDir(containerRoot(containerRootDir), tc.hostDriverVersion)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.EqualValues(t, tc.expectedContainerForwardCompatDir, containerForwardCompatDir)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpdateLdconfig(t *testing.T) {
|
||||||
|
logger, _ := testlog.NewNullLogger()
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
folders []string
|
||||||
|
expectedContents string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "no folders; have no contents",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "single folder is added",
|
||||||
|
folders: []string{"/usr/local/cuda/compat"},
|
||||||
|
expectedContents: "/usr/local/cuda/compat\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
containerRootDir := t.TempDir()
|
||||||
|
c := command{
|
||||||
|
logger: logger,
|
||||||
|
}
|
||||||
|
err := c.createLdsoconfdFile(containerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, tc.folders...)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
matches, err := filepath.Glob(filepath.Join(containerRootDir, "/etc/ld.so.conf.d/00-compat-*.conf"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
if tc.expectedContents == "" {
|
||||||
|
require.Empty(t, matches)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
require.Len(t, matches, 1)
|
||||||
|
contents, err := os.ReadFile(matches[0])
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.EqualValues(t, tc.expectedContents, string(contents))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -25,6 +25,12 @@ type features struct {
|
|||||||
// If this feature flag is not set to 'true' only host-rooted config paths
|
// If this feature flag is not set to 'true' only host-rooted config paths
|
||||||
// (i.e. paths starting with an '@' are considered valid)
|
// (i.e. paths starting with an '@' are considered valid)
|
||||||
AllowLDConfigFromContainer *feature `toml:"allow-ldconfig-from-container,omitempty"`
|
AllowLDConfigFromContainer *feature `toml:"allow-ldconfig-from-container,omitempty"`
|
||||||
|
// DisableCUDACompatLibHook, when enabled skips the injection of a specific
|
||||||
|
// hook to process CUDA compatibility libraries.
|
||||||
|
//
|
||||||
|
// Note: Since this mechanism replaces the logic in the `nvidia-container-cli`,
|
||||||
|
// toggling this feature has no effect if `allow-cuda-compat-libs-from-container` is enabled.
|
||||||
|
DisableCUDACompatLibHook *feature `toml:"disable-cuda-compat-lib-hook,omitempty"`
|
||||||
// DisableImexChannelCreation ensures that the implicit creation of
|
// DisableImexChannelCreation ensures that the implicit creation of
|
||||||
// requested IMEX channels is skipped when invoking the nvidia-container-cli.
|
// requested IMEX channels is skipped when invoking the nvidia-container-cli.
|
||||||
DisableImexChannelCreation *feature `toml:"disable-imex-channel-creation,omitempty"`
|
DisableImexChannelCreation *feature `toml:"disable-imex-channel-creation,omitempty"`
|
||||||
|
24
internal/discover/compat_libs.go
Normal file
24
internal/discover/compat_libs.go
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
package discover
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewCUDACompatHookDiscoverer creates a discoverer for a enable-cuda-compat hook.
|
||||||
|
// This hook is responsible for setting up CUDA compatibility in the container and depends on the host driver version.
|
||||||
|
func NewCUDACompatHookDiscoverer(logger logger.Interface, nvidiaCDIHookPath string, driver *root.Driver) Discover {
|
||||||
|
_, cudaVersionPattern := getCUDALibRootAndVersionPattern(logger, driver)
|
||||||
|
var args []string
|
||||||
|
if !strings.Contains(cudaVersionPattern, "*") {
|
||||||
|
args = append(args, "--host-driver-version="+cudaVersionPattern)
|
||||||
|
}
|
||||||
|
|
||||||
|
return CreateNvidiaCDIHook(
|
||||||
|
nvidiaCDIHookPath,
|
||||||
|
"enable-cuda-compat",
|
||||||
|
args...,
|
||||||
|
)
|
||||||
|
}
|
@ -23,6 +23,7 @@ import (
|
|||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -35,7 +36,7 @@ import (
|
|||||||
// NVIDIA_GDRCOPY=enabled
|
// NVIDIA_GDRCOPY=enabled
|
||||||
//
|
//
|
||||||
// If no devices are selected, no changes are made.
|
// If no devices are selected, no changes are made.
|
||||||
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) {
|
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, driver *root.Driver) (oci.SpecModifier, error) {
|
||||||
if devices := image.VisibleDevicesFromEnvVar(); len(devices) == 0 {
|
if devices := image.VisibleDevicesFromEnvVar(); len(devices) == 0 {
|
||||||
logger.Infof("No modification required; no devices requested")
|
logger.Infof("No modification required; no devices requested")
|
||||||
return nil, nil
|
return nil, nil
|
||||||
@ -78,5 +79,24 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
|
|||||||
discoverers = append(discoverers, d)
|
discoverers = append(discoverers, d)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !cfg.Features.AllowCUDACompatLibsFromContainer.IsEnabled() && !cfg.Features.DisableCUDACompatLibHook.IsEnabled() {
|
||||||
|
compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, cfg.NVIDIACTKConfig.Path, driver)
|
||||||
|
discoverers = append(discoverers, compatLibHookDiscoverer)
|
||||||
|
// For legacy mode, we also need to inject a hook to update the LDCache
|
||||||
|
// after we have modified the configuration.
|
||||||
|
if cfg.NVIDIAContainerRuntimeConfig.Mode == "legacy" {
|
||||||
|
ldcacheUpdateHookDiscoverer, err := discover.NewLDCacheUpdateHook(
|
||||||
|
logger,
|
||||||
|
discover.None{},
|
||||||
|
cfg.NVIDIACTKConfig.Path,
|
||||||
|
"",
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to construct ldcache update discoverer: %w", err)
|
||||||
|
}
|
||||||
|
discoverers = append(discoverers, ldcacheUpdateHookDiscoverer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...))
|
return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...))
|
||||||
}
|
}
|
||||||
|
@ -75,6 +75,8 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
|
|||||||
}
|
}
|
||||||
|
|
||||||
mode := info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode, image)
|
mode := info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode, image)
|
||||||
|
// We update the mode here so that we can continue passing just the config to other functions.
|
||||||
|
cfg.NVIDIAContainerRuntimeConfig.Mode = mode
|
||||||
modeModifier, err := newModeModifier(logger, mode, cfg, ociSpec, image)
|
modeModifier, err := newModeModifier(logger, mode, cfg, ociSpec, image)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -94,7 +96,7 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
|
|||||||
}
|
}
|
||||||
modifiers = append(modifiers, graphicsModifier)
|
modifiers = append(modifiers, graphicsModifier)
|
||||||
case "feature-gated":
|
case "feature-gated":
|
||||||
featureGatedModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, image)
|
featureGatedModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, image, driver)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -126,8 +128,8 @@ func supportedModifierTypes(mode string) []string {
|
|||||||
return []string{"nvidia-hook-remover", "mode"}
|
return []string{"nvidia-hook-remover", "mode"}
|
||||||
case "csv":
|
case "csv":
|
||||||
// For CSV mode we support mode and feature-gated modification.
|
// For CSV mode we support mode and feature-gated modification.
|
||||||
return []string{"nvidia-hook-remover", "mode", "feature-gated"}
|
return []string{"nvidia-hook-remover", "feature-gated", "mode"}
|
||||||
default:
|
default:
|
||||||
return []string{"mode", "graphics", "feature-gated"}
|
return []string{"feature-gated", "graphics", "mode"}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -97,6 +97,8 @@ func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nv
|
|||||||
libraryPaths,
|
libraryPaths,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TODO: The following should use the version directly.
|
||||||
|
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, nvidiaCDIHookPath, driver)
|
||||||
updateLDCache, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCDIHookPath, ldconfigPath)
|
updateLDCache, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCDIHookPath, ldconfigPath)
|
||||||
|
|
||||||
d := discover.Merge(
|
d := discover.Merge(
|
||||||
@ -105,6 +107,7 @@ func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nv
|
|||||||
version,
|
version,
|
||||||
nvidiaCDIHookPath,
|
nvidiaCDIHookPath,
|
||||||
),
|
),
|
||||||
|
cudaCompatLibHookDiscoverer,
|
||||||
updateLDCache,
|
updateLDCache,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user