diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6e0eab38..3b1a34c1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@
 * Add transformers to deduplicate and simplify CDI specifications.
 * Generate a simplified CDI specification by default. This means that entities in the common edits in a spec are not included in device definitions.
 * Also return an error from the nvcdi.New constructor instead of panicing.
+* Detect XOrg libraries for injection and CDI spec generation.
 
 * [libnvidia-container] Fix segmentation fault when RPC initialization fails.
 * [libnvidia-container] Build centos variants of the NVIDIA Container Library with static libtirpc v1.3.2.
diff --git a/internal/discover/graphics.go b/internal/discover/graphics.go
index acba35bf..7d533c0e 100644
--- a/internal/discover/graphics.go
+++ b/internal/discover/graphics.go
@@ -20,11 +20,13 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"strings"
 
 	"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
 	"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
 	"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
 	"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
 	"github.com/sirupsen/logrus"
 )
 
@@ -44,9 +46,15 @@ func NewGraphicsDiscoverer(logger *logrus.Logger, devices image.VisibleDevices,
 
 	drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, cfg)
 
+	xorg, err := newXorgDiscoverer(logger, driverRoot, cfg.NvidiaCTKPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create Xorg discoverer: %v", err)
+	}
+
 	discover := Merge(
 		Merge(drmDeviceNodes, drmByPathSymlinks),
 		mounts,
+		xorg,
 	)
 
 	return discover, nil
@@ -243,6 +251,133 @@ func newDRMDeviceFilter(logger *logrus.Logger, devices image.VisibleDevices, dri
 	return filter, nil
 }
 
+// xorgHooks is a Discover implementation that only contributes hooks: it
+// requests the unversioned symlink for the versioned libglxserver_nvidia.so
+// library reported by the wrapped libraries discoverer.
+type xorgHooks struct {
+	libraries     Discover
+	driverVersion string
+	nvidiaCTKPath string
+}
+
+var _ Discover = (*xorgHooks)(nil)
+
+// newXorgDiscoverer creates a discoverer for the Xorg driver libraries, the
+// Xorg configuration snippet, and the hook that symlinks the versioned GLX
+// server library. The driver version is the suffix of the libcuda.so.<version>
+// file name returned by the CUDA locator configured with driverRoot.
+func newXorgDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string) (Discover, error) {
+	libCudaPaths, err := cuda.New(
+		cuda.WithLogger(logger),
+		cuda.WithDriverRoot(driverRoot),
+	).Locate(".*.*.*")
+	if err != nil {
+		return nil, fmt.Errorf("failed to locate libcuda.so: %v", err)
+	}
+	// Guard the index below: an empty result without an error must not panic.
+	if len(libCudaPaths) == 0 {
+		return nil, fmt.Errorf("failed to locate libcuda.so: no matching libraries found")
+	}
+	libcudaPath := libCudaPaths[0]
+
+	// TrimPrefix returns its input unchanged when the prefix is absent, so an
+	// unchanged name is rejected in addition to an empty version suffix.
+	libcudaName := filepath.Base(libcudaPath)
+	version := strings.TrimPrefix(libcudaName, "libcuda.so.")
+	if version == "" || version == libcudaName {
+		return nil, fmt.Errorf("failed to determine libcuda.so version from path: %q", libcudaPath)
+	}
+
+	// NOTE(review): libRoot comes from the located libcuda path and is searched
+	// together with WithRoot(driverRoot); confirm the two compose as intended.
+	libRoot := filepath.Dir(libcudaPath)
+	xorgLibs := NewMounts(
+		logger,
+		lookup.NewFileLocator(
+			lookup.WithLogger(logger),
+			lookup.WithRoot(driverRoot),
+			lookup.WithSearchPaths(libRoot, "/usr/lib/x86_64-linux-gnu"),
+			lookup.WithCount(1),
+		),
+		driverRoot,
+		[]string{
+			"nvidia/xorg/nvidia_drv.so",
+			fmt.Sprintf("nvidia/xorg/libglxserver_nvidia.so.%s", version),
+		},
+	)
+	hooks := xorgHooks{
+		libraries:     xorgLibs,
+		driverVersion: version,
+		nvidiaCTKPath: FindNvidiaCTK(logger, nvidiaCTKPath),
+	}
+
+	xorgConfig := NewMounts(
+		logger,
+		lookup.NewFileLocator(
+			lookup.WithLogger(logger),
+			lookup.WithRoot(driverRoot),
+			lookup.WithSearchPaths("/usr/share"),
+		),
+		driverRoot,
+		[]string{"X11/xorg.conf.d/10-nvidia.conf"},
+	)
+
+	d := Merge(
+		xorgLibs,
+		xorgConfig,
+		hooks,
+	)
+
+	return d, nil
+}
+
+// Devices returns no devices for Xorg
+func (m xorgHooks) Devices() ([]Device, error) {
+	return nil, nil
+}
+
+// Hooks returns a hook to create symlinks for Xorg libraries. No hook is
+// returned when the wrapped discoverer reports no mounts or none of them is
+// named libglxserver_nvidia.so.<driverVersion>.
+func (m xorgHooks) Hooks() ([]Hook, error) {
+	mounts, err := m.libraries.Mounts()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get mounts: %v", err)
+	}
+	if len(mounts) == 0 {
+		return nil, nil
+	}
+
+	var target string
+	for _, mount := range mounts {
+		filename := filepath.Base(mount.HostPath)
+		if filename == "libglxserver_nvidia.so."+m.driverVersion {
+			target = mount.Path
+		}
+	}
+
+	if target == "" {
+		return nil, nil
+	}
+
+	// The link is the mount path with the driver version suffix removed; the
+	// hook argument has the form <target file name>::<link path>.
+	link := strings.TrimSuffix(target, "."+m.driverVersion)
+	links := []string{fmt.Sprintf("%s::%s", filepath.Base(target), link)}
+	symlinkHook := CreateCreateSymlinkHook(
+		m.nvidiaCTKPath,
+		links,
+	)
+
+	return symlinkHook.Hooks()
+}
+
+// Mounts returns no mounts: the Xorg libraries are contributed by the
+// discoverer that newXorgDiscoverer merges alongside these hooks.
+func (m xorgHooks) Mounts() ([]Mount, error) {
+	return nil, nil
+}
+
 // selectDeviceByPath is a filter that allows devices to be selected by the path
 type selectDeviceByPath map[string]bool
 
diff --git a/internal/lookup/cuda/cuda.go b/internal/lookup/cuda/cuda.go
new file mode 100644
index 00000000..98485a5a
--- /dev/null
+++ b/internal/lookup/cuda/cuda.go
@@ -0,0 +1,103 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package cuda
+
+import (
+	"path/filepath"
+
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
+	"github.com/sirupsen/logrus"
+)
+
+// cudaLocator holds the logger and driver root used when locating libcuda.so.
+type cudaLocator struct {
+	logger     *logrus.Logger
+	driverRoot string
+}
+
+// Options is a function that configures a cudaLocator.
+type Options func(*cudaLocator)
+
+// WithLogger is an option that configures the logger used by the locator.
+func WithLogger(logger *logrus.Logger) Options {
+	return func(c *cudaLocator) {
+		c.logger = logger
+	}
+}
+
+// WithDriverRoot is an option that configures the driver root used by the locator.
+func WithDriverRoot(driverRoot string) Options {
+	return func(c *cudaLocator) {
+		c.driverRoot = driverRoot
+	}
+}
+
+// New creates a new CUDA library locator and applies the supplied options.
+// An unset logger defaults to the logrus standard logger and an unset driver
+// root defaults to "/".
+func New(opts ...Options) lookup.Locator {
+	c := &cudaLocator{}
+	for _, opt := range opts {
+		opt(c)
+	}
+
+	if c.logger == nil {
+		c.logger = logrus.StandardLogger()
+	}
+	if c.driverRoot == "" {
+		c.driverRoot = "/"
+	}
+
+	return c
+}
+
+// Locate returns the path to the libcuda.so.RMVERSION file.
+// libcuda.so is prefixed to the specified pattern. The LDCache is consulted
+// first; a fixed list of library directories is searched if it has no match.
+func (l *cudaLocator) Locate(pattern string) ([]string, error) {
+	fullPattern := "libcuda.so" + pattern
+
+	ldcacheLocator, err := lookup.NewLibraryLocator(l.logger, l.driverRoot)
+	if err != nil {
+		// The locator is not usable when its construction failed; skip it.
+		l.logger.Debugf("Failed to create LDCache locator: %v", err)
+	} else if candidates, err := ldcacheLocator.Locate("libcuda.so"); err == nil {
+		// Return the first cached candidate whose file name matches.
+		for _, c := range candidates {
+			if match, err := filepath.Match(fullPattern, filepath.Base(c)); err != nil || !match {
+				l.logger.Debugf("Skipping non-matching candidate %v: %v", c, err)
+				continue
+			}
+			return []string{c}, nil
+		}
+	}
+	// Log the full pattern so the message names the library being searched for.
+	l.logger.Debugf("Could not locate %q in LDCache: Checking predefined library paths.", fullPattern)
+
+	pathLocator := lookup.NewFileLocator(
+		lookup.WithLogger(l.logger),
+		lookup.WithRoot(l.driverRoot),
+		lookup.WithSearchPaths(
+			"/usr/lib64",
+			"/usr/lib/x86_64-linux-gnu",
+			"/usr/lib/aarch64-linux-gnu",
+		),
+		lookup.WithCount(1),
+	)
+
+	return pathLocator.Locate(fullPattern)
+}
diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go
index f623ae30..408da55a 100644
--- a/pkg/nvcdi/driver-nvml.go
+++ b/pkg/nvcdi/driver-nvml.go
@@ -23,6 +23,7 @@ import (
 
 	"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
 	"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
 	"github.com/sirupsen/logrus"
 	"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
 )
@@ -135,11 +136,10 @@ func
NewDriverBinariesDiscoverer(logger *logrus.Logger, driverRoot string) disco func getVersionLibs(logger *logrus.Logger, driverRoot string, version string) ([]string, error) { logger.Infof("Using driver version %v", version) - l := cudaLocator{ - logger: logger, - driverRoot: driverRoot, - } - libCudaPaths, err := l.Locate("libcuda.so." + version) + libCudaPaths, err := cuda.New( + cuda.WithLogger(logger), + cuda.WithDriverRoot(driverRoot), + ).Locate("." + version) if err != nil { return nil, fmt.Errorf("failed to locate libcuda.so.%v: %v", version, err) } @@ -167,43 +167,3 @@ func getVersionLibs(logger *logrus.Logger, driverRoot string, version string) ([ return relative, nil } - -type cudaLocator struct { - logger *logrus.Logger - driverRoot string -} - -// Locate returns the path to the libcuda.so.RMVERSION file. -func (l *cudaLocator) Locate(pattern string) ([]string, error) { - ldcacheLocator, err := lookup.NewLibraryLocator( - l.logger, - l.driverRoot, - ) - if err != nil { - l.logger.Debugf("Failed to create LDCache locator: %v", err) - } - candidates, err := ldcacheLocator.Locate("libcuda.so") - if err == nil { - for _, c := range candidates { - if match, err := filepath.Match(pattern, filepath.Base(c)); err != nil || !match { - l.logger.Debugf("Skipping non-matching candidate %v: %v", c, err) - continue - } - return []string{c}, nil - } - } - l.logger.Debugf("Could not locate %q in LDCache: Checking predefined library paths.", pattern) - - pathLocator := lookup.NewFileLocator( - lookup.WithLogger(l.logger), - lookup.WithRoot(l.driverRoot), - lookup.WithSearchPaths( - "/usr/lib64", - "/usr/lib/x86_64-linux-gnu", - "/usr/lib/aarch64-linux-gnu", - ), - lookup.WithCount(1), - ) - - return pathLocator.Locate(pattern) -} diff --git a/pkg/nvcdi/management.go b/pkg/nvcdi/management.go index 305023ff..6643c559 100644 --- a/pkg/nvcdi/management.go +++ b/pkg/nvcdi/management.go @@ -23,6 +23,7 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" 
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits" + "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec" "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" "github.com/container-orchestrated-devices/container-device-interface/specs-go" @@ -84,12 +85,10 @@ func (m *managementlib) getCudaVersion() (string, error) { return version, nil } - l := cudaLocator{ - logger: m.logger, - driverRoot: m.driverRoot, - } - - libCudaPaths, err := l.Locate("libcuda.so.*.*.*") + libCudaPaths, err := cuda.New( + cuda.WithLogger(m.logger), + cuda.WithDriverRoot(m.driverRoot), + ).Locate(".*.*.*") if err != nil { return "", fmt.Errorf("failed to locate libcuda.so: %v", err) }