2022-07-20 08:52:43 +00:00
/**
# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"fmt"
2022-09-29 12:47:04 +00:00
"os"
"path/filepath"
2023-03-07 19:57:24 +00:00
"strings"
2022-07-20 08:52:43 +00:00
2022-09-29 12:47:04 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
2023-03-22 12:27:43 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
2022-07-20 08:52:43 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
2023-03-07 19:57:24 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
2023-11-21 15:08:16 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
2022-07-20 08:52:43 +00:00
)
2024-01-23 00:48:11 +00:00
// NewDRMNodesDiscoverer returns a discoverer for the DRM device nodes associated with the specified visible devices.
2023-11-20 21:32:46 +00:00
//
// TODO: The logic for creating DRM devices should be consolidated between this
// and the logic for generating CDI specs for a single device. This is only used
// when applying OCI spec modifications to an incoming spec in "legacy" mode.
2024-04-24 08:47:45 +00:00
func NewDRMNodesDiscoverer ( logger logger . Interface , devices image . VisibleDevices , devRoot string , nvidiaCDIHookPath string ) ( Discover , error ) {
2023-11-14 15:57:37 +00:00
drmDeviceNodes , err := newDRMDeviceDiscoverer ( logger , devices , devRoot )
2022-11-23 15:26:30 +00:00
if err != nil {
return nil , fmt . Errorf ( "failed to create DRM device discoverer: %v" , err )
}
2024-04-24 08:47:45 +00:00
drmByPathSymlinks := newCreateDRMByPathSymlinks ( logger , drmDeviceNodes , devRoot , nvidiaCDIHookPath )
2022-11-23 15:26:30 +00:00
2023-11-20 21:32:46 +00:00
discover := Merge ( drmDeviceNodes , drmByPathSymlinks )
2022-11-23 15:26:30 +00:00
return discover , nil
}
// NewGraphicsMountsDiscoverer creates a discoverer for the mounts required by graphics tools such as vulkan.
2024-04-24 08:47:45 +00:00
func NewGraphicsMountsDiscoverer ( logger logger . Interface , driver * root . Driver , nvidiaCDIHookPath string ) ( Discover , error ) {
2024-05-10 14:12:49 +00:00
libraries := newGraphicsLibrariesDiscoverer ( logger , driver , nvidiaCDIHookPath )
2022-07-20 08:52:43 +00:00
2024-05-10 14:12:49 +00:00
configs := NewMounts (
2022-07-20 08:52:43 +00:00
logger ,
2023-11-22 20:57:03 +00:00
driver . Configs ( ) ,
2023-11-21 15:08:16 +00:00
driver . Root ,
2022-07-20 08:52:43 +00:00
[ ] string {
2022-11-25 13:19:59 +00:00
"glvnd/egl_vendor.d/10_nvidia.json" ,
"egl/egl_external_platform.d/15_nvidia_gbm.json" ,
"egl/egl_external_platform.d/10_nvidia_wayland.json" ,
2023-11-22 12:48:11 +00:00
"nvidia/nvoptix.bin" ,
2024-05-10 14:12:49 +00:00
"X11/xorg.conf.d/10-nvidia.conf" ,
"X11/xorg.conf.d/nvidia-drm-outputclass.conf" ,
2022-07-20 08:52:43 +00:00
} ,
)
discover := Merge (
libraries ,
2024-05-10 14:12:49 +00:00
configs ,
newVulkanConfigsDiscover ( logger , driver ) ,
2022-07-20 08:52:43 +00:00
)
return discover , nil
}
2022-09-29 12:47:04 +00:00
2024-05-10 14:12:49 +00:00
// newVulkanConfigsDiscover creates a discoverer for vulkan ICD files.
2024-06-06 11:42:47 +00:00
// For these files we search the standard driver config paths as well as the
// driver root itself. This allows us to support GKE installations where the
// vulkan ICD files are at {{ .driverRoot }}/vulkan instead of in /etc/vulkan.
2024-05-10 14:12:49 +00:00
func newVulkanConfigsDiscover ( logger logger . Interface , driver * root . Driver ) Discover {
2024-06-06 11:42:47 +00:00
locator := lookup . First ( driver . Configs ( ) , driver . Files ( ) )
return & mountsToContainerPath {
logger : logger ,
locator : locator ,
required : [ ] string {
"vulkan/icd.d/nvidia_icd.json" ,
"vulkan/icd.d/nvidia_layers.json" ,
"vulkan/implicit_layer.d/nvidia_layers.json" ,
} ,
containerRoot : "/etc" ,
}
}
2024-05-10 14:12:49 +00:00
// graphicsDriverLibraries wraps the discoverer for the graphics driver
// libraries. Its Mounts and Hooks methods post-process the mounts reported by
// the embedded discoverer.
type graphicsDriverLibraries struct {
	// Discover is the wrapped discoverer that locates the library files.
	Discover
	// logger is the logger supplied when the discoverer is constructed.
	logger logger.Interface
	// nvidiaCDIHookPath is the hook executable path passed on when the
	// symlink-creation hook is generated.
	nvidiaCDIHookPath string
}

// Compile-time check that *graphicsDriverLibraries satisfies Discover.
var _ Discover = (*graphicsDriverLibraries)(nil)
// newGraphicsLibrariesDiscoverer creates the discoverer for the graphics
// driver libraries and the XOrg modules.
//
// The directory and version suffix of libcuda.so are determined first; the
// version suffix is used to build the names of the versioned libraries and the
// directory seeds the XOrg module search paths. The two resulting mount
// discoverers are merged and wrapped in a graphicsDriverLibraries.
func newGraphicsLibrariesDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string) Discover {
	cudaLibRoot, cudaVersionPattern := getCUDALibRootAndVersionPattern(logger, driver)

	driverLibraryNames := []string{
		// The libnvidia-egl-gbm and libnvidia-egl-wayland libraries do not
		// have the RM version. Use the *.* pattern to match X.Y.Z versions.
		"libnvidia-egl-gbm.so.*.*",
		"libnvidia-egl-wayland.so.*.*",
		// We include the following libraries to have them available for
		// symlink creation below:
		// If CDI injection is used, these should already be detected as:
		// * libnvidia-allocator.so.RM_VERSION
		// * libnvidia-vulkan-producer.so.RM_VERSION
		// but need to be handled for the legacy case too.
		"libnvidia-allocator.so." + cudaVersionPattern,
		"libnvidia-vulkan-producer.so." + cudaVersionPattern,
	}
	libraries := NewMounts(logger, driver.Libraries(), driver.Root, driverLibraryNames)

	// The XOrg modules are searched for in a dedicated, ordered list of
	// directories; the locator is limited to a count of 1 per name.
	xorgLocator := lookup.NewFileLocator(
		lookup.WithLogger(logger),
		lookup.WithRoot(driver.Root),
		lookup.WithSearchPaths(buildXOrgSearchPaths(cudaLibRoot)...),
		lookup.WithCount(1),
	)
	xorgLibraryNames := []string{
		"nvidia_drv.so",
		"libglxserver_nvidia.so." + cudaVersionPattern,
	}
	xorgLibraries := NewMounts(logger, xorgLocator, driver.Root, xorgLibraryNames)

	return &graphicsDriverLibraries{
		Discover:          Merge(libraries, xorgLibraries),
		logger:            logger,
		nvidiaCDIHookPath: nvidiaCDIHookPath,
	}
}
2024-08-20 14:19:39 +00:00
// Mounts discovers the required libraries and filters out libnvidia-allocator.so.
// The library libnvidia-allocator.so is already handled by either the *.RM_VERSION
// injection or by libnvidia-container. We therefore filter it out here as a
// workaround for the case where libnvidia-container will re-mount this in the
// container, which causes issues with shared mount propagation.
//
// An error from the wrapped discoverer is returned wrapped, so that callers
// can still inspect the underlying cause.
func (d graphicsDriverLibraries) Mounts() ([]Mount, error) {
	mounts, err := d.Discover.Mounts()
	if err != nil {
		return nil, fmt.Errorf("failed to get library mounts: %w", err)
	}

	var filtered []Mount
	for _, mount := range mounts {
		// Only the file name is compared; the directory is irrelevant here.
		if d.isDriverLibrary(filepath.Base(mount.Path), "libnvidia-allocator.so") {
			continue
		}
		filtered = append(filtered, mount)
	}
	return filtered, nil
}
2024-05-10 14:12:49 +00:00
// Hooks returns the hooks that create the library symlinks required by the
// graphics drivers.
//
// The unfiltered mounts of the wrapped discoverer are inspected (so that
// libnvidia-allocator.so is still seen here even though Mounts filters it out)
// and a "<target>::<link path>" entry is generated for every symlink that is
// needed. If no symlinks are required, no hooks are returned.
func (d graphicsDriverLibraries) Hooks() ([]Hook, error) {
	mounts, err := d.Discover.Mounts()
	if err != nil {
		return nil, fmt.Errorf("failed to get library mounts: %w", err)
	}

	var links []string
	for _, mount := range mounts {
		dir, filename := filepath.Split(mount.Path)
		switch {
		case d.isDriverLibrary(filename, "libnvidia-allocator.so"):
			// gbm/nvidia-drm_gbm.so is a symlink to ../libnvidia-allocator.so.1 which
			// in turn symlinks to libnvidia-allocator.so.RM_VERSION and is created
			// when ldconfig is run in the container.

			// create libnvidia-allocator.so.1 -> libnvidia-allocator.so.RM_VERSION symlink
			links = append(links, fmt.Sprintf("%s::%s", filename, filepath.Join(dir, "libnvidia-allocator.so.1")))
			// create gbm/nvidia-drm_gbm.so -> ../libnvidia-allocator.so.1 symlink
			linkPath := filepath.Join(dir, "gbm", "nvidia-drm_gbm.so")
			links = append(links, fmt.Sprintf("%s::%s", "../libnvidia-allocator.so.1", linkPath))
		case d.isDriverLibrary(filename, "libnvidia-vulkan-producer.so"):
			// libnvidia-vulkan-producer.so is a direct symlink to libnvidia-vulkan-producer.so.RM_VERSION
			// create libnvidia-vulkan-producer.so -> libnvidia-vulkan-producer.so.RM_VERSION symlink
			linkPath := filepath.Join(dir, "libnvidia-vulkan-producer.so")
			links = append(links, fmt.Sprintf("%s::%s", filename, linkPath))
		case d.isDriverLibrary(filename, "libglxserver_nvidia.so"):
			// libglxserver_nvidia.so is a direct symlink to libglxserver_nvidia.so.RM_VERSION
			// create libglxserver_nvidia.so -> libglxserver_nvidia.so.RM_VERSION symlink
			linkPath := filepath.Join(dir, "libglxserver_nvidia.so")
			links = append(links, fmt.Sprintf("%s::%s", filename, linkPath))
		}
	}

	if len(links) == 0 {
		return nil, nil
	}

	hooks := CreateCreateSymlinkHook(d.nvidiaCDIHookPath, links)
	return hooks.Hooks()
}
// isDriverLibrary reports whether filename is a versioned instance of the
// driver library libraryName, i.e. whether it matches the glob
// "<libraryName>.*.*". A single trailing "." on libraryName is ignored.
func (d graphicsDriverLibraries) isDriverLibrary(filename string, libraryName string) bool {
	// TODO: Instead of `.*.*` we could use the driver version.
	versionedGlob := strings.TrimSuffix(libraryName, ".") + ".*.*"

	// filepath.Match only fails for a malformed pattern, in which case it
	// also reports no match; such a name is simply not a driver library.
	matched, err := filepath.Match(versionedGlob, filename)
	return err == nil && matched
}
// getCUDALibRootAndVersionPattern returns the parent directory and the version
// suffix of the libcuda.so.*.* library at the driver root.
// If the library cannot be located an empty root is returned.
// If the version string cannot be extracted, the generic *.* pattern is returned.
func getCUDALibRootAndVersionPattern ( logger logger . Interface , driver * root . Driver ) ( string , string ) {
libCudaPaths , err := cuda . New (
driver . Libraries ( ) ,
) . Locate ( ".*.*" )
if err != nil {
logger . Warningf ( "failed to locate libcuda.so: %v; using *.*" , err )
return "" , "*.*"
}
libcudaPath := libCudaPaths [ 0 ]
libRoot := filepath . Dir ( libcudaPath )
version := strings . TrimPrefix ( filepath . Base ( libcudaPath ) , "libcuda.so." )
if version == "" {
logger . Warningf ( "failed to extract version from %v; using *.*" , libcudaPath )
version = "*.*"
}
2024-06-24 13:48:09 +00:00
return driver . RelativeToRoot ( libRoot ) , version
2024-05-10 14:12:49 +00:00
}
// buildXOrgSearchPaths returns the ordered list of search paths for XOrg files.
//
// If libRoot is non-empty, <libRoot>/nvidia/xorg comes first, followed by the
// module directories below <libRoot>/xorg. These are followed by the module
// directories below each of the standard XOrg roots. For every root the module
// directories are listed in the order: modules/drivers, modules/extensions,
// modules/updates/drivers, modules/updates/extensions.
func buildXOrgSearchPaths(libRoot string) []string {
	moduleDirs := []string{
		"modules/drivers",
		"modules/extensions",
		"modules/updates/drivers",
		"modules/updates/extensions",
	}
	standardRoots := []string{
		"/usr/lib/xorg",
		"/usr/lib64/xorg",
		"/usr/X11R6/lib",
		"/usr/X11R6/lib64",
	}

	var (
		paths []string
		roots []string
	)
	if libRoot != "" {
		paths = append(paths, filepath.Join(libRoot, "nvidia/xorg"))
		roots = append(roots, filepath.Join(libRoot, "xorg"))
	}
	roots = append(roots, standardRoots...)

	for _, xorgRoot := range roots {
		for _, moduleDir := range moduleDirs {
			paths = append(paths, filepath.Join(xorgRoot, moduleDir))
		}
	}
	return paths
}
2022-09-29 12:47:04 +00:00
type drmDevicesByPath struct {
None
2024-04-24 08:47:45 +00:00
logger logger . Interface
nvidiaCDIHookPath string
devRoot string
devicesFrom Discover
2022-09-29 12:47:04 +00:00
}
// newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer
2024-04-24 08:47:45 +00:00
func newCreateDRMByPathSymlinks ( logger logger . Interface , devices Discover , devRoot string , nvidiaCDIHookPath string ) Discover {
2022-09-29 12:47:04 +00:00
d := drmDevicesByPath {
2024-04-24 08:47:45 +00:00
logger : logger ,
nvidiaCDIHookPath : nvidiaCDIHookPath ,
devRoot : devRoot ,
devicesFrom : devices ,
2022-09-29 12:47:04 +00:00
}
return & d
}
// Hooks returns a hook to create the symlinks from the required CSV files
func ( d drmDevicesByPath ) Hooks ( ) ( [ ] Hook , error ) {
devices , err := d . devicesFrom . Devices ( )
if err != nil {
return nil , fmt . Errorf ( "failed to discover devices for by-path symlinks: %v" , err )
}
if len ( devices ) == 0 {
return nil , nil
}
2022-11-23 20:49:02 +00:00
links , err := d . getSpecificLinkArgs ( devices )
if err != nil {
return nil , fmt . Errorf ( "failed to determine specific links: %v" , err )
}
if len ( links ) == 0 {
return nil , nil
}
2022-09-29 12:47:04 +00:00
2023-01-19 09:37:10 +00:00
var args [ ] string
2022-09-29 12:47:04 +00:00
for _ , l := range links {
args = append ( args , "--link" , l )
}
2024-04-24 08:47:45 +00:00
hook := CreateNvidiaCDIHook (
d . nvidiaCDIHookPath ,
2023-01-19 09:37:10 +00:00
"create-symlinks" ,
args ... ,
)
2022-09-29 12:47:04 +00:00
2023-01-19 09:37:10 +00:00
return [ ] Hook { hook } , nil
2022-09-29 12:47:04 +00:00
}
2024-01-23 00:48:11 +00:00
// getSpecificLinkArgs returns the required specific links that need to be created
2022-09-29 12:47:04 +00:00
func ( d drmDevicesByPath ) getSpecificLinkArgs ( devices [ ] Device ) ( [ ] string , error ) {
selectedDevices := make ( map [ string ] bool )
for _ , d := range devices {
selectedDevices [ filepath . Base ( d . HostPath ) ] = true
}
2022-12-02 10:38:40 +00:00
linkLocator := lookup . NewFileLocator (
lookup . WithLogger ( d . logger ) ,
2023-11-14 15:57:37 +00:00
lookup . WithRoot ( d . devRoot ) ,
2022-12-02 10:38:40 +00:00
)
2022-09-29 12:47:04 +00:00
candidates , err := linkLocator . Locate ( "/dev/dri/by-path/pci-*-*" )
if err != nil {
2022-11-23 20:49:02 +00:00
d . logger . Warningf ( "Failed to locate by-path links: %v; ignoring" , err )
return nil , nil
2022-09-29 12:47:04 +00:00
}
var links [ ] string
for _ , c := range candidates {
device , err := os . Readlink ( c )
if err != nil {
d . logger . Warningf ( "Failed to evaluate symlink %v; ignoring" , c )
continue
}
if selectedDevices [ filepath . Base ( device ) ] {
d . logger . Debugf ( "adding device symlink %v -> %v" , c , device )
links = append ( links , fmt . Sprintf ( "%v::%v" , device , c ) )
}
}
return links , nil
}
// newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices.
2023-11-14 15:57:37 +00:00
func newDRMDeviceDiscoverer ( logger logger . Interface , devices image . VisibleDevices , devRoot string ) ( Discover , error ) {
2023-11-20 14:03:36 +00:00
allDevices := NewCharDeviceDiscoverer (
2022-09-29 12:47:04 +00:00
logger ,
2023-11-14 15:57:37 +00:00
devRoot ,
2022-09-29 12:47:04 +00:00
[ ] string {
"/dev/dri/card*" ,
"/dev/dri/renderD*" ,
} ,
)
2024-01-23 00:48:11 +00:00
filter , err := newDRMDeviceFilter ( devices , devRoot )
2022-09-29 12:47:04 +00:00
if err != nil {
return nil , fmt . Errorf ( "failed to construct DRM device filter: %v" , err )
}
// We return a discoverer that applies the DRM device filter created above to all discovered DRM device nodes.
2024-01-23 00:48:11 +00:00
d := newFilteredDiscoverer (
2022-09-29 12:47:04 +00:00
logger ,
allDevices ,
filter ,
)
return d , err
}
// newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices.
2024-01-23 00:48:11 +00:00
func newDRMDeviceFilter ( devices image . VisibleDevices , devRoot string ) ( Filter , error ) {
2023-11-14 15:57:37 +00:00
gpuInformationPaths , err := proc . GetInformationFilePaths ( devRoot )
2022-09-29 12:47:04 +00:00
if err != nil {
return nil , fmt . Errorf ( "failed to read GPU information: %v" , err )
}
var selectedBusIds [ ] string
for _ , f := range gpuInformationPaths {
info , err := proc . ParseGPUInformationFile ( f )
if err != nil {
return nil , fmt . Errorf ( "failed to parse %v: %v" , f , err )
}
uuid := info [ proc . GPUInfoGPUUUID ]
busID := info [ proc . GPUInfoBusLocation ]
minor := info [ proc . GPUInfoDeviceMinor ]
if devices . Has ( minor ) || devices . Has ( uuid ) || devices . Has ( busID ) {
selectedBusIds = append ( selectedBusIds , busID )
}
}
filter := make ( selectDeviceByPath )
for _ , busID := range selectedBusIds {
drmDeviceNodes , err := drm . GetDeviceNodesByBusID ( busID )
if err != nil {
return nil , fmt . Errorf ( "failed to determine DRM devices for %v: %v" , busID , err )
}
for _ , drmDeviceNode := range drmDeviceNodes {
2023-08-25 14:55:04 +00:00
filter [ drmDeviceNode ] = true
2022-09-29 12:47:04 +00:00
}
}
return filter , nil
}
// selectDeviceByPath is a filter that allows devices to be selected by the path.
// A device is selected if its path is present in the map with a true value.
type selectDeviceByPath map[string]bool

// Compile-time check that *selectDeviceByPath satisfies Filter.
var _ Filter = (*selectDeviceByPath)(nil)

// DeviceIsSelected determines whether the device's path has been selected.
// Paths that are absent from the map are not selected.
func (s selectDeviceByPath) DeviceIsSelected(device Device) bool {
	selected, found := s[device.Path]
	return found && selected
}

// MountIsSelected is always true; mounts are not filtered.
func (s selectDeviceByPath) MountIsSelected(_ Mount) bool {
	return true
}

// HookIsSelected is always true; hooks are not filtered.
func (s selectDeviceByPath) HookIsSelected(_ Hook) bool {
	return true
}