[no-relnote] Refactor driver library discovery

This change aligns the driver file discovery with device discovery
and allows other sources such as nvsandboxutils to be added.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2024-12-05 18:06:04 +01:00
parent de230a7e60
commit be25223e7a
No known key found for this signature in database
7 changed files with 182 additions and 49 deletions

View File

@ -49,6 +49,7 @@ TRANSLATOR:
const: const:
- {action: accept, from: "^NVSANDBOXUTILS_"} - {action: accept, from: "^NVSANDBOXUTILS_"}
- {action: accept, from: "^nvSandboxUtils"} - {action: accept, from: "^nvSandboxUtils"}
- {action: replace, from: "^NVSANDBOXUTILS_255_MASK_", to: "MASK255_" }
- {action: replace, from: "^NVSANDBOXUTILS_"} - {action: replace, from: "^NVSANDBOXUTILS_"}
- {action: replace, from: "^nvSandboxUtils"} - {action: replace, from: "^nvSandboxUtils"}
- {action: accept, from: "^NV"} - {action: accept, from: "^NV"}

View File

@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
**/ **/
package nvcdi package dgpu
import ( import (
"fmt" "fmt"
@ -31,33 +31,22 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root" "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
) )
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation. // newNvmlDriverDiscoverer constructs a discoverer from the specified NVML library.
// The supplied NVML Library is used to query the expected driver version. func (o *options) newNvmlDriverDiscoverer() (discover.Discover, error) {
func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string, ldconfigPath string, version string) (discover.Discover, error) { libraries, err := o.newNvmlDriverLibraryDiscoverer()
return newDriverVersionDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
}
func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) {
libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err) return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
} }
ipcs, err := discover.NewIPCDiscoverer(logger, driver.Root) firmwares, err := o.newNvmlDriverFirmwareDiscoverer()
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
}
firmwares, err := NewDriverFirmwareDiscoverer(logger, driver.Root, version)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create discoverer for GSP firmware: %v", err) return nil, fmt.Errorf("failed to create discoverer for GSP firmware: %v", err)
} }
binaries := NewDriverBinariesDiscoverer(logger, driver.Root) binaries := o.newNvmlDriverBinariesDiscoverer()
d := discover.Merge( d := discover.Merge(
libraries, libraries,
ipcs,
firmwares, firmwares,
binaries, binaries,
) )
@ -65,32 +54,27 @@ func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nv
return d, nil return d, nil
} }
// NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version. // newNvmlDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version.
func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) { func (o *options) newNvmlDriverLibraryDiscoverer() (discover.Discover, error) {
libraryPaths, err := getVersionLibs(logger, driver, version) libraryPaths, err := getVersionLibs(o.logger, o.driver, o.version)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get libraries for driver version: %v", err) return nil, fmt.Errorf("failed to get libraries for driver version: %v", err)
} }
libraries := discover.NewMounts( libraries := discover.NewMounts(
logger, o.logger,
lookup.NewFileLocator( lookup.NewFileLocator(
lookup.WithLogger(logger), lookup.WithLogger(o.logger),
lookup.WithRoot(driver.Root), lookup.WithRoot(o.driver.Root),
), ),
driver.Root, o.driver.Root,
libraryPaths, libraryPaths,
) )
updateLDCache, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCDIHookPath, ldconfigPath) d := discover.WithDriverDotSoSymlinks(
d := discover.Merge(
discover.WithDriverDotSoSymlinks(
libraries, libraries,
version, o.version,
nvidiaCDIHookPath, o.nvidiaCDIHookPath,
),
updateLDCache,
) )
return d, nil return d, nil
@ -138,31 +122,31 @@ func getCustomFirmwareClassPath(logger logger.Interface) string {
return strings.TrimSpace(string(customFirmwareClassPath)) return strings.TrimSpace(string(customFirmwareClassPath))
} }
// NewDriverFirmwareDiscoverer creates a discoverer for GSP firmware associated with the specified driver version. // newNvmlDriverFirmwareDiscoverer creates a discoverer for GSP firmware associated with the specified driver version.
func NewDriverFirmwareDiscoverer(logger logger.Interface, driverRoot string, version string) (discover.Discover, error) { func (o *options) newNvmlDriverFirmwareDiscoverer() (discover.Discover, error) {
gspFirmwareSearchPaths, err := getFirmwareSearchPaths(logger) gspFirmwareSearchPaths, err := getFirmwareSearchPaths(o.logger)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get firmware search paths: %v", err) return nil, fmt.Errorf("failed to get firmware search paths: %v", err)
} }
gspFirmwarePaths := filepath.Join("nvidia", version, "gsp*.bin") gspFirmwarePaths := filepath.Join("nvidia", o.version, "gsp*.bin")
return discover.NewMounts( return discover.NewMounts(
logger, o.logger,
lookup.NewFileLocator( lookup.NewFileLocator(
lookup.WithLogger(logger), lookup.WithLogger(o.logger),
lookup.WithRoot(driverRoot), lookup.WithRoot(o.driver.Root),
lookup.WithSearchPaths(gspFirmwareSearchPaths...), lookup.WithSearchPaths(gspFirmwareSearchPaths...),
), ),
driverRoot, o.driver.Root,
[]string{gspFirmwarePaths}, []string{gspFirmwarePaths},
), nil ), nil
} }
// NewDriverBinariesDiscoverer creates a discoverer for GSP firmware associated with the GPU driver. // newNvmlDriverBinariesDiscoverer creates a discoverer for binaries associated with the specified driver version.
func NewDriverBinariesDiscoverer(logger logger.Interface, driverRoot string) discover.Discover { func (o *options) newNvmlDriverBinariesDiscoverer() discover.Discover {
return discover.NewMounts( return discover.NewMounts(
logger, o.logger,
lookup.NewExecutableLocator(logger, driverRoot), lookup.NewExecutableLocator(o.logger, o.driver.Root),
driverRoot, o.driver.Root,
[]string{ []string{
"nvidia-smi", /* System management interface */ "nvidia-smi", /* System management interface */
"nvidia-debugdump", /* GPU coredump utility */ "nvidia-debugdump", /* GPU coredump utility */

View File

@ -0,0 +1,31 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package dgpu
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
)
// newNvsandboxutilsDriverDiscoverer is the nvsandboxutils-backed counterpart
// of the NVML driver discoverer.
//
// When no nvsandboxutils library has been configured it returns (nil, nil),
// which callers must treat as "this source is not available". When a library
// is configured it currently always returns an error, since discovery through
// nvsandboxutils has not been implemented.
func (o *options) newNvsandboxutilsDriverDiscoverer() (discover.Discover, error) {
	if o.nvsandboxutilslib != nil {
		return nil, fmt.Errorf("nvsandboxutils driver discovery is not implemented")
	}
	return nil, nil
}

View File

@ -0,0 +1,74 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package dgpu
import (
"errors"
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
)
// NewDriverDiscoverer builds the discoverer for the files that belong to a
// driver installation.
//
// A driver version must be supplied through the options; an empty version is
// rejected with an error. Two back ends are tried, in this order:
// nvsandboxutils, then NVML. A back end that fails to construct contributes
// its error to a joined error; a back end that returns a nil discoverer is
// skipped. If no back end produced a discoverer, the joined error is returned.
//
// The discoverers that were constructed are passed to discover.FirstValid and
// wrapped with discover.WithCache. The result is merged with an ldcache update
// hook built on top of it and with the IPC socket discoverer for the driver
// root.
func NewDriverDiscoverer(opts ...Option) (discover.Discover, error) {
	o := new(opts...)

	if o.version == "" {
		return nil, fmt.Errorf("a version must be specified")
	}

	// The order of this slice is the order in which the back ends are
	// constructed and later handed to discover.FirstValid.
	constructors := []func() (discover.Discover, error){
		o.newNvsandboxutilsDriverDiscoverer,
		o.newNvmlDriverDiscoverer,
	}

	var (
		candidates []discover.Discover
		joined     error
	)
	for _, construct := range constructors {
		candidate, err := construct()
		switch {
		case err != nil:
			// TODO: Log a warning
			joined = errors.Join(joined, err)
		case candidate != nil:
			candidates = append(candidates, candidate)
		}
	}

	if len(candidates) == 0 {
		return nil, joined
	}

	driverFiles := discover.WithCache(discover.FirstValid(candidates...))

	// The error returned by NewLDCacheUpdateHook is discarded here.
	ldcacheHook, _ := discover.NewLDCacheUpdateHook(o.logger, driverFiles, o.nvidiaCDIHookPath, o.ldconfigPath)

	ipcSockets, err := discover.NewIPCDiscoverer(o.logger, o.driver.Root)
	if err != nil {
		return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
	}

	merged := discover.Merge(
		driverFiles,
		ldcacheHook,
		ipcSockets,
	)
	return merged, nil
}

View File

@ -18,13 +18,16 @@ package dgpu
import ( import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps" "github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils" "github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils"
) )
type options struct { type options struct {
logger logger.Interface logger logger.Interface
driver *root.Driver
devRoot string devRoot string
ldconfigPath string
nvidiaCDIHookPath string nvidiaCDIHookPath string
isMigDevice bool isMigDevice bool
@ -33,6 +36,9 @@ type options struct {
migCaps nvcaps.MigCaps migCaps nvcaps.MigCaps
migCapsError error migCapsError error
// version stores the driver version.
version string
nvsandboxutilslib nvsandboxutils.Interface nvsandboxutilslib nvsandboxutils.Interface
} }
@ -45,6 +51,19 @@ func WithDevRoot(root string) Option {
} }
} }
// WithDriver sets the driver whose Root is used when locating driver files.
func WithDriver(driver *root.Driver) Option {
	return func(o *options) {
		o.driver = driver
	}
}
// WithLdconfigPath records the location of the ldconfig executable; the value
// is forwarded to the ldcache update hook when one is created.
func WithLdconfigPath(path string) Option {
	return func(o *options) {
		o.ldconfigPath = path
	}
}
// WithLogger sets the logger for the library // WithLogger sets the logger for the library
func WithLogger(logger logger.Interface) Option { func WithLogger(logger logger.Interface) Option {
return func(l *options) { return func(l *options) {
@ -72,3 +91,9 @@ func WithNvsandboxuitilsLib(nvsandboxutilslib nvsandboxutils.Interface) Option {
l.nvsandboxutilslib = nvsandboxutilslib l.nvsandboxutilslib = nvsandboxutilslib
} }
} }
// WithVersion sets the driver version stored in the options.
func WithVersion(version string) Option {
	return func(o *options) {
		o.version = version
	}
}

View File

@ -20,6 +20,7 @@ import (
"fmt" "fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu"
) )
// newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device. // newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.
@ -41,7 +42,15 @@ func (l *nvmllib) newCommonNVMLDiscoverer(version string) (discover.Discover, er
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err) l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
} }
driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, version) driverFiles, err := dgpu.NewDriverDiscoverer(
dgpu.WithDevRoot(l.devRoot),
dgpu.WithDriver(l.driver),
dgpu.WithLdconfigPath(l.ldconfigPath),
dgpu.WithLogger(l.logger),
dgpu.WithNVIDIACDIHookPath(l.nvidiaCDIHookPath),
dgpu.WithNvsandboxuitilsLib(l.nvsandboxutilslib),
dgpu.WithVersion(version),
)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err) return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
} }

View File

@ -28,6 +28,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits" "github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils" "github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
) )
@ -76,10 +77,18 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) {
version, err := (*nvcdilib)(m).getDriverVersion() version, err := (*nvcdilib)(m).getDriverVersion()
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get CUDA version: %v", err) return nil, fmt.Errorf("failed to get driver version: %v", err)
} }
driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCDIHookPath, m.ldconfigPath, version) driver, err := dgpu.NewDriverDiscoverer(
dgpu.WithDevRoot(m.devRoot),
dgpu.WithDriver(m.driver),
dgpu.WithLdconfigPath(m.ldconfigPath),
dgpu.WithLogger(m.logger),
dgpu.WithNVIDIACDIHookPath(m.nvidiaCDIHookPath),
dgpu.WithNvsandboxuitilsLib(m.nvsandboxutilslib),
dgpu.WithVersion(version),
)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create driver library discoverer: %v", err) return nil, fmt.Errorf("failed to create driver library discoverer: %v", err)
} }