mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-04-20 06:05:19 +00:00
Allow enable-cuda-compat hook to be disabled in CDI spec generation
This change adds support to the nvcdi package to opt out of specific hooks. Currently only the `enable-cuda-compat` hook is supported. This allows clients to generate a CDI spec that is compatible with older nvidia-cdi-hook CLIs. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
13bccdda73
commit
ac9eee956b
@ -53,3 +53,13 @@ type Interface interface {
|
|||||||
GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error)
|
GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error)
|
||||||
GetDeviceSpecsByID(...string) ([]specs.Device, error)
|
GetDeviceSpecsByID(...string) ([]specs.Device, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A HookName refers to one of the predefined set of CDI hooks that may be
|
||||||
|
// included in the generated CDI specification.
|
||||||
|
type HookName string
|
||||||
|
|
||||||
|
const (
|
||||||
|
// HookEnableCudaCompat refers to the hook used to enable CUDA Forward Compatibility.
|
||||||
|
// This was added with v1.17.5 of the NVIDIA Container Toolkit.
|
||||||
|
HookEnableCudaCompat = HookName("enable-cuda-compat")
|
||||||
|
)
|
||||||
|
@ -41,7 +41,7 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
|
|||||||
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
|
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, l.nvmllib)
|
driverFiles, err := l.NewDriverDiscoverer()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
|
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
|
||||||
}
|
}
|
||||||
|
@ -34,41 +34,41 @@ import (
|
|||||||
|
|
||||||
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
|
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
|
||||||
// The supplied NVML Library is used to query the expected driver version.
|
// The supplied NVML Library is used to query the expected driver version.
|
||||||
func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) {
|
func (l *nvmllib) NewDriverDiscoverer() (discover.Discover, error) {
|
||||||
if r := nvmllib.Init(); r != nvml.SUCCESS {
|
if r := l.nvmllib.Init(); r != nvml.SUCCESS {
|
||||||
return nil, fmt.Errorf("failed to initialize NVML: %v", r)
|
return nil, fmt.Errorf("failed to initialize NVML: %v", r)
|
||||||
}
|
}
|
||||||
defer func() {
|
defer func() {
|
||||||
if r := nvmllib.Shutdown(); r != nvml.SUCCESS {
|
if r := l.nvmllib.Shutdown(); r != nvml.SUCCESS {
|
||||||
logger.Warningf("failed to shutdown NVML: %v", r)
|
l.logger.Warningf("failed to shutdown NVML: %v", r)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
version, r := nvmllib.SystemGetDriverVersion()
|
version, r := l.nvmllib.SystemGetDriverVersion()
|
||||||
if r != nvml.SUCCESS {
|
if r != nvml.SUCCESS {
|
||||||
return nil, fmt.Errorf("failed to determine driver version: %v", r)
|
return nil, fmt.Errorf("failed to determine driver version: %v", r)
|
||||||
}
|
}
|
||||||
|
|
||||||
return newDriverVersionDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
|
return (*nvcdilib)(l).newDriverVersionDiscoverer(version)
|
||||||
}
|
}
|
||||||
|
|
||||||
func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) {
|
func (l *nvcdilib) newDriverVersionDiscoverer(version string) (discover.Discover, error) {
|
||||||
libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
|
libraries, err := l.NewDriverLibraryDiscoverer(version)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
|
return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
ipcs, err := discover.NewIPCDiscoverer(logger, driver.Root)
|
ipcs, err := discover.NewIPCDiscoverer(l.logger, l.driver.Root)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
|
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
firmwares, err := NewDriverFirmwareDiscoverer(logger, driver.Root, version)
|
firmwares, err := NewDriverFirmwareDiscoverer(l.logger, l.driver.Root, version)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create discoverer for GSP firmware: %v", err)
|
return nil, fmt.Errorf("failed to create discoverer for GSP firmware: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
binaries := NewDriverBinariesDiscoverer(logger, driver.Root)
|
binaries := NewDriverBinariesDiscoverer(l.logger, l.driver.Root)
|
||||||
|
|
||||||
d := discover.Merge(
|
d := discover.Merge(
|
||||||
libraries,
|
libraries,
|
||||||
@ -81,35 +81,41 @@ func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nv
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version.
|
// NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version.
|
||||||
func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) {
|
func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover, error) {
|
||||||
libraryPaths, err := getVersionLibs(logger, driver, version)
|
libraryPaths, err := getVersionLibs(l.logger, l.driver, version)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to get libraries for driver version: %v", err)
|
return nil, fmt.Errorf("failed to get libraries for driver version: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
libraries := discover.NewMounts(
|
libraries := discover.NewMounts(
|
||||||
logger,
|
l.logger,
|
||||||
lookup.NewFileLocator(
|
lookup.NewFileLocator(
|
||||||
lookup.WithLogger(logger),
|
lookup.WithLogger(l.logger),
|
||||||
lookup.WithRoot(driver.Root),
|
lookup.WithRoot(l.driver.Root),
|
||||||
),
|
),
|
||||||
driver.Root,
|
l.driver.Root,
|
||||||
libraryPaths,
|
libraryPaths,
|
||||||
)
|
)
|
||||||
|
|
||||||
// TODO: The following should use the version directly.
|
var discoverers []discover.Discover
|
||||||
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, nvidiaCDIHookPath, driver)
|
|
||||||
updateLDCache, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCDIHookPath, ldconfigPath)
|
|
||||||
|
|
||||||
d := discover.Merge(
|
driverDotSoSymlinksDiscoverer := discover.WithDriverDotSoSymlinks(
|
||||||
discover.WithDriverDotSoSymlinks(
|
|
||||||
libraries,
|
libraries,
|
||||||
version,
|
version,
|
||||||
nvidiaCDIHookPath,
|
l.nvidiaCDIHookPath,
|
||||||
),
|
|
||||||
cudaCompatLibHookDiscoverer,
|
|
||||||
updateLDCache,
|
|
||||||
)
|
)
|
||||||
|
discoverers = append(discoverers, driverDotSoSymlinksDiscoverer)
|
||||||
|
|
||||||
|
if l.HookIsSupported(HookEnableCudaCompat) {
|
||||||
|
// TODO: The following should use the version directly.
|
||||||
|
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.nvidiaCDIHookPath, l.driver)
|
||||||
|
discoverers = append(discoverers, cudaCompatLibHookDiscoverer)
|
||||||
|
}
|
||||||
|
|
||||||
|
updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.nvidiaCDIHookPath, l.ldconfigPath)
|
||||||
|
discoverers = append(discoverers, updateLDCache)
|
||||||
|
|
||||||
|
d := discover.Merge(discoverers...)
|
||||||
|
|
||||||
return d, nil
|
return d, nil
|
||||||
}
|
}
|
||||||
|
30
pkg/nvcdi/hooks.go
Normal file
30
pkg/nvcdi/hooks.go
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
/**
|
||||||
|
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
**/
|
||||||
|
|
||||||
|
package nvcdi
|
||||||
|
|
||||||
|
// disabledHooks allows individual hooks to be disabled.
|
||||||
|
type disabledHooks map[HookName]bool
|
||||||
|
|
||||||
|
// HookIsSupported checks whether a hook of the specified name is supported.
|
||||||
|
// Hooks must be explicitly disabled, meaning that if no disabled hooks are
|
||||||
|
// all hooks are supported.
|
||||||
|
func (l *nvcdilib) HookIsSupported(h HookName) bool {
|
||||||
|
if len(l.disabledHooks) == 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return !l.disabledHooks[h]
|
||||||
|
}
|
@ -66,11 +66,15 @@ type nvcdilib struct {
|
|||||||
infolib info.Interface
|
infolib info.Interface
|
||||||
|
|
||||||
mergedDeviceOptions []transform.MergedDeviceOption
|
mergedDeviceOptions []transform.MergedDeviceOption
|
||||||
|
|
||||||
|
disabledHooks disabledHooks
|
||||||
}
|
}
|
||||||
|
|
||||||
// New creates a new nvcdi library
|
// New creates a new nvcdi library
|
||||||
func New(opts ...Option) (Interface, error) {
|
func New(opts ...Option) (Interface, error) {
|
||||||
l := &nvcdilib{}
|
l := &nvcdilib{
|
||||||
|
disabledHooks: make(disabledHooks),
|
||||||
|
}
|
||||||
for _, opt := range opts {
|
for _, opt := range opts {
|
||||||
opt(l)
|
opt(l)
|
||||||
}
|
}
|
||||||
|
@ -80,7 +80,7 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) {
|
|||||||
return nil, fmt.Errorf("failed to get CUDA version: %v", err)
|
return nil, fmt.Errorf("failed to get CUDA version: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCDIHookPath, m.ldconfigPath, version)
|
driver, err := (*nvcdilib)(m).newDriverVersionDiscoverer(version)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create driver library discoverer: %v", err)
|
return nil, fmt.Errorf("failed to create driver library discoverer: %v", err)
|
||||||
}
|
}
|
||||||
|
@ -155,3 +155,14 @@ func WithLibrarySearchPaths(paths []string) Option {
|
|||||||
o.librarySearchPaths = paths
|
o.librarySearchPaths = paths
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithDisabledHook allows specific hooks to the disabled.
|
||||||
|
// This option can be specified multiple times for each hook.
|
||||||
|
func WithDisabledHook(hook HookName) Option {
|
||||||
|
return func(o *nvcdilib) {
|
||||||
|
if o.disabledHooks == nil {
|
||||||
|
o.disabledHooks = make(map[HookName]bool)
|
||||||
|
}
|
||||||
|
o.disabledHooks[hook] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user