diff --git a/pkg/nvcdi/api.go b/pkg/nvcdi/api.go index 2988026f..8c37f277 100644 --- a/pkg/nvcdi/api.go +++ b/pkg/nvcdi/api.go @@ -45,3 +45,13 @@ const ( // This was added with v1.17.5 of the NVIDIA Container Toolkit. HookEnableCudaCompat = HookName("enable-cuda-compat") ) + +// A FeatureFlag names an optional feature of the CDI API that can be toggled on. +// All features are off by default; use WithFeatureFlag to enable one. +type FeatureFlag string + +const ( + // FeatureDisableNvsandboxUtils disables the use of nvsandboxutils when + // querying devices. When set, any already-configured library is discarded too. + FeatureDisableNvsandboxUtils = FeatureFlag("disable-nvsandbox-utils") +) diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index 97a39168..165a7136 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -56,6 +56,8 @@ type nvcdilib struct { mergedDeviceOptions []transform.MergedDeviceOption + featureFlags map[FeatureFlag]bool + disabledHooks disabledHooks hookCreator discover.HookCreator } @@ -64,6 +66,7 @@ type nvcdilib struct { func New(opts ...Option) (Interface, error) { l := &nvcdilib{ disabledHooks: make(disabledHooks), + featureFlags: make(map[FeatureFlag]bool), } for _, opt := range opts { opt(l) @@ -108,24 +111,7 @@ func New(opts ...Option) (Interface, error) { } l.nvmllib = nvml.New(nvmlOpts...) } - // TODO: Repeated calls to nvsandboxutils.Init and Shutdown are causing - // segmentation violations. Here we disabled nvsandbox utils unless explicitly - // specified. - // This will be reenabled as soon as we have more visibility into why this is - // happening and a mechanism to detect and disable this if required. 
- // if l.nvsandboxutilslib == nil { - // var nvsandboxutilsOpts []nvsandboxutils.LibraryOption - // // Set the library path for libnvidia-sandboxutils - // candidates, err := l.driver.Libraries().Locate("libnvidia-sandboxutils.so.1") - // if err != nil { - // l.logger.Warningf("Ignoring error in locating libnvidia-sandboxutils.so.1: %v", err) - // } else { - // libNvidiaSandboxutilsPath := candidates[0] - // l.logger.Infof("Using %v", libNvidiaSandboxutilsPath) - // nvsandboxutilsOpts = append(nvsandboxutilsOpts, nvsandboxutils.WithLibraryPath(libNvidiaSandboxutilsPath)) - // } - // l.nvsandboxutilslib = nvsandboxutils.New(nvsandboxutilsOpts...) - // } + l.nvsandboxutilslib = l.getNvsandboxUtilsLib() if l.devicelib == nil { l.devicelib = device.New(l.nvmllib) } @@ -231,3 +217,26 @@ func (l *nvcdilib) getCudaVersionNvsandboxutils() (string, error) { } return version, nil } + +// getNvsandboxUtilsLib returns the nvsandboxutils library to use for CDI spec +// generation: nil if FeatureDisableNvsandboxUtils is set, else the existing or a new one. +func (l *nvcdilib) getNvsandboxUtilsLib() nvsandboxutils.Interface { + if l.featureFlags[FeatureDisableNvsandboxUtils] { + return nil + } + if l.nvsandboxutilslib != nil { + return l.nvsandboxutilslib + } + + var nvsandboxutilsOpts []nvsandboxutils.LibraryOption + // Use the first located libnvidia-sandboxutils.so.1 as the library path; a lookup error is only logged and no path is set. NOTE(review): candidates[0] assumes Locate never returns an empty slice with a nil error - confirm. + candidates, err := l.driver.Libraries().Locate("libnvidia-sandboxutils.so.1") + if err != nil { + l.logger.Warningf("Ignoring error in locating libnvidia-sandboxutils.so.1: %v", err) + } else { + libNvidiaSandboxutilsPath := candidates[0] + l.logger.Infof("Using %v", libNvidiaSandboxutilsPath) + nvsandboxutilsOpts = append(nvsandboxutilsOpts, nvsandboxutils.WithLibraryPath(libNvidiaSandboxutilsPath)) + } + return nvsandboxutils.New(nvsandboxutilsOpts...) 
+} diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go index f38f2b4a..7c76f7fc 100644 --- a/pkg/nvcdi/options.go +++ b/pkg/nvcdi/options.go @@ -166,3 +166,14 @@ func WithDisabledHook(hook HookName) Option { o.disabledHooks[hook] = true } } + +// WithFeatureFlag returns an Option that toggles the specified feature flag on. +// Pass the option once per flag to enable several; repeating the same flag has no further effect. +func WithFeatureFlag(featureFlag FeatureFlag) Option { + return func(o *nvcdilib) { + if o.featureFlags == nil { + o.featureFlags = make(map[FeatureFlag]bool) + } + o.featureFlags[featureFlag] = true + } +}