mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-24 04:54:00 +00:00
Merge pull request #694 from elezar/add-opt-in-to-sockets
Skip injection of nvidia-persistenced socket by default
This commit is contained in:
commit
b061446694
@ -89,6 +89,12 @@ func doPrestart() {
|
||||
rootfs := getRootfsPath(container)
|
||||
|
||||
args := []string{getCLIPath(cli)}
|
||||
|
||||
// Only include the nvidia-persistenced socket if it is explicitly enabled.
|
||||
if !hook.Features.IncludePersistencedSocket.IsEnabled() {
|
||||
args = append(args, "--no-persistenced")
|
||||
}
|
||||
|
||||
if cli.Root != "" {
|
||||
args = append(args, fmt.Sprintf("--root=%s", cli.Root))
|
||||
}
|
||||
|
@ -60,6 +60,8 @@ type options struct {
|
||||
files cli.StringSlice
|
||||
ignorePatterns cli.StringSlice
|
||||
}
|
||||
|
||||
includePersistencedSocket bool
|
||||
}
|
||||
|
||||
// NewCommand constructs a generate-cdi command with the specified logger
|
||||
@ -169,6 +171,11 @@ func (m command) build() *cli.Command {
|
||||
Usage: "Specify a pattern the CSV mount specifications.",
|
||||
Destination: &opts.csv.ignorePatterns,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "include-persistenced-socket",
|
||||
Usage: "Include the nvidia-persistenced socket in the generated CDI specification.",
|
||||
Destination: &opts.includePersistencedSocket,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
@ -273,6 +280,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
|
||||
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
|
||||
nvcdi.WithCSVFiles(opts.csv.files.Value()),
|
||||
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),
|
||||
nvcdi.WithOptInFeature("include-persistenced-socket", opts.includePersistencedSocket),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create CDI library: %v", err)
|
||||
|
@ -19,10 +19,11 @@ package config
|
||||
type featureName string
|
||||
|
||||
const (
|
||||
FeatureGDS = featureName("gds")
|
||||
FeatureMOFED = featureName("mofed")
|
||||
FeatureNVSWITCH = featureName("nvswitch")
|
||||
FeatureGDRCopy = featureName("gdrcopy")
|
||||
FeatureGDS = featureName("gds")
|
||||
FeatureMOFED = featureName("mofed")
|
||||
FeatureNVSWITCH = featureName("nvswitch")
|
||||
FeatureGDRCopy = featureName("gdrcopy")
|
||||
FeatureIncludePersistencedSocket = featureName("include-persistenced-socket")
|
||||
)
|
||||
|
||||
// features specifies a set of named features.
|
||||
@ -31,53 +32,57 @@ type features struct {
|
||||
MOFED *feature `toml:"mofed,omitempty"`
|
||||
NVSWITCH *feature `toml:"nvswitch,omitempty"`
|
||||
GDRCopy *feature `toml:"gdrcopy,omitempty"`
|
||||
// IncludePersistencedSocket enables the injection of the nvidia-persistenced
|
||||
// socket into containers.
|
||||
IncludePersistencedSocket *feature `toml:"include-persistenced-socket,omitempty"`
|
||||
}
|
||||
|
||||
type feature bool
|
||||
|
||||
// IsEnabled checks whether a specified named feature is enabled.
|
||||
// IsEnabledInEnvironment checks whether a specified named feature is enabled.
|
||||
// An optional list of environments to check for feature-specific environment
|
||||
// variables can also be supplied.
|
||||
func (fs features) IsEnabled(n featureName, in ...getenver) bool {
|
||||
featureEnvvars := map[featureName]string{
|
||||
FeatureGDS: "NVIDIA_GDS",
|
||||
FeatureMOFED: "NVIDIA_MOFED",
|
||||
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
|
||||
FeatureGDRCopy: "NVIDIA_GDRCOPY",
|
||||
}
|
||||
|
||||
envvar := featureEnvvars[n]
|
||||
func (fs features) IsEnabledInEnvironment(n featureName, in ...getenver) bool {
|
||||
switch n {
|
||||
// Features with envvar overrides
|
||||
case FeatureGDS:
|
||||
return fs.GDS.isEnabled(envvar, in...)
|
||||
return fs.GDS.isEnabledWithEnvvarOverride("NVIDIA_GDS", in...)
|
||||
case FeatureMOFED:
|
||||
return fs.MOFED.isEnabled(envvar, in...)
|
||||
return fs.MOFED.isEnabledWithEnvvarOverride("NVIDIA_MOFED", in...)
|
||||
case FeatureNVSWITCH:
|
||||
return fs.NVSWITCH.isEnabled(envvar, in...)
|
||||
return fs.NVSWITCH.isEnabledWithEnvvarOverride("NVIDIA_NVSWITCH", in...)
|
||||
case FeatureGDRCopy:
|
||||
return fs.GDRCopy.isEnabled(envvar, in...)
|
||||
return fs.GDRCopy.isEnabledWithEnvvarOverride("NVIDIA_GDRCOPY", in...)
|
||||
// Features without envvar overrides
|
||||
case FeatureIncludePersistencedSocket:
|
||||
return fs.IncludePersistencedSocket.IsEnabled()
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// isEnabled checks whether a feature is enabled.
|
||||
// If the enabled value is explicitly set, this is returned, otherwise the
|
||||
// associated envvar is checked in the specified getenver for the string "enabled"
|
||||
// A CUDA container / image can be passed here.
|
||||
func (f *feature) isEnabled(envvar string, ins ...getenver) bool {
|
||||
// IsEnabled checks whether a feature is enabled.
|
||||
func (f *feature) IsEnabled() bool {
|
||||
if f != nil {
|
||||
return bool(*f)
|
||||
}
|
||||
if envvar == "" {
|
||||
return false
|
||||
}
|
||||
for _, in := range ins {
|
||||
if in.Getenv(envvar) == "enabled" {
|
||||
return true
|
||||
return false
|
||||
}
|
||||
|
||||
// isEnabledWithEnvvarOverride checks whether a feature is enabled and allows an envvar to overide the feature.
|
||||
// If the enabled value is explicitly set, this is returned, otherwise the
|
||||
// associated envvar is checked in the specified getenver for the string "enabled"
|
||||
// A CUDA container / image can be passed here.
|
||||
func (f *feature) isEnabledWithEnvvarOverride(envvar string, ins ...getenver) bool {
|
||||
if envvar != "" {
|
||||
for _, in := range ins {
|
||||
if in.Getenv(envvar) == "enabled" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
|
||||
return f.IsEnabled()
|
||||
}
|
||||
|
||||
type getenver interface {
|
||||
|
@ -24,7 +24,13 @@ import (
|
||||
type ipcMounts mounts
|
||||
|
||||
// NewIPCDiscoverer creats a discoverer for NVIDIA IPC sockets.
|
||||
func NewIPCDiscoverer(logger logger.Interface, driverRoot string) (Discover, error) {
|
||||
func NewIPCDiscoverer(logger logger.Interface, driverRoot string, includePersistencedSocket bool) (Discover, error) {
|
||||
var requiredSockets []string
|
||||
if includePersistencedSocket {
|
||||
requiredSockets = append(requiredSockets, "/nvidia-persistenced/socket")
|
||||
}
|
||||
requiredSockets = append(requiredSockets, "/nvidia-fabricmanager/socket")
|
||||
|
||||
sockets := newMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
@ -34,10 +40,7 @@ func NewIPCDiscoverer(logger logger.Interface, driverRoot string) (Discover, err
|
||||
lookup.WithCount(1),
|
||||
),
|
||||
driverRoot,
|
||||
[]string{
|
||||
"/nvidia-persistenced/socket",
|
||||
"/nvidia-fabricmanager/socket",
|
||||
},
|
||||
requiredSockets,
|
||||
)
|
||||
|
||||
mps := newMounts(
|
||||
|
@ -189,6 +189,7 @@ func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devic
|
||||
nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
|
||||
nvcdi.WithVendor("runtime.nvidia.com"),
|
||||
nvcdi.WithClass("gpu"),
|
||||
nvcdi.WithOptInFeature("include-persistenced-socket", cfg.Features.IncludePersistencedSocket.IsEnabled()),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct CDI library: %w", err)
|
||||
|
@ -46,7 +46,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
|
||||
driverRoot := cfg.NVIDIAContainerCLIConfig.Root
|
||||
devRoot := cfg.NVIDIAContainerCLIConfig.Root
|
||||
|
||||
if cfg.Features.IsEnabled(config.FeatureGDS, image) {
|
||||
if cfg.Features.IsEnabledInEnvironment(config.FeatureGDS, image) {
|
||||
d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %w", err)
|
||||
@ -54,7 +54,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
|
||||
discoverers = append(discoverers, d)
|
||||
}
|
||||
|
||||
if cfg.Features.IsEnabled(config.FeatureMOFED, image) {
|
||||
if cfg.Features.IsEnabledInEnvironment(config.FeatureMOFED, image) {
|
||||
d, err := discover.NewMOFEDDiscoverer(logger, devRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct discoverer for MOFED devices: %w", err)
|
||||
@ -62,7 +62,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
|
||||
discoverers = append(discoverers, d)
|
||||
}
|
||||
|
||||
if cfg.Features.IsEnabled(config.FeatureNVSWITCH, image) {
|
||||
if cfg.Features.IsEnabledInEnvironment(config.FeatureNVSWITCH, image) {
|
||||
d, err := discover.NewNvSwitchDiscoverer(logger, devRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct discoverer for NVSWITCH devices: %w", err)
|
||||
@ -70,7 +70,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
|
||||
discoverers = append(discoverers, d)
|
||||
}
|
||||
|
||||
if cfg.Features.IsEnabled(config.FeatureGDRCopy, image) {
|
||||
if cfg.Features.IsEnabledInEnvironment(config.FeatureGDRCopy, image) {
|
||||
d, err := discover.NewGDRCopyDiscoverer(logger, devRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct discoverer for GDRCopy devices: %w", err)
|
||||
|
@ -41,7 +41,7 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
|
||||
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
|
||||
}
|
||||
|
||||
driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, l.nvmllib)
|
||||
driverFiles, err := l.NewDriverDiscoverer()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
|
||||
}
|
||||
|
@ -34,41 +34,41 @@ import (
|
||||
|
||||
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
|
||||
// The supplied NVML Library is used to query the expected driver version.
|
||||
func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) {
|
||||
if r := nvmllib.Init(); r != nvml.SUCCESS {
|
||||
func (l *nvmllib) NewDriverDiscoverer() (discover.Discover, error) {
|
||||
if r := l.nvmllib.Init(); r != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("failed to initialize NVML: %v", r)
|
||||
}
|
||||
defer func() {
|
||||
if r := nvmllib.Shutdown(); r != nvml.SUCCESS {
|
||||
logger.Warningf("failed to shutdown NVML: %v", r)
|
||||
if r := l.nvmllib.Shutdown(); r != nvml.SUCCESS {
|
||||
l.logger.Warningf("failed to shutdown NVML: %v", r)
|
||||
}
|
||||
}()
|
||||
|
||||
version, r := nvmllib.SystemGetDriverVersion()
|
||||
version, r := l.nvmllib.SystemGetDriverVersion()
|
||||
if r != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("failed to determine driver version: %v", r)
|
||||
}
|
||||
|
||||
return newDriverVersionDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
|
||||
return (*nvcdilib)(l).newDriverVersionDiscoverer(version)
|
||||
}
|
||||
|
||||
func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) {
|
||||
libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
|
||||
func (l *nvcdilib) newDriverVersionDiscoverer(version string) (discover.Discover, error) {
|
||||
libraries, err := NewDriverLibraryDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, version)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
|
||||
}
|
||||
|
||||
ipcs, err := discover.NewIPCDiscoverer(logger, driver.Root)
|
||||
ipcs, err := discover.NewIPCDiscoverer(l.logger, l.driver.Root, l.optInFeatures["include-persistenced-socket"])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
|
||||
}
|
||||
|
||||
firmwares, err := NewDriverFirmwareDiscoverer(logger, driver.Root, version)
|
||||
firmwares, err := NewDriverFirmwareDiscoverer(l.logger, l.driver.Root, version)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for GSP firmware: %v", err)
|
||||
}
|
||||
|
||||
binaries := NewDriverBinariesDiscoverer(logger, driver.Root)
|
||||
binaries := NewDriverBinariesDiscoverer(l.logger, l.driver.Root)
|
||||
|
||||
d := discover.Merge(
|
||||
libraries,
|
||||
|
@ -63,6 +63,8 @@ type nvcdilib struct {
|
||||
infolib info.Interface
|
||||
|
||||
mergedDeviceOptions []transform.MergedDeviceOption
|
||||
|
||||
optInFeatures map[string]bool
|
||||
}
|
||||
|
||||
// New creates a new nvcdi library
|
||||
@ -130,6 +132,9 @@ func New(opts ...Option) (Interface, error) {
|
||||
if l.vendor == "" {
|
||||
l.vendor = "management.nvidia.com"
|
||||
}
|
||||
// For management specifications we always allow the fabricmanager and
|
||||
// persistenced sockets.
|
||||
WithOptInFeature("include-persistenced-socket", true)(l)
|
||||
lib = (*managementlib)(l)
|
||||
case ModeNvml:
|
||||
lib = (*nvmllib)(l)
|
||||
|
@ -66,7 +66,7 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) {
|
||||
return nil, fmt.Errorf("failed to get CUDA version: %v", err)
|
||||
}
|
||||
|
||||
driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCDIHookPath, m.ldconfigPath, version)
|
||||
driver, err := (*nvcdilib)(m).newDriverVersionDiscoverer(version)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create driver library discoverer: %v", err)
|
||||
}
|
||||
|
@ -155,3 +155,14 @@ func WithLibrarySearchPaths(paths []string) Option {
|
||||
o.librarySearchPaths = paths
|
||||
}
|
||||
}
|
||||
|
||||
// WithOptInFeature sets a specific opt-in feature.
|
||||
// Note that previous opt-in-features are not removed.
|
||||
func WithOptInFeature(feature string, enabled bool) Option {
|
||||
return func(n *nvcdilib) {
|
||||
if n.optInFeatures == nil {
|
||||
n.optInFeatures = make(map[string]bool)
|
||||
}
|
||||
n.optInFeatures[feature] = enabled
|
||||
}
|
||||
}
|
||||
|
@ -81,6 +81,8 @@ type options struct {
|
||||
acceptNVIDIAVisibleDevicesAsVolumeMounts bool
|
||||
|
||||
ignoreErrors bool
|
||||
|
||||
optInFeatures cli.StringSlice
|
||||
}
|
||||
|
||||
func main() {
|
||||
@ -250,6 +252,12 @@ func main() {
|
||||
Destination: &opts.createDeviceNodes,
|
||||
EnvVars: []string{"CREATE_DEVICE_NODES"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "opt-in-feature",
|
||||
Hidden: true,
|
||||
Destination: &opts.optInFeatures,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_TOOLKIT_OPT_IN_FEATURES"},
|
||||
},
|
||||
}
|
||||
|
||||
// Update the subcommand flags with the common subcommand flags
|
||||
@ -518,6 +526,10 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai
|
||||
"nvidia-container-runtime.runtimes": opts.ContainerRuntimeRuntimes,
|
||||
"nvidia-container-cli.debug": opts.ContainerCLIDebug,
|
||||
}
|
||||
for _, feature := range opts.optInFeatures.Value() {
|
||||
optionalConfigValues["features."+feature] = true
|
||||
}
|
||||
|
||||
for key, value := range optionalConfigValues {
|
||||
if !c.IsSet(key) {
|
||||
log.Infof("Skipping unset option: %v", key)
|
||||
|
Loading…
Reference in New Issue
Block a user