Merge pull request #707 from elezar/revert-no-persistenced-flag-by-default

Revert #703 and #694
This commit is contained in:
Evan Lezar 2024-09-20 20:46:13 +02:00 committed by GitHub
commit 4b6de8036d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 52 additions and 103 deletions

View File

@ -89,7 +89,6 @@ func doPrestart() {
rootfs := getRootfsPath(container) rootfs := getRootfsPath(container)
args := []string{getCLIPath(cli)} args := []string{getCLIPath(cli)}
if cli.Root != "" { if cli.Root != "" {
args = append(args, fmt.Sprintf("--root=%s", cli.Root)) args = append(args, fmt.Sprintf("--root=%s", cli.Root))
} }
@ -112,11 +111,6 @@ func doPrestart() {
} }
args = append(args, "configure") args = append(args, "configure")
// Only include the nvidia-persistenced socket if it is explicitly enabled.
if !hook.Features.IncludePersistencedSocket.IsEnabled() {
args = append(args, "--no-persistenced")
}
if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" { if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" {
args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath)) args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath))
} }

View File

@ -60,8 +60,6 @@ type options struct {
files cli.StringSlice files cli.StringSlice
ignorePatterns cli.StringSlice ignorePatterns cli.StringSlice
} }
includePersistencedSocket bool
} }
// NewCommand constructs a generate-cdi command with the specified logger // NewCommand constructs a generate-cdi command with the specified logger
@ -171,11 +169,6 @@ func (m command) build() *cli.Command {
Usage: "Specify a pattern the CSV mount specifications.", Usage: "Specify a pattern the CSV mount specifications.",
Destination: &opts.csv.ignorePatterns, Destination: &opts.csv.ignorePatterns,
}, },
&cli.BoolFlag{
Name: "include-persistenced-socket",
Usage: "Include the nvidia-persistenced socket in the generated CDI specification.",
Destination: &opts.includePersistencedSocket,
},
} }
return &c return &c
@ -280,7 +273,6 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()), nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
nvcdi.WithCSVFiles(opts.csv.files.Value()), nvcdi.WithCSVFiles(opts.csv.files.Value()),
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()), nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),
nvcdi.WithOptInFeature("include-persistenced-socket", opts.includePersistencedSocket),
) )
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create CDI library: %v", err) return nil, fmt.Errorf("failed to create CDI library: %v", err)

View File

@ -19,11 +19,10 @@ package config
type featureName string type featureName string
const ( const (
FeatureGDS = featureName("gds") FeatureGDS = featureName("gds")
FeatureMOFED = featureName("mofed") FeatureMOFED = featureName("mofed")
FeatureNVSWITCH = featureName("nvswitch") FeatureNVSWITCH = featureName("nvswitch")
FeatureGDRCopy = featureName("gdrcopy") FeatureGDRCopy = featureName("gdrcopy")
FeatureIncludePersistencedSocket = featureName("include-persistenced-socket")
) )
// features specifies a set of named features. // features specifies a set of named features.
@ -32,57 +31,53 @@ type features struct {
MOFED *feature `toml:"mofed,omitempty"` MOFED *feature `toml:"mofed,omitempty"`
NVSWITCH *feature `toml:"nvswitch,omitempty"` NVSWITCH *feature `toml:"nvswitch,omitempty"`
GDRCopy *feature `toml:"gdrcopy,omitempty"` GDRCopy *feature `toml:"gdrcopy,omitempty"`
// IncludePersistencedSocket enables the injection of the nvidia-persistenced
// socket into containers.
IncludePersistencedSocket *feature `toml:"include-persistenced-socket,omitempty"`
} }
type feature bool type feature bool
// IsEnabledInEnvironment checks whether a specified named feature is enabled. // IsEnabled checks whether a specified named feature is enabled.
// An optional list of environments to check for feature-specific environment // An optional list of environments to check for feature-specific environment
// variables can also be supplied. // variables can also be supplied.
func (fs features) IsEnabledInEnvironment(n featureName, in ...getenver) bool { func (fs features) IsEnabled(n featureName, in ...getenver) bool {
featureEnvvars := map[featureName]string{
FeatureGDS: "NVIDIA_GDS",
FeatureMOFED: "NVIDIA_MOFED",
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
FeatureGDRCopy: "NVIDIA_GDRCOPY",
}
envvar := featureEnvvars[n]
switch n { switch n {
// Features with envvar overrides
case FeatureGDS: case FeatureGDS:
return fs.GDS.isEnabledWithEnvvarOverride("NVIDIA_GDS", in...) return fs.GDS.isEnabled(envvar, in...)
case FeatureMOFED: case FeatureMOFED:
return fs.MOFED.isEnabledWithEnvvarOverride("NVIDIA_MOFED", in...) return fs.MOFED.isEnabled(envvar, in...)
case FeatureNVSWITCH: case FeatureNVSWITCH:
return fs.NVSWITCH.isEnabledWithEnvvarOverride("NVIDIA_NVSWITCH", in...) return fs.NVSWITCH.isEnabled(envvar, in...)
case FeatureGDRCopy: case FeatureGDRCopy:
return fs.GDRCopy.isEnabledWithEnvvarOverride("NVIDIA_GDRCOPY", in...) return fs.GDRCopy.isEnabled(envvar, in...)
// Features without envvar overrides
case FeatureIncludePersistencedSocket:
return fs.IncludePersistencedSocket.IsEnabled()
default: default:
return false return false
} }
} }
// IsEnabled checks whether a feature is enabled. // isEnabled checks whether a feature is enabled.
func (f *feature) IsEnabled() bool {
if f != nil {
return bool(*f)
}
return false
}
// isEnabledWithEnvvarOverride checks whether a feature is enabled and allows an envvar to overide the feature.
// If the enabled value is explicitly set, this is returned, otherwise the // If the enabled value is explicitly set, this is returned, otherwise the
// associated envvar is checked in the specified getenver for the string "enabled" // associated envvar is checked in the specified getenver for the string "enabled"
// A CUDA container / image can be passed here. // A CUDA container / image can be passed here.
func (f *feature) isEnabledWithEnvvarOverride(envvar string, ins ...getenver) bool { func (f *feature) isEnabled(envvar string, ins ...getenver) bool {
if envvar != "" { if f != nil {
for _, in := range ins { return bool(*f)
if in.Getenv(envvar) == "enabled" { }
return true if envvar == "" {
} return false
}
for _, in := range ins {
if in.Getenv(envvar) == "enabled" {
return true
} }
} }
return false
return f.IsEnabled()
} }
type getenver interface { type getenver interface {

View File

@ -24,13 +24,7 @@ import (
type ipcMounts mounts type ipcMounts mounts
// NewIPCDiscoverer creats a discoverer for NVIDIA IPC sockets. // NewIPCDiscoverer creats a discoverer for NVIDIA IPC sockets.
func NewIPCDiscoverer(logger logger.Interface, driverRoot string, includePersistencedSocket bool) (Discover, error) { func NewIPCDiscoverer(logger logger.Interface, driverRoot string) (Discover, error) {
var requiredSockets []string
if includePersistencedSocket {
requiredSockets = append(requiredSockets, "/nvidia-persistenced/socket")
}
requiredSockets = append(requiredSockets, "/nvidia-fabricmanager/socket")
sockets := newMounts( sockets := newMounts(
logger, logger,
lookup.NewFileLocator( lookup.NewFileLocator(
@ -40,7 +34,10 @@ func NewIPCDiscoverer(logger logger.Interface, driverRoot string, includePersist
lookup.WithCount(1), lookup.WithCount(1),
), ),
driverRoot, driverRoot,
requiredSockets, []string{
"/nvidia-persistenced/socket",
"/nvidia-fabricmanager/socket",
},
) )
mps := newMounts( mps := newMounts(

View File

@ -189,7 +189,6 @@ func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devic
nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root), nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
nvcdi.WithVendor("runtime.nvidia.com"), nvcdi.WithVendor("runtime.nvidia.com"),
nvcdi.WithClass("gpu"), nvcdi.WithClass("gpu"),
nvcdi.WithOptInFeature("include-persistenced-socket", cfg.Features.IncludePersistencedSocket.IsEnabled()),
) )
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to construct CDI library: %w", err) return nil, fmt.Errorf("failed to construct CDI library: %w", err)

View File

@ -46,7 +46,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
driverRoot := cfg.NVIDIAContainerCLIConfig.Root driverRoot := cfg.NVIDIAContainerCLIConfig.Root
devRoot := cfg.NVIDIAContainerCLIConfig.Root devRoot := cfg.NVIDIAContainerCLIConfig.Root
if cfg.Features.IsEnabledInEnvironment(config.FeatureGDS, image) { if cfg.Features.IsEnabled(config.FeatureGDS, image) {
d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot) d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %w", err) return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %w", err)
@ -54,7 +54,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
discoverers = append(discoverers, d) discoverers = append(discoverers, d)
} }
if cfg.Features.IsEnabledInEnvironment(config.FeatureMOFED, image) { if cfg.Features.IsEnabled(config.FeatureMOFED, image) {
d, err := discover.NewMOFEDDiscoverer(logger, devRoot) d, err := discover.NewMOFEDDiscoverer(logger, devRoot)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for MOFED devices: %w", err) return nil, fmt.Errorf("failed to construct discoverer for MOFED devices: %w", err)
@ -62,7 +62,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
discoverers = append(discoverers, d) discoverers = append(discoverers, d)
} }
if cfg.Features.IsEnabledInEnvironment(config.FeatureNVSWITCH, image) { if cfg.Features.IsEnabled(config.FeatureNVSWITCH, image) {
d, err := discover.NewNvSwitchDiscoverer(logger, devRoot) d, err := discover.NewNvSwitchDiscoverer(logger, devRoot)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for NVSWITCH devices: %w", err) return nil, fmt.Errorf("failed to construct discoverer for NVSWITCH devices: %w", err)
@ -70,7 +70,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
discoverers = append(discoverers, d) discoverers = append(discoverers, d)
} }
if cfg.Features.IsEnabledInEnvironment(config.FeatureGDRCopy, image) { if cfg.Features.IsEnabled(config.FeatureGDRCopy, image) {
d, err := discover.NewGDRCopyDiscoverer(logger, devRoot) d, err := discover.NewGDRCopyDiscoverer(logger, devRoot)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for GDRCopy devices: %w", err) return nil, fmt.Errorf("failed to construct discoverer for GDRCopy devices: %w", err)

View File

@ -41,7 +41,7 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err) l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
} }
driverFiles, err := l.NewDriverDiscoverer() driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, l.nvmllib)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err) return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
} }

View File

@ -34,41 +34,41 @@ import (
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation. // NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
// The supplied NVML Library is used to query the expected driver version. // The supplied NVML Library is used to query the expected driver version.
func (l *nvmllib) NewDriverDiscoverer() (discover.Discover, error) { func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) {
if r := l.nvmllib.Init(); r != nvml.SUCCESS { if r := nvmllib.Init(); r != nvml.SUCCESS {
return nil, fmt.Errorf("failed to initialize NVML: %v", r) return nil, fmt.Errorf("failed to initialize NVML: %v", r)
} }
defer func() { defer func() {
if r := l.nvmllib.Shutdown(); r != nvml.SUCCESS { if r := nvmllib.Shutdown(); r != nvml.SUCCESS {
l.logger.Warningf("failed to shutdown NVML: %v", r) logger.Warningf("failed to shutdown NVML: %v", r)
} }
}() }()
version, r := l.nvmllib.SystemGetDriverVersion() version, r := nvmllib.SystemGetDriverVersion()
if r != nvml.SUCCESS { if r != nvml.SUCCESS {
return nil, fmt.Errorf("failed to determine driver version: %v", r) return nil, fmt.Errorf("failed to determine driver version: %v", r)
} }
return (*nvcdilib)(l).newDriverVersionDiscoverer(version) return newDriverVersionDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
} }
func (l *nvcdilib) newDriverVersionDiscoverer(version string) (discover.Discover, error) { func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) {
libraries, err := NewDriverLibraryDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, version) libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err) return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
} }
ipcs, err := discover.NewIPCDiscoverer(l.logger, l.driver.Root, l.optInFeatures["include-persistenced-socket"]) ipcs, err := discover.NewIPCDiscoverer(logger, driver.Root)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err) return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
} }
firmwares, err := NewDriverFirmwareDiscoverer(l.logger, l.driver.Root, version) firmwares, err := NewDriverFirmwareDiscoverer(logger, driver.Root, version)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create discoverer for GSP firmware: %v", err) return nil, fmt.Errorf("failed to create discoverer for GSP firmware: %v", err)
} }
binaries := NewDriverBinariesDiscoverer(l.logger, l.driver.Root) binaries := NewDriverBinariesDiscoverer(logger, driver.Root)
d := discover.Merge( d := discover.Merge(
libraries, libraries,

View File

@ -63,8 +63,6 @@ type nvcdilib struct {
infolib info.Interface infolib info.Interface
mergedDeviceOptions []transform.MergedDeviceOption mergedDeviceOptions []transform.MergedDeviceOption
optInFeatures map[string]bool
} }
// New creates a new nvcdi library // New creates a new nvcdi library
@ -132,9 +130,6 @@ func New(opts ...Option) (Interface, error) {
if l.vendor == "" { if l.vendor == "" {
l.vendor = "management.nvidia.com" l.vendor = "management.nvidia.com"
} }
// For management specifications we always allow the fabricmanager and
// persistenced sockets.
WithOptInFeature("include-persistenced-socket", true)(l)
lib = (*managementlib)(l) lib = (*managementlib)(l)
case ModeNvml: case ModeNvml:
lib = (*nvmllib)(l) lib = (*nvmllib)(l)

View File

@ -66,7 +66,7 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) {
return nil, fmt.Errorf("failed to get CUDA version: %v", err) return nil, fmt.Errorf("failed to get CUDA version: %v", err)
} }
driver, err := (*nvcdilib)(m).newDriverVersionDiscoverer(version) driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCDIHookPath, m.ldconfigPath, version)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create driver library discoverer: %v", err) return nil, fmt.Errorf("failed to create driver library discoverer: %v", err)
} }

View File

@ -155,14 +155,3 @@ func WithLibrarySearchPaths(paths []string) Option {
o.librarySearchPaths = paths o.librarySearchPaths = paths
} }
} }
// WithOptInFeature sets a specific opt-in feature.
// Note that previous opt-in-features are not removed.
func WithOptInFeature(feature string, enabled bool) Option {
return func(n *nvcdilib) {
if n.optInFeatures == nil {
n.optInFeatures = make(map[string]bool)
}
n.optInFeatures[feature] = enabled
}
}

View File

@ -81,8 +81,6 @@ type options struct {
acceptNVIDIAVisibleDevicesAsVolumeMounts bool acceptNVIDIAVisibleDevicesAsVolumeMounts bool
ignoreErrors bool ignoreErrors bool
optInFeatures cli.StringSlice
} }
func main() { func main() {
@ -252,12 +250,6 @@ func main() {
Destination: &opts.createDeviceNodes, Destination: &opts.createDeviceNodes,
EnvVars: []string{"CREATE_DEVICE_NODES"}, EnvVars: []string{"CREATE_DEVICE_NODES"},
}, },
&cli.StringSliceFlag{
Name: "opt-in-feature",
Hidden: true,
Destination: &opts.optInFeatures,
EnvVars: []string{"NVIDIA_CONTAINER_TOOLKIT_OPT_IN_FEATURES"},
},
} }
// Update the subcommand flags with the common subcommand flags // Update the subcommand flags with the common subcommand flags
@ -526,10 +518,6 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai
"nvidia-container-runtime.runtimes": opts.ContainerRuntimeRuntimes, "nvidia-container-runtime.runtimes": opts.ContainerRuntimeRuntimes,
"nvidia-container-cli.debug": opts.ContainerCLIDebug, "nvidia-container-cli.debug": opts.ContainerCLIDebug,
} }
for _, feature := range opts.optInFeatures.Value() {
optionalConfigValues["features."+feature] = true
}
for key, value := range optionalConfigValues { for key, value := range optionalConfigValues {
if !c.IsSet(key) { if !c.IsSet(key) {
log.Infof("Skipping unset option: %v", key) log.Infof("Skipping unset option: %v", key)