From cf93aa94d3c3977a1dc293a64a3f1e4749873b31 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 23 Nov 2023 15:44:45 +0100 Subject: [PATCH] Add support for injecting additional GIDs This change adds support for injecting additional GIDs using the internal CDI representations. (Applicable for Tegra-based systems and /dev/dri devices) This is disabled by default, but can be opted in to by setting the features.allow-additional-gids feature flag. This can also be done by running sudo nvidia-ctk config --in-place --set features.allow-additional-gids Signed-off-by: Evan Lezar --- internal/config/features.go | 49 ++++++---- internal/config/features_test.go | 150 +++++++++++++++++++++++++++++ internal/edits/device.go | 42 +++++++- internal/edits/lib.go | 2 +- internal/edits/options.go | 9 +- internal/modifier/discover.go | 13 ++- internal/modifier/discover_test.go | 2 +- internal/modifier/gated.go | 4 +- internal/modifier/graphics.go | 4 +- internal/oci/spec_mock.go | 3 +- pkg/nvcdi/lib.go | 3 + pkg/nvcdi/namer_nvml_mock.go | 3 +- pkg/nvcdi/options.go | 8 ++ pkg/nvcdi/transform/deduplicate.go | 24 +++++ 14 files changed, 282 insertions(+), 34 deletions(-) create mode 100644 internal/config/features_test.go diff --git a/internal/config/features.go b/internal/config/features.go index dfc6b165..a85307f7 100644 --- a/internal/config/features.go +++ b/internal/config/features.go @@ -17,12 +17,17 @@ package config type featureName string +type feature bool const ( - FeatureGDS = featureName("gds") - FeatureMOFED = featureName("mofed") - FeatureNVSWITCH = featureName("nvswitch") - FeatureGDRCopy = featureName("gdrcopy") + FeatureAllowAdditionalGIDs = featureName("allow-additional-gids") + FeatureGDRCopy = featureName("gdrcopy") + FeatureGDS = featureName("gds") + FeatureMOFED = featureName("mofed") + FeatureNVSWITCH = featureName("nvswitch") + + featureEnabled feature = true + featureDisabled feature = false ) // features specifies a set of named features. @@ -31,19 +36,23 @@ type features struct { MOFED *feature `toml:"mofed,omitempty"` NVSWITCH *feature `toml:"nvswitch,omitempty"` GDRCopy *feature `toml:"gdrcopy,omitempty"` + // AllowAdditionalGIDs triggers the additionalGIDs field in internal CDI + // specifications to be populated if required. This can be useful when + // running the container as a user id that may not have access to device + // nodes. + AllowAdditionalGIDs *feature `toml:"allow-additional-gids,omitempty"` } -type feature bool - // IsEnabled checks whether a specified named feature is enabled. // An optional list of environments to check for feature-specific environment // variables can also be supplied. func (fs features) IsEnabled(n featureName, in ...getenver) bool { featureEnvvars := map[featureName]string{ - FeatureGDS: "NVIDIA_GDS", - FeatureMOFED: "NVIDIA_MOFED", - FeatureNVSWITCH: "NVIDIA_NVSWITCH", - FeatureGDRCopy: "NVIDIA_GDRCOPY", + FeatureGDS: "NVIDIA_GDS", + FeatureMOFED: "NVIDIA_MOFED", + FeatureNVSWITCH: "NVIDIA_NVSWITCH", + FeatureGDRCopy: "NVIDIA_GDRCOPY", + FeatureAllowAdditionalGIDs: "NVIDIA_ALLOW_ADDITIONAL_GIDS", } envvar := featureEnvvars[n] @@ -56,6 +65,8 @@ func (fs features) IsEnabled(n featureName, in ...getenver) bool { return fs.NVSWITCH.isEnabled(envvar, in...) case FeatureGDRCopy: return fs.GDRCopy.isEnabled(envvar, in...) + case FeatureAllowAdditionalGIDs: + return fs.AllowAdditionalGIDs.isEnabled(envvar, in...) default: return false } @@ -66,17 +77,19 @@ func (fs features) IsEnabled(n featureName, in ...getenver) bool { // associated envvar is checked in the specified getenver for the string "enabled" // A CUDA container / image can be passed here. func (f *feature) isEnabled(envvar string, ins ...getenver) bool { + if envvar != "" { + for _, in := range ins { + switch in.Getenv(envvar) { + case "enabled", "true": + return true + case "disabled", "false": + return false + } + } + } if f != nil { return bool(*f) } - if envvar == "" { - return false - } - for _, in := range ins { - if in.Getenv(envvar) == "enabled" { - return true - } - } return false } diff --git a/internal/config/features_test.go b/internal/config/features_test.go new file mode 100644 index 00000000..e7986e80 --- /dev/null +++ b/internal/config/features_test.go @@ -0,0 +1,150 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package config + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestFeatures(t *testing.T) { + testCases := []struct { + description string + features features + expected map[featureName]bool + envs []getenver + }{ + { + description: "empty features", + expected: map[featureName]bool{ + FeatureAllowAdditionalGIDs: false, + FeatureGDRCopy: false, + FeatureGDS: false, + FeatureMOFED: false, + FeatureNVSWITCH: false, + }, + }, + { + description: "envvar sets features if enabled", + expected: map[featureName]bool{ + FeatureAllowAdditionalGIDs: true, + FeatureGDRCopy: false, + FeatureGDS: false, + FeatureMOFED: false, + FeatureNVSWITCH: false, + }, + envs: []getenver{ + mockEnver{ + "NVIDIA_ALLOW_ADDITIONAL_GIDS": "enabled", + }, + }, + }, + { + description: "envvar sets features if true", + expected: map[featureName]bool{ + FeatureAllowAdditionalGIDs: true, + FeatureGDRCopy: false, + FeatureGDS: false, + FeatureMOFED: false, + FeatureNVSWITCH: false, + }, + envs: []getenver{ + mockEnver{ + "NVIDIA_ALLOW_ADDITIONAL_GIDS": "true", + }, + }, + }, + { + description: "feature sets feature", + features: features{ + AllowAdditionalGIDs: ptr(featureEnabled), + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + for n, v := range tc.expected { + t.Run(tc.description+"-"+string(n), func(t *testing.T) { + require.EqualValues(t, v, tc.features.IsEnabled(n, tc.envs...)) + }) + } + + }) + } +} + +func TestFeature(t *testing.T) { + testCases := []struct { + description string + feature *feature + envvar string + envs []getenver + expected bool + }{ + { + description: "nil feature is false", + feature: nil, + expected: false, + }, + { + description: "feature enables", + feature: ptr(featureEnabled), + expected: true, + }, + { + description: "feature disabled", + feature: ptr(featureDisabled), + expected: false, + }, + { + description: "envvar overrides feature disabled", + feature: ptr(featureDisabled), + envvar: "FEATURE", + envs: []getenver{ + mockEnver{"FEATURE": "enabled"}, + }, + expected: true, + }, + { + description: "envvar overrides feature enabled", + feature: ptr(featureEnabled), + envvar: "FEATURE", + envs: []getenver{ + mockEnver{"FEATURE": "disabled"}, + }, + expected: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + require.EqualValues(t, tc.expected, tc.feature.isEnabled(tc.envvar, tc.envs...)) + }) + } +} + +type mockEnver map[string]string + +func (m mockEnver) Getenv(k string) string { + return m[k] +} + +func ptr[T any](x T) *T { + return &x +} diff --git a/internal/edits/device.go b/internal/edits/device.go index d04df153..36ab8bf6 100644 --- a/internal/edits/device.go +++ b/internal/edits/device.go @@ -17,6 +17,9 @@ package edits import ( + "os" + + "golang.org/x/sys/unix" "tags.cncf.io/container-device-interface/pkg/cdi" "tags.cncf.io/container-device-interface/specs-go" @@ -26,15 +29,23 @@ import ( type device discover.Device // toEdits converts a discovered device to CDI Container Edits. -func (d device) toEdits() (*cdi.ContainerEdits, error) { +func (d device) toEdits(allowAdditionalGIDs bool) (*cdi.ContainerEdits, error) { deviceNode, err := d.toSpec() if err != nil { return nil, err } + var additionalGIDs []uint32 + if allowAdditionalGIDs { + if requiredGID := d.getRequiredGID(); requiredGID != 0 { + additionalGIDs = append(additionalGIDs, requiredGID) + } + } + e := cdi.ContainerEdits{ ContainerEdits: &specs.ContainerEdits{ - DeviceNodes: []*specs.DeviceNode{deviceNode}, + DeviceNodes: []*specs.DeviceNode{deviceNode}, + AdditionalGIDs: additionalGIDs, }, } return &e, nil @@ -52,6 +63,7 @@ func (d device) toSpec() (*specs.DeviceNode, error) { if hostPath == d.Path { hostPath = "" } + s := specs.DeviceNode{ HostPath: hostPath, Path: d.Path, @@ -59,3 +71,29 @@ func (d device) toSpec() (*specs.DeviceNode, error) { return &s, nil } + +// getRequiredGID returns the group id of the device if the device is not world read/writable. +// If the information cannot be extracted or an error occurs, 0 is returned. +func (d device) getRequiredGID() uint32 { + path := d.HostPath + if path == "" { + path = d.Path + } + if path == "" { + return 0 + } + + var stat unix.Stat_t + if err := unix.Lstat(path, &stat); err != nil { + return 0 + } + // This is only supported for char devices + if stat.Mode&unix.S_IFMT != unix.S_IFCHR { + return 0 + } + + if permissionsForOther := os.FileMode(stat.Mode).Perm(); permissionsForOther&06 == 0 { + return stat.Gid + } + return 0 +} diff --git a/internal/edits/lib.go b/internal/edits/lib.go index 797882c6..3441de2b 100644 --- a/internal/edits/lib.go +++ b/internal/edits/lib.go @@ -58,7 +58,7 @@ func (o *options) EditsFromDiscoverer(d discover.Discover) (*cdi.ContainerEdits, c := NewContainerEdits() for _, d := range devices { - edits, err := device(d).toEdits() + edits, err := device(d).toEdits(o.allowAdditionalGIDs) if err != nil { return nil, fmt.Errorf("failed to create container edits for device: %w", err) } diff --git a/internal/edits/options.go b/internal/edits/options.go index e9bfd1c5..128f4a53 100644 --- a/internal/edits/options.go +++ b/internal/edits/options.go @@ -19,7 +19,8 @@ package edits import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" type options struct { - logger logger.Interface + logger logger.Interface + allowAdditionalGIDs bool } type Option func(*options) @@ -29,3 +30,9 @@ func WithLogger(logger logger.Interface) Option { o.logger = logger } } + +func WithAllowAdditionalGIDs(allowAdditionalGIDs bool) Option { + return func(o *options) { + o.allowAdditionalGIDs = allowAdditionalGIDs + } +} diff --git a/internal/modifier/discover.go b/internal/modifier/discover.go index 0f0ec476..a590f1bc 100644 --- a/internal/modifier/discover.go +++ b/internal/modifier/discover.go @@ -28,16 +28,18 @@ import ( ) type discoverModifier struct { - logger logger.Interface - discoverer discover.Discover + logger logger.Interface + discoverer discover.Discover + allowAdditionalGIDs bool } // NewModifierFromDiscoverer creates a modifier that applies the discovered // modifications to an OCI spec if required by the runtime wrapper. -func NewModifierFromDiscoverer(logger logger.Interface, d discover.Discover) (oci.SpecModifier, error) { +func NewModifierFromDiscoverer(logger logger.Interface, d discover.Discover, allowAdditionalGIDs bool) (oci.SpecModifier, error) { m := discoverModifier{ - logger: logger, - discoverer: d, + logger: logger, + discoverer: d, + allowAdditionalGIDs: allowAdditionalGIDs, } return &m, nil } @@ -47,6 +49,7 @@ func NewModifierFromDiscoverer(logger logger.Interface, d discover.Discover) (oc func (m discoverModifier) Modify(spec *specs.Spec) error { e := edits.New( edits.WithLogger(m.logger), + edits.WithAllowAdditionalGIDs(m.allowAdditionalGIDs), ) specEdits, err := e.SpecModifierFromDiscoverer(m.discoverer) diff --git a/internal/modifier/discover_test.go b/internal/modifier/discover_test.go index 77c3ef17..ab1253ac 100644 --- a/internal/modifier/discover_test.go +++ b/internal/modifier/discover_test.go @@ -132,7 +132,7 @@ func TestDiscoverModifier(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - m, err := NewModifierFromDiscoverer(logger, tc.discover) + m, err := NewModifierFromDiscoverer(logger, tc.discover, true) require.NoError(t, err) err = m.Modify(tc.spec) diff --git a/internal/modifier/gated.go b/internal/modifier/gated.go index 5bed3eaf..d272d561 100644 --- a/internal/modifier/gated.go +++ b/internal/modifier/gated.go @@ -78,5 +78,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image discoverers = append(discoverers, d) } - return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...)) + allowAdditionalGIDs := !cfg.Features.IsEnabled(config.FeatureAllowAdditionalGIDs, image) + + return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...), allowAdditionalGIDs) } diff --git a/internal/modifier/graphics.go b/internal/modifier/graphics.go index d9784d5e..f590a3c8 100644 --- a/internal/modifier/graphics.go +++ b/internal/modifier/graphics.go @@ -62,7 +62,9 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image imag drmNodes, mounts, ) - return NewModifierFromDiscoverer(logger, d) + + allowAdditionalGIDs := cfg.Features.IsEnabled(config.FeatureAllowAdditionalGIDs, image) + return NewModifierFromDiscoverer(logger, d, allowAdditionalGIDs) } // requiresGraphicsModifier determines whether a graphics modifier is required. diff --git a/internal/oci/spec_mock.go b/internal/oci/spec_mock.go index f004d69c..ff8ff647 100644 --- a/internal/oci/spec_mock.go +++ b/internal/oci/spec_mock.go @@ -4,9 +4,8 @@ package oci import ( - "sync" - "github.com/opencontainers/runtime-spec/specs-go" + "sync" ) // Ensure, that SpecMock does implement Spec. diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index ac08c97f..ce6600c5 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -65,6 +65,8 @@ type nvcdilib struct { infolib info.Interface mergedDeviceOptions []transform.MergedDeviceOption + + allowAdditionalGIDs bool } // New creates a new nvcdi library @@ -196,6 +198,7 @@ func (m *wrapper) GetCommonEdits() (*cdi.ContainerEdits, error) { func (l *nvcdilib) editsFromDiscoverer(d discover.Discover) (*cdi.ContainerEdits, error) { e := edits.New( edits.WithLogger(l.logger), + edits.WithAllowAdditionalGIDs(l.allowAdditionalGIDs), ) return e.EditsFromDiscoverer(d) } diff --git a/pkg/nvcdi/namer_nvml_mock.go b/pkg/nvcdi/namer_nvml_mock.go index 6a704b45..f81a1eee 100644 --- a/pkg/nvcdi/namer_nvml_mock.go +++ b/pkg/nvcdi/namer_nvml_mock.go @@ -4,9 +4,8 @@ package nvcdi import ( - "sync" - "github.com/NVIDIA/go-nvml/pkg/nvml" + "sync" ) // Ensure, that nvmlUUIDerMock does implement nvmlUUIDer. diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go index 417687b9..d35566a3 100644 --- a/pkg/nvcdi/options.go +++ b/pkg/nvcdi/options.go @@ -155,3 +155,11 @@ func WithLibrarySearchPaths(paths []string) Option { o.librarySearchPaths = paths } } + +// WithAllowAdditionalGIDs specifies whether a generated CDI spec can contain +// the additionaGIDs field. +func WithAllowAdditionalGIDs(allowAdditionalGIDs bool) Option { + return func(o *nvcdilib) { + o.allowAdditionalGIDs = allowAdditionalGIDs + } +} diff --git a/pkg/nvcdi/transform/deduplicate.go b/pkg/nvcdi/transform/deduplicate.go index 27be1b67..c7eace03 100644 --- a/pkg/nvcdi/transform/deduplicate.go +++ b/pkg/nvcdi/transform/deduplicate.go @@ -17,6 +17,8 @@ package transform import ( + "slices" + "tags.cncf.io/container-device-interface/specs-go" ) @@ -50,6 +52,12 @@ func (d dedupe) Transform(spec *specs.Spec) error { } func (d dedupe) transformEdits(edits *specs.ContainerEdits) error { + additionalGIDs, err := d.deduplicateAdditionalGIDs(edits.AdditionalGIDs) + if err != nil { + return err + } + edits.AdditionalGIDs = additionalGIDs + deviceNodes, err := d.deduplicateDeviceNodes(edits.DeviceNodes) if err != nil { return err @@ -150,3 +158,19 @@ func (d dedupe) deduplicateMounts(entities []*specs.Mount) ([]*specs.Mount, erro } return mounts, nil } + +func (d dedupe) deduplicateAdditionalGIDs(entities []uint32) ([]uint32, error) { + seen := make(map[uint32]bool) + var additionalGIDs []uint32 + for _, e := range entities { + if seen[e] { + continue + } + seen[e] = true + additionalGIDs = append(additionalGIDs, e) + } + + slices.Sort(additionalGIDs) + + return additionalGIDs, nil +}