diff --git a/CHANGELOG.md b/CHANGELOG.md index 44b74bb0..d2d515f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * Create file in `etc/ld.so.conf.d` with permissions `644` to support non-root containers. * Generate CDI specification files with `644` permissions to allow rootless applications (e.g. podman) * Add `nvidia-ctk cdi list` command to show the known CDI devices. +* Add support for generating merged devices (e.g. `all` device) to the nvcdi API. ## v1.13.1 diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index b88c9234..7d27685a 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -23,11 +23,10 @@ import ( "strings" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" - "github.com/NVIDIA/nvidia-container-toolkit/internal/edits" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec" + "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform" "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" - specs "github.com/container-orchestrated-devices/container-device-interface/specs-go" "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" ) @@ -225,20 +224,6 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { if err != nil { return nil, fmt.Errorf("failed to create device CDI specs: %v", err) } - var hasAll bool - for _, deviceSpec := range deviceSpecs { - if deviceSpec.Name == allDeviceName { - hasAll = true - break - } - } - if !hasAll { - allDevice, err := MergeDeviceSpecs(deviceSpecs, allDeviceName) - if err != nil { - return nil, fmt.Errorf("failed to create CDI specification for %q device: %v", allDeviceName, err) - } - deviceSpecs = append(deviceSpecs, allDevice) - } commonEdits, err := cdilib.GetCommonEdits() if err != nil { @@ -251,34 +236,10 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { spec.WithDeviceSpecs(deviceSpecs), spec.WithEdits(*commonEdits.ContainerEdits), spec.WithFormat(opts.format), + spec.WithMergedDeviceOptions( + transform.WithName(allDeviceName), + transform.WithSkipIfExists(true), + ), spec.WithPermissions(0644), ) } - -// MergeDeviceSpecs creates a device with the specified name which combines the edits from the previous devices. -// If a device of the specified name already exists, an error is returned. -func MergeDeviceSpecs(deviceSpecs []specs.Device, mergedDeviceName string) (specs.Device, error) { - if err := cdi.ValidateDeviceName(mergedDeviceName); err != nil { - return specs.Device{}, fmt.Errorf("invalid device name %q: %v", mergedDeviceName, err) - } - for _, d := range deviceSpecs { - if d.Name == mergedDeviceName { - return specs.Device{}, fmt.Errorf("device %q already exists", mergedDeviceName) - } - } - - mergedEdits := edits.NewContainerEdits() - - for _, d := range deviceSpecs { - edit := cdi.ContainerEdits{ - ContainerEdits: &d.ContainerEdits, - } - mergedEdits.Append(&edit) - } - - merged := specs.Device{ - Name: mergedDeviceName, - ContainerEdits: *mergedEdits.ContainerEdits, - } - return merged, nil -} diff --git a/cmd/nvidia-ctk/cdi/generate/generate_test.go b/cmd/nvidia-ctk/cdi/generate/generate_test.go deleted file mode 100644 index 5924480e..00000000 --- a/cmd/nvidia-ctk/cdi/generate/generate_test.go +++ /dev/null @@ -1,117 +0,0 @@ -/** -# Copyright (c) NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package generate - -import ( - "fmt" - "testing" - - "github.com/container-orchestrated-devices/container-device-interface/specs-go" - "github.com/stretchr/testify/require" -) - -func TestMergeDeviceSpecs(t *testing.T) { - testCases := []struct { - description string - deviceSpecs []specs.Device - mergedDeviceName string - expectedError error - expected specs.Device - }{ - { - description: "no devices", - mergedDeviceName: "all", - expected: specs.Device{ - Name: "all", - }, - }, - { - description: "one device", - mergedDeviceName: "all", - deviceSpecs: []specs.Device{ - { - Name: "gpu0", - ContainerEdits: specs.ContainerEdits{ - Env: []string{"GPU=0"}, - }, - }, - }, - expected: specs.Device{ - Name: "all", - ContainerEdits: specs.ContainerEdits{ - Env: []string{"GPU=0"}, - }, - }, - }, - { - description: "two devices", - mergedDeviceName: "all", - deviceSpecs: []specs.Device{ - { - Name: "gpu0", - ContainerEdits: specs.ContainerEdits{ - Env: []string{"GPU=0"}, - }, - }, - { - Name: "gpu1", - ContainerEdits: specs.ContainerEdits{ - Env: []string{"GPU=1"}, - }, - }, - }, - expected: specs.Device{ - Name: "all", - ContainerEdits: specs.ContainerEdits{ - Env: []string{"GPU=0", "GPU=1"}, - }, - }, - }, - { - description: "has merged device", - mergedDeviceName: "gpu0", - deviceSpecs: []specs.Device{ - { - Name: "gpu0", - ContainerEdits: specs.ContainerEdits{ - Env: []string{"GPU=0"}, - }, - }, - }, - expectedError: fmt.Errorf("device %q already exists", "gpu0"), - }, - { - description: "invalid merged device name", - mergedDeviceName: ".-not-valid", - expectedError: fmt.Errorf("invalid device name %q", ".-not-valid"), - }, - } - - for _, tc := range testCases { - t.Run(tc.description, func(t *testing.T) { - mergedDevice, err := MergeDeviceSpecs(tc.deviceSpecs, tc.mergedDeviceName) - - if tc.expectedError != nil { - require.Error(t, err) - return - } - - require.NoError(t, err) - require.EqualValues(t, tc.expected, mergedDevice) - }) - } -} diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index d692b9ff..fd5e2e54 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -20,6 +20,7 @@ import ( "fmt" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec" + "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform" "github.com/sirupsen/logrus" "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device" "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info" @@ -31,6 +32,8 @@ type wrapper struct { vendor string class string + + mergedDeviceOptions []transform.MergedDeviceOption } type nvcdilib struct { @@ -46,6 +49,8 @@ type nvcdilib struct { class string infolib info.Interface + + mergedDeviceOptions []transform.MergedDeviceOption } // New creates a new nvcdi library @@ -106,9 +111,10 @@ func New(opts ...Option) (Interface, error) { } w := wrapper{ - Interface: lib, - vendor: l.vendor, - class: l.class, + Interface: lib, + vendor: l.vendor, + class: l.class, + mergedDeviceOptions: l.mergedDeviceOptions, } return &w, nil } @@ -130,8 +136,8 @@ func (l *wrapper) GetSpec() (spec.Interface, error) { spec.WithEdits(*edits.ContainerEdits), spec.WithVendor(l.vendor), spec.WithClass(l.class), + spec.WithMergedDeviceOptions(l.mergedDeviceOptions...), ) - } // resolveMode resolves the mode for CDI spec generation based on the current system. diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go index 19aef93c..254c6e0e 100644 --- a/pkg/nvcdi/options.go +++ b/pkg/nvcdi/options.go @@ -17,6 +17,7 @@ package nvcdi import ( + "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform" "github.com/sirupsen/logrus" "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device" "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml" @@ -87,3 +88,11 @@ func WithClass(class string) Option { o.class = class } } + +// WithMergedDeviceOptions sets the merged device options for the library +// If these are not set, no merged device will be generated. +func WithMergedDeviceOptions(opts ...transform.MergedDeviceOption) Option { + return func(o *nvcdilib) { + o.mergedDeviceOptions = opts + } +} diff --git a/pkg/nvcdi/spec/builder.go b/pkg/nvcdi/spec/builder.go index a0158132..0a867a55 100644 --- a/pkg/nvcdi/spec/builder.go +++ b/pkg/nvcdi/spec/builder.go @@ -33,8 +33,10 @@ type builder struct { deviceSpecs []specs.Device edits specs.ContainerEdits format string - noSimplify bool - permissions os.FileMode + + mergedDeviceOptions []transform.MergedDeviceOption + noSimplify bool + permissions os.FileMode } // newBuilder creates a new spec builder with the supplied options @@ -95,6 +97,16 @@ func (o *builder) Build() (*spec, error) { } } + if len(o.mergedDeviceOptions) > 0 { + merge, err := transform.NewMergedDevice(o.mergedDeviceOptions...) + if err != nil { + return nil, fmt.Errorf("failed to create merged device transformer: %v", err) + } + if err := merge.Transform(raw); err != nil { + return nil, fmt.Errorf("failed to merge devices: %v", err) + } + } + s := spec{ Spec: raw, format: o.format, @@ -169,3 +181,10 @@ func WithPermissions(permissions os.FileMode) Option { o.permissions = permissions } } + +// WithMergedDeviceOptions sets the options for generating a merged device. +func WithMergedDeviceOptions(opts ...transform.MergedDeviceOption) Option { + return func(o *builder) { + o.mergedDeviceOptions = opts + } +} diff --git a/pkg/nvcdi/transform/merged-device.go b/pkg/nvcdi/transform/merged-device.go new file mode 100644 index 00000000..4b744155 --- /dev/null +++ b/pkg/nvcdi/transform/merged-device.go @@ -0,0 +1,123 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package transform + +import ( + "fmt" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/edits" + "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" + "github.com/container-orchestrated-devices/container-device-interface/specs-go" +) + +const ( + allDeviceName = "all" +) + +type mergedDevice struct { + name string + skipIfExists bool + simplifier Transformer +} + +var _ Transformer = (*mergedDevice)(nil) + +// MergedDeviceOption is a function that configures a merged device +type MergedDeviceOption func(*mergedDevice) + +// WithName sets the name of the merged device +func WithName(name string) MergedDeviceOption { + return func(m *mergedDevice) { + m.name = name + } +} + +// WithSkipIfExists sets whether to skip adding the merged device if it already exists +func WithSkipIfExists(skipIfExists bool) MergedDeviceOption { + return func(m *mergedDevice) { + m.skipIfExists = skipIfExists + } +} + +// NewMergedDevice creates a transformer with the specified options +func NewMergedDevice(opts ...MergedDeviceOption) (Transformer, error) { + m := &mergedDevice{} + for _, opt := range opts { + opt(m) + } + if m.name == "" { + m.name = allDeviceName + } + m.simplifier = NewSimplifier() + + if err := cdi.ValidateDeviceName(m.name); err != nil { + return nil, fmt.Errorf("invalid device name %q: %v", m.name, err) + } + + return m, nil +} + +// Transform adds a merged device to the spec +func (m mergedDevice) Transform(spec *specs.Spec) error { + if spec == nil { + return nil + } + + mergedDevice, err := mergeDeviceSpecs(spec.Devices, m.name) + if err != nil { + return fmt.Errorf("failed to generate merged device %q: %v", m.name, err) + } + if mergedDevice == nil { + if m.skipIfExists { + return nil + } + return fmt.Errorf("device %q already exists", m.name) + } + + spec.Devices = append(spec.Devices, *mergedDevice) + + if err := m.simplifier.Transform(spec); err != nil { + return fmt.Errorf("failed to simplify spec after merging device %q: %v", m.name, err) + } + + return nil +} + +// mergeDeviceSpecs creates a device with the specified name which combines the edits from the previous devices. +// If a device of the specified name already exists, no device is created and nil is returned. +func mergeDeviceSpecs(deviceSpecs []specs.Device, mergedDeviceName string) (*specs.Device, error) { + for _, d := range deviceSpecs { + if d.Name == mergedDeviceName { + return nil, nil + } + } + + mergedEdits := edits.NewContainerEdits() + + for _, d := range deviceSpecs { + edit := cdi.ContainerEdits{ + ContainerEdits: &d.ContainerEdits, + } + mergedEdits.Append(&edit) + } + + merged := specs.Device{ + Name: mergedDeviceName, + ContainerEdits: *mergedEdits.ContainerEdits, + } + return &merged, nil +} diff --git a/pkg/nvcdi/transform/merged-device_test.go b/pkg/nvcdi/transform/merged-device_test.go new file mode 100644 index 00000000..d8ef7244 --- /dev/null +++ b/pkg/nvcdi/transform/merged-device_test.go @@ -0,0 +1,222 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package transform + +import ( + "fmt" + "testing" + + "github.com/container-orchestrated-devices/container-device-interface/specs-go" + "github.com/stretchr/testify/require" +) + +func TestMergeDeviceSpecs(t *testing.T) { + testCases := []struct { + description string + deviceSpecs []specs.Device + mergedDeviceName string + createError error + expectedError error + expected *specs.Device + }{ + { + description: "no devices", + mergedDeviceName: "all", + expected: &specs.Device{ + Name: "all", + }, + }, + { + description: "one device", + mergedDeviceName: "all", + deviceSpecs: []specs.Device{ + { + Name: "gpu0", + ContainerEdits: specs.ContainerEdits{ + Env: []string{"GPU=0"}, + }, + }, + }, + expected: &specs.Device{ + Name: "all", + ContainerEdits: specs.ContainerEdits{ + Env: []string{"GPU=0"}, + }, + }, + }, + { + description: "two devices", + mergedDeviceName: "all", + deviceSpecs: []specs.Device{ + { + Name: "gpu0", + ContainerEdits: specs.ContainerEdits{ + Env: []string{"GPU=0"}, + }, + }, + { + Name: "gpu1", + ContainerEdits: specs.ContainerEdits{ + Env: []string{"GPU=1"}, + }, + }, + }, + expected: &specs.Device{ + Name: "all", + ContainerEdits: specs.ContainerEdits{ + Env: []string{"GPU=0", "GPU=1"}, + }, + }, + }, + { + description: "has merged device", + mergedDeviceName: "gpu0", + deviceSpecs: []specs.Device{ + { + Name: "gpu0", + ContainerEdits: specs.ContainerEdits{ + Env: []string{"GPU=0"}, + }, + }, + }, + }, + { + description: "invalid merged device name", + mergedDeviceName: ".-not-valid", + createError: fmt.Errorf("invalid device name %q", ".-not-valid"), + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + _, err := NewMergedDevice( + WithName(tc.mergedDeviceName), + ) + if tc.createError != nil { + require.Error(t, err) + return + } + require.NoError(t, err) + + device, err := mergeDeviceSpecs(tc.deviceSpecs, tc.mergedDeviceName) + if tc.expectedError != nil { + require.Error(t, err) + return + } + require.NoError(t, err) + + require.EqualValues(t, tc.expected, device) + }) + } +} + +func TestMergedDevice(t *testing.T) { + testCases := []struct { + description string + spec *specs.Spec + expectedError error + expectedSpec *specs.Spec + }{ + { + description: "duplicate hooks are removed", + spec: &specs.Spec{ + Devices: []specs.Device{ + { + Name: "gpu0", + ContainerEdits: specs.ContainerEdits{ + Hooks: []*specs.Hook{ + { + HookName: "createContainer", + Path: "/usr/bin/nvidia-ctk", + Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + }, + }, + }, + }, + { + Name: "gpu1", + ContainerEdits: specs.ContainerEdits{ + Hooks: []*specs.Hook{ + { + HookName: "createContainer", + Path: "/usr/bin/nvidia-ctk", + Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + }, + }, + }, + }, + }, + }, + expectedSpec: &specs.Spec{ + Devices: []specs.Device{ + { + Name: "gpu0", + ContainerEdits: specs.ContainerEdits{ + Hooks: []*specs.Hook{ + { + HookName: "createContainer", + Path: "/usr/bin/nvidia-ctk", + Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + }, + }, + }, + }, + { + Name: "gpu1", + ContainerEdits: specs.ContainerEdits{ + Hooks: []*specs.Hook{ + { + HookName: "createContainer", + Path: "/usr/bin/nvidia-ctk", + Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + }, + }, + }, + }, + { + Name: "all", + ContainerEdits: specs.ContainerEdits{ + Hooks: []*specs.Hook{ + { + HookName: "createContainer", + Path: "/usr/bin/nvidia-ctk", + Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + }, + }, + }, + }, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + m, err := NewMergedDevice() + require.NoError(t, err) + + err = m.Transform(tc.spec) + if tc.expectedError != nil { + require.Error(t, err) + return + } + require.NoError(t, err) + + require.EqualValues(t, tc.expectedSpec, tc.spec) + }) + } +} diff --git a/pkg/nvcdi/transform/simplify_test.go b/pkg/nvcdi/transform/simplify_test.go index abe4758a..8b312c20 100644 --- a/pkg/nvcdi/transform/simplify_test.go +++ b/pkg/nvcdi/transform/simplify_test.go @@ -105,6 +105,94 @@ func TestSimplify(t *testing.T) { }, }, }, + { + description: "simplify removes hooks", + spec: &specs.Spec{ + Devices: []specs.Device{ + { + Name: "gpu0", + ContainerEdits: specs.ContainerEdits{ + Hooks: []*specs.Hook{ + { + HookName: "createContainer", + Path: "/usr/bin/nvidia-ctk", + Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + }, + }, + }, + }, + { + Name: "gpu1", + ContainerEdits: specs.ContainerEdits{ + Hooks: []*specs.Hook{ + { + HookName: "createContainer", + Path: "/usr/bin/nvidia-ctk", + Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + }, + }, + }, + }, + { + Name: "all", + ContainerEdits: specs.ContainerEdits{ + Hooks: []*specs.Hook{ + { + HookName: "createContainer", + Path: "/usr/bin/nvidia-ctk", + Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + }, + { + HookName: "createContainer", + Path: "/usr/bin/nvidia-ctk", + Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + }, + }, + }, + }, + }, + }, + expectedSpec: &specs.Spec{ + Devices: []specs.Device{ + { + Name: "gpu0", + ContainerEdits: specs.ContainerEdits{ + Hooks: []*specs.Hook{ + { + HookName: "createContainer", + Path: "/usr/bin/nvidia-ctk", + Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + }, + }, + }, + }, + { + Name: "gpu1", + ContainerEdits: specs.ContainerEdits{ + Hooks: []*specs.Hook{ + { + HookName: "createContainer", + Path: "/usr/bin/nvidia-ctk", + Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + }, + }, + }, + }, + { + Name: "all", + ContainerEdits: specs.ContainerEdits{ + Hooks: []*specs.Hook{ + { + HookName: "createContainer", + Path: "/usr/bin/nvidia-ctk", + Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"}, + }, + }, + }, + }, + }, + }, + }, } for _, tc := range testCases {