Merge branch 'CNT-3939/generate-all-device' into 'main'

Add options to generate all device to nvcdi API

See merge request nvidia/container-toolkit/container-toolkit!348
This commit is contained in:
Evan Lezar 2023-05-22 11:53:39 +00:00
commit c120c511d5
9 changed files with 479 additions and 167 deletions

View File

@ -6,6 +6,7 @@
* Create file in `etc/ld.so.conf.d` with permissions `644` to support non-root containers.
* Generate CDI specification files with `644` permissions to allow rootless applications (e.g. podman)
* Add `nvidia-ctk cdi list` command to show the known CDI devices.
* Add support for generating merged devices (e.g. `all` device) to the nvcdi API.
## v1.13.1

View File

@ -23,11 +23,10 @@ import (
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
specs "github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
@ -225,20 +224,6 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
if err != nil {
return nil, fmt.Errorf("failed to create device CDI specs: %v", err)
}
var hasAll bool
for _, deviceSpec := range deviceSpecs {
if deviceSpec.Name == allDeviceName {
hasAll = true
break
}
}
if !hasAll {
allDevice, err := MergeDeviceSpecs(deviceSpecs, allDeviceName)
if err != nil {
return nil, fmt.Errorf("failed to create CDI specification for %q device: %v", allDeviceName, err)
}
deviceSpecs = append(deviceSpecs, allDevice)
}
commonEdits, err := cdilib.GetCommonEdits()
if err != nil {
@ -251,34 +236,10 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
spec.WithDeviceSpecs(deviceSpecs),
spec.WithEdits(*commonEdits.ContainerEdits),
spec.WithFormat(opts.format),
spec.WithMergedDeviceOptions(
transform.WithName(allDeviceName),
transform.WithSkipIfExists(true),
),
spec.WithPermissions(0644),
)
}
// MergeDeviceSpecs creates a device with the specified name which combines the edits from the previous devices.
// If a device of the specified name already exists, an error is returned.
func MergeDeviceSpecs(deviceSpecs []specs.Device, mergedDeviceName string) (specs.Device, error) {
if err := cdi.ValidateDeviceName(mergedDeviceName); err != nil {
return specs.Device{}, fmt.Errorf("invalid device name %q: %v", mergedDeviceName, err)
}
for _, d := range deviceSpecs {
if d.Name == mergedDeviceName {
return specs.Device{}, fmt.Errorf("device %q already exists", mergedDeviceName)
}
}
mergedEdits := edits.NewContainerEdits()
for _, d := range deviceSpecs {
edit := cdi.ContainerEdits{
ContainerEdits: &d.ContainerEdits,
}
mergedEdits.Append(&edit)
}
merged := specs.Device{
Name: mergedDeviceName,
ContainerEdits: *mergedEdits.ContainerEdits,
}
return merged, nil
}

View File

@ -1,117 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"testing"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/stretchr/testify/require"
)
func TestMergeDeviceSpecs(t *testing.T) {
testCases := []struct {
description string
deviceSpecs []specs.Device
mergedDeviceName string
expectedError error
expected specs.Device
}{
{
description: "no devices",
mergedDeviceName: "all",
expected: specs.Device{
Name: "all",
},
},
{
description: "one device",
mergedDeviceName: "all",
deviceSpecs: []specs.Device{
{
Name: "gpu0",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0"},
},
},
},
expected: specs.Device{
Name: "all",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0"},
},
},
},
{
description: "two devices",
mergedDeviceName: "all",
deviceSpecs: []specs.Device{
{
Name: "gpu0",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0"},
},
},
{
Name: "gpu1",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=1"},
},
},
},
expected: specs.Device{
Name: "all",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0", "GPU=1"},
},
},
},
{
description: "has merged device",
mergedDeviceName: "gpu0",
deviceSpecs: []specs.Device{
{
Name: "gpu0",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0"},
},
},
},
expectedError: fmt.Errorf("device %q already exists", "gpu0"),
},
{
description: "invalid merged device name",
mergedDeviceName: ".-not-valid",
expectedError: fmt.Errorf("invalid device name %q", ".-not-valid"),
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
mergedDevice, err := MergeDeviceSpecs(tc.deviceSpecs, tc.mergedDeviceName)
if tc.expectedError != nil {
require.Error(t, err)
return
}
require.NoError(t, err)
require.EqualValues(t, tc.expected, mergedDevice)
})
}
}

View File

@ -20,6 +20,7 @@ import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
@ -31,6 +32,8 @@ type wrapper struct {
vendor string
class string
mergedDeviceOptions []transform.MergedDeviceOption
}
type nvcdilib struct {
@ -46,6 +49,8 @@ type nvcdilib struct {
class string
infolib info.Interface
mergedDeviceOptions []transform.MergedDeviceOption
}
// New creates a new nvcdi library
@ -106,9 +111,10 @@ func New(opts ...Option) (Interface, error) {
}
w := wrapper{
Interface: lib,
vendor: l.vendor,
class: l.class,
Interface: lib,
vendor: l.vendor,
class: l.class,
mergedDeviceOptions: l.mergedDeviceOptions,
}
return &w, nil
}
@ -130,8 +136,8 @@ func (l *wrapper) GetSpec() (spec.Interface, error) {
spec.WithEdits(*edits.ContainerEdits),
spec.WithVendor(l.vendor),
spec.WithClass(l.class),
spec.WithMergedDeviceOptions(l.mergedDeviceOptions...),
)
}
// resolveMode resolves the mode for CDI spec generation based on the current system.

View File

@ -17,6 +17,7 @@
package nvcdi
import (
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
@ -87,3 +88,11 @@ func WithClass(class string) Option {
o.class = class
}
}
// WithMergedDeviceOptions sets the merged device options for the library
// If these are not set, no merged device will be generated.
func WithMergedDeviceOptions(opts ...transform.MergedDeviceOption) Option {
return func(o *nvcdilib) {
o.mergedDeviceOptions = opts
}
}

View File

@ -33,8 +33,10 @@ type builder struct {
deviceSpecs []specs.Device
edits specs.ContainerEdits
format string
noSimplify bool
permissions os.FileMode
mergedDeviceOptions []transform.MergedDeviceOption
noSimplify bool
permissions os.FileMode
}
// newBuilder creates a new spec builder with the supplied options
@ -95,6 +97,16 @@ func (o *builder) Build() (*spec, error) {
}
}
if len(o.mergedDeviceOptions) > 0 {
merge, err := transform.NewMergedDevice(o.mergedDeviceOptions...)
if err != nil {
return nil, fmt.Errorf("failed to create merged device transformer: %v", err)
}
if err := merge.Transform(raw); err != nil {
return nil, fmt.Errorf("failed to merge devices: %v", err)
}
}
s := spec{
Spec: raw,
format: o.format,
@ -169,3 +181,10 @@ func WithPermissions(permissions os.FileMode) Option {
o.permissions = permissions
}
}
// WithMergedDeviceOptions sets the options for generating a merged device.
func WithMergedDeviceOptions(opts ...transform.MergedDeviceOption) Option {
return func(o *builder) {
o.mergedDeviceOptions = opts
}
}

View File

@ -0,0 +1,123 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package transform
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
const (
allDeviceName = "all"
)
type mergedDevice struct {
name string
skipIfExists bool
simplifier Transformer
}
var _ Transformer = (*mergedDevice)(nil)
// MergedDeviceOption is a function that configures a merged device
type MergedDeviceOption func(*mergedDevice)
// WithName sets the name of the merged device
func WithName(name string) MergedDeviceOption {
return func(m *mergedDevice) {
m.name = name
}
}
// WithSkipIfExists sets whether to skip adding the merged device if it already exists
func WithSkipIfExists(skipIfExists bool) MergedDeviceOption {
return func(m *mergedDevice) {
m.skipIfExists = skipIfExists
}
}
// NewMergedDevice creates a transformer with the specified options
func NewMergedDevice(opts ...MergedDeviceOption) (Transformer, error) {
m := &mergedDevice{}
for _, opt := range opts {
opt(m)
}
if m.name == "" {
m.name = allDeviceName
}
m.simplifier = NewSimplifier()
if err := cdi.ValidateDeviceName(m.name); err != nil {
return nil, fmt.Errorf("invalid device name %q: %v", m.name, err)
}
return m, nil
}
// Transform adds a merged device to the spec
func (m mergedDevice) Transform(spec *specs.Spec) error {
if spec == nil {
return nil
}
mergedDevice, err := mergeDeviceSpecs(spec.Devices, m.name)
if err != nil {
return fmt.Errorf("failed to generate merged device %q: %v", m.name, err)
}
if mergedDevice == nil {
if m.skipIfExists {
return nil
}
return fmt.Errorf("device %q already exists", m.name)
}
spec.Devices = append(spec.Devices, *mergedDevice)
if err := m.simplifier.Transform(spec); err != nil {
return fmt.Errorf("failed to simplify spec after merging device %q: %v", m.name, err)
}
return nil
}
// mergeDeviceSpecs creates a device with the specified name which combines the edits from the previous devices.
// If a device of the specified name already exists, no device is created and nil is returned.
func mergeDeviceSpecs(deviceSpecs []specs.Device, mergedDeviceName string) (*specs.Device, error) {
for _, d := range deviceSpecs {
if d.Name == mergedDeviceName {
return nil, nil
}
}
mergedEdits := edits.NewContainerEdits()
for _, d := range deviceSpecs {
edit := cdi.ContainerEdits{
ContainerEdits: &d.ContainerEdits,
}
mergedEdits.Append(&edit)
}
merged := specs.Device{
Name: mergedDeviceName,
ContainerEdits: *mergedEdits.ContainerEdits,
}
return &merged, nil
}

View File

@ -0,0 +1,222 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package transform
import (
"fmt"
"testing"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/stretchr/testify/require"
)
func TestMergeDeviceSpecs(t *testing.T) {
testCases := []struct {
description string
deviceSpecs []specs.Device
mergedDeviceName string
createError error
expectedError error
expected *specs.Device
}{
{
description: "no devices",
mergedDeviceName: "all",
expected: &specs.Device{
Name: "all",
},
},
{
description: "one device",
mergedDeviceName: "all",
deviceSpecs: []specs.Device{
{
Name: "gpu0",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0"},
},
},
},
expected: &specs.Device{
Name: "all",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0"},
},
},
},
{
description: "two devices",
mergedDeviceName: "all",
deviceSpecs: []specs.Device{
{
Name: "gpu0",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0"},
},
},
{
Name: "gpu1",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=1"},
},
},
},
expected: &specs.Device{
Name: "all",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0", "GPU=1"},
},
},
},
{
description: "has merged device",
mergedDeviceName: "gpu0",
deviceSpecs: []specs.Device{
{
Name: "gpu0",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0"},
},
},
},
},
{
description: "invalid merged device name",
mergedDeviceName: ".-not-valid",
createError: fmt.Errorf("invalid device name %q", ".-not-valid"),
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
_, err := NewMergedDevice(
WithName(tc.mergedDeviceName),
)
if tc.createError != nil {
require.Error(t, err)
return
}
require.NoError(t, err)
device, err := mergeDeviceSpecs(tc.deviceSpecs, tc.mergedDeviceName)
if tc.expectedError != nil {
require.Error(t, err)
return
}
require.NoError(t, err)
require.EqualValues(t, tc.expected, device)
})
}
}
func TestMergedDevice(t *testing.T) {
testCases := []struct {
description string
spec *specs.Spec
expectedError error
expectedSpec *specs.Spec
}{
{
description: "duplicate hooks are removed",
spec: &specs.Spec{
Devices: []specs.Device{
{
Name: "gpu0",
ContainerEdits: specs.ContainerEdits{
Hooks: []*specs.Hook{
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
},
},
},
},
{
Name: "gpu1",
ContainerEdits: specs.ContainerEdits{
Hooks: []*specs.Hook{
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
},
},
},
},
},
},
expectedSpec: &specs.Spec{
Devices: []specs.Device{
{
Name: "gpu0",
ContainerEdits: specs.ContainerEdits{
Hooks: []*specs.Hook{
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
},
},
},
},
{
Name: "gpu1",
ContainerEdits: specs.ContainerEdits{
Hooks: []*specs.Hook{
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
},
},
},
},
{
Name: "all",
ContainerEdits: specs.ContainerEdits{
Hooks: []*specs.Hook{
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
},
},
},
},
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
m, err := NewMergedDevice()
require.NoError(t, err)
err = m.Transform(tc.spec)
if tc.expectedError != nil {
require.Error(t, err)
return
}
require.NoError(t, err)
require.EqualValues(t, tc.expectedSpec, tc.spec)
})
}
}

View File

@ -105,6 +105,94 @@ func TestSimplify(t *testing.T) {
},
},
},
{
description: "simplify removes hooks",
spec: &specs.Spec{
Devices: []specs.Device{
{
Name: "gpu0",
ContainerEdits: specs.ContainerEdits{
Hooks: []*specs.Hook{
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
},
},
},
},
{
Name: "gpu1",
ContainerEdits: specs.ContainerEdits{
Hooks: []*specs.Hook{
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
},
},
},
},
{
Name: "all",
ContainerEdits: specs.ContainerEdits{
Hooks: []*specs.Hook{
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
},
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
},
},
},
},
},
},
expectedSpec: &specs.Spec{
Devices: []specs.Device{
{
Name: "gpu0",
ContainerEdits: specs.ContainerEdits{
Hooks: []*specs.Hook{
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
},
},
},
},
{
Name: "gpu1",
ContainerEdits: specs.ContainerEdits{
Hooks: []*specs.Hook{
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
},
},
},
},
{
Name: "all",
ContainerEdits: specs.ContainerEdits{
Hooks: []*specs.Hook{
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
},
},
},
},
},
},
},
}
for _, tc := range testCases {