Merge branch 'CNT-1876/cdi-specs-from-csv' into 'main'

Add csv mode to CDI spec generation

See merge request nvidia/container-toolkit/container-toolkit!393
This commit is contained in:
Evan Lezar 2023-05-23 14:47:19 +00:00
commit e7d2a9c212
11 changed files with 291 additions and 79 deletions

View File

@ -23,6 +23,7 @@ import (
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
@ -48,6 +49,10 @@ type options struct {
mode string
vendor string
class string
csv struct {
files cli.StringSlice
}
}
// NewCommand constructs a generate-cdi command with the specified logger
@ -123,13 +128,18 @@ func (m command) build() *cli.Command {
Value: "gpu",
Destination: &opts.class,
},
// csv.file lists the CSV mount specification files that are handed to the
// CDI library; it defaults to csv.DefaultFileList().
&cli.StringSliceFlag{
	Name:        "csv.file",
	Usage:       "The path to the list of CSV files to use when generating the CDI specification in CSV mode.",
	Value:       cli.NewStringSlice(csv.DefaultFileList()...),
	Destination: &opts.csv.files,
},
}
return &c
}
func (m command) validateFlags(c *cli.Context, opts *options) error {
opts.format = strings.ToLower(opts.format)
switch opts.format {
case spec.FormatJSON:
@ -141,6 +151,7 @@ func (m command) validateFlags(c *cli.Context, opts *options) error {
opts.mode = strings.ToLower(opts.mode)
switch opts.mode {
case nvcdi.ModeAuto:
case nvcdi.ModeCSV:
case nvcdi.ModeNvml:
case nvcdi.ModeWsl:
case nvcdi.ModeManagement:
@ -215,6 +226,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath),
nvcdi.WithDeviceNamer(deviceNamer),
nvcdi.WithMode(string(opts.mode)),
nvcdi.WithCSVFiles(opts.csv.files.Value()),
)
if err != nil {
return nil, fmt.Errorf("failed to create CDI library: %v", err)

View File

@ -33,6 +33,22 @@ const (
DefaultMountSpecPath = "/etc/nvidia-container-runtime/host-files-for-container.d"
)
// DefaultFileList returns the paths of the CSV files that are used by default.
// Each well-known file name is joined onto DefaultMountSpecPath.
func DefaultFileList() []string {
	var defaults []string
	for _, name := range []string{"devices.csv", "drivers.csv", "l4t.csv"} {
		defaults = append(defaults, filepath.Join(DefaultMountSpecPath, name))
	}
	return defaults
}
// GetFileList returns the (non-recursive) list of CSV files in the specified
// folder
func GetFileList(root string) ([]string, error) {

View File

@ -0,0 +1,106 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package tegra
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
)
// tegraOptions collects the settings that the functional options passed to
// New may set before the discoverers are constructed.
type tegraOptions struct {
// logger is handed to every discoverer and to the file locator created in New.
logger *logrus.Logger
// csvFiles is passed to the CSV discoverer and to the symlink hook discoverer.
csvFiles []string
// driverRoot is passed to the CSV discoverer.
driverRoot string
// nvidiaCTKPath is passed to the symlink and ldcache update hook discoverers.
nvidiaCTKPath string
}
// Option defines a functional option for configuring a Tegra discoverer.
// Options are applied by New, in the order supplied, to a zero-valued
// tegraOptions.
type Option func(*tegraOptions)
// New creates a new tegra discoverer using the supplied options.
//
// The options are applied in order to a zero-valued tegraOptions. The
// returned discoverer merges:
//   - a discoverer created from the configured CSV files,
//   - a hook discoverer that creates symlinks,
//   - a hook discoverer that updates the ldcache, and
//   - mounts for the Tegra platform files.
//
// An error wrapping the underlying cause is returned if any of the CSV-based
// discoverers cannot be created.
func New(opts ...Option) (discover.Discover, error) {
	o := &tegraOptions{}
	for _, opt := range opts {
		opt(o)
	}

	csvDiscoverer, err := discover.NewFromCSVFiles(o.logger, o.csvFiles, o.driverRoot)
	if err != nil {
		return nil, fmt.Errorf("failed to create CSV discoverer: %w", err)
	}

	createSymlinksHook, err := discover.NewCreateSymlinksHook(o.logger, o.csvFiles, csvDiscoverer, o.nvidiaCTKPath)
	if err != nil {
		return nil, fmt.Errorf("failed to create symlink hook discoverer: %w", err)
	}

	ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(o.logger, csvDiscoverer, o.nvidiaCTKPath)
	if err != nil {
		return nil, fmt.Errorf("failed to create ldcache update hook discoverer: %w", err)
	}

	// Platform files that are mounted in addition to the CSV-defined entries.
	tegraSystemMounts := discover.NewMounts(
		o.logger,
		lookup.NewFileLocator(lookup.WithLogger(o.logger)),
		"",
		[]string{
			"/etc/nv_tegra_release",
			"/sys/devices/soc0/family",
		},
	)

	d := discover.Merge(
		csvDiscoverer,
		createSymlinksHook,
		// The ldcacheUpdateHook is added after the createSymlinksHook to ensure
		// that the created symlinks are included.
		ldcacheUpdateHook,
		tegraSystemMounts,
	)

	return d, nil
}
// WithLogger returns an Option that stores the supplied logger in the
// discoverer options.
func WithLogger(logger *logrus.Logger) Option {
	return Option(func(opts *tegraOptions) {
		opts.logger = logger
	})
}
// WithDriverRoot returns an Option that stores the supplied driver root in
// the discoverer options.
func WithDriverRoot(driverRoot string) Option {
	return Option(func(opts *tegraOptions) {
		opts.driverRoot = driverRoot
	})
}
// WithCSVFiles returns an Option that stores the supplied list of CSV files
// in the discoverer options. The slice is stored as-is and is not copied.
func WithCSVFiles(csvFiles []string) Option {
	return Option(func(opts *tegraOptions) {
		opts.csvFiles = csvFiles
	})
}
// WithNVIDIACTKPath returns an Option that stores the supplied path in the
// nvidiaCTKPath field of the discoverer options.
// NOTE(review): presumably this is the path to the nvidia-ctk CLI binary
// rather than "nvidia-container-toolkit" - confirm against the hook code.
func WithNVIDIACTKPath(nvidiaCTKPath string) Option {
	return Option(func(opts *tegraOptions) {
		opts.nvidiaCTKPath = nvidiaCTKPath
	})
}

View File

@ -24,6 +24,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/cuda"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/tegra"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/requirements"
"github.com/sirupsen/logrus"
@ -74,26 +75,11 @@ func NewCSVModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec)
csvFiles = csv.BaseFilesOnly(csvFiles)
}
csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, cfg.NVIDIAContainerCLIConfig.Root)
if err != nil {
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
}
createSymlinksHook, err := discover.NewCreateSymlinksHook(logger, csvFiles, csvDiscoverer, cfg.NVIDIACTKConfig.Path)
if err != nil {
return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err)
}
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, cfg.NVIDIACTKConfig.Path)
if err != nil {
return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err)
}
d := discover.Merge(
csvDiscoverer,
createSymlinksHook,
// The ldcacheUpdateHook is added last to ensure that the created symlinks are included
ldcacheUpdateHook,
d, err := tegra.New(
tegra.WithLogger(logger),
tegra.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
tegra.WithNVIDIACTKPath(cfg.NVIDIACTKConfig.Path),
tegra.WithCSVFiles(csvFiles),
)
discoverModifier, err := NewModifierFromDiscoverer(logger, d)

View File

@ -1,45 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package modifier
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
)
// NewTegraPlatformFiles creates a modifier to inject the Tegra platform files
// into a container. On systems that are not detected as Tegra-based a nil
// modifier and a nil error are returned.
func NewTegraPlatformFiles(logger *logrus.Logger) (oci.SpecModifier, error) {
	// The second value returned by IsTegraSystem is intentionally discarded.
	if isTegra, _ := info.New().IsTegraSystem(); !isTegra {
		return nil, nil
	}

	locator := lookup.NewFileLocator(lookup.WithLogger(logger))
	platformFiles := []string{
		"/etc/nv_tegra_release",
		"/sys/devices/soc0/family",
	}

	return NewModifierFromDiscoverer(logger, discover.NewMounts(logger, locator, "", platformFiles))
}

View File

@ -81,17 +81,11 @@ func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec
return nil, err
}
tegraModifier, err := modifier.NewTegraPlatformFiles(logger)
if err != nil {
return nil, err
}
modifiers := modifier.Merge(
modeModifier,
graphicsModifier,
gdsModifier,
mofedModifier,
tegraModifier,
)
return modifiers, nil
}

View File

@ -36,6 +36,9 @@ const (
ModeGds = "gds"
// ModeMofed configures the CDI spec generator to generate a MOFED spec.
ModeMofed = "mofed"
// ModeCSV configures the CDI spec generator to generate a spec based on the contents of CSV
// mountspec files.
ModeCSV = "csv"
)
// Interface defines the API for the nvcdi package

87
pkg/nvcdi/lib-csv.go Normal file
View File

@ -0,0 +1,87 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package nvcdi
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/tegra"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
)
// csvlib is the nvcdilib variant used for CSV mode. It shares the underlying
// struct layout of nvcdilib, which allows a direct pointer conversion.
type csvlib nvcdilib
// Compile-time check that *csvlib implements Interface.
var _ Interface = (*csvlib)(nil)
// GetSpec should not be called for csvlib. It always returns a nil spec and
// an error.
func (l *csvlib) GetSpec() (spec.Interface, error) {
	return nil, fmt.Errorf("Unexpected call to csvlib.GetSpec()")
}
// GetAllDeviceSpecs returns the device specs for all available devices.
//
// In CSV mode this is a single device named "all" whose container edits are
// generated from the tegra discoverer built for the configured logger, driver
// root, nvidia-ctk path and CSV files. Errors from constructing the
// discoverer or from generating the edits are wrapped and returned.
func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) {
	d, err := tegra.New(
		tegra.WithLogger(l.logger),
		tegra.WithDriverRoot(l.driverRoot),
		tegra.WithNVIDIACTKPath(l.nvidiaCTKPath),
		tegra.WithCSVFiles(l.csvFiles),
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create discoverer for CSV files: %w", err)
	}

	e, err := edits.FromDiscoverer(d)
	if err != nil {
		return nil, fmt.Errorf("failed to create container edits for CSV files: %w", err)
	}

	deviceSpec := specs.Device{
		Name:           "all",
		ContainerEdits: *e.ContainerEdits,
	}

	return []specs.Device{deviceSpec}, nil
}
// GetCommonEdits returns the container edits that apply to ANY device. For
// CSV mode these are generated from a discover.None discoverer.
// NOTE(review): presumably this yields empty edits - confirm against
// edits.FromDiscoverer.
func (l *csvlib) GetCommonEdits() (*cdi.ContainerEdits, error) {
	return edits.FromDiscoverer(discover.None{})
}
// GetGPUDeviceEdits is not supported in CSV mode. It always returns nil
// edits and an error.
func (l *csvlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
	err := fmt.Errorf("GetGPUDeviceEdits is not supported for CSV files")
	return nil, err
}
// GetGPUDeviceSpecs is not supported in CSV mode. Both arguments are ignored
// and a nil device spec is returned together with an error.
func (l *csvlib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
	err := fmt.Errorf("GetGPUDeviceSpecs is not supported for CSV files")
	return nil, err
}
// GetMIGDeviceEdits is not supported in CSV mode. It always returns nil
// edits and an error.
func (l *csvlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.ContainerEdits, error) {
	err := fmt.Errorf("GetMIGDeviceEdits is not supported for CSV files")
	return nil, err
}
// GetMIGDeviceSpecs is not supported in CSV mode. All arguments are ignored
// and a nil device spec is returned together with an error.
func (l *csvlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
	err := fmt.Errorf("GetMIGDeviceSpecs is not supported for CSV files")
	return nil, err
}

View File

@ -19,6 +19,7 @@ package nvcdi
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"github.com/sirupsen/logrus"
@ -45,6 +46,8 @@ type nvcdilib struct {
driverRoot string
nvidiaCTKPath string
csvFiles []string
vendor string
class string
@ -80,6 +83,11 @@ func New(opts ...Option) (Interface, error) {
var lib Interface
switch l.resolveMode() {
case ModeCSV:
if len(l.csvFiles) == 0 {
l.csvFiles = csv.DefaultFileList()
}
lib = (*csvlib)(l)
case ModeManagement:
if l.vendor == "" {
l.vendor = "management.nvidia.com"
@ -156,6 +164,16 @@ func (l *nvcdilib) resolveMode() (rmode string) {
return ModeWsl
}
isNvml, reason := l.infolib.HasNvml()
l.logger.Debugf("Is NVML-based system? %v: %v", isNvml, reason)
isTegra, reason := l.infolib.IsTegraSystem()
l.logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason)
if isTegra && !isNvml {
return ModeCSV
}
return ModeNvml
}

View File

@ -28,9 +28,10 @@ func TestResolveMode(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
mode string
// TODO: This should be a proper mock
mode string
isTegra bool
hasDXCore bool
hasNVML bool
expected string
}{
{
@ -41,11 +42,34 @@ func TestResolveMode(t *testing.T) {
{
mode: "auto",
hasDXCore: false,
isTegra: true,
hasNVML: false,
expected: "csv",
},
{
mode: "auto",
hasDXCore: false,
isTegra: false,
hasNVML: false,
expected: "nvml",
},
{
mode: "auto",
hasDXCore: false,
isTegra: true,
hasNVML: true,
expected: "nvml",
},
{
mode: "auto",
hasDXCore: false,
isTegra: false,
expected: "nvml",
},
{
mode: "nvml",
hasDXCore: true,
isTegra: true,
expected: "nvml",
},
{
@ -65,7 +89,7 @@ func TestResolveMode(t *testing.T) {
l := nvcdilib{
logger: logger,
mode: tc.mode,
infolib: infoMock(tc.hasDXCore),
infolib: infoMock{hasDXCore: tc.hasDXCore, isTegra: tc.isTegra, hasNVML: tc.hasNVML},
}
require.Equal(t, tc.expected, l.resolveMode())
@ -73,16 +97,20 @@ func TestResolveMode(t *testing.T) {
}
}
type infoMock bool
type infoMock struct {
hasDXCore bool
isTegra bool
hasNVML bool
}
func (i infoMock) HasDXCore() (bool, string) {
return bool(i), ""
return bool(i.hasDXCore), ""
}
func (i infoMock) HasNvml() (bool, string) {
panic("should not be called")
return bool(i.hasNVML), ""
}
func (i infoMock) IsTegraSystem() (bool, string) {
panic("should not be called")
return bool(i.isTegra), ""
}

View File

@ -96,3 +96,10 @@ func WithMergedDeviceOptions(opts ...transform.MergedDeviceOption) Option {
o.mergedDeviceOptions = opts
}
}
// WithCSVFiles returns an Option that stores the supplied list of CSV files
// on the library. The slice is stored as-is and is not copied.
func WithCSVFiles(csvFiles []string) Option {
	return Option(func(l *nvcdilib) {
		l.csvFiles = csvFiles
	})
}