diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index 7d27685a..cab3f97a 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -23,6 +23,7 @@ import ( "strings" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform" @@ -48,6 +49,10 @@ type options struct { mode string vendor string class string + + csv struct { + files cli.StringSlice + } } // NewCommand constructs a generate-cdi command with the specified logger @@ -123,13 +128,18 @@ func (m command) build() *cli.Command { Value: "gpu", Destination: &opts.class, }, + &cli.StringSliceFlag{ + Name: "csv.file", + Usage: "The path to the list of CSV files to use when generating the CDI specification in CSV mode.", + Value: cli.NewStringSlice(csv.DefaultFileList()...), + Destination: &opts.csv.files, + }, } return &c } func (m command) validateFlags(c *cli.Context, opts *options) error { - opts.format = strings.ToLower(opts.format) switch opts.format { case spec.FormatJSON: @@ -141,6 +151,7 @@ func (m command) validateFlags(c *cli.Context, opts *options) error { opts.mode = strings.ToLower(opts.mode) switch opts.mode { case nvcdi.ModeAuto: + case nvcdi.ModeCSV: case nvcdi.ModeNvml: case nvcdi.ModeWsl: case nvcdi.ModeManagement: @@ -215,6 +226,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath), nvcdi.WithDeviceNamer(deviceNamer), nvcdi.WithMode(string(opts.mode)), + nvcdi.WithCSVFiles(opts.csv.files.Value()), ) if err != nil { return nil, fmt.Errorf("failed to create CDI library: %v", err) diff --git a/internal/discover/csv/csv.go b/internal/discover/csv/csv.go index 4aa50828..64bc34ff 100644 --- 
a/internal/discover/csv/csv.go +++ b/internal/discover/csv/csv.go @@ -33,6 +33,22 @@ const ( DefaultMountSpecPath = "/etc/nvidia-container-runtime/host-files-for-container.d" ) +// DefaultFileList returns the default CSV file paths (devices.csv, drivers.csv, l4t.csv) under DefaultMountSpecPath. +func DefaultFileList() []string { + files := []string{ + "devices.csv", + "drivers.csv", + "l4t.csv", + } + + paths := make([]string, 0, len(files)) + for _, file := range files { + paths = append(paths, filepath.Join(DefaultMountSpecPath, file)) + } + + return paths +} + // GetFileList returns the (non-recursive) list of CSV files in the specified // folder func GetFileList(root string) ([]string, error) { diff --git a/internal/discover/tegra/tegra.go b/internal/discover/tegra/tegra.go new file mode 100644 index 00000000..e749b3de --- /dev/null +++ b/internal/discover/tegra/tegra.go @@ -0,0 +1,106 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package tegra + +import ( + "fmt" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" + "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" + "github.com/sirupsen/logrus" +) + +type tegraOptions struct { + logger *logrus.Logger + csvFiles []string + driverRoot string + nvidiaCTKPath string +} + +// Option defines a functional option for configuring a Tegra discoverer. +type Option func(*tegraOptions) + +// New creates a new tegra discoverer using the supplied options. 
+func New(opts ...Option) (discover.Discover, error) { + o := &tegraOptions{} + for _, opt := range opts { + opt(o) + } + + csvDiscoverer, err := discover.NewFromCSVFiles(o.logger, o.csvFiles, o.driverRoot) + if err != nil { + return nil, fmt.Errorf("failed to create CSV discoverer: %v", err) + } + + createSymlinksHook, err := discover.NewCreateSymlinksHook(o.logger, o.csvFiles, csvDiscoverer, o.nvidiaCTKPath) + if err != nil { + return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err) + } + + ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(o.logger, csvDiscoverer, o.nvidiaCTKPath) + if err != nil { + return nil, fmt.Errorf("failed to create ldcache update hook discoverer: %v", err) + } + + tegraSystemMounts := discover.NewMounts( + o.logger, + lookup.NewFileLocator(lookup.WithLogger(o.logger)), + "", + []string{ + "/etc/nv_tegra_release", + "/sys/devices/soc0/family", + }, + ) + + d := discover.Merge( + csvDiscoverer, + createSymlinksHook, + // The ldcacheUpdateHook is added after the createSymlinksHook to ensure that the created symlinks are included + ldcacheUpdateHook, + tegraSystemMounts, + ) + + return d, nil +} + +// WithLogger sets the logger for the discoverer. +func WithLogger(logger *logrus.Logger) Option { + return func(o *tegraOptions) { + o.logger = logger + } +} + +// WithDriverRoot sets the driver root for the discoverer. +func WithDriverRoot(driverRoot string) Option { + return func(o *tegraOptions) { + o.driverRoot = driverRoot + } +} + +// WithCSVFiles sets the CSV files for the discoverer. +func WithCSVFiles(csvFiles []string) Option { + return func(o *tegraOptions) { + o.csvFiles = csvFiles + } +} + +// WithNVIDIACTKPath sets the path to the nvidia-container-toolkit binary. 
+func WithNVIDIACTKPath(nvidiaCTKPath string) Option { + return func(o *tegraOptions) { + o.nvidiaCTKPath = nvidiaCTKPath + } +} diff --git a/internal/modifier/csv.go b/internal/modifier/csv.go index 0f59b2f4..aa97f728 100644 --- a/internal/modifier/csv.go +++ b/internal/modifier/csv.go @@ -24,6 +24,7 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/cuda" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv" + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover/tegra" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" "github.com/NVIDIA/nvidia-container-toolkit/internal/requirements" "github.com/sirupsen/logrus" @@ -74,26 +75,14 @@ func NewCSVModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) csvFiles = csv.BaseFilesOnly(csvFiles) } - csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, cfg.NVIDIAContainerCLIConfig.Root) - if err != nil { - return nil, fmt.Errorf("failed to create CSV discoverer: %v", err) - } - - createSymlinksHook, err := discover.NewCreateSymlinksHook(logger, csvFiles, csvDiscoverer, cfg.NVIDIACTKConfig.Path) - if err != nil { - return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err) - } - - ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, cfg.NVIDIACTKConfig.Path) - if err != nil { - return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err) - } - - d := discover.Merge( - csvDiscoverer, - createSymlinksHook, - // The ldcacheUpdateHook is added last to ensure that the created symlinks are included - ldcacheUpdateHook, + d, err := tegra.New( + tegra.WithLogger(logger), + tegra.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root), + tegra.WithNVIDIACTKPath(cfg.NVIDIACTKConfig.Path), + tegra.WithCSVFiles(csvFiles), ) + if err != nil { + return nil, fmt.Errorf("failed to construct discoverer: %v", err) + } discoverModifier, err := NewModifierFromDiscoverer(logger, d) diff --git a/internal/modifier/tegra.go 
b/internal/modifier/tegra.go deleted file mode 100644 index 8ce590a8..00000000 --- a/internal/modifier/tegra.go +++ /dev/null @@ -1,45 +0,0 @@ -/** -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package modifier - -import ( - "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" - "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" - "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" - "github.com/sirupsen/logrus" - "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info" -) - -// NewTegraPlatformFiles creates a modifier to inject the Tegra platform files into a container. 
-func NewTegraPlatformFiles(logger *logrus.Logger) (oci.SpecModifier, error) { - isTegra, _ := info.New().IsTegraSystem() - if !isTegra { - return nil, nil - } - - tegraSystemMounts := discover.NewMounts( - logger, - lookup.NewFileLocator(lookup.WithLogger(logger)), - "", - []string{ - "/etc/nv_tegra_release", - "/sys/devices/soc0/family", - }, - ) - - return NewModifierFromDiscoverer(logger, tegraSystemMounts) -} diff --git a/internal/runtime/runtime_factory.go b/internal/runtime/runtime_factory.go index 29219ab8..36ac4e4e 100644 --- a/internal/runtime/runtime_factory.go +++ b/internal/runtime/runtime_factory.go @@ -81,17 +81,11 @@ func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec return nil, err } - tegraModifier, err := modifier.NewTegraPlatformFiles(logger) - if err != nil { - return nil, err - } - modifiers := modifier.Merge( modeModifier, graphicsModifier, gdsModifier, mofedModifier, - tegraModifier, ) return modifiers, nil } diff --git a/pkg/nvcdi/api.go b/pkg/nvcdi/api.go index 4f145638..68bfd845 100644 --- a/pkg/nvcdi/api.go +++ b/pkg/nvcdi/api.go @@ -36,6 +36,9 @@ const ( ModeGds = "gds" // ModeMofed configures the CDI spec generator to generate a MOFED spec. ModeMofed = "mofed" + // ModeCSV configures the CDI spec generator to generate a spec based on the contents of CSV + // mountspec files. + ModeCSV = "csv" ) // Interface defines the API for the nvcdi package diff --git a/pkg/nvcdi/lib-csv.go b/pkg/nvcdi/lib-csv.go new file mode 100644 index 00000000..127c4beb --- /dev/null +++ b/pkg/nvcdi/lib-csv.go @@ -0,0 +1,87 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package nvcdi + +import ( + "fmt" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover/tegra" + "github.com/NVIDIA/nvidia-container-toolkit/internal/edits" + "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec" + "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" + "github.com/container-orchestrated-devices/container-device-interface/specs-go" + "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device" +) + +type csvlib nvcdilib + +var _ Interface = (*csvlib)(nil) + +// GetSpec should not be called for csvlib +func (l *csvlib) GetSpec() (spec.Interface, error) { + return nil, fmt.Errorf("Unexpected call to csvlib.GetSpec()") +} + +// GetAllDeviceSpecs returns a single device spec named 'all' holding the edits discovered from the CSV files. 
+func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) { + d, err := tegra.New( + tegra.WithLogger(l.logger), + tegra.WithDriverRoot(l.driverRoot), + tegra.WithNVIDIACTKPath(l.nvidiaCTKPath), + tegra.WithCSVFiles(l.csvFiles), + ) + if err != nil { + return nil, fmt.Errorf("failed to create discoverer for CSV files: %v", err) + } + e, err := edits.FromDiscoverer(d) + if err != nil { + return nil, fmt.Errorf("failed to create container edits for CSV files: %v", err) + } + + deviceSpec := specs.Device{ + Name: "all", + ContainerEdits: *e.ContainerEdits, + } + return []specs.Device{deviceSpec}, nil +} + +// GetCommonEdits returns the edits generated from a discover.None discoverer; the CSV-based edits are attached to the 'all' device instead +func (l *csvlib) GetCommonEdits() (*cdi.ContainerEdits, error) { + d := discover.None{} + return edits.FromDiscoverer(d) +} + +// GetGPUDeviceEdits is not supported for CSV files and always returns an error +func (l *csvlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) { + return nil, fmt.Errorf("GetGPUDeviceEdits is not supported for CSV files") +} + +// GetGPUDeviceSpecs is not supported for CSV files and always returns an error +func (l *csvlib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) { + return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported for CSV files") +} + +// GetMIGDeviceEdits is not supported for CSV files and always returns an error +func (l *csvlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.ContainerEdits, error) { + return nil, fmt.Errorf("GetMIGDeviceEdits is not supported for CSV files") +} + +// GetMIGDeviceSpecs is not supported for CSV files and always returns an error 
+func (l *csvlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) { + return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported for CSV files") +} diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index fd5e2e54..7d11a8e2 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -19,6 +19,7 @@ package nvcdi import ( "fmt" + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform" "github.com/sirupsen/logrus" @@ -45,6 +46,8 @@ type nvcdilib struct { driverRoot string nvidiaCTKPath string + csvFiles []string + vendor string class string @@ -80,6 +83,11 @@ func New(opts ...Option) (Interface, error) { var lib Interface switch l.resolveMode() { + case ModeCSV: + if len(l.csvFiles) == 0 { + l.csvFiles = csv.DefaultFileList() + } + lib = (*csvlib)(l) case ModeManagement: if l.vendor == "" { l.vendor = "management.nvidia.com" @@ -156,6 +164,16 @@ func (l *nvcdilib) resolveMode() (rmode string) { return ModeWsl } + isNvml, reason := l.infolib.HasNvml() + l.logger.Debugf("Is NVML-based system? %v: %v", isNvml, reason) + + isTegra, reason := l.infolib.IsTegraSystem() + l.logger.Debugf("Is Tegra-based system? 
%v: %v", isTegra, reason) + + if isTegra && !isNvml { + return ModeCSV + } + return ModeNvml } diff --git a/pkg/nvcdi/lib_test.go b/pkg/nvcdi/lib_test.go index f0ddf96e..77ead658 100644 --- a/pkg/nvcdi/lib_test.go +++ b/pkg/nvcdi/lib_test.go @@ -28,9 +28,10 @@ func TestResolveMode(t *testing.T) { logger, _ := testlog.NewNullLogger() testCases := []struct { - mode string - // TODO: This should be a proper mock + mode string + isTegra bool hasDXCore bool + hasNVML bool expected string }{ { @@ -41,11 +42,34 @@ func TestResolveMode(t *testing.T) { { mode: "auto", hasDXCore: false, + isTegra: true, + hasNVML: false, + expected: "csv", + }, + { + mode: "auto", + hasDXCore: false, + isTegra: false, + hasNVML: false, + expected: "nvml", + }, + { + mode: "auto", + hasDXCore: false, + isTegra: true, + hasNVML: true, + expected: "nvml", + }, + { + mode: "auto", + hasDXCore: false, + isTegra: false, expected: "nvml", }, { mode: "nvml", hasDXCore: true, + isTegra: true, expected: "nvml", }, { @@ -65,7 +89,7 @@ func TestResolveMode(t *testing.T) { l := nvcdilib{ logger: logger, mode: tc.mode, - infolib: infoMock(tc.hasDXCore), + infolib: infoMock{hasDXCore: tc.hasDXCore, isTegra: tc.isTegra, hasNVML: tc.hasNVML}, } require.Equal(t, tc.expected, l.resolveMode()) @@ -73,16 +97,20 @@ func TestResolveMode(t *testing.T) { } } -type infoMock bool +type infoMock struct { + hasDXCore bool + isTegra bool + hasNVML bool +} func (i infoMock) HasDXCore() (bool, string) { - return bool(i), "" + return i.hasDXCore, "" } func (i infoMock) HasNvml() (bool, string) { - panic("should not be called") + return i.hasNVML, "" } func (i infoMock) IsTegraSystem() (bool, string) { - panic("should not be called") + return i.isTegra, "" } diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go index 254c6e0e..6baa1b52 100644 --- a/pkg/nvcdi/options.go +++ b/pkg/nvcdi/options.go @@ -96,3 +96,10 @@ func WithMergedDeviceOptions(opts ...transform.MergedDeviceOption) Option { 
o.mergedDeviceOptions = opts } } + +// WithCSVFiles sets the CSV files used in CSV mode; if none are set, New falls back to csv.DefaultFileList() +func WithCSVFiles(csvFiles []string) Option { + return func(o *nvcdilib) { + o.csvFiles = csvFiles + } +}