From 0ed757faee64fd068da7d72c9f2350d6b8f54967 Mon Sep 17 00:00:00 2001
From: Evan Lezar
Date: Wed, 12 Feb 2025 20:29:37 +0100
Subject: [PATCH] TOFIX: split auto

Signed-off-by: Evan Lezar
---
 internal/modifier/cdi.go     |  78 ------------------------
 internal/modifier/jit-cdi.go | 113 +++++++++++++++++++++++++++++++++++
 internal/modifier/options.go |  54 +++++++++++++++++
 3 files changed, 167 insertions(+), 78 deletions(-)
 create mode 100644 internal/modifier/jit-cdi.go
 create mode 100644 internal/modifier/options.go

diff --git a/internal/modifier/cdi.go b/internal/modifier/cdi.go
index 247743ed..5d0b1dc9 100644
--- a/internal/modifier/cdi.go
+++ b/internal/modifier/cdi.go
@@ -27,14 +27,6 @@ import (
 	"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
 	"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier/cdi"
 	"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
-	"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
-	"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
-)
-
-const (
-	automaticDeviceVendor = "runtime.nvidia.com"
-	automaticDeviceClass  = "gpu"
-	automaticDeviceKind   = automaticDeviceVendor + "/" + automaticDeviceClass
 )
 
 // NewCDIModifier creates an OCI spec modifier that determines the modifications to make based on the
@@ -175,73 +167,3 @@ func getAnnotationDevices(prefixes []string, annotations map[string]string) ([]s
 
 	return annotationDevices, nil
 }
-
-// filterAutomaticDevices searches for "automatic" device names in the input slice.
-// "Automatic" devices are a well-defined list of CDI device names which, when requested,
-// trigger the generation of a CDI spec at runtime. This removes the need to generate a
-// CDI spec on the system a-priori as well as keep it up-to-date.
-func filterAutomaticDevices(devices []string) []string {
-	var automatic []string
-	for _, device := range devices {
-		vendor, class, _ := parser.ParseDevice(device)
-		if vendor == automaticDeviceVendor && class == automaticDeviceClass {
-			automatic = append(automatic, device)
-		}
-	}
-	return automatic
-}
-
-func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, devices []string) (oci.SpecModifier, error) {
-	logger.Debugf("Generating in-memory CDI specs for devices %v", devices)
-	// TODO: We should try to load the kernel modules and create the device nodes here.
-	// Failures should raise a warning and not error out.
-	spec, err := generateAutomaticCDISpec(logger, cfg, devices)
-	if err != nil {
-		return nil, fmt.Errorf("failed to generate CDI spec: %w", err)
-	}
-	cdiModifier, err := cdi.New(
-		cdi.WithLogger(logger),
-		cdi.WithSpec(spec.Raw()),
-	)
-	if err != nil {
-		return nil, fmt.Errorf("failed to construct CDI modifier: %w", err)
-	}
-
-	return cdiModifier, nil
-}
-
-func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devices []string) (spec.Interface, error) {
-	cdilib, err := nvcdi.New(
-		nvcdi.WithLogger(logger),
-		nvcdi.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path),
-		nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
-		nvcdi.WithVendor("runtime.nvidia.com"),
-		nvcdi.WithClass("gpu"),
-	)
-	if err != nil {
-		return nil, fmt.Errorf("failed to construct CDI library: %w", err)
-	}
-
-	identifiers := []string{}
-	for _, device := range devices {
-		_, _, id := parser.ParseDevice(device)
-		identifiers = append(identifiers, id)
-	}
-
-	deviceSpecs, err := cdilib.GetDeviceSpecsByID(identifiers...)
-	if err != nil {
-		return nil, fmt.Errorf("failed to get CDI device specs: %w", err)
-	}
-
-	commonEdits, err := cdilib.GetCommonEdits()
-	if err != nil {
-		return nil, fmt.Errorf("failed to get common CDI spec edits: %w", err)
-	}
-
-	return spec.New(
-		spec.WithDeviceSpecs(deviceSpecs),
-		spec.WithEdits(*commonEdits.ContainerEdits),
-		spec.WithVendor("runtime.nvidia.com"),
-		spec.WithClass("gpu"),
-	)
-}
diff --git a/internal/modifier/jit-cdi.go b/internal/modifier/jit-cdi.go
new file mode 100644
index 00000000..332ffd47
--- /dev/null
+++ b/internal/modifier/jit-cdi.go
@@ -0,0 +1,113 @@
+/**
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package modifier
+
+import (
+	"fmt"
+
+	"tags.cncf.io/container-device-interface/pkg/parser"
+
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier/cdi"
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
+	"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
+	"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
+)
+
+// The vendor and class of the "automatic" CDI devices for which a CDI spec is
+// generated at runtime, and the CDI kind derived from them.
+const (
+	automaticDeviceVendor = "runtime.nvidia.com"
+	automaticDeviceClass  = "gpu"
+	automaticDeviceKind   = automaticDeviceVendor + "/" + automaticDeviceClass
+)
+
+// newAutomaticCDISpecModifier generates an in-memory CDI spec for the specified
+// devices and returns an OCI spec modifier constructed from this spec.
+func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, devices []string) (oci.SpecModifier, error) {
+	logger.Debugf("Generating in-memory CDI specs for devices %v", devices)
+	// TODO: We should try to load the kernel modules and create the device nodes here.
+	// Failures should raise a warning and not error out.
+	spec, err := generateAutomaticCDISpec(logger, cfg, devices)
+	if err != nil {
+		return nil, fmt.Errorf("failed to generate CDI spec: %w", err)
+	}
+	cdiModifier, err := cdi.New(
+		cdi.WithLogger(logger),
+		cdi.WithSpec(spec.Raw()),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to construct CDI modifier: %w", err)
+	}
+
+	return cdiModifier, nil
+}
+
+// generateAutomaticCDISpec constructs a CDI spec for the automatic vendor and
+// class. The spec includes the device specs for the IDs of the specified
+// devices as well as the common container edits.
+func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devices []string) (spec.Interface, error) {
+	cdilib, err := nvcdi.New(
+		nvcdi.WithLogger(logger),
+		nvcdi.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path),
+		nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
+		nvcdi.WithVendor(automaticDeviceVendor),
+		nvcdi.WithClass(automaticDeviceClass),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to construct CDI library: %w", err)
+	}
+
+	identifiers := make([]string, 0, len(devices))
+	for _, device := range devices {
+		_, _, id := parser.ParseDevice(device)
+		identifiers = append(identifiers, id)
+	}
+
+	deviceSpecs, err := cdilib.GetDeviceSpecsByID(identifiers...)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get CDI device specs: %w", err)
+	}
+
+	commonEdits, err := cdilib.GetCommonEdits()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get common CDI spec edits: %w", err)
+	}
+
+	return spec.New(
+		spec.WithDeviceSpecs(deviceSpecs),
+		spec.WithEdits(*commonEdits.ContainerEdits),
+		spec.WithVendor(automaticDeviceVendor),
+		spec.WithClass(automaticDeviceClass),
+	)
+}
+
+// filterAutomaticDevices searches for "automatic" device names in the input slice.
+// "Automatic" devices are a well-defined list of CDI device names which, when requested,
+// trigger the generation of a CDI spec at runtime. This removes the need to generate a
+// CDI spec on the system a priori as well as keep it up-to-date.
+func filterAutomaticDevices(devices []string) []string {
+	var automatic []string
+	for _, device := range devices {
+		vendor, class, _ := parser.ParseDevice(device)
+		if vendor == automaticDeviceVendor && class == automaticDeviceClass {
+			automatic = append(automatic, device)
+		}
+	}
+	return automatic
+}
diff --git a/internal/modifier/options.go b/internal/modifier/options.go
new file mode 100644
index 00000000..09417b3f
--- /dev/null
+++ b/internal/modifier/options.go
@@ -0,0 +1,54 @@
+/**
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package modifier
+
+import (
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
+	"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
+)
+
+// options stores the values that are set through the functional options below.
+type options struct {
+	logger      logger.Interface
+	config      *config.Config
+	runtimeMode info.RuntimeMode
+}
+
+// Option is a function that updates the options it is applied to.
+type Option func(*options)
+
+// WithConfig returns an Option that sets the config.
+func WithConfig(config *config.Config) Option {
+	return func(o *options) {
+		o.config = config
+	}
+}
+
+// WithLogger returns an Option that sets the logger.
+func WithLogger(logger logger.Interface) Option {
+	return func(o *options) {
+		o.logger = logger
+	}
+}
+
+// WithRuntimeMode returns an Option that sets the runtime mode.
+func WithRuntimeMode(runtimeMode info.RuntimeMode) Option {
+	return func(o *options) {
+		o.runtimeMode = runtimeMode
+	}
+}