From 3045954cd92519611325b52d8bf82383f5b2a6a0 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 22 Nov 2023 14:23:28 +0100 Subject: [PATCH 1/2] Consolidate GDS and MOFED modifiers Signed-off-by: Evan Lezar --- internal/modifier/{gds.go => gated.go} | 38 +++++++++++++------ internal/modifier/mofed.go | 51 -------------------------- internal/runtime/runtime_factory.go | 10 +---- 3 files changed, 29 insertions(+), 70 deletions(-) rename internal/modifier/{gds.go => gated.go} (55%) delete mode 100644 internal/modifier/mofed.go diff --git a/internal/modifier/gds.go b/internal/modifier/gated.go similarity index 55% rename from internal/modifier/gds.go rename to internal/modifier/gated.go index ac431405..1881f174 100644 --- a/internal/modifier/gds.go +++ b/internal/modifier/gated.go @@ -27,27 +27,43 @@ import ( ) const ( - nvidiaGDSEnvvar = "NVIDIA_GDS" + nvidiaGDSEnvvar = "NVIDIA_GDS" + nvidiaMOFEDEnvvar = "NVIDIA_MOFED" ) -// NewGDSModifier creates the modifiers for GDS devices. -// If the spec does not contain the NVIDIA_GDS=enabled environment variable no changes are made. -func NewGDSModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) { +// NewFeatureGatedModifier creates the modifiers for optional features. +// These include: +// +// NVIDIA_GDS=enabled +// NVIDIA_MOFED=enabled +// +// If not devices are selected, no changes are made. 
+func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) { if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 { logger.Infof("No modification required; no devices requested") return nil, nil } - if image.Getenv(nvidiaGDSEnvvar) != "enabled" { - return nil, nil - } + var discoverers []discover.Discover driverRoot := cfg.NVIDIAContainerCLIConfig.Root devRoot := cfg.NVIDIAContainerCLIConfig.Root - d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot) - if err != nil { - return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %v", err) + + if image.Getenv(nvidiaGDSEnvvar) == "enabled" { + d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot) + if err != nil { + return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %w", err) + } + discoverers = append(discoverers, d) } - return NewModifierFromDiscoverer(logger, d) + if image.Getenv(nvidiaMOFEDEnvvar) == "enabled" { + d, err := discover.NewMOFEDDiscoverer(logger, devRoot) + if err != nil { + return nil, fmt.Errorf("failed to construct discoverer for MOFED devices: %w", err) + } + discoverers = append(discoverers, d) + } + + return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...)) } diff --git a/internal/modifier/mofed.go b/internal/modifier/mofed.go deleted file mode 100644 index 0a81a3a6..00000000 --- a/internal/modifier/mofed.go +++ /dev/null @@ -1,51 +0,0 @@ -/** -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package modifier - -import ( - "fmt" - - "github.com/NVIDIA/nvidia-container-toolkit/internal/config" - "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" - "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" - "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" - "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" -) - -const ( - nvidiaMOFEDEnvvar = "NVIDIA_MOFED" -) - -// NewMOFEDModifier creates the modifiers for MOFED devices. -// If the spec does not contain the NVIDIA_MOFED=enabled environment variable no changes are made. -func NewMOFEDModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) { - if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 { - logger.Infof("No modification required; no devices requested") - return nil, nil - } - - if image.Getenv(nvidiaMOFEDEnvvar) != "enabled" { - return nil, nil - } - - d, err := discover.NewMOFEDDiscoverer(logger, cfg.NVIDIAContainerCLIConfig.Root) - if err != nil { - return nil, fmt.Errorf("failed to construct discoverer for MOFED devices: %v", err) - } - - return NewModifierFromDiscoverer(logger, d) -} diff --git a/internal/runtime/runtime_factory.go b/internal/runtime/runtime_factory.go index 81ab94f1..a8bdbbf6 100644 --- a/internal/runtime/runtime_factory.go +++ b/internal/runtime/runtime_factory.go @@ -87,12 +87,7 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp return nil, err } - gdsModifier, err := modifier.NewGDSModifier(logger, cfg, image) 
- if err != nil { - return nil, err - } - - mofedModifier, err := modifier.NewMOFEDModifier(logger, cfg, image) + featureModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, image) if err != nil { return nil, err } @@ -100,8 +95,7 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp modifiers := modifier.Merge( modeModifier, graphicsModifier, - gdsModifier, - mofedModifier, + featureModifier, ) return modifiers, nil } From efae5018341e3e659b5e6113c225f0de9634215f Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 22 Nov 2023 14:16:34 +0100 Subject: [PATCH 2/2] Add support for injecting NVSWITCH devices This change adds support for an NVIDIA_NVSWITCH environment variable. When set to `enabled` this triggers the injection of all available /dev/nvidia-nvswitch* device nodes. Signed-off-by: Evan Lezar --- CHANGELOG.md | 1 + internal/discover/nvswitch.go | 33 +++++++++++++++++++++++++++++++++ internal/modifier/gated.go | 14 ++++++++++++-- 3 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 internal/discover/nvswitch.go diff --git a/CHANGELOG.md b/CHANGELOG.md index bbc75663..62155ab6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ * Include `nvidia/nvoptix.bin` in list of graphics mounts. * Include `vulkan/icd.d/nvidia_layers.json` in list of graphics mounts. * Add support for `--library-search-paths` to `nvidia-ctk cdi generate` command. +* Add support for injecting /dev/nvidia-nvswitch* devices if the NVIDIA_NVSWITCH=enabled envvar is specified. * [libnvidia-container] Fix device permission check when using cgroupv2 (fixes #227) diff --git a/internal/discover/nvswitch.go b/internal/discover/nvswitch.go new file mode 100644 index 00000000..fb956d11 --- /dev/null +++ b/internal/discover/nvswitch.go @@ -0,0 +1,33 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package discover + +import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" + +// NewNvSwitchDiscoverer creates a discoverer for NVSWITCH devices. +func NewNvSwitchDiscoverer(logger logger.Interface, devRoot string) (Discover, error) { + devices := NewCharDeviceDiscoverer( + logger, + devRoot, + []string{ + "/dev/nvidia-nvswitchctl", + "/dev/nvidia-nvswitch*", + }, + ) + + return devices, nil +} diff --git a/internal/modifier/gated.go b/internal/modifier/gated.go index 1881f174..c3b6df39 100644 --- a/internal/modifier/gated.go +++ b/internal/modifier/gated.go @@ -27,8 +27,9 @@ import ( ) const ( - nvidiaGDSEnvvar = "NVIDIA_GDS" - nvidiaMOFEDEnvvar = "NVIDIA_MOFED" + nvidiaGDSEnvvar = "NVIDIA_GDS" + nvidiaMOFEDEnvvar = "NVIDIA_MOFED" + nvidiaNVSWITCHEnvvar = "NVIDIA_NVSWITCH" ) // NewFeatureGatedModifier creates the modifiers for optional features. @@ -36,6 +37,7 @@ const ( // // NVIDIA_GDS=enabled // NVIDIA_MOFED=enabled +// NVIDIA_NVSWITCH=enabled // // If not devices are selected, no changes are made. 
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) { @@ -65,5 +67,13 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image discoverers = append(discoverers, d) } + if image.Getenv(nvidiaNVSWITCHEnvvar) == "enabled" { + d, err := discover.NewNvSwitchDiscoverer(logger, devRoot) + if err != nil { + return nil, fmt.Errorf("failed to construct discoverer for NVSWITCH devices: %w", err) + } + discoverers = append(discoverers, d) + } + return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...)) }