Add a new gated modifier for GDRCopy which injects the gdrdrv device node

Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
This commit is contained in:
Christopher Desiniotis 2024-01-03 15:32:21 -08:00
parent 738ebd83d3
commit 55097b3d7d
3 changed files with 38 additions and 0 deletions

View File

@ -11,6 +11,7 @@
* Add `crun` to the list of configured low-level runtimes. * Add `crun` to the list of configured low-level runtimes.
* Added support for `--ldconfig-path` to `nvidia-ctk cdi generate` command. * Added support for `--ldconfig-path` to `nvidia-ctk cdi generate` command.
* Fix `nvidia-ctk runtime configure --cdi.enabled` for Docker. * Fix `nvidia-ctk runtime configure --cdi.enabled` for Docker.
* Add discovery of the GDRCopy device (`gdrdrv`) if the `NVIDIA_GDRCOPY` environment variable of the container is set to `enabled`.
* [toolkit-container] Bump CUDA base image version to 12.3.1. * [toolkit-container] Bump CUDA base image version to 12.3.1.

View File

@ -0,0 +1,27 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
// NewGDRCopyDiscoverer creates a discoverer for the GDRCopy device node
// (/dev/gdrdrv). Discovery is delegated to NewCharDeviceDiscoverer, with the
// supplied logger and devRoot passed through unchanged.
//
// The returned error is always nil; it is part of the signature only.
func NewGDRCopyDiscoverer(logger logger.Interface, devRoot string) (Discover, error) {
	// The single device node requested for GDRCopy.
	gdrcopyDevices := []string{"/dev/gdrdrv"}

	d := NewCharDeviceDiscoverer(logger, devRoot, gdrcopyDevices)
	return d, nil
}

View File

@ -30,6 +30,7 @@ const (
nvidiaGDSEnvvar = "NVIDIA_GDS" nvidiaGDSEnvvar = "NVIDIA_GDS"
nvidiaMOFEDEnvvar = "NVIDIA_MOFED" nvidiaMOFEDEnvvar = "NVIDIA_MOFED"
nvidiaNVSWITCHEnvvar = "NVIDIA_NVSWITCH" nvidiaNVSWITCHEnvvar = "NVIDIA_NVSWITCH"
nvidiaGDRCOPYEnvvar = "NVIDIA_GDRCOPY"
) )
// NewFeatureGatedModifier creates the modifiers for optional features. // NewFeatureGatedModifier creates the modifiers for optional features.
@ -38,6 +39,7 @@ const (
// NVIDIA_GDS=enabled // NVIDIA_GDS=enabled
// NVIDIA_MOFED=enabled // NVIDIA_MOFED=enabled
// NVIDIA_NVSWITCH=enabled // NVIDIA_NVSWITCH=enabled
// NVIDIA_GDRCOPY=enabled
// //
// If no devices are selected, no changes are made. // If no devices are selected, no changes are made.
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) { func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) {
@ -75,5 +77,13 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
discoverers = append(discoverers, d) discoverers = append(discoverers, d)
} }
if image.Getenv(nvidiaGDRCOPYEnvvar) == "enabled" {
d, err := discover.NewGDRCopyDiscoverer(logger, devRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for GDRCopy devices: %w", err)
}
discoverers = append(discoverers, d)
}
return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...)) return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...))
} }