From 49f4bb3198dc5031ac396bf7b888dda248e71529 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 2 May 2022 10:28:25 +0200 Subject: [PATCH] Check requirements before creating CSV discoverer Signed-off-by: Evan Lezar --- .../modifier/experimental.go | 41 ++++++++++++++++++- .../modifier/experimental_test.go | 4 +- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/cmd/nvidia-container-runtime/modifier/experimental.go b/cmd/nvidia-container-runtime/modifier/experimental.go index 2a048369..4993c147 100644 --- a/cmd/nvidia-container-runtime/modifier/experimental.go +++ b/cmd/nvidia-container-runtime/modifier/experimental.go @@ -22,10 +22,13 @@ import ( "strings" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" + "github.com/NVIDIA/nvidia-container-toolkit/internal/cuda" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv" "github.com/NVIDIA/nvidia-container-toolkit/internal/edits" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" + "github.com/NVIDIA/nvidia-container-toolkit/internal/requirements" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" ) @@ -46,7 +49,7 @@ const ( // NewExperimentalModifier creates a modifier that applies the experimental // modications to an OCI spec if required by the runtime wrapper. func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) { - _, err := ociSpec.Load() + rawSpec, err := ociSpec.Load() if err != nil { return nil, fmt.Errorf("failed to load OCI spec: %v", err) } @@ -75,6 +78,17 @@ func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec } d = legacyDiscoverer case "csv": + // TODO: Once the devices have been encapsulated in the CUDA image, this can be moved to before the + // visible devices are checked. + image, err := image.NewCUDAImageFromSpec(rawSpec) + if err != nil { + return nil, err + } + + if err := checkRequirements(logger, &image); err != nil { + return nil, fmt.Errorf("requirements not met: %v", err) + } + csvFiles, err := csv.GetFileList(csv.DefaultMountSpecPath) if err != nil { return nil, fmt.Errorf("failed to get list of CSV files: %v", err) @@ -134,6 +148,31 @@ func (m experimental) Modify(spec *specs.Spec) error { return specEdits.Modify(spec) } +func checkRequirements(logger *logrus.Logger, image *image.CUDA) error { + if image.HasDisableRequire() { + // TODO: We could print the real value here instead + logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", true) + return nil + } + + imageRequirements, err := image.GetRequirements() + if err != nil { + // TODO: Should we treat this as a failure, or just issue a warning? + return fmt.Errorf("failed to get image requirements: %v", err) + } + + r := requirements.New(logger, imageRequirements) + + cudaVersion, err := cuda.Version() + if err != nil { + logger.Warnf("Failed to get CUDA version: %v", err) + } else { + r.AddVersionProperty(requirements.CUDA, cudaVersion) + } + + return r.Assert() +} + // resolveAutoDiscoverMode determines the correct discover mode for the specified platform if set to "auto" func resolveAutoDiscoverMode(logger *logrus.Logger, mode string) (rmode string) { if mode != "auto" { diff --git a/cmd/nvidia-container-runtime/modifier/experimental_test.go b/cmd/nvidia-container-runtime/modifier/experimental_test.go index 34f83bf4..cd492000 100644 --- a/cmd/nvidia-container-runtime/modifier/experimental_test.go +++ b/cmd/nvidia-container-runtime/modifier/experimental_test.go @@ -42,8 +42,8 @@ func TestNewExperimentalModifier(t *testing.T) { { description: "spec load error returns error", spec: &oci.SpecMock{ - LoadFunc: func() error { - return fmt.Errorf("load failed") + LoadFunc: func() (*specs.Spec, error) { + return nil, fmt.Errorf("load failed") }, }, expectedError: fmt.Errorf("load failed"),