From 9eccc1659df2b856137629fee2cae7aa9215bc08 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 5 Feb 2025 11:12:06 +0100 Subject: [PATCH] Allow cdi mode to work with --gpus flag This changes ensures that the cdi modifier also removes the NVIDIA Container Runtime Hook from the incoming spec. This aligns with what is done for CSV modifications and prevents an error when starting the container. Signed-off-by: Evan Lezar --- internal/modifier/csv.go | 12 +- internal/modifier/hook_remover.go | 7 + internal/runtime/runtime_factory.go | 6 +- internal/runtime/runtime_factory_test.go | 179 +++++++++++++++++++++++ 4 files changed, 191 insertions(+), 13 deletions(-) diff --git a/internal/modifier/csv.go b/internal/modifier/csv.go index a6129432..d3dc84e7 100644 --- a/internal/modifier/csv.go +++ b/internal/modifier/csv.go @@ -68,20 +68,10 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image return nil, fmt.Errorf("failed to get CDI spec: %v", err) } - cdiModifier, err := cdi.New( + return cdi.New( cdi.WithLogger(logger), cdi.WithSpec(spec.Raw()), ) - if err != nil { - return nil, fmt.Errorf("failed to construct CDI modifier: %v", err) - } - - modifiers := Merge( - nvidiaContainerRuntimeHookRemover{logger}, - cdiModifier, - ) - - return modifiers, nil } func checkRequirements(logger logger.Interface, image image.CUDA) error { diff --git a/internal/modifier/hook_remover.go b/internal/modifier/hook_remover.go index 059cd8af..24cf7666 100644 --- a/internal/modifier/hook_remover.go +++ b/internal/modifier/hook_remover.go @@ -33,6 +33,13 @@ type nvidiaContainerRuntimeHookRemover struct { var _ oci.SpecModifier = (*nvidiaContainerRuntimeHookRemover)(nil) +// NewNvidiaContainerRuntimeHookRemover creates a modifier that removes any NVIDIA Container Runtime hooks from the provided spec. +func NewNvidiaContainerRuntimeHookRemover(logger logger.Interface) oci.SpecModifier { + return nvidiaContainerRuntimeHookRemover{ + logger: logger, + } +} + // Modify removes any NVIDIA Container Runtime hooks from the provided spec func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error { if spec == nil { diff --git a/internal/runtime/runtime_factory.go b/internal/runtime/runtime_factory.go index a4426459..2b5cd9c6 100644 --- a/internal/runtime/runtime_factory.go +++ b/internal/runtime/runtime_factory.go @@ -85,6 +85,8 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp switch modifierType { case "mode": modifiers = append(modifiers, modeModifier) + case "nvidia-hook-remover": + modifiers = append(modifiers, modifier.NewNvidiaContainerRuntimeHookRemover(logger)) case "graphics": graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, image, driver) if err != nil { @@ -121,10 +123,10 @@ func supportedModifierTypes(mode string) []string { switch mode { case "cdi": // For CDI mode we make no additional modifications. - return []string{"mode"} + return []string{"nvidia-hook-remover", "mode"} case "csv": // For CSV mode we support mode and feature-gated modification. - return []string{"mode", "feature-gated"} + return []string{"nvidia-hook-remover", "mode", "feature-gated"} default: return []string{"mode", "graphics", "feature-gated"} } diff --git a/internal/runtime/runtime_factory_test.go b/internal/runtime/runtime_factory_test.go index dd052d8f..71ce3063 100644 --- a/internal/runtime/runtime_factory_test.go +++ b/internal/runtime/runtime_factory_test.go @@ -30,6 +30,7 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root" + "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" "github.com/NVIDIA/nvidia-container-toolkit/internal/test" ) @@ -165,3 +166,181 @@ func TestFactoryMethod(t *testing.T) { }) } } + +func TestNewSpecModifier(t *testing.T) { + logger, _ := testlog.NewNullLogger() + driver := root.New( + root.WithDriverRoot("/nvidia/driver/root"), + ) + testCases := []struct { + description string + config *config.Config + spec *specs.Spec + expectedSpec *specs.Spec + }{ + { + description: "csv mode removes nvidia-container-runtime-hook", + config: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + Mode: "csv", + }, + }, + spec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/path/to/nvidia-container-runtime-hook", + Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"}, + }, + }, + }, + }, + expectedSpec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: nil, + }, + }, + }, + { + description: "csv mode removes nvidia-container-toolkit", + config: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + Mode: "csv", + }, + }, + spec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/path/to/nvidia-container-toolkit", + Args: []string{"/path/to/nvidia-container-toolkit", "prestart"}, + }, + }, + }, + }, + expectedSpec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: nil, + }, + }, + }, + { + description: "cdi mode removes nvidia-container-runtime-hook", + config: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + Mode: "cdi", + }, + }, + spec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/path/to/nvidia-container-runtime-hook", + Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"}, + }, + }, + }, + }, + expectedSpec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: nil, + }, + }, + }, + { + description: "cdi mode removes nvidia-container-toolkit", + config: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + Mode: "cdi", + }, + }, + spec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/path/to/nvidia-container-toolkit", + Args: []string{"/path/to/nvidia-container-toolkit", "prestart"}, + }, + }, + }, + }, + expectedSpec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: nil, + }, + }, + }, + { + description: "legacy mode keeps nvidia-container-runtime-hook", + config: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + Mode: "legacy", + }, + }, + spec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/path/to/nvidia-container-runtime-hook", + Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"}, + }, + }, + }, + }, + expectedSpec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/path/to/nvidia-container-runtime-hook", + Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"}, + }, + }, + }, + }, + }, + { + description: "legacy mode keeps nvidia-container-toolkit", + config: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + Mode: "legacy", + }, + }, + spec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/path/to/nvidia-container-toolkit", + Args: []string{"/path/to/nvidia-container-toolkit", "prestart"}, + }, + }, + }, + }, + expectedSpec: &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/path/to/nvidia-container-toolkit", + Args: []string{"/path/to/nvidia-container-toolkit", "prestart"}, + }, + }, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + spec := &oci.SpecMock{ + LoadFunc: func() (*specs.Spec, error) { + return tc.spec, nil + }, + } + m, err := newSpecModifier(logger, tc.config, spec, driver) + require.NoError(t, err) + + err = m.Modify(tc.spec) + require.NoError(t, err) + require.EqualValues(t, tc.expectedSpec, tc.spec) + }) + } +}