Compare commits

...

11 Commits

Author SHA1 Message Date
Evan Lezar
fa66e4cd56 Merge pull request #802 from elezar/bump-release-v1.17.2
Some checks failed
CodeQL / Analyze Go code with CodeQL (push) Failing after 12m54s
Golang / check (push) Failing after 2m12s
Golang / Unit test (push) Failing after 1m35s
Golang / Build (push) Failing after 1m36s
image / packages (${{github.event_name == 'pull_request'}}, centos7-aarch64) (push) Failing after 1m52s
image / packages (${{github.event_name == 'pull_request'}}, centos7-x86_64) (push) Failing after 1m52s
image / packages (${{github.event_name == 'pull_request'}}, centos8-ppc64le) (push) Failing after 1m38s
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-amd64) (push) Failing after 1m45s
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-arm64) (push) Failing after 1m44s
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-ppc64le) (push) Failing after 1m51s
image / image (packaging, ${{github.event_name == 'pull_request'}}) (push) Has been skipped
image / image (ubi8, ${{github.event_name == 'pull_request'}}) (push) Has been skipped
image / image (ubuntu20.04, ${{github.event_name == 'pull_request'}}) (push) Has been skipped
Bump release v1.17.2
2024-11-15 11:04:17 -07:00
Evan Lezar
aac7258b6f Bump version for v1.17.2 release
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-11-15 11:02:13 -07:00
Evan Lezar
70ac1e2d28 Merge pull request #801 from elezar/fix-legacy-nvidia-imex-channels
Fix NVIDIA_IMEX_CHANNELS handling on legacy images
2024-11-15 11:01:44 -07:00
Evan Lezar
f774ceeedd Fix NVIDIA_IMEX_CHANNELS handling on legacy images
For legacy images (images with a CUDA_VERSION set but no CUDA_REQUIRES set), the
default behaviour for device envvars is to treat non-existence as all.

This change ensures that the NVIDIA_IMEX_CHANNELS envvar is not treated in the same
way, instead returning no devices if the envvar is not set.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-11-15 10:57:02 -07:00
Evan Lezar
1467f3f339 Merge pull request #786 from elezar/bump-release-v1.17.1
Some checks failed
CodeQL / Analyze Go code with CodeQL (push) Has been cancelled
Golang / check (push) Has been cancelled
Golang / Unit test (push) Has been cancelled
Golang / Build (push) Has been cancelled
image / packages (${{github.event_name == 'pull_request'}}, centos7-aarch64) (push) Has been cancelled
image / packages (${{github.event_name == 'pull_request'}}, centos7-x86_64) (push) Has been cancelled
image / packages (${{github.event_name == 'pull_request'}}, centos8-ppc64le) (push) Has been cancelled
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-amd64) (push) Has been cancelled
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-arm64) (push) Has been cancelled
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-ppc64le) (push) Has been cancelled
image / image (packaging, ${{github.event_name == 'pull_request'}}) (push) Has been cancelled
image / image (ubi8, ${{github.event_name == 'pull_request'}}) (push) Has been cancelled
image / image (ubuntu20.04, ${{github.event_name == 'pull_request'}}) (push) Has been cancelled
Bump version for v1.17.1 release
2024-11-08 16:28:53 -08:00
Evan Lezar
ca9612a9ff Bump version for v1.17.1 release
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-11-08 16:26:42 -08:00
Evan Lezar
11e4af3e8a Merge pull request #784 from elezar/fix-config-file-path
Fix bug in default config file path
2024-11-08 16:05:49 -08:00
Evan Lezar
edf5d970f4 Fix bug in default config file path
This fix ensures that the default config file path for the nvidia-ctk runtime configure
command is set consistently.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-11-08 15:58:12 -08:00
Evan Lezar
b03e942424 Merge pull request #780 from elezar/add-config-fallback
Fallback to file for runtime config
2024-11-08 15:43:15 -08:00
Evan Lezar
a9185918ab Fallback to file for runtime config
This change ensures that we fall back to the previous behaviour of
reading the existing config from the specified config file if extracting
the current config from the command line fails. This fixes use cases where
the containerd / crio executables are not available.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-11-08 09:10:07 -08:00
Evan Lezar
3cb613a12b Merge pull request #776 from elezar/fix-libcuda-symlink
Force symlink creation in create-symlink hook
2024-11-07 18:28:01 +01:00
8 changed files with 106 additions and 61 deletions

View File

@@ -1,5 +1,16 @@
# NVIDIA Container Toolkit Changelog
## v1.17.2
- Fixed a bug where legacy images would set imex channels as `all`.
## v1.17.1
- Fixed a bug where specific symlinks existing in a container image could cause a container to fail to start.
- Fixed a bug on Tegra-based systems where a container would fail to start.
- Fixed a bug where the default container runtime config path was not properly set.
### Changes in the Toolkit Container
- Fallback to using a config file if the current runtime config can not be determined from the command line.
## v1.17.0
- Promote v1.17.0-rc.2 to v1.17.0
- Fix bug when using just-in-time CDI spec generation

View File

@@ -225,6 +225,17 @@ func (m command) validateFlags(c *cli.Context, config *config) error {
return fmt.Errorf("unrecognized Config Source: %v", config.configSource)
}
if config.configFilePath == "" {
switch config.runtime {
case "containerd":
config.configFilePath = defaultContainerdConfigFilePath
case "crio":
config.configFilePath = defaultCrioConfigFilePath
case "docker":
config.configFilePath = defaultDockerConfigFilePath
}
}
return nil
}
@@ -241,9 +252,6 @@ func (m command) configureWrapper(c *cli.Context, config *config) error {
// configureConfigFile updates the specified container engine config file to enable the NVIDIA runtime.
func (m command) configureConfigFile(c *cli.Context, config *config) error {
configFilePath := config.resolveConfigFilePath()
var err error
configSource, err := config.resolveConfigSource()
if err != nil {
return err
@@ -254,19 +262,19 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error {
case "containerd":
cfg, err = containerd.New(
containerd.WithLogger(m.logger),
containerd.WithPath(configFilePath),
containerd.WithPath(config.configFilePath),
containerd.WithConfigSource(configSource),
)
case "crio":
cfg, err = crio.New(
crio.WithLogger(m.logger),
crio.WithPath(configFilePath),
crio.WithPath(config.configFilePath),
crio.WithConfigSource(configSource),
)
case "docker":
cfg, err = docker.New(
docker.WithLogger(m.logger),
docker.WithPath(configFilePath),
docker.WithPath(config.configFilePath),
)
default:
err = fmt.Errorf("unrecognized runtime '%v'", config.runtime)
@@ -307,22 +315,6 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error {
return nil
}
// resolveConfigFilePath returns the default config file path for the configured container engine
func (c *config) resolveConfigFilePath() string {
if c.configFilePath != "" {
return c.configFilePath
}
switch c.runtime {
case "containerd":
return defaultContainerdConfigFilePath
case "crio":
return defaultCrioConfigFilePath
case "docker":
return defaultDockerConfigFilePath
}
return ""
}
// resolveConfigSource returns the default config source or the user provided config source
func (c *config) resolveConfigSource() (toml.Loader, error) {
switch c.configSource {
@@ -351,7 +343,7 @@ func (c *config) getOutputConfigPath() string {
if c.dryRun {
return ""
}
return c.resolveConfigFilePath()
return c.configFilePath
}
// configureOCIHook creates and configures the OCI hook for the NVIDIA runtime

View File

@@ -292,7 +292,11 @@ func (i CUDA) CDIDevicesFromMounts() []string {
// ImexChannelsFromEnvVar returns the list of IMEX channels requested for the image.
func (i CUDA) ImexChannelsFromEnvVar() []string {
return i.DevicesFromEnvvars(EnvVarNvidiaImexChannels).List()
imexChannels := i.DevicesFromEnvvars(EnvVarNvidiaImexChannels).List()
if len(imexChannels) == 1 && imexChannels[0] == "all" {
return nil
}
return imexChannels
}
// ImexChannelsFromMounts returns the list of IMEX channels requested for the image.

View File

@@ -203,6 +203,37 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) {
}
}
func TestImexChannelsFromEnvVar(t *testing.T) {
testCases := []struct {
description string
env []string
expected []string
}{
{
description: "no imex channels specified",
},
{
description: "imex channel specified",
env: []string{
"NVIDIA_IMEX_CHANNELS=3,4",
},
expected: []string{"3", "4"},
},
}
for _, tc := range testCases {
for id, baseEnvvars := range map[string][]string{"": nil, "legacy": {"CUDA_VERSION=1.2.3"}} {
t.Run(tc.description+id, func(t *testing.T) {
i, err := NewCUDAImageFromEnv(append(baseEnvvars, tc.env...))
require.NoError(t, err)
channels := i.ImexChannelsFromEnvVar()
require.EqualValues(t, tc.expected, channels)
})
}
}
}
func makeTestMounts(paths ...string) []specs.Mount {
var mounts []specs.Mount
for _, path := range paths {

View File

@@ -25,6 +25,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container"
)
@@ -84,13 +85,7 @@ func Flags(opts *Options) []cli.Flag {
func Setup(c *cli.Context, o *container.Options, co *Options) error {
log.Infof("Starting 'setup' for %v", c.App.Name)
cfg, err := containerd.New(
containerd.WithPath(o.Config),
containerd.WithConfigSource(containerd.CommandLineSource(o.HostRootMount)),
containerd.WithRuntimeType(co.runtimeType),
containerd.WithUseLegacyConfig(co.useLegacyConfig),
containerd.WithContainerAnnotations(co.containerAnnotationsFromCDIPrefixes()...),
)
cfg, err := getRuntimeConfig(o, co)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
@@ -114,13 +109,7 @@ func Setup(c *cli.Context, o *container.Options, co *Options) error {
func Cleanup(c *cli.Context, o *container.Options, co *Options) error {
log.Infof("Starting 'cleanup' for %v", c.App.Name)
cfg, err := containerd.New(
containerd.WithPath(o.Config),
containerd.WithConfigSource(containerd.CommandLineSource(o.HostRootMount)),
containerd.WithRuntimeType(co.runtimeType),
containerd.WithUseLegacyConfig(co.useLegacyConfig),
containerd.WithContainerAnnotations(co.containerAnnotationsFromCDIPrefixes()...),
)
cfg, err := getRuntimeConfig(o, co)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
@@ -169,13 +158,24 @@ func (o *Options) runtimeConfigOverride() (map[string]interface{}, error) {
}
func GetLowlevelRuntimePaths(o *container.Options, co *Options) ([]string, error) {
cfg, err := containerd.New(
containerd.WithConfigSource(containerd.CommandLineSource(o.HostRootMount)),
containerd.WithRuntimeType(co.runtimeType),
containerd.WithUseLegacyConfig(co.useLegacyConfig),
)
cfg, err := getRuntimeConfig(o, co)
if err != nil {
return nil, fmt.Errorf("unable to load containerd config: %w", err)
}
return engine.GetBinaryPathsForRuntimes(cfg), nil
}
func getRuntimeConfig(o *container.Options, co *Options) (engine.Interface, error) {
return containerd.New(
containerd.WithPath(o.Config),
containerd.WithConfigSource(
toml.LoadFirst(
containerd.CommandLineSource(o.HostRootMount),
toml.FromFile(o.Config),
),
),
containerd.WithRuntimeType(co.runtimeType),
containerd.WithUseLegacyConfig(co.useLegacyConfig),
containerd.WithContainerAnnotations(co.containerAnnotationsFromCDIPrefixes()...),
)
}

View File

@@ -28,6 +28,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/crio"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/ocihook"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container"
)
@@ -116,10 +117,7 @@ func setupHook(o *container.Options, co *Options) error {
func setupConfig(o *container.Options) error {
log.Infof("Updating config file")
cfg, err := crio.New(
crio.WithPath(o.Config),
crio.WithConfigSource(crio.CommandLineSource(o.HostRootMount)),
)
cfg, err := getRuntimeConfig(o)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
@@ -168,10 +166,7 @@ func cleanupHook(co *Options) error {
func cleanupConfig(o *container.Options) error {
log.Infof("Reverting config file modifications")
cfg, err := crio.New(
crio.WithPath(o.Config),
crio.WithConfigSource(crio.CommandLineSource(o.HostRootMount)),
)
cfg, err := getRuntimeConfig(o)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
@@ -195,11 +190,21 @@ func RestartCrio(o *container.Options) error {
}
func GetLowlevelRuntimePaths(o *container.Options) ([]string, error) {
cfg, err := crio.New(
crio.WithConfigSource(crio.CommandLineSource(o.HostRootMount)),
)
cfg, err := getRuntimeConfig(o)
if err != nil {
return nil, fmt.Errorf("unable to load crio config: %w", err)
}
return engine.GetBinaryPathsForRuntimes(cfg), nil
}
func getRuntimeConfig(o *container.Options) (engine.Interface, error) {
return crio.New(
crio.WithPath(o.Config),
crio.WithConfigSource(
toml.LoadFirst(
crio.CommandLineSource(o.HostRootMount),
toml.FromFile(o.Config),
),
),
)
}

View File

@@ -45,9 +45,7 @@ func Flags(opts *Options) []cli.Flag {
func Setup(c *cli.Context, o *container.Options) error {
log.Infof("Starting 'setup' for %v", c.App.Name)
cfg, err := docker.New(
docker.WithPath(o.Config),
)
cfg, err := getRuntimeConfig(o)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
@@ -71,9 +69,7 @@ func Setup(c *cli.Context, o *container.Options) error {
func Cleanup(c *cli.Context, o *container.Options) error {
log.Infof("Starting 'cleanup' for %v", c.App.Name)
cfg, err := docker.New(
docker.WithPath(o.Config),
)
cfg, err := getRuntimeConfig(o)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
@@ -107,3 +103,9 @@ func GetLowlevelRuntimePaths(o *container.Options) ([]string, error) {
}
return engine.GetBinaryPathsForRuntimes(cfg), nil
}
func getRuntimeConfig(o *container.Options) (engine.Interface, error) {
return docker.New(
docker.WithPath(o.Config),
)
}

View File

@@ -13,7 +13,7 @@
# limitations under the License.
LIB_NAME := nvidia-container-toolkit
LIB_VERSION := 1.17.0
LIB_VERSION := 1.17.2
LIB_TAG :=
# The package version is the combination of the library version and tag.