mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
74 Commits
v1.13.0-rc
...
v1.13.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
28b70663f1 | ||
|
|
c0fe8f27eb | ||
|
|
926ac77bc0 | ||
|
|
fc7c8f7520 | ||
|
|
46c1c45d85 | ||
|
|
f99e863649 | ||
|
|
dcc21ece97 | ||
|
|
a53e3604a6 | ||
|
|
cfea6c1179 | ||
|
|
4d1daa0b6c | ||
|
|
df925bc7fd | ||
|
|
df22e37dfd | ||
|
|
2136266d1d | ||
|
|
a95232dd33 | ||
|
|
29c6288128 | ||
|
|
cd6fcb5297 | ||
|
|
36989deff7 | ||
|
|
7f6c9851fe | ||
|
|
b7079454b5 | ||
|
|
448bd45ab4 | ||
|
|
dde6170df1 | ||
|
|
e4b9350e65 | ||
|
|
622a0649ce | ||
|
|
f6983969ad | ||
|
|
7f7fc35843 | ||
|
|
8eef7e5406 | ||
|
|
f27c33b45f | ||
|
|
6a83e2ebe5 | ||
|
|
ee5be5e3f2 | ||
|
|
be0cc9dc6e | ||
|
|
7c5283bb97 | ||
|
|
4d5ba09d88 | ||
|
|
149236b002 | ||
|
|
ee141f97dc | ||
|
|
646503ff31 | ||
|
|
cdaaf5e46f | ||
|
|
e774c51c97 | ||
|
|
7f5c9abc1e | ||
|
|
92d82ceaee | ||
|
|
c46b118f37 | ||
|
|
1722b07615 | ||
|
|
c13c6ebadb | ||
|
|
2abe679dd1 | ||
|
|
9571513601 | ||
|
|
ff2767ee7b | ||
|
|
56319475a6 | ||
|
|
a3ee58a294 | ||
|
|
7a533aeff3 | ||
|
|
226c54613e | ||
|
|
1ebbebf5de | ||
|
|
33f6fe0217 | ||
|
|
5ff206e1a9 | ||
|
|
df618d3cba | ||
|
|
9506bd9da0 | ||
|
|
5e0684e99d | ||
|
|
09a0cb24cc | ||
|
|
ff92f1d799 | ||
|
|
b87703c503 | ||
|
|
b2aaa21b0a | ||
|
|
310c15b046 | ||
|
|
685802b1ce | ||
|
|
380eb8340a | ||
|
|
f98e1160f5 | ||
|
|
1962fd68df | ||
|
|
29813c1e14 | ||
|
|
df40fbe03e | ||
|
|
7000c6074e | ||
|
|
ef1fe3ab41 | ||
|
|
fdd198b0e8 | ||
|
|
e37f77e02d | ||
|
|
3fcfee88be | ||
|
|
a082413d09 | ||
|
|
280f40508e | ||
|
|
e2be0e2ff0 |
@@ -23,6 +23,7 @@ variables:
|
||||
BUILD_MULTI_ARCH_IMAGES: "true"
|
||||
|
||||
stages:
|
||||
- trigger
|
||||
- image
|
||||
- lint
|
||||
- go-checks
|
||||
@@ -34,14 +35,44 @@ stages:
|
||||
- scan
|
||||
- release
|
||||
|
||||
.pipeline-trigger-rules:
|
||||
rules:
|
||||
# We trigger the pipeline if started manually
|
||||
- if: $CI_PIPELINE_SOURCE == "web"
|
||||
# We trigger the pipeline on the main branch
|
||||
- if: $CI_COMMIT_BRANCH == "main"
|
||||
# We trigger the pipeline on the release- branches
|
||||
- if: $CI_COMMIT_BRANCH =~ /^release-.*$/
|
||||
# We trigger the pipeline on tags
|
||||
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
|
||||
|
||||
workflow:
|
||||
rules:
|
||||
# We trigger the pipeline on a merge request
|
||||
- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
|
||||
# We then add all the regular triggers
|
||||
- !reference [.pipeline-trigger-rules, rules]
|
||||
|
||||
# The main or manual job is used to filter out distributions or architectures that are not required on
|
||||
# every build.
|
||||
.main-or-manual:
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == "main"
|
||||
- if: $CI_COMMIT_BRANCH =~ /^release-.*$/
|
||||
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
|
||||
- !reference [.pipeline-trigger-rules, rules]
|
||||
- if: $CI_PIPELINE_SOURCE == "schedule"
|
||||
when: manual
|
||||
|
||||
# The trigger-pipeline job adds a manually triggered job to the pipeline on merge requests.
|
||||
trigger-pipeline:
|
||||
stage: trigger
|
||||
script:
|
||||
- echo "starting pipeline"
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
|
||||
when: manual
|
||||
allow_failure: false
|
||||
- when: always
|
||||
|
||||
# Define the distribution targets
|
||||
.dist-amazonlinux2:
|
||||
rules:
|
||||
|
||||
113
.github/workflows/blossom-ci.yaml
vendored
Normal file
113
.github/workflows/blossom-ci.yaml
vendored
Normal file
@@ -0,0 +1,113 @@
|
||||
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# A workflow to trigger ci on hybrid infra (github + self hosted runner)
|
||||
name: Blossom-CI
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
platform:
|
||||
description: 'runs-on argument'
|
||||
required: false
|
||||
args:
|
||||
description: 'argument'
|
||||
required: false
|
||||
jobs:
|
||||
Authorization:
|
||||
name: Authorization
|
||||
runs-on: blossom
|
||||
outputs:
|
||||
args: ${{ env.args }}
|
||||
|
||||
# This job only runs for pull request comments
|
||||
if: |
|
||||
contains( '\
|
||||
anstockatnv,\
|
||||
rohitrajani2018,\
|
||||
cdesiniotis,\
|
||||
shivamerla,\
|
||||
ArangoGutierrez,\
|
||||
elezar,\
|
||||
klueska,\
|
||||
zvonkok,\
|
||||
', format('{0},', github.actor)) &&
|
||||
github.event.comment.body == '/blossom-ci'
|
||||
steps:
|
||||
- name: Check if comment is issued by authorized person
|
||||
run: blossom-ci
|
||||
env:
|
||||
OPERATION: 'AUTH'
|
||||
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
|
||||
|
||||
Vulnerability-scan:
|
||||
name: Vulnerability scan
|
||||
needs: [Authorization]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
repository: ${{ fromJson(needs.Authorization.outputs.args).repo }}
|
||||
ref: ${{ fromJson(needs.Authorization.outputs.args).ref }}
|
||||
lfs: 'true'
|
||||
|
||||
# repo specific steps
|
||||
#- name: Setup java
|
||||
# uses: actions/setup-java@v1
|
||||
# with:
|
||||
# java-version: 1.8
|
||||
|
||||
# add blackduck properties https://synopsys.atlassian.net/wiki/spaces/INTDOCS/pages/631308372/Methods+for+Configuring+Analysis#Using-a-configuration-file
|
||||
#- name: Setup blackduck properties
|
||||
# run: |
|
||||
# PROJECTS=$(mvn -am dependency:tree | grep maven-dependency-plugin | awk '{ out="com.nvidia:"$(NF-1);print out }' | grep rapids | xargs | sed -e 's/ /,/g')
|
||||
# echo detect.maven.build.command="-pl=$PROJECTS -am" >> application.properties
|
||||
# echo detect.maven.included.scopes=compile >> application.properties
|
||||
|
||||
- name: Run blossom action
|
||||
uses: NVIDIA/blossom-action@main
|
||||
env:
|
||||
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
|
||||
with:
|
||||
args1: ${{ fromJson(needs.Authorization.outputs.args).args1 }}
|
||||
args2: ${{ fromJson(needs.Authorization.outputs.args).args2 }}
|
||||
args3: ${{ fromJson(needs.Authorization.outputs.args).args3 }}
|
||||
|
||||
Job-trigger:
|
||||
name: Start ci job
|
||||
needs: [Vulnerability-scan]
|
||||
runs-on: blossom
|
||||
steps:
|
||||
- name: Start ci job
|
||||
run: blossom-ci
|
||||
env:
|
||||
OPERATION: 'START-CI-JOB'
|
||||
CI_SERVER: ${{ secrets.CI_SERVER }}
|
||||
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
Upload-Log:
|
||||
name: Upload log
|
||||
runs-on: blossom
|
||||
if : github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- name: Jenkins log for pull request ${{ fromJson(github.event.inputs.args).pr }} (click here)
|
||||
run: blossom-ci
|
||||
env:
|
||||
OPERATION: 'POST-PROCESSING'
|
||||
CI_SERVER: ${{ secrets.CI_SERVER }}
|
||||
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
32
CHANGELOG.md
32
CHANGELOG.md
@@ -1,5 +1,37 @@
|
||||
# NVIDIA Container Toolkit Changelog
|
||||
|
||||
## v1.13.1
|
||||
|
||||
* Update `update-ldcache` hook to only update ldcache if it exists.
|
||||
* Update `update-ldcache` hook to create `/etc/ld.so.conf.d` folder if it doesn't exist.
|
||||
* Fix failure when libcuda cannot be located during XOrg library discovery.
|
||||
* Fix CDI spec generation on systems that use `/etc/alternatives` (e.g. Debian)
|
||||
|
||||
## v1.13.0
|
||||
|
||||
* Promote 1.13.0-rc.3 to 1.13.0
|
||||
|
||||
## v1.13.0-rc.3
|
||||
|
||||
* Only initialize NVML for modes that require it when running `nvidia-ctk cdi generate`.
|
||||
* Prefer /run over /var/run when locating nvidia-persistenced and nvidia-fabricmanager sockets.
|
||||
* Fix the generation of CDI specifications for management containers when the driver libraries are not in the LDCache.
|
||||
* Add transformers to deduplicate and simplify CDI specifications.
|
||||
* Generate a simplified CDI specification by default. This means that entities in the common edits in a spec are not included in device definitions.
|
||||
* Also return an error from the nvcdi.New constructor instead of panicking.
|
||||
* Detect XOrg libraries for injection and CDI spec generation.
|
||||
* Add `nvidia-ctk system create-device-nodes` command to create control devices.
|
||||
* Add `nvidia-ctk cdi transform` command to apply transforms to CDI specifications.
|
||||
* Add `--vendor` and `--class` options to `nvidia-ctk cdi generate`
|
||||
|
||||
* [libnvidia-container] Fix segmentation fault when RPC initialization fails.
|
||||
* [libnvidia-container] Build centos variants of the NVIDIA Container Library with static libtirpc v1.3.2.
|
||||
* [libnvidia-container] Remove make targets for fedora35 as the centos8 packages are compatible.
|
||||
|
||||
* [toolkit-container] Add `nvidia-container-runtime.modes.cdi.annotation-prefixes` config option that allows the CDI annotation prefixes that are read to be overridden.
|
||||
* [toolkit-container] Create device nodes when generating CDI specification for management containers.
|
||||
* [toolkit-container] Add `nvidia-container-runtime.runtimes` config option to set the low-level runtime for the NVIDIA Container Runtime
|
||||
|
||||
## v1.13.0-rc.2
|
||||
|
||||
* Don't fail chmod hook if paths are not injected
|
||||
|
||||
@@ -18,6 +18,7 @@ package cdi
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/generate"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
@@ -44,6 +45,7 @@ func (m command) build() *cli.Command {
|
||||
|
||||
hook.Subcommands = []*cli.Command{
|
||||
generate.NewCommand(m.logger),
|
||||
transform.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &hook
|
||||
|
||||
@@ -30,8 +30,6 @@ import (
|
||||
specs "github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -49,6 +47,8 @@ type config struct {
|
||||
driverRoot string
|
||||
nvidiaCTKPath string
|
||||
mode string
|
||||
vendor string
|
||||
class string
|
||||
}
|
||||
|
||||
// NewCommand constructs a generate-cdi command with the specified logger
|
||||
@@ -110,6 +110,20 @@ func (m command) build() *cli.Command {
|
||||
Usage: "Specify the path to use for the nvidia-ctk in the generated CDI specification. If this is left empty, the path will be searched.",
|
||||
Destination: &cfg.nvidiaCTKPath,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "vendor",
|
||||
Aliases: []string{"cdi-vendor"},
|
||||
Usage: "the vendor string to use for the generated CDI specification.",
|
||||
Value: "nvidia.com",
|
||||
Destination: &cfg.vendor,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "class",
|
||||
Aliases: []string{"cdi-class"},
|
||||
Usage: "the class string to use for the generated CDI specification.",
|
||||
Value: "gpu",
|
||||
Destination: &cfg.class,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
@@ -151,6 +165,12 @@ func (m command) validateFlags(c *cli.Context, cfg *config) error {
|
||||
}
|
||||
}
|
||||
|
||||
if err := cdi.ValidateVendorName(cfg.vendor); err != nil {
|
||||
return fmt.Errorf("invalid CDI vendor name: %v", err)
|
||||
}
|
||||
if err := cdi.ValidateClassName(cfg.class); err != nil {
|
||||
return fmt.Errorf("invalid CDI class name: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -190,23 +210,16 @@ func (m command) generateSpec(cfg *config) (spec.Interface, error) {
|
||||
return nil, fmt.Errorf("failed to create device namer: %v", err)
|
||||
}
|
||||
|
||||
nvmllib := nvml.New()
|
||||
if r := nvmllib.Init(); r != nvml.SUCCESS {
|
||||
return nil, r
|
||||
}
|
||||
defer nvmllib.Shutdown()
|
||||
|
||||
devicelib := device.New(device.WithNvml(nvmllib))
|
||||
|
||||
cdilib := nvcdi.New(
|
||||
cdilib, err := nvcdi.New(
|
||||
nvcdi.WithLogger(m.logger),
|
||||
nvcdi.WithDriverRoot(cfg.driverRoot),
|
||||
nvcdi.WithNVIDIACTKPath(cfg.nvidiaCTKPath),
|
||||
nvcdi.WithDeviceNamer(deviceNamer),
|
||||
nvcdi.WithDeviceLib(devicelib),
|
||||
nvcdi.WithNvmlLib(nvmllib),
|
||||
nvcdi.WithMode(string(cfg.mode)),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create CDI library: %v", err)
|
||||
}
|
||||
|
||||
deviceSpecs, err := cdilib.GetAllDeviceSpecs()
|
||||
if err != nil {
|
||||
@@ -233,8 +246,8 @@ func (m command) generateSpec(cfg *config) (spec.Interface, error) {
|
||||
}
|
||||
|
||||
return spec.New(
|
||||
spec.WithVendor("nvidia.com"),
|
||||
spec.WithClass("gpu"),
|
||||
spec.WithVendor(cfg.vendor),
|
||||
spec.WithClass(cfg.class),
|
||||
spec.WithDeviceSpecs(deviceSpecs),
|
||||
spec.WithEdits(*commonEdits.ContainerEdits),
|
||||
spec.WithFormat(cfg.format),
|
||||
|
||||
159
cmd/nvidia-ctk/cdi/transform/root/root.go
Normal file
159
cmd/nvidia-ctk/cdi/transform/root/root.go
Normal file
@@ -0,0 +1,159 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package root
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type loadSaver interface {
|
||||
Load() (spec.Interface, error)
|
||||
Save(spec.Interface) error
|
||||
}
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
type transformOptions struct {
|
||||
input string
|
||||
output string
|
||||
}
|
||||
|
||||
type options struct {
|
||||
transformOptions
|
||||
from string
|
||||
to string
|
||||
}
|
||||
|
||||
// NewCommand constructs a root transform command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build creates the CLI command
|
||||
func (m command) build() *cli.Command {
|
||||
opts := options{}
|
||||
|
||||
c := cli.Command{
|
||||
Name: "root",
|
||||
Usage: "Apply a root transform to a CDI specification",
|
||||
Before: func(c *cli.Context) error {
|
||||
return m.validateFlags(c, &opts)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &opts)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "input",
|
||||
Usage: "Specify the file to read the CDI specification from. If this is '-' the specification is read from STDIN",
|
||||
Value: "-",
|
||||
Destination: &opts.input,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "output",
|
||||
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
|
||||
Destination: &opts.output,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "from",
|
||||
Usage: "specify the root to be transformed",
|
||||
Destination: &opts.from,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "to",
|
||||
Usage: "specify the replacement root. If this is the same as the from root, the transform is a no-op.",
|
||||
Value: "",
|
||||
Destination: &opts.to,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) validateFlags(c *cli.Context, opts *options) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, opts *options) error {
|
||||
spec, err := opts.Load()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load CDI specification: %w", err)
|
||||
}
|
||||
|
||||
err = transform.NewRootTransformer(
|
||||
opts.from,
|
||||
opts.to,
|
||||
).Transform(spec.Raw())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to transform CDI specification: %w", err)
|
||||
}
|
||||
|
||||
return opts.Save(spec)
|
||||
}
|
||||
|
||||
// Load loads the input CDI specification
|
||||
func (o transformOptions) Load() (spec.Interface, error) {
|
||||
contents, err := o.getContents()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read spec contents: %v", err)
|
||||
}
|
||||
|
||||
raw, err := cdi.ParseSpec(contents)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse CDI spec: %v", err)
|
||||
}
|
||||
|
||||
return spec.New(
|
||||
spec.WithRawSpec(raw),
|
||||
)
|
||||
}
|
||||
|
||||
func (o transformOptions) getContents() ([]byte, error) {
|
||||
if o.input == "-" {
|
||||
return io.ReadAll(os.Stdin)
|
||||
}
|
||||
|
||||
return os.ReadFile(o.input)
|
||||
}
|
||||
|
||||
// Save saves the CDI specification to the output file
|
||||
func (o transformOptions) Save(s spec.Interface) error {
|
||||
if o.output == "" {
|
||||
_, err := s.WriteTo(os.Stdout)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write CDI spec to STDOUT: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return s.Save(o.output)
|
||||
}
|
||||
51
cmd/nvidia-ctk/cdi/transform/transform.go
Normal file
51
cmd/nvidia-ctk/cdi/transform/transform.go
Normal file
@@ -0,0 +1,51 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package transform
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform/root"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs a command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build creates the CLI command
|
||||
func (m command) build() *cli.Command {
|
||||
c := cli.Command{
|
||||
Name: "transform",
|
||||
Usage: "Apply a transform to a CDI specification",
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{}
|
||||
|
||||
c.Subcommands = []*cli.Command{
|
||||
root.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
@@ -84,6 +84,12 @@ func (m command) run(c *cli.Context, cfg *config) error {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
_, err = os.Stat(filepath.Join(containerRoot, "/etc/ld.so.cache"))
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
m.logger.Debugf("No ld.so.cache found, skipping update")
|
||||
return nil
|
||||
}
|
||||
|
||||
err = m.createConfig(containerRoot, cfg.folders.Value())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf: %v", err)
|
||||
@@ -105,6 +111,10 @@ func (m command) createConfig(root string, folders []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(filepath.Join(root, "/etc/ld.so.conf.d"), 0755); err != nil {
|
||||
return fmt.Errorf("failed to create ld.so.conf.d: %v", err)
|
||||
}
|
||||
|
||||
configFile, err := os.CreateTemp(filepath.Join(root, "/etc/ld.so.conf.d"), "nvcr-*.conf")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create config file: %v", err)
|
||||
|
||||
107
cmd/nvidia-ctk/system/create-device-nodes/create-device-nodes.go
Normal file
107
cmd/nvidia-ctk/system/create-device-nodes/create-device-nodes.go
Normal file
@@ -0,0 +1,107 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package createdevicenodes
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
type options struct {
|
||||
driverRoot string
|
||||
|
||||
dryRun bool
|
||||
|
||||
control bool
|
||||
}
|
||||
|
||||
// NewCommand constructs a create-device-nodes sub-command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m command) build() *cli.Command {
|
||||
opts := options{}
|
||||
|
||||
c := cli.Command{
|
||||
Name: "create-device-nodes",
|
||||
Usage: "A utility to create NVIDIA device ndoes",
|
||||
Before: func(c *cli.Context) error {
|
||||
return m.validateFlags(c, &opts)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &opts)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "driver-root",
|
||||
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
|
||||
Value: "/",
|
||||
Destination: &opts.driverRoot,
|
||||
EnvVars: []string{"DRIVER_ROOT"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "control-devices",
|
||||
Usage: "create all control device nodes: nvidiactl, nvidia-modeset, nvidia-uvm, nvidia-uvm-tools",
|
||||
Destination: &opts.control,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "dry-run",
|
||||
Usage: "if set, the command will not create any symlinks.",
|
||||
Value: false,
|
||||
Destination: &opts.dryRun,
|
||||
EnvVars: []string{"DRY_RUN"},
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) validateFlags(r *cli.Context, opts *options) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, opts *options) error {
|
||||
s, err := system.New(
|
||||
system.WithLogger(m.logger),
|
||||
system.WithDryRun(opts.dryRun),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create library: %v", err)
|
||||
}
|
||||
|
||||
if opts.control {
|
||||
m.logger.Infof("Creating control device nodes at %s", opts.driverRoot)
|
||||
if err := s.CreateNVIDIAControlDeviceNodesAt(opts.driverRoot); err != nil {
|
||||
return fmt.Errorf("failed to create control device nodes: %v", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -18,6 +18,7 @@ package system
|
||||
|
||||
import (
|
||||
devchar "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks"
|
||||
devicenodes "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-device-nodes"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
@@ -43,6 +44,7 @@ func (m command) build() *cli.Command {
|
||||
|
||||
system.Subcommands = []*cli.Command{
|
||||
devchar.NewCommand(m.logger),
|
||||
devicenodes.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &system
|
||||
|
||||
@@ -14,10 +14,10 @@
|
||||
|
||||
# Supported OSs by architecture
|
||||
AMD64_TARGETS := ubuntu20.04 ubuntu18.04 ubuntu16.04 debian10 debian9
|
||||
X86_64_TARGETS := fedora35 centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
|
||||
X86_64_TARGETS := centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
|
||||
PPC64LE_TARGETS := ubuntu18.04 ubuntu16.04 centos7 centos8 rhel7 rhel8
|
||||
ARM64_TARGETS := ubuntu20.04 ubuntu18.04
|
||||
AARCH64_TARGETS := fedora35 centos8 rhel8 amazonlinux2
|
||||
AARCH64_TARGETS := centos8 rhel8 amazonlinux2
|
||||
|
||||
# Define top-level build targets
|
||||
docker%: SHELL:=/bin/bash
|
||||
@@ -102,14 +102,6 @@ LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVI
|
||||
--centos%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
--centos8%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
# private fedora target
|
||||
--fedora%: OS := fedora
|
||||
--fedora%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--fedora%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
# The fedora(35) base image has very slow performance when building aarch64 packages.
|
||||
# Since our primary concern here is glibc versions, we use the older glibc version available in centos8.
|
||||
--fedora35%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
# private amazonlinux target
|
||||
--amazonlinux%: OS := amazonlinux
|
||||
--amazonlinux%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
|
||||
@@ -71,7 +71,8 @@ func TestGetConfig(t *testing.T) {
|
||||
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
CDI: cdiModeConfig{
|
||||
DefaultKind: "nvidia.com/gpu",
|
||||
DefaultKind: "nvidia.com/gpu",
|
||||
AnnotationPrefixes: []string{"cdi.k8s.io/"},
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -92,6 +93,7 @@ func TestGetConfig(t *testing.T) {
|
||||
"nvidia-container-runtime.runtimes = [\"/some/runtime\",]",
|
||||
"nvidia-container-runtime.mode = \"not-auto\"",
|
||||
"nvidia-container-runtime.modes.cdi.default-kind = \"example.vendor.com/device\"",
|
||||
"nvidia-container-runtime.modes.cdi.annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
|
||||
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
|
||||
},
|
||||
@@ -111,6 +113,10 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
CDI: cdiModeConfig{
|
||||
DefaultKind: "example.vendor.com/device",
|
||||
AnnotationPrefixes: []string{
|
||||
"cdi.k8s.io/",
|
||||
"example.vendor.com/",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -134,6 +140,7 @@ func TestGetConfig(t *testing.T) {
|
||||
"mode = \"not-auto\"",
|
||||
"[nvidia-container-runtime.modes.cdi]",
|
||||
"default-kind = \"example.vendor.com/device\"",
|
||||
"annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
|
||||
"[nvidia-container-runtime.modes.csv]",
|
||||
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"[nvidia-ctk]",
|
||||
@@ -155,6 +162,10 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
CDI: cdiModeConfig{
|
||||
DefaultKind: "example.vendor.com/device",
|
||||
AnnotationPrefixes: []string{
|
||||
"cdi.k8s.io/",
|
||||
"example.vendor.com/",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -50,12 +50,15 @@ func (c *ConfigV1) AddRuntime(name string, path string, setAsDefault bool) error
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "runtime_engine"}, "")
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false)
|
||||
}
|
||||
cdiAnnotations := []interface{}{"cdi.k8s.io/*"}
|
||||
containerAnnotations, ok := config.GetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}).([]interface{})
|
||||
if ok && containerAnnotations != nil {
|
||||
cdiAnnotations = append(containerAnnotations, cdiAnnotations...)
|
||||
|
||||
if len(c.ContainerAnnotations) > 0 {
|
||||
annotations, err := (*Config)(c).getRuntimeAnnotations([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
annotations = append(c.ContainerAnnotations, annotations...)
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}, annotations)
|
||||
}
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}, cdiAnnotations)
|
||||
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "BinaryName"}, path)
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "Runtime"}, path)
|
||||
|
||||
@@ -45,12 +45,14 @@ func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false)
|
||||
}
|
||||
|
||||
cdiAnnotations := []interface{}{"cdi.k8s.io/*"}
|
||||
containerAnnotations, ok := config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}).([]interface{})
|
||||
if ok && containerAnnotations != nil {
|
||||
cdiAnnotations = append(containerAnnotations, cdiAnnotations...)
|
||||
if len(c.ContainerAnnotations) > 0 {
|
||||
annotations, err := c.getRuntimeAnnotations([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
annotations = append(c.ContainerAnnotations, annotations...)
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}, annotations)
|
||||
}
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}, cdiAnnotations)
|
||||
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "options", "BinaryName"}, path)
|
||||
|
||||
@@ -62,6 +64,32 @@ func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Config) getRuntimeAnnotations(path []string) ([]string, error) {
|
||||
if c == nil || c.Tree == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
config := *c.Tree
|
||||
if !config.HasPath(path) {
|
||||
return nil, nil
|
||||
}
|
||||
annotationsI, ok := config.GetPath(path).([]interface{})
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid annotations: %v", annotationsI)
|
||||
}
|
||||
|
||||
var annotations []string
|
||||
for _, annotation := range annotationsI {
|
||||
a, ok := annotation.(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid annotation: %v", annotation)
|
||||
}
|
||||
annotations = append(annotations, a)
|
||||
}
|
||||
|
||||
return annotations, nil
|
||||
}
|
||||
|
||||
// DefaultRuntime returns the default runtime for the containerd config
|
||||
func (c Config) DefaultRuntime() string {
|
||||
if runtime, ok := c.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}).(string); ok {
|
||||
|
||||
@@ -26,6 +26,7 @@ type Config struct {
|
||||
*toml.Tree
|
||||
RuntimeType string
|
||||
UseDefaultRuntimeName bool
|
||||
ContainerAnnotations []string
|
||||
}
|
||||
|
||||
// New creates a containerd config with the specified options
|
||||
|
||||
@@ -30,9 +30,10 @@ const (
|
||||
)
|
||||
|
||||
type builder struct {
|
||||
path string
|
||||
runtimeType string
|
||||
useLegacyConfig bool
|
||||
path string
|
||||
runtimeType string
|
||||
useLegacyConfig bool
|
||||
containerAnnotations []string
|
||||
}
|
||||
|
||||
// Option defines a function that can be used to configure the config builder
|
||||
@@ -59,6 +60,13 @@ func WithUseLegacyConfig(useLegacyConfig bool) Option {
|
||||
}
|
||||
}
|
||||
|
||||
// WithContainerAnnotations sets the container annotations for the config builder
|
||||
func WithContainerAnnotations(containerAnnotations ...string) Option {
|
||||
return func(b *builder) {
|
||||
b.containerAnnotations = containerAnnotations
|
||||
}
|
||||
}
|
||||
|
||||
func (b *builder) build() (engine.Interface, error) {
|
||||
if b.path == "" {
|
||||
return nil, fmt.Errorf("config path is empty")
|
||||
@@ -74,6 +82,7 @@ func (b *builder) build() (engine.Interface, error) {
|
||||
}
|
||||
config.RuntimeType = b.runtimeType
|
||||
config.UseDefaultRuntimeName = !b.useLegacyConfig
|
||||
config.ContainerAnnotations = b.containerAnnotations
|
||||
|
||||
version, err := config.parseVersion(b.useLegacyConfig)
|
||||
if err != nil {
|
||||
|
||||
@@ -19,6 +19,7 @@ package config
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/pelletier/go-toml"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
@@ -52,6 +53,8 @@ type cdiModeConfig struct {
|
||||
SpecDirs []string `toml:"spec-dirs"`
|
||||
// DefaultKind sets the default kind to be used when constructing fully-qualified CDI device names
|
||||
DefaultKind string `toml:"default-kind"`
|
||||
// AnnotationPrefixes sets the allowed prefixes for CDI annotation-based device injection
|
||||
AnnotationPrefixes []string `toml:"annotation-prefixes"`
|
||||
}
|
||||
|
||||
type csvModeConfig struct {
|
||||
@@ -98,6 +101,9 @@ func GetDefaultRuntimeConfig() *RuntimeConfig {
|
||||
},
|
||||
CDI: cdiModeConfig{
|
||||
DefaultKind: "nvidia.com/gpu",
|
||||
AnnotationPrefixes: []string{
|
||||
cdi.AnnotationPrefix,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -20,11 +20,13 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@@ -44,9 +46,12 @@ func NewGraphicsDiscoverer(logger *logrus.Logger, devices image.VisibleDevices,
|
||||
|
||||
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, cfg)
|
||||
|
||||
xorg := optionalXorgDiscoverer(logger, driverRoot, cfg.NvidiaCTKPath)
|
||||
|
||||
discover := Merge(
|
||||
Merge(drmDeviceNodes, drmByPathSymlinks),
|
||||
mounts,
|
||||
xorg,
|
||||
)
|
||||
|
||||
return discover, nil
|
||||
@@ -243,6 +248,123 @@ func newDRMDeviceFilter(logger *logrus.Logger, devices image.VisibleDevices, dri
|
||||
return filter, nil
|
||||
}
|
||||
|
||||
type xorgHooks struct {
|
||||
libraries Discover
|
||||
driverVersion string
|
||||
nvidiaCTKPath string
|
||||
}
|
||||
|
||||
var _ Discover = (*xorgHooks)(nil)
|
||||
|
||||
// optionalXorgDiscoverer creates a discoverer for Xorg libraries.
|
||||
// If the creation of the discoverer fails, a None discoverer is returned.
|
||||
func optionalXorgDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string) Discover {
|
||||
xorg, err := newXorgDiscoverer(logger, driverRoot, nvidiaCTKPath)
|
||||
if err != nil {
|
||||
logger.Warnf("Failed to create Xorg discoverer: %v; skipping xorg libraries", err)
|
||||
return None{}
|
||||
}
|
||||
return xorg
|
||||
}
|
||||
|
||||
func newXorgDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string) (Discover, error) {
|
||||
libCudaPaths, err := cuda.New(
|
||||
cuda.WithLogger(logger),
|
||||
cuda.WithDriverRoot(driverRoot),
|
||||
).Locate(".*.*.*")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate libcuda.so: %v", err)
|
||||
}
|
||||
libcudaPath := libCudaPaths[0]
|
||||
|
||||
version := strings.TrimPrefix(filepath.Base(libcudaPath), "libcuda.so.")
|
||||
if version == "" {
|
||||
return nil, fmt.Errorf("failed to determine libcuda.so version from path: %q", libcudaPath)
|
||||
}
|
||||
|
||||
libRoot := filepath.Dir(libcudaPath)
|
||||
xorgLibs := NewMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driverRoot),
|
||||
lookup.WithSearchPaths(libRoot, "/usr/lib/x86_64-linux-gnu"),
|
||||
lookup.WithCount(1),
|
||||
),
|
||||
driverRoot,
|
||||
[]string{
|
||||
"nvidia/xorg/nvidia_drv.so",
|
||||
fmt.Sprintf("nvidia/xorg/libglxserver_nvidia.so.%s", version),
|
||||
},
|
||||
)
|
||||
xorgHooks := xorgHooks{
|
||||
libraries: xorgLibs,
|
||||
driverVersion: version,
|
||||
nvidiaCTKPath: FindNvidiaCTK(logger, nvidiaCTKPath),
|
||||
}
|
||||
|
||||
xorgConfg := NewMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driverRoot),
|
||||
lookup.WithSearchPaths("/usr/share"),
|
||||
),
|
||||
driverRoot,
|
||||
[]string{"X11/xorg.conf.d/10-nvidia.conf"},
|
||||
)
|
||||
|
||||
d := Merge(
|
||||
xorgLibs,
|
||||
xorgConfg,
|
||||
xorgHooks,
|
||||
)
|
||||
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// Devices returns no devices for Xorg
|
||||
func (m xorgHooks) Devices() ([]Device, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Hooks returns a hook to create symlinks for Xorg libraries
|
||||
func (m xorgHooks) Hooks() ([]Hook, error) {
|
||||
mounts, err := m.libraries.Mounts()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get mounts: %v", err)
|
||||
}
|
||||
if len(mounts) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var target string
|
||||
for _, mount := range mounts {
|
||||
filename := filepath.Base(mount.HostPath)
|
||||
if filename == "libglxserver_nvidia.so."+m.driverVersion {
|
||||
target = mount.Path
|
||||
}
|
||||
}
|
||||
|
||||
if target == "" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
link := strings.TrimSuffix(target, "."+m.driverVersion)
|
||||
links := []string{fmt.Sprintf("%s::%s", filepath.Base(target), link)}
|
||||
symlinkHook := CreateCreateSymlinkHook(
|
||||
m.nvidiaCTKPath,
|
||||
links,
|
||||
)
|
||||
|
||||
return symlinkHook.Hooks()
|
||||
}
|
||||
|
||||
// Mounts returns the libraries required for Xorg
|
||||
func (m xorgHooks) Mounts() ([]Mount, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// selectDeviceByPath is a filter that allows devices to be selected by the path
|
||||
type selectDeviceByPath map[string]bool
|
||||
|
||||
|
||||
@@ -25,21 +25,39 @@ type ipcMounts mounts
|
||||
|
||||
// NewIPCDiscoverer creates a discoverer for NVIDIA IPC sockets.
|
||||
func NewIPCDiscoverer(logger *logrus.Logger, driverRoot string) (Discover, error) {
|
||||
d := newMounts(
|
||||
sockets := newMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driverRoot),
|
||||
lookup.WithSearchPaths("/run", "/var/run"),
|
||||
lookup.WithCount(1),
|
||||
),
|
||||
driverRoot,
|
||||
[]string{
|
||||
"/nvidia-persistenced/socket",
|
||||
"/nvidia-fabricmanager/socket",
|
||||
},
|
||||
)
|
||||
|
||||
mps := newMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driverRoot),
|
||||
lookup.WithCount(1),
|
||||
),
|
||||
driverRoot,
|
||||
[]string{
|
||||
"/var/run/nvidia-persistenced/socket",
|
||||
"/var/run/nvidia-fabricmanager/socket",
|
||||
"/tmp/nvidia-mps",
|
||||
},
|
||||
)
|
||||
|
||||
return (*ipcMounts)(d), nil
|
||||
d := Merge(
|
||||
(*ipcMounts)(sockets),
|
||||
(*ipcMounts)(mps),
|
||||
)
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// Mounts returns the discovered mounts with "noexec" added to the mount options.
|
||||
|
||||
@@ -307,11 +307,7 @@ func (c *ldcache) resolve(target string) (string, error) {
|
||||
link = filepath.Join(filepath.Dir(target), link)
|
||||
}
|
||||
|
||||
// Ensure that the returned path is relative to the root.
|
||||
link = filepath.Join(c.root, link)
|
||||
|
||||
c.logger.Debugf("Resolved link: '%v' => '%v'", name, link)
|
||||
return link, nil
|
||||
return c.resolve(link)
|
||||
}
|
||||
|
||||
// bytesToString converts a byte slice to a string.
|
||||
|
||||
104
internal/lookup/cuda/cuda.go
Normal file
104
internal/lookup/cuda/cuda.go
Normal file
@@ -0,0 +1,104 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package cuda
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type cudaLocator struct {
|
||||
logger *logrus.Logger
|
||||
driverRoot string
|
||||
}
|
||||
|
||||
// Options is a function that configures a cudaLocator.
|
||||
type Options func(*cudaLocator)
|
||||
|
||||
// WithLogger is an option that configures the logger used by the locator.
|
||||
func WithLogger(logger *logrus.Logger) Options {
|
||||
return func(c *cudaLocator) {
|
||||
c.logger = logger
|
||||
}
|
||||
}
|
||||
|
||||
// WithDriverRoot is an option that configures the driver root used by the locator.
|
||||
func WithDriverRoot(driverRoot string) Options {
|
||||
return func(c *cudaLocator) {
|
||||
c.driverRoot = driverRoot
|
||||
}
|
||||
}
|
||||
|
||||
// New creates a new CUDA library locator.
|
||||
func New(opts ...Options) lookup.Locator {
|
||||
c := &cudaLocator{}
|
||||
for _, opt := range opts {
|
||||
opt(c)
|
||||
}
|
||||
|
||||
if c.logger == nil {
|
||||
c.logger = logrus.StandardLogger()
|
||||
}
|
||||
if c.driverRoot == "" {
|
||||
c.driverRoot = "/"
|
||||
}
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// Locate returns the path to the libcuda.so.RMVERSION file.
|
||||
// libcuda.so is prefixed to the specified pattern.
|
||||
func (l *cudaLocator) Locate(pattern string) ([]string, error) {
|
||||
ldcacheLocator, err := lookup.NewLibraryLocator(
|
||||
l.logger,
|
||||
l.driverRoot,
|
||||
)
|
||||
if err != nil {
|
||||
l.logger.Debugf("Failed to create LDCache locator: %v", err)
|
||||
}
|
||||
|
||||
fullPattern := "libcuda.so" + pattern
|
||||
|
||||
candidates, err := ldcacheLocator.Locate("libcuda.so")
|
||||
if err == nil {
|
||||
for _, c := range candidates {
|
||||
if match, err := filepath.Match(fullPattern, filepath.Base(c)); err != nil || !match {
|
||||
l.logger.Debugf("Skipping non-matching candidate %v: %v", c, err)
|
||||
continue
|
||||
}
|
||||
return []string{c}, nil
|
||||
}
|
||||
}
|
||||
l.logger.Debugf("Could not locate %q in LDCache: Checking predefined library paths.", pattern)
|
||||
|
||||
pathLocator := lookup.NewFileLocator(
|
||||
lookup.WithLogger(l.logger),
|
||||
lookup.WithRoot(l.driverRoot),
|
||||
lookup.WithSearchPaths(
|
||||
"/usr/lib64",
|
||||
"/usr/lib/x86_64-linux-gnu",
|
||||
"/usr/lib/aarch64-linux-gnu",
|
||||
"/usr/lib/x86_64-linux-gnu/nvidia/current",
|
||||
"/usr/lib/aarch64-linux-gnu/nvidia/current",
|
||||
),
|
||||
lookup.WithCount(1),
|
||||
)
|
||||
|
||||
return pathLocator.Locate(fullPattern)
|
||||
}
|
||||
@@ -40,6 +40,7 @@ func NewLibraryLocator(logger *log.Logger, root string) (Locator, error) {
|
||||
}
|
||||
|
||||
l := library{
|
||||
logger: logger,
|
||||
symlink: NewSymlinkLocator(logger, root),
|
||||
cache: cache,
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
@@ -67,7 +68,7 @@ func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, cfg *config.Con
|
||||
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
||||
}
|
||||
|
||||
_, annotationDevices, err := cdi.ParseAnnotations(rawSpec.Annotations)
|
||||
annotationDevices, err := getAnnotationDevices(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes, rawSpec.Annotations)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse container annotations: %v", err)
|
||||
}
|
||||
@@ -107,6 +108,38 @@ func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, cfg *config.Con
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// getAnnotationDevices returns a list of devices specified in the annotations.
|
||||
// Keys starting with the specified prefixes are considered and expected to contain a comma-separated list of
|
||||
// fully-qualified CDI devices names. If any device name is not fully-quality an error is returned.
|
||||
// The list of returned devices is deduplicated.
|
||||
func getAnnotationDevices(prefixes []string, annotations map[string]string) ([]string, error) {
|
||||
devicesByKey := make(map[string][]string)
|
||||
for key, value := range annotations {
|
||||
for _, prefix := range prefixes {
|
||||
if strings.HasPrefix(key, prefix) {
|
||||
devicesByKey[key] = strings.Split(value, ",")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
seen := make(map[string]bool)
|
||||
var annotationDevices []string
|
||||
for key, devices := range devicesByKey {
|
||||
for _, device := range devices {
|
||||
if !cdi.IsQualifiedName(device) {
|
||||
return nil, fmt.Errorf("invalid device name %q in annotation %q", device, key)
|
||||
}
|
||||
if seen[device] {
|
||||
continue
|
||||
}
|
||||
annotationDevices = append(annotationDevices, device)
|
||||
seen[device] = true
|
||||
}
|
||||
}
|
||||
|
||||
return annotationDevices, nil
|
||||
}
|
||||
|
||||
// Modify loads the CDI registry and injects the specified CDI devices into the OCI runtime specification.
|
||||
func (m cdiModifier) Modify(spec *specs.Spec) error {
|
||||
registry := cdi.GetRegistry(
|
||||
|
||||
92
internal/modifier/cdi_test.go
Normal file
92
internal/modifier/cdi_test.go
Normal file
@@ -0,0 +1,92 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetAnnotationDevices(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
prefixes []string
|
||||
annotations map[string]string
|
||||
expectedDevices []string
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
description: "no annotations",
|
||||
},
|
||||
{
|
||||
description: "no matching annotations",
|
||||
prefixes: []string{"not-prefix/"},
|
||||
annotations: map[string]string{
|
||||
"prefix/foo": "example.com/device=bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "single matching annotation",
|
||||
prefixes: []string{"prefix/"},
|
||||
annotations: map[string]string{
|
||||
"prefix/foo": "example.com/device=bar",
|
||||
},
|
||||
expectedDevices: []string{"example.com/device=bar"},
|
||||
},
|
||||
{
|
||||
description: "multiple matching annotations",
|
||||
prefixes: []string{"prefix/", "another-prefix/"},
|
||||
annotations: map[string]string{
|
||||
"prefix/foo": "example.com/device=bar",
|
||||
"another-prefix/bar": "example.com/device=baz",
|
||||
},
|
||||
expectedDevices: []string{"example.com/device=bar", "example.com/device=baz"},
|
||||
},
|
||||
{
|
||||
description: "multiple matching annotations with duplicate devices",
|
||||
prefixes: []string{"prefix/", "another-prefix/"},
|
||||
annotations: map[string]string{
|
||||
"prefix/foo": "example.com/device=bar",
|
||||
"another-prefix/bar": "example.com/device=bar",
|
||||
},
|
||||
expectedDevices: []string{"example.com/device=bar"},
|
||||
},
|
||||
{
|
||||
description: "invalid devices",
|
||||
prefixes: []string{"prefix/"},
|
||||
annotations: map[string]string{
|
||||
"prefix/foo": "example.com/device",
|
||||
},
|
||||
expectedError: fmt.Errorf("invalid device %q", "example.com/device"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
devices, err := getAnnotationDevices(tc.prefixes, tc.annotations)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, tc.expectedDevices, devices)
|
||||
})
|
||||
}
|
||||
}
|
||||
36
internal/system/options.go
Normal file
36
internal/system/options.go
Normal file
@@ -0,0 +1,36 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package system
|
||||
|
||||
import "github.com/sirupsen/logrus"
|
||||
|
||||
// Option is a functional option for the system command
|
||||
type Option func(*Interface)
|
||||
|
||||
// WithLogger sets the logger for the system command
|
||||
func WithLogger(logger *logrus.Logger) Option {
|
||||
return func(i *Interface) {
|
||||
i.logger = logger
|
||||
}
|
||||
}
|
||||
|
||||
// WithDryRun sets the dry run flag
|
||||
func WithDryRun(dryRun bool) Option {
|
||||
return func(i *Interface) {
|
||||
i.dryRun = dryRun
|
||||
}
|
||||
}
|
||||
149
internal/system/system.go
Normal file
149
internal/system/system.go
Normal file
@@ -0,0 +1,149 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package system
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// Interface is the interface for the system command
|
||||
type Interface struct {
|
||||
logger *logrus.Logger
|
||||
dryRun bool
|
||||
|
||||
nvidiaDevices nvidiaDevices
|
||||
}
|
||||
|
||||
// New constructs a system command with the specified options
|
||||
func New(opts ...Option) (*Interface, error) {
|
||||
i := &Interface{
|
||||
logger: logrus.StandardLogger(),
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt(i)
|
||||
}
|
||||
|
||||
devices, err := devices.GetNVIDIADevices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create devices info: %v", err)
|
||||
}
|
||||
i.nvidiaDevices = nvidiaDevices{devices}
|
||||
|
||||
return i, nil
|
||||
}
|
||||
|
||||
// CreateNVIDIAControlDeviceNodesAt creates the NVIDIA control device nodes associated with the NVIDIA driver at the specified root.
|
||||
func (m *Interface) CreateNVIDIAControlDeviceNodesAt(root string) error {
|
||||
controlNodes := []string{"/dev/nvidiactl", "/dev/nvidia-modeset", "/dev/nvidia-uvm", "/dev/nvidia-uvm-tools"}
|
||||
|
||||
for _, node := range controlNodes {
|
||||
path := filepath.Join(root, node)
|
||||
err := m.CreateNVIDIADeviceNode(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create device node %s: %v", path, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateNVIDIADeviceNode creates a specified device node associated with the NVIDIA driver.
|
||||
func (m *Interface) CreateNVIDIADeviceNode(path string) error {
|
||||
node := filepath.Base(path)
|
||||
if !strings.HasPrefix(node, "nvidia") {
|
||||
return fmt.Errorf("invalid device node %q", node)
|
||||
}
|
||||
|
||||
major, err := m.nvidiaDevices.Major(node)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determine major: %v", err)
|
||||
}
|
||||
|
||||
minor, err := m.nvidiaDevices.Minor(node)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determine minor: %v", err)
|
||||
}
|
||||
|
||||
return m.createDeviceNode(path, int(major), int(minor))
|
||||
}
|
||||
|
||||
func (m *Interface) createDeviceNode(path string, major int, minor int) error {
|
||||
if m.dryRun {
|
||||
m.logger.Infof("Running: mknod --mode=0666 %s c %d %d", path, major, minor)
|
||||
return nil
|
||||
}
|
||||
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
m.logger.Infof("Skipping: %s already exists", path)
|
||||
return nil
|
||||
} else if !os.IsNotExist(err) {
|
||||
return fmt.Errorf("failed to stat %s: %v", path, err)
|
||||
}
|
||||
|
||||
err := unix.Mknod(path, unix.S_IFCHR, int(unix.Mkdev(uint32(major), uint32(minor))))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return unix.Chmod(path, 0666)
|
||||
}
|
||||
|
||||
type nvidiaDevices struct {
|
||||
devices.Devices
|
||||
}
|
||||
|
||||
// Major returns the major number for the specified NVIDIA device node.
|
||||
// If the device node is not supported, an error is returned.
|
||||
func (n *nvidiaDevices) Major(node string) (int64, error) {
|
||||
var valid bool
|
||||
var major devices.Major
|
||||
switch node {
|
||||
case "nvidia-uvm", "nvidia-uvm-tools":
|
||||
major, valid = n.Get(devices.NVIDIAUVM)
|
||||
case "nvidia-modeset", "nvidiactl":
|
||||
major, valid = n.Get(devices.NVIDIAGPU)
|
||||
}
|
||||
|
||||
if !valid {
|
||||
return 0, fmt.Errorf("invalid device node %q", node)
|
||||
}
|
||||
|
||||
return int64(major), nil
|
||||
}
|
||||
|
||||
// Minor returns the minor number for the specified NVIDIA device node.
|
||||
// If the device node is not supported, an error is returned.
|
||||
func (n *nvidiaDevices) Minor(node string) (int64, error) {
|
||||
switch node {
|
||||
case "nvidia-modeset":
|
||||
return devices.NVIDIAModesetMinor, nil
|
||||
case "nvidia-uvm-tools":
|
||||
return devices.NVIDIAUVMToolsMinor, nil
|
||||
case "nvidia-uvm":
|
||||
return devices.NVIDIAUVMMinor, nil
|
||||
case "nvidiactl":
|
||||
return devices.NVIDIACTLMinor, nil
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("invalid device node %q", node)
|
||||
}
|
||||
@@ -22,8 +22,8 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
@@ -31,6 +31,11 @@ import (
|
||||
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
|
||||
// The supplied NVML Library is used to query the expected driver version.
|
||||
func NewDriverDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) {
|
||||
if r := nvmllib.Init(); r != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("failed to initalize NVML: %v", r)
|
||||
}
|
||||
defer nvmllib.Shutdown()
|
||||
|
||||
version, r := nvmllib.SystemGetDriverVersion()
|
||||
if r != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("failed to determine driver version: %v", r)
|
||||
@@ -131,26 +136,24 @@ func NewDriverBinariesDiscoverer(logger *logrus.Logger, driverRoot string) disco
|
||||
func getVersionLibs(logger *logrus.Logger, driverRoot string, version string) ([]string, error) {
|
||||
logger.Infof("Using driver version %v", version)
|
||||
|
||||
cache, err := ldcache.New(logger, driverRoot)
|
||||
libCudaPaths, err := cuda.New(
|
||||
cuda.WithLogger(logger),
|
||||
cuda.WithDriverRoot(driverRoot),
|
||||
).Locate("." + version)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load ldcache: %v", err)
|
||||
return nil, fmt.Errorf("failed to locate libcuda.so.%v: %v", version, err)
|
||||
}
|
||||
libRoot := filepath.Dir(libCudaPaths[0])
|
||||
|
||||
libs32, libs64 := cache.List()
|
||||
libraries := lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithSearchPaths(libRoot),
|
||||
lookup.WithOptional(true),
|
||||
)
|
||||
|
||||
var libs []string
|
||||
for _, l := range libs64 {
|
||||
if strings.HasSuffix(l, version) {
|
||||
logger.Infof("found 64-bit driver lib: %v", l)
|
||||
libs = append(libs, l)
|
||||
}
|
||||
}
|
||||
|
||||
for _, l := range libs32 {
|
||||
if strings.HasSuffix(l, version) {
|
||||
logger.Infof("found 32-bit driver lib: %v", l)
|
||||
libs = append(libs, l)
|
||||
}
|
||||
libs, err := libraries.Locate("*.so." + version)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate libraries for driver version %v: %v", version, err)
|
||||
}
|
||||
|
||||
if driverRoot == "/" || driverRoot == "" {
|
||||
|
||||
@@ -24,6 +24,7 @@ import (
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
type nvmllib nvcdilib
|
||||
@@ -39,6 +40,11 @@ func (l *nvmllib) GetSpec() (spec.Interface, error) {
|
||||
func (l *nvmllib) GetAllDeviceSpecs() ([]specs.Device, error) {
|
||||
var deviceSpecs []specs.Device
|
||||
|
||||
if r := l.nvmllib.Init(); r != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("failed to initalize NVML: %v", r)
|
||||
}
|
||||
defer l.nvmllib.Shutdown()
|
||||
|
||||
gpuDeviceSpecs, err := l.getGPUDeviceSpecs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -17,6 +17,8 @@
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
@@ -47,7 +49,7 @@ type nvcdilib struct {
|
||||
}
|
||||
|
||||
// New creates a new nvcdi library
|
||||
func New(opts ...Option) Interface {
|
||||
func New(opts ...Option) (Interface, error) {
|
||||
l := &nvcdilib{}
|
||||
for _, opt := range opts {
|
||||
opt(l)
|
||||
@@ -100,8 +102,7 @@ func New(opts ...Option) Interface {
|
||||
}
|
||||
lib = (*mofedlib)(l)
|
||||
default:
|
||||
// TODO: We would like to return an error here instead of panicking
|
||||
panic("Unknown mode")
|
||||
return nil, fmt.Errorf("unknown mode %q", l.mode)
|
||||
}
|
||||
|
||||
w := wrapper{
|
||||
@@ -109,7 +110,7 @@ func New(opts ...Option) Interface {
|
||||
vendor: l.vendor,
|
||||
class: l.class,
|
||||
}
|
||||
return &w
|
||||
return &w, nil
|
||||
}
|
||||
|
||||
// GetSpec combines the device specs and common edits from the wrapped Interface to a single spec.Interface.
|
||||
@@ -151,3 +152,24 @@ func (l *nvcdilib) resolveMode() (rmode string) {
|
||||
|
||||
return ModeNvml
|
||||
}
|
||||
|
||||
// getCudaVersion returns the CUDA version of the current system.
|
||||
func (l *nvcdilib) getCudaVersion() (string, error) {
|
||||
if hasNVML, reason := l.infolib.HasNvml(); !hasNVML {
|
||||
return "", fmt.Errorf("nvml not detected: %v", reason)
|
||||
}
|
||||
if l.nvmllib == nil {
|
||||
return "", fmt.Errorf("nvml library not initialized")
|
||||
}
|
||||
r := l.nvmllib.Init()
|
||||
if r != nvml.SUCCESS {
|
||||
return "", fmt.Errorf("failed to initialize nvml: %v", r)
|
||||
}
|
||||
defer l.nvmllib.Shutdown()
|
||||
|
||||
version, r := l.nvmllib.SystemGetDriverVersion()
|
||||
if r != nvml.SUCCESS {
|
||||
return "", fmt.Errorf("failed to get driver version: %v", r)
|
||||
}
|
||||
return version, nil
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ import (
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
@@ -60,23 +60,9 @@ func (m *managementlib) GetAllDeviceSpecs() ([]specs.Device, error) {
|
||||
|
||||
// GetCommonEdits returns the common edits for use in managementlib containers.
|
||||
func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) {
|
||||
locator, err := lookup.NewLibraryLocator(
|
||||
m.logger,
|
||||
m.driverRoot,
|
||||
)
|
||||
version, err := m.getCudaVersion()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create library locator: %v", err)
|
||||
}
|
||||
|
||||
candidates, err := locator.Locate("libcuda.so")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate libcuda.so: %v", err)
|
||||
}
|
||||
libcudaPath := candidates[0]
|
||||
|
||||
version := strings.TrimPrefix(filepath.Base(libcudaPath), "libcuda.so.")
|
||||
if version == "" {
|
||||
return nil, fmt.Errorf("failed to determine libcuda.so version from path: %q", libcudaPath)
|
||||
return nil, fmt.Errorf("failed to get CUDA version: %v", err)
|
||||
}
|
||||
|
||||
driver, err := newDriverVersionDiscoverer(m.logger, m.driverRoot, m.nvidiaCTKPath, version)
|
||||
@@ -92,6 +78,28 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) {
|
||||
return edits, nil
|
||||
}
|
||||
|
||||
// getCudaVersion returns the CUDA version for use in managementlib containers.
|
||||
func (m *managementlib) getCudaVersion() (string, error) {
|
||||
version, err := (*nvcdilib)(m).getCudaVersion()
|
||||
if err == nil {
|
||||
return version, nil
|
||||
}
|
||||
|
||||
libCudaPaths, err := cuda.New(
|
||||
cuda.WithLogger(m.logger),
|
||||
cuda.WithDriverRoot(m.driverRoot),
|
||||
).Locate(".*.*.*")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to locate libcuda.so: %v", err)
|
||||
}
|
||||
|
||||
libCudaPath := libCudaPaths[0]
|
||||
|
||||
version = strings.TrimPrefix(filepath.Base(libCudaPath), "libcuda.so.")
|
||||
|
||||
return version, nil
|
||||
}
|
||||
|
||||
type managementDiscoverer struct {
|
||||
discover.Discover
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ package spec
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
)
|
||||
@@ -31,6 +32,7 @@ type builder struct {
|
||||
deviceSpecs []specs.Device
|
||||
edits specs.ContainerEdits
|
||||
format string
|
||||
noSimplify bool
|
||||
}
|
||||
|
||||
// newBuilder creates a new spec builder with the supplied options
|
||||
@@ -39,6 +41,13 @@ func newBuilder(opts ...Option) *builder {
|
||||
for _, opt := range opts {
|
||||
opt(s)
|
||||
}
|
||||
if s.raw != nil {
|
||||
s.noSimplify = true
|
||||
vendor, class := cdi.ParseQualifier(s.raw.Kind)
|
||||
s.vendor = vendor
|
||||
s.class = class
|
||||
}
|
||||
|
||||
if s.version == "" {
|
||||
s.version = DetectMinimumVersion
|
||||
}
|
||||
@@ -58,7 +67,6 @@ func newBuilder(opts ...Option) *builder {
|
||||
// Build builds a CDI spec from the spec builder.
|
||||
func (o *builder) Build() (*spec, error) {
|
||||
raw := o.raw
|
||||
|
||||
if raw == nil {
|
||||
raw = &specs.Spec{
|
||||
Version: o.version,
|
||||
@@ -76,6 +84,13 @@ func (o *builder) Build() (*spec, error) {
|
||||
raw.Version = minVersion
|
||||
}
|
||||
|
||||
if !o.noSimplify {
|
||||
err := transform.NewSimplifier().Transform(raw)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to simplify spec: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
s := spec{
|
||||
Spec: raw,
|
||||
format: o.format,
|
||||
@@ -128,3 +143,17 @@ func WithFormat(format string) Option {
|
||||
o.format = format
|
||||
}
|
||||
}
|
||||
|
||||
// WithNoSimplify sets whether the spec must be simplified
|
||||
func WithNoSimplify(noSimplify bool) Option {
|
||||
return func(o *builder) {
|
||||
o.noSimplify = noSimplify
|
||||
}
|
||||
}
|
||||
|
||||
// WithRawSpec sets the raw spec for the spec builder
|
||||
func WithRawSpec(raw *specs.Spec) Option {
|
||||
return func(o *builder) {
|
||||
o.raw = raw
|
||||
}
|
||||
}
|
||||
|
||||
151
pkg/nvcdi/transform/deduplicate.go
Normal file
151
pkg/nvcdi/transform/deduplicate.go
Normal file
@@ -0,0 +1,151 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package transform
|
||||
|
||||
import (
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
)
|
||||
|
||||
type dedupe struct{}
|
||||
|
||||
var _ Transformer = (*dedupe)(nil)
|
||||
|
||||
// NewDedupe creates a transformer that deduplicates container edits.
|
||||
func NewDedupe() (Transformer, error) {
|
||||
return &dedupe{}, nil
|
||||
}
|
||||
|
||||
// Transform removes duplicate entris from devices and common container edits.
|
||||
func (d dedupe) Transform(spec *specs.Spec) error {
|
||||
if spec == nil {
|
||||
return nil
|
||||
}
|
||||
if err := d.transformEdits(&spec.ContainerEdits); err != nil {
|
||||
return err
|
||||
}
|
||||
var updatedDevices []specs.Device
|
||||
for _, device := range spec.Devices {
|
||||
if err := d.transformEdits(&device.ContainerEdits); err != nil {
|
||||
return err
|
||||
}
|
||||
updatedDevices = append(updatedDevices, device)
|
||||
}
|
||||
spec.Devices = updatedDevices
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d dedupe) transformEdits(edits *specs.ContainerEdits) error {
|
||||
deviceNodes, err := d.deduplicateDeviceNodes(edits.DeviceNodes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
edits.DeviceNodes = deviceNodes
|
||||
|
||||
envs, err := d.deduplicateEnvs(edits.Env)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
edits.Env = envs
|
||||
|
||||
hooks, err := d.deduplicateHooks(edits.Hooks)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
edits.Hooks = hooks
|
||||
|
||||
mounts, err := d.deduplicateMounts(edits.Mounts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
edits.Mounts = mounts
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d dedupe) deduplicateDeviceNodes(entities []*specs.DeviceNode) ([]*specs.DeviceNode, error) {
|
||||
seen := make(map[string]bool)
|
||||
var deviceNodes []*specs.DeviceNode
|
||||
for _, e := range entities {
|
||||
if e == nil {
|
||||
continue
|
||||
}
|
||||
id, err := deviceNode(*e).id()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if seen[id] {
|
||||
continue
|
||||
}
|
||||
seen[id] = true
|
||||
deviceNodes = append(deviceNodes, e)
|
||||
}
|
||||
return deviceNodes, nil
|
||||
}
|
||||
|
||||
func (d dedupe) deduplicateEnvs(entities []string) ([]string, error) {
|
||||
seen := make(map[string]bool)
|
||||
var envs []string
|
||||
for _, e := range entities {
|
||||
id := e
|
||||
if seen[id] {
|
||||
continue
|
||||
}
|
||||
seen[id] = true
|
||||
envs = append(envs, e)
|
||||
}
|
||||
return envs, nil
|
||||
}
|
||||
|
||||
func (d dedupe) deduplicateHooks(entities []*specs.Hook) ([]*specs.Hook, error) {
|
||||
seen := make(map[string]bool)
|
||||
var hooks []*specs.Hook
|
||||
for _, e := range entities {
|
||||
if e == nil {
|
||||
continue
|
||||
}
|
||||
id, err := hook(*e).id()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if seen[id] {
|
||||
continue
|
||||
}
|
||||
seen[id] = true
|
||||
hooks = append(hooks, e)
|
||||
}
|
||||
return hooks, nil
|
||||
}
|
||||
|
||||
func (d dedupe) deduplicateMounts(entities []*specs.Mount) ([]*specs.Mount, error) {
|
||||
seen := make(map[string]bool)
|
||||
var mounts []*specs.Mount
|
||||
for _, e := range entities {
|
||||
if e == nil {
|
||||
continue
|
||||
}
|
||||
id, err := mount(*e).id()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if seen[id] {
|
||||
continue
|
||||
}
|
||||
seen[id] = true
|
||||
mounts = append(mounts, e)
|
||||
}
|
||||
return mounts, nil
|
||||
}
|
||||
250
pkg/nvcdi/transform/deduplicate_test.go
Normal file
250
pkg/nvcdi/transform/deduplicate_test.go
Normal file
@@ -0,0 +1,250 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package transform
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestDeduplicate(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
spec *specs.Spec
|
||||
expectedError error
|
||||
expectedSpec *specs.Spec
|
||||
}{
|
||||
{
|
||||
description: "nil spec",
|
||||
},
|
||||
{
|
||||
description: "duplicate deviceNode is removed",
|
||||
spec: &specs.Spec{
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
DeviceNodes: []*specs.DeviceNode{
|
||||
{
|
||||
Path: "/dev/gpu0",
|
||||
},
|
||||
{
|
||||
Path: "/dev/gpu0",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
DeviceNodes: []*specs.DeviceNode{
|
||||
{
|
||||
Path: "/dev/gpu0",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "duplicate deviceNode is remved from device edits",
|
||||
spec: &specs.Spec{
|
||||
Devices: []specs.Device{
|
||||
{
|
||||
Name: "device0",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
DeviceNodes: []*specs.DeviceNode{
|
||||
{
|
||||
Path: "/dev/gpu0",
|
||||
},
|
||||
{
|
||||
Path: "/dev/gpu0",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Devices: []specs.Device{
|
||||
{
|
||||
Name: "device0",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
DeviceNodes: []*specs.DeviceNode{
|
||||
{
|
||||
Path: "/dev/gpu0",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "duplicate hook is removed",
|
||||
spec: &specs.Spec{
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Hooks: []*specs.Hook{
|
||||
{
|
||||
HookName: "createContainer",
|
||||
Path: "/usr/bin/nvidia-ctk",
|
||||
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
|
||||
},
|
||||
{
|
||||
HookName: "createContainer",
|
||||
Path: "/usr/bin/nvidia-ctk",
|
||||
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Hooks: []*specs.Hook{
|
||||
{
|
||||
HookName: "createContainer",
|
||||
Path: "/usr/bin/nvidia-ctk",
|
||||
Args: []string{"nvidia-ctk", "hook", "chmod", "--mode", "755", "--path", "/dev/dri"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "duplicate mount is removed",
|
||||
spec: &specs.Spec{
|
||||
Devices: []specs.Device{
|
||||
{
|
||||
Name: "device0",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Mounts: []*specs.Mount{
|
||||
{
|
||||
HostPath: "/host/mount2",
|
||||
ContainerPath: "/mount2",
|
||||
},
|
||||
{
|
||||
HostPath: "/host/mount2",
|
||||
ContainerPath: "/mount2",
|
||||
},
|
||||
{
|
||||
HostPath: "/host/mount1",
|
||||
ContainerPath: "/mount1",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Mounts: []*specs.Mount{
|
||||
{
|
||||
HostPath: "/host/mount1",
|
||||
ContainerPath: "/mount1",
|
||||
Options: []string{"bind", "ro"},
|
||||
Type: "tmpfs",
|
||||
},
|
||||
{
|
||||
HostPath: "/host/mount1",
|
||||
ContainerPath: "/mount1",
|
||||
Options: []string{"bind", "ro"},
|
||||
Type: "tmpfs",
|
||||
},
|
||||
{
|
||||
HostPath: "/host/mount1",
|
||||
ContainerPath: "/mount1",
|
||||
Options: []string{"bind", "ro"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Devices: []specs.Device{
|
||||
{
|
||||
Name: "device0",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Mounts: []*specs.Mount{
|
||||
{
|
||||
HostPath: "/host/mount2",
|
||||
ContainerPath: "/mount2",
|
||||
},
|
||||
{
|
||||
HostPath: "/host/mount1",
|
||||
ContainerPath: "/mount1",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Mounts: []*specs.Mount{
|
||||
{
|
||||
HostPath: "/host/mount1",
|
||||
ContainerPath: "/mount1",
|
||||
Options: []string{"bind", "ro"},
|
||||
Type: "tmpfs",
|
||||
},
|
||||
{
|
||||
HostPath: "/host/mount1",
|
||||
ContainerPath: "/mount1",
|
||||
Options: []string{"bind", "ro"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "duplicate env is removed",
|
||||
spec: &specs.Spec{
|
||||
Devices: []specs.Device{
|
||||
{
|
||||
Name: "device0",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"ENV1=VAL1", "ENV1=VAL1", "ENV2=ONE_VALUE", "ENV2=ANOTHER_VALUE"},
|
||||
},
|
||||
},
|
||||
},
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"ENV1=VAL1", "ENV1=VAL1", "ENV2=ONE_VALUE", "ENV2=ANOTHER_VALUE"},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Devices: []specs.Device{
|
||||
{
|
||||
Name: "device0",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"ENV1=VAL1", "ENV2=ONE_VALUE", "ENV2=ANOTHER_VALUE"},
|
||||
},
|
||||
},
|
||||
},
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"ENV1=VAL1", "ENV2=ONE_VALUE", "ENV2=ANOTHER_VALUE"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
d := dedupe{}
|
||||
|
||||
err := d.Transform(tc.spec)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
|
||||
require.EqualValues(t, tc.expectedSpec, tc.spec)
|
||||
})
|
||||
}
|
||||
}
|
||||
166
pkg/nvcdi/transform/edits.go
Normal file
166
pkg/nvcdi/transform/edits.go
Normal file
@@ -0,0 +1,166 @@
|
||||
/*
|
||||
*
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package transform
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
)
|
||||
|
||||
type containerEdits specs.ContainerEdits
|
||||
|
||||
// IsEmpty returns true if the edits are empty.
|
||||
func (e containerEdits) IsEmpty() bool {
|
||||
// Devices with empty edits are invalid
|
||||
if len(e.DeviceNodes) > 0 {
|
||||
return false
|
||||
}
|
||||
if len(e.Env) > 0 {
|
||||
return false
|
||||
}
|
||||
if len(e.Hooks) > 0 {
|
||||
return false
|
||||
}
|
||||
if len(e.Mounts) > 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (e *containerEdits) getEntityIds() ([]string, error) {
|
||||
if e == nil {
|
||||
return nil, nil
|
||||
}
|
||||
uniqueIDs := make(map[string]bool)
|
||||
|
||||
deviceNodes, err := e.getDeviceNodeIDs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for k := range deviceNodes {
|
||||
uniqueIDs[k] = true
|
||||
}
|
||||
|
||||
envs, err := e.getEnvIDs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for k := range envs {
|
||||
uniqueIDs[k] = true
|
||||
}
|
||||
|
||||
hooks, err := e.getHookIDs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for k := range hooks {
|
||||
uniqueIDs[k] = true
|
||||
}
|
||||
|
||||
mounts, err := e.getMountIDs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for k := range mounts {
|
||||
uniqueIDs[k] = true
|
||||
}
|
||||
|
||||
var ids []string
|
||||
for k := range uniqueIDs {
|
||||
ids = append(ids, k)
|
||||
}
|
||||
|
||||
return ids, nil
|
||||
}
|
||||
|
||||
func (e *containerEdits) getDeviceNodeIDs() (map[string]bool, error) {
|
||||
deviceIDs := make(map[string]bool)
|
||||
for _, entity := range e.DeviceNodes {
|
||||
id, err := deviceNode(*entity).id()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
deviceIDs[id] = true
|
||||
}
|
||||
return deviceIDs, nil
|
||||
}
|
||||
|
||||
func (e *containerEdits) getEnvIDs() (map[string]bool, error) {
|
||||
envIDs := make(map[string]bool)
|
||||
for _, entity := range e.Env {
|
||||
id, err := env(entity).id()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
envIDs[id] = true
|
||||
}
|
||||
return envIDs, nil
|
||||
}
|
||||
|
||||
func (e *containerEdits) getHookIDs() (map[string]bool, error) {
|
||||
hookIDs := make(map[string]bool)
|
||||
for _, entity := range e.Hooks {
|
||||
id, err := hook(*entity).id()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
hookIDs[id] = true
|
||||
}
|
||||
return hookIDs, nil
|
||||
}
|
||||
|
||||
func (e *containerEdits) getMountIDs() (map[string]bool, error) {
|
||||
mountIDs := make(map[string]bool)
|
||||
for _, entity := range e.Mounts {
|
||||
id, err := mount(*entity).id()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
mountIDs[id] = true
|
||||
}
|
||||
return mountIDs, nil
|
||||
}
|
||||
|
||||
type deviceNode specs.DeviceNode
|
||||
|
||||
func (dn deviceNode) id() (string, error) {
|
||||
b, err := json.Marshal(dn)
|
||||
return string(b), err
|
||||
}
|
||||
|
||||
// env is a single environment variable entry of a set of container edits.
type env string

// id returns the entry itself, which serves as its identity when comparing
// entities. The returned error is always nil.
func (e env) id() (string, error) {
	identifier := string(e)
	return identifier, nil
}
|
||||
|
||||
type mount specs.Mount
|
||||
|
||||
func (m mount) id() (string, error) {
|
||||
b, err := json.Marshal(m)
|
||||
return string(b), err
|
||||
}
|
||||
|
||||
type hook specs.Hook
|
||||
|
||||
func (m hook) id() (string, error) {
|
||||
b, err := json.Marshal(m)
|
||||
return string(b), err
|
||||
}
|
||||
105
pkg/nvcdi/transform/remove.go
Normal file
105
pkg/nvcdi/transform/remove.go
Normal file
@@ -0,0 +1,105 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package transform
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
)
|
||||
|
||||
type remove map[string]bool
|
||||
|
||||
func newRemover(ids ...string) Transformer {
|
||||
r := make(remove)
|
||||
for _, id := range ids {
|
||||
r[id] = true
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// Transform remove the specified entities from the spec.
|
||||
func (r remove) Transform(spec *specs.Spec) error {
|
||||
if spec == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, device := range spec.Devices {
|
||||
if err := r.transformEdits(&device.ContainerEdits); err != nil {
|
||||
return fmt.Errorf("failed to remove edits from device %q: %w", device.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
return r.transformEdits(&spec.ContainerEdits)
|
||||
}
|
||||
|
||||
func (r remove) transformEdits(edits *specs.ContainerEdits) error {
|
||||
if edits == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var deviceNodes []*specs.DeviceNode
|
||||
for _, entity := range edits.DeviceNodes {
|
||||
id, err := deviceNode(*entity).id()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if r[id] {
|
||||
continue
|
||||
}
|
||||
deviceNodes = append(deviceNodes, entity)
|
||||
}
|
||||
edits.DeviceNodes = deviceNodes
|
||||
|
||||
var envs []string
|
||||
for _, entity := range edits.Env {
|
||||
id := entity
|
||||
if r[id] {
|
||||
continue
|
||||
}
|
||||
envs = append(envs, entity)
|
||||
}
|
||||
edits.Env = envs
|
||||
|
||||
var hooks []*specs.Hook
|
||||
for _, entity := range edits.Hooks {
|
||||
id, err := hook(*entity).id()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if r[id] {
|
||||
continue
|
||||
}
|
||||
hooks = append(hooks, entity)
|
||||
}
|
||||
edits.Hooks = hooks
|
||||
|
||||
var mounts []*specs.Mount
|
||||
for _, entity := range edits.Mounts {
|
||||
id, err := mount(*entity).id()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if r[id] {
|
||||
continue
|
||||
}
|
||||
mounts = append(mounts, entity)
|
||||
}
|
||||
edits.Mounts = mounts
|
||||
|
||||
return nil
|
||||
}
|
||||
74
pkg/nvcdi/transform/simplify.go
Normal file
74
pkg/nvcdi/transform/simplify.go
Normal file
@@ -0,0 +1,74 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package transform
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
)
|
||||
|
||||
type simplify struct{}
|
||||
|
||||
var _ Transformer = (*simplify)(nil)
|
||||
|
||||
// NewSimplifier creates a simplifier transformer.
|
||||
// This transoformer ensures that entities in the spec are deduplicated and that common edits are removed from device-specific edits.
|
||||
func NewSimplifier() Transformer {
|
||||
return &simplify{}
|
||||
}
|
||||
|
||||
// Transform simplifies the supplied spec.
|
||||
// Edits that are present in the common edits are removed from device-specific edits.
|
||||
func (s simplify) Transform(spec *specs.Spec) error {
|
||||
if spec == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
dedupe := dedupe{}
|
||||
if err := dedupe.Transform(spec); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
commonEntityIDs, err := (*containerEdits)(&spec.ContainerEdits).getEntityIds()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
toRemove := newRemover(commonEntityIDs...)
|
||||
var updatedDevices []specs.Device
|
||||
for _, device := range spec.Devices {
|
||||
deviceAsSpec := specs.Spec{
|
||||
ContainerEdits: device.ContainerEdits,
|
||||
}
|
||||
err := toRemove.Transform(&deviceAsSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to transform device edits: %w", err)
|
||||
}
|
||||
|
||||
if !(containerEdits)(deviceAsSpec.ContainerEdits).IsEmpty() {
|
||||
// Devices with empty edits are invalid.
|
||||
// We only update the container edits for the device if this would
|
||||
// result in a valid device.
|
||||
device.ContainerEdits = deviceAsSpec.ContainerEdits
|
||||
}
|
||||
updatedDevices = append(updatedDevices, device)
|
||||
}
|
||||
spec.Devices = updatedDevices
|
||||
|
||||
return nil
|
||||
}
|
||||
125
pkg/nvcdi/transform/simplify_test.go
Normal file
125
pkg/nvcdi/transform/simplify_test.go
Normal file
@@ -0,0 +1,125 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package transform
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestSimplify(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
spec *specs.Spec
|
||||
expectedError error
|
||||
expectedSpec *specs.Spec
|
||||
}{
|
||||
{
|
||||
description: "nil spec is a no-op",
|
||||
},
|
||||
{
|
||||
description: "empty spec is simplified",
|
||||
spec: &specs.Spec{},
|
||||
expectedSpec: &specs.Spec{},
|
||||
},
|
||||
{
|
||||
description: "simplify does not allow empty device",
|
||||
spec: &specs.Spec{
|
||||
Devices: []specs.Device{
|
||||
{
|
||||
Name: "device0",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"FOO=BAR"},
|
||||
},
|
||||
},
|
||||
},
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"FOO=BAR"},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Devices: []specs.Device{
|
||||
{
|
||||
Name: "device0",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"FOO=BAR"},
|
||||
},
|
||||
},
|
||||
},
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"FOO=BAR"},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "simplify removes common entities",
|
||||
spec: &specs.Spec{
|
||||
Devices: []specs.Device{
|
||||
{
|
||||
Name: "device0",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"FOO=BAR"},
|
||||
DeviceNodes: []*specs.DeviceNode{
|
||||
{
|
||||
Path: "/dev/gpu0",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"FOO=BAR"},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Devices: []specs.Device{
|
||||
{
|
||||
Name: "device0",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
DeviceNodes: []*specs.DeviceNode{
|
||||
{
|
||||
Path: "/dev/gpu0",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
Env: []string{"FOO=BAR"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
s := simplify{}
|
||||
|
||||
err := s.Transform(tc.spec)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
|
||||
require.EqualValues(t, tc.expectedSpec, tc.spec)
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -62,7 +62,12 @@ make -C "${NVIDIA_CONTAINER_TOOLKIT_ROOT}" \
|
||||
LIBNVIDIA_CONTAINER_TAG="${LIBNVIDIA_CONTAINER_TAG}" \
|
||||
"${TARGET}"
|
||||
|
||||
if [[ -z ${NVIDIA_CONTAINER_TOOLKIT_TAG} ]]; then
|
||||
# If required we also build the nvidia-container-runtime and nvidia-docker packages.
|
||||
# Since these are essentially meta packages intended to allow for users to
|
||||
# transition from older installation workflows, we skip these for rc builds
|
||||
# (NVIDIA_CONTAINER_TOOLKIT_TAG != "") and releases with a non-zero patch
|
||||
# version (i.e. a version that does not end in .0).
|
||||
if [[ -z ${NVIDIA_CONTAINER_TOOLKIT_TAG} && "${NVIDIA_CONTAINER_TOOLKIT_VERSION%.0}" != "${NVIDIA_CONTAINER_TOOLKIT_VERSION}" ]]; then
|
||||
# We set the TOOLKIT_VERSION, TOOLKIT_TAG for the nvidia-container-runtime and nvidia-docker targets
|
||||
# The LIB_TAG is also overridden to match the TOOLKIT_TAG.
|
||||
# Build nvidia-container-runtime
|
||||
@@ -82,5 +87,5 @@ if [[ -z ${NVIDIA_CONTAINER_TOOLKIT_TAG} ]]; then
|
||||
${TARGET}
|
||||
|
||||
else
|
||||
echo "Skipping nvidia-container-runtime and nvidia-docker builds for release candidate"
|
||||
echo "Skipping nvidia-container-runtime and nvidia-docker builds."
|
||||
fi
|
||||
|
||||
@@ -45,17 +45,21 @@ function skip-for-release-candidate() {
|
||||
return 0
|
||||
fi
|
||||
|
||||
# We allow all other packages for non-rc versions.
|
||||
if [[ "${VERSION/rc./}" == "${VERSION}" ]]; then
|
||||
return 1
|
||||
local is_non_patch_full_release=1
|
||||
# We allow all other packages for non-rc and non-patch release versions.
|
||||
if [[ "${VERSION/rc./}" != "${VERSION}" ]]; then
|
||||
is_non_patch_full_release=0
|
||||
fi
|
||||
if [[ "${VERSION%.0}" == "${VERSION}" ]]; then
|
||||
is_non_patch_full_release=0
|
||||
fi
|
||||
|
||||
local package_name=$1
|
||||
if [[ "${package_name/"nvidia-docker2"/}" != "${package_name}" ]]; then
|
||||
return 0
|
||||
return ${is_non_patch_full_release}
|
||||
fi
|
||||
if [[ "${package_name/"nvidia-container-runtime"/}" != "${package_name}" ]]; then
|
||||
return 0
|
||||
return ${is_non_patch_full_release}
|
||||
fi
|
||||
return 1
|
||||
}
|
||||
@@ -91,6 +95,18 @@ function extract-all() {
|
||||
|
||||
echo "Extracting packages for ${dist} from ${PACKAGE_IMAGE}"
|
||||
|
||||
if [ $dist == "ubuntu18.04" ]; then
|
||||
set -x
|
||||
# We need to publish the libnvidia-container0 packages to the kitmaker repository as a once off operation.
|
||||
# We include the packages here so that these will be added to the archive for the ubuntu18.04 arm64 packages.
|
||||
mkdir -p "${ARTIFACTS_DIR}/packages/ubuntu18.04/arm64/"
|
||||
curl -L "https://nvidia.github.io/libnvidia-container/ubuntu18.04/arm64/libnvidia-container0_0.10.0+jetpack_arm64.deb" \
|
||||
--output "${ARTIFACTS_DIR}/packages/ubuntu18.04/arm64/libnvidia-container0_0.10.0+jetpack_arm64.deb"
|
||||
curl -L "https://nvidia.github.io/libnvidia-container/ubuntu18.04/arm64/libnvidia-container0_0.11.0+jetpack_arm64.deb" \
|
||||
--output "${ARTIFACTS_DIR}/packages/ubuntu18.04/arm64/libnvidia-container0_0.11.0+jetpack_arm64.deb"
|
||||
set +x
|
||||
fi
|
||||
|
||||
# Extract every file for the specified dist-arch combination in MANIFEST.txt
|
||||
grep "/${dist}/" "${ARTIFACTS_DIR}/manifest.txt" | while read -r f ; do
|
||||
package_name="$(basename "$f")"
|
||||
|
||||
@@ -62,8 +62,10 @@ echo "LIBNVIDIA_CONTAINER_PACKAGE_VERSION=${libnvidia_container_version_tag//\~/
|
||||
echo "NVIDIA_CONTAINER_TOOLKIT_VERSION=${nvidia_container_toolkit_version}"
|
||||
echo "NVIDIA_CONTAINER_TOOLKIT_TAG=${nvidia_container_toolkit_tag}"
|
||||
echo "NVIDIA_CONTAINER_TOOLKIT_PACKAGE_VERSION=${nvidia_container_toolkit_version_tag//\~/-}"
|
||||
if [[ "${libnvidia_container_version_tag}" != "${nvidia_container_toolkit_version_tag}" ]]; then
|
||||
if [[ "${LIBNVIDIA_CONTAINER_PACKAGE_VERSION}" != "${NVIDIA_CONTAINER_TOOLKIT_PACKAGE_VERSION}" ]]; then
|
||||
>&2 echo "WARNING: The libnvidia-container and nvidia-container-toolkit versions do not match"
|
||||
>&2 echo "WARNING: lib: ${LIBNVIDIA_CONTAINER_PACKAGE_VERSION}"
|
||||
>&2 echo "WARNING: toolkit: ${NVIDIA_CONTAINER_TOOLKIT_PACKAGE_VERSION}"
|
||||
fi
|
||||
echo "NVIDIA_CONTAINER_RUNTIME_VERSION=${nvidia_container_runtime_version}"
|
||||
echo "NVIDIA_CONTAINER_RUNTIME_TAG=${nvidia_container_runtime_tag}"
|
||||
|
||||
2
third_party/libnvidia-container
vendored
2
third_party/libnvidia-container
vendored
Submodule third_party/libnvidia-container updated: 35eb134ae5...6f4aea0fca
@@ -332,6 +332,7 @@ func TestUpdateV1Config(t *testing.T) {
|
||||
Tree: config,
|
||||
UseDefaultRuntimeName: true,
|
||||
RuntimeType: runtimeType,
|
||||
ContainerAnnotations: []string{"cdi.k8s.io/*"},
|
||||
}
|
||||
|
||||
err = UpdateConfig(v1, o)
|
||||
@@ -585,6 +586,7 @@ func TestUpdateV1ConfigWithRuncPresent(t *testing.T) {
|
||||
Tree: config,
|
||||
UseDefaultRuntimeName: true,
|
||||
RuntimeType: runtimeType,
|
||||
ContainerAnnotations: []string{"cdi.k8s.io/*"},
|
||||
}
|
||||
|
||||
err = UpdateConfig(v1, o)
|
||||
|
||||
@@ -279,8 +279,9 @@ func TestUpdateV2Config(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
v2 := &containerd.Config{
|
||||
Tree: config,
|
||||
RuntimeType: runtimeType,
|
||||
Tree: config,
|
||||
RuntimeType: runtimeType,
|
||||
ContainerAnnotations: []string{"cdi.k8s.io/*"},
|
||||
}
|
||||
|
||||
err = UpdateConfig(v2, o)
|
||||
@@ -520,8 +521,9 @@ func TestUpdateV2ConfigWithRuncPresent(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
v2 := &containerd.Config{
|
||||
Tree: config,
|
||||
RuntimeType: runtimeType,
|
||||
Tree: config,
|
||||
RuntimeType: runtimeType,
|
||||
ContainerAnnotations: []string{"cdi.k8s.io/*"},
|
||||
}
|
||||
|
||||
err = UpdateConfig(v2, o)
|
||||
|
||||
@@ -72,6 +72,8 @@ type options struct {
|
||||
hostRootMount string
|
||||
runtimeDir string
|
||||
useLegacyConfig bool
|
||||
|
||||
ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice
|
||||
}
|
||||
|
||||
func main() {
|
||||
@@ -173,6 +175,11 @@ func main() {
|
||||
Destination: &options.useLegacyConfig,
|
||||
EnvVars: []string{"CONTAINERD_USE_LEGACY_CONFIG"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "nvidia-container-runtime-modes.cdi.annotation-prefixes",
|
||||
Destination: &options.ContainerRuntimeModesCDIAnnotationPrefixes,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"},
|
||||
},
|
||||
}
|
||||
|
||||
// Update the subcommand flags with the common subcommand flags
|
||||
@@ -199,6 +206,7 @@ func Setup(c *cli.Context, o *options) error {
|
||||
containerd.WithPath(o.config),
|
||||
containerd.WithRuntimeType(o.runtimeType),
|
||||
containerd.WithUseLegacyConfig(o.useLegacyConfig),
|
||||
containerd.WithContainerAnnotations(o.containerAnnotationsFromCDIPrefixes()...),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
@@ -241,6 +249,7 @@ func Cleanup(c *cli.Context, o *options) error {
|
||||
containerd.WithPath(o.config),
|
||||
containerd.WithRuntimeType(o.runtimeType),
|
||||
containerd.WithUseLegacyConfig(o.useLegacyConfig),
|
||||
containerd.WithContainerAnnotations(o.containerAnnotationsFromCDIPrefixes()...),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
@@ -434,3 +443,13 @@ func RestartContainerdSystemd(hostRootMount string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// containerAnnotationsFromCDIPrefixes returns the container annotations to set for the given CDI prefixes.
|
||||
func (o *options) containerAnnotationsFromCDIPrefixes() []string {
|
||||
var annotations []string
|
||||
for _, prefix := range o.ContainerRuntimeModesCDIAnnotationPrefixes.Value() {
|
||||
annotations = append(annotations, prefix+"*")
|
||||
}
|
||||
|
||||
return annotations
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
@@ -46,10 +47,14 @@ type options struct {
|
||||
DriverRoot string
|
||||
DriverRootCtrPath string
|
||||
|
||||
ContainerRuntimeMode string
|
||||
ContainerRuntimeModesCdiDefaultKind string
|
||||
ContainerRuntimeDebug string
|
||||
ContainerRuntimeLogLevel string
|
||||
ContainerRuntimeMode string
|
||||
ContainerRuntimeDebug string
|
||||
ContainerRuntimeLogLevel string
|
||||
|
||||
ContainerRuntimeModesCdiDefaultKind string
|
||||
ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice
|
||||
|
||||
ContainerRuntimeRuntimes cli.StringSlice
|
||||
|
||||
ContainerRuntimeHookSkipModeDetection bool
|
||||
|
||||
@@ -64,6 +69,8 @@ type options struct {
|
||||
|
||||
acceptNVIDIAVisibleDevicesWhenUnprivileged bool
|
||||
acceptNVIDIAVisibleDevicesAsVolumeMounts bool
|
||||
|
||||
ignoreErrors bool
|
||||
}
|
||||
|
||||
func main() {
|
||||
@@ -120,26 +127,39 @@ func main() {
|
||||
EnvVars: []string{"DRIVER_ROOT_CTR_PATH"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime-debug",
|
||||
Name: "nvidia-container-runtime.debug",
|
||||
Aliases: []string{"nvidia-container-runtime-debug"},
|
||||
Usage: "Specify the location of the debug log file for the NVIDIA Container Runtime",
|
||||
Destination: &opts.ContainerRuntimeDebug,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_DEBUG"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime-debug-log-level",
|
||||
Name: "nvidia-container-runtime.log-level",
|
||||
Aliases: []string{"nvidia-container-runtime-debug-log-level"},
|
||||
Destination: &opts.ContainerRuntimeLogLevel,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime-mode",
|
||||
Name: "nvidia-container-runtime.mode",
|
||||
Aliases: []string{"nvidia-container-runtime-mode"},
|
||||
Destination: &opts.ContainerRuntimeMode,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODE"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime-modes.cdi.default-kind",
|
||||
Name: "nvidia-container-runtime.modes.cdi.default-kind",
|
||||
Destination: &opts.ContainerRuntimeModesCdiDefaultKind,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_DEFAULT_KIND"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "nvidia-container-runtime.modes.cdi.annotation-prefixes",
|
||||
Destination: &opts.ContainerRuntimeModesCDIAnnotationPrefixes,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "nvidia-container-runtime.runtimes",
|
||||
Destination: &opts.ContainerRuntimeRuntimes,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_RUNTIMES"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "nvidia-container-runtime-hook.skip-mode-detection",
|
||||
Value: true,
|
||||
@@ -147,7 +167,8 @@ func main() {
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_HOOK_SKIP_MODE_DETECTION"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-cli-debug",
|
||||
Name: "nvidia-container-cli.debug",
|
||||
Aliases: []string{"nvidia-container-cli-debug"},
|
||||
Usage: "Specify the location of the debug log file for the NVIDIA Container CLI",
|
||||
Destination: &opts.ContainerCLIDebug,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_CLI_DEBUG"},
|
||||
@@ -193,6 +214,12 @@ func main() {
|
||||
Destination: &opts.cdiKind,
|
||||
EnvVars: []string{"CDI_KIND"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "ignore-errors",
|
||||
Usage: "ignore errors when installing the NVIDIA Container toolkit. This is used for testing purposes only.",
|
||||
Hidden: true,
|
||||
Destination: &opts.ignoreErrors,
|
||||
},
|
||||
}
|
||||
|
||||
// Update the subcommand flags with the common subcommand flags
|
||||
@@ -241,46 +268,62 @@ func Install(cli *cli.Context, opts *options) error {
|
||||
|
||||
log.Infof("Removing existing NVIDIA container toolkit installation")
|
||||
err := os.RemoveAll(opts.toolkitRoot)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error removing toolkit directory: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err))
|
||||
}
|
||||
|
||||
toolkitConfigDir := filepath.Join(opts.toolkitRoot, ".config", "nvidia-container-runtime")
|
||||
toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename)
|
||||
|
||||
err = createDirectories(opts.toolkitRoot, toolkitConfigDir)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("could not create required directories: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err))
|
||||
}
|
||||
|
||||
err = installContainerLibraries(opts.toolkitRoot)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error installing NVIDIA container library: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err))
|
||||
}
|
||||
|
||||
err = installContainerRuntimes(opts.toolkitRoot, opts.DriverRoot)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error installing NVIDIA container runtime: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime: %v", err))
|
||||
}
|
||||
|
||||
nvidiaContainerCliExecutable, err := installContainerCLI(opts.toolkitRoot)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error installing NVIDIA container CLI: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err))
|
||||
}
|
||||
|
||||
_, err = installRuntimeHook(opts.toolkitRoot, toolkitConfigPath)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err))
|
||||
}
|
||||
|
||||
nvidiaCTKPath, err := installContainerToolkitCLI(opts.toolkitRoot)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err))
|
||||
}
|
||||
|
||||
err = installToolkitConfig(toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, opts)
|
||||
if err != nil {
|
||||
err = installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, opts)
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container toolkit config: %v", err))
|
||||
}
|
||||
|
||||
return generateCDISpec(opts, nvidiaCTKPath)
|
||||
@@ -336,10 +379,10 @@ func installLibrary(libName string, toolkitRoot string) error {
|
||||
|
||||
// installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring
|
||||
// that the settings are updated to match the desired install and nvidia driver directories.
|
||||
func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, opts *options) error {
|
||||
func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, opts *options) error {
|
||||
log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath)
|
||||
|
||||
config, err := toml.LoadFile(nvidiaContainerToolkitConfigSource)
|
||||
config, err := loadConfig(nvidiaContainerToolkitConfigSource)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not open source config file: %v", err)
|
||||
}
|
||||
@@ -350,45 +393,65 @@ func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutable
|
||||
}
|
||||
defer targetConfig.Close()
|
||||
|
||||
// Set the options in the root toml table
|
||||
config.Set("accept-nvidia-visible-devices-envvar-when-unprivileged", opts.acceptNVIDIAVisibleDevicesWhenUnprivileged)
|
||||
config.Set("accept-nvidia-visible-devices-as-volume-mounts", opts.acceptNVIDIAVisibleDevicesAsVolumeMounts)
|
||||
|
||||
nvidiaContainerCliKey := func(p string) []string {
|
||||
return []string{"nvidia-container-cli", p}
|
||||
}
|
||||
|
||||
// Read the ldconfig path from the config as this may differ per platform
|
||||
// On ubuntu-based systems this ends in `.real`
|
||||
ldconfigPath := fmt.Sprintf("%s", config.GetPath(nvidiaContainerCliKey("ldconfig")))
|
||||
|
||||
ldconfigPath := fmt.Sprintf("%s", config.GetDefault("nvidia-container-cli.ldconfig", "/sbin/ldconfig"))
|
||||
// Use the driver run root as the root:
|
||||
driverLdconfigPath := "@" + filepath.Join(opts.DriverRoot, strings.TrimPrefix(ldconfigPath, "@/"))
|
||||
|
||||
config.SetPath(nvidiaContainerCliKey("root"), opts.DriverRoot)
|
||||
config.SetPath(nvidiaContainerCliKey("path"), nvidiaContainerCliExecutablePath)
|
||||
config.SetPath(nvidiaContainerCliKey("ldconfig"), driverLdconfigPath)
|
||||
|
||||
// Set the debug options if selected
|
||||
debugOptions := map[string]string{
|
||||
"nvidia-container-runtime.debug": opts.ContainerRuntimeDebug,
|
||||
"nvidia-container-runtime.log-level": opts.ContainerRuntimeLogLevel,
|
||||
"nvidia-container-runtime.mode": opts.ContainerRuntimeMode,
|
||||
"nvidia-container-runtime.modes.cdi.default-kind": opts.ContainerRuntimeModesCdiDefaultKind,
|
||||
"nvidia-container-cli.debug": opts.ContainerCLIDebug,
|
||||
configValues := map[string]interface{}{
|
||||
// Set the options in the root toml table
|
||||
"accept-nvidia-visible-devices-envvar-when-unprivileged": opts.acceptNVIDIAVisibleDevicesWhenUnprivileged,
|
||||
"accept-nvidia-visible-devices-as-volume-mounts": opts.acceptNVIDIAVisibleDevicesAsVolumeMounts,
|
||||
// Set the nvidia-container-cli options
|
||||
"nvidia-container-cli.root": opts.DriverRoot,
|
||||
"nvidia-container-cli.path": nvidiaContainerCliExecutablePath,
|
||||
"nvidia-container-cli.ldconfig": driverLdconfigPath,
|
||||
// Set nvidia-ctk options
|
||||
"nvidia-ctk.path": nvidiaCTKPath,
|
||||
// Set the nvidia-container-runtime-hook options
|
||||
"nvidia-container-runtime-hook.skip-mode-detection": opts.ContainerRuntimeHookSkipModeDetection,
|
||||
}
|
||||
for key, value := range debugOptions {
|
||||
if value == "" {
|
||||
continue
|
||||
}
|
||||
for key, value := range configValues {
|
||||
config.Set(key, value)
|
||||
}
|
||||
|
||||
// Set nvidia-ctk options
|
||||
config.Set("nvidia-ctk.path", nvidiaCTKPath)
|
||||
// Set the optional config options
|
||||
optionalConfigValues := map[string]interface{}{
|
||||
"nvidia-container-runtime.debug": opts.ContainerRuntimeDebug,
|
||||
"nvidia-container-runtime.log-level": opts.ContainerRuntimeLogLevel,
|
||||
"nvidia-container-runtime.mode": opts.ContainerRuntimeMode,
|
||||
"nvidia-container-runtime.modes.cdi.annotation-prefixes": opts.ContainerRuntimeModesCDIAnnotationPrefixes,
|
||||
"nvidia-container-runtime.modes.cdi.default-kind": opts.ContainerRuntimeModesCdiDefaultKind,
|
||||
"nvidia-container-runtime.runtimes": opts.ContainerRuntimeRuntimes,
|
||||
"nvidia-container-cli.debug": opts.ContainerCLIDebug,
|
||||
}
|
||||
for key, value := range optionalConfigValues {
|
||||
if !c.IsSet(key) {
|
||||
log.Infof("Skipping unset option: %v", key)
|
||||
continue
|
||||
}
|
||||
if value == nil {
|
||||
log.Infof("Skipping option with nil value: %v", key)
|
||||
continue
|
||||
}
|
||||
|
||||
// Set the nvidia-container-runtime-hook options
|
||||
config.Set("nvidia-container-runtime-hook.skip-mode-detection", opts.ContainerRuntimeHookSkipModeDetection)
|
||||
switch v := value.(type) {
|
||||
case string:
|
||||
if v == "" {
|
||||
continue
|
||||
}
|
||||
case cli.StringSlice:
|
||||
if len(v.Value()) == 0 {
|
||||
continue
|
||||
}
|
||||
value = v.Value()
|
||||
default:
|
||||
log.Warnf("Unexpected type for option %v=%v: %T", key, value, v)
|
||||
}
|
||||
|
||||
config.Set(key, value)
|
||||
}
|
||||
|
||||
_, err = config.WriteTo(targetConfig)
|
||||
if err != nil {
|
||||
@@ -401,6 +464,16 @@ func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutable
|
||||
return nil
|
||||
}
|
||||
|
||||
func loadConfig(path string) (*toml.Tree, error) {
|
||||
_, err := os.Stat(path)
|
||||
if err == nil {
|
||||
return toml.LoadFile(path)
|
||||
} else if os.IsNotExist(err) {
|
||||
return toml.TreeFromMap(nil)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// installContainerToolkitCLI installs the nvidia-ctk CLI executable and wrapper.
|
||||
func installContainerToolkitCLI(toolkitDir string) (string, error) {
|
||||
e := executable{
|
||||
@@ -608,13 +681,26 @@ func generateCDISpec(opts *options, nvidiaCTKPath string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
cdilib := nvcdi.New(
|
||||
log.Infof("Creating control device nodes at %v", opts.DriverRootCtrPath)
|
||||
s, err := system.New()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create library: %v", err)
|
||||
}
|
||||
if err := s.CreateNVIDIAControlDeviceNodesAt(opts.DriverRootCtrPath); err != nil {
|
||||
return fmt.Errorf("failed to create control device nodes: %v", err)
|
||||
}
|
||||
|
||||
log.Info("Generating CDI spec for management containers")
|
||||
cdilib, err := nvcdi.New(
|
||||
nvcdi.WithMode(nvcdi.ModeManagement),
|
||||
nvcdi.WithDriverRoot(opts.DriverRootCtrPath),
|
||||
nvcdi.WithNVIDIACTKPath(nvidiaCTKPath),
|
||||
nvcdi.WithVendor(opts.cdiVendor),
|
||||
nvcdi.WithClass(opts.cdiClass),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create CDI library for management containers: %v", err)
|
||||
}
|
||||
|
||||
spec, err := cdilib.GetSpec()
|
||||
if err != nil {
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
# limitations under the License.
|
||||
|
||||
LIB_NAME := nvidia-container-toolkit
|
||||
LIB_VERSION := 1.13.0
|
||||
LIB_TAG := rc.2
|
||||
LIB_VERSION := 1.13.1
|
||||
LIB_TAG :=
|
||||
|
||||
# The package version is the combination of the library version and tag.
|
||||
# If the tag is specified the two components are joined with a tilde (~).
|
||||
@@ -24,8 +24,8 @@ PACKAGE_REVISION := 1
|
||||
# Specify the nvidia-docker2 and nvidia-container-runtime package versions.
|
||||
# Note: The build tooling uses `LIB_TAG` above as the version tag.
|
||||
# This is appended to the versions below if specified.
|
||||
NVIDIA_DOCKER_VERSION := 2.12.0
|
||||
NVIDIA_CONTAINER_RUNTIME_VERSION := 3.12.0
|
||||
NVIDIA_DOCKER_VERSION := 2.13.0
|
||||
NVIDIA_CONTAINER_RUNTIME_VERSION := 3.13.0
|
||||
|
||||
# Specify the expected libnvidia-container0 version for arm64-based ubuntu builds.
|
||||
LIBNVIDIA_CONTAINER0_VERSION := 0.10.0+jetpack
|
||||
|
||||
Reference in New Issue
Block a user