Merge pull request #765 from elezar/use-logger-in-toolkit-install
Some checks are pending
CodeQL / Analyze Go code with CodeQL (push) Waiting to run
Golang / check (push) Waiting to run
Golang / Unit test (push) Waiting to run
Golang / Build (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, centos7-aarch64) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, centos7-x86_64) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, centos8-ppc64le) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-amd64) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-arm64) (push) Waiting to run
image / packages (${{github.event_name == 'pull_request'}}, ubuntu18.04-ppc64le) (push) Waiting to run
image / image (packaging, ${{github.event_name == 'pull_request'}}) (push) Blocked by required conditions
image / image (ubi8, ${{github.event_name == 'pull_request'}}) (push) Blocked by required conditions
image / image (ubuntu20.04, ${{github.event_name == 'pull_request'}}) (push) Blocked by required conditions

Simplify standalone installer
This commit is contained in:
Evan Lezar 2025-01-22 14:42:50 +01:00 committed by GitHub
commit c22f3bd56c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
29 changed files with 394 additions and 268 deletions

View File

@ -24,8 +24,8 @@ import (
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/urfave/cli/v2" "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/operator"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/operator"
) )
const ( const (

View File

@ -23,9 +23,9 @@ import (
testlog "github.com/sirupsen/logrus/hooks/test" testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container"
) )
func TestUpdateV1ConfigDefaultRuntime(t *testing.T) { func TestUpdateV1ConfigDefaultRuntime(t *testing.T) {

View File

@ -23,9 +23,9 @@ import (
testlog "github.com/sirupsen/logrus/hooks/test" testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container"
) )
const ( const (

View File

@ -23,10 +23,10 @@ import (
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2" cli "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container"
) )
const ( const (

View File

@ -24,12 +24,12 @@ import (
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2" cli "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/crio" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/crio"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/ocihook" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/ocihook"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container"
) )
const ( const (

View File

@ -22,9 +22,9 @@ import (
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2" cli "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container"
) )
const ( const (

View File

@ -22,8 +22,8 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container"
) )
func TestUpdateConfigDefaultRuntime(t *testing.T) { func TestUpdateConfigDefaultRuntime(t *testing.T) {

View File

@ -21,10 +21,10 @@ import (
"github.com/urfave/cli/v2" "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/runtime/containerd" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/containerd"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/runtime/crio" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/crio"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/runtime/docker" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/docker"
) )
const ( const (

View File

@ -23,8 +23,6 @@ import (
"path/filepath" "path/filepath"
"sort" "sort"
"strings" "strings"
log "github.com/sirupsen/logrus"
) )
type executableTarget struct { type executableTarget struct {
@ -33,6 +31,7 @@ type executableTarget struct {
} }
type executable struct { type executable struct {
fileInstaller
source string source string
target executableTarget target executableTarget
env map[string]string env map[string]string
@ -43,21 +42,21 @@ type executable struct {
// install installs an executable component of the NVIDIA container toolkit. The source executable // install installs an executable component of the NVIDIA container toolkit. The source executable
// is copied to a `.real` file and a wrapper is created to set up the environment as required. // is copied to a `.real` file and a wrapper is created to set up the environment as required.
func (e executable) install(destFolder string) (string, error) { func (e executable) install(destFolder string) (string, error) {
log.Infof("Installing executable '%v' to %v", e.source, destFolder) e.logger.Infof("Installing executable '%v' to %v", e.source, destFolder)
dotfileName := e.dotfileName() dotfileName := e.dotfileName()
installedDotfileName, err := installFileToFolderWithName(destFolder, dotfileName, e.source) installedDotfileName, err := e.installFileToFolderWithName(destFolder, dotfileName, e.source)
if err != nil { if err != nil {
return "", fmt.Errorf("error installing file '%v' as '%v': %v", e.source, dotfileName, err) return "", fmt.Errorf("error installing file '%v' as '%v': %v", e.source, dotfileName, err)
} }
log.Infof("Installed '%v'", installedDotfileName) e.logger.Infof("Installed '%v'", installedDotfileName)
wrapperFilename, err := e.installWrapper(destFolder, installedDotfileName) wrapperFilename, err := e.installWrapper(destFolder, installedDotfileName)
if err != nil { if err != nil {
return "", fmt.Errorf("error wrapping '%v': %v", installedDotfileName, err) return "", fmt.Errorf("error wrapping '%v': %v", installedDotfileName, err)
} }
log.Infof("Installed wrapper '%v'", wrapperFilename) e.logger.Infof("Installed wrapper '%v'", wrapperFilename)
return wrapperFilename, nil return wrapperFilename, nil
} }

View File

@ -23,10 +23,13 @@ import (
"strings" "strings"
"testing" "testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
func TestWrapper(t *testing.T) { func TestWrapper(t *testing.T) {
logger, _ := testlog.NewNullLogger()
const shebang = "#! /bin/sh" const shebang = "#! /bin/sh"
const destFolder = "/dest/folder" const destFolder = "/dest/folder"
const dotfileName = "source.real" const dotfileName = "source.real"
@ -98,6 +101,8 @@ func TestWrapper(t *testing.T) {
for i, tc := range testCases { for i, tc := range testCases {
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
tc.e.logger = logger
err := tc.e.writeWrapperTo(buf, destFolder, dotfileName) err := tc.e.writeWrapperTo(buf, destFolder, dotfileName)
require.NoError(t, err) require.NoError(t, err)
@ -107,6 +112,8 @@ func TestWrapper(t *testing.T) {
} }
func TestInstallExecutable(t *testing.T) { func TestInstallExecutable(t *testing.T) {
logger, _ := testlog.NewNullLogger()
inputFolder, err := os.MkdirTemp("", "") inputFolder, err := os.MkdirTemp("", "")
require.NoError(t, err) require.NoError(t, err)
defer os.RemoveAll(inputFolder) defer os.RemoveAll(inputFolder)
@ -121,6 +128,9 @@ func TestInstallExecutable(t *testing.T) {
require.NoError(t, sourceFile.Close()) require.NoError(t, sourceFile.Close())
e := executable{ e := executable{
fileInstaller: fileInstaller{
logger: logger,
},
source: source, source: source,
target: executableTarget{ target: executableTarget{
dotfileName: "input.real", dotfileName: "input.real",

View File

@ -0,0 +1,95 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package toolkit
import (
"fmt"
"io"
"os"
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
// fileInstaller copies files from a source tree to a destination and reports
// each copy through its logger.
type fileInstaller struct {
// logger receives the informational message emitted for every installed file.
logger logger.Interface
// sourceRoot specifies the root that is searched for the components to install.
// installFile joins it onto every source path before the file is opened.
sourceRoot string
}
// installFileToFolder installs src into destFolder under the base name of
// src; the directory components of src do not appear in the destination.
// For example, installFileToFolder("/output/path", "/some/path/file.txt")
// results in the file "/output/path/file.txt" being generated.
// The path of the installed file is returned.
func (t *fileInstaller) installFileToFolder(destFolder string, src string) (string, error) {
	return t.installFileToFolderWithName(destFolder, filepath.Base(src), src)
}
// installFileToFolderWithName copies src to destFolder/name (the equivalent
// of `cp src destFolder/name`) and returns the path of the installed file.
// On failure the returned path is empty and the error wraps the underlying
// cause, so callers can inspect it with errors.Is or errors.As.
func (t *fileInstaller) installFileToFolderWithName(destFolder string, name, src string) (string, error) {
	dest := filepath.Join(destFolder, name)
	if err := t.installFile(dest, src); err != nil {
		return "", fmt.Errorf("error copying '%v' to '%v': %w", src, dest, err)
	}
	return dest, nil
}
// installFile copies the file at src to dest and then applies the mode of
// the source file to the destination.
//
// src is interpreted relative to t.sourceRoot. Every returned error wraps
// its underlying cause. A failure to close the destination is reported too,
// because a close error on a freshly written file can mean that data was
// not flushed to disk.
func (t *fileInstaller) installFile(dest string, src string) (err error) {
	src = filepath.Join(t.sourceRoot, src)
	t.logger.Infof("Installing '%v' to '%v'", src, dest)

	source, err := os.Open(src)
	if err != nil {
		return fmt.Errorf("error opening source: %w", err)
	}
	// The source is only read, so its Close error carries no information.
	defer source.Close()

	destination, err := os.Create(dest)
	if err != nil {
		return fmt.Errorf("error creating destination: %w", err)
	}
	defer func() {
		// Keep the first error; only surface the Close error when everything
		// else succeeded.
		if closeErr := destination.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("error closing destination: %w", closeErr)
		}
	}()

	if _, err = io.Copy(destination, source); err != nil {
		return fmt.Errorf("error copying file: %w", err)
	}

	if err = applyModeFromSource(dest, src); err != nil {
		return fmt.Errorf("error setting destination file mode: %w", err)
	}
	return nil
}
// applyModeFromSource sets the file mode of dest to match that of src.
//
// The mode of src is read with os.Stat and applied to dest with os.Chmod.
// Any error is returned with the offending path in the message and wraps the
// underlying cause, so callers can test it with errors.Is (for example
// against os.ErrNotExist) or errors.As.
func applyModeFromSource(dest string, src string) error {
	sourceInfo, err := os.Stat(src)
	if err != nil {
		return fmt.Errorf("error getting file info for '%v': %w", src, err)
	}

	if err := os.Chmod(dest, sourceInfo.Mode()); err != nil {
		return fmt.Errorf("error setting mode for '%v': %w", dest, err)
	}
	return nil
}

View File

@ -0,0 +1,40 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package toolkit
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
// An Option provides a mechanism to configure an Installer.
// It is a function that receives the Installer being configured and sets
// fields on it.
type Option func(*Installer)
// WithLogger returns an Option that sets the logger of the Installer to the
// supplied logger.
func WithLogger(logger logger.Interface) Option {
	setLogger := func(installer *Installer) {
		installer.logger = logger
	}
	return setLogger
}
// WithToolkitRoot returns an Option that sets the toolkitRoot of the
// Installer to the supplied path.
func WithToolkitRoot(toolkitRoot string) Option {
	setToolkitRoot := func(installer *Installer) {
		installer.toolkitRoot = toolkitRoot
	}
	return setToolkitRoot
}
// WithSourceRoot returns an Option that sets the sourceRoot of the Installer
// to the supplied path.
func WithSourceRoot(sourceRoot string) Option {
	setSourceRoot := func(installer *Installer) {
		installer.sourceRoot = sourceRoot
	}
	return setSourceRoot
}

View File

@ -20,7 +20,7 @@ import (
"fmt" "fmt"
"path/filepath" "path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/operator" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/operator"
) )
const ( const (
@ -29,10 +29,10 @@ const (
// installContainerRuntimes sets up the NVIDIA container runtimes, copying the executables // installContainerRuntimes sets up the NVIDIA container runtimes, copying the executables
// and implementing the required wrapper // and implementing the required wrapper
func installContainerRuntimes(sourceRoot string, toolkitDir string) error { func (t *Installer) installContainerRuntimes(toolkitDir string) error {
runtimes := operator.GetRuntimes() runtimes := operator.GetRuntimes()
for _, runtime := range runtimes { for _, runtime := range runtimes {
r := newNvidiaContainerRuntimeInstaller(filepath.Join(sourceRoot, runtime.Path)) r := t.newNvidiaContainerRuntimeInstaller(runtime.Path)
_, err := r.install(toolkitDir) _, err := r.install(toolkitDir)
if err != nil { if err != nil {
@ -46,17 +46,17 @@ func installContainerRuntimes(sourceRoot string, toolkitDir string) error {
// This installer will copy the specified source executable to the toolkit directory. // This installer will copy the specified source executable to the toolkit directory.
// The executable is copied to a file with the same name as the source, but with a ".real" suffix and a wrapper is // The executable is copied to a file with the same name as the source, but with a ".real" suffix and a wrapper is
// created to allow for the configuration of the runtime environment. // created to allow for the configuration of the runtime environment.
func newNvidiaContainerRuntimeInstaller(source string) *executable { func (t *Installer) newNvidiaContainerRuntimeInstaller(source string) *executable {
wrapperName := filepath.Base(source) wrapperName := filepath.Base(source)
dotfileName := wrapperName + ".real" dotfileName := wrapperName + ".real"
target := executableTarget{ target := executableTarget{
dotfileName: dotfileName, dotfileName: dotfileName,
wrapperName: wrapperName, wrapperName: wrapperName,
} }
return newRuntimeInstaller(source, target, nil) return t.newRuntimeInstaller(source, target, nil)
} }
func newRuntimeInstaller(source string, target executableTarget, env map[string]string) *executable { func (t *Installer) newRuntimeInstaller(source string, target executableTarget, env map[string]string) *executable {
preLines := []string{ preLines := []string{
"", "",
"cat /proc/modules | grep -e \"^nvidia \" >/dev/null 2>&1", "cat /proc/modules | grep -e \"^nvidia \" >/dev/null 2>&1",
@ -74,6 +74,7 @@ func newRuntimeInstaller(source string, target executableTarget, env map[string]
} }
r := executable{ r := executable{
fileInstaller: t.fileInstaller,
source: source, source: source,
target: target, target: target,
env: runtimeEnv, env: runtimeEnv,

View File

@ -21,11 +21,18 @@ import (
"strings" "strings"
"testing" "testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
func TestNvidiaContainerRuntimeInstallerWrapper(t *testing.T) { func TestNvidiaContainerRuntimeInstallerWrapper(t *testing.T) {
r := newNvidiaContainerRuntimeInstaller(nvidiaContainerRuntimeSource) logger, _ := testlog.NewNullLogger()
i := Installer{
fileInstaller: fileInstaller{
logger: logger,
},
}
r := i.newNvidiaContainerRuntimeInstaller(nvidiaContainerRuntimeSource)
const shebang = "#! /bin/sh" const shebang = "#! /bin/sh"
const destFolder = "/dest/folder" const destFolder = "/dest/folder"

View File

@ -17,19 +17,17 @@
package toolkit package toolkit
import ( import (
"errors"
"fmt" "fmt"
"io"
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
log "github.com/sirupsen/logrus"
"github.com/urfave/cli/v2" "github.com/urfave/cli/v2"
"tags.cncf.io/container-device-interface/pkg/cdi" "tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/pkg/parser" "tags.cncf.io/container-device-interface/pkg/parser"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices" "github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root" transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
@ -44,8 +42,6 @@ const (
nvidiaContainerToolkitConfigSource = "/etc/nvidia-container-runtime/config.toml" nvidiaContainerToolkitConfigSource = "/etc/nvidia-container-runtime/config.toml"
configFilename = "config.toml" configFilename = "config.toml"
toolkitPidFilename = "toolkit.pid"
) )
type Options struct { type Options struct {
@ -215,10 +211,33 @@ func Flags(opts *Options) []cli.Flag {
return flags return flags
} }
// An Installer is used to install the NVIDIA Container Toolkit from the toolkit container.
type Installer struct {
// fileInstaller is embedded, so its fields and methods are promoted to the
// Installer and can be used directly on it.
fileInstaller
// toolkitRoot specifies the destination path at which the toolkit is installed.
toolkitRoot string
}
// NewInstaller creates an installer for the NVIDIA Container Toolkit.
// The supplied options are applied in the order given. If no logger has been
// set once all options are applied, a logger created with logger.New() is
// used.
func NewInstaller(opts ...Option) *Installer {
	installer := new(Installer)
	for idx := range opts {
		opts[idx](installer)
	}

	if installer.logger == nil {
		installer.logger = logger.New()
	}
	return installer
}
// ValidateOptions checks whether the specified options are valid // ValidateOptions checks whether the specified options are valid
func ValidateOptions(opts *Options, toolkitRoot string) error { func (t *Installer) ValidateOptions(opts *Options) error {
if toolkitRoot == "" { if t == nil {
return fmt.Errorf("invalid --toolkit-root option: %v", toolkitRoot) return fmt.Errorf("toolkit installer is not initilized")
}
if t.toolkitRoot == "" {
return fmt.Errorf("invalid --toolkit-root option: %v", t.toolkitRoot)
} }
vendor, class := parser.ParseQualifier(opts.cdiKind) vendor, class := parser.ParseQualifier(opts.cdiKind)
@ -232,7 +251,7 @@ func ValidateOptions(opts *Options, toolkitRoot string) error {
opts.cdiClass = class opts.cdiClass = class
if opts.cdiEnabled && opts.cdiOutputDir == "" { if opts.cdiEnabled && opts.cdiOutputDir == "" {
log.Warning("Skipping CDI spec generation (no output directory specified)") t.logger.Warning("Skipping CDI spec generation (no output directory specified)")
opts.cdiEnabled = false opts.cdiEnabled = false
} }
@ -247,7 +266,7 @@ func ValidateOptions(opts *Options, toolkitRoot string) error {
} }
} }
if !opts.cdiEnabled && !isDisabled { if !opts.cdiEnabled && !isDisabled {
log.Info("disabling device node creation since --cdi-enabled=false") t.logger.Info("disabling device node creation since --cdi-enabled=false")
isDisabled = true isDisabled = true
} }
if isDisabled { if isDisabled {
@ -257,118 +276,93 @@ func ValidateOptions(opts *Options, toolkitRoot string) error {
return nil return nil
} }
// TryDelete attempts to remove the specified toolkit folder.
// A toolkit.pid file -- if present -- is skipped.
func TryDelete(cli *cli.Context, toolkitRoot string) error {
log.Infof("Attempting to delete NVIDIA container toolkit from '%v'", toolkitRoot)
contents, err := os.ReadDir(toolkitRoot)
if err != nil && errors.Is(err, os.ErrNotExist) {
return nil
} else if err != nil {
return fmt.Errorf("failed to read the contents of %v: %w", toolkitRoot, err)
}
for _, content := range contents {
if content.Name() == toolkitPidFilename {
continue
}
name := filepath.Join(toolkitRoot, content.Name())
if err := os.RemoveAll(name); err != nil {
log.Warningf("could not remove %v: %v", name, err)
}
}
if err := os.RemoveAll(toolkitRoot); err != nil {
log.Warningf("could not remove %v: %v", toolkitRoot, err)
}
return nil
}
// Install installs the components of the NVIDIA container toolkit. // Install installs the components of the NVIDIA container toolkit.
// The specified sourceRoot is searched for the components to install.
// Any existing installation is removed. // Any existing installation is removed.
func Install(cli *cli.Context, opts *Options, sourceRoot string, toolkitRoot string) error { func (t *Installer) Install(cli *cli.Context, opts *Options) error {
log.Infof("Installing NVIDIA container toolkit to '%v'", toolkitRoot) if t == nil {
return fmt.Errorf("toolkit installer is not initilized")
}
t.logger.Infof("Installing NVIDIA container toolkit to '%v'", t.toolkitRoot)
log.Infof("Removing existing NVIDIA container toolkit installation") t.logger.Infof("Removing existing NVIDIA container toolkit installation")
err := os.RemoveAll(toolkitRoot) err := os.RemoveAll(t.toolkitRoot)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error removing toolkit directory: %v", err) return fmt.Errorf("error removing toolkit directory: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err)) t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err))
} }
toolkitConfigDir := filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime") toolkitConfigDir := filepath.Join(t.toolkitRoot, ".config", "nvidia-container-runtime")
toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename) toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename)
err = createDirectories(toolkitRoot, toolkitConfigDir) err = t.createDirectories(t.toolkitRoot, toolkitConfigDir)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("could not create required directories: %v", err) return fmt.Errorf("could not create required directories: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err)) t.logger.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err))
} }
err = installContainerLibraries(sourceRoot, toolkitRoot) err = t.installContainerLibraries(t.toolkitRoot)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container library: %v", err) return fmt.Errorf("error installing NVIDIA container library: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err)) t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err))
} }
err = installContainerRuntimes(sourceRoot, toolkitRoot) err = t.installContainerRuntimes(t.toolkitRoot)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container runtime: %v", err) return fmt.Errorf("error installing NVIDIA container runtime: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime: %v", err)) t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime: %v", err))
} }
nvidiaContainerCliExecutable, err := installContainerCLI(sourceRoot, toolkitRoot) nvidiaContainerCliExecutable, err := t.installContainerCLI(t.toolkitRoot)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container CLI: %v", err) return fmt.Errorf("error installing NVIDIA container CLI: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err)) t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err))
} }
nvidiaContainerRuntimeHookPath, err := installRuntimeHook(sourceRoot, toolkitRoot, toolkitConfigPath) nvidiaContainerRuntimeHookPath, err := t.installRuntimeHook(t.toolkitRoot, toolkitConfigPath)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err) return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)) t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err))
} }
nvidiaCTKPath, err := installContainerToolkitCLI(sourceRoot, toolkitRoot) nvidiaCTKPath, err := t.installContainerToolkitCLI(t.toolkitRoot)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err) return fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)) t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err))
} }
nvidiaCDIHookPath, err := installContainerCDIHookCLI(sourceRoot, toolkitRoot) nvidiaCDIHookPath, err := t.installContainerCDIHookCLI(t.toolkitRoot)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err) return fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err)) t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err))
} }
err = installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, nvidiaContainerRuntimeHookPath, opts) err = t.installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, nvidiaContainerRuntimeHookPath, opts)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err) return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)) t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container toolkit config: %v", err))
} }
err = createDeviceNodes(opts) err = t.createDeviceNodes(opts)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error creating device nodes: %v", err) return fmt.Errorf("error creating device nodes: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error creating device nodes: %v", err)) t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error creating device nodes: %v", err))
} }
err = generateCDISpec(opts, nvidiaCDIHookPath) err = t.generateCDISpec(opts, nvidiaCDIHookPath)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error generating CDI specification: %v", err) return fmt.Errorf("error generating CDI specification: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error generating CDI specification: %v", err)) t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error generating CDI specification: %v", err))
} }
return nil return nil
@ -379,8 +373,8 @@ func Install(cli *cli.Context, opts *Options, sourceRoot string, toolkitRoot str
// A predefined set of library candidates are considered, with the first one // A predefined set of library candidates are considered, with the first one
// resulting in success being installed to the toolkit folder. The install process // resulting in success being installed to the toolkit folder. The install process
// resolves the symlink for the library and copies the versioned library itself. // resolves the symlink for the library and copies the versioned library itself.
func installContainerLibraries(sourceRoot string, toolkitRoot string) error { func (t *Installer) installContainerLibraries(toolkitRoot string) error {
log.Infof("Installing NVIDIA container library to '%v'", toolkitRoot) t.logger.Infof("Installing NVIDIA container library to '%v'", toolkitRoot)
libs := []string{ libs := []string{
"libnvidia-container.so.1", "libnvidia-container.so.1",
@ -388,7 +382,7 @@ func installContainerLibraries(sourceRoot string, toolkitRoot string) error {
} }
for _, l := range libs { for _, l := range libs {
err := installLibrary(l, sourceRoot, toolkitRoot) err := t.installLibrary(l, toolkitRoot)
if err != nil { if err != nil {
return fmt.Errorf("failed to install %s: %v", l, err) return fmt.Errorf("failed to install %s: %v", l, err)
} }
@ -398,23 +392,23 @@ func installContainerLibraries(sourceRoot string, toolkitRoot string) error {
} }
// installLibrary installs the specified library to the toolkit directory. // installLibrary installs the specified library to the toolkit directory.
func installLibrary(libName string, sourceRoot string, toolkitRoot string) error { func (t *Installer) installLibrary(libName string, toolkitRoot string) error {
libraryPath, err := findLibrary(sourceRoot, libName) libraryPath, err := t.findLibrary(libName)
if err != nil { if err != nil {
return fmt.Errorf("error locating NVIDIA container library: %v", err) return fmt.Errorf("error locating NVIDIA container library: %v", err)
} }
installedLibPath, err := installFileToFolder(toolkitRoot, libraryPath) installedLibPath, err := t.installFileToFolder(toolkitRoot, libraryPath)
if err != nil { if err != nil {
return fmt.Errorf("error installing %v to %v: %v", libraryPath, toolkitRoot, err) return fmt.Errorf("error installing %v to %v: %v", libraryPath, toolkitRoot, err)
} }
log.Infof("Installed '%v' to '%v'", libraryPath, installedLibPath) t.logger.Infof("Installed '%v' to '%v'", libraryPath, installedLibPath)
if filepath.Base(installedLibPath) == libName { if filepath.Base(installedLibPath) == libName {
return nil return nil
} }
err = installSymlink(toolkitRoot, libName, installedLibPath) err = t.installSymlink(toolkitRoot, libName, installedLibPath)
if err != nil { if err != nil {
return fmt.Errorf("error installing symlink for NVIDIA container library: %v", err) return fmt.Errorf("error installing symlink for NVIDIA container library: %v", err)
} }
@ -424,8 +418,8 @@ func installLibrary(libName string, sourceRoot string, toolkitRoot string) error
// installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring // installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring
// that the settings are updated to match the desired install and nvidia driver directories. // that the settings are updated to match the desired install and nvidia driver directories.
func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, nvidaContainerRuntimeHookPath string, opts *Options) error { func (t *Installer) installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, nvidaContainerRuntimeHookPath string, opts *Options) error {
log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath) t.logger.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath)
cfg, err := config.New( cfg, err := config.New(
config.WithConfigFile(nvidiaContainerToolkitConfigSource), config.WithConfigFile(nvidiaContainerToolkitConfigSource),
@ -487,11 +481,11 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai
for key, value := range optionalConfigValues { for key, value := range optionalConfigValues {
if !c.IsSet(key) { if !c.IsSet(key) {
log.Infof("Skipping unset option: %v", key) t.logger.Infof("Skipping unset option: %v", key)
continue continue
} }
if value == nil { if value == nil {
log.Infof("Skipping option with nil value: %v", key) t.logger.Infof("Skipping option with nil value: %v", key)
continue continue
} }
@ -506,7 +500,7 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai
} }
value = v.Value() value = v.Value()
default: default:
log.Warningf("Unexpected type for option %v=%v: %T", key, value, v) t.logger.Warningf("Unexpected type for option %v=%v: %T", key, value, v)
} }
cfg.Set(key, value) cfg.Set(key, value)
@ -518,16 +512,17 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai
os.Stdout.WriteString("Using config:\n") os.Stdout.WriteString("Using config:\n")
if _, err = cfg.WriteTo(os.Stdout); err != nil { if _, err = cfg.WriteTo(os.Stdout); err != nil {
log.Warningf("Failed to output config to STDOUT: %v", err) t.logger.Warningf("Failed to output config to STDOUT: %v", err)
} }
return nil return nil
} }
// installContainerToolkitCLI installs the nvidia-ctk CLI executable and wrapper. // installContainerToolkitCLI installs the nvidia-ctk CLI executable and wrapper.
func installContainerToolkitCLI(sourceRoot string, toolkitDir string) (string, error) { func (t *Installer) installContainerToolkitCLI(toolkitDir string) (string, error) {
e := executable{ e := executable{
source: filepath.Join(sourceRoot, "/usr/bin/nvidia-ctk"), fileInstaller: t.fileInstaller,
source: "/usr/bin/nvidia-ctk",
target: executableTarget{ target: executableTarget{
dotfileName: "nvidia-ctk.real", dotfileName: "nvidia-ctk.real",
wrapperName: "nvidia-ctk", wrapperName: "nvidia-ctk",
@ -538,9 +533,10 @@ func installContainerToolkitCLI(sourceRoot string, toolkitDir string) (string, e
} }
// installContainerCDIHookCLI installs the nvidia-cdi-hook CLI executable and wrapper. // installContainerCDIHookCLI installs the nvidia-cdi-hook CLI executable and wrapper.
func installContainerCDIHookCLI(sourceRoot string, toolkitDir string) (string, error) { func (t *Installer) installContainerCDIHookCLI(toolkitDir string) (string, error) {
e := executable{ e := executable{
source: filepath.Join(sourceRoot, "/usr/bin/nvidia-cdi-hook"), fileInstaller: t.fileInstaller,
source: "/usr/bin/nvidia-cdi-hook",
target: executableTarget{ target: executableTarget{
dotfileName: "nvidia-cdi-hook.real", dotfileName: "nvidia-cdi-hook.real",
wrapperName: "nvidia-cdi-hook", wrapperName: "nvidia-cdi-hook",
@ -552,15 +548,16 @@ func installContainerCDIHookCLI(sourceRoot string, toolkitDir string) (string, e
// installContainerCLI sets up the NVIDIA container CLI executable, copying the executable // installContainerCLI sets up the NVIDIA container CLI executable, copying the executable
// and implementing the required wrapper // and implementing the required wrapper
func installContainerCLI(sourceRoot string, toolkitRoot string) (string, error) { func (t *Installer) installContainerCLI(toolkitRoot string) (string, error) {
log.Infof("Installing NVIDIA container CLI from '%v'", nvidiaContainerCliSource) t.logger.Infof("Installing NVIDIA container CLI from '%v'", nvidiaContainerCliSource)
env := map[string]string{ env := map[string]string{
"LD_LIBRARY_PATH": toolkitRoot, "LD_LIBRARY_PATH": toolkitRoot,
} }
e := executable{ e := executable{
source: filepath.Join(sourceRoot, nvidiaContainerCliSource), fileInstaller: t.fileInstaller,
source: nvidiaContainerCliSource,
target: executableTarget{ target: executableTarget{
dotfileName: "nvidia-container-cli.real", dotfileName: "nvidia-container-cli.real",
wrapperName: "nvidia-container-cli", wrapperName: "nvidia-container-cli",
@ -577,15 +574,16 @@ func installContainerCLI(sourceRoot string, toolkitRoot string) (string, error)
// installRuntimeHook sets up the NVIDIA runtime hook, copying the executable // installRuntimeHook sets up the NVIDIA runtime hook, copying the executable
// and implementing the required wrapper // and implementing the required wrapper
func installRuntimeHook(sourceRoot string, toolkitRoot string, configFilePath string) (string, error) { func (t *Installer) installRuntimeHook(toolkitRoot string, configFilePath string) (string, error) {
log.Infof("Installing NVIDIA container runtime hook from '%v'", nvidiaContainerRuntimeHookSource) t.logger.Infof("Installing NVIDIA container runtime hook from '%v'", nvidiaContainerRuntimeHookSource)
argLines := []string{ argLines := []string{
fmt.Sprintf("-config \"%s\"", configFilePath), fmt.Sprintf("-config \"%s\"", configFilePath),
} }
e := executable{ e := executable{
source: filepath.Join(sourceRoot, nvidiaContainerRuntimeHookSource), fileInstaller: t.fileInstaller,
source: nvidiaContainerRuntimeHookSource,
target: executableTarget{ target: executableTarget{
dotfileName: "nvidia-container-runtime-hook.real", dotfileName: "nvidia-container-runtime-hook.real",
wrapperName: "nvidia-container-runtime-hook", wrapperName: "nvidia-container-runtime-hook",
@ -598,7 +596,7 @@ func installRuntimeHook(sourceRoot string, toolkitRoot string, configFilePath st
return "", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err) return "", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)
} }
err = installSymlink(toolkitRoot, "nvidia-container-toolkit", installedPath) err = t.installSymlink(toolkitRoot, "nvidia-container-toolkit", installedPath)
if err != nil { if err != nil {
return "", fmt.Errorf("error installing symlink to NVIDIA container runtime hook: %v", err) return "", fmt.Errorf("error installing symlink to NVIDIA container runtime hook: %v", err)
} }
@ -608,10 +606,10 @@ func installRuntimeHook(sourceRoot string, toolkitRoot string, configFilePath st
// installSymlink creates a symlink in the toolkitDirectory that points to the specified target. // installSymlink creates a symlink in the toolkitDirectory that points to the specified target.
// Note: The target is assumed to be local to the toolkit directory // Note: The target is assumed to be local to the toolkit directory
func installSymlink(toolkitRoot string, link string, target string) error { func (t *Installer) installSymlink(toolkitRoot string, link string, target string) error {
symlinkPath := filepath.Join(toolkitRoot, link) symlinkPath := filepath.Join(toolkitRoot, link)
targetPath := filepath.Base(target) targetPath := filepath.Base(target)
log.Infof("Creating symlink '%v' -> '%v'", symlinkPath, targetPath) t.logger.Infof("Creating symlink '%v' -> '%v'", symlinkPath, targetPath)
err := os.Symlink(targetPath, symlinkPath) err := os.Symlink(targetPath, symlinkPath)
if err != nil { if err != nil {
@ -620,72 +618,10 @@ func installSymlink(toolkitRoot string, link string, target string) error {
return nil return nil
} }
// installFileToFolder copies the file at src into destFolder, keeping the
// base name of src and discarding the rest of its path.
// e.g. installFileToFolder("/output/path", "/some/path/file.txt")
// will result in a file "/output/path/file.txt" being generated.
func installFileToFolder(destFolder string, src string) (string, error) {
	return installFileToFolderWithName(destFolder, filepath.Base(src), src)
}
// installFileToFolderWithName copies src to destFolder/name (the equivalent
// of `cp src destFolder/name`) and returns the destination path on success.
// On failure an empty path is returned together with the wrapped error.
func installFileToFolderWithName(destFolder string, name, src string) (string, error) {
	target := filepath.Join(destFolder, name)
	if copyErr := installFile(target, src); copyErr != nil {
		return "", fmt.Errorf("error copying '%v' to '%v': %v", src, target, copyErr)
	}
	return target, nil
}
// installFile copies the contents of src to dest (creating or truncating
// dest) and then applies the file mode of src to dest.
//
// An error is returned if either file cannot be opened, the copy fails, the
// mode cannot be applied, or the destination cannot be closed cleanly.
func installFile(dest string, src string) (err error) {
	log.Infof("Installing '%v' to '%v'", src, dest)

	source, err := os.Open(src)
	if err != nil {
		return fmt.Errorf("error opening source: %v", err)
	}
	// The source is only read, so a failure to close it cannot lose data.
	defer source.Close()

	destination, err := os.Create(dest)
	if err != nil {
		return fmt.Errorf("error creating destination: %v", err)
	}
	// Buffered write errors may only surface on Close; report them unless an
	// earlier error is already being returned.
	defer func() {
		if closeErr := destination.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("error closing destination: %v", closeErr)
		}
	}()

	if _, err = io.Copy(destination, source); err != nil {
		return fmt.Errorf("error copying file: %v", err)
	}

	if err = applyModeFromSource(dest, src); err != nil {
		return fmt.Errorf("error setting destination file mode: %v", err)
	}
	return nil
}
// applyModeFromSource reads the file mode of src and applies it to dest.
// It returns an error if src cannot be stat'ed or if the mode of dest cannot
// be changed.
func applyModeFromSource(dest string, src string) error {
	info, statErr := os.Stat(src)
	if statErr != nil {
		return fmt.Errorf("error getting file info for '%v': %v", src, statErr)
	}

	if chmodErr := os.Chmod(dest, info.Mode()); chmodErr != nil {
		return fmt.Errorf("error setting mode for '%v': %v", dest, chmodErr)
	}
	return nil
}
// findLibrary searches a set of candidate libraries in the specified root for // findLibrary searches a set of candidate libraries in the specified root for
// a given library name // a given library name
func findLibrary(root string, libName string) (string, error) { func (t *Installer) findLibrary(libName string) (string, error) {
log.Infof("Finding library %v (root=%v)", libName, root) t.logger.Infof("Finding library %v (root=%v)", libName, t.sourceRoot)
candidateDirs := []string{ candidateDirs := []string{
"/usr/lib64", "/usr/lib64",
@ -694,16 +630,16 @@ func findLibrary(root string, libName string) (string, error) {
} }
for _, d := range candidateDirs { for _, d := range candidateDirs {
l := filepath.Join(root, d, libName) l := filepath.Join(t.sourceRoot, d, libName)
log.Infof("Checking library candidate '%v'", l) t.logger.Infof("Checking library candidate '%v'", l)
libraryCandidate, err := resolveLink(l) libraryCandidate, err := t.resolveLink(l)
if err != nil { if err != nil {
log.Infof("Skipping library candidate '%v': %v", l, err) t.logger.Infof("Skipping library candidate '%v': %v", l, err)
continue continue
} }
return libraryCandidate, nil return strings.TrimPrefix(libraryCandidate, t.sourceRoot), nil
} }
return "", fmt.Errorf("error locating library '%v'", libName) return "", fmt.Errorf("error locating library '%v'", libName)
@ -712,20 +648,20 @@ func findLibrary(root string, libName string) (string, error) {
// resolveLink finds the target of a symlink or the file itself in the // resolveLink finds the target of a symlink or the file itself in the
// case of a regular file. // case of a regular file.
// This is equivalent to running `readlink -f ${l}` // This is equivalent to running `readlink -f ${l}`
func resolveLink(l string) (string, error) { func (t *Installer) resolveLink(l string) (string, error) {
resolved, err := filepath.EvalSymlinks(l) resolved, err := filepath.EvalSymlinks(l)
if err != nil { if err != nil {
return "", fmt.Errorf("error resolving link '%v': %v", l, err) return "", fmt.Errorf("error resolving link '%v': %v", l, err)
} }
if l != resolved { if l != resolved {
log.Infof("Resolved link: '%v' => '%v'", l, resolved) t.logger.Infof("Resolved link: '%v' => '%v'", l, resolved)
} }
return resolved, nil return resolved, nil
} }
func createDirectories(dir ...string) error { func (t *Installer) createDirectories(dir ...string) error {
for _, d := range dir { for _, d := range dir {
log.Infof("Creating directory '%v'", d) t.logger.Infof("Creating directory '%v'", d)
err := os.MkdirAll(d, 0755) err := os.MkdirAll(d, 0755)
if err != nil { if err != nil {
return fmt.Errorf("error creating directory: %v", err) return fmt.Errorf("error creating directory: %v", err)
@ -734,7 +670,7 @@ func createDirectories(dir ...string) error {
return nil return nil
} }
func createDeviceNodes(opts *Options) error { func (t *Installer) createDeviceNodes(opts *Options) error {
modes := opts.createDeviceNodes.Value() modes := opts.createDeviceNodes.Value()
if len(modes) == 0 { if len(modes) == 0 {
return nil return nil
@ -748,9 +684,9 @@ func createDeviceNodes(opts *Options) error {
} }
for _, mode := range modes { for _, mode := range modes {
log.Infof("Creating %v device nodes at %v", mode, opts.DevRootCtrPath) t.logger.Infof("Creating %v device nodes at %v", mode, opts.DevRootCtrPath)
if mode != "control" { if mode != "control" {
log.Warningf("Unrecognised device mode: %v", mode) t.logger.Warningf("Unrecognised device mode: %v", mode)
continue continue
} }
if err := devices.CreateNVIDIAControlDevices(); err != nil { if err := devices.CreateNVIDIAControlDevices(); err != nil {
@ -761,12 +697,13 @@ func createDeviceNodes(opts *Options) error {
} }
// generateCDISpec generates a CDI spec for use in management containers // generateCDISpec generates a CDI spec for use in management containers
func generateCDISpec(opts *Options, nvidiaCDIHookPath string) error { func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) error {
if !opts.cdiEnabled { if !opts.cdiEnabled {
return nil return nil
} }
log.Info("Generating CDI spec for management containers") t.logger.Info("Generating CDI spec for management containers")
cdilib, err := nvcdi.New( cdilib, err := nvcdi.New(
nvcdi.WithLogger(t.logger),
nvcdi.WithMode(nvcdi.ModeManagement), nvcdi.WithMode(nvcdi.ModeManagement),
nvcdi.WithDriverRoot(opts.DriverRootCtrPath), nvcdi.WithDriverRoot(opts.DriverRootCtrPath),
nvcdi.WithDevRoot(opts.DevRootCtrPath), nvcdi.WithDevRoot(opts.DevRootCtrPath),

View File

@ -23,6 +23,7 @@ import (
"strings" "strings"
"testing" "testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/urfave/cli/v2" "github.com/urfave/cli/v2"
@ -33,6 +34,7 @@ import (
func TestInstall(t *testing.T) { func TestInstall(t *testing.T) {
t.Setenv("__NVCT_TESTING_DEVICES_ARE_FILES", "true") t.Setenv("__NVCT_TESTING_DEVICES_ARE_FILES", "true")
logger, _ := testlog.NewNullLogger()
moduleRoot, err := test.GetModuleRoot() moduleRoot, err := test.GetModuleRoot()
require.NoError(t, err) require.NoError(t, err)
@ -127,9 +129,14 @@ kind: example.com/class
cdiKind: "example.com/class", cdiKind: "example.com/class",
} }
require.NoError(t, ValidateOptions(&options, toolkitRoot)) ti := NewInstaller(
WithLogger(logger),
WithToolkitRoot(toolkitRoot),
WithSourceRoot(sourceRoot),
)
require.NoError(t, ti.ValidateOptions(&options))
err := Install(&cli.Context{}, &options, sourceRoot, toolkitRoot) err := ti.Install(&cli.Context{}, &options)
if tc.expectedError == nil { if tc.expectedError == nil {
require.NoError(t, err) require.NoError(t, err)
} else { } else {

View File

@ -8,12 +8,12 @@ import (
"strings" "strings"
"syscall" "syscall"
log "github.com/sirupsen/logrus"
"github.com/urfave/cli/v2" "github.com/urfave/cli/v2"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/runtime" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/toolkit" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/toolkit"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
) )
const ( const (
@ -51,12 +51,46 @@ func (o options) toolkitRoot() string {
var Version = "development" var Version = "development"
func main() { func main() {
remainingArgs, root, err := ParseArgs(os.Args) logger := logger.New()
remainingArgs, root, err := ParseArgs(logger, os.Args)
if err != nil { if err != nil {
log.Errorf("Error: unable to parse arguments: %v", err) logger.Errorf("Error: unable to parse arguments: %v", err)
os.Exit(1) os.Exit(1)
} }
c := NewApp(logger, root)
// Run the CLI
logger.Infof("Starting %v", c.Name)
if err := c.Run(remainingArgs); err != nil {
logger.Errorf("error running nvidia-toolkit: %v", err)
os.Exit(1)
}
logger.Infof("Completed %v", c.Name)
}
// app represents the nvidia-ctk-installer.
type app struct {
	// logger is used for all log output emitted by the app's methods.
	logger logger.Interface
	// defaultRoot stores the root to use if the --root flag is not specified.
	defaultRoot string

	// toolkit is the toolkit installer used to validate options and perform
	// the install.
	toolkit *toolkit.Installer
}
// NewApp creates the CLI app for the specified options.
// The supplied logger is used for the app's log output, and defaultRoot is
// used as the root if it is not specified via the --root flag.
func NewApp(logger logger.Interface, defaultRoot string) *cli.App {
	installerApp := app{
		defaultRoot: defaultRoot,
		logger:      logger,
	}
	return installerApp.build()
}
func (a app) build() *cli.App {
options := options{ options := options{
toolkitOptions: toolkit.Options{}, toolkitOptions: toolkit.Options{},
} }
@ -68,10 +102,10 @@ func main() {
c.Description = "DESTINATION points to the host path underneath which the nvidia-container-toolkit should be installed.\nIt will be installed at ${DESTINATION}/toolkit" c.Description = "DESTINATION points to the host path underneath which the nvidia-container-toolkit should be installed.\nIt will be installed at ${DESTINATION}/toolkit"
c.Version = Version c.Version = Version
c.Before = func(ctx *cli.Context) error { c.Before = func(ctx *cli.Context) error {
return validateFlags(ctx, &options) return a.Before(ctx, &options)
} }
c.Action = func(ctx *cli.Context) error { c.Action = func(ctx *cli.Context) error {
return Run(ctx, &options) return a.Run(ctx, &options)
} }
// Setup flags for the CLI // Setup flags for the CLI
@ -102,7 +136,7 @@ func main() {
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "root", Name: "root",
Value: root, Value: a.defaultRoot,
Usage: "the folder where the NVIDIA Container Toolkit is to be installed. It will be installed to `ROOT`/toolkit", Usage: "the folder where the NVIDIA Container Toolkit is to be installed. It will be installed to `ROOT`/toolkit",
Destination: &options.root, Destination: &options.root,
EnvVars: []string{"ROOT"}, EnvVars: []string{"ROOT"},
@ -119,21 +153,29 @@ func main() {
c.Flags = append(c.Flags, toolkit.Flags(&options.toolkitOptions)...) c.Flags = append(c.Flags, toolkit.Flags(&options.toolkitOptions)...)
c.Flags = append(c.Flags, runtime.Flags(&options.runtimeOptions)...) c.Flags = append(c.Flags, runtime.Flags(&options.runtimeOptions)...)
// Run the CLI return c
log.Infof("Starting %v", c.Name)
if err := c.Run(remainingArgs); err != nil {
log.Errorf("error running nvidia-toolkit: %v", err)
os.Exit(1)
} }
log.Infof("Completed %v", c.Name) func (a *app) Before(c *cli.Context, o *options) error {
a.toolkit = toolkit.NewInstaller(
toolkit.WithLogger(a.logger),
toolkit.WithToolkitRoot(o.toolkitRoot()),
)
return a.validateFlags(c, o)
} }
func validateFlags(_ *cli.Context, o *options) error { func (a *app) validateFlags(_ *cli.Context, o *options) error {
if o.root == "" {
return fmt.Errorf("the install root must be specified")
}
if _, exists := availableRuntimes[o.runtime]; !exists {
return fmt.Errorf("unknown runtime: %v", o.runtime)
}
if filepath.Base(o.pidFile) != toolkitPidFilename { if filepath.Base(o.pidFile) != toolkitPidFilename {
return fmt.Errorf("invalid toolkit.pid path %v", o.pidFile) return fmt.Errorf("invalid toolkit.pid path %v", o.pidFile)
} }
if err := toolkit.ValidateOptions(&o.toolkitOptions, o.toolkitRoot()); err != nil {
if err := a.toolkit.ValidateOptions(&o.toolkitOptions); err != nil {
return err return err
} }
if err := runtime.ValidateOptions(&o.runtimeOptions, o.runtime, o.toolkitRoot()); err != nil { if err := runtime.ValidateOptions(&o.runtimeOptions, o.runtime, o.toolkitRoot()); err != nil {
@ -142,18 +184,15 @@ func validateFlags(_ *cli.Context, o *options) error {
return nil return nil
} }
// Run runs the core logic of the CLI // Run installs the NVIDIA Container Toolkit and updates the requested runtime.
func Run(c *cli.Context, o *options) error { // If the application is run as a daemon, the application waits and unconfigures
err := verifyFlags(o) // the runtime on termination.
if err != nil { func (a *app) Run(c *cli.Context, o *options) error {
return fmt.Errorf("unable to verify flags: %v", err) err := a.initialize(o.pidFile)
}
err = initialize(o.pidFile)
if err != nil { if err != nil {
return fmt.Errorf("unable to initialize: %v", err) return fmt.Errorf("unable to initialize: %v", err)
} }
defer shutdown(o.pidFile) defer a.shutdown(o.pidFile)
if len(o.toolkitOptions.ContainerRuntimeRuntimes.Value()) == 0 { if len(o.toolkitOptions.ContainerRuntimeRuntimes.Value()) == 0 {
lowlevelRuntimePaths, err := runtime.GetLowlevelRuntimePaths(&o.runtimeOptions, o.runtime) lowlevelRuntimePaths, err := runtime.GetLowlevelRuntimePaths(&o.runtimeOptions, o.runtime)
@ -164,7 +203,8 @@ func Run(c *cli.Context, o *options) error {
o.toolkitOptions.ContainerRuntimeRuntimes = *cli.NewStringSlice(lowlevelRuntimePaths...) o.toolkitOptions.ContainerRuntimeRuntimes = *cli.NewStringSlice(lowlevelRuntimePaths...)
} }
err = toolkit.Install(c, &o.toolkitOptions, "", o.toolkitRoot())
err = a.toolkit.Install(c, &o.toolkitOptions)
if err != nil { if err != nil {
return fmt.Errorf("unable to install toolkit: %v", err) return fmt.Errorf("unable to install toolkit: %v", err)
} }
@ -175,7 +215,7 @@ func Run(c *cli.Context, o *options) error {
} }
if !o.noDaemon { if !o.noDaemon {
err = waitForSignal() err = a.waitForSignal()
if err != nil { if err != nil {
return fmt.Errorf("unable to wait for signal: %v", err) return fmt.Errorf("unable to wait for signal: %v", err)
} }
@ -191,8 +231,8 @@ func Run(c *cli.Context, o *options) error {
// ParseArgs checks if a single positional argument was defined and extracts this as the root. // ParseArgs checks if a single positional argument was defined and extracts this as the root.
// If no positional arguments are defined, it is assumed that the root is specified as a flag. // If no positional arguments are defined, it is assumed that the root is specified as a flag.
func ParseArgs(args []string) ([]string, string, error) { func ParseArgs(logger logger.Interface, args []string) ([]string, string, error) {
log.Infof("Parsing arguments") logger.Infof("Parsing arguments")
if len(args) < 2 { if len(args) < 2 {
return args, "", nil return args, "", nil
@ -217,20 +257,8 @@ func ParseArgs(args []string) ([]string, string, error) {
return nil, "", fmt.Errorf("unexpected positional argument(s) %v", args[2:lastPositionalArg+1]) return nil, "", fmt.Errorf("unexpected positional argument(s) %v", args[2:lastPositionalArg+1])
} }
func verifyFlags(o *options) error { func (a *app) initialize(pidFile string) error {
log.Infof("Verifying Flags") a.logger.Infof("Initializing")
if o.root == "" {
return fmt.Errorf("the install root must be specified")
}
if _, exists := availableRuntimes[o.runtime]; !exists {
return fmt.Errorf("unknown runtime: %v", o.runtime)
}
return nil
}
func initialize(pidFile string) error {
log.Infof("Initializing")
if dir := filepath.Dir(pidFile); dir != "" { if dir := filepath.Dir(pidFile); dir != "" {
err := os.MkdirAll(dir, 0755) err := os.MkdirAll(dir, 0755)
@ -246,8 +274,8 @@ func initialize(pidFile string) error {
err = unix.Flock(int(f.Fd()), unix.LOCK_EX|unix.LOCK_NB) err = unix.Flock(int(f.Fd()), unix.LOCK_EX|unix.LOCK_NB)
if err != nil { if err != nil {
log.Warningf("Unable to get exclusive lock on '%v'", pidFile) a.logger.Warningf("Unable to get exclusive lock on '%v'", pidFile)
log.Warningf("This normally means an instance of the NVIDIA toolkit Container is already running, aborting") a.logger.Warningf("This normally means an instance of the NVIDIA toolkit Container is already running, aborting")
return fmt.Errorf("unable to get flock on pidfile: %v", err) return fmt.Errorf("unable to get flock on pidfile: %v", err)
} }
@ -264,8 +292,8 @@ func initialize(pidFile string) error {
case <-waitingForSignal: case <-waitingForSignal:
signalReceived <- true signalReceived <- true
default: default:
log.Infof("Signal received, exiting early") a.logger.Infof("Signal received, exiting early")
shutdown(pidFile) a.shutdown(pidFile)
os.Exit(0) os.Exit(0)
} }
}() }()
@ -273,18 +301,18 @@ func initialize(pidFile string) error {
return nil return nil
} }
func waitForSignal() error { func (a *app) waitForSignal() error {
log.Infof("Waiting for signal") a.logger.Infof("Waiting for signal")
waitingForSignal <- true waitingForSignal <- true
<-signalReceived <-signalReceived
return nil return nil
} }
func shutdown(pidFile string) { func (a *app) shutdown(pidFile string) {
log.Infof("Shutting Down") a.logger.Infof("Shutting Down")
err := os.Remove(pidFile) err := os.Remove(pidFile)
if err != nil { if err != nil {
log.Warningf("Unable to remove pidfile: %v", err) a.logger.Warningf("Unable to remove pidfile: %v", err)
} }
} }

View File

@ -20,10 +20,12 @@ import (
"fmt" "fmt"
"testing" "testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
func TestParseArgs(t *testing.T) { func TestParseArgs(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct { testCases := []struct {
args []string args []string
expectedRemaining []string expectedRemaining []string
@ -70,7 +72,7 @@ func TestParseArgs(t *testing.T) {
for i, tc := range testCases { for i, tc := range testCases {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
remaining, root, err := ParseArgs(tc.args) remaining, root, err := ParseArgs(logger, tc.args)
if tc.expectedError != nil { if tc.expectedError != nil {
require.EqualError(t, err, tc.expectedError.Error()) require.EqualError(t, err, tc.expectedError.Error())
} else { } else {

View File

@ -42,11 +42,10 @@ ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
WORKDIR /build WORKDIR /build
COPY . . COPY . .
# NOTE: Until the config utilities are properly integrated into the RUN mkdir /artifacts
# nvidia-container-toolkit repository, these are built from the `tools` folder ARG VERSION="N/A"
# and not `cmd`. ARG GIT_COMMIT="unknown"
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/... RUN make PREFIX=/artifacts cmd-nvidia-ctk-installer
FROM nvidia/cuda:12.6.3-base-ubi8 FROM nvidia/cuda:12.6.3-base-ubi8
@ -72,7 +71,8 @@ RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm
WORKDIR /work WORKDIR /work
COPY --from=build /artifacts/bin /work COPY --from=build /artifacts/nvidia-ctk-installer /work/nvidia-ctk-installer
RUN ln -s nvidia-ctk-installer nvidia-toolkit
ENV PATH=/work:$PATH ENV PATH=/work:$PATH
@ -87,4 +87,4 @@ LABEL description="See summary"
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
ENTRYPOINT ["/work/nvidia-toolkit"] ENTRYPOINT ["/work/nvidia-ctk-installer"]

View File

@ -41,11 +41,10 @@ ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
WORKDIR /build WORKDIR /build
COPY . . COPY . .
# NOTE: Until the config utilities are properly integrated into the RUN mkdir /artifacts
# nvidia-container-toolkit repository, these are built from the `tools` folder ARG VERSION="N/A"
# and not `cmd`. ARG GIT_COMMIT="unknown"
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/... RUN make PREFIX=/artifacts cmd-nvidia-ctk-installer
FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu20.04 FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu20.04
@ -80,7 +79,8 @@ RUN dpkg -i \
WORKDIR /work WORKDIR /work
COPY --from=build /artifacts/bin /work/ COPY --from=build /artifacts/nvidia-ctk-installer /work/nvidia-ctk-installer
RUN ln -s nvidia-ctk-installer nvidia-toolkit
ENV PATH=/work:$PATH ENV PATH=/work:$PATH
@ -95,4 +95,4 @@ LABEL description="See summary"
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
ENTRYPOINT ["/work/nvidia-toolkit"] ENTRYPOINT ["/work/nvidia-ctk-installer"]