mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Move nvidia-toolkit to nvidia-ctk-installer
This change moves the containerized installer from nvidia-toolkit to cmd/nvidia-ctk-installer to allow for its use in CI. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
@@ -1,318 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/runtime"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/toolkit"
|
||||
)
|
||||
|
||||
const (
|
||||
toolkitPidFilename = "toolkit.pid"
|
||||
defaultPidFile = "/run/nvidia/toolkit/" + toolkitPidFilename
|
||||
toolkitSubDir = "toolkit"
|
||||
|
||||
defaultRuntime = "docker"
|
||||
defaultRuntimeArgs = ""
|
||||
)
|
||||
|
||||
var availableRuntimes = map[string]struct{}{"docker": {}, "crio": {}, "containerd": {}}
|
||||
var defaultLowLevelRuntimes = []string{"docker-runc", "runc", "crun"}
|
||||
|
||||
var waitingForSignal = make(chan bool, 1)
|
||||
var signalReceived = make(chan bool, 1)
|
||||
|
||||
// options stores the command line arguments
|
||||
type options struct {
|
||||
noDaemon bool
|
||||
runtime string
|
||||
runtimeArgs string
|
||||
root string
|
||||
pidFile string
|
||||
|
||||
toolkitOptions toolkit.Options
|
||||
runtimeOptions runtime.Options
|
||||
}
|
||||
|
||||
func (o options) toolkitRoot() string {
|
||||
return filepath.Join(o.root, toolkitSubDir)
|
||||
}
|
||||
|
||||
// Version defines the CLI version. This is set at build time using LD FLAGS
|
||||
var Version = "development"
|
||||
|
||||
func main() {
|
||||
logger := logger.New()
|
||||
|
||||
remainingArgs, root, err := ParseArgs(logger, os.Args)
|
||||
if err != nil {
|
||||
logger.Errorf("Error: unable to parse arguments: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
c := NewApp(logger, root)
|
||||
|
||||
// Run the CLI
|
||||
logger.Infof("Starting %v", c.Name)
|
||||
if err := c.Run(remainingArgs); err != nil {
|
||||
logger.Errorf("error running nvidia-toolkit: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
logger.Infof("Completed %v", c.Name)
|
||||
}
|
||||
|
||||
// An app represents the nvidia-ctk-installer.
|
||||
type app struct {
|
||||
logger logger.Interface
|
||||
// defaultRoot stores the root to use if the --root flag is not specified.
|
||||
defaultRoot string
|
||||
|
||||
toolkit *toolkit.Installer
|
||||
}
|
||||
|
||||
// NewApp creates the CLI app fro the specified options.
|
||||
// defaultRoot is used as the root if not specified via the --root flag.
|
||||
func NewApp(logger logger.Interface, defaultRoot string) *cli.App {
|
||||
a := app{
|
||||
logger: logger,
|
||||
defaultRoot: defaultRoot,
|
||||
}
|
||||
return a.build()
|
||||
}
|
||||
|
||||
func (a app) build() *cli.App {
|
||||
options := options{
|
||||
toolkitOptions: toolkit.Options{},
|
||||
}
|
||||
// Create the top-level CLI
|
||||
c := cli.NewApp()
|
||||
c.Name = "nvidia-toolkit"
|
||||
c.Usage = "Install the nvidia-container-toolkit for use by a given runtime"
|
||||
c.UsageText = "[DESTINATION] [-n | --no-daemon] [-r | --runtime] [-u | --runtime-args]"
|
||||
c.Description = "DESTINATION points to the host path underneath which the nvidia-container-toolkit should be installed.\nIt will be installed at ${DESTINATION}/toolkit"
|
||||
c.Version = Version
|
||||
c.Before = func(ctx *cli.Context) error {
|
||||
return a.Before(ctx, &options)
|
||||
}
|
||||
c.Action = func(ctx *cli.Context) error {
|
||||
return a.Run(ctx, &options)
|
||||
}
|
||||
|
||||
// Setup flags for the CLI
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.BoolFlag{
|
||||
Name: "no-daemon",
|
||||
Aliases: []string{"n"},
|
||||
Usage: "terminate immediately after setting up the runtime. Note that no cleanup will be performed",
|
||||
Destination: &options.noDaemon,
|
||||
EnvVars: []string{"NO_DAEMON"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime",
|
||||
Aliases: []string{"r"},
|
||||
Usage: "the runtime to setup on this node. One of {'docker', 'crio', 'containerd'}",
|
||||
Value: defaultRuntime,
|
||||
Destination: &options.runtime,
|
||||
EnvVars: []string{"RUNTIME"},
|
||||
},
|
||||
// TODO: Remove runtime-args
|
||||
&cli.StringFlag{
|
||||
Name: "runtime-args",
|
||||
Aliases: []string{"u"},
|
||||
Usage: "arguments to pass to 'docker', 'crio', or 'containerd' setup command",
|
||||
Value: defaultRuntimeArgs,
|
||||
Destination: &options.runtimeArgs,
|
||||
EnvVars: []string{"RUNTIME_ARGS"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "root",
|
||||
Value: a.defaultRoot,
|
||||
Usage: "the folder where the NVIDIA Container Toolkit is to be installed. It will be installed to `ROOT`/toolkit",
|
||||
Destination: &options.root,
|
||||
EnvVars: []string{"ROOT"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "pid-file",
|
||||
Value: defaultPidFile,
|
||||
Usage: "the path to a toolkit.pid file to ensure that only a single configuration instance is running",
|
||||
Destination: &options.pidFile,
|
||||
EnvVars: []string{"TOOLKIT_PID_FILE", "PID_FILE"},
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = append(c.Flags, toolkit.Flags(&options.toolkitOptions)...)
|
||||
c.Flags = append(c.Flags, runtime.Flags(&options.runtimeOptions)...)
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
func (a *app) Before(c *cli.Context, o *options) error {
|
||||
a.toolkit = toolkit.NewInstaller(
|
||||
toolkit.WithLogger(a.logger),
|
||||
toolkit.WithToolkitRoot(o.toolkitRoot()),
|
||||
)
|
||||
return a.validateFlags(c, o)
|
||||
}
|
||||
|
||||
func (a *app) validateFlags(_ *cli.Context, o *options) error {
|
||||
if o.root == "" {
|
||||
return fmt.Errorf("the install root must be specified")
|
||||
}
|
||||
if _, exists := availableRuntimes[o.runtime]; !exists {
|
||||
return fmt.Errorf("unknown runtime: %v", o.runtime)
|
||||
}
|
||||
if filepath.Base(o.pidFile) != toolkitPidFilename {
|
||||
return fmt.Errorf("invalid toolkit.pid path %v", o.pidFile)
|
||||
}
|
||||
|
||||
if err := a.toolkit.ValidateOptions(&o.toolkitOptions); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := runtime.ValidateOptions(&o.runtimeOptions, o.runtime, o.toolkitRoot()); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Run installs the NVIDIA Container Toolkit and updates the requested runtime.
|
||||
// If the application is run as a daemon, the application waits and unconfigures
|
||||
// the runtime on termination.
|
||||
func (a *app) Run(c *cli.Context, o *options) error {
|
||||
err := a.initialize(o.pidFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to initialize: %v", err)
|
||||
}
|
||||
defer a.shutdown(o.pidFile)
|
||||
|
||||
if len(o.toolkitOptions.ContainerRuntimeRuntimes.Value()) == 0 {
|
||||
lowlevelRuntimePaths, err := runtime.GetLowlevelRuntimePaths(&o.runtimeOptions, o.runtime)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to determine runtime options: %w", err)
|
||||
}
|
||||
lowlevelRuntimePaths = append(lowlevelRuntimePaths, defaultLowLevelRuntimes...)
|
||||
|
||||
o.toolkitOptions.ContainerRuntimeRuntimes = *cli.NewStringSlice(lowlevelRuntimePaths...)
|
||||
}
|
||||
|
||||
err = a.toolkit.Install(c, &o.toolkitOptions)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to install toolkit: %v", err)
|
||||
}
|
||||
|
||||
err = runtime.Setup(c, &o.runtimeOptions, o.runtime)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to setup runtime: %v", err)
|
||||
}
|
||||
|
||||
if !o.noDaemon {
|
||||
err = a.waitForSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to wait for signal: %v", err)
|
||||
}
|
||||
|
||||
err = runtime.Cleanup(c, &o.runtimeOptions, o.runtime)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to cleanup runtime: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ParseArgs checks if a single positional argument was defined and extracts this the root.
|
||||
// If no positional arguments are defined, it is assumed that the root is specified as a flag.
|
||||
func ParseArgs(logger logger.Interface, args []string) ([]string, string, error) {
|
||||
logger.Infof("Parsing arguments")
|
||||
|
||||
if len(args) < 2 {
|
||||
return args, "", nil
|
||||
}
|
||||
|
||||
var lastPositionalArg int
|
||||
for i, arg := range args {
|
||||
if strings.HasPrefix(arg, "-") {
|
||||
break
|
||||
}
|
||||
lastPositionalArg = i
|
||||
}
|
||||
|
||||
if lastPositionalArg == 0 {
|
||||
return args, "", nil
|
||||
}
|
||||
|
||||
if lastPositionalArg == 1 {
|
||||
return append([]string{args[0]}, args[2:]...), args[1], nil
|
||||
}
|
||||
|
||||
return nil, "", fmt.Errorf("unexpected positional argument(s) %v", args[2:lastPositionalArg+1])
|
||||
}
|
||||
|
||||
func (a *app) initialize(pidFile string) error {
|
||||
a.logger.Infof("Initializing")
|
||||
|
||||
if dir := filepath.Dir(pidFile); dir != "" {
|
||||
err := os.MkdirAll(dir, 0755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create folder for pidfile: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
f, err := os.Create(pidFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create pidfile: %v", err)
|
||||
}
|
||||
|
||||
err = unix.Flock(int(f.Fd()), unix.LOCK_EX|unix.LOCK_NB)
|
||||
if err != nil {
|
||||
a.logger.Warningf("Unable to get exclusive lock on '%v'", pidFile)
|
||||
a.logger.Warningf("This normally means an instance of the NVIDIA toolkit Container is already running, aborting")
|
||||
return fmt.Errorf("unable to get flock on pidfile: %v", err)
|
||||
}
|
||||
|
||||
_, err = f.WriteString(fmt.Sprintf("%v\n", os.Getpid()))
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to write PID to pidfile: %v", err)
|
||||
}
|
||||
|
||||
sigs := make(chan os.Signal, 1)
|
||||
signal.Notify(sigs, syscall.SIGHUP, syscall.SIGINT, syscall.SIGQUIT, syscall.SIGPIPE, syscall.SIGTERM)
|
||||
go func() {
|
||||
<-sigs
|
||||
select {
|
||||
case <-waitingForSignal:
|
||||
signalReceived <- true
|
||||
default:
|
||||
a.logger.Infof("Signal received, exiting early")
|
||||
a.shutdown(pidFile)
|
||||
os.Exit(0)
|
||||
}
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *app) waitForSignal() error {
|
||||
a.logger.Infof("Waiting for signal")
|
||||
waitingForSignal <- true
|
||||
<-signalReceived
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *app) shutdown(pidFile string) {
|
||||
a.logger.Infof("Shutting Down")
|
||||
|
||||
err := os.Remove(pidFile)
|
||||
if err != nil {
|
||||
a.logger.Warningf("Unable to remove pidfile: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -1,86 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParseArgs(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
testCases := []struct {
|
||||
args []string
|
||||
expectedRemaining []string
|
||||
expectedRoot string
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
args: []string{},
|
||||
expectedRemaining: []string{},
|
||||
expectedRoot: "",
|
||||
expectedError: nil,
|
||||
},
|
||||
{
|
||||
args: []string{"app"},
|
||||
expectedRemaining: []string{"app"},
|
||||
},
|
||||
{
|
||||
args: []string{"app", "root"},
|
||||
expectedRemaining: []string{"app"},
|
||||
expectedRoot: "root",
|
||||
},
|
||||
{
|
||||
args: []string{"app", "--flag"},
|
||||
expectedRemaining: []string{"app", "--flag"},
|
||||
},
|
||||
{
|
||||
args: []string{"app", "root", "--flag"},
|
||||
expectedRemaining: []string{"app", "--flag"},
|
||||
expectedRoot: "root",
|
||||
},
|
||||
{
|
||||
args: []string{"app", "root", "not-root", "--flag"},
|
||||
expectedError: fmt.Errorf("unexpected positional argument(s) [not-root]"),
|
||||
},
|
||||
{
|
||||
args: []string{"app", "root", "not-root"},
|
||||
expectedError: fmt.Errorf("unexpected positional argument(s) [not-root]"),
|
||||
},
|
||||
{
|
||||
args: []string{"app", "root", "not-root", "also"},
|
||||
expectedError: fmt.Errorf("unexpected positional argument(s) [not-root also]"),
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
remaining, root, err := ParseArgs(logger, tc.args)
|
||||
if tc.expectedError != nil {
|
||||
require.EqualError(t, err, tc.expectedError.Error())
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
require.ElementsMatch(t, tc.expectedRemaining, remaining)
|
||||
require.Equal(t, tc.expectedRoot, root)
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user