Convert docker to runtime package

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar
2024-09-27 10:36:39 +02:00
committed by Tariq Ibrahim
parent 94337b7427
commit 5bedbc2b50
7 changed files with 136 additions and 274 deletions

View File

@@ -0,0 +1,92 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package docker
import (
"fmt"
log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container"
)
// Default values for the docker runtime engine.
const (
// Name is the name used to identify the docker runtime engine.
Name = "docker"
// DefaultConfig is the default path of the docker daemon config file.
DefaultConfig = "/etc/docker/daemon.json"
// DefaultSocket is the default path of the docker daemon unix socket.
DefaultSocket = "/var/run/docker.sock"
// DefaultRestartMode is the default restart mode.
// NOTE(review): presumably "signal" selects the SIGHUP-based reload
// implemented by SignalDocker; confirm against the code consuming this value.
DefaultRestartMode = "signal"
)
// Setup updates the docker configuration to include the nvidia runtime and
// then restarts docker.
//
// The configuration is loaded from the path in o.Config, modified through
// o.Configure, and docker is restarted through RestartDocker. The first step
// that fails aborts the setup; the returned error wraps the underlying cause
// so that callers can inspect it with errors.Is / errors.As.
func Setup(c *cli.Context, o *container.Options) error {
	log.Infof("Starting 'setup' for %v", c.App.Name)

	cfg, err := docker.New(
		docker.WithPath(o.Config),
	)
	if err != nil {
		return fmt.Errorf("unable to load config: %w", err)
	}

	if err := o.Configure(cfg); err != nil {
		return fmt.Errorf("unable to configure docker: %w", err)
	}

	if err := RestartDocker(o); err != nil {
		return fmt.Errorf("unable to restart docker: %w", err)
	}

	log.Infof("Completed 'setup' for %v", c.App.Name)
	return nil
}
// Cleanup reverts the docker configuration to remove the nvidia runtime and
// then restarts docker.
//
// The configuration is loaded from the path in o.Config, reverted through
// o.Unconfigure, and docker is restarted through RestartDocker. The first step
// that fails aborts the cleanup; the returned error wraps the underlying cause
// so that callers can inspect it with errors.Is / errors.As.
func Cleanup(c *cli.Context, o *container.Options) error {
	log.Infof("Starting 'cleanup' for %v", c.App.Name)

	cfg, err := docker.New(
		docker.WithPath(o.Config),
	)
	if err != nil {
		return fmt.Errorf("unable to load config: %w", err)
	}

	if err := o.Unconfigure(cfg); err != nil {
		return fmt.Errorf("unable to unconfigure docker: %w", err)
	}

	// The restart is delegated to RestartDocker, so report it as a restart
	// failure rather than as a signal failure.
	if err := RestartDocker(o); err != nil {
		return fmt.Errorf("unable to restart docker: %w", err)
	}

	log.Infof("Completed 'cleanup' for %v", c.App.Name)
	return nil
}
// RestartDocker restarts docker by delegating to o.Restart, passing "docker"
// and SignalDocker as the function used to signal the daemon.
// NOTE(review): presumably o.Restart chooses between a signal-based reload and
// other restart modes based on the options; confirm against container.Options.
func RestartDocker(o *container.Options) error {
return o.Restart("docker", SignalDocker)
}

View File

@@ -0,0 +1,113 @@
/**
# Copyright 2021-2023 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package docker
import (
"fmt"
"net"
"syscall"
"time"
log "github.com/sirupsen/logrus"
)
// Settings used by SignalDocker when signaling the docker daemon.
const (
// reloadBackoff is the time to wait after a failed attempt to signal docker
// before trying again.
reloadBackoff = 5 * time.Second
// maxReloadAttempts is the maximum number of attempts made to signal docker.
maxReloadAttempts = 6
// socketMessageToGetPID is the request written to the docker socket; the
// credentials attached to the reply are used to determine the PID to signal.
socketMessageToGetPID = "GET /info HTTP/1.0\r\n\r\n"
)
// SignalDocker sends a SIGHUP signal to the docker daemon listening on the
// specified unix socket.
//
// The PID of the daemon is discovered by enabling SO_PASSCRED on a connection
// to the socket, writing a request, and parsing the unix credentials from the
// control message that accompanies the reply. A failed attempt is retried
// after reloadBackoff, for at most maxReloadAttempts attempts in total; if all
// attempts fail, the error from the last attempt is returned.
func SignalDocker(socket string) error {
	log.Infof("Sending SIGHUP signal to docker")

	// Wrap the logic to perform the SIGHUP in a function so we can retry it on failure
	retriable := func() error {
		conn, err := net.Dial("unix", socket)
		if err != nil {
			return fmt.Errorf("unable to dial: %w", err)
		}
		defer conn.Close()

		// Assert the connection type once, and without panicking, instead of
		// repeating an unchecked assertion at every call site.
		uconn, ok := conn.(*net.UnixConn)
		if !ok {
			return fmt.Errorf("unexpected connection type %T for unix socket", conn)
		}

		sconn, err := uconn.SyscallConn()
		if err != nil {
			return fmt.Errorf("unable to get syscall connection: %w", err)
		}

		// Control only reports whether the callback could be run on the fd; the
		// result of the setsockopt call itself is captured separately.
		var sockoptErr error
		err = sconn.Control(func(fd uintptr) {
			sockoptErr = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_PASSCRED, 1)
		})
		if err != nil {
			return fmt.Errorf("unable to issue call on socket fd: %w", err)
		}
		if sockoptErr != nil {
			return fmt.Errorf("unable to SetsockoptInt on socket fd: %w", sockoptErr)
		}

		_, _, err = uconn.WriteMsgUnix([]byte(socketMessageToGetPID), nil, nil)
		if err != nil {
			return fmt.Errorf("unable to WriteMsgUnix on socket fd: %w", err)
		}

		// Only the out-of-band data is of interest; the payload is not read.
		oob := make([]byte, 1024)
		_, oobn, _, _, err := uconn.ReadMsgUnix(nil, oob)
		if err != nil {
			return fmt.Errorf("unable to ReadMsgUnix on socket fd: %w", err)
		}

		scm, err := syscall.ParseSocketControlMessage(oob[:oobn])
		if err != nil {
			return fmt.Errorf("unable to ParseSocketControlMessage from message received on socket fd: %w", err)
		}
		// An empty out-of-band buffer parses to zero control messages without
		// an error, so guard the index below to avoid a panic.
		if len(scm) == 0 {
			return fmt.Errorf("no control message found in %d bytes of out-of-band data received on socket fd", oobn)
		}

		ucred, err := syscall.ParseUnixCredentials(&scm[0])
		if err != nil {
			return fmt.Errorf("unable to ParseUnixCredentials from message received on socket fd: %w", err)
		}

		err = syscall.Kill(int(ucred.Pid), syscall.SIGHUP)
		if err != nil {
			return fmt.Errorf("unable to send SIGHUP to 'docker' process: %w", err)
		}

		return nil
	}

	// Try to send a SIGHUP up to maxReloadAttempts times
	var err error
	for i := 0; i < maxReloadAttempts; i++ {
		err = retriable()
		if err == nil {
			break
		}
		// Do not warn or sleep after the final attempt; it is reported below.
		if i == maxReloadAttempts-1 {
			break
		}
		log.Warningf("Error signaling docker, attempt %v/%v: %v", i+1, maxReloadAttempts, err)
		time.Sleep(reloadBackoff)
	}
	if err != nil {
		log.Warningf("Max retries reached %v/%v, aborting", maxReloadAttempts, maxReloadAttempts)
		return err
	}

	log.Infof("Successfully signaled docker")
	return nil
}

View File

@@ -0,0 +1,29 @@
//go:build !linux
// +build !linux
/**
# Copyright 2023 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package docker
import (
"errors"
)
// SignalDocker always fails on non-linux platforms, where signaling the docker
// daemon is not supported. The socket argument is ignored.
func SignalDocker(socket string) error {
	const unsupported = "SignalDocker is unsupported on non-linux platforms"
	return errors.New(unsupported)
}

View File

@@ -0,0 +1,377 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package docker
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container"
)
func TestUpdateConfigDefaultRuntime(t *testing.T) {
const runtimeDir = "/test/runtime/dir"
testCases := []struct {
setAsDefault bool
runtimeName string
expectedDefaultRuntimeName interface{}
}{
{},
{
setAsDefault: false,
expectedDefaultRuntimeName: nil,
},
{
setAsDefault: true,
runtimeName: "NAME",
expectedDefaultRuntimeName: "NAME",
},
{
setAsDefault: true,
runtimeName: "nvidia",
expectedDefaultRuntimeName: "nvidia",
},
}
for i, tc := range testCases {
o := &container.Options{
RuntimeName: tc.runtimeName,
RuntimeDir: runtimeDir,
SetAsDefault: tc.setAsDefault,
}
config := docker.Config(map[string]interface{}{})
err := o.UpdateConfig(&config)
require.NoError(t, err, "%d: %v", i, tc)
defaultRuntimeName := config["default-runtime"]
require.EqualValues(t, tc.expectedDefaultRuntimeName, defaultRuntimeName, "%d: %v", i, tc)
}
}
// TestUpdateConfig applies UpdateConfig to a range of initial docker configs
// and compares the JSON rendering of the result with the expected config. The
// expected configs always contain the three nvidia runtimes rooted at the test
// runtime directory, alongside whatever unrelated settings were present.
func TestUpdateConfig(t *testing.T) {
	const (
		runtimeDir = "/test/runtime/dir"

		runtimePath       = "/test/runtime/dir/nvidia-container-runtime"
		runtimePathCDI    = "/test/runtime/dir/nvidia-container-runtime.cdi"
		runtimePathLegacy = "/test/runtime/dir/nvidia-container-runtime.legacy"
	)

	// entry builds a single runtime definition with the given path and no args.
	entry := func(path string) map[string]interface{} {
		return map[string]interface{}{
			"path": path,
			"args": []string{},
		}
	}

	testCases := []struct {
		config         docker.Config
		setAsDefault   bool
		runtimeName    string
		expectedConfig map[string]interface{}
	}{
		// Empty config, default runtime name.
		{
			config:       map[string]interface{}{},
			setAsDefault: false,
			expectedConfig: map[string]interface{}{
				"runtimes": map[string]interface{}{
					"nvidia":        entry(runtimePath),
					"nvidia-cdi":    entry(runtimePathCDI),
					"nvidia-legacy": entry(runtimePathLegacy),
				},
			},
		},
		// Empty config, custom runtime name.
		{
			config:       map[string]interface{}{},
			setAsDefault: false,
			runtimeName:  "NAME",
			expectedConfig: map[string]interface{}{
				"runtimes": map[string]interface{}{
					"NAME":          entry(runtimePath),
					"nvidia-cdi":    entry(runtimePathCDI),
					"nvidia-legacy": entry(runtimePathLegacy),
				},
			},
		},
		// An existing nvidia runtime has its path replaced.
		{
			config: map[string]interface{}{
				"runtimes": map[string]interface{}{
					"nvidia": entry("nvidia-container-runtime"),
				},
			},
			setAsDefault: false,
			expectedConfig: map[string]interface{}{
				"runtimes": map[string]interface{}{
					"nvidia":        entry(runtimePath),
					"nvidia-cdi":    entry(runtimePathCDI),
					"nvidia-legacy": entry(runtimePathLegacy),
				},
			},
		},
		// Unrelated runtimes are kept.
		{
			config: map[string]interface{}{
				"runtimes": map[string]interface{}{
					"not-nvidia": entry("some-other-path"),
				},
			},
			expectedConfig: map[string]interface{}{
				"runtimes": map[string]interface{}{
					"not-nvidia":    entry("some-other-path"),
					"nvidia":        entry(runtimePath),
					"nvidia-cdi":    entry(runtimePathCDI),
					"nvidia-legacy": entry(runtimePathLegacy),
				},
			},
		},
		// An existing default runtime is replaced when setAsDefault is set.
		{
			config: map[string]interface{}{
				"default-runtime": "runc",
			},
			setAsDefault: true,
			runtimeName:  "nvidia",
			expectedConfig: map[string]interface{}{
				"default-runtime": "nvidia",
				"runtimes": map[string]interface{}{
					"nvidia":        entry(runtimePath),
					"nvidia-cdi":    entry(runtimePathCDI),
					"nvidia-legacy": entry(runtimePathLegacy),
				},
			},
		},
		// Unrelated top-level settings are kept.
		{
			config: map[string]interface{}{
				"exec-opts":  []string{"native.cgroupdriver=systemd"},
				"log-driver": "json-file",
				"log-opts": map[string]string{
					"max-size": "100m",
				},
				"storage-driver": "overlay2",
			},
			expectedConfig: map[string]interface{}{
				"exec-opts":  []string{"native.cgroupdriver=systemd"},
				"log-driver": "json-file",
				"log-opts": map[string]string{
					"max-size": "100m",
				},
				"storage-driver": "overlay2",
				"runtimes": map[string]interface{}{
					"nvidia":        entry(runtimePath),
					"nvidia-cdi":    entry(runtimePathCDI),
					"nvidia-legacy": entry(runtimePathLegacy),
				},
			},
		},
	}

	for i, tc := range testCases {
		tc := tc
		opts := &container.Options{
			RuntimeName:  tc.runtimeName,
			RuntimeDir:   runtimeDir,
			SetAsDefault: tc.setAsDefault,
		}

		require.NoError(t, opts.UpdateConfig(&tc.config), "%d: %v", i, tc)

		// Compare the JSON renderings so that differing Go map types with the
		// same content are treated as equal.
		actual, err := json.MarshalIndent(tc.config, "", " ")
		require.NoError(t, err)

		expected, err := json.MarshalIndent(tc.expectedConfig, "", " ")
		require.NoError(t, err)

		require.EqualValues(t, string(expected), string(actual), "%d: %v", i, tc)
	}
}
// TestRevertConfig applies RevertConfig to a range of docker configs and
// compares the JSON rendering of the result with the expected config. The
// expected configs contain no nvidia runtimes, keep unrelated settings, and
// have a "default-runtime" of "nvidia" replaced by "runc".
func TestRevertConfig(t *testing.T) {
	const (
		runtimePath       = "/test/runtime/dir/nvidia-container-runtime"
		runtimePathCDI    = "/test/runtime/dir/nvidia-container-runtime.cdi"
		runtimePathLegacy = "/test/runtime/dir/nvidia-container-runtime.legacy"
	)

	// entry builds a single runtime definition with the given path and no args.
	entry := func(path string) map[string]interface{} {
		return map[string]interface{}{
			"path": path,
			"args": []string{},
		}
	}

	testCases := []struct {
		config         docker.Config
		expectedConfig map[string]interface{}
	}{
		// An empty config stays empty.
		{
			config:         map[string]interface{}{},
			expectedConfig: map[string]interface{}{},
		},
		// A lone nvidia runtime is removed.
		{
			config: map[string]interface{}{
				"runtimes": map[string]interface{}{
					"nvidia": entry(runtimePath),
				},
			},
			expectedConfig: map[string]interface{}{},
		},
		// NOTE(review): this case is identical to the previous one.
		{
			config: map[string]interface{}{
				"runtimes": map[string]interface{}{
					"nvidia": entry(runtimePath),
				},
			},
			expectedConfig: map[string]interface{}{},
		},
		// All three nvidia runtimes are removed.
		{
			config: map[string]interface{}{
				"runtimes": map[string]interface{}{
					"nvidia":        entry(runtimePath),
					"nvidia-cdi":    entry(runtimePathCDI),
					"nvidia-legacy": entry(runtimePathLegacy),
				},
			},
			expectedConfig: map[string]interface{}{},
		},
		// An nvidia default runtime is expected to become runc.
		{
			config: map[string]interface{}{
				"default-runtime": "nvidia",
				"runtimes": map[string]interface{}{
					"nvidia": entry(runtimePath),
				},
			},
			expectedConfig: map[string]interface{}{
				"default-runtime": "runc",
			},
		},
		// A non-nvidia default runtime is left alone.
		{
			config: map[string]interface{}{
				"default-runtime": "not-nvidia",
				"runtimes": map[string]interface{}{
					"nvidia": entry(runtimePath),
				},
			},
			expectedConfig: map[string]interface{}{
				"default-runtime": "not-nvidia",
			},
		},
		// Unrelated top-level settings are kept.
		{
			config: map[string]interface{}{
				"exec-opts":  []string{"native.cgroupdriver=systemd"},
				"log-driver": "json-file",
				"log-opts": map[string]string{
					"max-size": "100m",
				},
				"storage-driver": "overlay2",
				"runtimes": map[string]interface{}{
					"nvidia": entry(runtimePath),
				},
			},
			expectedConfig: map[string]interface{}{
				"exec-opts":  []string{"native.cgroupdriver=systemd"},
				"log-driver": "json-file",
				"log-opts": map[string]string{
					"max-size": "100m",
				},
				"storage-driver": "overlay2",
			},
		},
	}

	for i, tc := range testCases {
		tc := tc
		opts := &container.Options{}

		require.NoError(t, opts.RevertConfig(&tc.config), "%d: %v", i, tc)

		// Compare the JSON renderings so that differing Go map types with the
		// same content are treated as equal.
		actual, err := json.MarshalIndent(tc.config, "", " ")
		require.NoError(t, err)

		expected, err := json.MarshalIndent(tc.expectedConfig, "", " ")
		require.NoError(t, err)

		require.EqualValues(t, string(expected), string(actual), "%d: %v", i, tc)
	}
}