diff --git a/tools/container/containerd/containerd.go b/tools/container/containerd/containerd.go index c652f454..5e4e1a7a 100644 --- a/tools/container/containerd/containerd.go +++ b/tools/container/containerd/containerd.go @@ -18,10 +18,7 @@ package main import ( "fmt" - "net" "os" - "syscall" - "time" "github.com/NVIDIA/nvidia-container-toolkit/internal/info" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd" @@ -38,11 +35,6 @@ const ( defaultSetAsDefault = true defaultRestartMode = "signal" defaultHostRootMount = "/host" - - reloadBackoff = 5 * time.Second - maxReloadAttempts = 6 - - socketMessageToGetPID = "" ) // options stores the configuration from the command line or environment variables @@ -244,86 +236,6 @@ func RestartContainerd(o *options) error { return o.Restart("containerd", SignalContainerd) } -// SignalContainerd sends a SIGHUP signal to the containerd daemon -func SignalContainerd(socket string) error { - log.Infof("Sending SIGHUP signal to containerd") - - // Wrap the logic to perform the SIGHUP in a function so we can retry it on failure - retriable := func() error { - conn, err := net.Dial("unix", socket) - if err != nil { - return fmt.Errorf("unable to dial: %v", err) - } - defer conn.Close() - - sconn, err := conn.(*net.UnixConn).SyscallConn() - if err != nil { - return fmt.Errorf("unable to get syscall connection: %v", err) - } - - err1 := sconn.Control(func(fd uintptr) { - err = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_PASSCRED, 1) - }) - if err1 != nil { - return fmt.Errorf("unable to issue call on socket fd: %v", err1) - } - if err != nil { - return fmt.Errorf("unable to SetsockoptInt on socket fd: %v", err) - } - - _, _, err = conn.(*net.UnixConn).WriteMsgUnix([]byte(socketMessageToGetPID), nil, nil) - if err != nil { - return fmt.Errorf("unable to WriteMsgUnix on socket fd: %v", err) - } - - oob := make([]byte, 1024) - _, oobn, _, _, err := conn.(*net.UnixConn).ReadMsgUnix(nil, oob) - if err != 
nil { - return fmt.Errorf("unable to ReadMsgUnix on socket fd: %v", err) - } - - oob = oob[:oobn] - scm, err := syscall.ParseSocketControlMessage(oob) - if err != nil { - return fmt.Errorf("unable to ParseSocketControlMessage from message received on socket fd: %v", err) - } - - ucred, err := syscall.ParseUnixCredentials(&scm[0]) - if err != nil { - return fmt.Errorf("unable to ParseUnixCredentials from message received on socket fd: %v", err) - } - - err = syscall.Kill(int(ucred.Pid), syscall.SIGHUP) - if err != nil { - return fmt.Errorf("unable to send SIGHUP to 'containerd' process: %v", err) - } - - return nil - } - - // Try to send a SIGHUP up to maxReloadAttempts times - var err error - for i := 0; i < maxReloadAttempts; i++ { - err = retriable() - if err == nil { - break - } - if i == maxReloadAttempts-1 { - break - } - log.Warningf("Error signaling containerd, attempt %v/%v: %v", i+1, maxReloadAttempts, err) - time.Sleep(reloadBackoff) - } - if err != nil { - log.Warningf("Max retries reached %v/%v, aborting", maxReloadAttempts, maxReloadAttempts) - return err - } - - log.Infof("Successfully signaled containerd") - - return nil -} - // containerAnnotationsFromCDIPrefixes returns the container annotations to set for the given CDI prefixes. func (o *options) containerAnnotationsFromCDIPrefixes() []string { var annotations []string diff --git a/tools/container/containerd/containerd_linux.go b/tools/container/containerd/containerd_linux.go new file mode 100644 index 00000000..2dcabcd5 --- /dev/null +++ b/tools/container/containerd/containerd_linux.go @@ -0,0 +1,113 @@ +/** +# Copyright 2020-2023 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package main + +import ( + "fmt" + "net" + "syscall" + "time" + + log "github.com/sirupsen/logrus" +) + +const ( + reloadBackoff = 5 * time.Second + maxReloadAttempts = 6 + + socketMessageToGetPID = "" +) + +// SignalContainerd sends a SIGHUP signal to the containerd daemon +func SignalContainerd(socket string) error { + log.Infof("Sending SIGHUP signal to containerd") + + // Wrap the logic to perform the SIGHUP in a function so we can retry it on failure + retriable := func() error { + conn, err := net.Dial("unix", socket) + if err != nil { + return fmt.Errorf("unable to dial: %v", err) + } + defer conn.Close() + + sconn, err := conn.(*net.UnixConn).SyscallConn() + if err != nil { + return fmt.Errorf("unable to get syscall connection: %v", err) + } + + err1 := sconn.Control(func(fd uintptr) { + err = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_PASSCRED, 1) + }) + if err1 != nil { + return fmt.Errorf("unable to issue call on socket fd: %v", err1) + } + if err != nil { + return fmt.Errorf("unable to SetsockoptInt on socket fd: %v", err) + } + + _, _, err = conn.(*net.UnixConn).WriteMsgUnix([]byte(socketMessageToGetPID), nil, nil) + if err != nil { + return fmt.Errorf("unable to WriteMsgUnix on socket fd: %v", err) + } + + oob := make([]byte, 1024) + _, oobn, _, _, err := conn.(*net.UnixConn).ReadMsgUnix(nil, oob) + if err != nil { + return fmt.Errorf("unable to ReadMsgUnix on socket fd: %v", err) + } + + oob = oob[:oobn] + scm, err := syscall.ParseSocketControlMessage(oob) + if err != nil { + return fmt.Errorf("unable to 
ParseSocketControlMessage from message received on socket fd: %v", err) + } + + ucred, err := syscall.ParseUnixCredentials(&scm[0]) + if err != nil { + return fmt.Errorf("unable to ParseUnixCredentials from message received on socket fd: %v", err) + } + + err = syscall.Kill(int(ucred.Pid), syscall.SIGHUP) + if err != nil { + return fmt.Errorf("unable to send SIGHUP to 'containerd' process: %v", err) + } + + return nil + } + + // Try to send a SIGHUP up to maxReloadAttempts times + var err error + for i := 0; i < maxReloadAttempts; i++ { + err = retriable() + if err == nil { + break + } + if i == maxReloadAttempts-1 { + break + } + log.Warningf("Error signaling containerd, attempt %v/%v: %v", i+1, maxReloadAttempts, err) + time.Sleep(reloadBackoff) + } + if err != nil { + log.Warningf("Max retries reached %v/%v, aborting", maxReloadAttempts, maxReloadAttempts) + return err + } + + log.Infof("Successfully signaled containerd") + + return nil +} diff --git a/tools/container/containerd/containerd_other.go b/tools/container/containerd/containerd_other.go new file mode 100644 index 00000000..1a22d023 --- /dev/null +++ b/tools/container/containerd/containerd_other.go @@ -0,0 +1,29 @@ +//go:build !linux +// +build !linux + +/** +# Copyright 2023 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package main + +import ( + "errors" +) + +// SignalContainerd is unsupported on non-linux platforms. 
// SignalContainerd always returns an error on non-linux platforms; the
// socket argument is not used.
func SignalContainerd(socket string) error {
	unsupported := errors.New("SignalContainerd is unsupported on non-linux platforms")
	return unsupported
}
WriteMsgUnix on socket fd: %v", err) - } - - oob := make([]byte, 1024) - _, oobn, _, _, err := conn.(*net.UnixConn).ReadMsgUnix(nil, oob) - if err != nil { - return fmt.Errorf("unable to ReadMsgUnix on socket fd: %v", err) - } - - oob = oob[:oobn] - scm, err := syscall.ParseSocketControlMessage(oob) - if err != nil { - return fmt.Errorf("unable to ParseSocketControlMessage from message received on socket fd: %v", err) - } - - ucred, err := syscall.ParseUnixCredentials(&scm[0]) - if err != nil { - return fmt.Errorf("unable to ParseUnixCredentials from message received on socket fd: %v", err) - } - - err = syscall.Kill(int(ucred.Pid), syscall.SIGHUP) - if err != nil { - return fmt.Errorf("unable to send SIGHUP to 'docker' process: %v", err) - } - - return nil - } - - // Try to send a SIGHUP up to maxReloadAttempts times - var err error - for i := 0; i < maxReloadAttempts; i++ { - err = retriable() - if err == nil { - break - } - if i == maxReloadAttempts-1 { - break - } - log.Warningf("Error signaling docker, attempt %v/%v: %v", i+1, maxReloadAttempts, err) - time.Sleep(reloadBackoff) - } - if err != nil { - log.Warningf("Max retries reached %v/%v, aborting", maxReloadAttempts, maxReloadAttempts) - return err - } - - log.Infof("Successfully signaled docker") - - return nil -} diff --git a/tools/container/docker/docker_linux.go b/tools/container/docker/docker_linux.go new file mode 100644 index 00000000..a4db4aae --- /dev/null +++ b/tools/container/docker/docker_linux.go @@ -0,0 +1,113 @@ +/** +# Copyright 2021-2023 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package main + +import ( + "fmt" + "net" + "syscall" + "time" + + log "github.com/sirupsen/logrus" +) + +const ( + reloadBackoff = 5 * time.Second + maxReloadAttempts = 6 + + socketMessageToGetPID = "GET /info HTTP/1.0\r\n\r\n" +) + +// SignalDocker sends a SIGHUP signal to docker daemon +func SignalDocker(socket string) error { + log.Infof("Sending SIGHUP signal to docker") + + // Wrap the logic to perform the SIGHUP in a function so we can retry it on failure + retriable := func() error { + conn, err := net.Dial("unix", socket) + if err != nil { + return fmt.Errorf("unable to dial: %v", err) + } + defer conn.Close() + + sconn, err := conn.(*net.UnixConn).SyscallConn() + if err != nil { + return fmt.Errorf("unable to get syscall connection: %v", err) + } + + err1 := sconn.Control(func(fd uintptr) { + err = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_PASSCRED, 1) + }) + if err1 != nil { + return fmt.Errorf("unable to issue call on socket fd: %v", err1) + } + if err != nil { + return fmt.Errorf("unable to SetsockoptInt on socket fd: %v", err) + } + + _, _, err = conn.(*net.UnixConn).WriteMsgUnix([]byte(socketMessageToGetPID), nil, nil) + if err != nil { + return fmt.Errorf("unable to WriteMsgUnix on socket fd: %v", err) + } + + oob := make([]byte, 1024) + _, oobn, _, _, err := conn.(*net.UnixConn).ReadMsgUnix(nil, oob) + if err != nil { + return fmt.Errorf("unable to ReadMsgUnix on socket fd: %v", err) + } + + oob = oob[:oobn] + scm, err := syscall.ParseSocketControlMessage(oob) + if err != nil { + return fmt.Errorf("unable to 
ParseSocketControlMessage from message received on socket fd: %v", err) + } + + ucred, err := syscall.ParseUnixCredentials(&scm[0]) + if err != nil { + return fmt.Errorf("unable to ParseUnixCredentials from message received on socket fd: %v", err) + } + + err = syscall.Kill(int(ucred.Pid), syscall.SIGHUP) + if err != nil { + return fmt.Errorf("unable to send SIGHUP to 'docker' process: %v", err) + } + + return nil + } + + // Try to send a SIGHUP up to maxReloadAttempts times + var err error + for i := 0; i < maxReloadAttempts; i++ { + err = retriable() + if err == nil { + break + } + if i == maxReloadAttempts-1 { + break + } + log.Warningf("Error signaling docker, attempt %v/%v: %v", i+1, maxReloadAttempts, err) + time.Sleep(reloadBackoff) + } + if err != nil { + log.Warningf("Max retries reached %v/%v, aborting", maxReloadAttempts, maxReloadAttempts) + return err + } + + log.Infof("Successfully signaled docker") + + return nil +} diff --git a/tools/container/docker/docker_other.go b/tools/container/docker/docker_other.go new file mode 100644 index 00000000..5078d649 --- /dev/null +++ b/tools/container/docker/docker_other.go @@ -0,0 +1,29 @@ +//go:build !linux +// +build !linux + +/** +# Copyright 2023 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package main + +import ( + "errors" +) + +// SignalDocker is unsupported on non-linux platforms. 
// SignalDocker always returns an error on non-linux platforms; the socket
// argument is not used.
func SignalDocker(socket string) error {
	unsupported := errors.New("SignalDocker is unsupported on non-linux platforms")
	return unsupported
}