Add daemon detached mode (--detached, -d) that runs agent in the background and returns immediately

This commit is contained in:
allegroai 2020-03-22 19:00:29 +02:00
parent 5ef627165c
commit b3418e4496
5 changed files with 95 additions and 3 deletions

View File

@ -66,6 +66,7 @@ from trains_agent.helper.base import (
rm_file,
add_python_path)
from trains_agent.helper.console import ensure_text, print_text, decode_binary_lines
from trains_agent.helper.os.daemonize import daemonize_process
from trains_agent.helper.package.base import PackageManager
from trains_agent.helper.package.conda_api import CondaAPI
from trains_agent.helper.package.horovod_req import HorovodRequirement
@ -626,7 +627,7 @@ class Worker(ServiceCommandSection):
self._session.print_configuration()
@resolve_names
def daemon(self, queues, log_level, foreground=False, docker=False, **kwargs):
def daemon(self, queues, log_level, foreground=False, docker=False, detached=False, **kwargs):
# make sure we only have a single instance,
# also make sure we set worker_id properly and cache folders
self._singleton()
@ -686,7 +687,18 @@ class Worker(ServiceCommandSection):
name
)
)
sys.stdout = sys.stderr = out_file
if not detached:
# redirect std out/err to new file
sys.stdout = sys.stderr = out_file
else:
# in detached mode
# fully detach stdin/stdout/stderr and leave main process, running in the background
daemonize_process(out_file.fileno())
# reprint headers to std file (we are now inside the daemon process)
print("Worker \"{}\" :".format(self.worker_id))
self._session.print_configuration()
print_table(queues_info, columns=columns, titles=columns)
try:
while True:

View File

View File

@ -0,0 +1,74 @@
import os
def daemonize_process(redirect_fd=None):
    """
    Detach the current process from its controlling terminal and continue as a daemon.

    Uses the classic double-fork sequence:

    1. fork, and terminate the original process so control returns to the shell;
    2. call ``os.setsid()`` in the first child so it leads a new session;
    3. fork again and terminate the first child, leaving the second child orphaned.

    When this function returns, the caller is executing inside the second child
    (the daemon). The original process and the first child never return from this
    call: both are terminated with ``os._exit(0)``, which skips ``atexit`` handlers
    and does not flush Python-level buffers.

    File descriptors opened before the call stay open in the daemon.

    NOTE(review): only stdout (1) and stderr (2) are redirected; stdin (0) is left
    as inherited from the parent - confirm this is intended.

    :param redirect_fd: optional open file descriptor (int) that stdout and stderr
        are redirected to. When None they are redirected to the null device.
    :return: 0 (only ever returned inside the daemon process)
    :raises Exception: if either fork fails, with the message "<strerror> [<errno>]"
    """
    assert redirect_fd is None or isinstance(redirect_fd, int)

    # remember where we were started from, the daemon keeps running in the same directory
    workdir = os.getcwd()
    # fall back to the POSIX path if this interpreter does not expose os.devnull
    devnull = getattr(os, "devnull", "/dev/null")

    try:
        # First fork: the parent exits and the shell gets its prompt back. The child
        # gets a new pid but inherits the process group, so it is guaranteed not to be
        # a process group leader - a precondition for os.setsid() to succeed.
        pid = os.fork()
    except OSError as e:
        raise Exception("%s [%d]" % (e.strerror, e.errno))
    if pid != 0:
        # exit parent of the first child
        os._exit(0)

    # First child: become leader of a new session and a new process group,
    # which also drops the controlling terminal.
    os.setsid()

    try:
        # Second fork: the first child exits immediately, so the second child is
        # orphaned and the init process becomes responsible for reaping it.
        pid = os.fork()
    except OSError as e:
        raise Exception("%s [%d]" % (e.strerror, e.errno))
    if pid != 0:
        # exit parent (the first child) of the second child
        os._exit(0)

    # Second child (the daemon): stay in the directory we were launched from.
    os.chdir(workdir)
    # Do not inherit the file mode creation mask from the parent,
    # the daemon gets complete control over permissions.
    os.umask(0)

    # We count on all file descriptors opened by the parent still being open here,
    # the daemon process keeps using them (including a caller supplied redirect_fd).
    opened_here = redirect_fd is None
    if opened_here:
        redirect_fd = os.open(devnull, os.O_RDWR)
    try:
        # point standard output (1) and standard error (2) at the redirect target
        os.dup2(redirect_fd, 1)
        os.dup2(redirect_fd, 2)
    finally:
        # The descriptor we opened ourselves is no longer needed once duplicated,
        # close it so it does not leak for the daemon's lifetime. Never close a
        # caller supplied descriptor, nor one that landed on a standard stream.
        if opened_here and redirect_fd > 2:
            os.close(redirect_fd)

    return 0

View File

@ -35,6 +35,7 @@ def get_parser():
for group_name, group in groups:
p = parser if not group_name else parser.add_argument_group(group_name)
for key, value in group:
p.add_argument(key, **value)
aliases = value.pop("aliases", [])
p.add_argument(key, *aliases, **value)
return top_parser

View File

@ -72,6 +72,11 @@ DAEMON_ARGS = dict({
'help': 'Do not use any network connects, assume everything is pre-installed',
'action': 'store_true',
},
'--detached': {
'help': 'Detached mode, run agent in the background',
'action': 'store_true',
'aliases': ['-d'],
},
}, **WORKER_ARGS)