From b3418e4496bcba9926190c90fd5a604cfcfe4e63 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sun, 22 Mar 2020 19:00:29 +0200 Subject: [PATCH] Add daemon detached mode (--detached, -d) that runs agent in the background and returns immediately --- trains_agent/commands/worker.py | 16 ++++++- trains_agent/helper/os/__init__.py | 0 trains_agent/helper/os/daemonize.py | 74 +++++++++++++++++++++++++++++ trains_agent/interface/__init__.py | 3 +- trains_agent/interface/worker.py | 5 ++ 5 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 trains_agent/helper/os/__init__.py create mode 100644 trains_agent/helper/os/daemonize.py diff --git a/trains_agent/commands/worker.py b/trains_agent/commands/worker.py index 818fc2a..524d79e 100644 --- a/trains_agent/commands/worker.py +++ b/trains_agent/commands/worker.py @@ -66,6 +66,7 @@ from trains_agent.helper.base import ( rm_file, add_python_path) from trains_agent.helper.console import ensure_text, print_text, decode_binary_lines +from trains_agent.helper.os.daemonize import daemonize_process from trains_agent.helper.package.base import PackageManager from trains_agent.helper.package.conda_api import CondaAPI from trains_agent.helper.package.horovod_req import HorovodRequirement @@ -626,7 +627,7 @@ class Worker(ServiceCommandSection): self._session.print_configuration() @resolve_names - def daemon(self, queues, log_level, foreground=False, docker=False, **kwargs): + def daemon(self, queues, log_level, foreground=False, docker=False, detached=False, **kwargs): # make sure we only have a single instance, # also make sure we set worker_id properly and cache folders self._singleton() @@ -686,7 +687,18 @@ class Worker(ServiceCommandSection): name ) ) - sys.stdout = sys.stderr = out_file + + if not detached: + # redirect std out/err to new file + sys.stdout = sys.stderr = out_file + else: + # in detached mode + # fully detach stdin.stdout/stderr and leave main process, running in the background + 
import os
import sys


def _flush_standard_streams():
    """Flush the Python-level buffers of sys.stdout and sys.stderr (best effort)."""
    for stream in (sys.stdout, sys.stderr):
        try:
            stream.flush()
        except (AttributeError, ValueError, IOError, OSError):
            # The stream may be None, replaced by an object without flush(),
            # already closed, or not writable; none of these should prevent
            # the process from daemonizing.
            pass


def _fork_or_raise():
    """Call os.fork() and re-raise an OSError as Exception("<strerror> [<errno>]")."""
    try:
        return os.fork()
    except OSError as e:
        raise Exception("%s [%d]" % (e.strerror, e.errno))


def daemonize_process(redirect_fd=None):
    """
    Detach a process from the controlling terminal and run it in the background as a daemon.

    The calling process is forked twice and both intermediate processes leave
    through os._exit(0), so this function only ever returns inside the final
    (daemon) process. All file descriptors opened before the call stay open
    in the daemon.

    :param redirect_fd: Optional file descriptor (int) that replaces standard
        output (1) and standard error (2) in the daemon. When None, both are
        redirected to the null device. Standard input (0) is always redirected
        to the null device.
    :return: 0, in the daemon process.
    :raises TypeError: if redirect_fd is neither None nor an int.
    :raises Exception: if os.fork() fails; the message is "<strerror> [<errno>]".
    """
    # Explicit check instead of `assert`, which is stripped when running with -O.
    if redirect_fd is not None and not isinstance(redirect_fd, int):
        raise TypeError(
            "redirect_fd must be None or an int file descriptor, got %r" % (redirect_fd,)
        )

    # The daemon keeps running in the directory it was launched from.
    workdir = os.getcwd()

    # Null device used for every standard stream that is not explicitly redirected.
    devnull = getattr(os, "devnull", "/dev/null")

    # The parent processes leave through os._exit(), which does not flush
    # Python-level buffers. Flush now so output written before daemonizing
    # reaches its original destination instead of surfacing later in the
    # daemon's redirected streams.
    _flush_standard_streams()

    # First fork: the parent exits, returning control to the shell. The child
    # is guaranteed not to be a process group leader, which os.setsid() requires.
    if _fork_or_raise() != 0:
        os._exit(0)

    # Become the leader of a new session and a new process group; the process
    # no longer has a controlling terminal.
    os.setsid()

    # Second fork: the session leader exits, so the daemon is orphaned and the
    # init process becomes responsible for its cleanup (no zombies).
    if _fork_or_raise() != 0:
        os._exit(0)

    # From here on we are inside the daemon process.
    # Stay in the launch directory (deliberately not "/").
    os.chdir(workdir)
    # Do not inherit the file mode creation mask from the parent.
    # NOTE(review): umask(0) lets the daemon create world-writable files - confirm this is intended.
    os.umask(0)

    # Point stdout/stderr at the caller's descriptor first, so opening the
    # null device below cannot be handed descriptor 1 or 2 by accident.
    if redirect_fd is not None:
        os.dup2(redirect_fd, 1)
        os.dup2(redirect_fd, 2)

    devnull_fd = os.open(devnull, os.O_RDWR)
    # Standard input (0) never stays attached to the old terminal.
    os.dup2(devnull_fd, 0)
    if redirect_fd is None:
        # standard output (1), standard error (2)
        os.dup2(devnull_fd, 1)
        os.dup2(devnull_fd, 2)

    # The duplicates keep the null device open; drop the extra descriptor
    # unless os.open() returned one of the standard descriptors itself.
    if devnull_fd > 2:
        os.close(devnull_fd)

    return 0