mirror of
https://github.com/clearml/clearml-agent
synced 2025-03-09 21:30:22 +00:00
Add daemon detached mode (--detached, -d) that runs agent in the background and returns immediately
This commit is contained in:
parent
5ef627165c
commit
b3418e4496
@ -66,6 +66,7 @@ from trains_agent.helper.base import (
|
||||
rm_file,
|
||||
add_python_path)
|
||||
from trains_agent.helper.console import ensure_text, print_text, decode_binary_lines
|
||||
from trains_agent.helper.os.daemonize import daemonize_process
|
||||
from trains_agent.helper.package.base import PackageManager
|
||||
from trains_agent.helper.package.conda_api import CondaAPI
|
||||
from trains_agent.helper.package.horovod_req import HorovodRequirement
|
||||
@ -626,7 +627,7 @@ class Worker(ServiceCommandSection):
|
||||
self._session.print_configuration()
|
||||
|
||||
@resolve_names
|
||||
def daemon(self, queues, log_level, foreground=False, docker=False, **kwargs):
|
||||
def daemon(self, queues, log_level, foreground=False, docker=False, detached=False, **kwargs):
|
||||
# make sure we only have a single instance,
|
||||
# also make sure we set worker_id properly and cache folders
|
||||
self._singleton()
|
||||
@ -686,7 +687,18 @@ class Worker(ServiceCommandSection):
|
||||
name
|
||||
)
|
||||
)
|
||||
sys.stdout = sys.stderr = out_file
|
||||
|
||||
if not detached:
|
||||
# redirect std out/err to new file
|
||||
sys.stdout = sys.stderr = out_file
|
||||
else:
|
||||
# in detached mode
|
||||
# fully detach stdin/stdout/stderr and leave the main process, running in the background
|
||||
daemonize_process(out_file.fileno())
|
||||
# reprint headers to std file (we are now inside the daemon process)
|
||||
print("Worker \"{}\" :".format(self.worker_id))
|
||||
self._session.print_configuration()
|
||||
print_table(queues_info, columns=columns, titles=columns)
|
||||
|
||||
try:
|
||||
while True:
|
||||
|
0
trains_agent/helper/os/__init__.py
Normal file
0
trains_agent/helper/os/__init__.py
Normal file
74
trains_agent/helper/os/daemonize.py
Normal file
74
trains_agent/helper/os/daemonize.py
Normal file
@ -0,0 +1,74 @@
|
||||
import os
|
||||
|
||||
|
||||
def daemonize_process(redirect_fd=None):
    """
    Detach a process from the controlling terminal and run it in the background as a daemon.

    The calling process is forked twice, with ``os.setsid()`` called in between.
    Both intermediate processes leave through ``os._exit(0)``, so this function
    only ever returns inside the final (grandchild) process. It relies on
    ``os.fork`` and ``os.setsid`` being available.

    In the daemon process, stdin is attached to the null device, while stdout and
    stderr are attached to ``redirect_fd`` (or to the null device if it is None).
    All other file descriptors are left open on purpose, so files opened by the
    caller before daemonizing stay usable in the daemon.

    :param redirect_fd: open file descriptor (int) receiving the daemon's stdout/stderr,
        or None to discard them
    :return: 0 (in the daemon process only)
    :raises Exception: if one of the ``os.fork()`` calls fails; message is "<strerror> [<errno>]"
    """
    import sys

    assert redirect_fd is None or isinstance(redirect_fd, int)

    # remember the current directory, the daemon keeps running from it
    workdir = os.getcwd()

    # path of the null device, used for stdin and as the default stdout/stderr target
    devnull = getattr(os, "devnull", "/dev/null")

    # Flush pending Python-level output before forking. The exiting parents use
    # os._exit() which discards their buffers, while the daemon inherits a copy
    # and would later write it into the redirected descriptor instead.
    for stream in (sys.stdout, sys.stderr):
        if stream is None:
            continue
        try:
            stream.flush()
        except (IOError, OSError, ValueError):
            # best effort only: a closed/broken stream must not prevent daemonizing
            pass

    try:
        # Fork a child process so the parent can exit. This returns control to
        # the command-line or shell. It also guarantees that the child will not
        # be a process group leader, since the child receives a new process ID
        # and inherits the parent's process group ID. This step is required
        # to ensure that the next call to os.setsid is successful.
        pid = os.fork()
    except OSError as e:
        raise Exception("%s [%d]" % (e.strerror, e.errno))

    if pid != 0:
        # Exit parent of the first child.
        os._exit(0)

    # --- first child ---
    # Become the session leader of a new session and the process group leader
    # of a new process group. The process is then also guaranteed not to have
    # a controlling terminal.
    os.setsid()

    try:
        # Fork a second child and exit immediately to prevent zombies. This
        # causes the second child process to be orphaned, making the init
        # process responsible for its cleanup.
        pid = os.fork()
    except OSError as e:
        raise Exception("%s [%d]" % (e.strerror, e.errno))

    if pid != 0:
        # Exit parent (the first child) of the second child.
        os._exit(0)

    # --- second child: the daemon ---
    # Stay in the directory the process was started from (not the root
    # directory), so relative paths used by the caller keep resolving.
    os.chdir(workdir)
    # Do not inherit the file mode creation mask from the parent,
    # give the daemon complete control over permissions.
    os.umask(0)

    # Notice we count on the fact that we keep all file descriptors open,
    # since we opened them in the parent process, but the daemon process will use them.

    # Detach standard input (0) from the terminal by pointing it at the null device.
    null_fd = os.open(devnull, os.O_RDWR)
    os.dup2(null_fd, 0)

    # Redirect standard output (1) and standard error (2) to the requested
    # descriptor, or to the null device when none was given.
    target_fd = null_fd if redirect_fd is None else redirect_fd
    os.dup2(target_fd, 1)
    os.dup2(target_fd, 2)

    # the temporary descriptor is no longer needed once duplicated onto 0/1/2
    if null_fd > 2:
        os.close(null_fd)

    return 0
|
@ -35,6 +35,7 @@ def get_parser():
|
||||
for group_name, group in groups:
|
||||
p = parser if not group_name else parser.add_argument_group(group_name)
|
||||
for key, value in group:
|
||||
p.add_argument(key, **value)
|
||||
aliases = value.pop("aliases", [])
|
||||
p.add_argument(key, *aliases, **value)
|
||||
|
||||
return top_parser
|
||||
|
@ -72,6 +72,11 @@ DAEMON_ARGS = dict({
|
||||
'help': 'Do not use any network connects, assume everything is pre-installed',
|
||||
'action': 'store_true',
|
||||
},
|
||||
'--detached': {
|
||||
'help': 'Detached mode, run agent in the background',
|
||||
'action': 'store_true',
|
||||
'aliases': ['-d'],
|
||||
},
|
||||
|
||||
}, **WORKER_ARGS)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user