mirror of
https://github.com/clearml/clearml
synced 2025-02-12 15:45:25 +00:00
Improve detection of running reporting subprocess (including zombie state)
This commit is contained in:
parent
4416544ec5
commit
739a34929d
@ -333,6 +333,7 @@ class SingletonLock(AbstractContextManager):
|
|||||||
class BackgroundMonitor(object):
|
class BackgroundMonitor(object):
|
||||||
# If we will need multiple monitoring contexts (i.e. subprocesses) this will become a dict
|
# If we will need multiple monitoring contexts (i.e. subprocesses) this will become a dict
|
||||||
_main_process = None
|
_main_process = None
|
||||||
|
_main_process_proc_obj = None
|
||||||
_main_process_task_id = None
|
_main_process_task_id = None
|
||||||
_parent_pid = None
|
_parent_pid = None
|
||||||
_sub_process_started = None
|
_sub_process_started = None
|
||||||
@ -453,11 +454,20 @@ class BackgroundMonitor(object):
|
|||||||
if BackgroundMonitor._main_process == 0:
|
if BackgroundMonitor._main_process == 0:
|
||||||
# update to the child process pid
|
# update to the child process pid
|
||||||
BackgroundMonitor._main_process = os.getpid()
|
BackgroundMonitor._main_process = os.getpid()
|
||||||
|
BackgroundMonitor._main_process_proc_obj = psutil.Process(BackgroundMonitor._main_process)
|
||||||
cls._background_process_start(*process_args)
|
cls._background_process_start(*process_args)
|
||||||
# force to leave the subprocess
|
# force to leave the subprocess
|
||||||
leave_process(0)
|
leave_process(0)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# update main process object (we are now in the parent process, and we update on the child's subprocess pid)
|
||||||
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
BackgroundMonitor._main_process_proc_obj = psutil.Process(BackgroundMonitor._main_process)
|
||||||
|
except Exception:
|
||||||
|
# if we fail for some reason, do not crash, switch to thread mode when you can
|
||||||
|
BackgroundMonitor._main_process_proc_obj = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def __start_subprocess_forkprocess(cls, task_obj_id):
|
def __start_subprocess_forkprocess(cls, task_obj_id):
|
||||||
_main_process = Process(
|
_main_process = Process(
|
||||||
@ -486,6 +496,7 @@ class BackgroundMonitor(object):
|
|||||||
continue
|
continue
|
||||||
raise
|
raise
|
||||||
BackgroundMonitor._main_process = _main_process.pid
|
BackgroundMonitor._main_process = _main_process.pid
|
||||||
|
BackgroundMonitor._main_process_proc_obj = psutil.Process(BackgroundMonitor._main_process)
|
||||||
if un_daemonize:
|
if un_daemonize:
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
try:
|
try:
|
||||||
@ -500,6 +511,7 @@ class BackgroundMonitor(object):
|
|||||||
is_debugger_running = bool(getattr(sys, 'gettrace', None) and sys.gettrace())
|
is_debugger_running = bool(getattr(sys, 'gettrace', None) and sys.gettrace())
|
||||||
# make sure we update the pid to our own
|
# make sure we update the pid to our own
|
||||||
cls._main_process = os.getpid()
|
cls._main_process = os.getpid()
|
||||||
|
cls._main_process_proc_obj = psutil.Process(cls._main_process)
|
||||||
# restore original signal, this will prevent any deadlocks
|
# restore original signal, this will prevent any deadlocks
|
||||||
# Do not change the exception we need to catch base exception as well
|
# Do not change the exception we need to catch base exception as well
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
@ -564,11 +576,14 @@ class BackgroundMonitor(object):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _fast_is_subprocess_alive(cls):
|
def _fast_is_subprocess_alive(cls):
|
||||||
if not cls._main_process:
|
if not cls._main_process_proc_obj:
|
||||||
return False
|
return False
|
||||||
|
# we have to assume the process actually exists, so we optimize for
|
||||||
|
# just getting the object and status.
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
try:
|
try:
|
||||||
return psutil.pid_exists(cls._main_process)
|
return cls._main_process_proc_obj.is_running() and \
|
||||||
|
cls._main_process_proc_obj.status() != psutil.STATUS_ZOMBIE
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -615,6 +630,7 @@ class BackgroundMonitor(object):
|
|||||||
return
|
return
|
||||||
cls.wait_for_sub_process(task)
|
cls.wait_for_sub_process(task)
|
||||||
BackgroundMonitor._main_process = None
|
BackgroundMonitor._main_process = None
|
||||||
|
BackgroundMonitor._main_process_proc_obj = None
|
||||||
BackgroundMonitor._main_process_task_id = None
|
BackgroundMonitor._main_process_task_id = None
|
||||||
BackgroundMonitor._parent_pid = None
|
BackgroundMonitor._parent_pid = None
|
||||||
BackgroundMonitor._sub_process_started = None
|
BackgroundMonitor._sub_process_started = None
|
||||||
|
Loading…
Reference in New Issue
Block a user