Fix multiprocess spawn context using ProcessFork kills subprocess before parent process ends

allegroai 2021-06-12 23:10:26 +03:00
parent d769582332
commit e7de292c1c


@@ -7,6 +7,7 @@ from multiprocessing import Lock, Event as ProcessEvent
 from threading import Thread, Event as TrEvent
 from time import sleep, time
 from typing import List, Dict, Optional
+from multiprocessing import Process, get_context
 
 import psutil
 from six.moves.queue import Empty, Queue as TrQueue
@@ -19,9 +20,9 @@ except ImportError:
     from multiprocessing.queues import SimpleQueue
 
 try:
-    from multiprocessing.context import ForkProcess as Process  # noqa
+    from multiprocessing.context import ForkContext  # noqa
 except ImportError:
-    from multiprocessing import Process
+    ForkContext = None
 
 
 class ThreadCalls(object):
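
Note: the import fallback above pairs with the dispatch added further down in this commit: when the active multiprocessing context is a fork context, the monitor keeps using a multiprocessing.Process; otherwise it falls back to a raw os.fork(). A minimal standalone sketch of that detection (the helper name here is ours, not the library's):

    from multiprocessing import get_context

    try:
        from multiprocessing.context import ForkContext  # not defined e.g. on Windows
    except ImportError:
        ForkContext = None

    def default_context_is_fork():
        # get_context() with no argument returns the currently selected
        # start-method context (fork, spawn or forkserver)
        return ForkContext is not None and isinstance(get_context(), ForkContext)

    if __name__ == '__main__':
        # True with the Linux default, False under spawn (Windows, macOS on 3.8+)
        print('fork context active:', default_context_is_fork())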
@@ -374,47 +375,72 @@ class BackgroundMonitor(object):
         # setup
         for d in BackgroundMonitor._instances.get(id(task.id), []):
             d.set_subprocess_mode()
 
         # todo: solve for standalone spawn subprocess
-        BackgroundMonitor._main_process = Process(
-            target=cls._background_process_start,
-            args=(id(task.id), cls._sub_process_started)
-        )
-        BackgroundMonitor._main_process.daemon = True
-        # Hack allow to create daemon subprocesses (even though python doesn't like it)
-        un_daemonize = False
-        # noinspection PyBroadException
-        try:
-            from multiprocessing import current_process
-            if current_process()._config.get('daemon'):  # noqa
-                un_daemonize = current_process()._config.get('daemon')  # noqa
-                current_process()._config['daemon'] = False  # noqa
-        except BaseException:
-            pass
-        # try to start the background process, if we fail retry again, or crash
-        for i in range(4):
-            try:
-                BackgroundMonitor._main_process.start()
-                break
-            except BaseException:
-                if i < 3:
-                    sleep(1)
-                    continue
-                raise
-        if un_daemonize:
-            # noinspection PyBroadException
-            try:
-                from multiprocessing import current_process
-                current_process()._config['daemon'] = un_daemonize  # noqa
-            except BaseException:
-                pass
+        if ForkContext is not None and isinstance(get_context(), ForkContext):
+            cls.__start_subprocess_forkprocess(task_obj_id=id(task.id))
+        else:
+            cls.__start_subprocess_os_fork(task_obj_id=id(task.id))
 
         # wait until subprocess is up
         if wait_for_subprocess:
             cls._sub_process_started.wait()
 
     @classmethod
-    def _background_process_start(cls, task_obj_id, event_start=None):
-        # type: (int, Optional[SafeEvent]) -> None
+    def __start_subprocess_os_fork(cls, task_obj_id):
+        process_args = (task_obj_id, cls._sub_process_started, os.getpid())
+        BackgroundMonitor._main_process = os.fork()
+        # check if we are the child process
+        if BackgroundMonitor._main_process == 0:
+            # update to the child process pid
+            BackgroundMonitor._main_process = os.getpid()
+            cls._background_process_start(*process_args)
+            # force to leave the subprocess
+            leave_process(0)
+            return
+
+    @classmethod
+    def __start_subprocess_forkprocess(cls, task_obj_id):
+        _main_process = Process(
+            target=cls._background_process_start,
+            args=(task_obj_id, cls._sub_process_started, os.getpid())
+        )
+        _main_process.daemon = True
+        # Hack allow to create daemon subprocesses (even though python doesn't like it)
+        un_daemonize = False
+        # noinspection PyBroadException
+        try:
+            from multiprocessing import current_process
+            if current_process()._config.get('daemon'):  # noqa
+                un_daemonize = current_process()._config.get('daemon')  # noqa
+                current_process()._config['daemon'] = False  # noqa
+        except BaseException:
+            pass
+        # try to start the background process, if we fail retry again, or crash
+        for i in range(4):
+            try:
+                _main_process.start()
+                break
+            except BaseException:
+                if i < 3:
+                    sleep(1)
+                    continue
+                raise
+        BackgroundMonitor._main_process = _main_process.pid
+        if un_daemonize:
+            # noinspection PyBroadException
+            try:
+                from multiprocessing import current_process
+                current_process()._config['daemon'] = un_daemonize  # noqa
+            except BaseException:
+                pass
+
+    @classmethod
+    def _background_process_start(cls, task_obj_id, event_start=None, parent_pid=None):
+        # type: (int, Optional[SafeEvent], Optional[int]) -> None
         is_debugger_running = bool(getattr(sys, 'gettrace', None) and sys.gettrace())
+        # make sure we update the pid to our own
+        cls._main_process = os.getpid()
         # restore original signal, this will prevent any deadlocks
         # Do not change the exception we need to catch base exception as well
         # noinspection PyBroadException
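
Note: the __start_subprocess_os_fork path above relies on the classic os.fork() idiom: the call returns the child's pid in the parent and 0 in the child, so the two branches can be told apart and the parent can keep the child's pid for later liveness checks. A rough, self-contained sketch of that pattern (Unix-only; the function names here are illustrative, not the library's API):

    import os
    import time

    def _background_loop(parent_pid):
        # stand-in for the real monitoring loop
        print('child %d watching parent %d' % (os.getpid(), parent_pid))
        time.sleep(1)

    def start_background_fork():
        parent_pid = os.getpid()
        child_pid = os.fork()
        if child_pid == 0:
            # we are the child: do the work, then exit without falling back
            # into the parent's code path
            _background_loop(parent_pid)
            os._exit(0)
        # we are the parent: remember the child's pid for later liveness checks
        return child_pid

    if __name__ == '__main__':
        pid = start_background_fork()
        print('parent %d forked child %d' % (os.getpid(), pid))
        os.waitpid(pid, 0)  # reap the child so it does not linger as a zombie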
@@ -445,16 +471,26 @@
 
         # wait until we are signaled
         for i in instances:
-            # noinspection PyBroadException
+            # DO NOT CHANGE, we need to catch base exception, if the process gets killed
             try:
-                if i._thread and i._thread.is_alive():
-                    # DO NOT change, we need to catch base exception, if the process gets killed
+                while i._thread and i._thread.is_alive():
+                    # noinspection PyBroadException
                     try:
-                        i._thread.join()
+                        p = psutil.Process(parent_pid)
+                        parent_alive = p.is_running() and p.status() != psutil.STATUS_ZOMBIE
+                    except Exception:
+                        parent_alive = False
+
+                    # if parent process is not here we should just leave!
+                    if not parent_alive:
+                        return
+
+                    # DO NOT CHANGE, we need to catch base exception, if the process gets killed
+                    try:
+                        # timeout so we can detect if the parent process got killed.
+                        i._thread.join(timeout=30.)
                     except:  # noqa
                         break
-                else:
-                    pass
             except:  # noqa
                 pass
         # we are done, leave process
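
Note: the hunk above is the heart of the fix. Instead of blocking forever on thread.join(), the subprocess joins with a timeout and, on every wake-up, asks psutil whether the parent pid still refers to a live (non-zombie) process, exiting on its own if the parent was killed. A hedged sketch of the same watchdog idea outside the library (names are ours):

    import os
    import threading
    import time

    import psutil

    def pid_is_alive(pid):
        # a killed process can linger as a zombie, so existence alone is not enough
        try:
            p = psutil.Process(pid)
            return p.is_running() and p.status() != psutil.STATUS_ZOMBIE
        except Exception:
            return False

    def wait_on_thread(worker, parent_pid, poll_timeout=30.0):
        while worker.is_alive():
            if not pid_is_alive(parent_pid):
                return  # parent is gone: stop waiting so the process can exit
            # bounded join: the parent check above re-runs every poll_timeout seconds
            worker.join(timeout=poll_timeout)

    if __name__ == '__main__':
        t = threading.Thread(target=time.sleep, args=(2,), daemon=True)
        t.start()
        wait_on_thread(t, os.getppid(), poll_timeout=0.5)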
@@ -473,11 +509,10 @@
             return False
         # noinspection PyBroadException
         try:
-            return \
-                cls._main_process.is_alive() and \
-                psutil.Process(cls._main_process.pid).status() != psutil.STATUS_ZOMBIE
+            p = psutil.Process(cls._main_process)
+            return p.is_running() and p.status() != psutil.STATUS_ZOMBIE
         except Exception:
-            current_pid = cls._main_process.pid
+            current_pid = cls._main_process
            if not current_pid:
                return False
            try:
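
Note: because _main_process is now stored as a plain pid rather than a Process object, liveness has to come from psutil, and is_running() alone is not sufficient: psutil still reports a dead-but-unreaped child as running, which is why the code also rejects STATUS_ZOMBIE. A small Unix-only demonstration of that subtlety (illustrative, not part of the commit):

    import os
    import time

    import psutil

    pid = os.fork()
    if pid == 0:
        os._exit(0)            # child exits immediately
    time.sleep(0.2)            # give it time to die; deliberately not reaped yet
    p = psutil.Process(pid)
    print(p.is_running())                         # True, although the child already exited
    print(p.status() == psutil.STATUS_ZOMBIE)     # True: only a zombie entry remains
    os.waitpid(pid, 0)         # reap the child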
@@ -488,7 +523,7 @@
             for child in parent.children(recursive=True):
                 # kill ourselves last (if we need to)
                 if child.pid == current_pid:
-                    return child.status() != psutil.STATUS_ZOMBIE
+                    return child.is_running() and child.status() != psutil.STATUS_ZOMBIE
         return False
 
     def is_subprocess(self):
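
Note: the last hunk hardens the fallback branch of the same check: when the subprocess pid cannot be inspected directly, the code walks the parent's children and only treats the monitor as alive if the matching child is running and not a zombie. A sketch of that lookup, with an illustrative helper name of our own:

    import os

    import psutil

    def child_is_alive(parent_pid, child_pid):
        try:
            parent = psutil.Process(parent_pid)
        except Exception:
            return False
        for child in parent.children(recursive=True):
            if child.pid == child_pid:
                # found the subprocess: alive only if running and not a zombie
                return child.is_running() and child.status() != psutil.STATUS_ZOMBIE
        return False

    if __name__ == '__main__':
        # the current process is, by definition, a live child of its parent
        print(child_is_alive(os.getppid(), os.getpid()))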