mirror of
https://github.com/clearml/clearml
synced 2025-04-16 21:42:10 +00:00
Fix and improve signal handling (disable edit lock on signal callbacks, call before releasing the STD patch)
This commit is contained in:
parent
772fd7f750
commit
4628b5eb82
@ -1731,6 +1731,12 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
|
|||||||
@property
|
@property
|
||||||
def _edit_lock(self):
|
def _edit_lock(self):
|
||||||
# type: () -> ()
|
# type: () -> ()
|
||||||
|
|
||||||
|
# skip the actual lock: return a fresh one-time RLock, which can always be acquired
|
||||||
|
# only used during process shutdown, to avoid deadlocks
|
||||||
|
if self.__edit_lock is False:
|
||||||
|
return RLock()
|
||||||
|
|
||||||
if self.__edit_lock:
|
if self.__edit_lock:
|
||||||
return self.__edit_lock
|
return self.__edit_lock
|
||||||
if not PROC_MASTER_ID_ENV_VAR.get() or len(PROC_MASTER_ID_ENV_VAR.get().split(':')) < 2:
|
if not PROC_MASTER_ID_ENV_VAR.get() or len(PROC_MASTER_ID_ENV_VAR.get().split(':')) < 2:
|
||||||
|
@ -2337,6 +2337,10 @@ class Task(_Task):
|
|||||||
# from here only a single thread can re-enter
|
# from here only a single thread can re-enter
|
||||||
self._at_exit_called = get_current_thread_id()
|
self._at_exit_called = get_current_thread_id()
|
||||||
|
|
||||||
|
# disable lock on signal callbacks, to avoid deadlocks.
|
||||||
|
if self.__exit_hook and self.__exit_hook.signal is not None:
|
||||||
|
self.__edit_lock = False
|
||||||
|
|
||||||
is_sub_process = self.__is_subprocess()
|
is_sub_process = self.__is_subprocess()
|
||||||
|
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
@ -2565,33 +2569,25 @@ class Task(_Task):
|
|||||||
return ret
|
return ret
|
||||||
|
|
||||||
def signal_handler(self, sig, frame):
|
def signal_handler(self, sig, frame):
|
||||||
|
self.signal = sig
|
||||||
|
|
||||||
|
org_handler = self._org_handlers.get(sig)
|
||||||
|
signal.signal(sig, org_handler or signal.SIG_DFL)
|
||||||
|
|
||||||
if self._signal_recursion_protection_flag:
|
if self._signal_recursion_protection_flag:
|
||||||
# call original
|
# call original
|
||||||
org_handler = self._org_handlers.get(sig)
|
os.kill(os.getpid(), sig)
|
||||||
if callable(org_handler):
|
return org_handler if not callable(org_handler) else signal.SIG_DFL
|
||||||
org_handler = org_handler(sig, frame)
|
|
||||||
else:
|
|
||||||
signal.signal(sig, org_handler or signal.SIG_DFL)
|
|
||||||
os.kill(os.getpid(), sig)
|
|
||||||
return org_handler
|
|
||||||
|
|
||||||
self._signal_recursion_protection_flag = True
|
self._signal_recursion_protection_flag = True
|
||||||
|
|
||||||
# call exit callback
|
# call exit callback
|
||||||
self.signal = sig
|
|
||||||
if self._exit_callback:
|
if self._exit_callback:
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
try:
|
try:
|
||||||
self._exit_callback()
|
self._exit_callback()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
# call original signal handler
|
|
||||||
org_handler = self._org_handlers.get(sig)
|
|
||||||
self._org_handlers[sig] = None
|
|
||||||
if callable(org_handler):
|
|
||||||
ret = org_handler(sig, frame)
|
|
||||||
else:
|
|
||||||
signal.signal(sig, org_handler or signal.SIG_DFL)
|
|
||||||
ret = 0
|
|
||||||
|
|
||||||
# remove stdout logger, just in case
|
# remove stdout logger, just in case
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
@ -2601,13 +2597,11 @@ class Task(_Task):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if not callable(org_handler):
|
os.kill(os.getpid(), sig)
|
||||||
os.kill(os.getpid(), sig)
|
|
||||||
|
|
||||||
self._signal_recursion_protection_flag = False
|
self._signal_recursion_protection_flag = False
|
||||||
|
|
||||||
# return handler result
|
# return handler result
|
||||||
return ret
|
return org_handler if not callable(org_handler) else signal.SIG_DFL
|
||||||
|
|
||||||
# we only remove the signals since this will hang subprocesses
|
# we only remove the signals since this will hang subprocesses
|
||||||
if only_remove_signal_and_exception_hooks:
|
if only_remove_signal_and_exception_hooks:
|
||||||
|
Loading…
Reference in New Issue
Block a user