mirror of
https://github.com/clearml/clearml
synced 2025-06-26 18:16:07 +00:00
Fix crash inside forked subprocess might leave SafeQueue in a locked state, causing task.close() to hang
This commit is contained in:
parent
12439b9970
commit
1795344ec3
@ -49,14 +49,18 @@ class ThreadCalls(object):
|
|||||||
self._queue.put((func, args))
|
self._queue.put((func, args))
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def close(self):
|
def close(self, timeout=5.):
|
||||||
if not self._thread:
|
|
||||||
return
|
|
||||||
t = self._thread
|
t = self._thread
|
||||||
|
if not t:
|
||||||
|
return
|
||||||
|
try:
|
||||||
# push something into queue so it knows this is the end
|
# push something into queue so it knows this is the end
|
||||||
self._queue.put(None)
|
self._queue.put(None)
|
||||||
# wait fot thread
|
# wait fot thread it should not take long, so we have a 5 second timeout
|
||||||
t.join()
|
# the background thread itself is doing nothing but push into a queue, so it should not take long
|
||||||
|
t.join(timeout=timeout)
|
||||||
|
except BaseException: # noqa
|
||||||
|
pass
|
||||||
# mark thread is done
|
# mark thread is done
|
||||||
self._thread = None
|
self._thread = None
|
||||||
|
|
||||||
@ -76,6 +80,7 @@ class ThreadCalls(object):
|
|||||||
request[0]()
|
request[0]()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
self._thread = None
|
||||||
|
|
||||||
|
|
||||||
class SingletonThreadPool(object):
|
class SingletonThreadPool(object):
|
||||||
@ -110,8 +115,9 @@ class SafeQueue(object):
|
|||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
self._reader_thread = None
|
self._reader_thread = None
|
||||||
self._reader_thread_started = False
|
self._reader_thread_started = False
|
||||||
|
# Fix the python Queue and Use SimpleQueue write so it uses a single OS write,
|
||||||
|
# making it atomic message passing
|
||||||
self._q = SimpleQueue(*args, **kwargs)
|
self._q = SimpleQueue(*args, **kwargs)
|
||||||
# Fix the simple queue write so it uses a single OS write, making it atomic message passing
|
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
try:
|
try:
|
||||||
# noinspection PyUnresolvedReferences,PyProtectedMember
|
# noinspection PyUnresolvedReferences,PyProtectedMember
|
||||||
@ -129,17 +135,24 @@ class SafeQueue(object):
|
|||||||
# only call from main put process
|
# only call from main put process
|
||||||
return self._q_size > 0
|
return self._q_size > 0
|
||||||
|
|
||||||
def close(self, event, timeout=100.0):
|
def close(self, event, timeout=3.0):
|
||||||
# wait until all pending requests pushed
|
# wait until all pending requests pushed
|
||||||
tic = time()
|
tic = time()
|
||||||
|
prev_q_size = self._q_size
|
||||||
while self.is_pending():
|
while self.is_pending():
|
||||||
if event:
|
if event:
|
||||||
event.set()
|
event.set()
|
||||||
if not self.__thread_pool.is_active():
|
if not self.__thread_pool.is_active():
|
||||||
break
|
break
|
||||||
sleep(0.1)
|
sleep(0.1)
|
||||||
|
# timeout is for the maximum time to pull a single object from the queue,
|
||||||
|
# this way if we get stuck we notice quickly and abort
|
||||||
if timeout and (time()-tic) > timeout:
|
if timeout and (time()-tic) > timeout:
|
||||||
|
if prev_q_size == self._q_size:
|
||||||
break
|
break
|
||||||
|
else:
|
||||||
|
prev_q_size = self._q_size
|
||||||
|
tic = time()
|
||||||
|
|
||||||
def get(self, *args, **kwargs):
|
def get(self, *args, **kwargs):
|
||||||
return self._get_internal_queue(*args, **kwargs)
|
return self._get_internal_queue(*args, **kwargs)
|
||||||
@ -169,7 +182,12 @@ class SafeQueue(object):
|
|||||||
self.__thread_pool.get().apply_async(self._q_put, args=(obj, ))
|
self.__thread_pool.get().apply_async(self._q_put, args=(obj, ))
|
||||||
|
|
||||||
def _q_put(self, obj):
|
def _q_put(self, obj):
|
||||||
|
try:
|
||||||
self._q.put(obj)
|
self._q.put(obj)
|
||||||
|
except BaseException:
|
||||||
|
# make sure we zero the _q_size of the process dies (i.e. queue put fails)
|
||||||
|
self._q_size = 0
|
||||||
|
raise
|
||||||
# GIL will make sure it is atomic
|
# GIL will make sure it is atomic
|
||||||
self._q_size -= 1
|
self._q_size -= 1
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user