Fix dataset upload being aborted by the server watchdog

This commit is contained in:
allegroai 2021-01-10 13:03:40 +02:00
parent 14e38e8a46
commit ce5fd31070
2 changed files with 15 additions and 2 deletions

View File

@ -30,10 +30,10 @@ class DevWorker(object):
return False
return True
def register(self, task):
def register(self, task, stop_signal_support=None):
if self._thread:
return True
if TaskStopSignal.enabled:
if (stop_signal_support is None and TaskStopSignal.enabled) or stop_signal_support is True:
self._dev_stop_signal = TaskStopSignal(task=task)
self._support_ping = hasattr(tasks, 'PingRequest')
# if there is nothing to monitor, leave

View File

@ -15,6 +15,7 @@ from pathlib2 import Path
from .. import Task, StorageManager
from ..backend_api.session.client import APIClient
from ..backend_interface.task.development.worker import DevWorker
from ..backend_interface.util import mutually_exclusive, exact_match_regex
from ..debugging.log import LoggerRoot
from ..storage.helper import StorageHelper
@ -83,6 +84,13 @@ class Dataset(object):
# noinspection PyProtectedMember
task._edit(script=task.data.script)
# if the task is running, make sure we ping the server so it will not be aborted by a watchdog
if task.status in ('created', 'in_progress'):
self._task_pinger = DevWorker()
self._task_pinger.register(task, stop_signal_support=False)
else:
self._task_pinger = None
# store current dataset Task
self._task = task
# store current dataset id
@ -389,6 +397,11 @@ class Dataset(object):
self._task.comment = 'Dependencies: {}\n'.format(hashed_nodes)
self._task.close()
self._task.completed()
if self._task_pinger:
self._task_pinger.unregister()
self._task_pinger = None
return True
def is_final(self):