mirror of
https://github.com/clearml/clearml
synced 2025-02-02 01:54:23 +00:00
Fix Hydra tasks never being marked as failed and always being set to completed (fix return code handling)
This commit is contained in:
parent
11242d4029
commit
81de18dbce
@ -90,6 +90,17 @@ class PatchHydra(object):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _patched_run_job(config, task_function, *args, **kwargs):
|
def _patched_run_job(config, task_function, *args, **kwargs):
|
||||||
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
from hydra.core.utils import JobStatus
|
||||||
|
|
||||||
|
failed_status = JobStatus.FAILED
|
||||||
|
except Exception:
|
||||||
|
LoggerRoot.get_base_logger(PatchHydra).warning(
|
||||||
|
"Could not import JobStatus from Hydra. Failed tasks will be marked as completed"
|
||||||
|
)
|
||||||
|
failed_status = None
|
||||||
|
|
||||||
# store the config
|
# store the config
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
try:
|
try:
|
||||||
@ -121,10 +132,23 @@ class PatchHydra(object):
|
|||||||
kwargs["config"] = config
|
kwargs["config"] = config
|
||||||
kwargs["task_function"] = partial(PatchHydra._patched_task_function, task_function,)
|
kwargs["task_function"] = partial(PatchHydra._patched_task_function, task_function,)
|
||||||
result = PatchHydra._original_run_job(*args, **kwargs)
|
result = PatchHydra._original_run_job(*args, **kwargs)
|
||||||
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
result_status = result.status
|
||||||
|
except Exception:
|
||||||
|
LoggerRoot.get_base_logger(PatchHydra).warning(
|
||||||
|
"Could not get Hydra job status. Failed tasks will be marked as completed"
|
||||||
|
)
|
||||||
|
result_status = None
|
||||||
|
|
||||||
# if we have Task.init called inside the App, we close it after the app is done.
|
# if we have Task.init called inside the App, we close it after the app is done.
|
||||||
# This will make sure that hydra run will create multiple Tasks
|
# This will make sure that hydra run will create multiple Tasks
|
||||||
if not running_remotely() and not pre_app_task_init_call and PatchHydra._current_task:
|
if (
|
||||||
|
not running_remotely()
|
||||||
|
and not pre_app_task_init_call
|
||||||
|
and PatchHydra._current_task
|
||||||
|
and (failed_status is None or result_status is None or result_status != failed_status)
|
||||||
|
):
|
||||||
PatchHydra._current_task.close()
|
PatchHydra._current_task.close()
|
||||||
# make sure we do not reuse the Task if we have a multi-run session
|
# make sure we do not reuse the Task if we have a multi-run session
|
||||||
DEV_TASK_NO_REUSE.set(True)
|
DEV_TASK_NO_REUSE.set(True)
|
||||||
|
@ -3245,11 +3245,11 @@ class Task(_Task):
|
|||||||
is_sub_process = self.__is_subprocess()
|
is_sub_process = self.__is_subprocess()
|
||||||
|
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
|
task_status = None
|
||||||
try:
|
try:
|
||||||
wait_for_uploads = True
|
wait_for_uploads = True
|
||||||
# first thing mark task as stopped, so we will not end up with "running" on lost tasks
|
# first thing mark task as stopped, so we will not end up with "running" on lost tasks
|
||||||
# if we are running remotely, the daemon will take care of it
|
# if we are running remotely, the daemon will take care of it
|
||||||
task_status = None
|
|
||||||
wait_for_std_log = True
|
wait_for_std_log = True
|
||||||
if (not running_remotely() or DEBUG_SIMULATE_REMOTE_TASK.get()) \
|
if (not running_remotely() or DEBUG_SIMULATE_REMOTE_TASK.get()) \
|
||||||
and self.is_main_task() and not is_sub_process:
|
and self.is_main_task() and not is_sub_process:
|
||||||
@ -3273,7 +3273,7 @@ class Task(_Task):
|
|||||||
if (is_exception and not isinstance(is_exception, KeyboardInterrupt)
|
if (is_exception and not isinstance(is_exception, KeyboardInterrupt)
|
||||||
and is_exception != KeyboardInterrupt) \
|
and is_exception != KeyboardInterrupt) \
|
||||||
or (not self.__exit_hook.remote_user_aborted and
|
or (not self.__exit_hook.remote_user_aborted and
|
||||||
self.__exit_hook.signal not in (None, 2, 15)):
|
(self.__exit_hook.signal not in (None, 2, 15) or self.__exit_hook.exit_code)):
|
||||||
task_status = (
|
task_status = (
|
||||||
'failed',
|
'failed',
|
||||||
'Exception {}'.format(is_exception) if is_exception else
|
'Exception {}'.format(is_exception) if is_exception else
|
||||||
@ -3391,7 +3391,8 @@ class Task(_Task):
|
|||||||
pass
|
pass
|
||||||
self._edit_lock = None
|
self._edit_lock = None
|
||||||
|
|
||||||
self.set_progress(100)
|
if task_status and task_status[0] == "completed":
|
||||||
|
self.set_progress(100)
|
||||||
|
|
||||||
# make sure no one will re-enter the shutdown method
|
# make sure no one will re-enter the shutdown method
|
||||||
self._at_exit_called = True
|
self._at_exit_called = True
|
||||||
|
Loading…
Reference in New Issue
Block a user