Fix: only set the Task "started" status on node rank 0

This commit is contained in:
allegroai 2024-08-27 22:52:31 +03:00
parent b21665ed6e
commit a4d3b5bad6

View File

@ -940,12 +940,14 @@ class Worker(ServiceCommandSection):
print("Warning: failed obtaining/setting hostname for task '{}': {}".format(task_id, ex)) print("Warning: failed obtaining/setting hostname for task '{}': {}".format(task_id, ex))
# set task status to in_progress so we know it was popped from the queue # set task status to in_progress so we know it was popped from the queue
# noinspection PyBroadException if not self._get_node_rank():
try: # noinspection PyBroadException
task_session.send_api(tasks_api.StartedRequest(task=task_id, status_message="launch by agent", force=True)) try:
except Exception: task_session.send_api(tasks_api.StartedRequest(task=task_id, status_message="launch by agent", force=True))
print("Warning: Could not set status=in_progress task id '{}', skipping".format(task_id)) except Exception:
return print("Warning: Could not set status=in_progress task id '{}', skipping".format(task_id))
return
# setup console log # setup console log
temp_stdout_name = safe_mkstemp( temp_stdout_name = safe_mkstemp(
suffix=".txt", prefix=".clearml_agent_out.", name_only=True, dir=(ENV_TEMP_STDOUT_FILE_DIR.get() or None) suffix=".txt", prefix=".clearml_agent_out.", name_only=True, dir=(ENV_TEMP_STDOUT_FILE_DIR.get() or None)
@ -1336,12 +1338,13 @@ class Worker(ServiceCommandSection):
# set task status to in_progress so we know it was popped from the queue # set task status to in_progress so we know it was popped from the queue
# next api version we will set the status when pulling from the queue # next api version we will set the status when pulling from the queue
# noinspection PyBroadException if not self._get_node_rank():
try: # noinspection PyBroadException
self._session.send_api( try:
tasks_api.StartedRequest(task=task_id, status_message="pulled by agent", force=True)) self._session.send_api(
except Exception: tasks_api.StartedRequest(task=task_id, status_message="pulled by agent", force=True))
print("Warning: Could not set status=in_progress task id '{}', retrying in a bit".format(task_id)) except Exception:
print("Warning: Could not set status=in_progress task id '{}', retrying in a bit".format(task_id))
# check if we need to impersonate # check if we need to impersonate
task_session = None task_session = None