mirror of
https://github.com/clearml/clearml
synced 2025-02-12 07:35:08 +00:00
Protect against enqueue failing due to permission
This commit is contained in:
parent
5f7d0348e2
commit
e78c1e806a
@ -673,7 +673,7 @@ class PipelineController(object):
|
|||||||
elif node.job.is_cached_task():
|
elif node.job.is_cached_task():
|
||||||
node.executed = node.job.task_id()
|
node.executed = node.job.task_id()
|
||||||
else:
|
else:
|
||||||
node.job.launch(queue_name=node.queue or self._default_execution_queue)
|
return node.job.launch(queue_name=node.queue or self._default_execution_queue)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -65,7 +65,8 @@ class _TrainsBandsterWorker(Worker):
|
|||||||
self._current_job = self.optimizer.helper_create_job(self.base_task_id, parameter_override=config)
|
self._current_job = self.optimizer.helper_create_job(self.base_task_id, parameter_override=config)
|
||||||
# noinspection PyProtectedMember
|
# noinspection PyProtectedMember
|
||||||
self.optimizer._current_jobs.append(self._current_job)
|
self.optimizer._current_jobs.append(self._current_job)
|
||||||
self._current_job.launch(self.queue_name)
|
if not self._current_job.launch(self.queue_name):
|
||||||
|
return dict()
|
||||||
iteration_value = None
|
iteration_value = None
|
||||||
is_pending = True
|
is_pending = True
|
||||||
|
|
||||||
|
@ -159,7 +159,7 @@ class ClearmlJob(object):
|
|||||||
|
|
||||||
:param str queue_name:
|
:param str queue_name:
|
||||||
|
|
||||||
:return False if Task is not in "created" status (i.e. cannot be enqueued)
|
:return False if Task is not in "created" status (i.e. cannot be enqueued) or if enqueuing the Task failed
|
||||||
"""
|
"""
|
||||||
if self._is_cached_task:
|
if self._is_cached_task:
|
||||||
return False
|
return False
|
||||||
@ -167,7 +167,7 @@ class ClearmlJob(object):
|
|||||||
Task.enqueue(task=self.task, queue_name=queue_name)
|
Task.enqueue(task=self.task, queue_name=queue_name)
|
||||||
return True
|
return True
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
logger.warning(ex)
|
logger.warning('Error enqueuing Task {} to {}: {}'.format(self.task, queue_name, ex))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def abort(self):
|
def abort(self):
|
||||||
|
@ -407,8 +407,10 @@ class SearchStrategy(object):
|
|||||||
new_job = self.create_job()
|
new_job = self.create_job()
|
||||||
if not new_job:
|
if not new_job:
|
||||||
break
|
break
|
||||||
|
if not new_job.launch(self._execution_queue):
|
||||||
|
# error enqueuing Job, something wrong here
|
||||||
|
continue
|
||||||
self._num_jobs += 1
|
self._num_jobs += 1
|
||||||
new_job.launch(self._execution_queue)
|
|
||||||
self._current_jobs.append(new_job)
|
self._current_jobs.append(new_job)
|
||||||
self._pending_jobs.append(new_job)
|
self._pending_jobs.append(new_job)
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ class OptunaObjective(object):
|
|||||||
self._config_space = config_space
|
self._config_space = config_space
|
||||||
|
|
||||||
def objective(self, trial):
|
def objective(self, trial):
|
||||||
# type: (optuna.Trial) -> float
|
# type: (optuna.Trial) -> Optional[float]
|
||||||
"""
|
"""
|
||||||
return metric value for a specified set of parameters, pulled from the trial object
|
return metric value for a specified set of parameters, pulled from the trial object
|
||||||
|
|
||||||
@ -42,7 +42,9 @@ class OptunaObjective(object):
|
|||||||
current_job = self.optimizer.helper_create_job(self.base_task_id, parameter_override=parameter_override)
|
current_job = self.optimizer.helper_create_job(self.base_task_id, parameter_override=parameter_override)
|
||||||
# noinspection PyProtectedMember
|
# noinspection PyProtectedMember
|
||||||
self.optimizer._current_jobs.append(current_job)
|
self.optimizer._current_jobs.append(current_job)
|
||||||
current_job.launch(self.queue_name)
|
if not current_job.launch(self.queue_name):
|
||||||
|
# failed launching the job
|
||||||
|
return None
|
||||||
iteration_value = None
|
iteration_value = None
|
||||||
is_pending = True
|
is_pending = True
|
||||||
while True:
|
while True:
|
||||||
|
@ -998,6 +998,8 @@ class Task(_Task):
|
|||||||
|
|
||||||
req = tasks.EnqueueRequest(task=task_id, queue=queue_id)
|
req = tasks.EnqueueRequest(task=task_id, queue=queue_id)
|
||||||
res = cls._send(session=session, req=req)
|
res = cls._send(session=session, req=req)
|
||||||
|
if not res.ok():
|
||||||
|
raise ValueError(res.response)
|
||||||
resp = res.response
|
resp = res.response
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user