mirror of
https://github.com/clearml/clearml
synced 2025-03-09 21:40:51 +00:00
Fix automation BOHB budget display calculation, Job.started() and daemon sleep
This commit is contained in:
parent
2066d9ff9d
commit
7dad7e57e4
@ -299,9 +299,15 @@ class OptimizerBOHB(SearchStrategy, RandomSeed):
|
||||
# Step 3: Run an optimizer
|
||||
self._bohb = BOHB(configspace=self._convert_hyper_parameters_to_cs(),
|
||||
run_id=fake_run_id,
|
||||
num_samples=self.total_max_jobs,
|
||||
# num_samples=self.total_max_jobs, # will be set by self._bohb_kwargs
|
||||
min_budget=float(self._min_iteration_per_job) / float(self._max_iteration_per_job),
|
||||
**self._bohb_kwargs)
|
||||
# scale the budget according to the successive halving iterations
|
||||
if self.budget.jobs.limit:
|
||||
self.budget.jobs.limit *= len(self._bohb.budgets)
|
||||
if self.budget.iterations.limit:
|
||||
self.budget.iterations.limit *= len(self._bohb.budgets)
|
||||
# start optimization
|
||||
self._res = self._bohb.run(n_iterations=self.total_max_jobs, min_n_workers=self._num_concurrent_workers)
|
||||
|
||||
# Step 4: if we get here, Shutdown
|
||||
|
@ -106,7 +106,9 @@ class TrainsJob(object):
|
||||
return -1
|
||||
self.task_started = True
|
||||
if not self.task.data.started:
|
||||
return -1
|
||||
self.task.reload()
|
||||
if not self.task.data.started:
|
||||
return -1
|
||||
return (datetime.now(tz=self.task.data.started.tzinfo) - self.task.data.started).total_seconds()
|
||||
|
||||
def iterations(self):
|
||||
@ -215,6 +217,20 @@ class TrainsJob(object):
|
||||
"""
|
||||
return self.task.status in (Task.TaskStatusEnum.queued, Task.TaskStatusEnum.created)
|
||||
|
||||
def started(self):
    # type: () -> bool
    """
    Return True if the job already started or ended, False if it is still created/pending.

    Once a call has returned True, ``self.task_started`` is set, and every later call
    returns True without looking at the task status again.

    :return bool: False if the task is currently in draft mode (created) or pending (queued)
    """
    # Only a draft or a queued task counts as "not started yet". The status tuple
    # mirrors the pending check used elsewhere in this class (queued, created);
    # an `in_progress` task must be reported as started.
    if not self.task_started and self.task.status in (
            Task.TaskStatusEnum.queued, Task.TaskStatusEnum.created):
        return False

    # remember the transition so the answer stays True from now on
    self.task_started = True
    return True
|
||||
|
||||
|
||||
# noinspection PyMethodMayBeStatic, PyUnusedLocal
|
||||
class _JobStub(object):
|
||||
@ -326,3 +342,7 @@ class _JobStub(object):
|
||||
def is_pending(self):
    # type: () -> bool
    """
    Return True while the stub job has no start marker.

    :return bool: True if ``self.task_started`` is None, False for any other value
    """
    start_marker = self.task_started
    return start_marker is None
|
||||
|
||||
def started(self):
    # type: () -> bool
    """
    Return True once the stub job is no longer pending.

    :return bool: the negation of ``self.is_pending()``
    """
    if self.is_pending():
        return False
    return True
|
||||
|
@ -369,15 +369,17 @@ class SearchStrategy(object):
|
||||
|
||||
if self.time_limit_per_job:
|
||||
elapsed = job.elapsed() / 60.
|
||||
self.budget.compute_time.update(job.task_id(), elapsed)
|
||||
if elapsed > self.time_limit_per_job:
|
||||
abort_job = True
|
||||
if elapsed > 0:
|
||||
self.budget.compute_time.update(job.task_id(), elapsed)
|
||||
if elapsed > self.time_limit_per_job:
|
||||
abort_job = True
|
||||
|
||||
if self.max_iteration_per_job:
|
||||
iterations = self._get_job_iterations(job)
|
||||
self.budget.iterations.update(job.task_id(), iterations)
|
||||
if iterations > self.max_iteration_per_job:
|
||||
abort_job = True
|
||||
if iterations > 0:
|
||||
self.budget.iterations.update(job.task_id(), iterations)
|
||||
if iterations > self.max_iteration_per_job:
|
||||
abort_job = True
|
||||
|
||||
if abort_job:
|
||||
job.abort()
|
||||
@ -1139,6 +1141,9 @@ class HyperParameterOptimizer(object):
|
||||
|
||||
if timeout >= 0:
|
||||
timeout = min(self._report_period_min * 60., timeout if timeout else self._report_period_min * 60.)
|
||||
# make sure that we have the first report fired before we actually go to sleep, wait for 15 sec.
|
||||
if counter <= 0:
|
||||
timeout = 15
|
||||
print('Progress report #{} completed, sleeping for {} minutes'.format(counter, timeout / 60.))
|
||||
if self._stop_event.wait(timeout=timeout):
|
||||
# wait for one last report
|
||||
|
Loading…
Reference in New Issue
Block a user