Fix automation BOHB budget display calculation, Job.started() and daemon sleep

This commit is contained in:
allegroai 2020-06-01 11:02:45 +03:00
parent 2066d9ff9d
commit 7dad7e57e4
3 changed files with 39 additions and 8 deletions

View File

@ -299,9 +299,15 @@ class OptimizerBOHB(SearchStrategy, RandomSeed):
# Step 3: Run an optimizer
self._bohb = BOHB(configspace=self._convert_hyper_parameters_to_cs(),
run_id=fake_run_id,
num_samples=self.total_max_jobs,
# num_samples=self.total_max_jobs, # will be set by self._bohb_kwargs
min_budget=float(self._min_iteration_per_job) / float(self._max_iteration_per_job),
**self._bohb_kwargs)
# scale the budget according to the successive halving iterations
if self.budget.jobs.limit:
self.budget.jobs.limit *= len(self._bohb.budgets)
if self.budget.iterations.limit:
self.budget.iterations.limit *= len(self._bohb.budgets)
# start optimization
self._res = self._bohb.run(n_iterations=self.total_max_jobs, min_n_workers=self._num_concurrent_workers)
# Step 4: if we get here, Shutdown

View File

@ -106,7 +106,9 @@ class TrainsJob(object):
return -1
self.task_started = True
if not self.task.data.started:
return -1
self.task.reload()
if not self.task.data.started:
return -1
return (datetime.now(tz=self.task.data.started.tzinfo) - self.task.data.started).total_seconds()
def iterations(self):
@ -215,6 +217,20 @@ class TrainsJob(object):
"""
return self.task.status in (Task.TaskStatusEnum.queued, Task.TaskStatusEnum.created)
def started(self):
    # type: () -> bool
    """
    Return True if the job already started, or ended (False if created/pending).

    Once the job has been observed as started, the result is cached in
    ``self.task_started`` and the task status is not consulted again.

    :return bool: False if the task is currently in draft mode (created)
        or pending (queued), True otherwise
    """
    # "Not started" means draft or waiting in a queue -- the same pair of
    # statuses that is_pending() checks. An in_progress task is running and
    # therefore counts as started.
    if not self.task_started and self.task.status in (
            Task.TaskStatusEnum.queued, Task.TaskStatusEnum.created):
        return False

    # remember the transition so later calls short-circuit
    self.task_started = True
    return True
# noinspection PyMethodMayBeStatic, PyUnusedLocal
class _JobStub(object):
@ -326,3 +342,7 @@ class _JobStub(object):
def is_pending(self):
    # type: () -> bool
    """
    Report whether this stub job is still pending.

    :return bool: True while ``task_started`` is None, False otherwise
    """
    started_marker = self.task_started
    return started_marker is None
def started(self):
    # type: () -> bool
    """
    Report whether this stub job is past the pending stage.

    :return bool: the negation of ``is_pending()``
    """
    if self.is_pending():
        return False
    return True

View File

@ -369,15 +369,17 @@ class SearchStrategy(object):
if self.time_limit_per_job:
elapsed = job.elapsed() / 60.
self.budget.compute_time.update(job.task_id(), elapsed)
if elapsed > self.time_limit_per_job:
abort_job = True
if elapsed > 0:
self.budget.compute_time.update(job.task_id(), elapsed)
if elapsed > self.time_limit_per_job:
abort_job = True
if self.max_iteration_per_job:
iterations = self._get_job_iterations(job)
self.budget.iterations.update(job.task_id(), iterations)
if iterations > self.max_iteration_per_job:
abort_job = True
if iterations > 0:
self.budget.iterations.update(job.task_id(), iterations)
if iterations > self.max_iteration_per_job:
abort_job = True
if abort_job:
job.abort()
@ -1139,6 +1141,9 @@ class HyperParameterOptimizer(object):
if timeout >= 0:
timeout = min(self._report_period_min * 60., timeout if timeout else self._report_period_min * 60.)
# make sure that we have the first report fired before we actually go to sleep, wait for 15 sec.
if counter <= 0:
timeout = 15
print('Progress report #{} completed, sleeping for {} minutes'.format(counter, timeout / 60.))
if self._stop_event.wait(timeout=timeout):
# wait for one last report