mirror of
https://github.com/clearml/clearml
synced 2025-04-23 07:45:24 +00:00
Fix optimizer monitor
This commit is contained in:
parent
be099f42f9
commit
295b33857c
@ -1026,17 +1026,17 @@ class HyperParameterOptimizer(object):
|
|||||||
self._thread_reporter.start()
|
self._thread_reporter.start()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def stop(self, timeout=None, flush_reporter=True):
|
def stop(self, timeout=None, wait_for_reporter=True):
|
||||||
# type: (Optional[float], Optional[bool]) -> ()
|
# type: (Optional[float], Optional[bool]) -> ()
|
||||||
"""
|
"""
|
||||||
Stop the HyperParameterOptimizer controller and the optimization thread.
|
Stop the HyperParameterOptimizer controller and the optimization thread.
|
||||||
|
|
||||||
:param float timeout: Wait timeout for the optimization thread to exit (minutes).
|
:param float timeout: Wait timeout for the optimization thread to exit (minutes).
|
||||||
The default is ``None``, indicating do not wait terminate immediately.
|
The default is ``None``, indicating do not wait terminate immediately.
|
||||||
:param flush_reporter: Wait for reporter to flush data.
|
:param wait_for_reporter: Wait for reporter to flush data.
|
||||||
"""
|
"""
|
||||||
if not self._thread or not self._stop_event or not self.optimizer:
|
if not self._thread or not self._stop_event or not self.optimizer:
|
||||||
if self._thread_reporter and flush_reporter:
|
if self._thread_reporter and wait_for_reporter:
|
||||||
self._thread_reporter.join()
|
self._thread_reporter.join()
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -1054,7 +1054,7 @@ class HyperParameterOptimizer(object):
|
|||||||
|
|
||||||
# clear thread
|
# clear thread
|
||||||
self._thread = None
|
self._thread = None
|
||||||
if flush_reporter:
|
if wait_for_reporter:
|
||||||
# wait for reporter to flush
|
# wait for reporter to flush
|
||||||
self._thread_reporter.join()
|
self._thread_reporter.join()
|
||||||
|
|
||||||
@ -1311,7 +1311,7 @@ class HyperParameterOptimizer(object):
|
|||||||
# if we should leave, stop everything now.
|
# if we should leave, stop everything now.
|
||||||
if timeout < 0:
|
if timeout < 0:
|
||||||
# we should leave
|
# we should leave
|
||||||
self.stop(flush_reporter=False)
|
self.stop(wait_for_reporter=False)
|
||||||
return
|
return
|
||||||
if task_logger and counter:
|
if task_logger and counter:
|
||||||
counter += 1
|
counter += 1
|
||||||
|
@ -45,12 +45,11 @@ class OptunaObjective(object):
|
|||||||
current_job.launch(self.queue_name)
|
current_job.launch(self.queue_name)
|
||||||
iteration_value = None
|
iteration_value = None
|
||||||
is_pending = True
|
is_pending = True
|
||||||
while not current_job.is_stopped():
|
while True:
|
||||||
if is_pending and not current_job.is_pending():
|
if is_pending and not current_job.is_pending():
|
||||||
is_pending = False
|
is_pending = False
|
||||||
self.optimizer.budget.jobs.update(current_job.task_id(), 1.)
|
self.optimizer.budget.jobs.update(current_job.task_id(), 1.)
|
||||||
if not is_pending:
|
if not is_pending:
|
||||||
self.optimizer.update_budget_per_job(current_job)
|
|
||||||
# noinspection PyProtectedMember
|
# noinspection PyProtectedMember
|
||||||
iteration_value = self.optimizer._objective_metric.get_current_raw_objective(current_job)
|
iteration_value = self.optimizer._objective_metric.get_current_raw_objective(current_job)
|
||||||
|
|
||||||
@ -69,7 +68,8 @@ class OptunaObjective(object):
|
|||||||
if self.max_iteration_per_job and iteration_value[0] >= self.max_iteration_per_job:
|
if self.max_iteration_per_job and iteration_value[0] >= self.max_iteration_per_job:
|
||||||
current_job.abort()
|
current_job.abort()
|
||||||
break
|
break
|
||||||
|
if not self.optimizer.monitor_job(current_job):
|
||||||
|
break
|
||||||
sleep(self.sleep_interval)
|
sleep(self.sleep_interval)
|
||||||
|
|
||||||
# noinspection PyProtectedMember
|
# noinspection PyProtectedMember
|
||||||
|
Loading…
Reference in New Issue
Block a user