mirror of
https://github.com/clearml/clearml
synced 2025-03-03 10:42:00 +00:00
Fix HPO randomly aborting running tasks before the time limit
This commit is contained in:
parent
b1120195df
commit
c6769b5fc0
@ -1747,8 +1747,16 @@ class HyperParameterOptimizer(object):
|
|||||||
|
|
||||||
self._report_remaining_budget(task_logger, counter)
|
self._report_remaining_budget(task_logger, counter)
|
||||||
|
|
||||||
if self.optimizer.budget.compute_time.used and self.optimizer.budget.compute_time.used >= 1.0:
|
if (
|
||||||
# Reached compute time limit
|
self.optimizer.budget.compute_time.used
|
||||||
|
and self.optimizer.budget.compute_time.limit
|
||||||
|
and self.optimizer.budget.compute_time.used >= self.optimizer.budget.compute_time.limit
|
||||||
|
):
|
||||||
|
logger.warning(
|
||||||
|
"Optimizer task reached compute time limit (used {:.2f} out of {:.2f})".format(
|
||||||
|
self.optimizer.budget.compute_time.limit, self.optimizer.compute_time.used
|
||||||
|
)
|
||||||
|
)
|
||||||
timeout = -1
|
timeout = -1
|
||||||
|
|
||||||
self._report_resources(task_logger, counter)
|
self._report_resources(task_logger, counter)
|
||||||
|
Loading…
Reference in New Issue
Block a user