Add Optimizer min_iteration_per_job for all SearchStrategies

This commit is contained in:
allegroai 2020-08-08 12:39:56 +03:00
parent ef83a648eb
commit 35fa4f010a
4 changed files with 23 additions and 10 deletions

View File

@ -94,8 +94,8 @@ an_optimizer = HyperParameterOptimizer(
# If OptimizerBOHB is used, it defines the maximum budget in terms of full jobs # If OptimizerBOHB is used, it defines the maximum budget in terms of full jobs
# basically the cumulative number of iterations will not exceed total_max_jobs * max_iteration_per_job # basically the cumulative number of iterations will not exceed total_max_jobs * max_iteration_per_job
total_max_jobs=10, total_max_jobs=10,
# This is only applicable for OptimizerBOHB and ignore by the rest # set the minimum number of iterations for an experiment, before early stopping.
# set the minimum number of iterations for an experiment, before early stopping # Does not apply for simple strategies such as RandomSearch or GridSearch
min_iteration_per_job=10, min_iteration_per_job=10,
# Set the maximum number of iterations for an experiment to execute # Set the maximum number of iterations for an experiment to execute
# (This is optional, unless using OptimizerBOHB where this is a must) # (This is optional, unless using OptimizerBOHB where this is a must)

View File

@ -178,7 +178,8 @@ class OptimizerBOHB(SearchStrategy, RandomSeed):
base_task_id=base_task_id, hyper_parameters=hyper_parameters, objective_metric=objective_metric, base_task_id=base_task_id, hyper_parameters=hyper_parameters, objective_metric=objective_metric,
execution_queue=execution_queue, num_concurrent_workers=num_concurrent_workers, execution_queue=execution_queue, num_concurrent_workers=num_concurrent_workers,
pool_period_min=pool_period_min, time_limit_per_job=time_limit_per_job, pool_period_min=pool_period_min, time_limit_per_job=time_limit_per_job,
max_iteration_per_job=max_iteration_per_job, total_max_jobs=total_max_jobs) max_iteration_per_job=max_iteration_per_job, min_iteration_per_job=min_iteration_per_job,
total_max_jobs=total_max_jobs)
self._max_iteration_per_job = max_iteration_per_job self._max_iteration_per_job = max_iteration_per_job
self._min_iteration_per_job = min_iteration_per_job self._min_iteration_per_job = min_iteration_per_job
verified_bohb_kwargs = ['eta', 'min_budget', 'max_budget', 'min_points_in_model', 'top_n_percent', verified_bohb_kwargs = ['eta', 'min_budget', 'max_budget', 'min_points_in_model', 'top_n_percent',

View File

@ -239,6 +239,7 @@ class SearchStrategy(object):
num_concurrent_workers, # type: int num_concurrent_workers, # type: int
pool_period_min=2., # type: float pool_period_min=2., # type: float
time_limit_per_job=None, # type: Optional[float] time_limit_per_job=None, # type: Optional[float]
min_iteration_per_job=None, # type: Optional[int]
max_iteration_per_job=None, # type: Optional[int] max_iteration_per_job=None, # type: Optional[int]
total_max_jobs=None, # type: Optional[int] total_max_jobs=None, # type: Optional[int]
**_ # type: Any **_ # type: Any
@ -255,8 +256,9 @@ class SearchStrategy(object):
:param float pool_period_min: The time between two consecutive pools (minutes). :param float pool_period_min: The time between two consecutive pools (minutes).
:param float time_limit_per_job: The maximum execution time per single job in minutes. When time limit is :param float time_limit_per_job: The maximum execution time per single job in minutes. When time limit is
exceeded, the job is aborted. (Optional) exceeded, the job is aborted. (Optional)
:param int max_iteration_per_job: The maximum iterations (of the Objective metric) per single job. When maximum :param int min_iteration_per_job: The minimum iterations (of the Objective metric) per single job (Optional)
iterations is exceeded, the job is aborted. (Optional) :param int max_iteration_per_job: The maximum iterations (of the Objective metric) per single job.
When maximum iterations is exceeded, the job is aborted. (Optional)
:param int total_max_jobs: The total maximum jobs for the optimization process. The default value is ``None``, :param int total_max_jobs: The total maximum jobs for the optimization process. The default value is ``None``,
for unlimited. for unlimited.
""" """
@ -269,6 +271,7 @@ class SearchStrategy(object):
self.pool_period_minutes = pool_period_min self.pool_period_minutes = pool_period_min
self.time_limit_per_job = time_limit_per_job self.time_limit_per_job = time_limit_per_job
self.max_iteration_per_job = max_iteration_per_job self.max_iteration_per_job = max_iteration_per_job
self.min_iteration_per_job = min_iteration_per_job
self.total_max_jobs = total_max_jobs self.total_max_jobs = total_max_jobs
self._stop_event = Event() self._stop_event = Event()
self._current_jobs = [] self._current_jobs = []

View File

@ -15,13 +15,15 @@ except ImportError:
class OptunaObjective(object): class OptunaObjective(object):
def __init__(self, base_task_id, queue_name, optimizer, max_iteration_per_job, sleep_interval, config_space): def __init__(self, base_task_id, queue_name, optimizer, max_iteration_per_job, min_iteration_per_job,
# type: (str, str, OptimizerOptuna, int, float, dict) -> None sleep_interval, config_space):
# type: (str, str, OptimizerOptuna, int, Optional[int], float, dict) -> None
self.base_task_id = base_task_id self.base_task_id = base_task_id
self.optimizer = optimizer self.optimizer = optimizer
self.queue_name = queue_name self.queue_name = queue_name
self.sleep_interval = sleep_interval self.sleep_interval = sleep_interval
self.max_iteration_per_job = max_iteration_per_job self.max_iteration_per_job = max_iteration_per_job
self.min_iteration_per_job = min_iteration_per_job
self._config_space = config_space self._config_space = config_space
def objective(self, trial): def objective(self, trial):
@ -57,7 +59,9 @@ class OptunaObjective(object):
trial.report(value=iteration_value[1], step=iteration_value[0]) trial.report(value=iteration_value[1], step=iteration_value[0])
# Handle pruning based on the intermediate value. # Handle pruning based on the intermediate value.
if trial.should_prune(): if trial.should_prune() and (
not self.min_iteration_per_job or
iteration_value[0] >= self.min_iteration_per_job):
current_job.abort() current_job.abort()
raise optuna.TrialPruned() raise optuna.TrialPruned()
@ -87,6 +91,7 @@ class OptimizerOptuna(SearchStrategy):
max_iteration_per_job, # type: Optional[int] max_iteration_per_job, # type: Optional[int]
total_max_jobs, # type: Optional[int] total_max_jobs, # type: Optional[int]
pool_period_min=2., # type: float pool_period_min=2., # type: float
min_iteration_per_job=None, # type: Optional[int]
time_limit_per_job=None, # type: Optional[float] time_limit_per_job=None, # type: Optional[float]
optuna_sampler=None, # type: Optional[optuna.samplers.base] optuna_sampler=None, # type: Optional[optuna.samplers.base]
optuna_pruner=None, # type: Optional[optuna.pruners.base] optuna_pruner=None, # type: Optional[optuna.pruners.base]
@ -97,7 +102,7 @@ class OptimizerOptuna(SearchStrategy):
""" """
Initialize an Optuna search strategy optimizer Initialize an Optuna search strategy optimizer
Optuna performs robust and efficient hyperparameter optimization at scale by combining. Optuna performs robust and efficient hyperparameter optimization at scale by combining.
Specific hyper-parameter pruning strategy can be selected via `sampler` and `pruner` argyments Specific hyper-parameter pruning strategy can be selected via `sampler` and `pruner` arguments
:param str base_task_id: Task ID (str) :param str base_task_id: Task ID (str)
:param list hyper_parameters: list of Parameter objects to optimize over :param list hyper_parameters: list of Parameter objects to optimize over
@ -114,6 +119,8 @@ class OptimizerOptuna(SearchStrategy):
This means more than total_max_jobs could be created, as long as the cumulative iterations This means more than total_max_jobs could be created, as long as the cumulative iterations
(summed over all created jobs) will not exceed `max_iteration_per_job * total_max_jobs` (summed over all created jobs) will not exceed `max_iteration_per_job * total_max_jobs`
:param float pool_period_min: time in minutes between two consecutive pools :param float pool_period_min: time in minutes between two consecutive pools
:param int min_iteration_per_job: The minimum number of iterations (of the Objective metric) per single job,
before early stopping the Job. (Optional)
:param float time_limit_per_job: Optional, maximum execution time per single job in minutes, :param float time_limit_per_job: Optional, maximum execution time per single job in minutes,
when time limit is exceeded job is aborted when time limit is exceeded job is aborted
:param optuna_kwargs: arguments passed directly to the Optuna object :param optuna_kwargs: arguments passed directly to the Optuna object
@ -122,7 +129,8 @@ class OptimizerOptuna(SearchStrategy):
base_task_id=base_task_id, hyper_parameters=hyper_parameters, objective_metric=objective_metric, base_task_id=base_task_id, hyper_parameters=hyper_parameters, objective_metric=objective_metric,
execution_queue=execution_queue, num_concurrent_workers=num_concurrent_workers, execution_queue=execution_queue, num_concurrent_workers=num_concurrent_workers,
pool_period_min=pool_period_min, time_limit_per_job=time_limit_per_job, pool_period_min=pool_period_min, time_limit_per_job=time_limit_per_job,
max_iteration_per_job=max_iteration_per_job, total_max_jobs=total_max_jobs) max_iteration_per_job=max_iteration_per_job, min_iteration_per_job=min_iteration_per_job,
total_max_jobs=total_max_jobs)
self._optuna_sampler = optuna_sampler self._optuna_sampler = optuna_sampler
self._optuna_pruner = optuna_pruner self._optuna_pruner = optuna_pruner
verified_optuna_kwargs = [] verified_optuna_kwargs = []
@ -154,6 +162,7 @@ class OptimizerOptuna(SearchStrategy):
queue_name=self._execution_queue, queue_name=self._execution_queue,
optimizer=self, optimizer=self,
max_iteration_per_job=self.max_iteration_per_job, max_iteration_per_job=self.max_iteration_per_job,
min_iteration_per_job=self.min_iteration_per_job,
sleep_interval=int(self.pool_period_minutes * 60), sleep_interval=int(self.pool_period_minutes * 60),
config_space=config_space, config_space=config_space,
) )