From 562be23ba4fc34cd4f653acdff1bbe22e168d51a Mon Sep 17 00:00:00 2001
From: allegroai <>
Date: Mon, 22 Jun 2020 17:13:03 +0300
Subject: [PATCH] Moved hyper-parameter example optimization into optimization folder

---
 .../base_template_keras_simple.py             |  86 +++++++++++++
 .../hyper_parameter_optimizer.py              | 120 ++++++++++++++++++
 .../requirements.txt                          |   3 +
 3 files changed, 209 insertions(+)
 create mode 100644 examples/optimization/hyper-parameter-optimization/base_template_keras_simple.py
 create mode 100644 examples/optimization/hyper-parameter-optimization/hyper_parameter_optimizer.py
 create mode 100644 examples/optimization/hyper-parameter-optimization/requirements.txt

diff --git a/examples/optimization/hyper-parameter-optimization/base_template_keras_simple.py b/examples/optimization/hyper-parameter-optimization/base_template_keras_simple.py
new file mode 100644
index 00000000..8d2c5e97
--- /dev/null
+++ b/examples/optimization/hyper-parameter-optimization/base_template_keras_simple.py
@@ -0,0 +1,86 @@
+# TRAINS - Keras with Tensorboard example code, automatic logging model and Tensorboard outputs
+#
+# Train a simple deep NN on the MNIST dataset.
+# Gets to 98.40% test accuracy after 20 epochs
+# (there is *a lot* of margin for parameter tuning).
+# 2 seconds per epoch on a K520 GPU.
+from __future__ import print_function
+
+import tempfile
+import os
+
+from keras.callbacks import TensorBoard, ModelCheckpoint
+from keras.datasets import mnist
+from keras.models import Sequential
+from keras.layers.core import Dense, Activation
+from keras.optimizers import RMSprop
+from keras.utils import np_utils
+import tensorflow as tf  # noqa: F401
+
+from trains import Task, Logger
+
+
+# Connecting TRAINS
+task = Task.init(project_name='examples', task_name='Keras HP optimization base')
+
+
+# the data, shuffled and split between train and test sets
+nb_classes = 10
+(X_train, y_train), (X_test, y_test) = mnist.load_data()
+
+X_train = X_train.reshape(60000, 784).astype('float32')/255.
+X_test = X_test.reshape(10000, 784).astype('float32')/255.
+print(X_train.shape[0], 'train samples')
+print(X_test.shape[0], 'test samples')
+
+# convert class vectors to binary class matrices
+Y_train = np_utils.to_categorical(y_train, nb_classes)
+Y_test = np_utils.to_categorical(y_test, nb_classes)
+
+args = {'batch_size': 128,
+        'epochs': 6,
+        'layer_1': 512,
+        'layer_2': 512,
+        'layer_3': 10,
+        'layer_4': 512,
+        }
+args = task.connect(args)
+
+model = Sequential()
+model.add(Dense(args['layer_1'], input_shape=(784,)))
+model.add(Activation('relu'))
+# model.add(Dropout(0.2))
+model.add(Dense(args['layer_2']))
+model.add(Activation('relu'))
+# model.add(Dropout(0.2))
+model.add(Dense(args['layer_3']))
+model.add(Activation('softmax'))
+
+model2 = Sequential()
+model2.add(Dense(args['layer_4'], input_shape=(784,)))
+model2.add(Activation('relu'))
+
+model.summary()
+
+model.compile(loss='categorical_crossentropy',
+              optimizer=RMSprop(),
+              metrics=['accuracy'])
+
+# Advanced: setting model class enumeration
+labels = dict(('digit_%d' % i, i) for i in range(10))
+task.set_model_label_enumeration(labels)
+
+output_folder = os.path.join(tempfile.gettempdir(), 'keras_example')
+
+board = TensorBoard(log_dir=output_folder, write_images=False)
+model_store = ModelCheckpoint(filepath=os.path.join(output_folder, 'weight.hdf5'))
+
+history = model.fit(X_train, Y_train,
+                    batch_size=args['batch_size'], epochs=args['epochs'],
+                    callbacks=[board, model_store],
+                    validation_data=(X_test, Y_test))
+score = model.evaluate(X_test, Y_test, verbose=0)
+print('Test score:', score[0])
+print('Test accuracy:', score[1])
+Logger.current_logger().report_scalar(title='evaluate', series='score', value=score[0], iteration=args['epochs'])
+Logger.current_logger().report_scalar(title='evaluate', series='accuracy', value=score[1], iteration=args['epochs'])
diff --git a/examples/optimization/hyper-parameter-optimization/hyper_parameter_optimizer.py b/examples/optimization/hyper-parameter-optimization/hyper_parameter_optimizer.py
new file mode 100644
index 00000000..3b39e370
--- /dev/null
+++ b/examples/optimization/hyper-parameter-optimization/hyper_parameter_optimizer.py
@@ -0,0 +1,120 @@
+import logging
+
+from trains import Task
+from trains.automation import DiscreteParameterRange, HyperParameterOptimizer, RandomSearch, \
+    UniformIntegerParameterRange
+
+try:
+    from trains.automation.hpbandster import OptimizerBOHB
+    Our_SearchStrategy = OptimizerBOHB
+except ValueError:
+    logging.getLogger().warning(
+        'Apologies, it seems you do not have \'hpbandster\' installed, '
+        'we will be using RandomSearch strategy instead\n'
+        'If you like to try ' '{{BOHB}: Robust and Efficient Hyperparameter Optimization at Scale},\n'
+        'run: pip install hpbandster')
+    Our_SearchStrategy = RandomSearch
+
+
+def job_complete_callback(
+        job_id,  # type: str
+        objective_value,  # type: float
+        objective_iteration,  # type: int
+        job_parameters,  # type: dict
+        top_performance_job_id  # type: str
+):
+    """Callback invoked every time an experiment (job) is completed.
+
+    Prints the job's id, objective value, objective iteration and parameters,
+    and an extra message when job_id equals top_performance_job_id
+    (i.e. the completed job is the reported top performer).
+    """
+    print('Job completed!', job_id, objective_value, objective_iteration, job_parameters)
+    if job_id == top_performance_job_id:
+        print('WOOT WOOT we broke the record! Objective reached {}'.format(objective_value))
+
+
+# Connecting TRAINS
+task = Task.init(project_name='Hyper-Parameter Optimization',
+                 task_name='Automatic Hyper-Parameter Optimization',
+                 task_type=Task.TaskTypes.optimizer,
+                 reuse_last_task_id=False)
+
+# experiment template to optimize in the hyper-parameter optimization
+args = {
+    'template_task_id': None,
+    'run_as_service': False,
+}
+args = task.connect(args)
+
+# Get the template task experiment that we want to optimize
+if not args['template_task_id']:
+    args['template_task_id'] = Task.get_task(
+        project_name='examples', task_name='Keras HP optimization base').id
+
+# Example use case:
+an_optimizer = HyperParameterOptimizer(
+    # This is the experiment we want to optimize
+    base_task_id=args['template_task_id'],
+    # here we define the hyper-parameters to optimize
+    hyper_parameters=[
+        UniformIntegerParameterRange('layer_1', min_value=128, max_value=512, step_size=128),
+        UniformIntegerParameterRange('layer_2', min_value=128, max_value=512, step_size=128),
+        DiscreteParameterRange('batch_size', values=[96, 128, 160]),
+        DiscreteParameterRange('epochs', values=[30]),
+    ],
+    # this is the objective metric we want to maximize/minimize
+    objective_metric_title='val_acc',
+    objective_metric_series='val_acc',
+    # now we decide if we want to maximize it or minimize it (accuracy we maximize)
+    objective_metric_sign='max',
+    # let us limit the number of concurrent experiments,
+    # this in turn will make sure we do not bombard the scheduler with experiments.
+    # if we have an auto-scaler connected, this, by proxy, will limit the number of machines
+    max_number_of_concurrent_tasks=2,
+    # this is the optimizer class (actually doing the optimization)
+    # Currently, we can choose from GridSearch, RandomSearch or OptimizerBOHB (Bayesian optimization Hyper-Band)
+    # more are coming soon...
+    optimizer_class=Our_SearchStrategy,
+    # Select an execution queue to schedule the experiments for execution
+    execution_queue='moshik',
+    # Optional: Limit the execution time of a single experiment, in minutes.
+    # (this is optional, and if using OptimizerBOHB, it is ignored)
+    time_limit_per_job=10.,
+    # Checking the experiments every 6 seconds is way too often, we should probably set it to 5 min,
+    # assuming a single experiment is usually hours...
+    pool_period_min=0.1,
+    # set the maximum number of jobs to launch for the optimization, default (None) unlimited
+    # If OptimizerBOHB is used, it defines the maximum budget in terms of full jobs
+    # basically the cumulative number of iterations will not exceed total_max_jobs * max_iteration_per_job
+    total_max_jobs=10,
+    # This is only applicable for OptimizerBOHB and ignored by the rest
+    # set the minimum number of iterations for an experiment, before early stopping
+    min_iteration_per_job=10,
+    # Set the maximum number of iterations for an experiment to execute
+    # (This is optional, unless using OptimizerBOHB where this is a must)
+    max_iteration_per_job=30,
+)
+
+# if we are running as a service, just enqueue ourselves into the services queue and let it run the optimization
+if args['run_as_service']:
+    # if this code is executed by `trains-agent` the function call does nothing.
+    # if executed locally, the local process will be terminated, and a remote copy will be executed instead
+    task.execute_remotely(queue_name='services', exit_process=True)
+
+# report every 12 seconds (0.2 minutes), this is way too often, but we are testing here
+an_optimizer.set_report_period(0.2)
+# start the optimization process, callback function to be called every time an experiment is completed
+# this function returns immediately
+an_optimizer.start(job_complete_callback=job_complete_callback)
+# set the time limit for the optimization process (2 hours)
+an_optimizer.set_time_limit(in_minutes=120.0)
+# wait until process is done (notice we are controlling the optimization process in the background)
+an_optimizer.wait()
+# optimization is completed, print the top performing experiments id
+top_exp = an_optimizer.get_top_experiments(top_k=3)
+print([t.id for t in top_exp])
+# make sure background optimization stopped
+an_optimizer.stop()
+
+print('We are done, good bye')
diff --git a/examples/optimization/hyper-parameter-optimization/requirements.txt b/examples/optimization/hyper-parameter-optimization/requirements.txt
new file mode 100644
index 00000000..e1a47da3
--- /dev/null
+++ b/examples/optimization/hyper-parameter-optimization/requirements.txt
@@ -0,0 +1,3 @@
+keras
+tensorflow
+trains
\ No newline at end of file