Moved hyper-parameter optimization example into optimization folder

This commit is contained in:
allegroai 2020-06-22 17:13:03 +03:00
parent 1b153e401e
commit 562be23ba4
3 changed files with 203 additions and 0 deletions

View File

@@ -0,0 +1,86 @@
# TRAINS - Keras with TensorBoard example code: automatic logging of the model and TensorBoard outputs
#
# Trains a simple deep NN on the MNIST dataset.
# Reaches 98.40% test accuracy after 20 epochs
# (there is *a lot* of margin for parameter tuning).
# 2 seconds per epoch on a K520 GPU.
from __future__ import print_function
import tempfile
import os
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import RMSprop
from keras.utils import np_utils
import tensorflow as tf # noqa: F401
from trains import Task, Logger
# Connecting TRAINS
task = Task.init(project_name='examples', task_name='Keras HP optimization base')
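# this run is the template experiment: the optimizer script added in this commit
# looks it up by name, clones it, and re-runs it with different hyper-parameters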
# the data, shuffled and split between train and test sets
nb_classes = 10
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784).astype('float32')/255.
X_test = X_test.reshape(10000, 784).astype('float32')/255.
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)
args = {'batch_size': 128,
        'epochs': 6,
        'layer_1': 512,
        'layer_2': 512,
        'layer_3': 10,
        'layer_4': 512,
        }
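# connecting the dictionary logs these values as the task's hyper-parameters;
# when the optimizer clones this task, the sampled values override the defaults above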
args = task.connect(args)
model = Sequential()
model.add(Dense(args['layer_1'], input_shape=(784,)))
model.add(Activation('relu'))
# model.add(Dropout(0.2))
model.add(Dense(args['layer_2']))
model.add(Activation('relu'))
# model.add(Dropout(0.2))
model.add(Dense(args['layer_3']))
model.add(Activation('softmax'))
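# note: this second network is defined but never compiled or trained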
model2 = Sequential()
model2.add(Dense(args['layer_4'], input_shape=(784,)))
model2.add(Activation('relu'))
model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])
# Advanced: setting model class enumeration
labels = dict(('digit_%d' % i, i) for i in range(10))
task.set_model_label_enumeration(labels)
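# the enumeration is stored together with any model snapshot this task uploads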
output_folder = os.path.join(tempfile.gettempdir(), 'keras_example')
# make sure the folder exists before ModelCheckpoint tries to write into it
if not os.path.isdir(output_folder):
    os.makedirs(output_folder)
board = TensorBoard(log_dir=output_folder, write_images=False)
model_store = ModelCheckpoint(filepath=os.path.join(output_folder, 'weight.hdf5'))
history = model.fit(X_train, Y_train,
                    batch_size=args['batch_size'], epochs=args['epochs'],
                    callbacks=[board, model_store],
                    validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
Logger.current_logger().report_scalar(title='evaluate', series='score', value=score[0], iteration=args['epochs'])
Logger.current_logger().report_scalar(title='evaluate', series='accuracy', value=score[1], iteration=args['epochs'])
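# note: the TensorBoard callback above also reports val_acc every epoch; TRAINS captures it
# automatically, and that scalar is what the optimizer script uses as its objective metric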

View File

@@ -0,0 +1,114 @@
import logging
from trains import Task
from trains.automation import DiscreteParameterRange, HyperParameterOptimizer, RandomSearch, \
    UniformIntegerParameterRange
try:
    from trains.automation.hpbandster import OptimizerBOHB
    Our_SearchStrategy = OptimizerBOHB
except ValueError:
    logging.getLogger().warning(
        'Apologies, it seems you do not have \'hpbandster\' installed, '
        'we will be using the RandomSearch strategy instead.\n'
        'If you would like to try BOHB (Robust and Efficient Hyperparameter Optimization at Scale), '
        'run: pip install hpbandster')
    Our_SearchStrategy = RandomSearch
def job_complete_callback(
        job_id,                 # type: str
        objective_value,        # type: float
        objective_iteration,    # type: int
        job_parameters,         # type: dict
        top_performance_job_id  # type: str
):
    print('Job completed!', job_id, objective_value, objective_iteration, job_parameters)
    if job_id == top_performance_job_id:
        print('WOOT WOOT we broke the record! Objective reached {}'.format(objective_value))
# Connecting TRAINS
task = Task.init(project_name='Hyper-Parameter Optimization',
                 task_name='Automatic Hyper-Parameter Optimization',
                 task_type=Task.TaskTypes.optimizer,
                 reuse_last_task_id=False)
# experiment template to optimize in the hyper-parameter optimization
args = {
    'template_task_id': None,
    'run_as_service': False,
}
args = task.connect(args)
# Get the template task experiment that we want to optimize
if not args['template_task_id']:
    args['template_task_id'] = Task.get_task(
        project_name='examples', task_name='Keras HP optimization base').id
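# (alternatively, paste a specific experiment id into args['template_task_id'] above)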
# Example use case:
an_optimizer = HyperParameterOptimizer(
    # This is the experiment we want to optimize
    base_task_id=args['template_task_id'],
    # here we define the hyper-parameters to optimize
    # (the names must match the parameter names connected in the base experiment)
    hyper_parameters=[
        UniformIntegerParameterRange('layer_1', min_value=128, max_value=512, step_size=128),
        UniformIntegerParameterRange('layer_2', min_value=128, max_value=512, step_size=128),
        DiscreteParameterRange('batch_size', values=[96, 128, 160]),
        DiscreteParameterRange('epochs', values=[30]),
    ],
    # this is the objective metric we want to maximize/minimize
    objective_metric_title='val_acc',
    objective_metric_series='val_acc',
    # now we decide if we want to maximize it or minimize it (accuracy we maximize)
    objective_metric_sign='max',
    # let us limit the number of concurrent experiments;
    # this in turn makes sure we do not bombard the scheduler with experiments.
    # if we have an auto-scaler connected, this, by proxy, will also limit the number of machines
    max_number_of_concurrent_tasks=2,
    # this is the optimizer class (actually doing the optimization)
    # Currently, we can choose from GridSearch, RandomSearch or OptimizerBOHB (Bayesian Optimization Hyper-Band)
    # more are coming soon...
    optimizer_class=Our_SearchStrategy,
    # Select an execution queue to schedule the experiments for execution
    execution_queue='moshik',
    # Optional: limit the execution time of a single experiment, in minutes
    # (ignored when using OptimizerBOHB)
    time_limit_per_job=10.,
    # Checking the experiments every 6 seconds is way too often; we should probably set it to 5 min,
    # assuming a single experiment usually takes hours...
    pool_period_min=0.1,
    # set the maximum number of jobs to launch for the optimization, default (None) unlimited.
    # If OptimizerBOHB is used, this defines the maximum budget in terms of full jobs,
    # i.e. the cumulative number of iterations will not exceed total_max_jobs * max_iteration_per_job
    total_max_jobs=10,
    # This is only applicable to OptimizerBOHB and is ignored by the rest:
    # set the minimum number of iterations for an experiment before early stopping
    min_iteration_per_job=10,
    # Set the maximum number of iterations for an experiment to execute
    # (this is optional, unless using OptimizerBOHB, where it is required)
    max_iteration_per_job=30,
)
# if we are running as a service, just enqueue ourselves into the services queue and let it run the optimization
if args['run_as_service']:
    # if this code is executed by `trains-agent`, the function call does nothing.
    # if executed locally, the local process will be terminated and a remote copy will be executed instead
    task.execute_remotely(queue_name='services', exit_process=True)
# report every ~2 minutes; this is way too often, but we are testing here :)
an_optimizer.set_report_period(2.2)
# start the optimization process, callback function to be called every time an experiment is completed
# this function returns immediately
an_optimizer.start(job_complete_callback=job_complete_callback)
# set the time limit for the optimization process (2 hours)
an_optimizer.set_time_limit(in_minutes=120.0)
# wait until process is done (notice we are controlling the optimization process in the background)
an_optimizer.wait()
# optimization is completed, print the ids of the top performing experiments
top_exp = an_optimizer.get_top_experiments(top_k=3)
print([t.id for t in top_exp])
# make sure background optimization stopped
an_optimizer.stop()
print('We are done, goodbye')
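# A sketch of the expected workflow (assuming a standard trains-agent setup):
#   1. run the Keras base script once so the 'Keras HP optimization base' experiment exists under 'examples'
#   2. start a trains-agent listening on the execution queue named above (here 'moshik'), e.g.:
#        trains-agent daemon --queue moshik
#   3. run this script; it clones the base experiment and enqueues one copy per sampled parameter set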

View File

@@ -0,0 +1,3 @@
keras
tensorflow
trains
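# optional, enables the BOHB search strategy in the optimizer example:
# hpbandster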