From ba4c4c558f35ed97878f2c4d27ba405ea28a54de Mon Sep 17 00:00:00 2001
From: allegroai <>
Date: Sun, 24 May 2020 08:21:21 +0300
Subject: [PATCH] Add automation examples

---
 .../automation/base_template_keras_simple.py  |  86 ++++++++++++++
 .../automation/hyper_parameter_optimizer.py   | 107 ++++++++++++++++++
 .../automation/random_param_search_example.py |  60 ++++++++++
 examples/automation/task_piping_example.py    |  43 +++++++
 examples/automation/toy_base_task.py          |  19 ++++
 5 files changed, 315 insertions(+)
 create mode 100644 examples/automation/base_template_keras_simple.py
 create mode 100644 examples/automation/hyper_parameter_optimizer.py
 create mode 100644 examples/automation/random_param_search_example.py
 create mode 100644 examples/automation/task_piping_example.py
 create mode 100644 examples/automation/toy_base_task.py

diff --git a/examples/automation/base_template_keras_simple.py b/examples/automation/base_template_keras_simple.py
new file mode 100644
index 00000000..9538ffed
--- /dev/null
+++ b/examples/automation/base_template_keras_simple.py
@@ -0,0 +1,86 @@
+# TRAINS - Keras with Tensorboard example code, automatic logging model and Tensorboard outputs
+#
+# Train a simple deep NN on the MNIST dataset.
+# Gets to 98.40% test accuracy after 20 epochs
+# (there is *a lot* of margin for parameter tuning).
+# 2 seconds per epoch on a K520 GPU.
+from __future__ import print_function
+
+import tempfile
+import os
+
+from keras.callbacks import TensorBoard, ModelCheckpoint
+from keras.datasets import mnist
+from keras.models import Sequential
+from keras.layers.core import Dense, Activation
+from keras.optimizers import RMSprop
+from keras.utils import np_utils
+import tensorflow as tf
+
+from trains import Task, Logger
+
+
+# Connecting TRAINS
+task = Task.init(project_name='examples', task_name='Keras HP optimization base')
+
+
+# the data, shuffled and split between train and test sets
+nb_classes = 10
+(X_train, y_train), (X_test, y_test) = mnist.load_data()
+
+X_train = X_train.reshape(60000, 784).astype('float32')/255.
+X_test = X_test.reshape(10000, 784).astype('float32')/255.
+print(X_train.shape[0], 'train samples')
+print(X_test.shape[0], 'test samples')
+
+# convert class vectors to binary class matrices
+Y_train = np_utils.to_categorical(y_train, nb_classes)
+Y_test = np_utils.to_categorical(y_test, nb_classes)
+
+args = {'batch_size': 128,
+        'epochs': 6,
+        'layer_1': 512,
+        'layer_2': 512,
+        'layer_3': 10,
+        'layer_4': 512,
+        }
+args = task.connect(args)
+
+model = Sequential()
+model.add(Dense(args['layer_1'], input_shape=(784,)))
+model.add(Activation('relu'))
+# model.add(Dropout(0.2))
+model.add(Dense(args['layer_2']))
+model.add(Activation('relu'))
+# model.add(Dropout(0.2))
+model.add(Dense(args['layer_3']))
+model.add(Activation('softmax'))
+
+model2 = Sequential()
+model2.add(Dense(args['layer_4'], input_shape=(784,)))
+model2.add(Activation('relu'))
+
+model.summary()
+
+model.compile(loss='categorical_crossentropy',
+              optimizer=RMSprop(),
+              metrics=['accuracy'])
+
+# Advanced: setting model class enumeration
+labels = dict(('digit_%d' % i, i) for i in range(10))
+task.set_model_label_enumeration(labels)
+
+output_folder = os.path.join(tempfile.gettempdir(), 'keras_example')
+
+board = TensorBoard(log_dir=output_folder, write_images=False)
+model_store = ModelCheckpoint(filepath=os.path.join(output_folder, 'weight.hdf5'))
+
+history = model.fit(X_train, Y_train,
+                    batch_size=args['batch_size'], epochs=args['epochs'],
+                    callbacks=[board, model_store],
+                    validation_data=(X_test, Y_test))
+score = model.evaluate(X_test, Y_test, verbose=0)
+print('Test score:', score[0])
+print('Test accuracy:', score[1])
+Logger.current_logger().report_scalar(title='evaluate', series='score', value=score[0], iteration=args['epochs'])
+Logger.current_logger().report_scalar(title='evaluate', series='accuracy', value=score[1], iteration=args['epochs'])
diff --git a/examples/automation/hyper_parameter_optimizer.py b/examples/automation/hyper_parameter_optimizer.py
new file mode 100644
index 00000000..1b8fb61b
--- /dev/null
+++ b/examples/automation/hyper_parameter_optimizer.py
@@ -0,0 +1,107 @@
+import logging
+
+from trains.automation import UniformParameterRange, DiscreteParameterRange, UniformIntegerParameterRange, ParameterSet
+from trains.automation import GridSearch, RandomSearch, HyperParameterOptimizer
+from trains import Task
+
+try:
+    from trains.automation.hpbandster import OptimizerBOHB
+    Our_SearchStrategy = OptimizerBOHB
+except (ImportError, ValueError):  # tolerate either error type when 'hpbandster' is unavailable
+    logging.getLogger().warning(
+        'Apologies, it seems you do not have \'hpbandster\' installed, '
+        'we will be using RandomSearch strategy instead\n'
+        'If you like to try ' '{{BOHB}: Robust and Efficient Hyperparameter Optimization at Scale},\n'
+        'run: pip install hpbandster')
+    Our_SearchStrategy = RandomSearch
+
+
+def job_complete_callback(
+    job_id,  # type: str
+    objective_value,  # type: float
+    objective_iteration,  # type: int
+    job_parameters,  # type: dict
+    top_performance_job_id  # type: str
+):
+    """Print a finished job's result and announce when that job is the current top performer."""
+    print('Job completed!', job_id, objective_value, objective_iteration, job_parameters)
+    if job_id == top_performance_job_id:
+        print('WOOT WOOT we broke the record! Objective reached {}'.format(objective_value))
+
+
+# Connecting TRAINS
+task = Task.init(project_name='Hyper-Parameter Optimization',
+                 task_name='Automatic Hyper-Parameter Optimization',
+                 reuse_last_task_id=False)
+
+# experiment template to optimize in the hyper-parameter optimization
+args = {
+    'template_task_id': None,
+}
+args = task.connect(args)
+
+# Get the template task experiment that we want to optimize
+if not args['template_task_id']:
+    args['template_task_id'] = Task.get_task(
+        project_name='examples', task_name='Keras HP optimization base').id
+
+# Example use case:
+an_optimizer = HyperParameterOptimizer(
+    # This is the experiment we want to optimize
+    base_task_id=args['template_task_id'],
+    # here we define the hyper-parameters to optimize
+    hyper_parameters=[
+        UniformIntegerParameterRange('layer_1', min_value=128, max_value=512, step_size=128),
+        UniformIntegerParameterRange('layer_2', min_value=128, max_value=512, step_size=128),
+        DiscreteParameterRange('batch_size', values=[96, 128, 160]),
+        DiscreteParameterRange('epochs', values=[30]),
+    ],
+    # this is the objective metric we want to maximize/minimize
+    objective_metric_title='val_acc',
+    objective_metric_series='val_acc',
+    # now we decide if we want to maximize it or minimize it (accuracy we maximize)
+    objective_metric_sign='max',
+    # let us limit the number of concurrent experiments,
+    # this in turn will make sure we do not bombard the scheduler with experiments.
+    # if we have an auto-scaler connected, this, by proxy, will limit the number of machines
+    max_number_of_concurrent_tasks=2,
+    # this is the optimizer class (actually doing the optimization)
+    # Currently, we can choose from GridSearch, RandomSearch or OptimizerBOHB (Bayesian optimization Hyper-Band)
+    # more are coming soon...
+    optimizer_class=Our_SearchStrategy,
+    # Select an execution queue to schedule the experiments for execution
+    execution_queue='default',
+    # Limit the execution time of a single experiment
+    # (this is optional, and if using OptimizerBOHB, it is ignored)
+    max_job_execution_minutes=10.,
+    # Checking the experiments every 6 seconds is way too often, we should probably set it to 5 min,
+    # assuming a single experiment is usually hours...
+    pool_period_min=0.1,
+    # set the maximum number of jobs to launch for the optimization, default (None) unlimited
+    # If OptimizerBOHB is used, it defines the maximum budget in terms of full jobs
+    # basically the cumulative number of iterations will not exceed total_max_jobs * max_iteration_per_job
+    total_max_jobs=10,
+    # This is only applicable for OptimizerBOHB and ignored by the rest
+    # set the minimum number of iterations for an experiment, before early stopping
+    min_iteration_per_job=10,
+    # This is only applicable for OptimizerBOHB and ignored by the rest
+    # set the maximum number of iterations for an experiment to execute
+    max_iteration_per_job=30,
+)
+
+# report every 12 seconds, this is way too often, but we are testing here :)
+an_optimizer.set_report_period(0.2)
+# start the optimization process, callback function to be called every time an experiment is completed
+# this function returns immediately
+an_optimizer.start(job_complete_callback=job_complete_callback)
+# set the time limit for the optimization process (2 hours)
+an_optimizer.set_time_limit(in_minutes=120.0)
+# wait until process is done (notice we are controlling the optimization process in the background)
+an_optimizer.wait()
+# optimization is completed, print the top performing experiments id
+top_exp = an_optimizer.get_top_experiments(top_k=3)
+print([t.id for t in top_exp])
+# make sure background optimization stopped
+an_optimizer.stop()
+
+print('We are done, good bye')
diff --git a/examples/automation/random_param_search_example.py b/examples/automation/random_param_search_example.py
new file mode 100644
index 00000000..6cfbae22
--- /dev/null
+++ b/examples/automation/random_param_search_example.py
@@ -0,0 +1,60 @@
+from random import random, sample
+from trains import Task
+
+# Connecting TRAINS
+task = Task.init(project_name='examples', task_name='Random Hyper-Parameter Search Example')
+
+# Create a hyper-parameter dictionary for the task
+params = dict()
+
+# track my parameters dictionary
+params = task.connect(params)
+
+# define random search space,
+params['batch_size'] = [64, 96, 128, 160, 192]
+params['layer_1'] = [128, 512, 32]
+params['layer_2'] = [128, 512, 32]
+
+# This is a simple random search
+# (can be integrated with 'bayesian-optimization' 'hpbandster' etc.)
+space = {
+    'batch_size': lambda: sample(params['batch_size'], 1)[0],
+    'layer_1': lambda: sample(range(*params['layer_1']), 1)[0],
+    'layer_2': lambda: sample(range(*params['layer_2']), 1)[0],
+}
+
+# number of random samples to test from 'space'
+params['total_number_of_experiments'] = 3
+
+# execution queue to add experiments to
+params['execution_queue_name'] = 'default'
+
+# experiment template to optimize with random parameter search
+params['experiment_template_name'] = 'Keras HP optimization base'
+
+# Select base template task
+# Notice we can be more imaginative and use task_id which will eliminate the need to use project name
+template_task = Task.get_task(project_name='examples', task_name=params['experiment_template_name'])
+
+for i in range(params['total_number_of_experiments']):
+    # clone the template task into a new write enabled task (where we can change parameters)
+    cloned_task = Task.clone(source_task=template_task,
+                             name=template_task.name+' {}'.format(i), parent=template_task.id)
+
+    # get the original template parameters
+    cloned_task_parameters = cloned_task.get_parameters()
+
+    # override with random samples from grid
+    for k in space.keys():
+        cloned_task_parameters[k] = space[k]()
+
+    # put back into the new cloned task
+    cloned_task.set_parameters(cloned_task_parameters)
+    print('Experiment {} set with parameters {}'.format(i, cloned_task_parameters))
+
+    # enqueue the task for execution
+    Task.enqueue(cloned_task.id, queue_name=params['execution_queue_name'])
+    print('Experiment id={} enqueue for execution'.format(cloned_task.id))
+
+# we are done, the next step is to watch the experiments graphs
+print('Done')
diff --git a/examples/automation/task_piping_example.py b/examples/automation/task_piping_example.py
new file mode 100644
index 00000000..a9c99cc4
--- /dev/null
+++ b/examples/automation/task_piping_example.py
@@ -0,0 +1,43 @@
+from trains import Task
+from time import sleep
+
+# Initialize the Task Pipe's first Task used to start the Task Pipe
+task = Task.init('examples', 'Simple Controller Task')
+
+# Create a hyper-parameter dictionary for the task
+param = dict()
+# Connect the hyper-parameter dictionary to the task
+param = task.connect(param)
+
+# In this example we pass next task's name as a parameter
+param['next_task_name'] = 'Toy Base Task'
+# This is a parameter name in the next task we want to change
+param['param_name'] = 'Example_Param'
+# This is the parameter value in the next task we want to change
+param['param_name_new_value'] = 3
+# The queue where we want the template task (clone) to be sent to
+param['execution_queue_name'] = 'default'
+
+# Simulate the work of a Task
+print('Processing....')
+sleep(2.0)
+print('Done processing :)')
+
+# Get a reference to the task to pipe to.
+next_task = Task.get_task(project_name=task.get_project_name(), task_name=param['next_task_name'])
+
+# Clone the task to pipe to. This creates a task with status Draft whose parameters can be modified.
+cloned_task = Task.clone(source_task=next_task, name='Auto generated cloned task')
+
+# Get the original parameters of the Task, modify the value of one parameter,
+# and set the parameters in the next Task
+cloned_task_parameters = cloned_task.get_parameters()
+cloned_task_parameters[param['param_name']] = param['param_name_new_value']
+cloned_task.set_parameters(cloned_task_parameters)
+
+# Enqueue the Task for execution. The enqueued Task must already exist in the trains platform
+print('Enqueue next step in pipeline to queue: {}'.format(param['execution_queue_name']))
+Task.enqueue(cloned_task.id, queue_name=param['execution_queue_name'])
+
+# We are done. The next step in the pipeline is in charge of the pipeline now.
+print('Done')
diff --git a/examples/automation/toy_base_task.py b/examples/automation/toy_base_task.py
new file mode 100644
index 00000000..7e1521be
--- /dev/null
+++ b/examples/automation/toy_base_task.py
@@ -0,0 +1,19 @@
+# This Task is the base task that we will be executing as a second step (see task_piping_example.py)
+# In order to make sure this experiment is registered in the platform, you must execute it once.
+
+from trains import Task
+
+# Initialize the Task that is executed as the second step of the task pipe
+task = Task.init('examples', 'Toy Base Task')
+
+# Create a dictionary for hyper-parameters
+params = dict()
+
+# Add a parameter and value to the dictionary
+params['Example_Param'] = 1
+
+# Connect the hyper-parameter dictionary to the task
+params = task.connect(params)
+
+# Print the value to demonstrate that it was set by the initiating task.
+print("Example_Param is {}".format(params['Example_Param']))