mirror of
https://github.com/clearml/clearml
synced 2025-06-26 18:16:07 +00:00
Refactor examples
This commit is contained in:
58
examples/automation/controller_example.py
Normal file
58
examples/automation/controller_example.py
Normal file
@@ -0,0 +1,58 @@
|
||||
pipeline_node = {
|
||||
"step1": {
|
||||
# identify the node, so that we code reference outputs, use only alphanumeric characters
|
||||
"node_name": "step1",
|
||||
# parent node, to be executed before this step
|
||||
"parent_node": None,
|
||||
# the experiment/task id to clone & execute
|
||||
"base_task_id": "gafghafh",
|
||||
# preferred queue name/id to use for execution
|
||||
"queue": None,
|
||||
# preferred docker image (override experiment request)
|
||||
"docker": None,
|
||||
# parameter overrides
|
||||
"parameter_override": {"arg": 123, },
|
||||
# task definition overrides, currently not supported
|
||||
"task_override": None,
|
||||
},
|
||||
"step2": {
|
||||
# identify the node, so that we code reference outputs, use only alphanumeric characters
|
||||
"node_name": "step2",
|
||||
# parent node, to be executed before this step
|
||||
"parent_node": "step1",
|
||||
# the experiment/task id to clone & execute
|
||||
"base_task_id": "123456aa",
|
||||
# preferred queue name/id to use for execution
|
||||
"queue": "2xgpu",
|
||||
# preferred docker image (override experiment request)
|
||||
"docker": None,
|
||||
# parameter overrides
|
||||
"parameter_override": {
|
||||
# plug the output of pipeline node `step1` artifact named `my_data` into the Task parameter `url`
|
||||
"url": "@step1:artifacts/my_data",
|
||||
# plug the output of pipeline node `step1` parameter named `arg` into the Task parameter `arg`
|
||||
"arg": "@step1:parameters/arg",
|
||||
},
|
||||
# task definition overrides, currently not supported
|
||||
"task_override": None,
|
||||
},
|
||||
"step3": {
|
||||
# identify the node, so that we code reference outputs, use only alphanumeric characters
|
||||
"node_name": "step3",
|
||||
# parent node, to be executed before this step
|
||||
"parent_node": "step2",
|
||||
# the experiment/task id to clone & execute
|
||||
"base_task_id": "zzcc1244",
|
||||
# preferred queue name/id to use for execution
|
||||
"queue": "2xGPUS",
|
||||
# preferred docker image (override experiment request)
|
||||
"docker": None,
|
||||
# parameter overrides
|
||||
"parameter_override": {
|
||||
# plug the output of pipeline node `step2` last output model into the Task parameter url
|
||||
"model_url": "@step2:models/output/-1",
|
||||
},
|
||||
# task definition overrides, currently not supported
|
||||
"task_override": None,
|
||||
},
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
from random import random, sample
|
||||
from random import sample
|
||||
|
||||
from trains import Task
|
||||
|
||||
# Connecting TRAINS
|
||||
1
examples/automation/requirements.txt
Normal file
1
examples/automation/requirements.txt
Normal file
@@ -0,0 +1 @@
|
||||
trains
|
||||
@@ -1,86 +0,0 @@
|
||||
# TRAINS - Keras with Tensorboard example code, automatic logging model and Tensorboard outputs
|
||||
#
|
||||
# Train a simple deep NN on the MNIST dataset.
|
||||
# Gets to 98.40% test accuracy after 20 epochs
|
||||
# (there is *a lot* of margin for parameter tuning).
|
||||
# 2 seconds per epoch on a K520 GPU.
|
||||
from __future__ import print_function
|
||||
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
from keras.callbacks import TensorBoard, ModelCheckpoint
|
||||
from keras.datasets import mnist
|
||||
from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Activation
|
||||
from keras.optimizers import RMSprop
|
||||
from keras.utils import np_utils
|
||||
import tensorflow as tf
|
||||
|
||||
from trains import Task, Logger
|
||||
|
||||
|
||||
# Connecting TRAINS
|
||||
task = Task.init(project_name='examples', task_name='Keras AutoML base')
|
||||
|
||||
|
||||
# the data, shuffled and split between train and test sets
|
||||
nb_classes = 10
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
|
||||
X_train = X_train.reshape(60000, 784).astype('float32')/255.
|
||||
X_test = X_test.reshape(10000, 784).astype('float32')/255.
|
||||
print(X_train.shape[0], 'train samples')
|
||||
print(X_test.shape[0], 'test samples')
|
||||
|
||||
# convert class vectors to binary class matrices
|
||||
Y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
args = {'batch_size': 128,
|
||||
'epochs': 6,
|
||||
'layer_1': 512,
|
||||
'layer_2': 512,
|
||||
'layer_3': 10,
|
||||
'layer_4': 512,
|
||||
}
|
||||
args = task.connect(args)
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(args['layer_1'], input_shape=(784,)))
|
||||
model.add(Activation('relu'))
|
||||
# model.add(Dropout(0.2))
|
||||
model.add(Dense(args['layer_2']))
|
||||
model.add(Activation('relu'))
|
||||
# model.add(Dropout(0.2))
|
||||
model.add(Dense(args['layer_3']))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model2 = Sequential()
|
||||
model2.add(Dense(args['layer_4'], input_shape=(784,)))
|
||||
model2.add(Activation('relu'))
|
||||
|
||||
model.summary()
|
||||
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer=RMSprop(),
|
||||
metrics=['accuracy'])
|
||||
|
||||
# Advanced: setting model class enumeration
|
||||
labels = dict(('digit_%d' % i, i) for i in range(10))
|
||||
task.set_model_label_enumeration(labels)
|
||||
|
||||
output_folder = os.path.join(tempfile.gettempdir(), 'keras_example')
|
||||
|
||||
board = TensorBoard(log_dir=output_folder, write_images=False)
|
||||
model_store = ModelCheckpoint(filepath=os.path.join(output_folder, 'weight.hdf5'))
|
||||
|
||||
history = model.fit(X_train, Y_train,
|
||||
batch_size=args['batch_size'], epochs=args['epochs'],
|
||||
callbacks=[board, model_store],
|
||||
validation_data=(X_test, Y_test))
|
||||
score = model.evaluate(X_test, Y_test, verbose=0)
|
||||
print('Test score:', score[0])
|
||||
print('Test accuracy:', score[1])
|
||||
Logger.current_logger().report_scalar(title='evaluate', series='score', value=score[0], iteration=args['epochs'])
|
||||
Logger.current_logger().report_scalar(title='evaluate', series='accuracy', value=score[1], iteration=args['epochs'])
|
||||
@@ -1,45 +0,0 @@
|
||||
from random import random, sample
|
||||
from trains import Task
|
||||
|
||||
|
||||
# define random search space,
|
||||
# This is a simple random search
|
||||
# (can be integrated with 'bayesian-optimization' 'hpbandster' etc.)
|
||||
space = {
|
||||
'batch_size': lambda: sample([64, 96, 128, 160, 192], 1)[0],
|
||||
'layer_1': lambda: sample(range(128, 512, 32), 1)[0],
|
||||
'layer_2': lambda: sample(range(128, 512, 32), 1)[0],
|
||||
}
|
||||
|
||||
# number of random samples to test from 'space'
|
||||
total_number_of_experiments = 3
|
||||
|
||||
# execution queue to add experiments to
|
||||
execution_queue_name = 'default'
|
||||
|
||||
# Select base template task
|
||||
# Notice we can be more imaginative and use task_id which will eliminate the need to use project name
|
||||
template_task = Task.get_task(project_name='examples', task_name='Keras AutoML base')
|
||||
|
||||
for i in range(total_number_of_experiments):
|
||||
# clone the template task into a new write enabled task (where we can change parameters)
|
||||
cloned_task = Task.clone(source_task=template_task,
|
||||
name=template_task.name+' {}'.format(i), parent=template_task.id)
|
||||
|
||||
# get the original template parameters
|
||||
cloned_task_parameters = cloned_task.get_parameters()
|
||||
|
||||
# override with random samples form grid
|
||||
for k in space.keys():
|
||||
cloned_task_parameters[k] = space[k]()
|
||||
|
||||
# put back into the new cloned task
|
||||
cloned_task.set_parameters(cloned_task_parameters)
|
||||
print('Experiment {} set with parameters {}'.format(i, cloned_task_parameters))
|
||||
|
||||
# enqueue the task for execution
|
||||
Task.enqueue(cloned_task.id, queue_name=execution_queue_name)
|
||||
print('Experiment id={} enqueue for execution'.format(cloned_task.id))
|
||||
|
||||
# we are done, the next step is to watch the experiments graphs
|
||||
print('Done')
|
||||
@@ -1,43 +0,0 @@
|
||||
from trains import Task
|
||||
from time import sleep
|
||||
|
||||
# Initialize the Task Pipe's first Task used to start the Task Pipe
|
||||
task = Task.init('examples', 'Simple Controller Task')
|
||||
|
||||
# Create a hyper-parameter dictionary for the task
|
||||
param = {}
|
||||
# Connect the hyper-parameter dictionary to the task
|
||||
param = task.connect(param)
|
||||
|
||||
# In this example we pass next task's name as a parameter
|
||||
param['next_task_name'] = 'Toy Base Task'
|
||||
# This is a parameter name in the next task we want to change
|
||||
param['param_name'] = 'Example_Param'
|
||||
# This is the parameter value in the next task we want to change
|
||||
param['param_name_new_value'] = 3
|
||||
# The queue where we want the template task (clone) to be sent to
|
||||
param['execution_queue_name'] = 'default'
|
||||
|
||||
# Simulate the work of a Task
|
||||
print('Processing....')
|
||||
sleep(2.0)
|
||||
print('Done processing :)')
|
||||
|
||||
# Get a reference to the task to pipe to.
|
||||
next_task = Task.get_task(project_name=task.get_project_name(), task_name=param['next_task_name'])
|
||||
|
||||
# Clone the task to pipe to. This creates a task with status Draft whose parameters can be modified.
|
||||
cloned_task = Task.clone(source_task=next_task, name='Auto generated cloned task')
|
||||
|
||||
# Get the original parameters of the Task, modify the value of one parameter,
|
||||
# and set the parameters in the next Task
|
||||
cloned_task_parameters = cloned_task.get_parameters()
|
||||
cloned_task_parameters[param['param_name']] = param['param_name_new_value']
|
||||
cloned_task.set_parameters(cloned_task_parameters)
|
||||
|
||||
# Enqueue the Task for execution. The enqueued Task must already exist in the trains platform
|
||||
print('Enqueue next step in pipeline to queue: {}'.format(param['execution_queue_name']))
|
||||
Task.enqueue(cloned_task.id, queue_name=param['execution_queue_name'])
|
||||
|
||||
# We are done. The next step in the pipe line is in charge of the pipeline now.
|
||||
print('Done')
|
||||
@@ -1,19 +0,0 @@
|
||||
# This Task is the base task that we will be executing as a second step (see task_piping.py)
|
||||
# In order to make sure this experiment is registered in the platform, you must execute it once.
|
||||
|
||||
from trains import Task
|
||||
|
||||
# Initialize the task pipe's first task used to start the task pipe
|
||||
task = Task.init('examples', 'Toy Base Task')
|
||||
|
||||
# Create a dictionary for hyper-parameters
|
||||
params = {}
|
||||
|
||||
# Add a parameter and value to the dictionary
|
||||
params['Example_Param'] = 1
|
||||
|
||||
# Connect the hyper-parameter dictionary to the task
|
||||
task.connect(params)
|
||||
|
||||
# Print the value to demonstrate it is the value is set by the initiating task.
|
||||
print("Example_Param is", params['Example_Param'])
|
||||
1
examples/distributed/pytorch_distributed_example.py
Symbolic link
1
examples/distributed/pytorch_distributed_example.py
Symbolic link
@@ -0,0 +1 @@
|
||||
../frameworks/pytorch/pytorch_distributed_example.py
|
||||
3
examples/distributed/requirements.txt
Normal file
3
examples/distributed/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
torch>=1.1.0
|
||||
torchvision>=0.3.0
|
||||
trains
|
||||
@@ -1,114 +0,0 @@
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from copy import deepcopy
|
||||
import socket
|
||||
import psutil
|
||||
from tempfile import mkstemp
|
||||
# make sure we have jupyter in the auto requirements
|
||||
import jupyter
|
||||
from trains import Task
|
||||
|
||||
# set default docker image, with network configuration
|
||||
os.environ['TRAINS_DOCKER_IMAGE'] = 'nvidia/cuda --network host'
|
||||
|
||||
# initialize TRAINS
|
||||
task = Task.init(project_name='examples', task_name='Remote Jupyter NoteBook')
|
||||
|
||||
# get rid of all the runtime TRAINS
|
||||
preserve = ('TRAINS_API_HOST', 'TRAINS_WEB_HOST', 'TRAINS_FILES_HOST', 'TRAINS_CONFIG_FILE',
|
||||
'TRAINS_API_ACCESS_KEY', 'TRAINS_API_SECRET_KEY', 'TRAINS_API_HOST_VERIFY_CERT')
|
||||
|
||||
# setup os environment
|
||||
env = deepcopy(os.environ)
|
||||
for key in os.environ:
|
||||
if key.startswith('TRAINS') and key not in preserve:
|
||||
env.pop(key, None)
|
||||
|
||||
# Add jupyter server base folder
|
||||
param = {
|
||||
'jupyter_server_base_directory': '~/',
|
||||
'ssh_server': True,
|
||||
'ssh_password': 'training'
|
||||
}
|
||||
task.connect(param)
|
||||
|
||||
try:
|
||||
hostname = socket.gethostname()
|
||||
hostnames = socket.gethostbyname(socket.gethostname())
|
||||
except Exception:
|
||||
def get_ip_addresses(family):
|
||||
for interface, snics in psutil.net_if_addrs().items():
|
||||
for snic in snics:
|
||||
if snic.family == family:
|
||||
yield snic.address
|
||||
|
||||
hostnames = list(get_ip_addresses(socket.AF_INET))
|
||||
hostname = hostnames[0]
|
||||
|
||||
if param.get('ssh_server'):
|
||||
print('Installing SSH Server on {} [{}]'.format(hostname, hostnames))
|
||||
ssh_password = param.get('ssh_password', 'training')
|
||||
try:
|
||||
used_ports = [i.laddr.port for i in psutil.net_connections()]
|
||||
port = [i for i in range(10022, 15000) if i not in used_ports][0]
|
||||
|
||||
result = os.system(
|
||||
'apt-get install -y openssh-server && '
|
||||
'mkdir -p /var/run/sshd && '
|
||||
'echo \'root:{password}\' | chpasswd && '
|
||||
'echo \'PermitRootLogin yes\' >> /etc/ssh/sshd_config && '
|
||||
'sed -i \'s/PermitRootLogin prohibit-password/PermitRootLogin yes/\' /etc/ssh/sshd_config && '
|
||||
'sed \'s@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g\' -i /etc/pam.d/sshd && '
|
||||
'echo "export VISIBLE=now" >> /etc/profile && '
|
||||
'echo "export TRAINS_CONFIG_FILE={trains_config_file}" >> /etc/profile && '
|
||||
'/usr/sbin/sshd -p {port}'.format(
|
||||
password=ssh_password, port=port, trains_config_file=os.environ.get('TRAINS_CONFIG_FILE')))
|
||||
|
||||
if result == 0:
|
||||
print('\n#\n# SSH Server running on {} [{}] port {}\n# LOGIN u:root p:{}\n#\n'.format(
|
||||
hostname, hostnames, port, ssh_password))
|
||||
else:
|
||||
raise ValueError()
|
||||
except:
|
||||
print('\n#\n# Error: SSH server could not be launched\n#\n')
|
||||
|
||||
# execute jupyter notebook
|
||||
fd, local_filename = mkstemp()
|
||||
cwd = os.path.expandvars(os.path.expanduser(param['jupyter_server_base_directory'])) \
|
||||
if param['jupyter_server_base_directory'] else os.getcwd()
|
||||
print('Running Jupyter Notebook Server on {} [{}] at {}'.format(hostname, hostnames, cwd))
|
||||
process = subprocess.Popen([sys.executable, '-m', 'jupyter', 'notebook', '--no-browser', '--allow-root', '--ip', '0.0.0.0'],
|
||||
env=env, stdout=fd, stderr=fd, cwd=cwd)
|
||||
|
||||
# print stdout/stderr
|
||||
prev_line_count = 0
|
||||
process_running = True
|
||||
while process_running:
|
||||
process_running = False
|
||||
try:
|
||||
process.wait(timeout=2.0 if prev_line_count == 0 else 15.0)
|
||||
except subprocess.TimeoutExpired:
|
||||
process_running = True
|
||||
|
||||
with open(local_filename, "rt") as f:
|
||||
# read new lines
|
||||
new_lines = f.readlines()
|
||||
if not new_lines:
|
||||
continue
|
||||
output = ''.join(new_lines)
|
||||
print(output)
|
||||
# update task comment with jupyter notebook server links
|
||||
if prev_line_count == 0:
|
||||
task.comment += '\n' + ''.join(line for line in new_lines if 'http://' in line or 'https://' in line)
|
||||
prev_line_count += len(new_lines)
|
||||
|
||||
os.lseek(fd, 0, 0)
|
||||
os.ftruncate(fd, 0)
|
||||
|
||||
# cleanup
|
||||
os.close(fd)
|
||||
try:
|
||||
os.unlink(local_filename)
|
||||
except:
|
||||
pass
|
||||
@@ -2,10 +2,12 @@ import autokeras as ak
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow import keras
|
||||
|
||||
from trains import Task
|
||||
|
||||
task = Task.init(project_name="autokeras", task_name="autokeras imdb example with scalars")
|
||||
|
||||
|
||||
def imdb_raw():
|
||||
max_features = 20000
|
||||
index_offset = 3 # word index offset
|
||||
3
examples/frameworks/autokeras/requirements.txt
Normal file
3
examples/frameworks/autokeras/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
autokeras
|
||||
tensorflow==2.1.0
|
||||
trains
|
||||
311
examples/frameworks/keras/jupyter.ipynb
Normal file
311
examples/frameworks/keras/jupyter.ipynb
Normal file
File diff suppressed because one or more lines are too long
118
examples/frameworks/keras/keras_tensorboard.py
Normal file
118
examples/frameworks/keras/keras_tensorboard.py
Normal file
@@ -0,0 +1,118 @@
|
||||
# TRAINS - Keras with Tensorboard example code, automatic logging model and Tensorboard outputs
|
||||
#
|
||||
# Train a simple deep NN on the MNIST dataset.
|
||||
# Gets to 98.40% test accuracy after 20 epochs
|
||||
# (there is *a lot* of margin for parameter tuning).
|
||||
# 2 seconds per epoch on a K520 GPU.
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras import utils as np_utils
|
||||
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
|
||||
from tensorflow.keras.datasets import mnist
|
||||
from tensorflow.keras.layers import Activation, Dense
|
||||
from tensorflow.keras.models import Sequential
|
||||
from tensorflow.keras.optimizers import RMSprop
|
||||
|
||||
from trains import Task
|
||||
|
||||
|
||||
class TensorBoardImage(TensorBoard):
|
||||
@staticmethod
|
||||
def make_image(tensor):
|
||||
from PIL import Image
|
||||
import io
|
||||
tensor = np.stack((tensor, tensor, tensor), axis=2)
|
||||
height, width, channels = tensor.shape
|
||||
image = Image.fromarray(tensor)
|
||||
output = io.BytesIO()
|
||||
image.save(output, format='PNG')
|
||||
image_string = output.getvalue()
|
||||
output.close()
|
||||
return tf.Summary.Image(height=height,
|
||||
width=width,
|
||||
colorspace=channels,
|
||||
encoded_image_string=image_string)
|
||||
|
||||
def on_epoch_end(self, epoch, logs=None):
|
||||
if logs is None:
|
||||
logs = {}
|
||||
super(TensorBoardImage, self).on_epoch_end(epoch, logs)
|
||||
images = self.validation_data[0] # 0 - data; 1 - labels
|
||||
img = (255 * images[0].reshape(28, 28)).astype('uint8')
|
||||
|
||||
image = self.make_image(img)
|
||||
summary = tf.Summary(value=[tf.Summary.Value(tag='image', image=image)])
|
||||
self.writer.add_summary(summary, epoch)
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description='Keras MNIST Example')
|
||||
parser.add_argument('--batch-size', type=int, default=128, help='input batch size for training (default: 128)')
|
||||
parser.add_argument('--epochs', type=int, default=6, help='number of epochs to train (default: 6)')
|
||||
args = parser.parse_args()
|
||||
|
||||
# the data, shuffled and split between train and test sets
|
||||
nb_classes = 10
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
|
||||
X_train = X_train.reshape(60000, 784).astype('float32')/255.
|
||||
X_test = X_test.reshape(10000, 784).astype('float32')/255.
|
||||
print(X_train.shape[0], 'train samples')
|
||||
print(X_test.shape[0], 'test samples')
|
||||
|
||||
# convert class vectors to binary class matrices
|
||||
Y_train = np_utils.to_categorical(y_train, nb_classes)
|
||||
Y_test = np_utils.to_categorical(y_test, nb_classes)
|
||||
|
||||
model = Sequential()
|
||||
model.add(Dense(512, input_shape=(784,)))
|
||||
model.add(Activation('relu'))
|
||||
# model.add(Dropout(0.2))
|
||||
model.add(Dense(512))
|
||||
model.add(Activation('relu'))
|
||||
# model.add(Dropout(0.2))
|
||||
model.add(Dense(10))
|
||||
model.add(Activation('softmax'))
|
||||
|
||||
model2 = Sequential()
|
||||
model2.add(Dense(512, input_shape=(784,)))
|
||||
model2.add(Activation('relu'))
|
||||
|
||||
model.summary()
|
||||
|
||||
model.compile(loss='categorical_crossentropy',
|
||||
optimizer=RMSprop(),
|
||||
metrics=['accuracy'])
|
||||
|
||||
# Connecting TRAINS
|
||||
task = Task.init(project_name='examples', task_name='Keras with TensorBoard example')
|
||||
task.connect_configuration({'test': 1337, 'nested': {'key': 'value', 'number': 1}})
|
||||
|
||||
# Advanced: setting model class enumeration
|
||||
labels = dict(('digit_%d' % i, i) for i in range(10))
|
||||
task.set_model_label_enumeration(labels)
|
||||
|
||||
output_folder = os.path.join(tempfile.gettempdir(), 'keras_example')
|
||||
|
||||
board = TensorBoard(histogram_freq=1, log_dir=output_folder, write_images=False)
|
||||
model_store = ModelCheckpoint(filepath=os.path.join(output_folder, 'weight.{epoch}.hdf5'))
|
||||
|
||||
# load previous model, if it is there
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
model.load_weights(os.path.join(output_folder, 'weight.1.hdf5'))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
history = model.fit(X_train, Y_train,
|
||||
batch_size=args.batch_size, epochs=args.epochs,
|
||||
callbacks=[board, model_store],
|
||||
verbose=1, validation_data=(X_test, Y_test))
|
||||
score = model.evaluate(X_test, Y_test, verbose=0)
|
||||
print('Test score:', score[0])
|
||||
print('Test accuracy:', score[1])
|
||||
341
examples/frameworks/keras/legacy/jupyter.ipynb
Normal file
341
examples/frameworks/keras/legacy/jupyter.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -102,9 +102,10 @@ board = TensorBoard(histogram_freq=1, log_dir=output_folder, write_images=False)
|
||||
model_store = ModelCheckpoint(filepath=os.path.join(output_folder, 'weight.{epoch}.hdf5'))
|
||||
|
||||
# load previous model, if it is there
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
model.load_weights(os.path.join(output_folder, 'weight.1.hdf5'))
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
history = model.fit(X_train, Y_train,
|
||||
2
examples/frameworks/keras/legacy/requirements.txt
Normal file
2
examples/frameworks/keras/legacy/requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
trains
|
||||
Keras>=2.2.4
|
||||
54
examples/frameworks/keras/manual_model_upload.py
Normal file
54
examples/frameworks/keras/manual_model_upload.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# TRAINS - Example of manual model configuration and uploading
|
||||
#
|
||||
import os
|
||||
from tempfile import gettempdir
|
||||
|
||||
from keras import Input, layers, Model
|
||||
|
||||
from trains import Task
|
||||
|
||||
|
||||
task = Task.init(project_name='examples', task_name='Model configuration and upload')
|
||||
|
||||
|
||||
def get_model():
|
||||
# Create a simple model.
|
||||
inputs = Input(shape=(32,))
|
||||
outputs = layers.Dense(1)(inputs)
|
||||
keras_model = Model(inputs, outputs)
|
||||
keras_model.compile(optimizer='adam', loss='mean_squared_error')
|
||||
return keras_model
|
||||
|
||||
|
||||
# create a model
|
||||
model = get_model()
|
||||
|
||||
# Connect a local configuration file
|
||||
config_file = os.path.join('..', '..', 'reporting', 'data_samples', 'sample.json')
|
||||
config_file = task.connect_configuration(config_file)
|
||||
# then read configuration as usual, the backend will contain a copy of it.
|
||||
# later when executing remotely, the returned `config_file` will be a temporary file
|
||||
# containing a new copy of the configuration retrieved form the backend
|
||||
# # model_config_dict = json.load(open(config_file, 'rt'))
|
||||
|
||||
# Or Store dictionary of definition for a specific network design
|
||||
model_config_dict = {
|
||||
'value': 13.37,
|
||||
'dict': {'sub_value': 'string', 'sub_integer': 11},
|
||||
'list_of_ints': [1, 2, 3, 4],
|
||||
}
|
||||
model_config_dict = task.connect_configuration(model_config_dict)
|
||||
|
||||
# We now update the dictionary after connecting it, and the changes will be tracked as well.
|
||||
model_config_dict['new value'] = 10
|
||||
model_config_dict['value'] *= model_config_dict['new value']
|
||||
|
||||
# store the label enumeration of the training model
|
||||
labels = {'background': 0, 'cat': 1, 'dog': 2}
|
||||
task.connect_label_enumeration(labels)
|
||||
|
||||
# storing the model, it will have the task network configuration and label enumeration
|
||||
print('Any model stored from this point onwards, will contain both model_config and label_enumeration')
|
||||
|
||||
model.save(os.path.join(gettempdir(), "model"))
|
||||
print('Model saved')
|
||||
3
examples/frameworks/keras/requirements.txt
Normal file
3
examples/frameworks/keras/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
Keras
|
||||
tensorflow>=2.0
|
||||
trains
|
||||
4
examples/frameworks/matplotlib/requirements.txt
Normal file
4
examples/frameworks/matplotlib/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
matplotlib >= 3.1.1 ; python_version >= '3.6'
|
||||
matplotlib >= 2.2.4 ; python_version < '3.6'
|
||||
seaborn
|
||||
trains
|
||||
@@ -1,4 +1,4 @@
|
||||
# TRAINS - Example of manual model configuration
|
||||
# TRAINS - Example of manual model configuration and uploading
|
||||
#
|
||||
import os
|
||||
from tempfile import gettempdir
|
||||
@@ -7,13 +7,13 @@ import torch
|
||||
from trains import Task
|
||||
|
||||
|
||||
task = Task.init(project_name='examples', task_name='Manual model configuration')
|
||||
task = Task.init(project_name='examples', task_name='Model configuration and upload')
|
||||
|
||||
# create a model
|
||||
model = torch.nn.Module
|
||||
|
||||
# Connect a local configuration file
|
||||
config_file = 'samples/sample.json'
|
||||
config_file = os.path.join('..', '..', 'reporting', 'data_samples', 'sample.json')
|
||||
config_file = task.connect_configuration(config_file)
|
||||
# then read configuration as usual, the backend will contain a copy of it.
|
||||
# later when executing remotely, the returned `config_file` will be a temporary file
|
||||
@@ -0,0 +1,380 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "e-YsQrBjzNdX"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install -U pip\n",
|
||||
"! pip install -U torch==1.5.0\n",
|
||||
"! pip install -U torchaudio==0.5.0\n",
|
||||
"! pip install -U torchvision==0.6.0\n",
|
||||
"! pip install -U matplotlib==3.2.1\n",
|
||||
"! pip install -U trains==0.15.0\n",
|
||||
"! pip install -U pandas==1.0.4\n",
|
||||
"! pip install -U numpy==1.18.4\n",
|
||||
"! pip install -U tensorboard==2.2.1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "T7T0Rf26zNdm"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import PIL\n",
|
||||
"import io\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from pathlib2 import Path\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"import torch\n",
|
||||
"import torch.nn as nn\n",
|
||||
"import torch.nn.functional as F\n",
|
||||
"import torch.optim as optim\n",
|
||||
"from torch.utils.data import Dataset\n",
|
||||
"from torch.utils.tensorboard import SummaryWriter\n",
|
||||
"\n",
|
||||
"import torchaudio\n",
|
||||
"from torchvision.transforms import ToTensor\n",
|
||||
"\n",
|
||||
"from trains import Task\n",
|
||||
"\n",
|
||||
"%matplotlib inline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"task = Task.init(project_name='Audio Example', task_name='audio classifier')\n",
|
||||
"configuration_dict = {'number_of_epochs': 10, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
|
||||
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
|
||||
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "msiz7QdvzNeA",
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Download UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html)\n",
|
||||
"path_to_UrbanSound8K = './data/UrbanSound8K'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "wXtmZe7yzNeS"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class UrbanSoundDataset(Dataset):\n",
|
||||
"#rapper for the UrbanSound8K dataset\n",
|
||||
" def __init__(self, csv_path, file_path, folderList):\n",
|
||||
" self.file_path = file_path\n",
|
||||
" self.file_names = []\n",
|
||||
" self.labels = []\n",
|
||||
" self.folders = []\n",
|
||||
" \n",
|
||||
" #loop through the csv entries and only add entries from folders in the folder list\n",
|
||||
" csvData = pd.read_csv(csv_path)\n",
|
||||
" for i in range(0,len(csvData)):\n",
|
||||
" if csvData.iloc[i, 5] in folderList:\n",
|
||||
" self.file_names.append(csvData.iloc[i, 0])\n",
|
||||
" self.labels.append(csvData.iloc[i, 6])\n",
|
||||
" self.folders.append(csvData.iloc[i, 5])\n",
|
||||
" \n",
|
||||
" def __getitem__(self, index):\n",
|
||||
" #format the file path and load the file\n",
|
||||
" path = self.file_path / (\"fold\" + str(self.folders[index])) / self.file_names[index]\n",
|
||||
" sound, sample_rate = torchaudio.load(path, out = None, normalization = True)\n",
|
||||
"\n",
|
||||
" # UrbanSound8K uses two channels, this will convert them to one\n",
|
||||
" soundData = torch.mean(sound, dim=0, keepdim=True)\n",
|
||||
" \n",
|
||||
" #Make sure all files are the same size\n",
|
||||
" if soundData.numel() < 160000:\n",
|
||||
" fixedsize_data = torch.nn.functional.pad(soundData, (0, 160000 - soundData.numel()))\n",
|
||||
" else:\n",
|
||||
" fixedsize_data = soundData[0,:160000].reshape(1,160000)\n",
|
||||
" \n",
|
||||
" #downsample the audio\n",
|
||||
" downsample_data = fixedsize_data[::5]\n",
|
||||
" \n",
|
||||
" melspectogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate)\n",
|
||||
" melspectogram = melspectogram_transform(downsample_data)\n",
|
||||
" melspectogram_db = torchaudio.transforms.AmplitudeToDB()(melspectogram)\n",
|
||||
"\n",
|
||||
" return fixedsize_data, sample_rate, melspectogram_db, self.labels[index]\n",
|
||||
" \n",
|
||||
" def __len__(self):\n",
|
||||
" return len(self.file_names)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"csv_path = Path(path_to_UrbanSound8K) / 'metadata' / 'UrbanSound8K.csv'\n",
|
||||
"file_path = Path(path_to_UrbanSound8K) / 'audio'\n",
|
||||
"\n",
|
||||
"train_set = UrbanSoundDataset(csv_path, file_path, range(1,10))\n",
|
||||
"test_set = UrbanSoundDataset(csv_path, file_path, [10])\n",
|
||||
"print(\"Train set size: \" + str(len(train_set)))\n",
|
||||
"print(\"Test set size: \" + str(len(test_set)))\n",
|
||||
"\n",
|
||||
"train_loader = torch.utils.data.DataLoader(train_set, batch_size = configuration_dict.get('batch_size', 4), \n",
|
||||
" shuffle = True, pin_memory=True, num_workers=1)\n",
|
||||
"test_loader = torch.utils.data.DataLoader(test_set, batch_size = configuration_dict.get('batch_size', 4), \n",
|
||||
" shuffle = False, pin_memory=True, num_workers=1)\n",
|
||||
"\n",
|
||||
"classes = ('air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 'engine_idling', \n",
|
||||
" 'gun_shot', 'jackhammer', 'siren', 'street_music')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "ylblw-k1zNeZ"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class Net(nn.Module):\n",
|
||||
" def __init__(self, num_classes, dropout_value):\n",
|
||||
" super(Net,self).__init__()\n",
|
||||
" self.num_classes = num_classes\n",
|
||||
" self.dropout_value = dropout_value\n",
|
||||
" \n",
|
||||
" self.C1 = nn.Conv2d(1,16,3)\n",
|
||||
" self.C2 = nn.Conv2d(16,32,3)\n",
|
||||
" self.C3 = nn.Conv2d(32,64,3)\n",
|
||||
" self.C4 = nn.Conv2d(64,128,3)\n",
|
||||
" self.maxpool1 = nn.MaxPool2d(2,2) \n",
|
||||
" self.fc1 = nn.Linear(128*29*197,128)\n",
|
||||
" self.fc2 = nn.Linear(128,self.num_classes)\n",
|
||||
" self.dropout = nn.Dropout(self.dropout_value)\n",
|
||||
" \n",
|
||||
" def forward(self,x):\n",
|
||||
" # add sequence of convolutional and max pooling layers\n",
|
||||
" x = F.relu(self.C1(x))\n",
|
||||
" x = self.maxpool1(F.relu(self.C2(x)))\n",
|
||||
" x = F.relu(self.C3(x))\n",
|
||||
" x = self.maxpool1(F.relu(self.C4(x)))\n",
|
||||
" # flatten image input\n",
|
||||
" x = x.view(-1,128*29*197)\n",
|
||||
" x = F.relu(self.fc1(self.dropout(x)))\n",
|
||||
" x = self.fc2(self.dropout(x))\n",
|
||||
" return x\n",
|
||||
" \n",
|
||||
" \n",
|
||||
"model = Net(len(classes), configuration_dict.get('dropout', 0.25))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "3yKYru14zNef"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"optimizer = optim.SGD(model.parameters(), lr = configuration_dict.get('base_lr', 0.001), momentum = 0.9)\n",
|
||||
"scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 3, gamma = 0.1)\n",
|
||||
"criterion = nn.CrossEntropyLoss()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu')\n",
|
||||
"print('Device to use: {}'.format(device))\n",
|
||||
"model.to(device)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tensorboard_writer = SummaryWriter('./tensorboard_logs')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def plot_signal(signal, title, cmap=None):\n",
|
||||
" fig = plt.figure()\n",
|
||||
" if signal.ndim == 1:\n",
|
||||
" plt.plot(signal)\n",
|
||||
" else:\n",
|
||||
" plt.imshow(signal, cmap=cmap) \n",
|
||||
" plt.title(title)\n",
|
||||
" \n",
|
||||
" plot_buf = io.BytesIO()\n",
|
||||
" plt.savefig(plot_buf, format='jpeg')\n",
|
||||
" plot_buf.seek(0)\n",
|
||||
" plt.close(fig)\n",
|
||||
" return ToTensor()(PIL.Image.open(plot_buf))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "Vdthqz3JzNem"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def train(model, epoch):\n",
|
||||
" model.train()\n",
|
||||
" for batch_idx, (sounds, sample_rate, inputs, labels) in enumerate(train_loader):\n",
|
||||
" inputs = inputs.to(device)\n",
|
||||
" labels = labels.to(device)\n",
|
||||
"\n",
|
||||
" # zero the parameter gradients\n",
|
||||
" optimizer.zero_grad()\n",
|
||||
"\n",
|
||||
" # forward + backward + optimize\n",
|
||||
" outputs = model(inputs)\n",
|
||||
" _, predicted = torch.max(outputs, 1)\n",
|
||||
" loss = criterion(outputs, labels)\n",
|
||||
" loss.backward()\n",
|
||||
" optimizer.step()\n",
|
||||
" \n",
|
||||
" iteration = epoch * len(train_loader) + batch_idx\n",
|
||||
" if batch_idx % log_interval == 0: #print training stats\n",
|
||||
" print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'\n",
|
||||
" .format(epoch, batch_idx * len(inputs), len(train_loader.dataset), \n",
|
||||
" 100. * batch_idx / len(train_loader), loss))\n",
|
||||
" tensorboard_writer.add_scalar('training loss/loss', loss, iteration)\n",
|
||||
" tensorboard_writer.add_scalar('learning rate/lr', optimizer.param_groups[0]['lr'], iteration)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" if batch_idx % debug_interval == 0: # report debug image every 500 mini-batches\n",
|
||||
" for n, (inp, pred, label) in enumerate(zip(inputs, predicted, labels)):\n",
|
||||
" series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()])\n",
|
||||
" tensorboard_writer.add_image('Train MelSpectrogram samples/{}'.format(n), \n",
|
||||
" plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "LBWoj7u5zNes"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def test(model, epoch):\n",
|
||||
" model.eval()\n",
|
||||
" class_correct = list(0. for i in range(10))\n",
|
||||
" class_total = list(0. for i in range(10))\n",
|
||||
" with torch.no_grad():\n",
|
||||
" for idx, (sounds, sample_rate, inputs, labels) in enumerate(test_loader):\n",
|
||||
" inputs = inputs.to(device)\n",
|
||||
" labels = labels.to(device)\n",
|
||||
"\n",
|
||||
" outputs = model(inputs)\n",
|
||||
"\n",
|
||||
" _, predicted = torch.max(outputs, 1)\n",
|
||||
" c = (predicted == labels)\n",
|
||||
" for i in range(len(inputs)):\n",
|
||||
" label = labels[i].item()\n",
|
||||
" class_correct[label] += c[i].item()\n",
|
||||
" class_total[label] += 1\n",
|
||||
" \n",
|
||||
" iteration = (epoch + 1) * len(train_loader)\n",
|
||||
" if idx % debug_interval == 0: # report debug image every 100 mini-batches\n",
|
||||
" for n, (sound, inp, pred, label) in enumerate(zip(sounds, inputs, predicted, labels)):\n",
|
||||
" series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()])\n",
|
||||
" tensorboard_writer.add_audio('Test audio samples/{}'.format(n), \n",
|
||||
" sound, iteration, int(sample_rate[n]))\n",
|
||||
" tensorboard_writer.add_image('Test MelSpectrogram samples/{}_{}'.format(idx, n), \n",
|
||||
" plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration)\n",
|
||||
"\n",
|
||||
" total_accuracy = 100 * sum(class_correct)/sum(class_total)\n",
|
||||
" print('[Iteration {}] Accuracy on the {} test images: {}%\\n'.format(epoch, sum(class_total), total_accuracy))\n",
|
||||
" tensorboard_writer.add_scalar('accuracy/total', total_accuracy, iteration)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "X5lx3g_5zNey",
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"log_interval = 100\n",
|
||||
"debug_interval = 200\n",
|
||||
"for epoch in range(configuration_dict.get('number_of_epochs', 10)):\n",
|
||||
" train(model, epoch)\n",
|
||||
" test(model, epoch)\n",
|
||||
" scheduler.step()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"name": "audio_classifier_tutorial.ipynb",
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
@@ -0,0 +1,128 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install -U pip\n",
|
||||
"! pip install -U torch==1.5.0\n",
|
||||
"! pip install -U torchaudio==0.5.0\n",
|
||||
"! pip install -U matplotlib==3.2.1\n",
|
||||
"! pip install -U trains==0.15.0\n",
|
||||
"! pip install -U tensorboard==2.2.1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import torch\n",
|
||||
"import torchaudio\n",
|
||||
"from torch.utils.tensorboard import SummaryWriter\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"from trains import Task\n",
|
||||
"\n",
|
||||
"%matplotlib inline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"task = Task.init(project_name='Audio Example', task_name='data pre-processing')\n",
|
||||
"configuration_dict = {'number_of_smaples': 3}\n",
|
||||
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
|
||||
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tensorboard_writer = SummaryWriter('./tensorboard_logs')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if not os.path.isdir('./data'):\n",
|
||||
" os.mkdir('./data')\n",
|
||||
"yesno_data = torchaudio.datasets.YESNO('./data', download=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def plot_signal(signal, title, cmap=None):\n",
|
||||
" plt.figure()\n",
|
||||
" if signal.ndim == 1:\n",
|
||||
" plt.plot(signal)\n",
|
||||
" else:\n",
|
||||
" plt.imshow(signal, cmap=cmap) \n",
|
||||
" plt.title(title)\n",
|
||||
" plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for n in range(configuration_dict.get('number_of_smaples', 3)):\n",
|
||||
" waveform, sample_rate, labels = yesno_data[n]\n",
|
||||
" melspectogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate)\n",
|
||||
" plot_signal(waveform[0,:], 'Original waveform')\n",
|
||||
" tensorboard_writer.add_audio('Audio samples/{}'.format(n), waveform, n, sample_rate)\n",
|
||||
" plot_signal(melspectogram_transform(waveform.squeeze()).numpy(), 'Mel spectogram', 'hot')\n",
|
||||
" plot_signal(torchaudio.transforms.AmplitudeToDB()(melspectogram_transform(waveform.squeeze())).numpy(), 'Mel spectogram DB', 'hot')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
@@ -0,0 +1,136 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# execute this in command line on all machines to be used as workers before initiating the hyperparamer search \n",
|
||||
"# ! pip install -U trains-agent==0.15.0\n",
|
||||
"# ! trains-agent daemon --queue default\n",
|
||||
"\n",
|
||||
"# pip install with locked versions\n",
|
||||
"! pip install -U pandas==1.0.3\n",
|
||||
"! pip install -U trains==0.15.0\n",
|
||||
"! pip install -U hpbandster==0.7.4 # Needed only for Bayesian optimization Hyper-Band"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from trains.automation import UniformParameterRange, UniformIntegerParameterRange\n",
|
||||
"from trains.automation import RandomSearch, HyperParameterOptimizer\n",
|
||||
"from trains.automation.hpbandster import OptimizerBOHB # Needed only for Bayesian optimization Hyper-Band\n",
|
||||
"\n",
|
||||
"from trains import Task"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"task = Task.init(project_name='Hyper-Parameter Search', task_name='Hyper-Parameter Optimization')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#####################################################################\n",
|
||||
"### Don't forget to replace this default id with your own task id ###\n",
|
||||
"#####################################################################\n",
|
||||
"TEMPLATE_TASK_ID = 'd8e928460f98437c998f3597768597f8'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"optimizer = HyperParameterOptimizer(\n",
|
||||
" base_task_id=TEMPLATE_TASK_ID, # This is the experiment we want to optimize\n",
|
||||
" # here we define the hyper-parameters to optimize\n",
|
||||
" hyper_parameters=[\n",
|
||||
" UniformIntegerParameterRange('number_of_epochs', min_value=5, max_value=15, step_size=1),\n",
|
||||
" UniformIntegerParameterRange('batch_size', min_value=2, max_value=12, step_size=2),\n",
|
||||
" UniformParameterRange('dropout', min_value=0, max_value=0.5, step_size=0.05),\n",
|
||||
" UniformParameterRange('base_lr', min_value=0.0005, max_value=0.01, step_size=0.0005),\n",
|
||||
" ],\n",
|
||||
" # this is the objective metric we want to maximize/minimize\n",
|
||||
" objective_metric_title='accuracy',\n",
|
||||
" objective_metric_series='total',\n",
|
||||
" objective_metric_sign='max', # maximize or minimize the objective metric\n",
|
||||
" max_number_of_concurrent_tasks=3, # number of concurrent experiments\n",
|
||||
" # setting optimizer - trains supports GridSearch, RandomSearch or OptimizerBOHB\n",
|
||||
" optimizer_class=OptimizerBOHB, # can be replaced with OptimizerBOHB\n",
|
||||
" execution_queue='default', # queue to schedule the experiments for execution\n",
|
||||
" optimization_time_limit=30., # time limit for each experiment (optional, ignored by OptimizerBOHB)\n",
|
||||
" pool_period_min=1, # Check the experiments every x minutes\n",
|
||||
" # set the maximum number of experiments for the optimization.\n",
|
||||
" # OptimizerBOHB sets the total number of iteration as total_max_jobs * max_iteration_per_job\n",
|
||||
" total_max_jobs=12,\n",
|
||||
" # setting OptimizerBOHB configuration (ignored by other optimizers)\n",
|
||||
" min_iteration_per_job=15000, # minimum number of iterations per experiment, till early stopping\n",
|
||||
" max_iteration_per_job=150000, # maximum number of iterations per experiment\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"optimizer.set_time_limit(in_minutes=120.0) # set the time limit for the optimization process\n",
|
||||
"optimizer.start() \n",
|
||||
"optimizer.wait() # wait until process is done\n",
|
||||
"optimizer.stop() # make sure background optimization stopped"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# optimization is completed, print the top performing experiments id\n",
|
||||
"k = 3\n",
|
||||
"top_exp = optimizer.get_top_experiments(top_k=k)\n",
|
||||
"print('Top {} experiments are:'.format(k))\n",
|
||||
"for n, t in enumerate(top_exp, 1):\n",
|
||||
" print('Rank {}: task id={} |result={}'\n",
|
||||
" .format(n, t.id, t.get_last_scalar_metrics()['accuracy']['total']['last']))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,243 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# execute this in command line before initiating the notebook: \n",
|
||||
"# pip install -U pip\n",
|
||||
"# pip install -U ipywidgets==7.5.1\n",
|
||||
"# jupyter nbextension enable --py widgetsnbextension\n",
|
||||
"\n",
|
||||
"# pip install with locked versions\n",
|
||||
"! pip install -U torch==1.5.0\n",
|
||||
"! pip install -U torchvision==0.6.0\n",
|
||||
"! pip install -U numpy==1.18.4\n",
|
||||
"! pip install -U trains==0.15.0\n",
|
||||
"! pip install -U tensorboard==2.2.1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"import torch.nn as nn\n",
|
||||
"import torch.nn.functional as F\n",
|
||||
"import torch.optim as optim\n",
|
||||
"from torch.utils.tensorboard import SummaryWriter\n",
|
||||
"\n",
|
||||
"import torchvision.datasets as datasets\n",
|
||||
"import torchvision.transforms as transforms\n",
|
||||
"\n",
|
||||
"from trains import Task"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"task = Task.init(project_name='Hyper-Parameter Search', task_name='image_classification_CIFAR10')\n",
|
||||
"configuration_dict = {'number_of_epochs': 3, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
|
||||
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
|
||||
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"transform = transforms.Compose([transforms.ToTensor()])\n",
|
||||
"\n",
|
||||
"trainset = datasets.CIFAR10(root='./data', train=True,\n",
|
||||
" download=True, transform=transform)\n",
|
||||
"trainloader = torch.utils.data.DataLoader(trainset, batch_size=configuration_dict.get('batch_size', 4),\n",
|
||||
" shuffle=True, num_workers=2)\n",
|
||||
"\n",
|
||||
"testset = datasets.CIFAR10(root='./data', train=False,\n",
|
||||
" download=True, transform=transform)\n",
|
||||
"testloader = torch.utils.data.DataLoader(testset, batch_size=configuration_dict.get('batch_size', 4),\n",
|
||||
" shuffle=False, num_workers=2)\n",
|
||||
"\n",
|
||||
"classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')\n",
|
||||
"\n",
|
||||
"device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class Net(nn.Module):\n",
|
||||
" def __init__(self):\n",
|
||||
" super(Net, self).__init__()\n",
|
||||
" self.conv1 = nn.Conv2d(3, 6, 5)\n",
|
||||
" self.conv2 = nn.Conv2d(3, 6, 5)\n",
|
||||
" self.pool = nn.MaxPool2d(2, 2)\n",
|
||||
" self.conv2 = nn.Conv2d(6, 16, 5)\n",
|
||||
" self.fc1 = nn.Linear(16 * 5 * 5, 120)\n",
|
||||
" self.fc2 = nn.Linear(120, 84)\n",
|
||||
" self.dorpout = nn.Dropout(p=configuration_dict.get('dropout', 0.25))\n",
|
||||
" self.fc3 = nn.Linear(84, 10)\n",
|
||||
"\n",
|
||||
" def forward(self, x):\n",
|
||||
" x = self.pool(F.relu(self.conv1(x)))\n",
|
||||
" x = self.pool(F.relu(self.conv2(x)))\n",
|
||||
" x = x.view(-1, 16 * 5 * 5)\n",
|
||||
" x = F.relu(self.fc1(x))\n",
|
||||
" x = F.relu(self.fc2(x))\n",
|
||||
" x = self.fc3(self.dorpout(x))\n",
|
||||
" return x"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"net = Net().to(device)\n",
|
||||
"criterion = nn.CrossEntropyLoss()\n",
|
||||
"optimizer = optim.SGD(net.parameters(), lr=configuration_dict.get('base_lr', 0.001), momentum=0.9)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tensorboard_writer = SummaryWriter('./tensorboard_logs')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def test_model(test_dataloader, iteration):\n",
|
||||
" class_correct = list(0. for i in range(10))\n",
|
||||
" class_total = list(0. for i in range(10))\n",
|
||||
" with torch.no_grad():\n",
|
||||
" for j, data in enumerate(test_dataloader, 1):\n",
|
||||
" images, labels = data\n",
|
||||
" images = images.to(device)\n",
|
||||
" labels = labels.to(device)\n",
|
||||
" \n",
|
||||
" outputs = net(images)\n",
|
||||
" _, predicted = torch.max(outputs, 1)\n",
|
||||
" c = (predicted == labels).squeeze()\n",
|
||||
" for i in range(len(images)):\n",
|
||||
" label = labels[i].item()\n",
|
||||
" class_correct[label] += c[i].item()\n",
|
||||
" class_total[label] += 1\n",
|
||||
" \n",
|
||||
" if j % 500 == 0: # report debug image every 500 mini-batches\n",
|
||||
" for n, (img, pred, label) in enumerate(zip(images, predicted, labels)):\n",
|
||||
" tensorboard_writer.add_image(\"testing/{}-{}_GT_{}_pred_{}\"\n",
|
||||
" .format(j, n, classes[label], classes[pred]), img, iteration)\n",
|
||||
"\n",
|
||||
" for i in range(len(classes)):\n",
|
||||
" class_accuracy = 100 * class_correct[i] / class_total[i]\n",
|
||||
" print('[Iteration {}] Accuracy of {} : {}%'.format(iteration, classes[i], class_accuracy))\n",
|
||||
" tensorboard_writer.add_scalar('accuracy per class/{}'.format(classes[i]), class_accuracy, iteration)\n",
|
||||
"\n",
|
||||
" total_accuracy = 100 * sum(class_correct)/sum(class_total)\n",
|
||||
" print('[Iteration {}] Accuracy on the {} test images: {}%\\n'.format(iteration, sum(class_total), total_accuracy))\n",
|
||||
" tensorboard_writer.add_scalar('accuracy/total', total_accuracy, iteration)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for epoch in range(configuration_dict.get('number_of_epochs', 3)): # loop over the dataset multiple times\n",
|
||||
"\n",
|
||||
" running_loss = 0.0\n",
|
||||
" for i, data in enumerate(trainloader, 1):\n",
|
||||
" # get the inputs; data is a list of [inputs, labels]\n",
|
||||
" inputs, labels = data\n",
|
||||
" inputs = inputs.to(device)\n",
|
||||
" labels = labels.to(device)\n",
|
||||
"\n",
|
||||
" # zero the parameter gradients\n",
|
||||
" optimizer.zero_grad()\n",
|
||||
"\n",
|
||||
" # forward + backward + optimize\n",
|
||||
" outputs = net(inputs)\n",
|
||||
" loss = criterion(outputs, labels)\n",
|
||||
" loss.backward()\n",
|
||||
" optimizer.step()\n",
|
||||
"\n",
|
||||
" # print statistics\n",
|
||||
" running_loss += loss.item()\n",
|
||||
" \n",
|
||||
" iteration = epoch * len(trainloader) + i\n",
|
||||
" if i % 2000 == 0: # report loss every 2000 mini-batches\n",
|
||||
" print('[Epoch %d, Iteration %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss / 2000))\n",
|
||||
" tensorboard_writer.add_scalar('training loss', running_loss / 2000, iteration)\n",
|
||||
" running_loss = 0.0\n",
|
||||
" \n",
|
||||
" test_model(testloader, iteration)\n",
|
||||
"\n",
|
||||
"print('Finished Training')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"PATH = './cifar_net.pth'\n",
|
||||
"torch.save(net.state_dict(), PATH)\n",
|
||||
"tensorboard_writer.close()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('Task ID number is: {}'.format(task.id))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -45,6 +45,9 @@ to resemble the content of the content-image and the artistic style of the style
|
||||
# - ``copy`` (to deep copy the models; system package)
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
@@ -110,8 +113,8 @@ def image_loader(image_name):
|
||||
return image.to(device, torch.float)
|
||||
|
||||
|
||||
style_img = image_loader("./samples/picasso.jpg")
|
||||
content_img = image_loader("./samples/dancing.jpg")
|
||||
style_img = image_loader(os.path.join("..", "..", "reporting", "data_samples", "picasso.jpg"))
|
||||
content_img = image_loader(os.path.join("..", "..", "reporting", "data_samples", "dancing.jpg"))
|
||||
|
||||
assert style_img.size() == content_img.size(), \
|
||||
"we need to import style and content images of the same size"
|
||||
@@ -11,8 +11,7 @@ import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
from torchvision import datasets, transforms
|
||||
|
||||
from trains import Task
|
||||
task = Task.init(project_name='examples', task_name='pytorch mnist train')
|
||||
from trains import Task, Logger
|
||||
|
||||
|
||||
class Net(nn.Module):
|
||||
@@ -44,12 +43,14 @@ def train(args, model, device, train_loader, optimizer, epoch):
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
if batch_idx % args.log_interval == 0:
|
||||
Logger.current_logger().report_scalar(
|
||||
"train", "loss", iteration=(epoch * len(train_loader) + batch_idx), value=loss.item())
|
||||
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
|
||||
epoch, batch_idx * len(data), len(train_loader.dataset),
|
||||
100. * batch_idx / len(train_loader), loss.item()))
|
||||
100. * batch_idx / len(train_loader), loss.item()))
|
||||
|
||||
|
||||
def test(args, model, device, test_loader):
|
||||
def test(args, model, device, test_loader, epoch):
|
||||
model.eval()
|
||||
test_loss = 0
|
||||
correct = 0
|
||||
@@ -63,12 +64,18 @@ def test(args, model, device, test_loader):
|
||||
|
||||
test_loss /= len(test_loader.dataset)
|
||||
|
||||
Logger.current_logger().report_scalar(
|
||||
"test", "loss", iteration=epoch, value=test_loss)
|
||||
Logger.current_logger().report_scalar(
|
||||
"test", "accuracy", iteration=epoch, value=(correct / len(test_loader.dataset)))
|
||||
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
|
||||
test_loss, correct, len(test_loader.dataset),
|
||||
100. * correct / len(test_loader.dataset)))
|
||||
|
||||
|
||||
def main():
|
||||
task = Task.init(project_name='examples', task_name='pytorch mnist train')
|
||||
|
||||
# Training settings
|
||||
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
|
||||
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
|
||||
@@ -99,14 +106,14 @@ def main():
|
||||
|
||||
kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}
|
||||
train_loader = torch.utils.data.DataLoader(
|
||||
datasets.MNIST('../data', train=True, download=True,
|
||||
datasets.MNIST(os.path.join('..', 'data'), train=True, download=True,
|
||||
transform=transforms.Compose([
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize((0.1307,), (0.3081,))
|
||||
])),
|
||||
batch_size=args.batch_size, shuffle=True, **kwargs)
|
||||
test_loader = torch.utils.data.DataLoader(
|
||||
datasets.MNIST('../data', train=False, transform=transforms.Compose([
|
||||
datasets.MNIST(os.path.join('..', 'data'), train=False, transform=transforms.Compose([
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize((0.1307,), (0.3081,))
|
||||
])),
|
||||
@@ -117,7 +124,7 @@ def main():
|
||||
|
||||
for epoch in range(1, args.epochs + 1):
|
||||
train(args, model, device, train_loader, optimizer, epoch)
|
||||
test(args, model, device, test_loader)
|
||||
test(args, model, device, test_loader, epoch)
|
||||
|
||||
if (args.save_model):
|
||||
torch.save(model.state_dict(), os.path.join(gettempdir(), "mnist_cnn.pt"))
|
||||
@@ -66,7 +66,7 @@ def test(model, test_loader, args, optimizer, writer):
|
||||
data, target = Variable(data), Variable(target)
|
||||
output = model(data)
|
||||
test_loss += F.nll_loss(output, target, reduction='sum').data.item() # sum up batch loss
|
||||
pred = output.data.max(1)[1] # get the index of the max log-probability
|
||||
pred = output.data.max(1)[1] # get the index of the max log-probability
|
||||
pred = pred.eq(target.data).cpu().sum()
|
||||
writer.add_scalar('Test/Loss', pred, niter)
|
||||
correct += pred
|
||||
@@ -99,7 +99,7 @@ def main():
|
||||
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
|
||||
help='how many batches to wait before logging training status')
|
||||
args = parser.parse_args()
|
||||
task = Task.init(project_name='examples', task_name='pytorch with tensorboard', output_uri='/tmp/blah')
|
||||
task = Task.init(project_name='examples', task_name='pytorch with tensorboard') # noqa: F841
|
||||
writer = SummaryWriter('runs')
|
||||
writer.add_text('TEXT', 'This is some text', 0)
|
||||
args.cuda = not args.no_cuda and torch.cuda.is_available()
|
||||
1
examples/frameworks/pytorch/pytorch_tensorboardx.py
Symbolic link
1
examples/frameworks/pytorch/pytorch_tensorboardx.py
Symbolic link
@@ -0,0 +1 @@
|
||||
../tensorboardx/pytorch_tensorboardX.py
|
||||
6
examples/frameworks/pytorch/requirements.txt
Normal file
6
examples/frameworks/pytorch/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
matplotlib
|
||||
# tensorboardX
|
||||
tensorboard>=1.14.0
|
||||
torch>=1.1.0
|
||||
torchvision>=0.3.0
|
||||
trains
|
||||
@@ -1,3 +1,6 @@
|
||||
import os
|
||||
from tempfile import gettempdir
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
@@ -6,10 +9,10 @@ from trains import Task
|
||||
task = Task.init(project_name='examples', task_name='pytorch tensorboard toy example')
|
||||
|
||||
|
||||
writer = SummaryWriter(log_dir='/tmp/tensorboard_logs')
|
||||
writer = SummaryWriter(log_dir=os.path.join(gettempdir(), 'tensorboard_logs'))
|
||||
|
||||
# convert to 4d [batch, col, row, RGB-channels]
|
||||
image_open = Image.open('./samples/picasso.jpg')
|
||||
image_open = Image.open(os.path.join("..", "..", "reporting", "data_samples", "picasso.jpg"))
|
||||
image = np.asarray(image_open)
|
||||
image_gray = image[:, :, 0][np.newaxis, :, :, np.newaxis]
|
||||
image_rgba = np.concatenate((image, 255*np.atleast_3d(np.ones(shape=image.shape[:2], dtype=np.uint8))), axis=2)
|
||||
@@ -42,4 +42,4 @@ plt.ylim(yy.min(), yy.max())
|
||||
plt.xticks(())
|
||||
plt.yticks(())
|
||||
|
||||
plt.show()
|
||||
plt.show()
|
||||
3
examples/frameworks/scikit-learn/requirements.txt
Normal file
3
examples/frameworks/scikit-learn/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
joblib>=0.13.2
|
||||
scikit-learn
|
||||
trains
|
||||
@@ -10,9 +10,9 @@ import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
from torchvision import datasets, transforms
|
||||
from torch.autograd import Variable
|
||||
from tensorboardX import SummaryWriter
|
||||
from torch.autograd import Variable
|
||||
from torchvision import datasets, transforms
|
||||
|
||||
from trains import Task
|
||||
|
||||
@@ -65,7 +65,7 @@ def test(model, test_loader, args, optimizer, writer):
|
||||
data, target = Variable(data), Variable(target)
|
||||
output = model(data)
|
||||
test_loss += F.nll_loss(output, target, reduction='sum').data.item() # sum up batch loss
|
||||
pred = output.data.max(1)[1] # get the index of the max log-probability
|
||||
pred = output.data.max(1)[1] # get the index of the max log-probability
|
||||
pred = pred.eq(target.data).cpu().sum()
|
||||
writer.add_scalar('Test/Loss', pred, niter)
|
||||
correct += pred
|
||||
@@ -100,7 +100,7 @@ def main():
|
||||
args = parser.parse_args()
|
||||
args.cuda = not args.no_cuda and torch.cuda.is_available()
|
||||
|
||||
task = Task.init(project_name='examples', task_name='pytorch with tensorboardX')
|
||||
task = Task.init(project_name='examples', task_name='pytorch with tensorboardX') # noqa: F841
|
||||
writer = SummaryWriter('runs')
|
||||
writer.add_text('TEXT', 'This is some text', 0)
|
||||
|
||||
4
examples/frameworks/tensorboardx/requirements.txt
Normal file
4
examples/frameworks/tensorboardx/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
tensorboardX>=1.8
|
||||
torch>=1.1.0
|
||||
torchvision>=0.3.0
|
||||
trains
|
||||
3
examples/frameworks/tensorflow/legacy/requirements.txt
Normal file
3
examples/frameworks/tensorflow/legacy/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
trains
|
||||
tensorboard>=1.14.0
|
||||
tensorflow>=1.14.0
|
||||
@@ -1,5 +1,8 @@
|
||||
# TRAINS - Example of tensorboard with tensorflow (without any actual training)
|
||||
#
|
||||
import os
|
||||
from tempfile import gettempdir
|
||||
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
@@ -49,7 +52,7 @@ tf.summary.histogram("all_combined", all_combined)
|
||||
tf.summary.text("this is a test", tf.make_tensor_proto("This is the content", dtype=tf.string))
|
||||
|
||||
# convert to 4d [batch, col, row, RGB-channels]
|
||||
image_open = Image.open('./samples/picasso.jpg')
|
||||
image_open = Image.open(os.path.join("..", "..", "..", "reporting", "data_samples", "picasso.jpg"))
|
||||
image = np.asarray(image_open)
|
||||
image_gray = image[:, :, 0][np.newaxis, :, :, np.newaxis]
|
||||
image_rgba = np.concatenate((image, 255*np.atleast_3d(np.ones(shape=image.shape[:2], dtype=np.uint8))), axis=2)
|
||||
@@ -68,7 +71,7 @@ logger = task.get_logger()
|
||||
|
||||
# Use original FileWriter for comparison , run:
|
||||
# % tensorboard --logdir=/tmp/histogram_example
|
||||
writer = tf.summary.FileWriter("/tmp/histogram_example")
|
||||
writer = tf.summary.FileWriter(os.path.join(gettempdir(), "histogram_example"))
|
||||
|
||||
# Setup a loop and write the summaries to disk
|
||||
N = 40
|
||||
355
examples/frameworks/tensorflow/legacy/tensorflow_eager.py
Normal file
355
examples/frameworks/tensorflow/legacy/tensorflow_eager.py
Normal file
@@ -0,0 +1,355 @@
|
||||
# TRAINS - Example of tensorflow eager mode, model logging and tensorboard
|
||||
#
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""A deep MNIST classifier using convolutional layers.
|
||||
Sample usage:
|
||||
python mnist.py --help
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from tempfile import gettempdir
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.examples.tutorials.mnist import input_data
|
||||
|
||||
from trains import Task
|
||||
|
||||
tf.compat.v1.enable_eager_execution()
|
||||
|
||||
task = Task.init(project_name='examples', task_name='Tensorflow eager mode')
|
||||
|
||||
|
||||
FLAGS = tf.app.flags.FLAGS
|
||||
tf.app.flags.DEFINE_integer('data_num', 100, """Flag of type integer""")
|
||||
tf.app.flags.DEFINE_string('img_path', './img', """Flag of type string""")
|
||||
|
||||
|
||||
layers = tf.keras.layers
|
||||
FLAGS = None
|
||||
|
||||
|
||||
class Discriminator(tf.keras.Model):
|
||||
"""
|
||||
GAN Discriminator.
|
||||
A network to differentiate between generated and real handwritten digits.
|
||||
"""
|
||||
|
||||
def __init__(self, data_format):
|
||||
"""Creates a model for discriminating between real and generated digits.
|
||||
Args:
|
||||
data_format: Either 'channels_first' or 'channels_last'.
|
||||
'channels_first' is typically faster on GPUs while 'channels_last' is
|
||||
typically faster on CPUs. See
|
||||
https://www.tensorflow.org/performance/performance_guide#data_formats
|
||||
"""
|
||||
super(Discriminator, self).__init__(name='')
|
||||
if data_format == 'channels_first':
|
||||
self._input_shape = [-1, 1, 28, 28]
|
||||
else:
|
||||
assert data_format == 'channels_last'
|
||||
self._input_shape = [-1, 28, 28, 1]
|
||||
self.conv1 = layers.Conv2D(
|
||||
64, 5, padding='SAME', data_format=data_format, activation=tf.tanh)
|
||||
self.pool1 = layers.AveragePooling2D(2, 2, data_format=data_format)
|
||||
self.conv2 = layers.Conv2D(
|
||||
128, 5, data_format=data_format, activation=tf.tanh)
|
||||
self.pool2 = layers.AveragePooling2D(2, 2, data_format=data_format)
|
||||
self.flatten = layers.Flatten()
|
||||
self.fc1 = layers.Dense(1024, activation=tf.tanh)
|
||||
self.fc2 = layers.Dense(1, activation=None)
|
||||
|
||||
def call(self, inputs):
|
||||
"""Return two logits per image estimating input authenticity.
|
||||
Users should invoke __call__ to run the network, which delegates to this
|
||||
method (and not call this method directly).
|
||||
Args:
|
||||
inputs: A batch of images as a Tensor with shape [batch_size, 28, 28, 1]
|
||||
or [batch_size, 1, 28, 28]
|
||||
Returns:
|
||||
A Tensor with shape [batch_size] containing logits estimating
|
||||
the probability that corresponding digit is real.
|
||||
"""
|
||||
x = tf.reshape(inputs, self._input_shape)
|
||||
x = self.conv1(x)
|
||||
x = self.pool1(x)
|
||||
x = self.conv2(x)
|
||||
x = self.pool2(x)
|
||||
x = self.flatten(x)
|
||||
x = self.fc1(x)
|
||||
x = self.fc2(x)
|
||||
return x
|
||||
|
||||
|
||||
class Generator(tf.keras.Model):
|
||||
"""
|
||||
Generator of handwritten digits similar to the ones in the MNIST dataset.
|
||||
"""
|
||||
|
||||
def __init__(self, data_format):
|
||||
"""Creates a model for discriminating between real and generated digits.
|
||||
Args:
|
||||
data_format: Either 'channels_first' or 'channels_last'.
|
||||
'channels_first' is typically faster on GPUs while 'channels_last' is
|
||||
typically faster on CPUs. See
|
||||
https://www.tensorflow.org/performance/performance_guide#data_formats
|
||||
"""
|
||||
super(Generator, self).__init__(name='')
|
||||
self.data_format = data_format
|
||||
# We are using 128 6x6 channels as input to the first deconvolution layer
|
||||
if data_format == 'channels_first':
|
||||
self._pre_conv_shape = [-1, 128, 6, 6]
|
||||
else:
|
||||
assert data_format == 'channels_last'
|
||||
self._pre_conv_shape = [-1, 6, 6, 128]
|
||||
self.fc1 = layers.Dense(6 * 6 * 128, activation=tf.tanh)
|
||||
|
||||
# In call(), we reshape the output of fc1 to _pre_conv_shape
|
||||
|
||||
# Deconvolution layer. Resulting image shape: (batch, 14, 14, 64)
|
||||
self.conv1 = layers.Conv2DTranspose(
|
||||
64, 4, strides=2, activation=None, data_format=data_format)
|
||||
|
||||
# Deconvolution layer. Resulting image shape: (batch, 28, 28, 1)
|
||||
self.conv2 = layers.Conv2DTranspose(
|
||||
1, 2, strides=2, activation=tf.nn.sigmoid, data_format=data_format)
|
||||
|
||||
def call(self, inputs):
|
||||
"""Return a batch of generated images.
|
||||
Users should invoke __call__ to run the network, which delegates to this
|
||||
method (and not call this method directly).
|
||||
Args:
|
||||
inputs: A batch of noise vectors as a Tensor with shape
|
||||
[batch_size, length of noise vectors].
|
||||
Returns:
|
||||
A Tensor containing generated images. If data_format is 'channels_last',
|
||||
the shape of returned images is [batch_size, 28, 28, 1], else
|
||||
[batch_size, 1, 28, 28]
|
||||
"""
|
||||
|
||||
x = self.fc1(inputs)
|
||||
x = tf.reshape(x, shape=self._pre_conv_shape)
|
||||
x = self.conv1(x)
|
||||
x = self.conv2(x)
|
||||
return x
|
||||
|
||||
|
||||
def discriminator_loss(discriminator_real_outputs, discriminator_gen_outputs):
|
||||
"""
|
||||
Original discriminator loss for GANs, with label smoothing.
|
||||
See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661) for more
|
||||
details.
|
||||
Args:
|
||||
discriminator_real_outputs: Discriminator output on real data.
|
||||
discriminator_gen_outputs: Discriminator output on generated data. Expected
|
||||
to be in the range of (-inf, inf).
|
||||
Returns:
|
||||
A scalar loss Tensor.
|
||||
"""
|
||||
|
||||
loss_on_real = tf.compat.v1.losses.sigmoid_cross_entropy(
|
||||
tf.ones_like(discriminator_real_outputs),
|
||||
discriminator_real_outputs,
|
||||
label_smoothing=0.25)
|
||||
loss_on_generated = tf.compat.v1.losses.sigmoid_cross_entropy(
|
||||
tf.zeros_like(discriminator_gen_outputs), discriminator_gen_outputs)
|
||||
loss = loss_on_real + loss_on_generated
|
||||
tf.contrib.summary.scalar('discriminator_loss', loss)
|
||||
return loss
|
||||
|
||||
|
||||
def generator_loss(discriminator_gen_outputs):
|
||||
"""
|
||||
Original generator loss for GANs.
|
||||
L = -log(sigmoid(D(G(z))))
|
||||
See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661)
|
||||
for more details.
|
||||
Args:
|
||||
discriminator_gen_outputs: Discriminator output on generated data. Expected
|
||||
to be in the range of (-inf, inf).
|
||||
Returns:
|
||||
A scalar loss Tensor.
|
||||
"""
|
||||
loss = tf.compat.v1.losses.sigmoid_cross_entropy(
|
||||
tf.ones_like(discriminator_gen_outputs), discriminator_gen_outputs)
|
||||
tf.contrib.summary.scalar('generator_loss', loss)
|
||||
return loss
|
||||
|
||||
|
||||
def train_one_epoch(generator, discriminator, generator_optimizer,
|
||||
discriminator_optimizer, dataset, step_counter,
|
||||
log_interval, noise_dim):
|
||||
"""
|
||||
Train `generator` and `discriminator` models on `dataset`.
|
||||
Args:
|
||||
generator: Generator model.
|
||||
discriminator: Discriminator model.
|
||||
generator_optimizer: Optimizer to use for generator.
|
||||
discriminator_optimizer: Optimizer to use for discriminator.
|
||||
dataset: Dataset of images to train on.
|
||||
step_counter: An integer variable, used to write summaries regularly.
|
||||
log_interval: How many steps to wait between logging and collecting
|
||||
summaries.
|
||||
noise_dim: Dimension of noise vector to use.
|
||||
"""
|
||||
|
||||
total_generator_loss = 0.0
|
||||
total_discriminator_loss = 0.0
|
||||
for (batch_index, images) in enumerate(dataset):
|
||||
with tf.device('/cpu:0'):
|
||||
tf.compat.v1.assign_add(step_counter, 1)
|
||||
with tf.contrib.summary.record_summaries_every_n_global_steps(
|
||||
log_interval, global_step=step_counter):
|
||||
current_batch_size = images.shape[0]
|
||||
noise = tf.random.uniform(
|
||||
shape=[current_batch_size, noise_dim],
|
||||
minval=-1.,
|
||||
maxval=1.,
|
||||
seed=batch_index)
|
||||
|
||||
# we can use 2 tapes or a single persistent tape.
|
||||
# Using two tapes is memory efficient since intermediate tensors can be
|
||||
# released between the two .gradient() calls below
|
||||
with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
|
||||
generated_images = generator(noise)
|
||||
tf.contrib.summary.image(
|
||||
'generated_images',
|
||||
tf.reshape(generated_images, [-1, 28, 28, 1]),
|
||||
max_images=10)
|
||||
|
||||
discriminator_gen_outputs = discriminator(generated_images)
|
||||
discriminator_real_outputs = discriminator(images)
|
||||
discriminator_loss_val = discriminator_loss(discriminator_real_outputs,
|
||||
discriminator_gen_outputs)
|
||||
total_discriminator_loss += discriminator_loss_val
|
||||
|
||||
generator_loss_val = generator_loss(discriminator_gen_outputs)
|
||||
total_generator_loss += generator_loss_val
|
||||
|
||||
generator_grad = gen_tape.gradient(generator_loss_val,
|
||||
generator.variables)
|
||||
discriminator_grad = disc_tape.gradient(discriminator_loss_val,
|
||||
discriminator.variables)
|
||||
|
||||
generator_optimizer.apply_gradients(
|
||||
zip(generator_grad, generator.variables))
|
||||
discriminator_optimizer.apply_gradients(
|
||||
zip(discriminator_grad, discriminator.variables))
|
||||
|
||||
if log_interval and batch_index > 0 and batch_index % log_interval == 0:
|
||||
print('Batch #%d\tAverage Generator Loss: %.6f\tAverage Discriminator Loss: %.6f' %
|
||||
(batch_index, total_generator_loss / batch_index, total_discriminator_loss / batch_index))
|
||||
|
||||
|
||||
def main(_):
|
||||
(device, data_format) = ('/gpu:0', 'channels_first')
|
||||
if FLAGS.no_gpu or tf.contrib.eager.num_gpus() <= 0:
|
||||
(device, data_format) = ('/cpu:0', 'channels_last')
|
||||
print('Using device %s, and data format %s.' % (device, data_format))
|
||||
|
||||
# Load the datasets
|
||||
data = input_data.read_data_sets(FLAGS.data_dir)
|
||||
dataset = (
|
||||
tf.data.Dataset.from_tensor_slices(data.train.images[:1280]).shuffle(60000).batch(FLAGS.batch_size))
|
||||
|
||||
# Create the models and optimizers.
|
||||
model_objects = {
|
||||
'generator': Generator(data_format),
|
||||
'discriminator': Discriminator(data_format),
|
||||
'generator_optimizer': tf.compat.v1.train.AdamOptimizer(FLAGS.lr),
|
||||
'discriminator_optimizer': tf.compat.v1.train.AdamOptimizer(FLAGS.lr),
|
||||
'step_counter': tf.compat.v1.train.get_or_create_global_step(),
|
||||
}
|
||||
|
||||
# Prepare summary writer and checkpoint info
|
||||
summary_writer = tf.contrib.summary.create_file_writer(
|
||||
FLAGS.output_dir, flush_millis=1000)
|
||||
checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
|
||||
latest_cpkt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
|
||||
if latest_cpkt:
|
||||
print('Using latest checkpoint at ' + latest_cpkt)
|
||||
checkpoint = tf.train.Checkpoint(**model_objects)
|
||||
# Restore variables on creation if a checkpoint exists.
|
||||
checkpoint.restore(latest_cpkt)
|
||||
|
||||
with tf.device(device):
|
||||
for _ in range(3):
|
||||
start = time.time()
|
||||
with summary_writer.as_default():
|
||||
train_one_epoch(dataset=dataset, log_interval=FLAGS.log_interval,
|
||||
noise_dim=FLAGS.noise, **model_objects)
|
||||
end = time.time()
|
||||
checkpoint.save(checkpoint_prefix)
|
||||
print('\nTrain time for epoch #%d (step %d): %f' %
|
||||
(checkpoint.save_counter.numpy(), checkpoint.step_counter.numpy(), end - start))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
'--data-dir',
|
||||
type=str,
|
||||
default=os.path.join(gettempdir(), 'tensorflow', 'mnist', 'input_data'),
|
||||
help='Directory for storing input data (default /tmp/tensorflow/mnist/input_data)')
|
||||
parser.add_argument(
|
||||
'--batch-size',
|
||||
type=int,
|
||||
default=16,
|
||||
metavar='N',
|
||||
help='input batch size for training (default: 128)')
|
||||
parser.add_argument(
|
||||
'--log-interval',
|
||||
type=int,
|
||||
default=1,
|
||||
metavar='N',
|
||||
help='number of batches between logging and writing summaries (default: 100)')
|
||||
parser.add_argument(
|
||||
'--output_dir',
|
||||
type=str,
|
||||
default=os.path.join(gettempdir(), 'tensorflow'),
|
||||
metavar='DIR',
|
||||
help='Directory to write TensorBoard summaries (defaults to none)')
|
||||
parser.add_argument(
|
||||
'--checkpoint_dir',
|
||||
type=str,
|
||||
default=os.path.join(gettempdir(), 'tensorflow', 'mnist', 'checkpoints'),
|
||||
metavar='DIR',
|
||||
help='Directory to save checkpoints in (once per epoch) (default /tmp/tensorflow/mnist/checkpoints/)')
|
||||
parser.add_argument(
|
||||
'--lr',
|
||||
type=float,
|
||||
default=0.001,
|
||||
metavar='LR',
|
||||
help='learning rate (default: 0.001)')
|
||||
parser.add_argument(
|
||||
'--noise',
|
||||
type=int,
|
||||
default=100,
|
||||
metavar='N',
|
||||
help='Length of noise vector for generator input (default: 100)')
|
||||
parser.add_argument(
|
||||
'--no-gpu',
|
||||
action='store_true',
|
||||
default=False,
|
||||
help='disables GPU usage even if a GPU is available')
|
||||
|
||||
FLAGS, unparsed = parser.parse_known_args()
|
||||
|
||||
tf.compat.v1.app.run(main=main, argv=[sys.argv[0]] + unparsed)
|
||||
42
examples/frameworks/tensorflow/manual_model_upload.py
Normal file
42
examples/frameworks/tensorflow/manual_model_upload.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# TRAINS - Example of manual model configuration and uploading
|
||||
#
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import tensorflow as tf
|
||||
from trains import Task
|
||||
|
||||
task = Task.init(project_name='examples', task_name='Model configuration and upload')
|
||||
|
||||
model = tf.Module()
|
||||
|
||||
# Connect a local configuration file
|
||||
config_file = os.path.join('..', '..', 'reporting', 'data_samples', 'sample.json')
|
||||
config_file = task.connect_configuration(config_file)
|
||||
# then read configuration as usual, the backend will contain a copy of it.
|
||||
# later when executing remotely, the returned `config_file` will be a temporary file
|
||||
# containing a new copy of the configuration retrieved form the backend
|
||||
# # model_config_dict = json.load(open(config_file, 'rt'))
|
||||
|
||||
# Or Store dictionary of definition for a specific network design
|
||||
model_config_dict = {
|
||||
'value': 13.37,
|
||||
'dict': {'sub_value': 'string', 'sub_integer': 11},
|
||||
'list_of_ints': [1, 2, 3, 4],
|
||||
}
|
||||
model_config_dict = task.connect_configuration(model_config_dict)
|
||||
|
||||
# We now update the dictionary after connecting it, and the changes will be tracked as well.
|
||||
model_config_dict['new value'] = 10
|
||||
model_config_dict['value'] *= model_config_dict['new value']
|
||||
|
||||
# store the label enumeration of the training model
|
||||
labels = {'background': 0, 'cat': 1, 'dog': 2}
|
||||
task.connect_label_enumeration(labels)
|
||||
|
||||
# storing the model, it will have the task network configuration and label enumeration
|
||||
print('Any model stored from this point onwards, will contain both model_config and label_enumeration')
|
||||
|
||||
tempdir = tempfile.mkdtemp()
|
||||
tf.saved_model.save(model, os.path.join(tempdir, "model"))
|
||||
print('Model saved')
|
||||
3
examples/frameworks/tensorflow/requirements.txt
Normal file
3
examples/frameworks/tensorflow/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
tensorboard>=2.0
|
||||
tensorflow>=2.0
|
||||
trains
|
||||
279
examples/frameworks/tensorflow/tensorboard_pr_curve.py
Normal file
279
examples/frameworks/tensorflow/tensorboard_pr_curve.py
Normal file
@@ -0,0 +1,279 @@
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Create sample PR curve summary data.
|
||||
We have 3 classes: R, G, and B. We generate colors within RGB space from 3
|
||||
normal distributions (1 at each corner of the color triangle: [255, 0, 0],
|
||||
[0, 255, 0], and [0, 0, 255]).
|
||||
The true label of each random color is associated with the normal distribution
|
||||
that generated it.
|
||||
Using 3 other normal distributions (over the distance each color is from a
|
||||
corner of the color triangle - RGB), we then compute the probability that each
|
||||
color belongs to the class. We use those probabilities to generate PR curves.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os.path
|
||||
|
||||
from tempfile import gettempdir
|
||||
from absl import app
|
||||
from absl import flags
|
||||
from six.moves import xrange # pylint: disable=redefined-builtin
|
||||
import tensorflow as tf
|
||||
|
||||
from tensorboard.plugins.pr_curve import summary
|
||||
from trains import Task
|
||||
|
||||
|
||||
task = Task.init(project_name='examples', task_name='tensorboard pr_curve')
|
||||
|
||||
tf.compat.v1.disable_v2_behavior()
|
||||
FLAGS = flags.FLAGS
|
||||
|
||||
flags.DEFINE_string(
|
||||
"logdir",
|
||||
os.path.join(gettempdir(), "pr_curve_demo"),
|
||||
"Directory into which to write TensorBoard data.",
|
||||
)
|
||||
|
||||
flags.DEFINE_integer(
|
||||
"steps", 10, "Number of steps to generate for each PR curve."
|
||||
)
|
||||
|
||||
|
||||
def start_runs(
    logdir, steps, run_name, thresholds, mask_every_other_prediction=False
):
    """Generate a PR curve with precision and recall evenly weighted.

    Builds a TF1 graph that samples random colors, assigns per-class
    probabilities, emits one pr_curve summary per class, and writes `steps`
    summary events under ``logdir/run_name``.

    Arguments:
      logdir: The directory into which to store all the runs' data.
      steps: The number of steps to run for.
      run_name: The name of the run.
      thresholds: The number of thresholds to use for PR curves.
      mask_every_other_prediction: Whether to mask every other prediction by
        alternating weights between 0 and 1.
    """
    tf.compat.v1.reset_default_graph()
    tf.compat.v1.set_random_seed(42)

    # Create a normal distribution used to generate true color samples.
    distribution = tf.compat.v1.distributions.Normal(loc=0.0, scale=142.0)

    # Sample the distribution to generate colors; generate different numbers
    # of each color. The first dimension is the count of examples.
    #
    # The calls to sample() are given fixed random seed values that are "magic"
    # in that they correspond to the default seeds for those ops when the PR
    # curve test (which depends on this code) was written. We've pinned these
    # instead of continuing to use the defaults since the defaults are based on
    # node IDs from the sequence of nodes added to the graph, which can silently
    # change when this code or any TF op implementations it uses are modified.
    #
    # TODO(nickfelt): redo the PR curve test to avoid reliance on random seeds.

    # Generate reds: high first (R) channel, low G/B.
    number_of_reds = 100
    true_reds = tf.clip_by_value(
        tf.concat(
            [
                255 - tf.abs(distribution.sample([number_of_reds, 1], seed=11)),
                tf.abs(distribution.sample([number_of_reds, 2], seed=34)),
            ],
            axis=1,
        ),
        0,
        255,
    )

    # Generate greens: high middle (G) channel.
    number_of_greens = 200
    true_greens = tf.clip_by_value(
        tf.concat(
            [
                tf.abs(distribution.sample([number_of_greens, 1], seed=61)),
                255
                - tf.abs(distribution.sample([number_of_greens, 1], seed=82)),
                tf.abs(distribution.sample([number_of_greens, 1], seed=105)),
            ],
            axis=1,
        ),
        0,
        255,
    )

    # Generate blues: high last (B) channel.
    number_of_blues = 150
    true_blues = tf.clip_by_value(
        tf.concat(
            [
                tf.abs(distribution.sample([number_of_blues, 2], seed=132)),
                255
                - tf.abs(distribution.sample([number_of_blues, 1], seed=153)),
            ],
            axis=1,
        ),
        0,
        255,
    )

    # Assign each color a one-hot boolean vector based on its true label.
    labels = tf.concat(
        [
            tf.tile(tf.constant([[True, False, False]]), (number_of_reds, 1)),
            tf.tile(tf.constant([[False, True, False]]), (number_of_greens, 1)),
            tf.tile(tf.constant([[False, False, True]]), (number_of_blues, 1)),
        ],
        axis=0,
    )

    # We introduce 3 normal distributions. They are used to predict whether a
    # color falls under a certain class (based on distances from corners of
    # the color triangle). The distributions vary per color, and they narrow
    # over time (as `iteration` grows the scale shrinks).
    initial_standard_deviations = [v + FLAGS.steps for v in (158, 200, 242)]
    iteration = tf.compat.v1.placeholder(tf.int32, shape=[])
    red_predictor = tf.compat.v1.distributions.Normal(
        loc=0.0,
        scale=tf.cast(
            initial_standard_deviations[0] - iteration, dtype=tf.float32
        ),
    )
    green_predictor = tf.compat.v1.distributions.Normal(
        loc=0.0,
        scale=tf.cast(
            initial_standard_deviations[1] - iteration, dtype=tf.float32
        ),
    )
    blue_predictor = tf.compat.v1.distributions.Normal(
        loc=0.0,
        scale=tf.cast(
            initial_standard_deviations[2] - iteration, dtype=tf.float32
        ),
    )

    # Make predictions (assign 3 probabilities to each color based on each
    # color's distance to each of the 3 corners). We seek double the area in
    # the right tail of the normal distribution.
    examples = tf.concat([true_reds, true_greens, true_blues], axis=0)
    probabilities_colors_are_red = (
        1
        - red_predictor.cdf(
            tf.norm(tensor=examples - tf.constant([255.0, 0, 0]), axis=1)
        )
    ) * 2
    probabilities_colors_are_green = (
        1
        - green_predictor.cdf(
            tf.norm(tensor=examples - tf.constant([0, 255.0, 0]), axis=1)
        )
    ) * 2
    probabilities_colors_are_blue = (
        1
        - blue_predictor.cdf(
            tf.norm(tensor=examples - tf.constant([0, 0, 255.0]), axis=1)
        )
    ) * 2

    predictions = (
        probabilities_colors_are_red,
        probabilities_colors_are_green,
        probabilities_colors_are_blue,
    )

    # This is the crucial piece. We write data required for generating PR
    # curves: 1 summary per class because we create 1 PR curve per class.
    for i, color in enumerate(("red", "green", "blue")):
        description = (
            "The probabilities used to create this PR curve are "
            "generated from a normal distribution. Its standard "
            "deviation is initially %0.0f and decreases over time."
            % initial_standard_deviations[i]
        )

        weights = None
        if mask_every_other_prediction:
            # Assign a weight of 0 to every even-indexed prediction.
            # Odd-indexed predictions are assigned a default weight of 1.
            consecutive_indices = tf.reshape(
                tf.range(tf.size(input=predictions[i])),
                tf.shape(input=predictions[i]),
            )
            weights = tf.cast(consecutive_indices % 2, dtype=tf.float32)

        summary.op(
            name=color,
            labels=labels[:, i],
            predictions=predictions[i],
            num_thresholds=thresholds,
            weights=weights,
            display_name="classifying %s" % color,
            description=description,
        )
    merged_summary_op = tf.compat.v1.summary.merge_all()
    events_directory = os.path.join(logdir, run_name)
    # Fix: the original leaked the session (it was never closed). Using the
    # session as a context manager guarantees release of its resources.
    with tf.compat.v1.Session() as sess:
        writer = tf.compat.v1.summary.FileWriter(events_directory, sess.graph)

        for step in xrange(steps):
            merged_summary = sess.run(
                merged_summary_op, feed_dict={iteration: step}
            )
            writer.add_summary(merged_summary, step)

        writer.close()
||||
|
||||
|
||||
def run_all(logdir, steps, thresholds, verbose=False):
    """Generate PR curve summaries.

    Arguments:
      logdir: The directory into which to store all the runs' data.
      steps: The number of steps to run for.
      thresholds: The number of thresholds to use for PR curves.
      verbose: Whether to print the names of runs into stdout during execution.
    """
    # Each entry: (run name, whether to mask every other prediction).
    # "colors" weights all predictions evenly; the second run alternates
    # weights between 0 and 1.
    run_configs = (
        ("colors", False),
        ("mask_every_other_prediction", True),
    )
    for run_name, mask in run_configs:
        if verbose:
            print("--- Running: %s" % run_name)
        start_runs(
            logdir=logdir,
            steps=steps,
            run_name=run_name,
            thresholds=thresholds,
            mask_every_other_prediction=mask,
        )
|
||||
|
||||
|
||||
def main(unused_argv):
    """Entry point: generate all demo runs and report the output location."""
    logdir = FLAGS.logdir
    print("Saving output to %s." % logdir)
    # 50 thresholds per PR curve; verbose so each run name is echoed.
    run_all(logdir, FLAGS.steps, 50, verbose=True)
    print("Done. Output saved to %s." % logdir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # absl's app.run parses the command-line flags, then invokes main().
    app.run(main)
|
||||
76
examples/frameworks/tensorflow/tensorboard_toy.py
Normal file
76
examples/frameworks/tensorflow/tensorboard_toy.py
Normal file
@@ -0,0 +1,76 @@
|
||||
# TRAINS - Example of tensorboard with tensorflow (without any actual training)
|
||||
#
|
||||
import os
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
from tempfile import gettempdir
|
||||
from PIL import Image
|
||||
|
||||
from trains import Task
|
||||
|
||||
|
||||
def generate_summary(k, step):
    """Write one iteration's worth of toy TensorBoard summaries.

    :param k: Scalar in [0, 1) that drives the distribution parameters.
    :param step: Iteration number (int64 tensor) attached to every summary.
    """
    # Normal distribution whose mean drifts upward with k.
    shifting_mean = tf.random.normal(shape=[1000], mean=(5 * k), stddev=1)
    tf.summary.histogram("normal/moving_mean", shifting_mean, step=step)
    tf.summary.scalar("normal/value", shifting_mean[-1], step=step)

    # Normal distribution whose spread narrows as k grows.
    narrowing_normal = tf.random.normal(shape=[1000], mean=0, stddev=1-k)
    tf.summary.histogram("normal/shrinking_variance", narrowing_normal, step=step)
    tf.summary.scalar("normal/variance_shrinking_normal", narrowing_normal[-1], step=step)

    # Merge the two normals into a single bimodal dataset.
    bimodal = tf.concat([shifting_mean, narrowing_normal], 0)
    tf.summary.histogram("normal/bimodal", bimodal, step=step)
    tf.summary.scalar("normal/normal_combined", bimodal[0], step=step)

    # Gamma, Poisson and uniform samples, each parameterized by k.
    gamma = tf.random.gamma(shape=[1000], alpha=k)
    tf.summary.histogram("gamma", gamma, step=step)

    poisson = tf.random.poisson(shape=[1000], lam=k)
    tf.summary.histogram("poisson", poisson, step=step)

    uniform = tf.random.uniform(shape=[1000], maxval=k*10)
    tf.summary.histogram("uniform", uniform, step=step)

    # Finally, one histogram over all the distributions combined.
    everything = [shifting_mean, narrowing_normal, gamma, poisson, uniform]
    tf.summary.histogram("all_combined", tf.concat(everything, 0), step=step)

    # Log a text value as well.
    tf.summary.text("this is a test", "This is the content", step=step)

    # Load a sample image and build 4d [batch, col, row, channels] variants:
    # full RGB, single-channel grayscale, and RGBA with a fully-opaque alpha.
    picasso = Image.open(os.path.join('..', '..', 'reporting', 'data_samples', 'picasso.jpg'))
    rgb = np.asarray(picasso)
    gray = rgb[:, :, 0][np.newaxis, :, :, np.newaxis]
    alpha = 255 * np.atleast_3d(np.ones(shape=rgb.shape[:2], dtype=np.uint8))
    rgba = np.concatenate((rgb, alpha), axis=2)[np.newaxis, :, :, :]
    rgb = rgb[np.newaxis, :, :, :]

    tf.summary.image("test", rgb, max_outputs=10, step=step)
    tf.summary.image("test_gray", gray, max_outputs=10, step=step)
    tf.summary.image("test_rgba", rgba, max_outputs=10, step=step)
||||
|
||||
|
||||
# Connect the script to TRAINS so the TensorBoard reports below are captured.
task = Task.init(project_name='examples', task_name='tensorboard toy example')

# create the tensorboard file writer in a temp folder
writer = tf.summary.create_file_writer(os.path.join(gettempdir(), "toy_tb_example"))

# Setup a loop and write the summaries to disk.
# k_val sweeps 0 -> (N-1)/N, driving the distribution parameters each step.
N = 40
for step in range(N):
    k_val = step/float(N)
    # `as_default` routes the tf.summary calls inside generate_summary to
    # this writer; step is cast to int64 as required by the summary API.
    with writer.as_default():
        generate_summary(k_val, tf.cast(step, tf.int64))

print('Tensorboard toy example done')
|
||||
@@ -1,5 +1,8 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import os
|
||||
from tempfile import gettempdir
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from tensorflow.keras.layers import Dense, Flatten, Conv2D
|
||||
@@ -58,6 +61,7 @@ train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy
|
||||
test_loss = tf.keras.metrics.Mean(name='test_loss', dtype=tf.float32)
|
||||
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
|
||||
|
||||
|
||||
# Use tf.GradientTape to train the model
|
||||
@tf.function
|
||||
def train_step(images, labels):
|
||||
@@ -82,14 +86,14 @@ def test_step(images, labels):
|
||||
|
||||
|
||||
# Set up summary writers to write the summaries to disk in a different logs directory
|
||||
train_log_dir = '/tmp/logs/gradient_tape/train'
|
||||
test_log_dir = '/tmp/logs/gradient_tape/test'
|
||||
train_log_dir = os.path.join(gettempdir(), 'logs', 'gradient_tape', 'train')
|
||||
test_log_dir = os.path.join(gettempdir(), 'logs', 'gradient_tape', 'test')
|
||||
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
|
||||
test_summary_writer = tf.summary.create_file_writer(test_log_dir)
|
||||
|
||||
# Set up checkpoints manager
|
||||
ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, net=model)
|
||||
manager = tf.train.CheckpointManager(ckpt, '/tmp/tf_ckpts', max_to_keep=3)
|
||||
manager = tf.train.CheckpointManager(ckpt, os.path.join(gettempdir(), 'tf_ckpts'), max_to_keep=3)
|
||||
ckpt.restore(manager.latest_checkpoint)
|
||||
if manager.latest_checkpoint:
|
||||
print("Restored from {}".format(manager.latest_checkpoint))
|
||||
6
examples/frameworks/xgboost/requirements.txt
Normal file
6
examples/frameworks/xgboost/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
sklearn
|
||||
trains
|
||||
xgboost>=0.90 ; python_version >= '3'
|
||||
xgboost>=0.82 ; python_version < '3'
|
||||
# sudo apt-get install graphviz
|
||||
graphviz>=0.8
|
||||
@@ -22,11 +22,12 @@ param = {
|
||||
'num_class': 3} # the number of classes that exist in this datset
|
||||
num_round = 20 # the number of training iterations
|
||||
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
# try to load a model
|
||||
bst = xgb.Booster(params=param, model_file='xgb.01.model')
|
||||
bst.load_model('xgb.01.model')
|
||||
except:
|
||||
except Exception:
|
||||
bst = None
|
||||
|
||||
# if we dont have one train a model
|
||||
File diff suppressed because one or more lines are too long
@@ -1,89 +0,0 @@
|
||||
# TRAINS - Example of manual graphs and statistics reporting
#
import os
from PIL import Image
import numpy as np
import logging
from trains import Task


task = Task.init(project_name="examples", task_name="Manual reporting")

# standard python logging (captured by TRAINS)
logging.info("This is an info message")

# this is loguru test example; skipped quietly when loguru is not installed
try:
    from loguru import logger
    logger.info("That's it, beautiful and simple logging! (using ANSI colors)")
except ImportError:
    pass

# get TRAINS logger object for any metrics / reports
logger = Task.current_task().get_logger()

# log text
logger.report_text("hello")

# report scalar values
logger.report_scalar("example_scalar", "series A", iteration=0, value=100)
logger.report_scalar("example_scalar", "series A", iteration=1, value=200)

# report histogram
histogram = np.random.randint(10, size=10)
logger.report_histogram("example_histogram", "random histogram", iteration=1, values=histogram,
                        xaxis="title x", yaxis="title y")

# report confusion matrix
confusion = np.random.randint(10, size=(10, 10))
logger.report_matrix("example_confusion", "ignored", iteration=1, matrix=confusion, xaxis="title X", yaxis="title Y")

# report 3d surface
logger.report_surface("example_surface", "series1", iteration=1, matrix=confusion,
                      xaxis="title X", yaxis="title Y", zaxis="title Z")

# report 2d scatter plot
scatter2d = np.hstack((np.atleast_2d(np.arange(0, 10)).T, np.random.randint(10, size=(10, 1))))
logger.report_scatter2d("example_scatter", "series_xy", iteration=1, scatter=scatter2d,
                        xaxis="title x", yaxis="title y")

# report 3d scatter plot
scatter3d = np.random.randint(10, size=(10, 3))
logger.report_scatter3d("example_scatter_3d", "series_xyz", iteration=1, scatter=scatter3d,
                        xaxis="title x", yaxis="title y", zaxis="title z")

# reporting images
# Fix: np.float was a deprecated alias for the builtin float (== float64) and
# was removed in NumPy 1.24; use the explicit dtype instead.
m = np.eye(256, 256, dtype=np.float64)
logger.report_image("test case", "image float", iteration=1, image=m)
m = np.eye(256, 256, dtype=np.uint8)*255
logger.report_image("test case", "image uint8", iteration=1, image=m)
m = np.concatenate((np.atleast_3d(m), np.zeros((256, 256, 2), dtype=np.uint8)), axis=2)
logger.report_image("test case", "image color red", iteration=1, image=m)
image_open = Image.open(os.path.join("samples", "picasso.jpg"))
logger.report_image("test case", "image PIL", iteration=1, image=image_open)


# reporting tables (optional - requires pandas)
try:
    import pandas as pd
    # Report table - DataFrame with index
    df = pd.DataFrame(
        {
            'num_legs': [2, 4, 8, 0],
            'num_wings': [2, 0, 0, 0],
            'num_specimen_seen': [10, 2, 1, 8]
        },
        index=['falcon', 'dog', 'spider', 'fish']
    )
    df.index.name = 'id'
    logger.report_table("test table pd", "PD with index", 1, table_plot=df)

    # Report table - CSV from path
    csv_url = "https://raw.githubusercontent.com/plotly/datasets/master/Mining-BTC-180.csv"
    logger.report_table("test table csv", "remote csv", 1, url=csv_url)
except ImportError:
    pass


# flush reports (otherwise it will be flushed in the background, every couple of seconds)
logger.flush()
|
||||
63
examples/reporting/3d_plots_reporting.py
Normal file
63
examples/reporting/3d_plots_reporting.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# TRAINS - Example of manual graphs and statistics reporting
|
||||
#
|
||||
import numpy as np
|
||||
|
||||
from trains import Task, Logger
|
||||
|
||||
|
||||
def report_plots(logger, iteration=0):
    # type: (Logger, int) -> ()
    """Send the 3D demo plots (a surface and a scatter) to the plots section.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    # Random 10x10 height map for the 3d surface report.
    height_map = np.random.randint(10, size=(10, 10))
    logger.report_surface(
        "example_surface",
        "series1",
        iteration=iteration,
        matrix=height_map,
        xaxis="title X",
        yaxis="title Y",
        zaxis="title Z",
    )

    # Ten random (x, y, z) points for the 3d scatter report.
    xyz_points = np.random.randint(10, size=(10, 3))
    logger.report_scatter3d(
        "example_scatter_3d",
        "series_xyz",
        iteration=iteration,
        scatter=xyz_points,
        xaxis="title x",
        yaxis="title y",
        zaxis="title z",
    )
|
||||
|
||||
|
||||
def main():
    """Run the 3D plot reporting demo end to end."""
    # Create the experiment Task
    task = Task.init(project_name="examples", task_name="3D plot reporting")

    print('reporting 3D plot graphs')

    # The same logger is also reachable from anywhere in the code via
    # Task.current_task().get_logger().
    demo_logger = task.get_logger()

    # send the demo graphs
    report_plots(demo_logger)

    # Flush now rather than waiting for the periodic background flush
    # (which also happens at process exit).
    demo_logger.flush()

    print('We are done reporting, have a great day :)')
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the demo only when executed as a script, not on import.
    main()
|
||||
@@ -1,3 +1,4 @@
|
||||
import os
|
||||
from time import sleep
|
||||
|
||||
import pandas as pd
|
||||
@@ -5,12 +6,16 @@ import numpy as np
|
||||
from PIL import Image
|
||||
from trains import Task
|
||||
|
||||
task = Task.init('examples', 'artifacts toy')
|
||||
task = Task.init('examples', 'artifacts example')
|
||||
|
||||
df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
|
||||
'num_wings': [2, 0, 0, 0],
|
||||
'num_specimen_seen': [10, 2, 1, 8]},
|
||||
index=['falcon', 'dog', 'spider', 'fish'])
|
||||
df = pd.DataFrame(
|
||||
{
|
||||
'num_legs': [2, 4, 8, 0],
|
||||
'num_wings': [2, 0, 0, 0],
|
||||
'num_specimen_seen': [10, 2, 1, 8]
|
||||
},
|
||||
index=['falcon', 'dog', 'spider', 'fish']
|
||||
)
|
||||
|
||||
# Register Pandas object as artifact to watch
|
||||
# (it will be monitored in the background and automatically synced and uploaded)
|
||||
@@ -23,20 +28,20 @@ Task.current_task().get_registered_artifacts()['train'].sample(frac=0.5, replace
|
||||
# add and upload pandas.DataFrame (onetime snapshot of the object)
|
||||
task.upload_artifact('Pandas', artifact_object=df)
|
||||
# add and upload local file artifact
|
||||
task.upload_artifact('local file', artifact_object='samples/dancing.jpg')
|
||||
task.upload_artifact('local file', artifact_object=os.path.join('data_samples', 'dancing.jpg'))
|
||||
# add and upload dictionary stored as JSON)
|
||||
task.upload_artifact('dictionary', df.to_dict())
|
||||
# add and upload Numpy Object (stored as .npz file)
|
||||
task.upload_artifact('Numpy Eye', np.eye(100, 100))
|
||||
# add and upload Image (stored as .png file)
|
||||
im = Image.open('samples/dancing.jpg')
|
||||
im = Image.open(os.path.join('data_samples', 'dancing.jpg'))
|
||||
task.upload_artifact('pillow_image', im)
|
||||
# add and upload a folder, artifact_object should be the folder path
|
||||
task.upload_artifact('local folder', artifact_object='samples/')
|
||||
task.upload_artifact('local folder', artifact_object=os.path.join('data_samples'))
|
||||
# add and upload a wildcard
|
||||
task.upload_artifact('local folder', artifact_object='samples/*.jpg')
|
||||
task.upload_artifact('wildcard jpegs', artifact_object=os.path.join('data_samples', '*.jpg'))
|
||||
|
||||
# do something
|
||||
# do something here
|
||||
sleep(1.)
|
||||
print(df)
|
||||
|
||||
|
Before Width: | Height: | Size: 40 KiB After Width: | Height: | Size: 40 KiB |
|
Before Width: | Height: | Size: 112 KiB After Width: | Height: | Size: 112 KiB |
245
examples/reporting/html_reporting.py
Normal file
245
examples/reporting/html_reporting.py
Normal file
@@ -0,0 +1,245 @@
|
||||
# TRAINS - Example of manual graphs and statistics reporting
|
||||
#
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
from bokeh.models import ColumnDataSource, GraphRenderer, Oval, StaticLayoutProvider
|
||||
from bokeh.palettes import Spectral5, Spectral8
|
||||
from bokeh.plotting import figure, output_file, save
|
||||
from bokeh.sampledata.autompg import autompg_clean as bokeh_df
|
||||
from bokeh.sampledata.periodic_table import elements
|
||||
from bokeh.transform import dodge, factor_cmap
|
||||
|
||||
from trains import Task, Logger
|
||||
|
||||
|
||||
def report_html_url(logger, iteration=0):
    # type: (Logger, int) -> ()
    """Attach a remote web page as an html debug sample.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    docs_page = "https://allegro.ai/docs/index.html"
    logger.report_media("html", "url_html", iteration=iteration, url=docs_page)
|
||||
|
||||
|
||||
def report_html_periodic_table(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting interactive (html) of periodic table to debug samples section
    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    # Bokeh writes the standalone document to this local file; it is uploaded
    # at the end via report_media.
    output_file("periodic.html")
    periods = ["I", "II", "III", "IV", "V", "VI", "VII"]
    groups = [str(x) for x in range(1, 19)]
    # NOTE(review): despite its name, this frame holds Bokeh's periodic-table
    # `elements` sample data (the autompg name looks copy-pasted) — consider
    # renaming.
    autompg_clean = elements.copy()
    autompg_clean["atomic mass"] = autompg_clean["atomic mass"].astype(str)
    autompg_clean["group"] = autompg_clean["group"].astype(str)
    # Map each numeric period to its Roman-numeral row label.
    autompg_clean["period"] = [periods[x - 1] for x in autompg_clean.period]
    # Drop elements with no group and the ends of the LA/AC series.
    autompg_clean = autompg_clean[autompg_clean.group != "-"]
    autompg_clean = autompg_clean[autompg_clean.symbol != "Lr"]
    autompg_clean = autompg_clean[autompg_clean.symbol != "Lu"]
    # Fill color per chemical family.
    cmap = {
        "alkali metal": "#a6cee3",
        "alkaline earth metal": "#1f78b4",
        "metal": "#d93b43",
        "halogen": "#999d9a",
        "metalloid": "#e08d49",
        "noble gas": "#eaeaea",
        "nonmetal": "#f1d4Af",
        "transition metal": "#599d7A",
    }
    source = ColumnDataSource(autompg_clean)
    p = figure(
        plot_width=900,
        plot_height=500,
        title="Periodic Table (omitting LA and AC Series)",
        x_range=groups,
        y_range=list(reversed(periods)),
        toolbar_location=None,
        tools="hover",
    )
    # One colored cell per element, colored by its family.
    p.rect(
        "group",
        "period",
        0.95,
        0.95,
        source=source,
        fill_alpha=0.6,
        legend_label="metal",
        color=factor_cmap(
            "metal", palette=list(cmap.values()), factors=list(cmap.keys())
        ),
    )
    text_props = {"source": source, "text_align": "left", "text_baseline": "middle"}
    x = dodge("group", -0.4, range=p.x_range)
    # Element symbol (bold), then atomic number / name / atomic mass stacked
    # within each cell at decreasing vertical offsets.
    r = p.text(x=x, y="period", text="symbol", **text_props)
    r.glyph.text_font_style = "bold"
    r = p.text(
        x=x, y=dodge("period", 0.3, range=p.y_range), text="atomic number", **text_props
    )
    r.glyph.text_font_size = "8pt"
    r = p.text(
        x=x, y=dodge("period", -0.35, range=p.y_range), text="name", **text_props
    )
    r.glyph.text_font_size = "5pt"
    r = p.text(
        x=x, y=dodge("period", -0.2, range=p.y_range), text="atomic mass", **text_props
    )
    r.glyph.text_font_size = "5pt"
    # Placeholders marking where the omitted LA/AC series would sit.
    p.text(
        x=["3", "3"],
        y=["VI", "VII"],
        text=["LA", "AC"],
        text_align="center",
        text_baseline="middle",
    )
    p.hover.tooltips = [
        ("Name", "@name"),
        ("Atomic number", "@{atomic number}"),
        ("Atomic mass", "@{atomic mass}"),
        ("Type", "@metal"),
        ("CPK color", "$color[hex, swatch]:CPK"),
        ("Electronic configuration", "@{electronic configuration}"),
    ]
    # Strip the chart furniture so only the table itself shows.
    p.outline_line_color = None
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_standoff = 0
    p.legend.orientation = "horizontal"
    p.legend.location = "top_center"
    save(p)
    logger.report_media("html", "periodic_html", iteration=iteration, local_path="periodic.html")
|
||||
|
||||
|
||||
def report_html_groupby(logger, iteration=0):
    # type: (Logger, int) -> ()
    """Report a bokeh grouped-bar chart (html) to the debug samples section.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    output_file("bar_pandas_groupby_nested.html")
    # Categorical grouping requires string columns.
    bokeh_df.cyl = bokeh_df.cyl.astype(str)
    bokeh_df.yr = bokeh_df.yr.astype(str)
    grouped = bokeh_df.groupby(by=["cyl", "mfr"])
    # Color each bar by its cylinder-count factor.
    cyl_colors = factor_cmap(
        "cyl_mfr", palette=Spectral5, factors=sorted(bokeh_df.cyl.unique()), end=1
    )
    bar_fig = figure(
        plot_width=800,
        plot_height=300,
        title="Mean MPG by # Cylinders and Manufacturer",
        x_range=grouped,
        toolbar_location=None,
        tooltips=[("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")],
    )
    bar_fig.vbar(
        x="cyl_mfr",
        top="mpg_mean",
        width=1,
        source=grouped,
        line_color="white",
        fill_color=cyl_colors,
    )
    # Cosmetic tweaks: anchor bars at zero, tighten padding, tilt the labels.
    bar_fig.y_range.start = 0
    bar_fig.x_range.range_padding = 0.05
    bar_fig.xgrid.grid_line_color = None
    bar_fig.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
    bar_fig.xaxis.major_label_orientation = 1.2
    bar_fig.outline_line_color = None
    save(bar_fig)
    logger.report_media(
        "html",
        "pandas_groupby_nested_html",
        iteration=iteration,
        local_path="bar_pandas_groupby_nested.html",
    )
|
||||
|
||||
|
||||
def report_html_graph(logger, iteration=0):
    # type: (Logger, int) -> ()
    """Report a bokeh star-shaped graph layout (html) to debug samples.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    nodes = 8
    node_indices = list(range(nodes))
    fig = figure(
        title="Graph Layout Demonstration",
        x_range=(-1.1, 1.1),
        y_range=(-1.1, 1.1),
        tools="",
        toolbar_location=None,
    )
    renderer = GraphRenderer()
    renderer.node_renderer.data_source.add(node_indices, "index")
    renderer.node_renderer.data_source.add(Spectral8, "color")
    renderer.node_renderer.glyph = Oval(height=0.1, width=0.2, fill_color="color")
    # Every node is connected to node 0 (a star topology).
    renderer.edge_renderer.data_source.data = dict(start=[0] * nodes, end=node_indices)
    # Place the nodes evenly around the unit circle.
    positions = {
        i: (math.cos(i * 2 * math.pi / 8), math.sin(i * 2 * math.pi / 8))
        for i in node_indices
    }
    renderer.layout_provider = StaticLayoutProvider(graph_layout=positions)
    fig.renderers.append(renderer)
    output_file("graph.html")
    save(fig)
    logger.report_media("html", "Graph_html", iteration=iteration, local_path="graph.html")
|
||||
|
||||
|
||||
def report_html_image(logger, iteration=0):
    # type: (Logger, int) -> ()
    """Report a bokeh image plot (html) to the debug samples section.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    # Build a 500x500 sin*cos interference pattern on a 10x10 domain.
    samples = 500
    axis_vals = np.linspace(0, 10, samples)
    grid_x, grid_y = np.meshgrid(axis_vals, axis_vals)
    pattern = np.sin(grid_x) * np.cos(grid_y)
    fig = figure(tooltips=[("x", "$x"), ("y", "$y"), ("value", "@image")])
    fig.x_range.range_padding = fig.y_range.range_padding = 0
    # must give a vector of image data for image parameter
    fig.image(image=[pattern], x=0, y=0, dw=10, dh=10, palette="Spectral11", level="image")
    fig.grid.grid_line_width = 0.5
    output_file("image.html", title="image.py example")
    save(fig)
    logger.report_media("html", "Spectral_html", iteration=iteration, local_path="image.html")
|
||||
|
||||
|
||||
def main():
    """Run every html reporting demo in sequence."""
    # Create the experiment Task
    task = Task.init(project_name="examples", task_name="html samples reporting")

    print('reporting html files into debug samples section')

    # The same logger is also reachable from anywhere via
    # Task.current_task().get_logger().
    demo_logger = task.get_logger()

    # report each html sample, in the same order as before
    reporters = (
        report_html_image,
        report_html_graph,
        report_html_groupby,
        report_html_periodic_table,
        report_html_url,
    )
    for reporter in reporters:
        reporter(demo_logger)

    # Flush now rather than waiting for the periodic background flush
    # (which also happens at process exit).
    demo_logger.flush()

    print('We are done reporting, have a great day :)')
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the demo only when executed as a script, not on import.
    main()
|
||||
@@ -1,4 +1,4 @@
|
||||
# TRAINS - example code, absl logging
|
||||
# TRAINS - example code, ArgumentParser parameter logging, absl parameter logging, and dictionary parameter logging
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
67
examples/reporting/image_reporting.py
Normal file
67
examples/reporting/image_reporting.py
Normal file
@@ -0,0 +1,67 @@
|
||||
# TRAINS - Example of manual graphs and statistics reporting
|
||||
#
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from trains import Task, Logger
|
||||
|
||||
|
||||
def report_debug_images(logger, iteration=0):
|
||||
# type: (Logger, int) -> ()
|
||||
"""
|
||||
reporting images to debug samples section
|
||||
:param logger: The task.logger to use for sending the plots
|
||||
:param iteration: The iteration number of the current reports
|
||||
"""
|
||||
|
||||
# report image as float image
|
||||
m = np.eye(256, 256, dtype=np.float)
|
||||
logger.report_image("image", "image float", iteration=iteration, image=m)
|
||||
|
||||
# report image as uint8
|
||||
m = np.eye(256, 256, dtype=np.uint8) * 255
|
||||
logger.report_image("image", "image uint8", iteration=iteration, image=m)
|
||||
|
||||
# report image as uint8 RGB
|
||||
m = np.concatenate((np.atleast_3d(m), np.zeros((256, 256, 2), dtype=np.uint8)), axis=2)
|
||||
logger.report_image("image", "image color red", iteration=iteration, image=m)
|
||||
|
||||
# report PIL Image object
|
||||
image_open = Image.open(os.path.join("data_samples", "picasso.jpg"))
|
||||
logger.report_image("image", "image PIL", iteration=iteration, image=image_open)
|
||||
|
||||
# Image can be uploaded via 'report_media' too.
|
||||
logger.report_media(
|
||||
"image",
|
||||
"image with report media",
|
||||
iteration=iteration,
|
||||
local_path=os.path.join("data_samples", "picasso.jpg"),
|
||||
file_extension="jpg",
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
# Create the experiment Task
|
||||
task = Task.init(project_name="examples", task_name="image reporting")
|
||||
|
||||
print('reporting a few debug images')
|
||||
|
||||
# Get the task logger,
|
||||
# You can also call Task.current_task().get_logger() from anywhere in your code.
|
||||
logger = task.get_logger()
|
||||
|
||||
# report debug images
|
||||
report_debug_images(logger)
|
||||
|
||||
# force flush reports
|
||||
# If flush is not called, reports are flushed in the background every couple of seconds,
|
||||
# and at the end of the process execution
|
||||
logger.flush()
|
||||
|
||||
print('We are done reporting, have a great day :)')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -4,19 +4,21 @@ import os
|
||||
from trains import Task, Logger
|
||||
|
||||
|
||||
task = Task.init(project_name="examples", task_name="Reporting audio and video")
|
||||
task = Task.init(project_name="examples", task_name="audio and video reporting")
|
||||
|
||||
# report an already uploaded video media (url)
|
||||
print('reporting audio and video samples to the debug samples section')
|
||||
|
||||
# report video, an already uploaded video media (url)
|
||||
Logger.current_logger().report_media(
|
||||
'video', 'big bunny', iteration=1,
|
||||
url='https://test-videos.co.uk/vids/bigbuckbunny/mp4/h264/720/Big_Buck_Bunny_720_10s_1MB.mp4')
|
||||
|
||||
# report an already uploaded audio media (url)
|
||||
# report audio, report an already uploaded audio media (url)
|
||||
Logger.current_logger().report_media(
|
||||
'audio', 'pink panther', iteration=1,
|
||||
url='https://www2.cs.uic.edu/~i101/SoundFiles/PinkPanther30.wav')
|
||||
|
||||
# report local media file
|
||||
# report audio, report local media audio file
|
||||
Logger.current_logger().report_media(
|
||||
'audio', 'tada', iteration=1,
|
||||
local_path=os.path.join('samples', 'sample.mp3'))
|
||||
local_path=os.path.join('data_samples', 'sample.mp3'))
|
||||
36
examples/reporting/model_config.py
Normal file
36
examples/reporting/model_config.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# TRAINS - Example of manual model configuration
|
||||
#
|
||||
import os
|
||||
|
||||
from trains import Task
|
||||
|
||||
|
||||
task = Task.init(project_name='examples', task_name='Model configuration example')
|
||||
|
||||
# Connect a local configuration file
|
||||
config_file = os.path.join('data_samples', 'sample.json')
|
||||
config_file = task.connect_configuration(config_file)
|
||||
# then read configuration as usual, the backend will contain a copy of it.
|
||||
# later when executing remotely, the returned `config_file` will be a temporary file
|
||||
# containing a new copy of the configuration retrieved form the backend
|
||||
# # model_config_dict = json.load(open(config_file, 'rt'))
|
||||
|
||||
# Or Store dictionary of definition for a specific network design
|
||||
model_config_dict = {
|
||||
'value': 13.37,
|
||||
'dict': {'sub_value': 'string', 'sub_integer': 11},
|
||||
'list_of_ints': [1, 2, 3, 4],
|
||||
}
|
||||
model_config_dict = task.connect_configuration(model_config_dict)
|
||||
|
||||
# We now update the dictionary after connecting it, and the changes will be tracked as well.
|
||||
model_config_dict['new value'] = 10
|
||||
model_config_dict['value'] *= model_config_dict['new value']
|
||||
|
||||
# store the label enumeration of the training model
|
||||
labels = {'background': 0, 'cat': 1, 'dog': 2}
|
||||
task.connect_label_enumeration(labels)
|
||||
|
||||
# storing a model: Any saved model (keras / pytorch / tensorflow / etc.)
|
||||
# will have the task network configuration and label enumeration
|
||||
print('Any model stored from this point onwards, will contain both model_config and label_enumeration')
|
||||
57
examples/reporting/pandas_reporting.py
Normal file
57
examples/reporting/pandas_reporting.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# TRAINS - Example of manual graphs and statistics reporting
|
||||
#
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from trains import Task, Logger
|
||||
|
||||
|
||||
def report_table(logger, iteration=0):
|
||||
# type: (Logger, int) -> ()
|
||||
"""
|
||||
reporting tables to the plots section
|
||||
:param logger: The task.logger to use for sending the plots
|
||||
:param iteration: The iteration number of the current reports
|
||||
"""
|
||||
# report tables
|
||||
|
||||
# Report table - DataFrame with index
|
||||
df = pd.DataFrame(
|
||||
{
|
||||
"num_legs": [2, 4, 8, 0],
|
||||
"num_wings": [2, 0, 0, 0],
|
||||
"num_specimen_seen": [10, 2, 1, 8],
|
||||
},
|
||||
index=["falcon", "dog", "spider", "fish"],
|
||||
)
|
||||
df.index.name = "id"
|
||||
logger.report_table("table pd", "PD with index", iteration=iteration, table_plot=df)
|
||||
|
||||
# Report table - CSV from path
|
||||
csv_url = "https://raw.githubusercontent.com/plotly/datasets/master/Mining-BTC-180.csv"
|
||||
logger.report_table("table csv", "remote csv", iteration=iteration, url=csv_url)
|
||||
|
||||
|
||||
def main():
|
||||
# Create the experiment Task
|
||||
task = Task.init(project_name="examples", task_name="pandas table reporting")
|
||||
|
||||
print('reporting pandas tablea into the plots section')
|
||||
|
||||
# Get the task logger,
|
||||
# You can also call Task.current_task().get_logger() from anywhere in your code.
|
||||
logger = task.get_logger()
|
||||
|
||||
# report graphs
|
||||
report_table(logger)
|
||||
|
||||
# force flush reports
|
||||
# If flush is not called, reports are flushed in the background every couple of seconds,
|
||||
# and at the end of the process execution
|
||||
logger.flush()
|
||||
|
||||
print('We are done reporting, have a great day :)')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -4,11 +4,17 @@ from trains import Task
|
||||
import plotly.express as px
|
||||
|
||||
|
||||
task = Task.init('examples', 'plotly report')
|
||||
task = Task.init('examples', 'plotly reporting')
|
||||
|
||||
print('reporting plotly figures')
|
||||
|
||||
# Iris dataset
|
||||
df = px.data.iris()
|
||||
|
||||
# create complex plotly figure
|
||||
fig = px.scatter(df, x="sepal_width", y="sepal_length", color="species", marginal_y="rug", marginal_x="histogram")
|
||||
|
||||
# report the plotly figure
|
||||
task.get_logger().report_plotly(title="iris", series="sepal", iteration=0, figure=fig)
|
||||
|
||||
print('done')
|
||||
6
examples/reporting/requirements.txt
Normal file
6
examples/reporting/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
absl-py>=0.7.1
|
||||
bokeh>=2.1.0
|
||||
numpy
|
||||
pandas
|
||||
pillow>=4.0
|
||||
trains
|
||||
45
examples/reporting/scalar_reporting.py
Normal file
45
examples/reporting/scalar_reporting.py
Normal file
@@ -0,0 +1,45 @@
|
||||
# TRAINS - Example of manual graphs and statistics reporting
|
||||
#
|
||||
from trains import Task, Logger
|
||||
|
||||
|
||||
def report_scalars(logger):
|
||||
# type: (Logger) -> ()
|
||||
"""
|
||||
reporting scalars to scalars section
|
||||
:param logger: The task.logger to use for sending the scalars
|
||||
"""
|
||||
# report two scalar series on the same graph
|
||||
for i in range(100):
|
||||
logger.report_scalar("unified graph", "series A", iteration=i, value=1./(i+1))
|
||||
logger.report_scalar("unified graph", "series B", iteration=i, value=10./(i+1))
|
||||
|
||||
# report two scalar series on two different graphs
|
||||
for i in range(100):
|
||||
logger.report_scalar("graph A", "series A", iteration=i, value=1./(i+1))
|
||||
logger.report_scalar("graph B", "series B", iteration=i, value=10./(i+1))
|
||||
|
||||
|
||||
def main():
|
||||
# Create the experiment Task
|
||||
task = Task.init(project_name="examples", task_name="scalar reporting")
|
||||
|
||||
print('reporting scalar graphs')
|
||||
|
||||
# Get the task logger,
|
||||
# You can also call Task.current_task().get_logger() from anywhere in your code.
|
||||
logger = task.get_logger()
|
||||
|
||||
# report scalars
|
||||
report_scalars(logger)
|
||||
|
||||
# force flush reports
|
||||
# If flush is not called, reports are flushed in the background every couple of seconds,
|
||||
# and at the end of the process execution
|
||||
logger.flush()
|
||||
|
||||
print('We are done reporting, have a great day :)')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
116
examples/reporting/scatter_hist_confusion_mat_reporting.py
Normal file
116
examples/reporting/scatter_hist_confusion_mat_reporting.py
Normal file
@@ -0,0 +1,116 @@
|
||||
# TRAINS - Example of manual graphs and statistics reporting
|
||||
#
|
||||
import numpy as np
|
||||
|
||||
from trains import Task, Logger
|
||||
|
||||
|
||||
def report_plots(logger, iteration=0):
|
||||
# type: (Logger, int) -> ()
|
||||
"""
|
||||
reporting plots to plots section
|
||||
:param logger: The task.logger to use for sending the plots
|
||||
:param iteration: The iteration number of the current reports
|
||||
"""
|
||||
|
||||
# report a single histogram
|
||||
histogram = np.random.randint(10, size=10)
|
||||
logger.report_histogram(
|
||||
"single_histogram",
|
||||
"random histogram",
|
||||
iteration=iteration,
|
||||
values=histogram,
|
||||
xaxis="title x",
|
||||
yaxis="title y",
|
||||
)
|
||||
|
||||
# report a two histograms on the same graph (plot)
|
||||
histogram1 = np.random.randint(13, size=10)
|
||||
histogram2 = histogram * 0.75
|
||||
logger.report_histogram(
|
||||
"two_histogram",
|
||||
"series 1",
|
||||
iteration=iteration,
|
||||
values=histogram1,
|
||||
xaxis="title x",
|
||||
yaxis="title y",
|
||||
)
|
||||
logger.report_histogram(
|
||||
"two_histogram",
|
||||
"series 2",
|
||||
iteration=iteration,
|
||||
values=histogram2,
|
||||
xaxis="title x",
|
||||
yaxis="title y",
|
||||
)
|
||||
|
||||
# report confusion matrix
|
||||
confusion = np.random.randint(10, size=(10, 10))
|
||||
logger.report_matrix(
|
||||
"example_confusion",
|
||||
"ignored",
|
||||
iteration=iteration,
|
||||
matrix=confusion,
|
||||
xaxis="title X",
|
||||
yaxis="title Y",
|
||||
)
|
||||
|
||||
scatter2d = np.hstack(
|
||||
(np.atleast_2d(np.arange(0, 10)).T, np.random.randint(10, size=(10, 1)))
|
||||
)
|
||||
# report 2d scatter plot with lines
|
||||
logger.report_scatter2d(
|
||||
"example_scatter",
|
||||
"series_xy",
|
||||
iteration=iteration,
|
||||
scatter=scatter2d,
|
||||
xaxis="title x",
|
||||
yaxis="title y",
|
||||
)
|
||||
|
||||
# report 2d scatter plot with markers
|
||||
logger.report_scatter2d(
|
||||
"example_scatter",
|
||||
"series_markers",
|
||||
iteration=iteration,
|
||||
scatter=scatter2d,
|
||||
xaxis="title x",
|
||||
yaxis="title y",
|
||||
mode='markers'
|
||||
)
|
||||
|
||||
# report 2d scatter plot with markers
|
||||
logger.report_scatter2d(
|
||||
"example_scatter",
|
||||
"series_lines+markers",
|
||||
iteration=iteration,
|
||||
scatter=scatter2d,
|
||||
xaxis="title x",
|
||||
yaxis="title y",
|
||||
mode='lines+markers'
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
# Create the experiment Task
|
||||
task = Task.init(project_name="examples", task_name="2D plots reporting")
|
||||
|
||||
print('reporting some graphs')
|
||||
|
||||
# Get the task logger,
|
||||
# You can also call Task.current_task().get_logger() from anywhere in your code.
|
||||
logger = task.get_logger()
|
||||
|
||||
# report graphs
|
||||
report_plots(logger)
|
||||
|
||||
# force flush reports
|
||||
# If flush is not called, reports are flushed in the background every couple of seconds,
|
||||
# and at the end of the process execution
|
||||
logger.flush()
|
||||
|
||||
print('We are done reporting, have a great day :)')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
58
examples/reporting/text_reporting.py
Normal file
58
examples/reporting/text_reporting.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# TRAINS - Example of manual graphs and statistics reporting
|
||||
#
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from trains import Task, Logger
|
||||
|
||||
|
||||
def report_logs(logger):
|
||||
# type: (Logger) -> ()
|
||||
"""
|
||||
reporting text to logs section
|
||||
:param logger: The task.logger to use for sending the text
|
||||
"""
|
||||
# standard python logging
|
||||
logging.info("This is an info message")
|
||||
|
||||
# this is a loguru test example
|
||||
try:
|
||||
from loguru import logger as loguru_logger # noqa
|
||||
|
||||
loguru_logger.info("That's it, beautiful and simple logging! (using ANSI colors)")
|
||||
except ImportError:
|
||||
print('loguru not installed, skipping loguru test')
|
||||
|
||||
# report text
|
||||
logger.report_text("hello, this is plain text")
|
||||
|
||||
|
||||
def main():
|
||||
# Create the experiment Task
|
||||
task = Task.init(project_name="examples", task_name="text reporting")
|
||||
|
||||
print('reporting text logs')
|
||||
|
||||
# report regular console print
|
||||
print('This is standard output test')
|
||||
|
||||
# report stderr
|
||||
print('This is standard error test', file=sys.stderr)
|
||||
|
||||
# Get the task logger,
|
||||
# You can also call Task.current_task().get_logger() from anywhere in your code.
|
||||
logger = task.get_logger()
|
||||
|
||||
# report text based logs
|
||||
report_logs(logger)
|
||||
|
||||
# force flush reports
|
||||
# If flush is not called, reports are flushed in the background every couple of seconds,
|
||||
# and at the end of the process execution
|
||||
logger.flush()
|
||||
|
||||
print('We are done reporting, have a great day :)')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,17 +0,0 @@
|
||||
absl-py>=0.7.1
|
||||
Keras>=2.2.4
|
||||
joblib>=0.13.2
|
||||
matplotlib>=3.1.1 ; python_version >= '3.6'
|
||||
matplotlib >= 2.2.4 ; python_version < '3.6'
|
||||
pandas
|
||||
seaborn>=0.9.0
|
||||
sklearn>=0.0
|
||||
tensorboard>=1.14.0
|
||||
tensorboardX>=1.8
|
||||
tensorflow>=1.14.0
|
||||
torch>=1.1.0
|
||||
torchvision>=0.3.0
|
||||
xgboost>=0.90 ; python_version >= '3'
|
||||
xgboost >= 0.82 ; python_version < '3'
|
||||
# sudo apt-get install graphviz
|
||||
graphviz>=0.8
|
||||
1
examples/services/cleanup/requirements.txt
Normal file
1
examples/services/cleanup/requirements.txt
Normal file
@@ -0,0 +1 @@
|
||||
trains
|
||||
@@ -15,7 +15,7 @@ from keras.models import Sequential
|
||||
from keras.layers.core import Dense, Activation
|
||||
from keras.optimizers import RMSprop
|
||||
from keras.utils import np_utils
|
||||
import tensorflow as tf
|
||||
import tensorflow as tf # noqa: F401
|
||||
|
||||
from trains import Task, Logger
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import logging
|
||||
|
||||
from trains.automation import UniformParameterRange, DiscreteParameterRange, UniformIntegerParameterRange, ParameterSet
|
||||
from trains.automation import GridSearch, RandomSearch, HyperParameterOptimizer
|
||||
from trains import Task
|
||||
from trains.automation import DiscreteParameterRange, HyperParameterOptimizer, RandomSearch, \
|
||||
UniformIntegerParameterRange
|
||||
|
||||
try:
|
||||
from trains.automation.hpbandster import OptimizerBOHB
|
||||
@@ -13,7 +13,7 @@ except ValueError:
|
||||
'we will be using RandomSearch strategy instead\n'
|
||||
'If you like to try ' '{{BOHB}: Robust and Efficient Hyperparameter Optimization at Scale},\n'
|
||||
'run: pip install hpbandster')
|
||||
Our_SearchStrategy = RandomSearch
|
||||
Our_SearchStrategy = RandomSearch
|
||||
|
||||
|
||||
def job_complete_callback(
|
||||
@@ -71,7 +71,7 @@ an_optimizer = HyperParameterOptimizer(
|
||||
# more are coming soon...
|
||||
optimizer_class=Our_SearchStrategy,
|
||||
# Select an execution queue to schedule the experiments for execution
|
||||
execution_queue='default',
|
||||
execution_queue='moshik',
|
||||
# Optional: Limit the execution time of a single experiment, in minutes.
|
||||
# (this is optional, and if using OptimizerBOHB, it is ignored)
|
||||
time_limit_per_job=10.,
|
||||
@@ -97,7 +97,7 @@ if args['run_as_service']:
|
||||
task.execute_remotely(queue_name='services', exit_process=True)
|
||||
|
||||
# report every 12 seconds, this is way too often, but we are testing here J
|
||||
an_optimizer.set_report_period(0.2)
|
||||
an_optimizer.set_report_period(2.2)
|
||||
# start the optimization process, callback function to be called every time an experiment is completed
|
||||
# this function returns immediately
|
||||
an_optimizer.start(job_complete_callback=job_complete_callback)
|
||||
@@ -0,0 +1,3 @@
|
||||
keras
|
||||
tensorflow
|
||||
trains
|
||||
@@ -0,0 +1,154 @@
|
||||
import os
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
from tempfile import mkstemp
|
||||
|
||||
import psutil
|
||||
|
||||
# make sure we have jupyter in the auto requirements
|
||||
from trains import Task
|
||||
|
||||
# set default docker image, with network configuration
|
||||
os.environ["TRAINS_DOCKER_IMAGE"] = "nvidia/cuda --network host"
|
||||
|
||||
# initialize TRAINS
|
||||
task = Task.init(project_name="examples", task_name="Remote Jupyter NoteBook")
|
||||
|
||||
# get rid of all the runtime TRAINS
|
||||
preserve = (
|
||||
"TRAINS_API_HOST",
|
||||
"TRAINS_WEB_HOST",
|
||||
"TRAINS_FILES_HOST",
|
||||
"TRAINS_CONFIG_FILE",
|
||||
"TRAINS_API_ACCESS_KEY",
|
||||
"TRAINS_API_SECRET_KEY",
|
||||
"TRAINS_API_HOST_VERIFY_CERT",
|
||||
)
|
||||
|
||||
# setup os environment
|
||||
env = deepcopy(os.environ)
|
||||
for key in os.environ:
|
||||
if key.startswith("TRAINS") and key not in preserve:
|
||||
env.pop(key, None)
|
||||
|
||||
# Add jupyter server base folder
|
||||
param = {
|
||||
"jupyter_server_base_directory": "~/",
|
||||
"ssh_server": True,
|
||||
"ssh_password": "training",
|
||||
}
|
||||
task.connect(param)
|
||||
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
hostname = socket.gethostname()
|
||||
hostnames = socket.gethostbyname(socket.gethostname())
|
||||
except Exception:
|
||||
|
||||
def get_ip_addresses(family):
|
||||
for interface, snics in psutil.net_if_addrs().items():
|
||||
for snic in snics:
|
||||
if snic.family == family:
|
||||
yield snic.address
|
||||
|
||||
hostnames = list(get_ip_addresses(socket.AF_INET))
|
||||
hostname = hostnames[0]
|
||||
|
||||
if param.get("ssh_server"):
|
||||
print("Installing SSH Server on {} [{}]".format(hostname, hostnames))
|
||||
ssh_password = param.get("ssh_password", "training")
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
used_ports = [i.laddr.port for i in psutil.net_connections()]
|
||||
port = [i for i in range(10022, 15000) if i not in used_ports][0]
|
||||
|
||||
result = os.system(
|
||||
"apt-get install -y openssh-server && "
|
||||
"mkdir -p /var/run/sshd && "
|
||||
"echo 'root:{password}' | chpasswd && "
|
||||
"echo 'PermitRootLogin yes' >> /etc/ssh/sshd_config && "
|
||||
"sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && "
|
||||
"sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd && " # noqa: W605
|
||||
'echo "export VISIBLE=now" >> /etc/profile && '
|
||||
'echo "export TRAINS_CONFIG_FILE={trains_config_file}" >> /etc/profile && '
|
||||
"/usr/sbin/sshd -p {port}".format(
|
||||
password=ssh_password,
|
||||
port=port,
|
||||
trains_config_file=os.environ.get("TRAINS_CONFIG_FILE"),
|
||||
)
|
||||
)
|
||||
|
||||
if result == 0:
|
||||
print(
|
||||
"\n#\n# SSH Server running on {} [{}] port {}\n# LOGIN u:root p:{}\n#\n".format(
|
||||
hostname, hostnames, port, ssh_password
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise ValueError()
|
||||
except Exception:
|
||||
print("\n#\n# Error: SSH server could not be launched\n#\n")
|
||||
|
||||
# execute jupyter notebook
|
||||
fd, local_filename = mkstemp()
|
||||
cwd = (
|
||||
os.path.expandvars(os.path.expanduser(param["jupyter_server_base_directory"]))
|
||||
if param["jupyter_server_base_directory"]
|
||||
else os.getcwd()
|
||||
)
|
||||
print(
|
||||
"Running Jupyter Notebook Server on {} [{}] at {}".format(hostname, hostnames, cwd)
|
||||
)
|
||||
process = subprocess.Popen(
|
||||
[
|
||||
sys.executable,
|
||||
"-m",
|
||||
"jupyter",
|
||||
"notebook",
|
||||
"--no-browser",
|
||||
"--allow-root",
|
||||
"--ip",
|
||||
"0.0.0.0",
|
||||
],
|
||||
env=env,
|
||||
stdout=fd,
|
||||
stderr=fd,
|
||||
cwd=cwd,
|
||||
)
|
||||
|
||||
# print stdout/stderr
|
||||
prev_line_count = 0
|
||||
process_running = True
|
||||
while process_running:
|
||||
process_running = False
|
||||
try:
|
||||
process.wait(timeout=2.0 if prev_line_count == 0 else 15.0)
|
||||
except subprocess.TimeoutExpired:
|
||||
process_running = True
|
||||
|
||||
with open(local_filename, "rt") as f:
|
||||
# read new lines
|
||||
new_lines = f.readlines()
|
||||
if not new_lines:
|
||||
continue
|
||||
output = "".join(new_lines)
|
||||
print(output)
|
||||
# update task comment with jupyter notebook server links
|
||||
if prev_line_count == 0:
|
||||
task.comment += "\n" + "".join(
|
||||
line for line in new_lines if "http://" in line or "https://" in line
|
||||
)
|
||||
prev_line_count += len(new_lines)
|
||||
|
||||
os.lseek(fd, 0, 0)
|
||||
os.ftruncate(fd, 0)
|
||||
|
||||
# cleanup
|
||||
os.close(fd)
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
os.unlink(local_filename)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -1,358 +0,0 @@
|
||||
# TRAINS - Example of tensorflow eager mode, model logging and tensorboard
|
||||
#
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""A deep MNIST classifier using convolutional layers.
|
||||
Sample usage:
|
||||
python mnist.py --help
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import tensorflow as tf
|
||||
from tensorflow.examples.tutorials.mnist import input_data
|
||||
from trains import Task
|
||||
|
||||
tf.compat.v1.enable_eager_execution()
|
||||
|
||||
task = Task.init(project_name='examples', task_name='Tensorflow eager mode')
|
||||
|
||||
|
||||
FLAGS = tf.app.flags.FLAGS
|
||||
tf.app.flags.DEFINE_integer('data_num', 100, """Flag of type integer""")
|
||||
tf.app.flags.DEFINE_string('img_path', './img', """Flag of type string""")
|
||||
|
||||
|
||||
layers = tf.keras.layers
|
||||
FLAGS = None
|
||||
|
||||
|
||||
class Discriminator(tf.keras.Model):
|
||||
"""GAN Discriminator.
|
||||
A network to differentiate between generated and real handwritten digits.
|
||||
"""
|
||||
|
||||
def __init__(self, data_format):
|
||||
"""Creates a model for discriminating between real and generated digits.
|
||||
Args:
|
||||
data_format: Either 'channels_first' or 'channels_last'.
|
||||
'channels_first' is typically faster on GPUs while 'channels_last' is
|
||||
typically faster on CPUs. See
|
||||
https://www.tensorflow.org/performance/performance_guide#data_formats
|
||||
"""
|
||||
super(Discriminator, self).__init__(name='')
|
||||
if data_format == 'channels_first':
|
||||
self._input_shape = [-1, 1, 28, 28]
|
||||
else:
|
||||
assert data_format == 'channels_last'
|
||||
self._input_shape = [-1, 28, 28, 1]
|
||||
self.conv1 = layers.Conv2D(
|
||||
64, 5, padding='SAME', data_format=data_format, activation=tf.tanh)
|
||||
self.pool1 = layers.AveragePooling2D(2, 2, data_format=data_format)
|
||||
self.conv2 = layers.Conv2D(
|
||||
128, 5, data_format=data_format, activation=tf.tanh)
|
||||
self.pool2 = layers.AveragePooling2D(2, 2, data_format=data_format)
|
||||
self.flatten = layers.Flatten()
|
||||
self.fc1 = layers.Dense(1024, activation=tf.tanh)
|
||||
self.fc2 = layers.Dense(1, activation=None)
|
||||
|
||||
def call(self, inputs):
|
||||
"""Return two logits per image estimating input authenticity.
|
||||
Users should invoke __call__ to run the network, which delegates to this
|
||||
method (and not call this method directly).
|
||||
Args:
|
||||
inputs: A batch of images as a Tensor with shape [batch_size, 28, 28, 1]
|
||||
or [batch_size, 1, 28, 28]
|
||||
Returns:
|
||||
A Tensor with shape [batch_size] containing logits estimating
|
||||
the probability that corresponding digit is real.
|
||||
"""
|
||||
x = tf.reshape(inputs, self._input_shape)
|
||||
x = self.conv1(x)
|
||||
x = self.pool1(x)
|
||||
x = self.conv2(x)
|
||||
x = self.pool2(x)
|
||||
x = self.flatten(x)
|
||||
x = self.fc1(x)
|
||||
x = self.fc2(x)
|
||||
return x
|
||||
|
||||
|
||||
class Generator(tf.keras.Model):
|
||||
"""Generator of handwritten digits similar to the ones in the MNIST dataset.
|
||||
"""
|
||||
|
||||
def __init__(self, data_format):
|
||||
"""Creates a model for discriminating between real and generated digits.
|
||||
Args:
|
||||
data_format: Either 'channels_first' or 'channels_last'.
|
||||
'channels_first' is typically faster on GPUs while 'channels_last' is
|
||||
typically faster on CPUs. See
|
||||
https://www.tensorflow.org/performance/performance_guide#data_formats
|
||||
"""
|
||||
super(Generator, self).__init__(name='')
|
||||
self.data_format = data_format
|
||||
# We are using 128 6x6 channels as input to the first deconvolution layer
|
||||
if data_format == 'channels_first':
|
||||
self._pre_conv_shape = [-1, 128, 6, 6]
|
||||
else:
|
||||
assert data_format == 'channels_last'
|
||||
self._pre_conv_shape = [-1, 6, 6, 128]
|
||||
self.fc1 = layers.Dense(6 * 6 * 128, activation=tf.tanh)
|
||||
|
||||
# In call(), we reshape the output of fc1 to _pre_conv_shape
|
||||
|
||||
# Deconvolution layer. Resulting image shape: (batch, 14, 14, 64)
|
||||
self.conv1 = layers.Conv2DTranspose(
|
||||
64, 4, strides=2, activation=None, data_format=data_format)
|
||||
|
||||
# Deconvolution layer. Resulting image shape: (batch, 28, 28, 1)
|
||||
self.conv2 = layers.Conv2DTranspose(
|
||||
1, 2, strides=2, activation=tf.nn.sigmoid, data_format=data_format)
|
||||
|
||||
def call(self, inputs):
|
||||
"""Return a batch of generated images.
|
||||
Users should invoke __call__ to run the network, which delegates to this
|
||||
method (and not call this method directly).
|
||||
Args:
|
||||
inputs: A batch of noise vectors as a Tensor with shape
|
||||
[batch_size, length of noise vectors].
|
||||
Returns:
|
||||
A Tensor containing generated images. If data_format is 'channels_last',
|
||||
the shape of returned images is [batch_size, 28, 28, 1], else
|
||||
[batch_size, 1, 28, 28]
|
||||
"""
|
||||
|
||||
x = self.fc1(inputs)
|
||||
x = tf.reshape(x, shape=self._pre_conv_shape)
|
||||
x = self.conv1(x)
|
||||
x = self.conv2(x)
|
||||
return x
|
||||
|
||||
|
||||
def discriminator_loss(discriminator_real_outputs, discriminator_gen_outputs):
|
||||
"""Original discriminator loss for GANs, with label smoothing.
|
||||
See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661) for more
|
||||
details.
|
||||
Args:
|
||||
discriminator_real_outputs: Discriminator output on real data.
|
||||
discriminator_gen_outputs: Discriminator output on generated data. Expected
|
||||
to be in the range of (-inf, inf).
|
||||
Returns:
|
||||
A scalar loss Tensor.
|
||||
"""
|
||||
|
||||
loss_on_real = tf.compat.v1.losses.sigmoid_cross_entropy(
|
||||
tf.ones_like(discriminator_real_outputs),
|
||||
discriminator_real_outputs,
|
||||
label_smoothing=0.25)
|
||||
loss_on_generated = tf.compat.v1.losses.sigmoid_cross_entropy(
|
||||
tf.zeros_like(discriminator_gen_outputs), discriminator_gen_outputs)
|
||||
loss = loss_on_real + loss_on_generated
|
||||
tf.contrib.summary.scalar('discriminator_loss', loss)
|
||||
return loss
|
||||
|
||||
|
||||
def generator_loss(discriminator_gen_outputs):
|
||||
"""Original generator loss for GANs.
|
||||
L = -log(sigmoid(D(G(z))))
|
||||
See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661)
|
||||
for more details.
|
||||
Args:
|
||||
discriminator_gen_outputs: Discriminator output on generated data. Expected
|
||||
to be in the range of (-inf, inf).
|
||||
Returns:
|
||||
A scalar loss Tensor.
|
||||
"""
|
||||
loss = tf.compat.v1.losses.sigmoid_cross_entropy(
|
||||
tf.ones_like(discriminator_gen_outputs), discriminator_gen_outputs)
|
||||
tf.contrib.summary.scalar('generator_loss', loss)
|
||||
return loss
|
||||
|
||||
|
||||
def train_one_epoch(generator, discriminator, generator_optimizer,
|
||||
discriminator_optimizer, dataset, step_counter,
|
||||
log_interval, noise_dim):
|
||||
"""Train `generator` and `discriminator` models on `dataset`.
|
||||
Args:
|
||||
generator: Generator model.
|
||||
discriminator: Discriminator model.
|
||||
generator_optimizer: Optimizer to use for generator.
|
||||
discriminator_optimizer: Optimizer to use for discriminator.
|
||||
dataset: Dataset of images to train on.
|
||||
step_counter: An integer variable, used to write summaries regularly.
|
||||
log_interval: How many steps to wait between logging and collecting
|
||||
summaries.
|
||||
noise_dim: Dimension of noise vector to use.
|
||||
"""
|
||||
|
||||
total_generator_loss = 0.0
|
||||
total_discriminator_loss = 0.0
|
||||
for (batch_index, images) in enumerate(dataset):
|
||||
with tf.device('/cpu:0'):
|
||||
tf.compat.v1.assign_add(step_counter, 1)
|
||||
|
||||
with tf.contrib.summary.record_summaries_every_n_global_steps(
|
||||
log_interval, global_step=step_counter):
|
||||
current_batch_size = images.shape[0]
|
||||
noise = tf.random.uniform(
|
||||
shape=[current_batch_size, noise_dim],
|
||||
minval=-1.,
|
||||
maxval=1.,
|
||||
seed=batch_index)
|
||||
|
||||
# we can use 2 tapes or a single persistent tape.
|
||||
# Using two tapes is memory efficient since intermediate tensors can be
|
||||
# released between the two .gradient() calls below
|
||||
with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
|
||||
generated_images = generator(noise)
|
||||
tf.contrib.summary.image(
|
||||
'generated_images',
|
||||
tf.reshape(generated_images, [-1, 28, 28, 1]),
|
||||
max_images=10)
|
||||
|
||||
discriminator_gen_outputs = discriminator(generated_images)
|
||||
discriminator_real_outputs = discriminator(images)
|
||||
discriminator_loss_val = discriminator_loss(discriminator_real_outputs,
|
||||
discriminator_gen_outputs)
|
||||
total_discriminator_loss += discriminator_loss_val
|
||||
|
||||
generator_loss_val = generator_loss(discriminator_gen_outputs)
|
||||
total_generator_loss += generator_loss_val
|
||||
|
||||
generator_grad = gen_tape.gradient(generator_loss_val,
|
||||
generator.variables)
|
||||
discriminator_grad = disc_tape.gradient(discriminator_loss_val,
|
||||
discriminator.variables)
|
||||
|
||||
generator_optimizer.apply_gradients(
|
||||
zip(generator_grad, generator.variables))
|
||||
discriminator_optimizer.apply_gradients(
|
||||
zip(discriminator_grad, discriminator.variables))
|
||||
|
||||
if log_interval and batch_index > 0 and batch_index % log_interval == 0:
|
||||
print('Batch #%d\tAverage Generator Loss: %.6f\t'
|
||||
'Average Discriminator Loss: %.6f' %
|
||||
(batch_index, total_generator_loss / batch_index,
|
||||
total_discriminator_loss / batch_index))
|
||||
|
||||
|
||||
def main(_):
    """Train the GAN for a few epochs, checkpointing after each epoch.

    Builds the generator/discriminator and their optimizers, restores the
    latest checkpoint from ``FLAGS.checkpoint_dir`` if one exists, and runs
    three epochs of training with TensorBoard summaries.
    """
    (device, data_format) = ('/gpu:0', 'channels_first')
    if FLAGS.no_gpu or tf.contrib.eager.num_gpus() <= 0:
        # CPU path prefers channels_last for performance.
        (device, data_format) = ('/cpu:0', 'channels_last')
    print('Using device %s, and data format %s.' % (device, data_format))

    # Load the datasets.  Only the first 1280 training images are used, which
    # keeps this example quick.
    data = input_data.read_data_sets(FLAGS.data_dir)
    dataset = (
        tf.data.Dataset.from_tensor_slices(data.train.images[:1280]).shuffle(60000)
        .batch(FLAGS.batch_size))

    # Create the models and optimizers.  Packed into one dict so the same
    # objects feed both tf.train.Checkpoint and train_one_epoch(**kwargs).
    model_objects = {
        'generator': Generator(data_format),
        'discriminator': Discriminator(data_format),
        'generator_optimizer': tf.compat.v1.train.AdamOptimizer(FLAGS.lr),
        'discriminator_optimizer': tf.compat.v1.train.AdamOptimizer(FLAGS.lr),
        'step_counter': tf.compat.v1.train.get_or_create_global_step(),
    }

    # Prepare summary writer and checkpoint info
    summary_writer = tf.contrib.summary.create_file_writer(
        FLAGS.output_dir, flush_millis=1000)
    checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
    latest_cpkt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if latest_cpkt:
        print('Using latest checkpoint at ' + latest_cpkt)
    checkpoint = tf.train.Checkpoint(**model_objects)
    # Restore variables on creation if a checkpoint exists.
    # NOTE(review): called unconditionally -- relies on restore(None) being a
    # no-op when no checkpoint was found; confirm against the tf.train API.
    checkpoint.restore(latest_cpkt)

    with tf.device(device):
        for _ in range(3):  # fixed three epochs for this example
            start = time.time()
            with summary_writer.as_default():
                train_one_epoch(dataset=dataset, log_interval=FLAGS.log_interval,
                                noise_dim=FLAGS.noise, **model_objects)
            end = time.time()
            # Snapshot once per epoch.
            checkpoint.save(checkpoint_prefix)
            print('\nTrain time for epoch #%d (step %d): %f' %
                  (checkpoint.save_counter.numpy(),
                   checkpoint.step_counter.numpy(),
                   end - start))
if __name__ == '__main__':
    # Command-line interface for the GAN example.  Fixes: the help strings for
    # --batch-size, --log-interval and --output_dir previously contradicted
    # the actual defaults (claimed 128, 100 and "none" respectively).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data-dir',
        type=str,
        default='/tmp/tensorflow/mnist/input_data',
        help=('Directory for storing input data (default '
              '/tmp/tensorflow/mnist/input_data)'))
    parser.add_argument(
        '--batch-size',
        type=int,
        default=16,
        metavar='N',
        help='input batch size for training (default: 16)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=1,
        metavar='N',
        help=('number of batches between logging and writing summaries '
              '(default: 1)'))
    parser.add_argument(
        '--output_dir',
        type=str,
        default='/tmp/tensorflow/',
        metavar='DIR',
        help='Directory to write TensorBoard summaries (default /tmp/tensorflow/)')
    parser.add_argument(
        '--checkpoint_dir',
        type=str,
        default='/tmp/tensorflow/mnist/checkpoints/',
        metavar='DIR',
        help=('Directory to save checkpoints in (once per epoch) (default '
              '/tmp/tensorflow/mnist/checkpoints/)'))
    parser.add_argument(
        '--lr',
        type=float,
        default=0.001,
        metavar='LR',
        help='learning rate (default: 0.001)')
    parser.add_argument(
        '--noise',
        type=int,
        default=100,
        metavar='N',
        help='Length of noise vector for generator input (default: 100)')
    parser.add_argument(
        '--no-gpu',
        action='store_true',
        default=False,
        help='disables GPU usage even if a GPU is available')

    # Unrecognized flags are forwarded to tf.app.run unchanged.
    FLAGS, unparsed = parser.parse_known_args()

    tf.compat.v1.app.run(main=main, argv=[sys.argv[0]] + unparsed)
@@ -1,172 +0,0 @@
|
||||
# TRAINS - Example of tensorflow mnist training model logging
#
# Save and Restore a model using TensorFlow.
# This example is using the MNIST database of handwritten digits
# (http://yann.lecun.com/exdb/mnist/)
#
# Author: Aymeric Damien
# Project: https://github.com/aymericdamien/TensorFlow-Examples/

from __future__ import print_function

from os.path import exists, join
import tempfile

import numpy as np
import tensorflow as tf
from trains import Task

# Scratch directory used by the SavedModel load smoke test below.
MODEL_PATH = join(tempfile.gettempdir(), "module_no_signatures")
# TRAINS: create/attach the experiment so TF/Keras calls are auto-logged.
task = Task.init(project_name='examples', task_name='Tensorflow mnist example')

## block
# NOTE(review): this section appears to be a framework-binding smoke test
# (tiny Keras fit on random data + SavedModel load attempts) rather than part
# of the MNIST example proper -- confirm it is intentional.
X_train = np.random.rand(100, 3)
y_train = np.random.rand(100, 1)
model = tf.keras.models.Sequential([tf.keras.layers.Dense(1)])
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.SGD(),
              metrics=['accuracy'])
# NOTE(review): `nb_epoch` is the long-deprecated spelling of `epochs`.
model.fit(X_train, y_train, steps_per_epoch=1, nb_epoch=1)

with tf.Session(graph=tf.Graph()) as sess:
    if exists(MODEL_PATH):
        try:
            tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], MODEL_PATH)
            m2 = tf.saved_model.load(sess, [tf.saved_model.tag_constants.SERVING], MODEL_PATH)
        except Exception:
            # Best-effort load: failures are deliberately ignored.
            pass
    tf.train.Checkpoint  # attribute access only; no object is created
## block end

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
# Parameters
parameters = {
    'learning_rate': 0.001,
    'batch_size': 100,
    'display_step': 1,  # print progress every N epochs
    'model_path': join(tempfile.gettempdir(), "model.ckpt"),  # checkpoint target

    # Network Parameters
    'n_hidden_1': 256,  # 1st layer number of features
    'n_hidden_2': 256,  # 2nd layer number of features
    'n_input': 784,  # MNIST data input (img shape: 28*28)
    'n_classes': 10,  # MNIST total classes (0-9 digits)
}
# TRAINS: connect parameters with the experiment/task for logging
parameters = task.connect(parameters)

# tf Graph input (batch dimension left open)
x = tf.placeholder("float", [None, parameters['n_input']])
y = tf.placeholder("float", [None, parameters['n_classes']])
# Create model
def multilayer_perceptron(x, weights, biases):
    """Build a two-hidden-layer MLP graph.

    Each hidden layer computes ReLU(x @ W + b); the output layer is linear
    (the softmax is applied later inside the loss).
    Returns the logits tensor.
    """
    hidden_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, weights['h2']), biases['b2']))
    return tf.matmul(hidden_2, weights['out']) + biases['out']
# Store layers weight & bias (randomly initialized; shapes follow `parameters`)
weights = {
    'h1': tf.Variable(tf.random_normal([parameters['n_input'], parameters['n_hidden_1']])),
    'h2': tf.Variable(tf.random_normal([parameters['n_hidden_1'], parameters['n_hidden_2']])),
    'out': tf.Variable(tf.random_normal([parameters['n_hidden_2'], parameters['n_classes']]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([parameters['n_hidden_1']])),
    'b2': tf.Variable(tf.random_normal([parameters['n_hidden_2']])),
    'out': tf.Variable(tf.random_normal([parameters['n_classes']]))
}

# Construct model (logits; softmax is folded into the loss below)
pred = multilayer_perceptron(x, weights, biases)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=parameters['learning_rate']).minimize(cost)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# 'Saver' op to save and restore all the variables
saver = tf.train.Saver()
# Running first session: train for 3 epochs from scratch, evaluate, and save.
# NOTE(review): indentation reconstructed from a formatting-mangled copy; the
# per-epoch saver.save placement (inside the epoch loop) is inferred from line
# order and the "once per epoch" intent -- verify against the original file.
print("Starting 1st session...")
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    # Training cycle
    for epoch in range(3):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/parameters['batch_size'])
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(parameters['batch_size'])
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                          y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % parameters['display_step'] == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", \
                "{:.9f}".format(avg_cost))
        # Checkpoint once per epoch (auto-logged as a model by TRAINS).
        save_path = saver.save(sess, parameters['model_path'])

    print("First Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    # Save model weights to disk
    save_path = saver.save(sess, parameters['model_path'])
    print("Model saved in file: %s" % save_path)
# Running a new session: restore the weights saved above and resume training
# for 7 more epochs, then re-evaluate.  Demonstrates save/restore round-trip.
print("Starting 2nd session...")
with tf.Session() as sess:
    # Initialize variables
    sess.run(init)

    # Restore model weights from previously saved model
    saver.restore(sess, parameters['model_path'])
    print("Model restored from file: %s" % save_path)

    # Resume training
    for epoch in range(7):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / parameters['batch_size'])
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(parameters['batch_size'])
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                          y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % parameters['display_step'] == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
    print("Second Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval(
        {x: mnist.test.images, y: mnist.test.labels}))
Reference in New Issue
Block a user