Refactor examples

This commit is contained in:
allegroai
2020-06-15 22:48:51 +03:00
parent bec31c7ac4
commit 99368abb1c
78 changed files with 3505 additions and 1294 deletions

View File

@@ -0,0 +1,58 @@
# Example pipeline definition: three chained steps, each one cloning an
# existing experiment (task) and executing it with optional overrides.
pipeline_node = {
    "step1": {
        # Unique node identifier (alphanumeric only); other nodes use it
        # to reference this node's outputs.
        "node_name": "step1",
        # Name of the node that must complete before this one runs.
        "parent_node": None,
        # ID of the experiment/task to clone and execute for this step.
        "base_task_id": "gafghafh",
        # Execution queue name/ID; None selects the default queue.
        "queue": None,
        # Docker image override; None keeps the image stored on the base task.
        "docker": None,
        # Hyper-parameter overrides applied to the cloned task.
        "parameter_override": {"arg": 123, },
        # Full task-definition overrides (not supported yet).
        "task_override": None,
    },
    "step2": {
        # Unique node identifier (alphanumeric only); other nodes use it
        # to reference this node's outputs.
        "node_name": "step2",
        # Runs only after step1 has finished.
        "parent_node": "step1",
        # ID of the experiment/task to clone and execute for this step.
        "base_task_id": "123456aa",
        # Execution queue name/ID; None selects the default queue.
        "queue": "2xgpu",
        # Docker image override; None keeps the image stored on the base task.
        "docker": None,
        # Hyper-parameter overrides applied to the cloned task.
        "parameter_override": {
            # feed the `my_data` artifact produced by node `step1`
            # into this task's `url` parameter
            "url": "@step1:artifacts/my_data",
            # feed node `step1`'s parameter `arg` into this task's `arg`
            "arg": "@step1:parameters/arg",
        },
        # Full task-definition overrides (not supported yet).
        "task_override": None,
    },
    "step3": {
        # Unique node identifier (alphanumeric only); other nodes use it
        # to reference this node's outputs.
        "node_name": "step3",
        # Runs only after step2 has finished.
        "parent_node": "step2",
        # ID of the experiment/task to clone and execute for this step.
        "base_task_id": "zzcc1244",
        # Execution queue name/ID; None selects the default queue.
        "queue": "2xGPUS",
        # Docker image override; None keeps the image stored on the base task.
        "docker": None,
        # Hyper-parameter overrides applied to the cloned task.
        "parameter_override": {
            # feed node `step2`'s last (-1) output model into this
            # task's `model_url` parameter
            "model_url": "@step2:models/output/-1",
        },
        # Full task-definition overrides (not supported yet).
        "task_override": None,
    },
}

View File

@@ -1,4 +1,5 @@
from random import random, sample
from random import sample
from trains import Task
# Connecting TRAINS

View File

@@ -0,0 +1 @@
trains

View File

@@ -1,86 +0,0 @@
# TRAINS - Keras with Tensorboard example code, automatic logging model and Tensorboard outputs
#
# Train a simple deep NN on the MNIST dataset.
# Gets to 98.40% test accuracy after 20 epochs
# (there is *a lot* of margin for parameter tuning).
# 2 seconds per epoch on a K520 GPU.
from __future__ import print_function
import tempfile
import os
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import RMSprop
from keras.utils import np_utils
import tensorflow as tf
from trains import Task, Logger

# Connecting TRAINS: registers this run as the "Keras AutoML base" template
# experiment that the random-search controller clones and re-executes.
task = Task.init(project_name='examples', task_name='Keras AutoML base')

# the data, shuffled and split between train and test sets
nb_classes = 10
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# flatten the 28x28 images into 784-long vectors and scale pixels to [0, 1]
X_train = X_train.reshape(60000, 784).astype('float32')/255.
X_test = X_test.reshape(10000, 784).astype('float32')/255.
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices (one-hot targets)
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

# Hyper-parameters; task.connect() lets the TRAINS backend override these
# defaults when the experiment is cloned and executed remotely (AutoML search).
args = {'batch_size': 128,
        'epochs': 6,
        'layer_1': 512,
        'layer_2': 512,
        'layer_3': 10,
        'layer_4': 512,
        }
args = task.connect(args)

model = Sequential()
model.add(Dense(args['layer_1'], input_shape=(784,)))
model.add(Activation('relu'))
# model.add(Dropout(0.2))
model.add(Dense(args['layer_2']))
model.add(Activation('relu'))
# model.add(Dropout(0.2))
model.add(Dense(args['layer_3']))
model.add(Activation('softmax'))

# NOTE(review): model2 is built but never compiled, trained or saved -
# presumably only to demonstrate multi-model definitions; confirm before removing.
model2 = Sequential()
model2.add(Dense(args['layer_4'], input_shape=(784,)))
model2.add(Activation('relu'))

model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

# Advanced: setting model class enumeration (label name -> index mapping
# stored alongside any model snapshot uploaded by this task)
labels = dict(('digit_%d' % i, i) for i in range(10))
task.set_model_label_enumeration(labels)

output_folder = os.path.join(tempfile.gettempdir(), 'keras_example')
board = TensorBoard(log_dir=output_folder, write_images=False)
model_store = ModelCheckpoint(filepath=os.path.join(output_folder, 'weight.hdf5'))

history = model.fit(X_train, Y_train,
                    batch_size=args['batch_size'], epochs=args['epochs'],
                    callbacks=[board, model_store],
                    validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
# Report the final metrics as explicit scalars so an AutoML controller can
# compare cloned experiments by these values.
Logger.current_logger().report_scalar(title='evaluate', series='score', value=score[0], iteration=args['epochs'])
Logger.current_logger().report_scalar(title='evaluate', series='accuracy', value=score[1], iteration=args['epochs'])

View File

@@ -1,45 +0,0 @@
# Simple random-search AutoML controller: clones a template experiment a few
# times, overrides its hyper-parameters with random samples, and enqueues the
# clones for remote execution by trains-agent workers.
#
# Fix: `random` was imported but never used; only `sample` is needed.
from random import sample

from trains import Task

# define random search space,
# This is a simple random search
# (can be integrated with 'bayesian-optimization' 'hpbandster' etc.)
# Each entry maps a task parameter name to a zero-argument sampler callable.
space = {
    'batch_size': lambda: sample([64, 96, 128, 160, 192], 1)[0],
    'layer_1': lambda: sample(range(128, 512, 32), 1)[0],
    'layer_2': lambda: sample(range(128, 512, 32), 1)[0],
}
# number of random samples to test from 'space'
total_number_of_experiments = 3
# execution queue to add experiments to
execution_queue_name = 'default'
# Select base template task
# Notice we can be more imaginative and use task_id which will eliminate the need to use project name
template_task = Task.get_task(project_name='examples', task_name='Keras AutoML base')

for i in range(total_number_of_experiments):
    # clone the template task into a new write enabled task (where we can change parameters)
    cloned_task = Task.clone(source_task=template_task,
                             name=template_task.name + ' {}'.format(i), parent=template_task.id)
    # get the original template parameters
    cloned_task_parameters = cloned_task.get_parameters()
    # override with random samples from the search space
    for k in space.keys():
        cloned_task_parameters[k] = space[k]()
    # put back into the new cloned task
    cloned_task.set_parameters(cloned_task_parameters)
    print('Experiment {} set with parameters {}'.format(i, cloned_task_parameters))
    # enqueue the task for execution
    Task.enqueue(cloned_task.id, queue_name=execution_queue_name)
    print('Experiment id={} enqueue for execution'.format(cloned_task.id))

# we are done, the next step is to watch the experiments graphs
print('Done')

View File

@@ -1,43 +0,0 @@
from trains import Task
from time import sleep

# Initialize the Task Pipe's first Task used to start the Task Pipe
task = Task.init('examples', 'Simple Controller Task')

# Create a hyper-parameter dictionary for the task
param = {}
# Connect the hyper-parameter dictionary to the task; when executed remotely,
# values set by the launching task/UI override the defaults assigned below.
param = task.connect(param)

# In this example we pass next task's name as a parameter
param['next_task_name'] = 'Toy Base Task'
# This is a parameter name in the next task we want to change
param['param_name'] = 'Example_Param'
# This is the parameter value in the next task we want to change
param['param_name_new_value'] = 3
# The queue where we want the template task (clone) to be sent to
param['execution_queue_name'] = 'default'

# Simulate the work of a Task
print('Processing....')
sleep(2.0)
print('Done processing :)')

# Get a reference to the task to pipe to (looked up by project name + task name).
next_task = Task.get_task(project_name=task.get_project_name(), task_name=param['next_task_name'])

# Clone the task to pipe to. This creates a task with status Draft whose parameters can be modified.
cloned_task = Task.clone(source_task=next_task, name='Auto generated cloned task')

# Get the original parameters of the Task, modify the value of one parameter,
# and set the parameters in the next Task
cloned_task_parameters = cloned_task.get_parameters()
cloned_task_parameters[param['param_name']] = param['param_name_new_value']
cloned_task.set_parameters(cloned_task_parameters)

# Enqueue the Task for execution. The enqueued Task must already exist in the trains platform
print('Enqueue next step in pipeline to queue: {}'.format(param['execution_queue_name']))
Task.enqueue(cloned_task.id, queue_name=param['execution_queue_name'])

# We are done. The next step in the pipe line is in charge of the pipeline now.
print('Done')

View File

@@ -1,19 +0,0 @@
# Base experiment executed as the second step of the pipeline (see task_piping.py).
# Run it once manually so it is registered in the platform and can be cloned.
from trains import Task

# Register this script as the template task the controller will clone.
task = Task.init('examples', 'Toy Base Task')

# A single example hyper-parameter, connected to the task so an initiating
# (controller) task can override its value in cloned copies.
params = {'Example_Param': 1}
task.connect(params)

# After connect(), the printed value reflects any override set by the
# initiating task.
print("Example_Param is", params['Example_Param'])

View File

@@ -0,0 +1 @@
../frameworks/pytorch/pytorch_distributed_example.py

View File

@@ -0,0 +1,3 @@
torch>=1.1.0
torchvision>=0.3.0
trains

View File

@@ -1,114 +0,0 @@
# TRAINS example - run a remote Jupyter Notebook server (and optionally an
# SSH server) on the machine/docker executing this task, echoing the server
# output and storing the connection links on the task's comment field.
import os
import sys
import subprocess
from copy import deepcopy
import socket
import psutil
from tempfile import mkstemp
# make sure we have jupyter in the auto requirements
import jupyter  # noqa: F401
from trains import Task

# set default docker image, with network configuration
os.environ['TRAINS_DOCKER_IMAGE'] = 'nvidia/cuda --network host'

# initialize TRAINS
task = Task.init(project_name='examples', task_name='Remote Jupyter NoteBook')

# get rid of all the runtime TRAINS environment variables (so the spawned
# notebook is not auto-logged), keeping only connection/credential settings
preserve = ('TRAINS_API_HOST', 'TRAINS_WEB_HOST', 'TRAINS_FILES_HOST', 'TRAINS_CONFIG_FILE',
            'TRAINS_API_ACCESS_KEY', 'TRAINS_API_SECRET_KEY', 'TRAINS_API_HOST_VERIFY_CERT')
# setup os environment
env = deepcopy(os.environ)
for key in os.environ:
    if key.startswith('TRAINS') and key not in preserve:
        env.pop(key, None)

# Connected parameters: jupyter server base folder and optional SSH access
param = {
    'jupyter_server_base_directory': '~/',
    'ssh_server': True,
    'ssh_password': 'training'
}
task.connect(param)

# Resolve this machine's host name and address; fall back to enumerating
# network interfaces when host-name resolution fails.
try:
    hostname = socket.gethostname()
    # NOTE(review): gethostbyname() returns a single address string despite
    # the plural variable name - confirm nothing expects a list here.
    hostnames = socket.gethostbyname(socket.gethostname())
except Exception:
    def get_ip_addresses(family):
        # Yield every address of the given family across all interfaces.
        for interface, snics in psutil.net_if_addrs().items():
            for snic in snics:
                if snic.family == family:
                    yield snic.address

    hostnames = list(get_ip_addresses(socket.AF_INET))
    hostname = hostnames[0]

if param.get('ssh_server'):
    print('Installing SSH Server on {} [{}]'.format(hostname, hostnames))
    ssh_password = param.get('ssh_password', 'training')
    try:
        # pick the first port in [10022, 15000) that is not already in use
        used_ports = [i.laddr.port for i in psutil.net_connections()]
        port = [i for i in range(10022, 15000) if i not in used_ports][0]
        result = os.system(
            'apt-get install -y openssh-server && '
            'mkdir -p /var/run/sshd && '
            'echo \'root:{password}\' | chpasswd && '
            'echo \'PermitRootLogin yes\' >> /etc/ssh/sshd_config && '
            'sed -i \'s/PermitRootLogin prohibit-password/PermitRootLogin yes/\' /etc/ssh/sshd_config && '
            'sed \'s@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g\' -i /etc/pam.d/sshd && '
            'echo "export VISIBLE=now" >> /etc/profile && '
            'echo "export TRAINS_CONFIG_FILE={trains_config_file}" >> /etc/profile && '
            '/usr/sbin/sshd -p {port}'.format(
                password=ssh_password, port=port, trains_config_file=os.environ.get('TRAINS_CONFIG_FILE')))
        if result == 0:
            print('\n#\n# SSH Server running on {} [{}] port {}\n# LOGIN u:root p:{}\n#\n'.format(
                hostname, hostnames, port, ssh_password))
        else:
            raise ValueError()
    # fix: was a bare `except:` which also swallows KeyboardInterrupt/SystemExit
    except Exception:
        print('\n#\n# Error: SSH server could not be launched\n#\n')

# execute jupyter notebook, redirecting its stdout/stderr into a temp file
fd, local_filename = mkstemp()
cwd = os.path.expandvars(os.path.expanduser(param['jupyter_server_base_directory'])) \
    if param['jupyter_server_base_directory'] else os.getcwd()
print('Running Jupyter Notebook Server on {} [{}] at {}'.format(hostname, hostnames, cwd))
process = subprocess.Popen([sys.executable, '-m', 'jupyter', 'notebook', '--no-browser', '--allow-root', '--ip', '0.0.0.0'],
                           env=env, stdout=fd, stderr=fd, cwd=cwd)

# print stdout/stderr: poll the server process, echo its output, and copy the
# notebook links into the task comment on the first batch of output
prev_line_count = 0
process_running = True
while process_running:
    process_running = False
    try:
        process.wait(timeout=2.0 if prev_line_count == 0 else 15.0)
    except subprocess.TimeoutExpired:
        process_running = True
    with open(local_filename, "rt") as f:
        # read new lines
        new_lines = f.readlines()
    if not new_lines:
        continue
    output = ''.join(new_lines)
    print(output)
    # update task comment with jupyter notebook server links
    if prev_line_count == 0:
        task.comment += '\n' + ''.join(line for line in new_lines if 'http://' in line or 'https://' in line)
    prev_line_count += len(new_lines)
    # rewind and truncate the temp file so only fresh output is read next time
    os.lseek(fd, 0, 0)
    os.ftruncate(fd, 0)

# cleanup
os.close(fd)
try:
    os.unlink(local_filename)
# fix: was a bare `except:`; keep best-effort removal but only for real errors
except Exception:
    pass

View File

@@ -2,10 +2,12 @@ import autokeras as ak
import numpy as np
import tensorflow as tf
from tensorflow import keras
from trains import Task
task = Task.init(project_name="autokeras", task_name="autokeras imdb example with scalars")
def imdb_raw():
max_features = 20000
index_offset = 3 # word index offset

View File

@@ -0,0 +1,3 @@
autokeras
tensorflow==2.1.0
trains

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,118 @@
# TRAINS - Keras with Tensorboard example code, automatic logging model and Tensorboard outputs
#
# Train a simple deep NN on the MNIST dataset.
# Gets to 98.40% test accuracy after 20 epochs
# (there is *a lot* of margin for parameter tuning).
# 2 seconds per epoch on a K520 GPU.
from __future__ import print_function
import argparse
import os
import tempfile
import numpy as np
import tensorflow as tf
from tensorflow.keras import utils as np_utils
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from trains import Task
class TensorBoardImage(TensorBoard):
    """TensorBoard callback that additionally logs one validation image per epoch.

    NOTE(review): this uses the TF1-style ``tf.Summary`` protobuf API and the
    ``self.validation_data`` / ``self.writer`` callback attributes, while the
    surrounding imports use ``tensorflow.keras`` (TF2-style) - confirm the
    intended TensorFlow version before running.
    """

    @staticmethod
    def make_image(tensor):
        """Encode a 2-D grayscale array as a PNG wrapped in a ``tf.Summary.Image``."""
        from PIL import Image
        import io
        # replicate the single channel three times to form an RGB image
        tensor = np.stack((tensor, tensor, tensor), axis=2)
        height, width, channels = tensor.shape
        image = Image.fromarray(tensor)
        output = io.BytesIO()
        image.save(output, format='PNG')
        image_string = output.getvalue()
        output.close()
        return tf.Summary.Image(height=height,
                                width=width,
                                colorspace=channels,
                                encoded_image_string=image_string)

    def on_epoch_end(self, epoch, logs=None):
        """Write the regular TensorBoard logs plus one validation-sample image."""
        if logs is None:
            logs = {}
        super(TensorBoardImage, self).on_epoch_end(epoch, logs)
        images = self.validation_data[0]  # 0 - data; 1 - labels
        # first validation sample, rescaled from [0, 1] back to uint8 pixels
        img = (255 * images[0].reshape(28, 28)).astype('uint8')
        image = self.make_image(img)
        summary = tf.Summary(value=[tf.Summary.Value(tag='image', image=image)])
        self.writer.add_summary(summary, epoch)
# Command-line hyper-parameters (argparse arguments are auto-logged by trains)
parser = argparse.ArgumentParser(description='Keras MNIST Example')
parser.add_argument('--batch-size', type=int, default=128, help='input batch size for training (default: 128)')
parser.add_argument('--epochs', type=int, default=6, help='number of epochs to train (default: 6)')
args = parser.parse_args()

# the data, shuffled and split between train and test sets
nb_classes = 10
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# flatten 28x28 images into 784-long vectors and scale pixels to [0, 1]
X_train = X_train.reshape(60000, 784).astype('float32')/255.
X_test = X_test.reshape(10000, 784).astype('float32')/255.
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices (one-hot targets)
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()
model.add(Dense(512, input_shape=(784,)))
model.add(Activation('relu'))
# model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
# model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

# NOTE(review): model2 is built but never compiled or trained - presumably
# kept to demonstrate multi-model definitions; confirm before removing.
model2 = Sequential()
model2.add(Dense(512, input_shape=(784,)))
model2.add(Activation('relu'))

model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

# Connecting TRAINS
task = Task.init(project_name='examples', task_name='Keras with TensorBoard example')
# log an arbitrary (nested) configuration dictionary on the task
task.connect_configuration({'test': 1337, 'nested': {'key': 'value', 'number': 1}})

# Advanced: setting model class enumeration (label name -> index mapping)
labels = dict(('digit_%d' % i, i) for i in range(10))
task.set_model_label_enumeration(labels)

output_folder = os.path.join(tempfile.gettempdir(), 'keras_example')
board = TensorBoard(histogram_freq=1, log_dir=output_folder, write_images=False)
# save one checkpoint per epoch; each saved weight file is auto-logged
model_store = ModelCheckpoint(filepath=os.path.join(output_folder, 'weight.{epoch}.hdf5'))

# load previous model, if it is there (resume from the first-epoch checkpoint)
# noinspection PyBroadException
try:
    model.load_weights(os.path.join(output_folder, 'weight.1.hdf5'))
except Exception:
    pass

history = model.fit(X_train, Y_train,
                    batch_size=args.batch_size, epochs=args.epochs,
                    callbacks=[board, model_store],
                    verbose=1, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

File diff suppressed because one or more lines are too long

View File

@@ -102,9 +102,10 @@ board = TensorBoard(histogram_freq=1, log_dir=output_folder, write_images=False)
model_store = ModelCheckpoint(filepath=os.path.join(output_folder, 'weight.{epoch}.hdf5'))
# load previous model, if it is there
# noinspection PyBroadException
try:
model.load_weights(os.path.join(output_folder, 'weight.1.hdf5'))
except:
except Exception:
pass
history = model.fit(X_train, Y_train,

View File

@@ -0,0 +1,2 @@
trains
Keras>=2.2.4

View File

@@ -0,0 +1,54 @@
# TRAINS - Example of manual model configuration and uploading
#
import os
from tempfile import gettempdir
from keras import Input, layers, Model
from trains import Task
task = Task.init(project_name='examples', task_name='Model configuration and upload')
def get_model():
    """Build and compile a minimal single-Dense-layer regression model."""
    model_input = Input(shape=(32,))
    model_output = layers.Dense(1)(model_input)
    simple_model = Model(model_input, model_output)
    simple_model.compile(optimizer='adam', loss='mean_squared_error')
    return simple_model
# create a model
model = get_model()

# Connect a local configuration file; the backend stores a copy of it.
config_file = os.path.join('..', '..', 'reporting', 'data_samples', 'sample.json')
config_file = task.connect_configuration(config_file)
# then read configuration as usual, the backend will contain a copy of it.
# later when executing remotely, the returned `config_file` will be a temporary file
# containing a new copy of the configuration retrieved from the backend
# # model_config_dict = json.load(open(config_file, 'rt'))

# Or Store dictionary of definition for a specific network design
model_config_dict = {
    'value': 13.37,
    'dict': {'sub_value': 'string', 'sub_integer': 11},
    'list_of_ints': [1, 2, 3, 4],
}
model_config_dict = task.connect_configuration(model_config_dict)
# We now update the dictionary after connecting it, and the changes will be tracked as well.
model_config_dict['new value'] = 10
model_config_dict['value'] *= model_config_dict['new value']

# store the label enumeration of the training model
labels = {'background': 0, 'cat': 1, 'dog': 2}
task.connect_label_enumeration(labels)

# storing the model, it will have the task network configuration and label enumeration
print('Any model stored from this point onwards, will contain both model_config and label_enumeration')
model.save(os.path.join(gettempdir(), "model"))
print('Model saved')

View File

@@ -0,0 +1,3 @@
Keras
tensorflow>=2.0
trains

View File

@@ -0,0 +1,4 @@
matplotlib >= 3.1.1 ; python_version >= '3.6'
matplotlib >= 2.2.4 ; python_version < '3.6'
seaborn
trains

View File

@@ -1,4 +1,4 @@
# TRAINS - Example of manual model configuration
# TRAINS - Example of manual model configuration and uploading
#
import os
from tempfile import gettempdir
@@ -7,13 +7,13 @@ import torch
from trains import Task
task = Task.init(project_name='examples', task_name='Manual model configuration')
task = Task.init(project_name='examples', task_name='Model configuration and upload')
# create a model
model = torch.nn.Module
# Connect a local configuration file
config_file = 'samples/sample.json'
config_file = os.path.join('..', '..', 'reporting', 'data_samples', 'sample.json')
config_file = task.connect_configuration(config_file)
# then read configuration as usual, the backend will contain a copy of it.
# later when executing remotely, the returned `config_file` will be a temporary file

View File

@@ -0,0 +1,380 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "e-YsQrBjzNdX"
},
"outputs": [],
"source": [
"! pip install -U pip\n",
"! pip install -U torch==1.5.0\n",
"! pip install -U torchaudio==0.5.0\n",
"! pip install -U torchvision==0.6.0\n",
"! pip install -U matplotlib==3.2.1\n",
"! pip install -U trains==0.15.0\n",
"! pip install -U pandas==1.0.4\n",
"! pip install -U numpy==1.18.4\n",
"! pip install -U tensorboard==2.2.1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "T7T0Rf26zNdm"
},
"outputs": [],
"source": [
"import PIL\n",
"import io\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"from pathlib2 import Path\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"from torch.utils.data import Dataset\n",
"from torch.utils.tensorboard import SummaryWriter\n",
"\n",
"import torchaudio\n",
"from torchvision.transforms import ToTensor\n",
"\n",
"from trains import Task\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"task = Task.init(project_name='Audio Example', task_name='audio classifier')\n",
"configuration_dict = {'number_of_epochs': 10, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "msiz7QdvzNeA",
"scrolled": true
},
"outputs": [],
"source": [
"# Download UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html)\n",
"path_to_UrbanSound8K = './data/UrbanSound8K'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "wXtmZe7yzNeS"
},
"outputs": [],
"source": [
"class UrbanSoundDataset(Dataset):\n",
"# Wrapper for the UrbanSound8K dataset\n",
" def __init__(self, csv_path, file_path, folderList):\n",
" self.file_path = file_path\n",
" self.file_names = []\n",
" self.labels = []\n",
" self.folders = []\n",
" \n",
" #loop through the csv entries and only add entries from folders in the folder list\n",
" csvData = pd.read_csv(csv_path)\n",
" for i in range(0,len(csvData)):\n",
" if csvData.iloc[i, 5] in folderList:\n",
" self.file_names.append(csvData.iloc[i, 0])\n",
" self.labels.append(csvData.iloc[i, 6])\n",
" self.folders.append(csvData.iloc[i, 5])\n",
" \n",
" def __getitem__(self, index):\n",
" #format the file path and load the file\n",
" path = self.file_path / (\"fold\" + str(self.folders[index])) / self.file_names[index]\n",
" sound, sample_rate = torchaudio.load(path, out = None, normalization = True)\n",
"\n",
" # UrbanSound8K uses two channels, this will convert them to one\n",
" soundData = torch.mean(sound, dim=0, keepdim=True)\n",
" \n",
" #Make sure all files are the same size\n",
" if soundData.numel() < 160000:\n",
" fixedsize_data = torch.nn.functional.pad(soundData, (0, 160000 - soundData.numel()))\n",
" else:\n",
" fixedsize_data = soundData[0,:160000].reshape(1,160000)\n",
" \n",
" #downsample the audio\n",
" downsample_data = fixedsize_data[::5]\n",
" \n",
" melspectogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate)\n",
" melspectogram = melspectogram_transform(downsample_data)\n",
" melspectogram_db = torchaudio.transforms.AmplitudeToDB()(melspectogram)\n",
"\n",
" return fixedsize_data, sample_rate, melspectogram_db, self.labels[index]\n",
" \n",
" def __len__(self):\n",
" return len(self.file_names)\n",
"\n",
"\n",
"csv_path = Path(path_to_UrbanSound8K) / 'metadata' / 'UrbanSound8K.csv'\n",
"file_path = Path(path_to_UrbanSound8K) / 'audio'\n",
"\n",
"train_set = UrbanSoundDataset(csv_path, file_path, range(1,10))\n",
"test_set = UrbanSoundDataset(csv_path, file_path, [10])\n",
"print(\"Train set size: \" + str(len(train_set)))\n",
"print(\"Test set size: \" + str(len(test_set)))\n",
"\n",
"train_loader = torch.utils.data.DataLoader(train_set, batch_size = configuration_dict.get('batch_size', 4), \n",
" shuffle = True, pin_memory=True, num_workers=1)\n",
"test_loader = torch.utils.data.DataLoader(test_set, batch_size = configuration_dict.get('batch_size', 4), \n",
" shuffle = False, pin_memory=True, num_workers=1)\n",
"\n",
"classes = ('air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 'engine_idling', \n",
" 'gun_shot', 'jackhammer', 'siren', 'street_music')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "ylblw-k1zNeZ"
},
"outputs": [],
"source": [
"class Net(nn.Module):\n",
" def __init__(self, num_classes, dropout_value):\n",
" super(Net,self).__init__()\n",
" self.num_classes = num_classes\n",
" self.dropout_value = dropout_value\n",
" \n",
" self.C1 = nn.Conv2d(1,16,3)\n",
" self.C2 = nn.Conv2d(16,32,3)\n",
" self.C3 = nn.Conv2d(32,64,3)\n",
" self.C4 = nn.Conv2d(64,128,3)\n",
" self.maxpool1 = nn.MaxPool2d(2,2) \n",
" self.fc1 = nn.Linear(128*29*197,128)\n",
" self.fc2 = nn.Linear(128,self.num_classes)\n",
" self.dropout = nn.Dropout(self.dropout_value)\n",
" \n",
" def forward(self,x):\n",
" # add sequence of convolutional and max pooling layers\n",
" x = F.relu(self.C1(x))\n",
" x = self.maxpool1(F.relu(self.C2(x)))\n",
" x = F.relu(self.C3(x))\n",
" x = self.maxpool1(F.relu(self.C4(x)))\n",
" # flatten image input\n",
" x = x.view(-1,128*29*197)\n",
" x = F.relu(self.fc1(self.dropout(x)))\n",
" x = self.fc2(self.dropout(x))\n",
" return x\n",
" \n",
" \n",
"model = Net(len(classes), configuration_dict.get('dropout', 0.25))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "3yKYru14zNef"
},
"outputs": [],
"source": [
"optimizer = optim.SGD(model.parameters(), lr = configuration_dict.get('base_lr', 0.001), momentum = 0.9)\n",
"scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 3, gamma = 0.1)\n",
"criterion = nn.CrossEntropyLoss()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu')\n",
"print('Device to use: {}'.format(device))\n",
"model.to(device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tensorboard_writer = SummaryWriter('./tensorboard_logs')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def plot_signal(signal, title, cmap=None):\n",
" fig = plt.figure()\n",
" if signal.ndim == 1:\n",
" plt.plot(signal)\n",
" else:\n",
" plt.imshow(signal, cmap=cmap) \n",
" plt.title(title)\n",
" \n",
" plot_buf = io.BytesIO()\n",
" plt.savefig(plot_buf, format='jpeg')\n",
" plot_buf.seek(0)\n",
" plt.close(fig)\n",
" return ToTensor()(PIL.Image.open(plot_buf))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "Vdthqz3JzNem"
},
"outputs": [],
"source": [
"def train(model, epoch):\n",
" model.train()\n",
" for batch_idx, (sounds, sample_rate, inputs, labels) in enumerate(train_loader):\n",
" inputs = inputs.to(device)\n",
" labels = labels.to(device)\n",
"\n",
" # zero the parameter gradients\n",
" optimizer.zero_grad()\n",
"\n",
" # forward + backward + optimize\n",
" outputs = model(inputs)\n",
" _, predicted = torch.max(outputs, 1)\n",
" loss = criterion(outputs, labels)\n",
" loss.backward()\n",
" optimizer.step()\n",
" \n",
" iteration = epoch * len(train_loader) + batch_idx\n",
" if batch_idx % log_interval == 0: #print training stats\n",
" print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'\n",
" .format(epoch, batch_idx * len(inputs), len(train_loader.dataset), \n",
" 100. * batch_idx / len(train_loader), loss))\n",
" tensorboard_writer.add_scalar('training loss/loss', loss, iteration)\n",
" tensorboard_writer.add_scalar('learning rate/lr', optimizer.param_groups[0]['lr'], iteration)\n",
" \n",
" \n",
" if batch_idx % debug_interval == 0: # report debug image every 500 mini-batches\n",
" for n, (inp, pred, label) in enumerate(zip(inputs, predicted, labels)):\n",
" series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()])\n",
" tensorboard_writer.add_image('Train MelSpectrogram samples/{}'.format(n), \n",
" plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "LBWoj7u5zNes"
},
"outputs": [],
"source": [
"def test(model, epoch):\n",
" model.eval()\n",
" class_correct = list(0. for i in range(10))\n",
" class_total = list(0. for i in range(10))\n",
" with torch.no_grad():\n",
" for idx, (sounds, sample_rate, inputs, labels) in enumerate(test_loader):\n",
" inputs = inputs.to(device)\n",
" labels = labels.to(device)\n",
"\n",
" outputs = model(inputs)\n",
"\n",
" _, predicted = torch.max(outputs, 1)\n",
" c = (predicted == labels)\n",
" for i in range(len(inputs)):\n",
" label = labels[i].item()\n",
" class_correct[label] += c[i].item()\n",
" class_total[label] += 1\n",
" \n",
" iteration = (epoch + 1) * len(train_loader)\n",
" if idx % debug_interval == 0: # report debug image every 100 mini-batches\n",
" for n, (sound, inp, pred, label) in enumerate(zip(sounds, inputs, predicted, labels)):\n",
" series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()])\n",
" tensorboard_writer.add_audio('Test audio samples/{}'.format(n), \n",
" sound, iteration, int(sample_rate[n]))\n",
" tensorboard_writer.add_image('Test MelSpectrogram samples/{}_{}'.format(idx, n), \n",
" plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration)\n",
"\n",
" total_accuracy = 100 * sum(class_correct)/sum(class_total)\n",
" print('[Iteration {}] Accuracy on the {} test images: {}%\\n'.format(epoch, sum(class_total), total_accuracy))\n",
" tensorboard_writer.add_scalar('accuracy/total', total_accuracy, iteration)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "X5lx3g_5zNey",
"scrolled": false
},
"outputs": [],
"source": [
"log_interval = 100\n",
"debug_interval = 200\n",
"for epoch in range(configuration_dict.get('number_of_epochs', 10)):\n",
" train(model, epoch)\n",
" test(model, epoch)\n",
" scheduler.step()"
]
}
],
"metadata": {
"colab": {
"name": "audio_classifier_tutorial.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 1
}

View File

@@ -0,0 +1,128 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"! pip install -U pip\n",
"! pip install -U torch==1.5.0\n",
"! pip install -U torchaudio==0.5.0\n",
"! pip install -U matplotlib==3.2.1\n",
"! pip install -U trains==0.15.0\n",
"! pip install -U tensorboard==2.2.1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import torch\n",
"import torchaudio\n",
"from torch.utils.tensorboard import SummaryWriter\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from trains import Task\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"task = Task.init(project_name='Audio Example', task_name='data pre-processing')\n",
"configuration_dict = {'number_of_samples': 3}\n",
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tensorboard_writer = SummaryWriter('./tensorboard_logs')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"if not os.path.isdir('./data'):\n",
" os.mkdir('./data')\n",
"yesno_data = torchaudio.datasets.YESNO('./data', download=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def plot_signal(signal, title, cmap=None):\n",
" plt.figure()\n",
" if signal.ndim == 1:\n",
" plt.plot(signal)\n",
" else:\n",
" plt.imshow(signal, cmap=cmap) \n",
" plt.title(title)\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"for n in range(configuration_dict.get('number_of_samples', 3)):\n",
" waveform, sample_rate, labels = yesno_data[n]\n",
" melspectrogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate)\n",
" plot_signal(waveform[0,:], 'Original waveform')\n",
" tensorboard_writer.add_audio('Audio samples/{}'.format(n), waveform, n, sample_rate)\n",
" plot_signal(melspectrogram_transform(waveform.squeeze()).numpy(), 'Mel spectrogram', 'hot')\n",
" plot_signal(torchaudio.transforms.AmplitudeToDB()(melspectrogram_transform(waveform.squeeze())).numpy(), 'Mel spectrogram DB', 'hot')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 1
}

View File

@@ -0,0 +1,136 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# execute this in command line on all machines to be used as workers before initiating the hyperparameter search \n",
"# ! pip install -U trains-agent==0.15.0\n",
"# ! trains-agent daemon --queue default\n",
"\n",
"# pip install with locked versions\n",
"! pip install -U pandas==1.0.3\n",
"! pip install -U trains==0.15.0\n",
"! pip install -U hpbandster==0.7.4 # Needed only for Bayesian optimization Hyper-Band"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from trains.automation import UniformParameterRange, UniformIntegerParameterRange\n",
"from trains.automation import RandomSearch, HyperParameterOptimizer\n",
"from trains.automation.hpbandster import OptimizerBOHB # Needed only for Bayesian optimization Hyper-Band\n",
"\n",
"from trains import Task"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"task = Task.init(project_name='Hyper-Parameter Search', task_name='Hyper-Parameter Optimization')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#####################################################################\n",
"### Don't forget to replace this default id with your own task id ###\n",
"#####################################################################\n",
"TEMPLATE_TASK_ID = 'd8e928460f98437c998f3597768597f8'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"optimizer = HyperParameterOptimizer(\n",
" base_task_id=TEMPLATE_TASK_ID, # This is the experiment we want to optimize\n",
" # here we define the hyper-parameters to optimize\n",
" hyper_parameters=[\n",
" UniformIntegerParameterRange('number_of_epochs', min_value=5, max_value=15, step_size=1),\n",
" UniformIntegerParameterRange('batch_size', min_value=2, max_value=12, step_size=2),\n",
" UniformParameterRange('dropout', min_value=0, max_value=0.5, step_size=0.05),\n",
" UniformParameterRange('base_lr', min_value=0.0005, max_value=0.01, step_size=0.0005),\n",
" ],\n",
" # this is the objective metric we want to maximize/minimize\n",
" objective_metric_title='accuracy',\n",
" objective_metric_series='total',\n",
" objective_metric_sign='max', # maximize or minimize the objective metric\n",
" max_number_of_concurrent_tasks=3, # number of concurrent experiments\n",
" # setting optimizer - trains supports GridSearch, RandomSearch or OptimizerBOHB\n",
" optimizer_class=OptimizerBOHB, # can be replaced with RandomSearch or GridSearch\n",
" execution_queue='default', # queue to schedule the experiments for execution\n",
" optimization_time_limit=30., # time limit for each experiment (optional, ignored by OptimizerBOHB)\n",
" pool_period_min=1, # Check the experiments every x minutes\n",
" # set the maximum number of experiments for the optimization.\n",
" # OptimizerBOHB sets the total number of iteration as total_max_jobs * max_iteration_per_job\n",
" total_max_jobs=12,\n",
" # setting OptimizerBOHB configuration (ignored by other optimizers)\n",
" min_iteration_per_job=15000, # minimum number of iterations per experiment, till early stopping\n",
" max_iteration_per_job=150000, # maximum number of iterations per experiment\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"optimizer.set_time_limit(in_minutes=120.0) # set the time limit for the optimization process\n",
"optimizer.start() \n",
"optimizer.wait() # wait until process is done\n",
"optimizer.stop() # make sure background optimization stopped"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# optimization is completed, print the top performing experiments id\n",
"k = 3\n",
"top_exp = optimizer.get_top_experiments(top_k=k)\n",
"print('Top {} experiments are:'.format(k))\n",
"for n, t in enumerate(top_exp, 1):\n",
" print('Rank {}: task id={} |result={}'\n",
" .format(n, t.id, t.get_last_scalar_metrics()['accuracy']['total']['last']))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -0,0 +1,243 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# execute this in command line before initiating the notebook: \n",
"# pip install -U pip\n",
"# pip install -U ipywidgets==7.5.1\n",
"# jupyter nbextension enable --py widgetsnbextension\n",
"\n",
"# pip install with locked versions\n",
"! pip install -U torch==1.5.0\n",
"! pip install -U torchvision==0.6.0\n",
"! pip install -U numpy==1.18.4\n",
"! pip install -U trains==0.15.0\n",
"! pip install -U tensorboard==2.2.1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"from torch.utils.tensorboard import SummaryWriter\n",
"\n",
"import torchvision.datasets as datasets\n",
"import torchvision.transforms as transforms\n",
"\n",
"from trains import Task"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"task = Task.init(project_name='Hyper-Parameter Search', task_name='image_classification_CIFAR10')\n",
"configuration_dict = {'number_of_epochs': 3, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"transform = transforms.Compose([transforms.ToTensor()])\n",
"\n",
"trainset = datasets.CIFAR10(root='./data', train=True,\n",
" download=True, transform=transform)\n",
"trainloader = torch.utils.data.DataLoader(trainset, batch_size=configuration_dict.get('batch_size', 4),\n",
" shuffle=True, num_workers=2)\n",
"\n",
"testset = datasets.CIFAR10(root='./data', train=False,\n",
" download=True, transform=transform)\n",
"testloader = torch.utils.data.DataLoader(testset, batch_size=configuration_dict.get('batch_size', 4),\n",
" shuffle=False, num_workers=2)\n",
"\n",
"classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')\n",
"\n",
"device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class Net(nn.Module):\n",
" def __init__(self):\n",
" super(Net, self).__init__()\n",
" self.conv1 = nn.Conv2d(3, 6, 5)\n",
" self.pool = nn.MaxPool2d(2, 2)\n",
" self.conv2 = nn.Conv2d(6, 16, 5)\n",
" self.fc1 = nn.Linear(16 * 5 * 5, 120)\n",
" self.fc2 = nn.Linear(120, 84)\n",
" self.dropout = nn.Dropout(p=configuration_dict.get('dropout', 0.25))\n",
" self.fc3 = nn.Linear(84, 10)\n",
"\n",
" def forward(self, x):\n",
" x = self.pool(F.relu(self.conv1(x)))\n",
" x = self.pool(F.relu(self.conv2(x)))\n",
" x = x.view(-1, 16 * 5 * 5)\n",
" x = F.relu(self.fc1(x))\n",
" x = F.relu(self.fc2(x))\n",
" x = self.fc3(self.dropout(x))\n",
" return x"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"net = Net().to(device)\n",
"criterion = nn.CrossEntropyLoss()\n",
"optimizer = optim.SGD(net.parameters(), lr=configuration_dict.get('base_lr', 0.001), momentum=0.9)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tensorboard_writer = SummaryWriter('./tensorboard_logs')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def test_model(test_dataloader, iteration):\n",
" class_correct = list(0. for i in range(10))\n",
" class_total = list(0. for i in range(10))\n",
" with torch.no_grad():\n",
" for j, data in enumerate(test_dataloader, 1):\n",
" images, labels = data\n",
" images = images.to(device)\n",
" labels = labels.to(device)\n",
" \n",
" outputs = net(images)\n",
" _, predicted = torch.max(outputs, 1)\n",
" c = (predicted == labels).squeeze()\n",
" for i in range(len(images)):\n",
" label = labels[i].item()\n",
" class_correct[label] += c[i].item()\n",
" class_total[label] += 1\n",
" \n",
" if j % 500 == 0: # report debug image every 500 mini-batches\n",
" for n, (img, pred, label) in enumerate(zip(images, predicted, labels)):\n",
" tensorboard_writer.add_image(\"testing/{}-{}_GT_{}_pred_{}\"\n",
" .format(j, n, classes[label], classes[pred]), img, iteration)\n",
"\n",
" for i in range(len(classes)):\n",
" class_accuracy = 100 * class_correct[i] / class_total[i]\n",
" print('[Iteration {}] Accuracy of {} : {}%'.format(iteration, classes[i], class_accuracy))\n",
" tensorboard_writer.add_scalar('accuracy per class/{}'.format(classes[i]), class_accuracy, iteration)\n",
"\n",
" total_accuracy = 100 * sum(class_correct)/sum(class_total)\n",
" print('[Iteration {}] Accuracy on the {} test images: {}%\\n'.format(iteration, sum(class_total), total_accuracy))\n",
" tensorboard_writer.add_scalar('accuracy/total', total_accuracy, iteration)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for epoch in range(configuration_dict.get('number_of_epochs', 3)): # loop over the dataset multiple times\n",
"\n",
" running_loss = 0.0\n",
" for i, data in enumerate(trainloader, 1):\n",
" # get the inputs; data is a list of [inputs, labels]\n",
" inputs, labels = data\n",
" inputs = inputs.to(device)\n",
" labels = labels.to(device)\n",
"\n",
" # zero the parameter gradients\n",
" optimizer.zero_grad()\n",
"\n",
" # forward + backward + optimize\n",
" outputs = net(inputs)\n",
" loss = criterion(outputs, labels)\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" # print statistics\n",
" running_loss += loss.item()\n",
" \n",
" iteration = epoch * len(trainloader) + i\n",
" if i % 2000 == 0: # report loss every 2000 mini-batches\n",
" print('[Epoch %d, Iteration %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss / 2000))\n",
" tensorboard_writer.add_scalar('training loss', running_loss / 2000, iteration)\n",
" running_loss = 0.0\n",
" \n",
" test_model(testloader, iteration)\n",
"\n",
"print('Finished Training')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"PATH = './cifar_net.pth'\n",
"torch.save(net.state_dict(), PATH)\n",
"tensorboard_writer.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('Task ID number is: {}'.format(task.id))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -45,6 +45,9 @@ to resemble the content of the content-image and the artistic style of the style
# - ``copy`` (to deep copy the models; system package)
from __future__ import print_function
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
@@ -110,8 +113,8 @@ def image_loader(image_name):
return image.to(device, torch.float)
style_img = image_loader("./samples/picasso.jpg")
content_img = image_loader("./samples/dancing.jpg")
style_img = image_loader(os.path.join("..", "..", "reporting", "data_samples", "picasso.jpg"))
content_img = image_loader(os.path.join("..", "..", "reporting", "data_samples", "dancing.jpg"))
assert style_img.size() == content_img.size(), \
"we need to import style and content images of the same size"

View File

@@ -11,8 +11,7 @@ import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from trains import Task
task = Task.init(project_name='examples', task_name='pytorch mnist train')
from trains import Task, Logger
class Net(nn.Module):
@@ -44,12 +43,14 @@ def train(args, model, device, train_loader, optimizer, epoch):
loss.backward()
optimizer.step()
if batch_idx % args.log_interval == 0:
Logger.current_logger().report_scalar(
"train", "loss", iteration=(epoch * len(train_loader) + batch_idx), value=loss.item())
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
100. * batch_idx / len(train_loader), loss.item()))
def test(args, model, device, test_loader):
def test(args, model, device, test_loader, epoch):
model.eval()
test_loss = 0
correct = 0
@@ -63,12 +64,18 @@ def test(args, model, device, test_loader):
test_loss /= len(test_loader.dataset)
Logger.current_logger().report_scalar(
"test", "loss", iteration=epoch, value=test_loss)
Logger.current_logger().report_scalar(
"test", "accuracy", iteration=epoch, value=(correct / len(test_loader.dataset)))
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
def main():
task = Task.init(project_name='examples', task_name='pytorch mnist train')
# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
@@ -99,14 +106,14 @@ def main():
kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=True, download=True,
datasets.MNIST(os.path.join('..', 'data'), train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=False, transform=transforms.Compose([
datasets.MNIST(os.path.join('..', 'data'), train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
@@ -117,7 +124,7 @@ def main():
for epoch in range(1, args.epochs + 1):
train(args, model, device, train_loader, optimizer, epoch)
test(args, model, device, test_loader)
test(args, model, device, test_loader, epoch)
if (args.save_model):
torch.save(model.state_dict(), os.path.join(gettempdir(), "mnist_cnn.pt"))

View File

@@ -66,7 +66,7 @@ def test(model, test_loader, args, optimizer, writer):
data, target = Variable(data), Variable(target)
output = model(data)
test_loss += F.nll_loss(output, target, reduction='sum').data.item() # sum up batch loss
pred = output.data.max(1)[1] # get the index of the max log-probability
pred = output.data.max(1)[1] # get the index of the max log-probability
pred = pred.eq(target.data).cpu().sum()
writer.add_scalar('Test/Loss', pred, niter)
correct += pred
@@ -99,7 +99,7 @@ def main():
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
args = parser.parse_args()
task = Task.init(project_name='examples', task_name='pytorch with tensorboard', output_uri='/tmp/blah')
task = Task.init(project_name='examples', task_name='pytorch with tensorboard') # noqa: F841
writer = SummaryWriter('runs')
writer.add_text('TEXT', 'This is some text', 0)
args.cuda = not args.no_cuda and torch.cuda.is_available()

View File

@@ -0,0 +1 @@
../tensorboardx/pytorch_tensorboardX.py

View File

@@ -0,0 +1,6 @@
matplotlib
# tensorboardX
tensorboard>=1.14.0
torch>=1.1.0
torchvision>=0.3.0
trains

View File

@@ -1,3 +1,6 @@
import os
from tempfile import gettempdir
import numpy as np
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
@@ -6,10 +9,10 @@ from trains import Task
task = Task.init(project_name='examples', task_name='pytorch tensorboard toy example')
writer = SummaryWriter(log_dir='/tmp/tensorboard_logs')
writer = SummaryWriter(log_dir=os.path.join(gettempdir(), 'tensorboard_logs'))
# convert to 4d [batch, col, row, RGB-channels]
image_open = Image.open('./samples/picasso.jpg')
image_open = Image.open(os.path.join("..", "..", "reporting", "data_samples", "picasso.jpg"))
image = np.asarray(image_open)
image_gray = image[:, :, 0][np.newaxis, :, :, np.newaxis]
image_rgba = np.concatenate((image, 255*np.atleast_3d(np.ones(shape=image.shape[:2], dtype=np.uint8))), axis=2)

View File

@@ -42,4 +42,4 @@ plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())
plt.show()
plt.show()

View File

@@ -0,0 +1,3 @@
joblib>=0.13.2
scikit-learn
trains

View File

@@ -10,9 +10,9 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from tensorboardX import SummaryWriter
from torch.autograd import Variable
from torchvision import datasets, transforms
from trains import Task
@@ -65,7 +65,7 @@ def test(model, test_loader, args, optimizer, writer):
data, target = Variable(data), Variable(target)
output = model(data)
test_loss += F.nll_loss(output, target, reduction='sum').data.item() # sum up batch loss
pred = output.data.max(1)[1] # get the index of the max log-probability
pred = output.data.max(1)[1] # get the index of the max log-probability
pred = pred.eq(target.data).cpu().sum()
writer.add_scalar('Test/Loss', pred, niter)
correct += pred
@@ -100,7 +100,7 @@ def main():
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
task = Task.init(project_name='examples', task_name='pytorch with tensorboardX')
task = Task.init(project_name='examples', task_name='pytorch with tensorboardX') # noqa: F841
writer = SummaryWriter('runs')
writer.add_text('TEXT', 'This is some text', 0)

View File

@@ -0,0 +1,4 @@
tensorboardX>=1.8
torch>=1.1.0
torchvision>=0.3.0
trains

View File

@@ -0,0 +1,3 @@
trains
tensorboard>=1.14.0
tensorflow>=1.14.0

View File

@@ -1,5 +1,8 @@
# TRAINS - Example of tensorboard with tensorflow (without any actual training)
#
import os
from tempfile import gettempdir
import tensorflow as tf
import numpy as np
from PIL import Image
@@ -49,7 +52,7 @@ tf.summary.histogram("all_combined", all_combined)
tf.summary.text("this is a test", tf.make_tensor_proto("This is the content", dtype=tf.string))
# convert to 4d [batch, col, row, RGB-channels]
image_open = Image.open('./samples/picasso.jpg')
image_open = Image.open(os.path.join("..", "..", "..", "reporting", "data_samples", "picasso.jpg"))
image = np.asarray(image_open)
image_gray = image[:, :, 0][np.newaxis, :, :, np.newaxis]
image_rgba = np.concatenate((image, 255*np.atleast_3d(np.ones(shape=image.shape[:2], dtype=np.uint8))), axis=2)
@@ -68,7 +71,7 @@ logger = task.get_logger()
# Use original FileWriter for comparison , run:
# % tensorboard --logdir=/tmp/histogram_example
writer = tf.summary.FileWriter("/tmp/histogram_example")
writer = tf.summary.FileWriter(os.path.join(gettempdir(), "histogram_example"))
# Setup a loop and write the summaries to disk
N = 40

View File

@@ -0,0 +1,355 @@
# TRAINS - Example of tensorflow eager mode, model logging and tensorboard
#
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A deep MNIST classifier using convolutional layers.
Sample usage:
python mnist.py --help
"""
from __future__ import absolute_import, division, print_function
import argparse
import os
import sys
import time
from tempfile import gettempdir
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from trains import Task
tf.compat.v1.enable_eager_execution()
task = Task.init(project_name='examples', task_name='Tensorflow eager mode')
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('data_num', 100, """Flag of type integer""")
tf.app.flags.DEFINE_string('img_path', './img', """Flag of type string""")
layers = tf.keras.layers
FLAGS = None
class Discriminator(tf.keras.Model):
"""
GAN Discriminator.
A network to differentiate between generated and real handwritten digits.
"""
def __init__(self, data_format):
"""Creates a model for discriminating between real and generated digits.
Args:
data_format: Either 'channels_first' or 'channels_last'.
'channels_first' is typically faster on GPUs while 'channels_last' is
typically faster on CPUs. See
https://www.tensorflow.org/performance/performance_guide#data_formats
"""
super(Discriminator, self).__init__(name='')
if data_format == 'channels_first':
self._input_shape = [-1, 1, 28, 28]
else:
assert data_format == 'channels_last'
self._input_shape = [-1, 28, 28, 1]
self.conv1 = layers.Conv2D(
64, 5, padding='SAME', data_format=data_format, activation=tf.tanh)
self.pool1 = layers.AveragePooling2D(2, 2, data_format=data_format)
self.conv2 = layers.Conv2D(
128, 5, data_format=data_format, activation=tf.tanh)
self.pool2 = layers.AveragePooling2D(2, 2, data_format=data_format)
self.flatten = layers.Flatten()
self.fc1 = layers.Dense(1024, activation=tf.tanh)
self.fc2 = layers.Dense(1, activation=None)
def call(self, inputs):
"""Return two logits per image estimating input authenticity.
Users should invoke __call__ to run the network, which delegates to this
method (and not call this method directly).
Args:
inputs: A batch of images as a Tensor with shape [batch_size, 28, 28, 1]
or [batch_size, 1, 28, 28]
Returns:
A Tensor with shape [batch_size] containing logits estimating
the probability that corresponding digit is real.
"""
x = tf.reshape(inputs, self._input_shape)
x = self.conv1(x)
x = self.pool1(x)
x = self.conv2(x)
x = self.pool2(x)
x = self.flatten(x)
x = self.fc1(x)
x = self.fc2(x)
return x
class Generator(tf.keras.Model):
"""
Generator of handwritten digits similar to the ones in the MNIST dataset.
"""
def __init__(self, data_format):
"""Creates a model for discriminating between real and generated digits.
Args:
data_format: Either 'channels_first' or 'channels_last'.
'channels_first' is typically faster on GPUs while 'channels_last' is
typically faster on CPUs. See
https://www.tensorflow.org/performance/performance_guide#data_formats
"""
super(Generator, self).__init__(name='')
self.data_format = data_format
# We are using 128 6x6 channels as input to the first deconvolution layer
if data_format == 'channels_first':
self._pre_conv_shape = [-1, 128, 6, 6]
else:
assert data_format == 'channels_last'
self._pre_conv_shape = [-1, 6, 6, 128]
self.fc1 = layers.Dense(6 * 6 * 128, activation=tf.tanh)
# In call(), we reshape the output of fc1 to _pre_conv_shape
# Deconvolution layer. Resulting image shape: (batch, 14, 14, 64)
self.conv1 = layers.Conv2DTranspose(
64, 4, strides=2, activation=None, data_format=data_format)
# Deconvolution layer. Resulting image shape: (batch, 28, 28, 1)
self.conv2 = layers.Conv2DTranspose(
1, 2, strides=2, activation=tf.nn.sigmoid, data_format=data_format)
def call(self, inputs):
"""Return a batch of generated images.
Users should invoke __call__ to run the network, which delegates to this
method (and not call this method directly).
Args:
inputs: A batch of noise vectors as a Tensor with shape
[batch_size, length of noise vectors].
Returns:
A Tensor containing generated images. If data_format is 'channels_last',
the shape of returned images is [batch_size, 28, 28, 1], else
[batch_size, 1, 28, 28]
"""
x = self.fc1(inputs)
x = tf.reshape(x, shape=self._pre_conv_shape)
x = self.conv1(x)
x = self.conv2(x)
return x
def discriminator_loss(discriminator_real_outputs, discriminator_gen_outputs):
"""
Original discriminator loss for GANs, with label smoothing.
See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661) for more
details.
Args:
discriminator_real_outputs: Discriminator output on real data.
discriminator_gen_outputs: Discriminator output on generated data. Expected
to be in the range of (-inf, inf).
Returns:
A scalar loss Tensor.
"""
loss_on_real = tf.compat.v1.losses.sigmoid_cross_entropy(
tf.ones_like(discriminator_real_outputs),
discriminator_real_outputs,
label_smoothing=0.25)
loss_on_generated = tf.compat.v1.losses.sigmoid_cross_entropy(
tf.zeros_like(discriminator_gen_outputs), discriminator_gen_outputs)
loss = loss_on_real + loss_on_generated
tf.contrib.summary.scalar('discriminator_loss', loss)
return loss
def generator_loss(discriminator_gen_outputs):
"""
Original generator loss for GANs.
L = -log(sigmoid(D(G(z))))
See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661)
for more details.
Args:
discriminator_gen_outputs: Discriminator output on generated data. Expected
to be in the range of (-inf, inf).
Returns:
A scalar loss Tensor.
"""
loss = tf.compat.v1.losses.sigmoid_cross_entropy(
tf.ones_like(discriminator_gen_outputs), discriminator_gen_outputs)
tf.contrib.summary.scalar('generator_loss', loss)
return loss
def train_one_epoch(generator, discriminator, generator_optimizer,
discriminator_optimizer, dataset, step_counter,
log_interval, noise_dim):
"""
Train `generator` and `discriminator` models on `dataset`.
Args:
generator: Generator model.
discriminator: Discriminator model.
generator_optimizer: Optimizer to use for generator.
discriminator_optimizer: Optimizer to use for discriminator.
dataset: Dataset of images to train on.
step_counter: An integer variable, used to write summaries regularly.
log_interval: How many steps to wait between logging and collecting
summaries.
noise_dim: Dimension of noise vector to use.
"""
total_generator_loss = 0.0
total_discriminator_loss = 0.0
for (batch_index, images) in enumerate(dataset):
with tf.device('/cpu:0'):
tf.compat.v1.assign_add(step_counter, 1)
with tf.contrib.summary.record_summaries_every_n_global_steps(
log_interval, global_step=step_counter):
current_batch_size = images.shape[0]
noise = tf.random.uniform(
shape=[current_batch_size, noise_dim],
minval=-1.,
maxval=1.,
seed=batch_index)
# we can use 2 tapes or a single persistent tape.
# Using two tapes is memory efficient since intermediate tensors can be
# released between the two .gradient() calls below
with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
generated_images = generator(noise)
tf.contrib.summary.image(
'generated_images',
tf.reshape(generated_images, [-1, 28, 28, 1]),
max_images=10)
discriminator_gen_outputs = discriminator(generated_images)
discriminator_real_outputs = discriminator(images)
discriminator_loss_val = discriminator_loss(discriminator_real_outputs,
discriminator_gen_outputs)
total_discriminator_loss += discriminator_loss_val
generator_loss_val = generator_loss(discriminator_gen_outputs)
total_generator_loss += generator_loss_val
generator_grad = gen_tape.gradient(generator_loss_val,
generator.variables)
discriminator_grad = disc_tape.gradient(discriminator_loss_val,
discriminator.variables)
generator_optimizer.apply_gradients(
zip(generator_grad, generator.variables))
discriminator_optimizer.apply_gradients(
zip(discriminator_grad, discriminator.variables))
if log_interval and batch_index > 0 and batch_index % log_interval == 0:
print('Batch #%d\tAverage Generator Loss: %.6f\tAverage Discriminator Loss: %.6f' %
(batch_index, total_generator_loss / batch_index, total_discriminator_loss / batch_index))
def main(_):
    """Train the MNIST GAN for a few epochs, checkpointing after each one.

    Args:
        _: Unused positional argument supplied by tf.compat.v1.app.run.
    """
    # Prefer GPU execution; channels_first is the faster layout on GPU,
    # channels_last on CPU.
    (device, data_format) = ('/gpu:0', 'channels_first')
    if FLAGS.no_gpu or tf.contrib.eager.num_gpus() <= 0:
        (device, data_format) = ('/cpu:0', 'channels_last')
    print('Using device %s, and data format %s.' % (device, data_format))
    # Load the datasets
    # NOTE(review): only the first 1280 training images are used — presumably
    # to keep the example fast; confirm if full-dataset training is intended.
    data = input_data.read_data_sets(FLAGS.data_dir)
    dataset = (
        tf.data.Dataset.from_tensor_slices(data.train.images[:1280]).shuffle(60000).batch(FLAGS.batch_size))
    # Create the models and optimizers.
    model_objects = {
        'generator': Generator(data_format),
        'discriminator': Discriminator(data_format),
        'generator_optimizer': tf.compat.v1.train.AdamOptimizer(FLAGS.lr),
        'discriminator_optimizer': tf.compat.v1.train.AdamOptimizer(FLAGS.lr),
        'step_counter': tf.compat.v1.train.get_or_create_global_step(),
    }
    # Prepare summary writer and checkpoint info
    summary_writer = tf.contrib.summary.create_file_writer(
        FLAGS.output_dir, flush_millis=1000)
    checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
    latest_cpkt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if latest_cpkt:
        print('Using latest checkpoint at ' + latest_cpkt)
    # The checkpoint captures models, optimizers and the global step together.
    checkpoint = tf.train.Checkpoint(**model_objects)
    # Restore variables on creation if a checkpoint exists.
    checkpoint.restore(latest_cpkt)
    with tf.device(device):
        # Train for a fixed 3 epochs, saving a checkpoint after each epoch.
        for _ in range(3):
            start = time.time()
            with summary_writer.as_default():
                train_one_epoch(dataset=dataset, log_interval=FLAGS.log_interval,
                                noise_dim=FLAGS.noise, **model_objects)
            end = time.time()
            checkpoint.save(checkpoint_prefix)
            print('\nTrain time for epoch #%d (step %d): %f' %
                  (checkpoint.save_counter.numpy(), checkpoint.step_counter.numpy(), end - start))
if __name__ == '__main__':
    # Command-line interface for the GAN example.
    # FIX: several help strings disagreed with the actual defaults
    # (--batch-size claimed 128 but defaults to 16, --log-interval claimed 100
    # but defaults to 1, --output_dir claimed "none" but has a real default);
    # the text below now matches the code.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data-dir',
        type=str,
        default=os.path.join(gettempdir(), 'tensorflow', 'mnist', 'input_data'),
        help='Directory for storing input data (default <tempdir>/tensorflow/mnist/input_data)')
    parser.add_argument(
        '--batch-size',
        type=int,
        default=16,
        metavar='N',
        help='input batch size for training (default: 16)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=1,
        metavar='N',
        help='number of batches between logging and writing summaries (default: 1)')
    parser.add_argument(
        '--output_dir',
        type=str,
        default=os.path.join(gettempdir(), 'tensorflow'),
        metavar='DIR',
        help='Directory to write TensorBoard summaries (default <tempdir>/tensorflow)')
    parser.add_argument(
        '--checkpoint_dir',
        type=str,
        default=os.path.join(gettempdir(), 'tensorflow', 'mnist', 'checkpoints'),
        metavar='DIR',
        help='Directory to save checkpoints in (once per epoch) (default <tempdir>/tensorflow/mnist/checkpoints)')
    parser.add_argument(
        '--lr',
        type=float,
        default=0.001,
        metavar='LR',
        help='learning rate (default: 0.001)')
    parser.add_argument(
        '--noise',
        type=int,
        default=100,
        metavar='N',
        help='Length of noise vector for generator input (default: 100)')
    parser.add_argument(
        '--no-gpu',
        action='store_true',
        default=False,
        help='disables GPU usage even if a GPU is available')
    # Unrecognized args are forwarded to the absl/tf app runner, which then
    # invokes main().
    FLAGS, unparsed = parser.parse_known_args()
    tf.compat.v1.app.run(main=main, argv=[sys.argv[0]] + unparsed)

View File

@@ -0,0 +1,42 @@
# TRAINS - Example of manual model configuration and uploading
#
# Flat example script: connects a config file and a config dict to the task,
# attaches a label enumeration, and saves a (trivial) TF model so the stored
# model inherits both.
import os
import tempfile

import tensorflow as tf

from trains import Task

# Registers this run with the TRAINS backend (network side effect).
task = Task.init(project_name='examples', task_name='Model configuration and upload')

# An empty module is enough for demonstrating model upload.
model = tf.Module()

# Connect a local configuration file
# NOTE(review): path is relative to the script's working directory — assumes
# the script is launched from its own folder; verify before moving it.
config_file = os.path.join('..', '..', 'reporting', 'data_samples', 'sample.json')
config_file = task.connect_configuration(config_file)
# then read configuration as usual, the backend will contain a copy of it.
# later when executing remotely, the returned `config_file` will be a temporary file
# containing a new copy of the configuration retrieved from the backend
# # model_config_dict = json.load(open(config_file, 'rt'))

# Or Store dictionary of definition for a specific network design
model_config_dict = {
    'value': 13.37,
    'dict': {'sub_value': 'string', 'sub_integer': 11},
    'list_of_ints': [1, 2, 3, 4],
}
model_config_dict = task.connect_configuration(model_config_dict)
# We now update the dictionary after connecting it, and the changes will be tracked as well.
model_config_dict['new value'] = 10
model_config_dict['value'] *= model_config_dict['new value']

# store the label enumeration of the training model
labels = {'background': 0, 'cat': 1, 'dog': 2}
task.connect_label_enumeration(labels)

# storing the model, it will have the task network configuration and label enumeration
print('Any model stored from this point onwards, will contain both model_config and label_enumeration')
tempdir = tempfile.mkdtemp()
tf.saved_model.save(model, os.path.join(tempdir, "model"))
print('Model saved')

View File

@@ -0,0 +1,3 @@
tensorboard>=2.0
tensorflow>=2.0
trains

View File

@@ -0,0 +1,279 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Create sample PR curve summary data.
We have 3 classes: R, G, and B. We generate colors within RGB space from 3
normal distributions (1 at each corner of the color triangle: [255, 0, 0],
[0, 255, 0], and [0, 0, 255]).
The true label of each random color is associated with the normal distribution
that generated it.
Using 3 other normal distributions (over the distance each color is from a
corner of the color triangle - RGB), we then compute the probability that each
color belongs to the class. We use those probabilities to generate PR curves.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
from tempfile import gettempdir
from absl import app
from absl import flags
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
from tensorboard.plugins.pr_curve import summary
from trains import Task
task = Task.init(project_name='examples', task_name='tensorboard pr_curve')
tf.compat.v1.disable_v2_behavior()
# Command-line flags for the PR-curve demo (parsed by absl's app.run).
FLAGS = flags.FLAGS

# Where event files are written; defaults to a per-OS temp directory.
flags.DEFINE_string(
    "logdir",
    os.path.join(gettempdir(), "pr_curve_demo"),
    "Directory into which to write TensorBoard data.",
)

flags.DEFINE_integer(
    "steps", 10, "Number of steps to generate for each PR curve."
)
def start_runs(
    logdir, steps, run_name, thresholds, mask_every_other_prediction=False
):
    """Generate one demo run of PR-curve summary data.

    Builds a TF1 graph that samples colors from three normal distributions,
    predicts class membership from distance to each RGB corner, writes one
    PR-curve summary per class for `steps` steps, and saves the events under
    `logdir/run_name`.

    Arguments:
      logdir: The directory into which to store all the runs' data.
      steps: The number of steps to run for.
      run_name: The name of the run.
      thresholds: The number of thresholds to use for PR curves.
      mask_every_other_prediction: Whether to mask every other prediction by
        alternating weights between 0 and 1.
    """
    tf.compat.v1.reset_default_graph()
    tf.compat.v1.set_random_seed(42)

    # Create a normal distribution layer used to generate true color labels.
    distribution = tf.compat.v1.distributions.Normal(loc=0.0, scale=142.0)

    # Sample the distribution to generate colors. Lets generate different numbers
    # of each color. The first dimension is the count of examples.

    # The calls to sample() are given fixed random seed values that are "magic"
    # in that they correspond to the default seeds for those ops when the PR
    # curve test (which depends on this code) was written. We've pinned these
    # instead of continuing to use the defaults since the defaults are based on
    # node IDs from the sequence of nodes added to the graph, which can silently
    # change when this code or any TF op implementations it uses are modified.

    # TODO(nickfelt): redo the PR curve test to avoid reliance on random seeds.

    # Generate reds.
    number_of_reds = 100
    true_reds = tf.clip_by_value(
        tf.concat(
            [
                255 - tf.abs(distribution.sample([number_of_reds, 1], seed=11)),
                tf.abs(distribution.sample([number_of_reds, 2], seed=34)),
            ],
            axis=1,
        ),
        0,
        255,
    )

    # Generate greens.
    number_of_greens = 200
    true_greens = tf.clip_by_value(
        tf.concat(
            [
                tf.abs(distribution.sample([number_of_greens, 1], seed=61)),
                255
                - tf.abs(distribution.sample([number_of_greens, 1], seed=82)),
                tf.abs(distribution.sample([number_of_greens, 1], seed=105)),
            ],
            axis=1,
        ),
        0,
        255,
    )

    # Generate blues.
    number_of_blues = 150
    true_blues = tf.clip_by_value(
        tf.concat(
            [
                tf.abs(distribution.sample([number_of_blues, 2], seed=132)),
                255
                - tf.abs(distribution.sample([number_of_blues, 1], seed=153)),
            ],
            axis=1,
        ),
        0,
        255,
    )

    # Assign each color a vector of 3 booleans based on its true label.
    labels = tf.concat(
        [
            tf.tile(tf.constant([[True, False, False]]), (number_of_reds, 1)),
            tf.tile(tf.constant([[False, True, False]]), (number_of_greens, 1)),
            tf.tile(tf.constant([[False, False, True]]), (number_of_blues, 1)),
        ],
        axis=0,
    )

    # We introduce 3 normal distributions. They are used to predict whether a
    # color falls under a certain class (based on distances from corners of the
    # color triangle). The distributions vary per color. We have the distributions
    # narrow over time.
    # FIX: derive the schedule from the `steps` parameter instead of the global
    # FLAGS.steps, so the function honors its own argument when called with a
    # different step count (the two only coincide when callers pass FLAGS.steps,
    # as main() does — behavior is unchanged for the in-file callers).
    initial_standard_deviations = [v + steps for v in (158, 200, 242)]
    iteration = tf.compat.v1.placeholder(tf.int32, shape=[])
    red_predictor = tf.compat.v1.distributions.Normal(
        loc=0.0,
        scale=tf.cast(
            initial_standard_deviations[0] - iteration, dtype=tf.float32
        ),
    )
    green_predictor = tf.compat.v1.distributions.Normal(
        loc=0.0,
        scale=tf.cast(
            initial_standard_deviations[1] - iteration, dtype=tf.float32
        ),
    )
    blue_predictor = tf.compat.v1.distributions.Normal(
        loc=0.0,
        scale=tf.cast(
            initial_standard_deviations[2] - iteration, dtype=tf.float32
        ),
    )

    # Make predictions (assign 3 probabilities to each color based on each color's
    # distance to each of the 3 corners). We seek double the area in the right
    # tail of the normal distribution.
    examples = tf.concat([true_reds, true_greens, true_blues], axis=0)
    probabilities_colors_are_red = (
        1
        - red_predictor.cdf(
            tf.norm(tensor=examples - tf.constant([255.0, 0, 0]), axis=1)
        )
    ) * 2
    probabilities_colors_are_green = (
        1
        - green_predictor.cdf(
            tf.norm(tensor=examples - tf.constant([0, 255.0, 0]), axis=1)
        )
    ) * 2
    probabilities_colors_are_blue = (
        1
        - blue_predictor.cdf(
            tf.norm(tensor=examples - tf.constant([0, 0, 255.0]), axis=1)
        )
    ) * 2

    predictions = (
        probabilities_colors_are_red,
        probabilities_colors_are_green,
        probabilities_colors_are_blue,
    )

    # This is the crucial piece. We write data required for generating PR curves.
    # We create 1 summary per class because we create 1 PR curve per class.
    for i, color in enumerate(("red", "green", "blue")):
        description = (
            "The probabilities used to create this PR curve are "
            "generated from a normal distribution. Its standard "
            "deviation is initially %0.0f and decreases over time."
            % initial_standard_deviations[i]
        )
        weights = None
        if mask_every_other_prediction:
            # Assign a weight of 0 to every even-indexed prediction. Odd-indexed
            # predictions are assigned a default weight of 1.
            consecutive_indices = tf.reshape(
                tf.range(tf.size(input=predictions[i])),
                tf.shape(input=predictions[i]),
            )
            weights = tf.cast(consecutive_indices % 2, dtype=tf.float32)
        summary.op(
            name=color,
            labels=labels[:, i],
            predictions=predictions[i],
            num_thresholds=thresholds,
            weights=weights,
            display_name="classifying %s" % color,
            description=description,
        )
    # Run the graph once per step, feeding the step index, and write the merged
    # summaries to logdir/run_name.
    merged_summary_op = tf.compat.v1.summary.merge_all()
    events_directory = os.path.join(logdir, run_name)
    sess = tf.compat.v1.Session()
    writer = tf.compat.v1.summary.FileWriter(events_directory, sess.graph)
    for step in xrange(steps):
        feed_dict = {
            iteration: step,
        }
        merged_summary = sess.run(merged_summary_op, feed_dict=feed_dict)
        writer.add_summary(merged_summary, step)
    writer.close()
def run_all(logdir, steps, thresholds, verbose=False):
    """Generate PR curve summaries for both demo runs.

    Arguments:
      logdir: The directory into which to store all the runs' data.
      steps: The number of steps to run for.
      thresholds: The number of thresholds to use for PR curves.
      verbose: Whether to print the names of runs into stdout during execution.
    """
    # Each entry: (run name, whether every other prediction is weighted out).
    # First an evenly-weighted run, then one with arbitrary (masked) weights.
    run_configs = (
        ("colors", False),
        ("mask_every_other_prediction", True),
    )
    for run_name, masked in run_configs:
        if verbose:
            print("--- Running: %s" % run_name)
        start_runs(
            logdir=logdir,
            steps=steps,
            run_name=run_name,
            thresholds=thresholds,
            mask_every_other_prediction=masked,
        )
def main(unused_argv):
    """Entry point: write all PR-curve demo runs under FLAGS.logdir."""
    output_dir = FLAGS.logdir
    print("Saving output to %s." % output_dir)
    # 50 thresholds per curve; print each run name as it executes.
    run_all(output_dir, FLAGS.steps, 50, verbose=True)
    print("Done. Output saved to %s." % output_dir)
if __name__ == "__main__":
app.run(main)

View File

@@ -0,0 +1,76 @@
# TRAINS - Example of tensorboard with tensorflow (without any actual training)
#
import os
import tensorflow as tf
import numpy as np
from tempfile import gettempdir
from PIL import Image
from trains import Task
def generate_summary(k, step):
    """Write one step's worth of demo summaries: histograms, scalars, text and images.

    :param k: Progress fraction (step/N, so roughly in [0, 1)); drives the
        distributions' parameters so the histograms visibly evolve over time.
    :param step: Step index (tf.int64) recorded with every summary.
    """
    # Make a normal distribution, with a shifting mean
    mean_moving_normal = tf.random.normal(shape=[1000], mean=(5 * k), stddev=1)
    # Record that distribution into a histogram summary
    tf.summary.histogram("normal/moving_mean", mean_moving_normal, step=step)
    tf.summary.scalar("normal/value", mean_moving_normal[-1], step=step)

    # Make a normal distribution with shrinking variance
    # (stddev = 1-k stays positive because k < 1).
    variance_shrinking_normal = tf.random.normal(shape=[1000], mean=0, stddev=1-k)
    # Record that distribution too
    tf.summary.histogram("normal/shrinking_variance", variance_shrinking_normal, step=step)
    tf.summary.scalar("normal/variance_shrinking_normal", variance_shrinking_normal[-1], step=step)

    # Let's combine both of those distributions into one dataset
    normal_combined = tf.concat([mean_moving_normal, variance_shrinking_normal], 0)
    # We add another histogram summary to record the combined distribution
    tf.summary.histogram("normal/bimodal", normal_combined, step=step)
    tf.summary.scalar("normal/normal_combined", normal_combined[0], step=step)

    # Add a gamma distribution
    gamma = tf.random.gamma(shape=[1000], alpha=k)
    tf.summary.histogram("gamma", gamma, step=step)
    # And a poisson distribution
    poisson = tf.random.poisson(shape=[1000], lam=k)
    tf.summary.histogram("poisson", poisson, step=step)
    # And a uniform distribution
    uniform = tf.random.uniform(shape=[1000], maxval=k*10)
    tf.summary.histogram("uniform", uniform, step=step)

    # Finally, combine everything together!
    all_distributions = [mean_moving_normal, variance_shrinking_normal, gamma, poisson, uniform]
    all_combined = tf.concat(all_distributions, 0)
    tf.summary.histogram("all_combined", all_combined, step=step)

    # Log text value
    tf.summary.text("this is a test", "This is the content", step=step)

    # convert to 4d [batch, col, row, RGB-channels]
    # NOTE(review): path is relative to the script's working directory —
    # assumes the script is launched from its own folder; verify before moving.
    image_open = Image.open(os.path.join('..', '..', 'reporting', 'data_samples', 'picasso.jpg'))
    image = np.asarray(image_open)
    # Grayscale: first channel only, with batch and channel axes added.
    image_gray = image[:, :, 0][np.newaxis, :, :, np.newaxis]
    # RGBA: append a fully-opaque alpha channel, then add the batch axis.
    image_rgba = np.concatenate((image, 255*np.atleast_3d(np.ones(shape=image.shape[:2], dtype=np.uint8))), axis=2)
    image_rgba = image_rgba[np.newaxis, :, :, :]
    image = image[np.newaxis, :, :, :]
    tf.summary.image("test", image, max_outputs=10, step=step)
    tf.summary.image("test_gray", image_gray, max_outputs=10, step=step)
    tf.summary.image("test_rgba", image_rgba, max_outputs=10, step=step)
# Registers this run with the TRAINS backend (network side effect); once
# initialized, TRAINS also captures the TensorBoard events written below.
task = Task.init(project_name='examples', task_name='tensorboard toy example')

# create the tensorboard file writer in a temp folder
writer = tf.summary.create_file_writer(os.path.join(gettempdir(), "toy_tb_example"))

# Setup a loop and write the summaries to disk
N = 40  # number of steps; also normalizes k_val into [0, 1)
for step in range(N):
    k_val = step/float(N)
    with writer.as_default():
        # summaries need an int64 step tensor
        generate_summary(k_val, tf.cast(step, tf.int64))
print('Tensorboard toy example done')

View File

@@ -1,5 +1,8 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import os
from tempfile import gettempdir
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D
@@ -58,6 +61,7 @@ train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy
test_loss = tf.keras.metrics.Mean(name='test_loss', dtype=tf.float32)
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
# Use tf.GradientTape to train the model
@tf.function
def train_step(images, labels):
@@ -82,14 +86,14 @@ def test_step(images, labels):
# Set up summary writers to write the summaries to disk in a different logs directory
train_log_dir = '/tmp/logs/gradient_tape/train'
test_log_dir = '/tmp/logs/gradient_tape/test'
train_log_dir = os.path.join(gettempdir(), 'logs', 'gradient_tape', 'train')
test_log_dir = os.path.join(gettempdir(), 'logs', 'gradient_tape', 'test')
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)
# Set up checkpoints manager
ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, net=model)
manager = tf.train.CheckpointManager(ckpt, '/tmp/tf_ckpts', max_to_keep=3)
manager = tf.train.CheckpointManager(ckpt, os.path.join(gettempdir(), 'tf_ckpts'), max_to_keep=3)
ckpt.restore(manager.latest_checkpoint)
if manager.latest_checkpoint:
print("Restored from {}".format(manager.latest_checkpoint))

View File

@@ -0,0 +1,6 @@
sklearn
trains
xgboost>=0.90 ; python_version >= '3'
xgboost>=0.82 ; python_version < '3'
# sudo apt-get install graphviz
graphviz>=0.8

View File

@@ -22,11 +22,12 @@ param = {
'num_class': 3} # the number of classes that exist in this datset
num_round = 20 # the number of training iterations
# noinspection PyBroadException
try:
# try to load a model
bst = xgb.Booster(params=param, model_file='xgb.01.model')
bst.load_model('xgb.01.model')
except:
except Exception:
bst = None
# if we dont have one train a model

File diff suppressed because one or more lines are too long

View File

@@ -1,89 +0,0 @@
# TRAINS - Example of manual graphs and statistics reporting
#
# Flat example script: demonstrates every manual reporting API on the TRAINS
# logger (text, scalars, histograms, matrices, scatter plots, images, tables).
import os
from PIL import Image
import numpy as np
import logging

from trains import Task

# Registers this run with the TRAINS backend (network side effect).
task = Task.init(project_name="examples", task_name="Manual reporting")

# standard python logging
logging.info("This is an info message")

# this is loguru test example — optional dependency, skipped when absent
try:
    from loguru import logger
    logger.info("That's it, beautiful and simple logging! (using ANSI colors)")
except ImportError:
    pass

# get TRAINS logger object for any metrics / reports
logger = Task.current_task().get_logger()

# log text
logger.report_text("hello")

# report scalar values
logger.report_scalar("example_scalar", "series A", iteration=0, value=100)
logger.report_scalar("example_scalar", "series A", iteration=1, value=200)

# report histogram
histogram = np.random.randint(10, size=10)
logger.report_histogram("example_histogram", "random histogram", iteration=1, values=histogram,
                        xaxis="title x", yaxis="title y")

# report confusion matrix
confusion = np.random.randint(10, size=(10, 10))
logger.report_matrix("example_confusion", "ignored", iteration=1, matrix=confusion, xaxis="title X", yaxis="title Y")

# report 3d surface (reuses the confusion matrix as the height map)
logger.report_surface("example_surface", "series1", iteration=1, matrix=confusion,
                      xaxis="title X", yaxis="title Y", zaxis="title Z")

# report 2d scatter plot
scatter2d = np.hstack((np.atleast_2d(np.arange(0, 10)).T, np.random.randint(10, size=(10, 1))))
logger.report_scatter2d("example_scatter", "series_xy", iteration=1, scatter=scatter2d,
                        xaxis="title x", yaxis="title y")

# report 3d scatter plot
scatter3d = np.random.randint(10, size=(10, 3))
logger.report_scatter3d("example_scatter_3d", "series_xyz", iteration=1, scatter=scatter3d,
                        xaxis="title x", yaxis="title y", zaxis="title z")

# reporting images
# NOTE(review): np.float is deprecated (removed in NumPy >= 1.24) — prefer
# the builtin float or np.float64 if this code is revived.
m = np.eye(256, 256, dtype=np.float)
logger.report_image("test case", "image float", iteration=1, image=m)
m = np.eye(256, 256, dtype=np.uint8)*255
logger.report_image("test case", "image uint8", iteration=1, image=m)
# red-channel-only RGB image built from the uint8 eye matrix
m = np.concatenate((np.atleast_3d(m), np.zeros((256, 256, 2), dtype=np.uint8)), axis=2)
logger.report_image("test case", "image color red", iteration=1, image=m)
image_open = Image.open(os.path.join("samples", "picasso.jpg"))
logger.report_image("test case", "image PIL", iteration=1, image=image_open)

# reporting tables — optional pandas dependency, skipped when absent
try:
    import pandas as pd

    # Report table - DataFrame with index
    df = pd.DataFrame(
        {
            'num_legs': [2, 4, 8, 0],
            'num_wings': [2, 0, 0, 0],
            'num_specimen_seen': [10, 2, 1, 8]
        },
        index=['falcon', 'dog', 'spider', 'fish']
    )
    df.index.name = 'id'
    logger.report_table("test table pd", "PD with index", 1, table_plot=df)

    # Report table - CSV from path
    csv_url = "https://raw.githubusercontent.com/plotly/datasets/master/Mining-BTC-180.csv"
    logger.report_table("test table csv", "remote csv", 1, url=csv_url)
except ImportError:
    pass

# flush reports (otherwise it will be flushed in the background, every couple of seconds)
logger.flush()

View File

@@ -0,0 +1,63 @@
# TRAINS - Example of manual graphs and statistics reporting
#
import numpy as np
from trains import Task, Logger
def report_plots(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting plots to plots section
    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    # report 3d surface: a random 10x10 integer grid used as the height map
    heightmap = np.random.randint(10, size=(10, 10))
    logger.report_surface(
        "example_surface",
        "series1",
        iteration=iteration,
        matrix=heightmap,
        xaxis="title X",
        yaxis="title Y",
        zaxis="title Z",
    )
    # report 3d scatter plot: 10 random points in 3d space
    points_xyz = np.random.randint(10, size=(10, 3))
    logger.report_scatter3d(
        "example_scatter_3d",
        "series_xyz",
        iteration=iteration,
        scatter=points_xyz,
        xaxis="title x",
        yaxis="title y",
        zaxis="title z",
    )
def main():
    """Run the 3D-plot reporting example end to end."""
    # Create the experiment Task (registers the run with the TRAINS backend)
    example_task = Task.init(project_name="examples", task_name="3D plot reporting")
    print('reporting 3D plot graphs')
    # Get the task logger,
    # You can also call Task.current_task().get_logger() from anywhere in your code.
    task_logger = example_task.get_logger()
    # report graphs
    report_plots(task_logger)
    # force flush reports
    # If flush is not called, reports are flushed in the background every couple of seconds,
    # and at the end of the process execution
    task_logger.flush()
    print('We are done reporting, have a great day :)')
if __name__ == "__main__":
main()

View File

@@ -1,3 +1,4 @@
import os
from time import sleep
import pandas as pd
@@ -5,12 +6,16 @@ import numpy as np
from PIL import Image
from trains import Task
task = Task.init('examples', 'artifacts toy')
task = Task.init('examples', 'artifacts example')
df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
'num_wings': [2, 0, 0, 0],
'num_specimen_seen': [10, 2, 1, 8]},
index=['falcon', 'dog', 'spider', 'fish'])
df = pd.DataFrame(
{
'num_legs': [2, 4, 8, 0],
'num_wings': [2, 0, 0, 0],
'num_specimen_seen': [10, 2, 1, 8]
},
index=['falcon', 'dog', 'spider', 'fish']
)
# Register Pandas object as artifact to watch
# (it will be monitored in the background and automatically synced and uploaded)
@@ -23,20 +28,20 @@ Task.current_task().get_registered_artifacts()['train'].sample(frac=0.5, replace
# add and upload pandas.DataFrame (onetime snapshot of the object)
task.upload_artifact('Pandas', artifact_object=df)
# add and upload local file artifact
task.upload_artifact('local file', artifact_object='samples/dancing.jpg')
task.upload_artifact('local file', artifact_object=os.path.join('data_samples', 'dancing.jpg'))
# add and upload dictionary stored as JSON)
task.upload_artifact('dictionary', df.to_dict())
# add and upload Numpy Object (stored as .npz file)
task.upload_artifact('Numpy Eye', np.eye(100, 100))
# add and upload Image (stored as .png file)
im = Image.open('samples/dancing.jpg')
im = Image.open(os.path.join('data_samples', 'dancing.jpg'))
task.upload_artifact('pillow_image', im)
# add and upload a folder, artifact_object should be the folder path
task.upload_artifact('local folder', artifact_object='samples/')
task.upload_artifact('local folder', artifact_object=os.path.join('data_samples'))
# add and upload a wildcard
task.upload_artifact('local folder', artifact_object='samples/*.jpg')
task.upload_artifact('wildcard jpegs', artifact_object=os.path.join('data_samples', '*.jpg'))
# do something
# do something here
sleep(1.)
print(df)

View File

Before

Width:  |  Height:  |  Size: 40 KiB

After

Width:  |  Height:  |  Size: 40 KiB

View File

Before

Width:  |  Height:  |  Size: 112 KiB

After

Width:  |  Height:  |  Size: 112 KiB

View File

@@ -0,0 +1,245 @@
# TRAINS - Example of manual graphs and statistics reporting
#
import math
import numpy as np
from bokeh.models import ColumnDataSource, GraphRenderer, Oval, StaticLayoutProvider
from bokeh.palettes import Spectral5, Spectral8
from bokeh.plotting import figure, output_file, save
from bokeh.sampledata.autompg import autompg_clean as bokeh_df
from bokeh.sampledata.periodic_table import elements
from bokeh.transform import dodge, factor_cmap
from trains import Task, Logger
def report_html_url(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting html from url to debug samples section
    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    # The page is referenced by URL; nothing is downloaded locally.
    docs_url = "https://allegro.ai/docs/index.html"
    logger.report_media("html", "url_html", iteration=iteration, url=docs_url)
def report_html_periodic_table(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting interactive (html) of periodic table to debug samples section

    Renders the bokeh periodic-table sample to a local "periodic.html" file
    (written into the current working directory), then uploads that file as a
    debug sample.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    output_file("periodic.html")
    periods = ["I", "II", "III", "IV", "V", "VI", "VII"]
    groups = [str(x) for x in range(1, 19)]
    # NOTE(review): the variable name `autompg_clean` is misleading — this is a
    # copy of the bokeh `elements` periodic-table dataset, not the autompg data.
    autompg_clean = elements.copy()
    autompg_clean["atomic mass"] = autompg_clean["atomic mass"].astype(str)
    autompg_clean["group"] = autompg_clean["group"].astype(str)
    autompg_clean["period"] = [periods[x - 1] for x in autompg_clean.period]
    # Drop elements that don't fit the grid layout (no group / LA-AC series).
    autompg_clean = autompg_clean[autompg_clean.group != "-"]
    autompg_clean = autompg_clean[autompg_clean.symbol != "Lr"]
    autompg_clean = autompg_clean[autompg_clean.symbol != "Lu"]
    # Color per element category.
    cmap = {
        "alkali metal": "#a6cee3",
        "alkaline earth metal": "#1f78b4",
        "metal": "#d93b43",
        "halogen": "#999d9a",
        "metalloid": "#e08d49",
        "noble gas": "#eaeaea",
        "nonmetal": "#f1d4Af",
        "transition metal": "#599d7A",
    }
    source = ColumnDataSource(autompg_clean)
    p = figure(
        plot_width=900,
        plot_height=500,
        title="Periodic Table (omitting LA and AC Series)",
        x_range=groups,
        y_range=list(reversed(periods)),
        toolbar_location=None,
        tools="hover",
    )
    # One rectangle per element, colored by its category.
    p.rect(
        "group",
        "period",
        0.95,
        0.95,
        source=source,
        fill_alpha=0.6,
        legend_label="metal",
        color=factor_cmap(
            "metal", palette=list(cmap.values()), factors=list(cmap.keys())
        ),
    )
    # Text layers inside each cell: symbol, atomic number, name, atomic mass.
    text_props = {"source": source, "text_align": "left", "text_baseline": "middle"}
    x = dodge("group", -0.4, range=p.x_range)
    r = p.text(x=x, y="period", text="symbol", **text_props)
    r.glyph.text_font_style = "bold"
    r = p.text(
        x=x, y=dodge("period", 0.3, range=p.y_range), text="atomic number", **text_props
    )
    r.glyph.text_font_size = "8pt"
    r = p.text(
        x=x, y=dodge("period", -0.35, range=p.y_range), text="name", **text_props
    )
    r.glyph.text_font_size = "5pt"
    r = p.text(
        x=x, y=dodge("period", -0.2, range=p.y_range), text="atomic mass", **text_props
    )
    r.glyph.text_font_size = "5pt"
    # Placeholder cells for the omitted lanthanide/actinide series.
    p.text(
        x=["3", "3"],
        y=["VI", "VII"],
        text=["LA", "AC"],
        text_align="center",
        text_baseline="middle",
    )
    p.hover.tooltips = [
        ("Name", "@name"),
        ("Atomic number", "@{atomic number}"),
        ("Atomic mass", "@{atomic mass}"),
        ("Type", "@metal"),
        ("CPK color", "$color[hex, swatch]:CPK"),
        ("Electronic configuration", "@{electronic configuration}"),
    ]
    # Cosmetic cleanup: no outline, grid, or axis chrome.
    p.outline_line_color = None
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_standoff = 0
    p.legend.orientation = "horizontal"
    p.legend.location = "top_center"
    # Write the html file, then upload it as a debug sample.
    save(p)
    logger.report_media("html", "periodic_html", iteration=iteration, local_path="periodic.html")
def report_html_groupby(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting bokeh groupby (html) to debug samples section

    Renders a grouped vbar chart (mean MPG by cylinders and manufacturer) to a
    local html file in the current working directory and uploads it.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    output_file("bar_pandas_groupby_nested.html")
    # NOTE(review): this mutates the module-level `bokeh_df` in place
    # (cyl/yr converted to str) — repeated calls re-convert harmlessly, but any
    # other user of `bokeh_df` sees the changed dtypes.
    bokeh_df.cyl = bokeh_df.cyl.astype(str)
    bokeh_df.yr = bokeh_df.yr.astype(str)
    group = bokeh_df.groupby(by=["cyl", "mfr"])
    # Color bars by the cylinder count (first factor of the nested key).
    index_cmap = factor_cmap(
        "cyl_mfr", palette=Spectral5, factors=sorted(bokeh_df.cyl.unique()), end=1
    )
    p = figure(
        plot_width=800,
        plot_height=300,
        title="Mean MPG by # Cylinders and Manufacturer",
        x_range=group,
        toolbar_location=None,
        tooltips=[("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")],
    )
    p.vbar(
        x="cyl_mfr",
        top="mpg_mean",
        width=1,
        source=group,
        line_color="white",
        fill_color=index_cmap,
    )
    # Axis / layout cosmetics.
    p.y_range.start = 0
    p.x_range.range_padding = 0.05
    p.xgrid.grid_line_color = None
    p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
    p.xaxis.major_label_orientation = 1.2
    p.outline_line_color = None
    # Write the html file, then upload it as a debug sample.
    save(p)
    logger.report_media(
        "html",
        "pandas_groupby_nested_html",
        iteration=iteration,
        local_path="bar_pandas_groupby_nested.html",
    )
def report_html_graph(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting bokeh graph (html) to debug samples section

    Renders an 8-node star graph laid out on a unit circle to a local
    "graph.html" file in the current working directory and uploads it.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    nodes = 8
    node_indices = list(range(nodes))
    plot = figure(
        title="Graph Layout Demonstration",
        x_range=(-1.1, 1.1),
        y_range=(-1.1, 1.1),
        tools="",
        toolbar_location=None,
    )
    graph = GraphRenderer()
    graph.node_renderer.data_source.add(node_indices, "index")
    graph.node_renderer.data_source.add(Spectral8, "color")
    graph.node_renderer.glyph = Oval(height=0.1, width=0.2, fill_color="color")
    # Star topology: every edge starts at node 0.
    graph.edge_renderer.data_source.data = dict(start=[0] * nodes, end=node_indices)

    # start of layout code: place the nodes evenly on the unit circle
    circ = [i * 2 * math.pi / 8 for i in node_indices]
    x = [math.cos(i) for i in circ]
    y = [math.sin(i) for i in circ]
    graph_layout = dict(zip(node_indices, zip(x, y)))
    graph.layout_provider = StaticLayoutProvider(graph_layout=graph_layout)
    plot.renderers.append(graph)

    # Write the html file, then upload it as a debug sample.
    output_file("graph.html")
    save(plot)
    logger.report_media("html", "Graph_html", iteration=iteration, local_path="graph.html")
def report_html_image(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting bokeh image (html) to debug samples section

    Renders a sin*cos interference pattern as a bokeh image plot into a local
    "image.html" file in the current working directory and uploads it.

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    # First html
    # Sample sin(x)*cos(y) on a 500x500 grid over [0, 10] x [0, 10].
    samples = 500
    x = np.linspace(0, 10, samples)
    y = np.linspace(0, 10, samples)
    xx, yy = np.meshgrid(x, y)
    d = np.sin(xx) * np.cos(yy)

    p = figure(tooltips=[("x", "$x"), ("y", "$y"), ("value", "@image")])
    p.x_range.range_padding = p.y_range.range_padding = 0

    # must give a vector of image data for image parameter
    p.image(image=[d], x=0, y=0, dw=10, dh=10, palette="Spectral11", level="image")
    p.grid.grid_line_width = 0.5

    # Write the html file, then upload it as a debug sample.
    output_file("image.html", title="image.py example")
    save(p)
    logger.report_media("html", "Spectral_html", iteration=iteration, local_path="image.html")
def main():
    """Run all HTML debug-sample reporting examples."""
    # Create the experiment Task (registers the run with the TRAINS backend)
    example_task = Task.init(project_name="examples", task_name="html samples reporting")
    print('reporting html files into debug samples section')
    # Get the task logger,
    # You can also call Task.current_task().get_logger() from anywhere in your code.
    task_logger = example_task.get_logger()
    # report html as debug samples, one reporter at a time (order preserved)
    for reporter in (
        report_html_image,
        report_html_graph,
        report_html_groupby,
        report_html_periodic_table,
        report_html_url,
    ):
        reporter(task_logger)
    # force flush reports
    # If flush is not called, reports are flushed in the background every couple of seconds,
    # and at the end of the process execution
    task_logger.flush()
    print('We are done reporting, have a great day :)')
if __name__ == "__main__":
main()

View File

@@ -1,4 +1,4 @@
# TRAINS - example code, absl logging
# TRAINS - example code, ArgumentParser parameter logging, absl parameter logging, and dictionary parameter logging
#
from __future__ import absolute_import
from __future__ import division

View File

@@ -0,0 +1,67 @@
# TRAINS - Example of manual graphs and statistics reporting
#
import os
import numpy as np
from PIL import Image
from trains import Task, Logger
def report_debug_images(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting images to debug samples section

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    # report image as float image
    # FIX: np.float was a deprecated alias for the builtin float (removed in
    # NumPy >= 1.24); np.float64 produces the exact same array dtype.
    m = np.eye(256, 256, dtype=np.float64)
    logger.report_image("image", "image float", iteration=iteration, image=m)
    # report image as uint8 (identity matrix scaled to the full 8-bit range)
    m = np.eye(256, 256, dtype=np.uint8) * 255
    logger.report_image("image", "image uint8", iteration=iteration, image=m)
    # report image as uint8 RGB: the diagonal goes into the red channel,
    # green/blue channels are zero-filled
    m = np.concatenate((np.atleast_3d(m), np.zeros((256, 256, 2), dtype=np.uint8)), axis=2)
    logger.report_image("image", "image color red", iteration=iteration, image=m)
    # report PIL Image object
    image_open = Image.open(os.path.join("data_samples", "picasso.jpg"))
    logger.report_image("image", "image PIL", iteration=iteration, image=image_open)
    # Image can be uploaded via 'report_media' too.
    logger.report_media(
        "image",
        "image with report media",
        iteration=iteration,
        local_path=os.path.join("data_samples", "picasso.jpg"),
        file_extension="jpg",
    )
def main():
    """Create the example Task and upload a few debug images."""
    task = Task.init(project_name="examples", task_name="image reporting")
    print('reporting a few debug images')
    # Get the task logger; Task.current_task().get_logger() works from
    # anywhere in the code as well.
    logger = task.get_logger()
    report_debug_images(logger)
    # Reports are flushed in the background every couple of seconds and at
    # process exit; flush explicitly so everything is sent right away.
    logger.flush()
    print('We are done reporting, have a great day :)')


if __name__ == "__main__":
    main()

View File

@@ -4,19 +4,21 @@ import os
from trains import Task, Logger
task = Task.init(project_name="examples", task_name="Reporting audio and video")
task = Task.init(project_name="examples", task_name="audio and video reporting")
# report an already uploaded video media (url)
print('reporting audio and video samples to the debug samples section')
# report video, an already uploaded video media (url)
Logger.current_logger().report_media(
'video', 'big bunny', iteration=1,
url='https://test-videos.co.uk/vids/bigbuckbunny/mp4/h264/720/Big_Buck_Bunny_720_10s_1MB.mp4')
# report an already uploaded audio media (url)
# report audio, report an already uploaded audio media (url)
Logger.current_logger().report_media(
'audio', 'pink panther', iteration=1,
url='https://www2.cs.uic.edu/~i101/SoundFiles/PinkPanther30.wav')
# report local media file
# report audio, report local media audio file
Logger.current_logger().report_media(
'audio', 'tada', iteration=1,
local_path=os.path.join('samples', 'sample.mp3'))
local_path=os.path.join('data_samples', 'sample.mp3'))

View File

@@ -0,0 +1,36 @@
# TRAINS - Example of manual model configuration
#
import os
from trains import Task
task = Task.init(project_name='examples', task_name='Model configuration example')
# Connect a local configuration file so the backend tracks its content
config_file = os.path.join('data_samples', 'sample.json')
config_file = task.connect_configuration(config_file)
# then read configuration as usual, the backend will contain a copy of it.
# later when executing remotely, the returned `config_file` will be a temporary file
# containing a new copy of the configuration retrieved from the backend
# # model_config_dict = json.load(open(config_file, 'rt'))
# Or store a dictionary of definitions for a specific network design
model_config_dict = {
    'value': 13.37,
    'dict': {'sub_value': 'string', 'sub_integer': 11},
    'list_of_ints': [1, 2, 3, 4],
}
model_config_dict = task.connect_configuration(model_config_dict)
# We now update the dictionary after connecting it, and the changes will be tracked as well.
model_config_dict['new value'] = 10
model_config_dict['value'] *= model_config_dict['new value']
# store the label enumeration of the training model
labels = {'background': 0, 'cat': 1, 'dog': 2}
task.connect_label_enumeration(labels)
# storing a model: Any saved model (keras / pytorch / tensorflow / etc.)
# will have the task network configuration and label enumeration
print('Any model stored from this point onwards, will contain both model_config and label_enumeration')

View File

@@ -0,0 +1,57 @@
# TRAINS - Example of manual graphs and statistics reporting
#
import pandas as pd
from trains import Task, Logger
def report_table(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting tables to the plots section

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    # Table built from a DataFrame that carries a named index
    animals = pd.DataFrame(
        data={
            "num_legs": [2, 4, 8, 0],
            "num_wings": [2, 0, 0, 0],
            "num_specimen_seen": [10, 2, 1, 8],
        },
        index=["falcon", "dog", "spider", "fish"],
    )
    animals.index.name = "id"
    logger.report_table("table pd", "PD with index", iteration=iteration, table_plot=animals)
    # Table fetched from a remote CSV url
    remote_csv = "https://raw.githubusercontent.com/plotly/datasets/master/Mining-BTC-180.csv"
    logger.report_table("table csv", "remote csv", iteration=iteration, url=remote_csv)
def main():
    """Create the example Task and report the pandas / CSV tables."""
    # Create the experiment Task
    task = Task.init(project_name="examples", task_name="pandas table reporting")
    # FIX: corrected typo in the user-facing message ("tablea" -> "tables")
    print('reporting pandas tables into the plots section')
    # Get the task logger,
    # You can also call Task.current_task().get_logger() from anywhere in your code.
    logger = task.get_logger()
    # report graphs
    report_table(logger)
    # force flush reports
    # If flush is not called, reports are flushed in the background every couple of seconds,
    # and at the end of the process execution
    logger.flush()
    print('We are done reporting, have a great day :)')


if __name__ == "__main__":
    main()

View File

@@ -4,11 +4,17 @@ from trains import Task
import plotly.express as px
task = Task.init('examples', 'plotly report')
task = Task.init('examples', 'plotly reporting')
print('reporting plotly figures')
# Iris dataset
df = px.data.iris()
# create complex plotly figure
fig = px.scatter(df, x="sepal_width", y="sepal_length", color="species", marginal_y="rug", marginal_x="histogram")
# report the plotly figure
task.get_logger().report_plotly(title="iris", series="sepal", iteration=0, figure=fig)
print('done')

View File

@@ -0,0 +1,6 @@
absl-py>=0.7.1
bokeh>=2.1.0
numpy
pandas
pillow>=4.0
trains

View File

@@ -0,0 +1,45 @@
# TRAINS - Example of manual graphs and statistics reporting
#
from trains import Task, Logger
def report_scalars(logger):
    # type: (Logger) -> ()
    """
    reporting scalars to scalars section

    :param logger: The task.logger to use for sending the scalars
    """
    # two scalar series sharing a single graph
    for step in range(100):
        logger.report_scalar("unified graph", "series A", iteration=step, value=1./(step+1))
        logger.report_scalar("unified graph", "series B", iteration=step, value=10./(step+1))
    # one scalar series per graph
    for step in range(100):
        logger.report_scalar("graph A", "series A", iteration=step, value=1./(step+1))
        logger.report_scalar("graph B", "series B", iteration=step, value=10./(step+1))
def main():
    """Create the example Task and report the scalar series."""
    task = Task.init(project_name="examples", task_name="scalar reporting")
    print('reporting scalar graphs')
    # Get the task logger; Task.current_task().get_logger() works from
    # anywhere in the code as well.
    logger = task.get_logger()
    report_scalars(logger)
    # Reports are flushed in the background every couple of seconds and at
    # process exit; flush explicitly so everything is sent right away.
    logger.flush()
    print('We are done reporting, have a great day :)')


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,116 @@
# TRAINS - Example of manual graphs and statistics reporting
#
import numpy as np
from trains import Task, Logger
def report_plots(logger, iteration=0):
    # type: (Logger, int) -> ()
    """
    reporting plots to plots section

    :param logger: The task.logger to use for sending the plots
    :param iteration: The iteration number of the current reports
    """
    # report a single histogram
    histogram = np.random.randint(10, size=10)
    logger.report_histogram(
        "single_histogram",
        "random histogram",
        iteration=iteration,
        values=histogram,
        xaxis="title x",
        yaxis="title y",
    )
    # report two histograms on the same graph (plot)
    histogram1 = np.random.randint(13, size=10)
    # FIX: derive series 2 from series 1; the original scaled the unrelated
    # `histogram` array from the single-histogram plot above, leaving
    # `histogram1` out of the relationship the two-series plot demonstrates.
    histogram2 = histogram1 * 0.75
    logger.report_histogram(
        "two_histogram",
        "series 1",
        iteration=iteration,
        values=histogram1,
        xaxis="title x",
        yaxis="title y",
    )
    logger.report_histogram(
        "two_histogram",
        "series 2",
        iteration=iteration,
        values=histogram2,
        xaxis="title x",
        yaxis="title y",
    )
    # report confusion matrix
    confusion = np.random.randint(10, size=(10, 10))
    logger.report_matrix(
        "example_confusion",
        "ignored",
        iteration=iteration,
        matrix=confusion,
        xaxis="title X",
        yaxis="title Y",
    )
    # 10 points: x = 0..9, y = random ints in [0, 10)
    scatter2d = np.hstack(
        (np.atleast_2d(np.arange(0, 10)).T, np.random.randint(10, size=(10, 1)))
    )
    # report 2d scatter plot with lines
    logger.report_scatter2d(
        "example_scatter",
        "series_xy",
        iteration=iteration,
        scatter=scatter2d,
        xaxis="title x",
        yaxis="title y",
    )
    # report 2d scatter plot with markers
    logger.report_scatter2d(
        "example_scatter",
        "series_markers",
        iteration=iteration,
        scatter=scatter2d,
        xaxis="title x",
        yaxis="title y",
        mode='markers'
    )
    # report 2d scatter plot with lines and markers (comment was a copy-paste
    # of the markers case in the original)
    logger.report_scatter2d(
        "example_scatter",
        "series_lines+markers",
        iteration=iteration,
        scatter=scatter2d,
        xaxis="title x",
        yaxis="title y",
        mode='lines+markers'
    )
def main():
    """Create the example Task and report the 2D demo plots."""
    task = Task.init(project_name="examples", task_name="2D plots reporting")
    print('reporting some graphs')
    # Get the task logger; Task.current_task().get_logger() works from
    # anywhere in the code as well.
    logger = task.get_logger()
    report_plots(logger)
    # Reports are flushed in the background every couple of seconds and at
    # process exit; flush explicitly so everything is sent right away.
    logger.flush()
    print('We are done reporting, have a great day :)')


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,58 @@
# TRAINS - Example of manual graphs and statistics reporting
#
import logging
import sys
from trains import Task, Logger
def report_logs(logger):
    # type: (Logger) -> ()
    """
    reporting text to logs section

    :param logger: The task.logger to use for sending the text
    """
    # standard python logging
    logging.info("This is an info message")
    # this is a loguru test example; loguru is optional and only exercised
    # when it happens to be installed
    try:
        from loguru import logger as loguru_logger  # noqa
        loguru_logger.info("That's it, beautiful and simple logging! (using ANSI colors)")
    except ImportError:
        print('loguru not installed, skipping loguru test')
    # report free text directly through the task logger
    logger.report_text("hello, this is plain text")
def main():
    """Create the example Task and emit console / text reports."""
    task = Task.init(project_name="examples", task_name="text reporting")
    print('reporting text logs')
    # regular console prints on stdout and stderr
    print('This is standard output test')
    print('This is standard error test', file=sys.stderr)
    # Get the task logger; Task.current_task().get_logger() works from
    # anywhere in the code as well.
    logger = task.get_logger()
    report_logs(logger)
    # Reports are flushed in the background every couple of seconds and at
    # process exit; flush explicitly so everything is sent right away.
    logger.flush()
    print('We are done reporting, have a great day :)')


if __name__ == "__main__":
    main()

View File

@@ -1,17 +0,0 @@
absl-py>=0.7.1
Keras>=2.2.4
joblib>=0.13.2
matplotlib>=3.1.1 ; python_version >= '3.6'
matplotlib >= 2.2.4 ; python_version < '3.6'
pandas
seaborn>=0.9.0
sklearn>=0.0
tensorboard>=1.14.0
tensorboardX>=1.8
tensorflow>=1.14.0
torch>=1.1.0
torchvision>=0.3.0
xgboost>=0.90 ; python_version >= '3'
xgboost >= 0.82 ; python_version < '3'
# sudo apt-get install graphviz
graphviz>=0.8

View File

@@ -0,0 +1 @@
trains

View File

@@ -15,7 +15,7 @@ from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import RMSprop
from keras.utils import np_utils
import tensorflow as tf
import tensorflow as tf # noqa: F401
from trains import Task, Logger

View File

@@ -1,8 +1,8 @@
import logging
from trains.automation import UniformParameterRange, DiscreteParameterRange, UniformIntegerParameterRange, ParameterSet
from trains.automation import GridSearch, RandomSearch, HyperParameterOptimizer
from trains import Task
from trains.automation import DiscreteParameterRange, HyperParameterOptimizer, RandomSearch, \
UniformIntegerParameterRange
try:
from trains.automation.hpbandster import OptimizerBOHB
@@ -13,7 +13,7 @@ except ValueError:
'we will be using RandomSearch strategy instead\n'
'If you like to try ' '{{BOHB}: Robust and Efficient Hyperparameter Optimization at Scale},\n'
'run: pip install hpbandster')
Our_SearchStrategy = RandomSearch
Our_SearchStrategy = RandomSearch
def job_complete_callback(
@@ -71,7 +71,7 @@ an_optimizer = HyperParameterOptimizer(
# more are coming soon...
optimizer_class=Our_SearchStrategy,
# Select an execution queue to schedule the experiments for execution
execution_queue='default',
execution_queue='moshik',
# Optional: Limit the execution time of a single experiment, in minutes.
# (this is optional, and if using OptimizerBOHB, it is ignored)
time_limit_per_job=10.,
@@ -97,7 +97,7 @@ if args['run_as_service']:
task.execute_remotely(queue_name='services', exit_process=True)
# report every 12 seconds, this is way too often, but we are testing here J
an_optimizer.set_report_period(0.2)
an_optimizer.set_report_period(2.2)
# start the optimization process, callback function to be called every time an experiment is completed
# this function returns immediately
an_optimizer.start(job_complete_callback=job_complete_callback)

View File

@@ -0,0 +1,3 @@
keras
tensorflow
trains

View File

@@ -0,0 +1,154 @@
import os
import socket
import subprocess
import sys
from copy import deepcopy
from tempfile import mkstemp
import psutil
# make sure we have jupyter in the auto requirements
from trains import Task
# set default docker image, with network configuration
os.environ["TRAINS_DOCKER_IMAGE"] = "nvidia/cuda --network host"
# initialize TRAINS
task = Task.init(project_name="examples", task_name="Remote Jupyter NoteBook")
# get rid of all the runtime TRAINS environment variables: drop every
# TRAINS_* key from the child environment except the connection/credential
# variables listed in `preserve`
preserve = (
    "TRAINS_API_HOST",
    "TRAINS_WEB_HOST",
    "TRAINS_FILES_HOST",
    "TRAINS_CONFIG_FILE",
    "TRAINS_API_ACCESS_KEY",
    "TRAINS_API_SECRET_KEY",
    "TRAINS_API_HOST_VERIFY_CERT",
)
# setup os environment (a copy - the parent process env stays untouched)
env = deepcopy(os.environ)
for key in os.environ:
    if key.startswith("TRAINS") and key not in preserve:
        env.pop(key, None)
# Add jupyter server base folder and ssh settings, and connect them to the
# task so they are tracked with the experiment
param = {
    "jupyter_server_base_directory": "~/",
    "ssh_server": True,
    "ssh_password": "training",
}
task.connect(param)
# Resolve a hostname and address to advertise in the console messages.
# noinspection PyBroadException
try:
    hostname = socket.gethostname()
    hostnames = socket.gethostbyname(socket.gethostname())
except Exception:
    # fallback when the hostname does not resolve: enumerate the IPv4
    # addresses of all network interfaces and use the first one
    def get_ip_addresses(family):
        # yield every address of the given family across all interfaces
        for interface, snics in psutil.net_if_addrs().items():
            for snic in snics:
                if snic.family == family:
                    yield snic.address
    hostnames = list(get_ip_addresses(socket.AF_INET))
    hostname = hostnames[0]
# Optionally install and start an SSH server inside the (docker) container,
# so the remote machine can be reached interactively.
if param.get("ssh_server"):
    print("Installing SSH Server on {} [{}]".format(hostname, hostnames))
    ssh_password = param.get("ssh_password", "training")
    # noinspection PyBroadException
    try:
        # pick the first unused port in the 10022-14999 range
        used_ports = [i.laddr.port for i in psutil.net_connections()]
        port = [i for i in range(10022, 15000) if i not in used_ports][0]
        # install openssh-server, enable root login with the chosen password,
        # export the TRAINS config file location for ssh sessions, start sshd
        result = os.system(
            "apt-get install -y openssh-server && "
            "mkdir -p /var/run/sshd && "
            "echo 'root:{password}' | chpasswd && "
            "echo 'PermitRootLogin yes' >> /etc/ssh/sshd_config && "
            "sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && "
            "sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd && "  # noqa: W605
            'echo "export VISIBLE=now" >> /etc/profile && '
            'echo "export TRAINS_CONFIG_FILE={trains_config_file}" >> /etc/profile && '
            "/usr/sbin/sshd -p {port}".format(
                password=ssh_password,
                port=port,
                trains_config_file=os.environ.get("TRAINS_CONFIG_FILE"),
            )
        )
        if result == 0:
            print(
                "\n#\n# SSH Server running on {} [{}] port {}\n# LOGIN u:root p:{}\n#\n".format(
                    hostname, hostnames, port, ssh_password
                )
            )
        else:
            # non-zero exit status from the shell command chain above
            raise ValueError()
    except Exception:
        print("\n#\n# Error: SSH server could not be launched\n#\n")
# execute jupyter notebook
# stdout/stderr of the notebook server are redirected into a temp file,
# which is polled below so its output can be echoed into the task log
fd, local_filename = mkstemp()
# serve from the configured base directory (env vars / '~' expanded),
# falling back to the current working directory when unset
cwd = (
    os.path.expandvars(os.path.expanduser(param["jupyter_server_base_directory"]))
    if param["jupyter_server_base_directory"]
    else os.getcwd()
)
print(
    "Running Jupyter Notebook Server on {} [{}] at {}".format(hostname, hostnames, cwd)
)
# launch with the sanitized environment built above
process = subprocess.Popen(
    [
        sys.executable,
        "-m",
        "jupyter",
        "notebook",
        "--no-browser",
        "--allow-root",
        "--ip",
        "0.0.0.0",
    ],
    env=env,
    stdout=fd,
    stderr=fd,
    cwd=cwd,
)
# print stdout/stderr: poll the temp file while the notebook server runs,
# echoing any new output so it appears in the task console log
prev_line_count = 0
process_running = True
while process_running:
    process_running = False
    try:
        # short first wait so the server's startup links are caught quickly,
        # then poll every 15 seconds; wait() returning means the server exited
        process.wait(timeout=2.0 if prev_line_count == 0 else 15.0)
    except subprocess.TimeoutExpired:
        process_running = True
    with open(local_filename, "rt") as f:
        # read new lines
        new_lines = f.readlines()
        if not new_lines:
            continue
    output = "".join(new_lines)
    print(output)
    # update task comment with jupyter notebook server links
    # (only on the first batch of output, where the server prints its urls)
    if prev_line_count == 0:
        task.comment += "\n" + "".join(
            line for line in new_lines if "http://" in line or "https://" in line
        )
    prev_line_count += len(new_lines)
    # truncate the temp file so only fresh output is read next iteration
    os.lseek(fd, 0, 0)
    os.ftruncate(fd, 0)
# cleanup
os.close(fd)
# noinspection PyBroadException
try:
    os.unlink(local_filename)
except Exception:
    pass

View File

@@ -1,358 +0,0 @@
# TRAINS - Example of tensorflow eager mode, model logging and tensorboard
#
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A deep MNIST classifier using convolutional layers.
Sample usage:
python mnist.py --help
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import sys
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from trains import Task
tf.compat.v1.enable_eager_execution()
task = Task.init(project_name='examples', task_name='Tensorflow eager mode')
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('data_num', 100, """Flag of type integer""")
tf.app.flags.DEFINE_string('img_path', './img', """Flag of type string""")
layers = tf.keras.layers
FLAGS = None
class Discriminator(tf.keras.Model):
    """GAN Discriminator.

    A network to differentiate between generated and real handwritten digits.
    """

    def __init__(self, data_format):
        """Creates a model for discriminating between real and generated digits.

        Args:
          data_format: Either 'channels_first' or 'channels_last'.
            'channels_first' is typically faster on GPUs while 'channels_last' is
            typically faster on CPUs. See
            https://www.tensorflow.org/performance/performance_guide#data_formats
        """
        super(Discriminator, self).__init__(name='')
        # target reshape for 28x28 grayscale MNIST digits, channel axis
        # positioned according to data_format
        if data_format == 'channels_first':
            self._input_shape = [-1, 1, 28, 28]
        else:
            assert data_format == 'channels_last'
            self._input_shape = [-1, 28, 28, 1]
        # conv/pool feature extractor followed by a two-layer dense head
        self.conv1 = layers.Conv2D(
            64, 5, padding='SAME', data_format=data_format, activation=tf.tanh)
        self.pool1 = layers.AveragePooling2D(2, 2, data_format=data_format)
        self.conv2 = layers.Conv2D(
            128, 5, data_format=data_format, activation=tf.tanh)
        self.pool2 = layers.AveragePooling2D(2, 2, data_format=data_format)
        self.flatten = layers.Flatten()
        self.fc1 = layers.Dense(1024, activation=tf.tanh)
        # final single pre-sigmoid logit per image
        self.fc2 = layers.Dense(1, activation=None)

    def call(self, inputs):
        """Return two logits per image estimating input authenticity.

        Users should invoke __call__ to run the network, which delegates to this
        method (and not call this method directly).

        Args:
          inputs: A batch of images as a Tensor with shape [batch_size, 28, 28, 1]
            or [batch_size, 1, 28, 28]

        Returns:
          A Tensor with shape [batch_size] containing logits estimating
          the probability that corresponding digit is real.
        """
        x = tf.reshape(inputs, self._input_shape)
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x
class Generator(tf.keras.Model):
    """Generator of handwritten digits similar to the ones in the MNIST dataset.
    """

    def __init__(self, data_format):
        """Creates a model for discriminating between real and generated digits.

        Args:
          data_format: Either 'channels_first' or 'channels_last'.
            'channels_first' is typically faster on GPUs while 'channels_last' is
            typically faster on CPUs. See
            https://www.tensorflow.org/performance/performance_guide#data_formats
        """
        super(Generator, self).__init__(name='')
        self.data_format = data_format
        # We are using 128 6x6 channels as input to the first deconvolution layer
        if data_format == 'channels_first':
            self._pre_conv_shape = [-1, 128, 6, 6]
        else:
            assert data_format == 'channels_last'
            self._pre_conv_shape = [-1, 6, 6, 128]
        # dense projection of the noise vector up to the pre-conv volume
        self.fc1 = layers.Dense(6 * 6 * 128, activation=tf.tanh)
        # In call(), we reshape the output of fc1 to _pre_conv_shape
        # Deconvolution layer. Resulting image shape: (batch, 14, 14, 64)
        self.conv1 = layers.Conv2DTranspose(
            64, 4, strides=2, activation=None, data_format=data_format)
        # Deconvolution layer. Resulting image shape: (batch, 28, 28, 1)
        self.conv2 = layers.Conv2DTranspose(
            1, 2, strides=2, activation=tf.nn.sigmoid, data_format=data_format)

    def call(self, inputs):
        """Return a batch of generated images.

        Users should invoke __call__ to run the network, which delegates to this
        method (and not call this method directly).

        Args:
          inputs: A batch of noise vectors as a Tensor with shape
            [batch_size, length of noise vectors].

        Returns:
          A Tensor containing generated images. If data_format is 'channels_last',
          the shape of returned images is [batch_size, 28, 28, 1], else
          [batch_size, 1, 28, 28]
        """
        x = self.fc1(inputs)
        x = tf.reshape(x, shape=self._pre_conv_shape)
        x = self.conv1(x)
        x = self.conv2(x)
        return x
def discriminator_loss(discriminator_real_outputs, discriminator_gen_outputs):
    """Original discriminator loss for GANs, with label smoothing.

    See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661) for more
    details.

    Args:
      discriminator_real_outputs: Discriminator output on real data.
      discriminator_gen_outputs: Discriminator output on generated data. Expected
        to be in the range of (-inf, inf).

    Returns:
      A scalar loss Tensor.
    """
    # real samples are labeled 1 with 0.25 label smoothing
    loss_on_real = tf.compat.v1.losses.sigmoid_cross_entropy(
        tf.ones_like(discriminator_real_outputs),
        discriminator_real_outputs,
        label_smoothing=0.25)
    # generated samples are labeled 0 (no smoothing)
    loss_on_generated = tf.compat.v1.losses.sigmoid_cross_entropy(
        tf.zeros_like(discriminator_gen_outputs), discriminator_gen_outputs)
    loss = loss_on_real + loss_on_generated
    # also emit the value as a tensorboard summary scalar
    tf.contrib.summary.scalar('discriminator_loss', loss)
    return loss
def generator_loss(discriminator_gen_outputs):
    """Original generator loss for GANs.

    L = -log(sigmoid(D(G(z))))
    See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661)
    for more details.

    Args:
      discriminator_gen_outputs: Discriminator output on generated data. Expected
        to be in the range of (-inf, inf).

    Returns:
      A scalar loss Tensor.
    """
    # the generator is rewarded when the discriminator labels its output 1
    loss = tf.compat.v1.losses.sigmoid_cross_entropy(
        tf.ones_like(discriminator_gen_outputs), discriminator_gen_outputs)
    # also emit the value as a tensorboard summary scalar
    tf.contrib.summary.scalar('generator_loss', loss)
    return loss
def train_one_epoch(generator, discriminator, generator_optimizer,
                    discriminator_optimizer, dataset, step_counter,
                    log_interval, noise_dim):
    """Train `generator` and `discriminator` models on `dataset`.

    Args:
      generator: Generator model.
      discriminator: Discriminator model.
      generator_optimizer: Optimizer to use for generator.
      discriminator_optimizer: Optimizer to use for discriminator.
      dataset: Dataset of images to train on.
      step_counter: An integer variable, used to write summaries regularly.
      log_interval: How many steps to wait between logging and collecting
        summaries.
      noise_dim: Dimension of noise vector to use.
    """
    total_generator_loss = 0.0
    total_discriminator_loss = 0.0
    for (batch_index, images) in enumerate(dataset):
        # keep the global-step increment pinned to the CPU
        with tf.device('/cpu:0'):
            tf.compat.v1.assign_add(step_counter, 1)
        with tf.contrib.summary.record_summaries_every_n_global_steps(
                log_interval, global_step=step_counter):
            current_batch_size = images.shape[0]
            # fresh noise batch per step; seeded with the batch index for
            # reproducibility within the epoch
            noise = tf.random.uniform(
                shape=[current_batch_size, noise_dim],
                minval=-1.,
                maxval=1.,
                seed=batch_index)
            # we can use 2 tapes or a single persistent tape.
            # Using two tapes is memory efficient since intermediate tensors can be
            # released between the two .gradient() calls below
            with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
                generated_images = generator(noise)
                tf.contrib.summary.image(
                    'generated_images',
                    tf.reshape(generated_images, [-1, 28, 28, 1]),
                    max_images=10)
                discriminator_gen_outputs = discriminator(generated_images)
                discriminator_real_outputs = discriminator(images)
                discriminator_loss_val = discriminator_loss(discriminator_real_outputs,
                                                            discriminator_gen_outputs)
                total_discriminator_loss += discriminator_loss_val
                generator_loss_val = generator_loss(discriminator_gen_outputs)
                total_generator_loss += generator_loss_val
            # compute and apply gradients for each network separately
            generator_grad = gen_tape.gradient(generator_loss_val,
                                               generator.variables)
            discriminator_grad = disc_tape.gradient(discriminator_loss_val,
                                                    discriminator.variables)
            generator_optimizer.apply_gradients(
                zip(generator_grad, generator.variables))
            discriminator_optimizer.apply_gradients(
                zip(discriminator_grad, discriminator.variables))
            # periodic console progress report of the running average losses
            if log_interval and batch_index > 0 and batch_index % log_interval == 0:
                print('Batch #%d\tAverage Generator Loss: %.6f\t'
                      'Average Discriminator Loss: %.6f' %
                      (batch_index, total_generator_loss / batch_index,
                       total_discriminator_loss / batch_index))
def main(_):
    """Build models/optimizers, restore latest checkpoint, train 3 epochs."""
    # prefer GPU + channels_first; fall back to CPU + channels_last
    (device, data_format) = ('/gpu:0', 'channels_first')
    if FLAGS.no_gpu or tf.contrib.eager.num_gpus() <= 0:
        (device, data_format) = ('/cpu:0', 'channels_last')
    print('Using device %s, and data format %s.' % (device, data_format))
    # Load the datasets (only the first 1280 training images are used)
    data = input_data.read_data_sets(FLAGS.data_dir)
    dataset = (
        tf.data.Dataset.from_tensor_slices(data.train.images[:1280]).shuffle(60000)
        .batch(FLAGS.batch_size))
    # Create the models and optimizers.
    model_objects = {
        'generator': Generator(data_format),
        'discriminator': Discriminator(data_format),
        'generator_optimizer': tf.compat.v1.train.AdamOptimizer(FLAGS.lr),
        'discriminator_optimizer': tf.compat.v1.train.AdamOptimizer(FLAGS.lr),
        'step_counter': tf.compat.v1.train.get_or_create_global_step(),
    }
    # Prepare summary writer and checkpoint info
    summary_writer = tf.contrib.summary.create_file_writer(
        FLAGS.output_dir, flush_millis=1000)
    checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
    latest_cpkt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if latest_cpkt:
        print('Using latest checkpoint at ' + latest_cpkt)
    checkpoint = tf.train.Checkpoint(**model_objects)
    # Restore variables on creation if a checkpoint exists.
    checkpoint.restore(latest_cpkt)
    with tf.device(device):
        # fixed 3-epoch training loop; one checkpoint saved per epoch
        for _ in range(3):
            start = time.time()
            with summary_writer.as_default():
                train_one_epoch(dataset=dataset, log_interval=FLAGS.log_interval,
                                noise_dim=FLAGS.noise, **model_objects)
            end = time.time()
            checkpoint.save(checkpoint_prefix)
            print('\nTrain time for epoch #%d (step %d): %f' %
                  (checkpoint.save_counter.numpy(),
                   checkpoint.step_counter.numpy(),
                   end - start))
if __name__ == '__main__':
    # Parse CLI flags into the module-level FLAGS; unknown args are forwarded
    # to tf.app.run unchanged.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data-dir',
        type=str,
        default='/tmp/tensorflow/mnist/input_data',
        help=('Directory for storing input data (default '
              '/tmp/tensorflow/mnist/input_data)'))
    parser.add_argument(
        '--batch-size',
        type=int,
        default=16,
        metavar='N',
        help='input batch size for training (default: 128)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=1,
        metavar='N',
        help=('number of batches between logging and writing summaries '
              '(default: 100)'))
    parser.add_argument(
        '--output_dir',
        type=str,
        default='/tmp/tensorflow/',
        metavar='DIR',
        help='Directory to write TensorBoard summaries (defaults to none)')
    parser.add_argument(
        '--checkpoint_dir',
        type=str,
        default='/tmp/tensorflow/mnist/checkpoints/',
        metavar='DIR',
        help=('Directory to save checkpoints in (once per epoch) (default '
              '/tmp/tensorflow/mnist/checkpoints/)'))
    parser.add_argument(
        '--lr',
        type=float,
        default=0.001,
        metavar='LR',
        help='learning rate (default: 0.001)')
    parser.add_argument(
        '--noise',
        type=int,
        default=100,
        metavar='N',
        help='Length of noise vector for generator input (default: 100)')
    parser.add_argument(
        '--no-gpu',
        action='store_true',
        default=False,
        help='disables GPU usage even if a GPU is available')
    FLAGS, unparsed = parser.parse_known_args()
    tf.compat.v1.app.run(main=main, argv=[sys.argv[0]] + unparsed)

View File

@@ -1,172 +0,0 @@
# TRAINS - Example of tensorflow mnist training model logging
#
# Save and Restore a model using TensorFlow.
# This example is using the MNIST database of handwritten digits
# (http://yann.lecun.com/exdb/mnist/)
#
# Author: Aymeric Damien
# Project: https://github.com/aymericdamien/TensorFlow-Examples/
from __future__ import print_function
from os.path import exists, join
import tempfile
import numpy as np
import tensorflow as tf
from trains import Task
MODEL_PATH = join(tempfile.gettempdir(), "module_no_signatures")
task = Task.init(project_name='examples', task_name='Tensorflow mnist example')

## block
# NOTE(review): this block looks like leftover experimentation - it fits a
# throwaway keras model on random data and best-effort loads a saved model.
X_train = np.random.rand(100, 3)
y_train = np.random.rand(100, 1)
model = tf.keras.models.Sequential([tf.keras.layers.Dense(1)])
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.SGD(),
              metrics=['accuracy'])
model.fit(X_train, y_train, steps_per_epoch=1, nb_epoch=1)
with tf.Session(graph=tf.Graph()) as sess:
    if exists(MODEL_PATH):
        try:
            tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], MODEL_PATH)
            m2 = tf.saved_model.load(sess, [tf.saved_model.tag_constants.SERVING], MODEL_PATH)
        except Exception:
            pass
# NOTE(review): bare attribute access with no effect - likely leftover debris
tf.train.Checkpoint
## block end

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
# Parameters
parameters = {
    'learning_rate': 0.001,
    'batch_size': 100,
    'display_step': 1,
    'model_path': join(tempfile.gettempdir(), "model.ckpt"),
    # Network Parameters
    'n_hidden_1': 256,  # 1st layer number of features
    'n_hidden_2': 256,  # 2nd layer number of features
    'n_input': 784,  # MNIST data input (img shape: 28*28)
    'n_classes': 10,  # MNIST total classes (0-9 digits)
}
# TRAINS: connect parameters with the experiment/task for logging
parameters = task.connect(parameters)
# tf Graph input
x = tf.placeholder("float", [None, parameters['n_input']])
y = tf.placeholder("float", [None, parameters['n_classes']])
# Create model
def multilayer_perceptron(x, weights, biases):
    """Two-hidden-layer MLP: input -> relu -> relu -> linear logits."""
    hidden_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, weights['h2']), biases['b2']))
    # Output layer uses a plain linear activation (raw logits for the
    # softmax cross-entropy loss defined by the caller).
    return tf.matmul(hidden_2, weights['out']) + biases['out']
# Store layers weight & bias, randomly initialized per layer shape
weights = {
    'h1': tf.Variable(tf.random_normal([parameters['n_input'], parameters['n_hidden_1']])),
    'h2': tf.Variable(tf.random_normal([parameters['n_hidden_1'], parameters['n_hidden_2']])),
    'out': tf.Variable(tf.random_normal([parameters['n_hidden_2'], parameters['n_classes']]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([parameters['n_hidden_1']])),
    'b2': tf.Variable(tf.random_normal([parameters['n_hidden_2']])),
    'out': tf.Variable(tf.random_normal([parameters['n_classes']]))
}
# Construct model: logits tensor over the class dimension
pred = multilayer_perceptron(x, weights, biases)
# Define loss (softmax cross-entropy averaged over the batch) and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=parameters['learning_rate']).minimize(cost)
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
# 'Saver' op to save and restore all the variables
saver = tf.train.Saver()
# Running first session: train for 3 epochs, evaluate, and checkpoint.
print("Starting 1st session...")
with tf.Session() as sess:
    # Run the initializer
    sess.run(init)
    # Training cycle
    for epoch in range(3):
        avg_cost = 0.
        batches_per_epoch = int(mnist.train.num_examples / parameters['batch_size'])
        # Loop over all batches
        for _ in range(batches_per_epoch):
            images, labels = mnist.train.next_batch(parameters['batch_size'])
            # One optimization step; also fetch the loss for monitoring
            _, step_loss = sess.run([optimizer, cost],
                                    feed_dict={x: images, y: labels})
            # Accumulate the per-epoch average loss
            avg_cost += step_loss / batches_per_epoch
        # Display logs per epoch step
        if epoch % parameters['display_step'] == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
        # NOTE(review): checkpoint written every epoch (each save is logged by
        # TRAINS) — confirm against the original example's intent.
        save_path = saver.save(sess, parameters['model_path'])
    print("First Optimization Finished!")
    # Test model: fraction of argmax predictions matching the one-hot labels
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
    # Save model weights to disk
    save_path = saver.save(sess, parameters['model_path'])
    print("Model saved in file: %s" % save_path)
# Running a new session: restore the checkpoint and resume training.
print("Starting 2nd session...")
with tf.Session() as sess:
    # Initialize variables
    sess.run(init)
    # Restore model weights from previously saved model
    saver.restore(sess, parameters['model_path'])
    print("Model restored from file: %s" % save_path)
    # Resume training for 7 additional epochs
    for epoch in range(7):
        avg_cost = 0.
        batches_per_epoch = int(mnist.train.num_examples / parameters['batch_size'])
        # Loop over all batches
        for _ in range(batches_per_epoch):
            images, labels = mnist.train.next_batch(parameters['batch_size'])
            # One optimization step; also fetch the loss for monitoring
            _, step_loss = sess.run([optimizer, cost],
                                    feed_dict={x: images, y: labels})
            # Accumulate the per-epoch average loss
            avg_cost += step_loss / batches_per_epoch
        # Display logs per epoch step
        if epoch % parameters['display_step'] == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
    print("Second Optimization Finished!")
    # Test model: fraction of argmax predictions matching the one-hot labels
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval(
        {x: mnist.test.images, y: mnist.test.labels}))