Initial beta version

This commit is contained in:
allegroai
2019-06-10 20:00:28 +03:00
parent 3cb9de58c3
commit f595afe6c8
121 changed files with 34975 additions and 0 deletions

43
examples/absl_example.py Normal file
View File

@@ -0,0 +1,43 @@
# TRAINS - example code, absl logging
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
from absl import app
from absl import flags
from absl import logging
from trains import Task
# Global absl flag container; individual flag values are read from it as attributes (see main).
FLAGS = flags.FLAGS
# Flags defined before the TRAINS task is created.
flags.DEFINE_string('echo', None, 'Text to echo.')
flags.DEFINE_string('another_str', 'My string', 'A string', module_name='test')
# Create the TRAINS task for this run.
task = Task.init(project_name='examples', task_name='absl example')
# Flags defined after Task.init().
# NOTE(review): presumably placed here to show that late-defined flags are picked up as well - confirm.
flags.DEFINE_integer('echo3', 3, 'Text to echo.')
flags.DEFINE_string('echo5', '5', 'Text to echo.', module_name='test')
# Example values of several built-in types (list, dict, int, float, str).
parameters = {
'list': [1, 2, 3],
'dict': {'a': 1, 'b': 2},
'int': 3,
'float': 2.2,
'string': 'my string',
}
# Hand the dictionary to the task; the name is rebound to whatever task.connect() returns.
parameters = task.connect(parameters)
def main(_):
    """Entry point handed to ``app.run``; its single positional argument is ignored.

    Writes the interpreter version to stderr and logs the value of the
    ``echo`` flag through absl logging.
    """
    version_banner = 'Running under Python {0[0]}.{0[1]}.{0[2]}'.format(sys.version_info)
    print(version_banner, file=sys.stderr)

    echo_value = FLAGS.echo
    logging.info('echo is %s.', echo_value)


if __name__ == '__main__':
    app.run(main)

160
examples/jupyter.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,113 @@
# TRAINS - Keras with Tensorboard example code, automatic logging model and Tensorboard outputs
#
# Train a simple deep NN on the MNIST dataset.
# Gets to 98.40% test accuracy after 20 epochs
# (there is *a lot* of margin for parameter tuning).
# 2 seconds per epoch on a K520 GPU.
from __future__ import print_function
import numpy as np
import tensorflow
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam, RMSprop
from keras.utils import np_utils
from keras.models import load_model, save_model, model_from_json
from trains import Task
class TensorBoardImage(TensorBoard):
    """TensorBoard callback that also writes one validation image per epoch."""

    @staticmethod
    def make_image(tensor):
        """Encode a 2-D array as a PNG-backed ``tf.Summary.Image``.

        The array is stacked three times along a new last axis, so the encoded
        picture has three identical channels.

        :param tensor: 2-D array accepted by ``PIL.Image.fromarray`` once stacked
            (the caller in this class passes a 28x28 ``uint8`` array).
        :return: ``tf.Summary.Image`` holding the PNG bytes and the image dimensions.
        """
        # Imported lazily, as in the original, so the class can be defined
        # without these packages being imported at module load time.
        import io
        import tensorflow as tf
        from PIL import Image

        tensor = np.stack((tensor, tensor, tensor), axis=2)
        height, width, channels = tensor.shape
        image = Image.fromarray(tensor)
        # The context manager closes the buffer even if encoding raises.
        with io.BytesIO() as output:
            image.save(output, format='PNG')
            image_string = output.getvalue()
        return tf.Summary.Image(height=height,
                                width=width,
                                colorspace=channels,
                                encoded_image_string=image_string)

    def on_epoch_end(self, epoch, logs=None):
        """Run the regular TensorBoard epoch hook, then log the first validation image.

        :param epoch: epoch index, used as the summary step.
        :param logs: metrics dictionary forwarded to the parent callback;
            ``None`` is treated as an empty dictionary.
        """
        # A fresh dict per call avoids sharing one mutable default between calls.
        if logs is None:
            logs = {}
        # Explicit two-argument super() keeps the class usable on Python 2,
        # matching the ``from __future__`` import at the top of the file.
        super(TensorBoardImage, self).on_epoch_end(epoch, logs)

        import tensorflow as tf
        images = self.validation_data[0]  # 0 - data; 1 - labels
        # Scale the first sample by 255 and view it as a 28x28 uint8 picture.
        img = (255 * images[0].reshape(28, 28)).astype('uint8')
        image = self.make_image(img)
        summary = tf.Summary(value=[tf.Summary.Value(tag='image', image=image)])
        self.writer.add_summary(summary, epoch)
# Training hyper-parameters
batch_size, nb_classes, nb_epoch = 128, 10, 6

# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten each image to a 784-vector, convert to float32 and divide by 255.
X_train = X_train.reshape(60000, 784).astype('float32') / 255.
X_test = X_test.reshape(10000, 784).astype('float32') / 255.
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train, Y_test = (np_utils.to_categorical(y, nb_classes) for y in (y_train, y_test))
# Three Dense layers (512, 512, 10 units) with relu / relu / softmax activations.
model = Sequential([
    Dense(512, input_shape=(784,)),
    Activation('relu'),
    # Dropout(0.2),
    Dense(512),
    Activation('relu'),
    # Dropout(0.2),
    Dense(10),
    Activation('softmax'),
])

# Second, smaller model; it is built here but not compiled or trained in this script.
model2 = Sequential([
    Dense(512, input_shape=(784,)),
    Activation('relu'),
])

model.summary()

model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(),
    metrics=['accuracy'],
)
# Connecting TRAINS
task = Task.init(project_name='examples', task_name='Keras with TensorBoard example')

# setting model outputs: map the label names 'digit_0' .. 'digit_9' to their class index
labels = {'digit_%d' % i: i for i in range(10)}
task.set_model_label_enumeration(labels)

# TensorBoard logging and checkpointing (the checkpoint file name contains the epoch number)
board = TensorBoard(histogram_freq=1, log_dir='/tmp/histogram_example', write_images=False)
model_store = ModelCheckpoint(filepath='/tmp/histogram_example/weight.{epoch}.hdf5')

# load previous model, if it is there
try:
    model.load_weights('/tmp/histogram_example/weight.1.hdf5')
except Exception as ex:
    # Loading is best-effort: a missing or incompatible checkpoint must not stop
    # the example, but it is reported instead of being silently discarded.
    # ``Exception`` (not a bare except) lets KeyboardInterrupt / SystemExit through.
    print('No previous weights loaded ({})'.format(ex))

history = model.fit(X_train, Y_train,
                    batch_size=batch_size, epochs=nb_epoch,
                    callbacks=[board, model_store],
                    verbose=1, validation_data=(X_test, Y_test))

# score[0] and score[1] are printed as test score and test accuracy respectively
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

View File

@@ -0,0 +1,29 @@
# TRAINS - Example of manual model configuration
#
import torch
from trains import Task
task = Task.init(project_name='examples', task_name='Manual model configuration')

# create a model (an instance - the bare class object is not a model)
model = torch.nn.Module()

# store dictionary of definition for a specific network design
model_config_dict = {
    'value': 13.37,
    'dict': {'sub_value': 'string'},
    'list_of_ints': [1, 2, 3, 4],
}
task.set_model_config(config_dict=model_config_dict)

# or read from a config file (this will override the previous configuration dictionary)
# task.set_model_config(config_text='this is just a blob\nof text from a configuration file')

# store the label enumeration the model is training for
task.set_model_label_enumeration({'background': 0, 'cat': 1, 'dog': 2})

print('Any model stored from this point onwards, will contain both model_config and label_enumeration')

# storing the model, it will have the task network configuration and label enumeration
torch.save(model, '/tmp/model')
print('Model saved')

View File

@@ -0,0 +1,51 @@
# TRAINS - Example of manual graphs and statistics reporting
#
import numpy as np
import logging
from trains import Task
task = Task.init(project_name='examples', task_name='Manual reporting')

# example python logger: set the root logger to DEBUG and emit one message per level
logging.getLogger().setLevel('DEBUG')
logging.debug('This is a debug message')
logging.info('This is an info message')
logging.warning('This is a warning message')
logging.error('This is an error message')
logging.critical('This is a critical message')

# get TRAINS logger object for any metrics / reports
logger = task.get_logger()

# log text
logger.console("hello")

# report scalar values
logger.report_scalar("example_scalar", "series A", iteration=0, value=100)
logger.report_scalar("example_scalar", "series A", iteration=1, value=200)

# report histogram
histogram = np.random.randint(10, size=10)
logger.report_vector("example_histogram", "random histogram", iteration=1, values=histogram)

# report confusion matrix
confusion = np.random.randint(10, size=(10, 10))
logger.report_matrix("example_confusion", "ignored", iteration=1, matrix=confusion)

# report 2d scatter plot: first column is 0..9, second column is random integers
scatter2d = np.hstack((np.atleast_2d(np.arange(0, 10)).T, np.random.randint(10, size=(10, 1))))
logger.report_scatter2d("example_scatter", "series_xy", iteration=1, scatter=scatter2d)

# report 3d scatter plot
scatter3d = np.random.randint(10, size=(10, 3))
logger.report_scatter3d("example_scatter_3d", "series_xyz", iteration=1, scatter=scatter3d)

# report image
m = np.eye(256, 256, dtype=np.uint8)*255
logger.report_image_and_upload("fail cases", "image uint", iteration=1, matrix=m)
# ``np.float`` was only an alias of the builtin ``float`` and has been removed
# from recent NumPy releases; the builtin yields the same float64 array.
m = np.eye(256, 256, dtype=float)
logger.report_image_and_upload("fail cases", "image float", iteration=1, matrix=m)

# flush reports (otherwise it will be flushed in the background, every couple of seconds)
logger.flush()

View File

@@ -0,0 +1,36 @@
# TRAINS - Example of Matplotlib integration and reporting
#
import numpy as np
import matplotlib.pyplot as plt
from trains import Task
task = Task.init(
    project_name='examples',
    task_name='Matplotlib example',
)

# Scatter plot of N random points with random colour and marker area
N = 50
x, y, colors = np.random.rand(N), np.random.rand(N), np.random.rand(N)
area = (30 * np.random.rand(N)) ** 2  # 0 to 15 point radii
plt.scatter(x, y, s=area, c=colors, alpha=0.5)
plt.show()

# Sine curve sampled at 30 points, drawn with black circle markers
x = np.linspace(0, 10, 30)
y = np.sin(x)
plt.plot(x, y, 'o', color='black')
plt.show()

# Image plot of a 256x256 identity matrix
m = np.eye(256, dtype=np.uint8)
plt.imshow(m)
plt.show()

# Same image plot, this time with a title
m = np.eye(256, dtype=np.uint8)
plt.imshow(m)
plt.title('Image Title')
plt.show()

print('This is a Matplotlib example')

View File

@@ -0,0 +1,479 @@
# TRAINS - Example of Pytorch and matplotlib integration and reporting
#
"""
Neural Transfer Using PyTorch
=============================
**Author**: `Alexis Jacq <https://alexis-jacq.github.io>`_
**Edited by**: `Winston Herring <https://github.com/winston6>`_
Introduction
------------
This tutorial explains how to implement the `Neural-Style algorithm <https://arxiv.org/abs/1508.06576>`__
developed by Leon A. Gatys, Alexander S. Ecker and Matthias Bethge.
Neural-Style, or Neural-Transfer, allows you to take an image and
reproduce it with a new artistic style. The algorithm takes three images,
an input image, a content-image, and a style-image, and changes the input
to resemble the content of the content-image and the artistic style of the style-image.
.. figure:: /_static/img/neural-style/neuralstyle.png
:alt: content1
"""
######################################################################
# Underlying Principle
# --------------------
#
# The principle is simple: we define two distances, one for the content
# (:math:`D_C`) and one for the style (:math:`D_S`). :math:`D_C` measures how different the content
# is between two images while :math:`D_S` measures how different the style is
# between two images. Then, we take a third image, the input, and
# transform it to minimize both its content-distance with the
# content-image and its style-distance with the style-image. Now we can
# import the necessary packages and begin the neural transfer.
#
# Importing Packages and Selecting a Device
# -----------------------------------------
# Below is a list of the packages needed to implement the neural transfer.
#
# - ``torch``, ``torch.nn``, ``numpy`` (indispensable packages for
# neural networks with PyTorch)
# - ``torch.optim`` (efficient gradient descents)
# - ``PIL``, ``PIL.Image``, ``matplotlib.pyplot`` (load and display
# images)
# - ``torchvision.transforms`` (transform PIL images into tensors)
# - ``torchvision.models`` (train or load pre-trained models)
# - ``copy`` (to deep copy the models; system package)
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torchvision.models as models
import copy
from trains import Task
task = Task.init(
    project_name='examples',
    task_name='pytorch with matplotlib example',
    task_type=Task.TaskTypes.testing,
)

######################################################################
# Device selection and image import come next. The neural transfer
# algorithm takes longer on large images and runs much faster on a GPU,
# so ``torch.cuda.is_available()`` is used to detect whether one is present.
# The resulting ``torch.device`` is used throughout the tutorial, and the
# ``.to(device)`` method moves tensors or modules onto it.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
######################################################################
# Loading the Images
# ------------------
#
# Now we will import the style and content images. The original PIL images have values between 0 and 255, but when
# transformed into torch tensors, their values are converted to be between
# 0 and 1. The images also need to be resized to have the same dimensions.
# An important detail to note is that neural networks from the
# torch library are trained with tensor values ranging from 0 to 1. If you
# try to feed the networks with 0 to 255 tensor images, then the activated
# feature maps will be unable to sense the intended content and style.
# However, pre-trained networks from the Caffe library are trained with 0
# to 255 tensor images.
#
#
# .. Note::
# Here are links to download the images required to run the tutorial:
# `picasso.jpg <https://pytorch.org/tutorials/_static/img/neural-style/picasso.jpg>`__ and
# `dancing.jpg <https://pytorch.org/tutorials/_static/img/neural-style/dancing.jpg>`__.
# Download these two images and add them to a directory
# with name ``images`` in your current working directory.
# desired size of the output image
# Output resolution: 512 when CUDA is available, otherwise a small 128 for CPU runs.
if torch.cuda.is_available():
    imsize = 512
else:
    imsize = 128

# Preprocessing pipeline: scale the imported image, then turn it into a torch tensor.
loader = transforms.Compose([
    transforms.Resize(imsize),
    transforms.ToTensor(),
])


def image_loader(image_name):
    """Open ``image_name``, run it through ``loader`` and return it on ``device``.

    A leading dimension of size one is added (fake batch dimension required
    by the network input) and the result is converted to ``torch.float``.
    """
    batched = loader(Image.open(image_name)).unsqueeze(0)
    return batched.to(device, torch.float)


style_img = image_loader("./samples/picasso.jpg")
content_img = image_loader("./samples/dancing.jpg")

assert style_img.size() == content_img.size(), (
    "we need to import style and content images of the same size")
######################################################################
# Now, let's create a function that displays an image by reconverting a
# copy of it to PIL format and displaying the copy using
# ``plt.imshow``. We will try displaying the content and style images
# to ensure they were imported correctly.
# Converter used to turn a tensor back into a PIL image for display.
unloader = transforms.ToPILImage()

# Switch matplotlib to interactive mode.
plt.ion()


def imshow(tensor, title=None):
    """Draw ``tensor`` with matplotlib, optionally under ``title``.

    The tensor is cloned on the CPU so the caller's data is not modified,
    dimension 0 is squeezed away (the fake batch dimension) and the result
    is converted to a PIL image before plotting.
    """
    pil_image = unloader(tensor.cpu().clone().squeeze(0))
    plt.imshow(pil_image)
    if title is not None:
        plt.title(title)
    # pause a bit so that plots are updated
    plt.pause(0.001)


plt.figure()
imshow(style_img, title='Style Image')

plt.figure()
imshow(content_img, title='Content Image')
######################################################################
# Loss Functions
# --------------
# Content Loss
# ~~~~~~~~~~~~
#
# The content loss is a function that represents a weighted version of the
# content distance for an individual layer. The function takes the feature
# maps :math:`F_{XL}` of a layer :math:`L` in a network processing input :math:`X` and returns the
# weighted content distance :math:`w_{CL}.D_C^L(X,C)` between the image :math:`X` and the
# content image :math:`C`. The feature maps of the content image(:math:`F_{CL}`) must be
# known by the function in order to calculate the content distance. We
# implement this function as a torch module with a constructor that takes
# :math:`F_{CL}` as an input. The distance :math:`\|F_{XL} - F_{CL}\|^2` is the mean square error
# between the two sets of feature maps, and can be computed using ``nn.MSELoss``.
#
# We will add this content loss module directly after the convolution
# layer(s) that are being used to compute the content distance. This way
# each time the network is fed an input image the content losses will be
# computed at the desired layers and because of auto grad, all the
# gradients will be computed. Now, in order to make the content loss layer
# transparent we must define a ``forward`` method that computes the content
# loss and then returns the layers input. The computed loss is saved as a
# parameter of the module.
#
class ContentLoss(nn.Module):
    """Pass-through layer that records the MSE between its input and a fixed target."""

    def __init__(self, target, ):
        super(ContentLoss, self).__init__()
        # The target is a constant, not something to differentiate through:
        # detach it from the graph that produced it so the criterion in
        # ``forward`` does not raise.
        fixed_target = target.detach()
        self.target = fixed_target

    def forward(self, input):
        """Store the loss on ``self.loss`` and hand ``input`` back unchanged."""
        mse = F.mse_loss(input, self.target)
        self.loss = mse
        return input
######################################################################
# .. Note::
# **Important detail**: although this module is named ``ContentLoss``, it
# is not a true PyTorch Loss function. If you want to define your content
# loss as a PyTorch Loss function, you have to create a PyTorch autograd function
# to recompute/implement the gradient manually in the ``backward``
# method.
######################################################################
# Style Loss
# ~~~~~~~~~~
#
# The style loss module is implemented similarly to the content loss
# module. It will act as a transparent layer in a
# network that computes the style loss of that layer. In order to
# calculate the style loss, we need to compute the gram matrix :math:`G_{XL}`. A gram
# matrix is the result of multiplying a given matrix by its transposed
# matrix. In this application the given matrix is a reshaped version of
# the feature maps :math:`F_{XL}` of a layer :math:`L`. :math:`F_{XL}` is reshaped to form :math:`\hat{F}_{XL}`, a :math:`K`\ x\ :math:`N`
# matrix, where :math:`K` is the number of feature maps at layer :math:`L` and :math:`N` is the
# length of any vectorized feature map :math:`F_{XL}^k`. For example, the first line
# of :math:`\hat{F}_{XL}` corresponds to the first vectorized feature map :math:`F_{XL}^1`.
#
# Finally, the gram matrix must be normalized by dividing each element by
# the total number of elements in the matrix. This normalization is to
# counteract the fact that :math:`\hat{F}_{XL}` matrices with a large :math:`N` dimension yield
# larger values in the Gram matrix. These larger values will cause the
# first layers (before pooling layers) to have a larger impact during the
# gradient descent. Style features tend to be in the deeper layers of the
# network so this normalization step is crucial.
#
def gram_matrix(input):
    """Return the normalized Gram matrix of a 4-D feature tensor.

    ``input`` is unpacked as (batch, feature maps, height, width). It is
    reshaped to a (batch * maps) x (height * width) matrix, multiplied by its
    own transpose, and every entry is divided by the total element count.
    """
    batch, maps, height, width = input.size()  # batch size is 1 in this tutorial
    # resize F_XL into \hat F_XL: one row per feature map
    flat = input.view(batch * maps, height * width)
    # gram product
    gram = flat.mm(flat.t())
    # 'normalize' the values of the gram matrix by the number of elements
    return gram.div(batch * maps * height * width)
######################################################################
# Now the style loss module looks almost exactly like the content loss
# module. The style distance is also computed using the mean square
# error between :math:`G_{XL}` and :math:`G_{SL}`.
#
class StyleLoss(nn.Module):
    """Pass-through layer that records the MSE between Gram matrices."""

    def __init__(self, target_feature):
        super(StyleLoss, self).__init__()
        # Gram matrix of the target features, detached so it acts as a constant.
        target_gram = gram_matrix(target_feature)
        self.target = target_gram.detach()

    def forward(self, input):
        """Store the style loss on ``self.loss`` and return ``input`` unchanged."""
        input_gram = gram_matrix(input)
        self.loss = F.mse_loss(input_gram, self.target)
        return input
######################################################################
# Importing the Model
# -------------------
#
# Now we need to import a pre-trained neural network. We will use a 19
# layer VGG network like the one used in the paper.
#
# PyTorch's implementation of VGG is a module divided into two child
# ``Sequential`` modules: ``features`` (containing convolution and pooling layers),
# and ``classifier`` (containing fully connected layers). We will use the
# ``features`` module because we need the output of the individual
# convolution layers to measure content and style loss. Some layers have
# different behavior during training than evaluation, so we must set the
# network to evaluation mode using ``.eval()``.
#
# Take the ``features`` sub-module of a pre-trained VGG-19, move it to
# ``device`` and put it in evaluation mode.
cnn = models.vgg19(pretrained=True).features
cnn = cnn.to(device).eval()

######################################################################
# VGG networks are trained on images whose channels are normalized with
# mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225]; the same
# values are applied to the image before it is sent into the network.
#
cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406], device=device)
cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225], device=device)
# create a module to normalize input image so we can easily put it in a
# nn.Sequential
class Normalization(nn.Module):
    """Module that normalizes an image batch with a per-channel mean and std.

    Wrapping the arithmetic in a module lets it be the first element of an
    ``nn.Sequential``.
    """

    def __init__(self, mean, std):
        """Store ``mean`` and ``std`` reshaped to ``[C x 1 x 1]``.

        :param mean: per-channel means, as a tensor or a sequence of numbers.
        :param std: per-channel standard deviations, same form as ``mean``.
        """
        super(Normalization, self).__init__()
        # ``as_tensor(...).clone().detach()`` copies the values for both tensor and
        # list inputs without the warning ``torch.tensor(tensor)`` emits.
        # The [C x 1 x 1] view lets them broadcast against an image tensor of
        # shape [B x C x H x W] (batch, channels, height, width).
        # Registering them as buffers makes ``.to(device)`` / ``.cuda()`` on the
        # module move them together with it.
        self.register_buffer('mean', torch.as_tensor(mean).clone().detach().view(-1, 1, 1))
        self.register_buffer('std', torch.as_tensor(std).clone().detach().view(-1, 1, 1))

    def forward(self, img):
        """Return ``(img - mean) / std``."""
        return (img - self.mean) / self.std
######################################################################
# A ``Sequential`` module contains an ordered list of child modules. For
# instance, ``vgg19.features`` contains a sequence (Conv2d, ReLU, MaxPool2d,
# Conv2d, ReLU…) aligned in the right order of depth. We need to add our
# content loss and style loss layers immediately after the convolution
# layer they are detecting. To do this we must create a new ``Sequential``
# module that has content loss and style loss modules correctly inserted.
#
# desired depth layers to compute style/content losses :
content_layers_default = ['conv_4']
style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']


def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
                               style_img, content_img,
                               content_layers=content_layers_default,
                               style_layers=style_layers_default):
    """Build a ``nn.Sequential`` from ``cnn`` with loss layers spliced in.

    A deep copy of ``cnn`` is walked child by child. Each child is named after
    its type and the number of convolutions seen so far, and is appended to a
    new sequential model that starts with a ``Normalization`` module. After
    every layer whose name appears in ``content_layers`` / ``style_layers`` a
    ``ContentLoss`` / ``StyleLoss`` module is inserted; its target is the
    output of the model built so far for the content / style image.

    :return: tuple ``(model, style_losses, content_losses)``; the model is
        cut right after its last loss module.
    :raises RuntimeError: if ``cnn`` contains a layer that is not a Conv2d,
        ReLU, MaxPool2d or BatchNorm2d.
    """
    cnn = copy.deepcopy(cnn)

    # normalization module, first element of the new model
    normalization = Normalization(normalization_mean, normalization_std).to(device)
    model = nn.Sequential(normalization)

    # lists giving iterable access to the inserted content/style losses
    content_losses, style_losses = [], []

    conv_count = 0  # incremented every time a conv layer is seen
    for layer in cnn.children():
        if isinstance(layer, nn.Conv2d):
            conv_count += 1
            name = 'conv_{}'.format(conv_count)
        elif isinstance(layer, nn.ReLU):
            name = 'relu_{}'.format(conv_count)
            # The in-place version doesn't play very nicely with the loss
            # modules inserted below, so an out-of-place ReLU is used instead.
            layer = nn.ReLU(inplace=False)
        elif isinstance(layer, nn.MaxPool2d):
            name = 'pool_{}'.format(conv_count)
        elif isinstance(layer, nn.BatchNorm2d):
            name = 'bn_{}'.format(conv_count)
        else:
            raise RuntimeError('Unrecognized layer: {}'.format(layer.__class__.__name__))

        model.add_module(name, layer)

        if name in content_layers:
            # content loss: target is the current model's output for the content image
            content_loss = ContentLoss(model(content_img).detach())
            model.add_module("content_loss_{}".format(conv_count), content_loss)
            content_losses.append(content_loss)

        if name in style_layers:
            # style loss: target features are the current model's output for the style image
            style_loss = StyleLoss(model(style_img).detach())
            model.add_module("style_loss_{}".format(conv_count), style_loss)
            style_losses.append(style_loss)

    # Walk backwards to the last loss module and drop everything after it.
    for last_idx in range(len(model) - 1, -1, -1):
        if isinstance(model[last_idx], (ContentLoss, StyleLoss)):
            break
    model = model[:(last_idx + 1)]

    return model, style_losses, content_losses
######################################################################
# Next, we select the input image. You can use a copy of the content image
# or white noise.
#
# Start from a clone of the content image, so content_img itself is a separate tensor.
input_img = content_img.clone()
# if you want to use white noise instead uncomment the below line:
# input_img = torch.randn(content_img.data.size(), device=device)
# add the original input image to the figure:
# (a new figure is opened first, then the image is drawn with the imshow helper)
plt.figure()
imshow(input_img, title='Input Image')
######################################################################
# Gradient Descent
# ----------------
#
# As Leon Gatys, the author of the algorithm, suggested `here <https://discuss.pytorch.org/t/pytorch-tutorial-for-neural-transfert-of-artistic-style/336/20?u=alexis-jacq>`__, we will use
# L-BFGS algorithm to run our gradient descent. Unlike training a network,
# we want to train the input image in order to minimise the content/style
# losses. We will create a PyTorch L-BFGS optimizer ``optim.LBFGS`` and pass
# our image to it as the tensor to optimize.
#
def get_input_optimizer(input_img):
    """Return an L-BFGS optimizer whose only parameter is ``input_img``.

    ``input_img`` is flagged in place as requiring a gradient, which shows
    that the image itself is the parameter being optimized.
    """
    trainable = input_img.requires_grad_()
    return optim.LBFGS([trainable])
######################################################################
# Finally, we must define a function that performs the neural transfer. For
# each iteration of the networks, it is fed an updated input and computes
# new losses. We will run the ``backward`` methods of each loss module to
# dynamically compute their gradients. The optimizer requires a “closure”
# function, which reevaluates the module and returns the loss.
#
# We still have one final constraint to address. The network may try to
# optimize the input with values that exceed the 0 to 1 tensor range for
# the image. We can address this by correcting the input values to be
# between 0 to 1 each time the network is run.
#
def run_style_transfer(cnn, normalization_mean, normalization_std,
                       content_img, style_img, input_img, num_steps=300,
                       style_weight=1000000, content_weight=1):
    """Run the style transfer.

    Builds the loss-instrumented model with ``get_style_model_and_losses`` and
    optimizes ``input_img`` in place with the optimizer returned by
    ``get_input_optimizer``.

    :param cnn: network passed through to ``get_style_model_and_losses``.
    :param normalization_mean: per-channel mean passed through to the model builder.
    :param normalization_std: per-channel std passed through to the model builder.
    :param content_img: image providing the content targets.
    :param style_img: image providing the style targets.
    :param input_img: image tensor that is optimized (modified in place).
    :param num_steps: the outer loop runs while the closure call counter is
        ``<= num_steps``.
    :param style_weight: multiplier applied to the summed style losses.
    :param content_weight: multiplier applied to the summed content losses.
    :return: ``input_img`` itself, with values clamped to [0, 1].
    """
    print('Building the style transfer model..')
    model, style_losses, content_losses = get_style_model_and_losses(
        cnn, normalization_mean, normalization_std, style_img, content_img)
    optimizer = get_input_optimizer(input_img)

    print('Optimizing..')
    # One-element list so the nested closure can update the counter
    # without needing ``nonlocal``.
    run = [0]
    while run[0] <= num_steps:

        def closure():
            # correct the values of updated input image; done under no_grad so
            # the in-place clamp is not recorded by autograd
            with torch.no_grad():
                input_img.clamp_(0, 1)

            optimizer.zero_grad()
            # The forward pass is run only for its side effect: every loss
            # module stores its value on ``.loss``.
            model(input_img)
            style_score = 0
            content_score = 0

            for sl in style_losses:
                style_score += sl.loss
            for cl in content_losses:
                content_score += cl.loss

            style_score *= style_weight
            content_score *= content_weight

            loss = style_score + content_score
            loss.backward()

            run[0] += 1
            if run[0] % 50 == 0:
                # print the counter value, not the list that wraps it
                print("run {}:".format(run[0]))
                print('Style Loss : {:4f} Content Loss: {:4f}'.format(
                    style_score.item(), content_score.item()))
                print()

            return loss

        optimizer.step(closure)

    # a last correction...
    with torch.no_grad():
        input_img.clamp_(0, 1)

    return input_img
######################################################################
# Finally, we can run the algorithm.
#
# Run the transfer with the default num_steps and weights; per run_style_transfer's
# return statement, output is the same tensor object as input_img.
output = run_style_transfer(cnn, cnn_normalization_mean, cnn_normalization_std,
content_img, style_img, input_img)
# Show the result in its own figure.
plt.figure()
imshow(output, title='Output Image')
# sphinx_gallery_thumbnail_number = 4
# Leave interactive mode before the final show() call.
plt.ioff()
plt.show()

124
examples/pytorch_mnist.py Normal file
View File

@@ -0,0 +1,124 @@
# TRAINS - Example of Pytorch mnist training integration
#
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from trains import Task
# Create the TRAINS task for this script; it runs at import time, before the model and main() are defined.
task = Task.init(project_name='examples', task_name='pytorch mnist train')
class Net(nn.Module):
    """Two convolution layers followed by two fully connected layers, 10 outputs."""

    def __init__(self):
        super(Net, self).__init__()
        # Convolutions: 1 -> 20 -> 50 channels, kernel size 5, stride 1.
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        # Classifier head working on the flattened 4 * 4 * 50 feature vector.
        self.fc1 = nn.Linear(4 * 4 * 50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Return the log-softmax over dimension 1 of the network output."""
        # conv -> relu -> 2x2 max-pool, twice
        x = F.max_pool2d(F.relu(self.conv1(x)), 2, 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2, 2)
        # flatten to rows of 4 * 4 * 50 values
        flat = x.view(-1, 4 * 4 * 50)
        hidden = F.relu(self.fc1(flat))
        return F.log_softmax(self.fc2(hidden), dim=1)
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one training pass over ``train_loader``.

    Each batch is moved to ``device``, the NLL loss of the model output is
    back-propagated and one optimizer step is taken. A progress line is
    printed for every batch whose index is a multiple of ``args.log_interval``.
    ``epoch`` is only used in that progress line.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        loss = F.nll_loss(model(data), target)
        loss.backward()
        optimizer.step()

        if batch_idx % args.log_interval != 0:
            continue

        seen = batch_idx * len(data)
        percent = 100. * batch_idx / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, len(train_loader.dataset), percent, loss.item()))
def test(args, model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print average loss and accuracy.

    The model is switched to eval mode and the whole pass runs without
    gradient tracking. ``args`` is accepted but not used.
    """
    model.eval()
    total_loss = 0
    num_correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # sum up batch loss
            total_loss += F.nll_loss(output, target, reduction='sum').item()
            # index of the max log-probability, kept as a column for the comparison
            pred = output.argmax(dim=1, keepdim=True)
            num_correct += pred.eq(target.view_as(pred)).sum().item()

    num_samples = len(test_loader.dataset)
    total_loss /= num_samples

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        total_loss, num_correct, len(test_loader.dataset),
        100. * num_correct / len(test_loader.dataset)))
def main():
    """Parse the command line, train ``Net`` on MNIST and save its weights.

    After every training epoch the model is evaluated on the test split.
    When ``args.save_model`` is set, the state dict is written to
    ``/tmp/mnist_cnn.pt`` once all epochs are done.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    option = parser.add_argument
    option('--batch-size', type=int, default=64, metavar='N',
           help='input batch size for training (default: 64)')
    option('--test-batch-size', type=int, default=1000, metavar='N',
           help='input batch size for testing (default: 1000)')
    option('--epochs', type=int, default=10, metavar='N',
           help='number of epochs to train (default: 10)')
    option('--lr', type=float, default=0.01, metavar='LR',
           help='learning rate (default: 0.01)')
    option('--momentum', type=float, default=0.5, metavar='M',
           help='SGD momentum (default: 0.5)')
    option('--no-cuda', action='store_true', default=False,
           help='disables CUDA training')
    option('--seed', type=int, default=1, metavar='S',
           help='random seed (default: 1)')
    option('--log-interval', type=int, default=10, metavar='N',
           help='how many batches to wait before logging training status')
    option('--save-model', action='store_true', default=True,
           help='For Saving the current Model')
    args = parser.parse_args()

    # CUDA is used only when it is available and not disabled on the command line.
    use_cuda = torch.cuda.is_available() and not args.no_cuda

    torch.manual_seed(args.seed)

    if use_cuda:
        device = torch.device("cuda")
        kwargs = {'num_workers': 4, 'pin_memory': True}
    else:
        device = torch.device("cpu")
        kwargs = {}

    # The same preprocessing is applied to both splits.
    mnist_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    train_set = datasets.MNIST('../data', train=True, download=True, transform=mnist_transform)
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=args.batch_size, shuffle=True, **kwargs)

    test_set = datasets.MNIST('../data', train=False, transform=mnist_transform)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=args.test_batch_size, shuffle=True, **kwargs)

    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    last_epoch = args.epochs
    for epoch in range(1, last_epoch + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "/tmp/mnist_cnn.pt")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,126 @@
# TRAINS - Example of pytorch with tensorboard>=v1.14
#
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from torch.utils.tensorboard import SummaryWriter
from trains import Task
# NOTE(review): 'pytroch' in the task name looks like a typo of 'pytorch'; it is left
# untouched because the name identifies the task - confirm before renaming.
task = Task.init(project_name='examples', task_name='pytroch with tensorboard')

# TensorBoard writer logging under 'runs', plus one text entry at step 0
writer = SummaryWriter('runs')
writer.add_text('lstm', 'This is an lstm', 0)

# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
# help text now states the actual default (2)
parser.add_argument('--epochs', type=int, default=2, metavar='N',
                    help='number of epochs to train (default: 2)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
args = parser.parse_args()
# CUDA is used only when it is available and not disabled on the command line.
args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Extra DataLoader options are only passed when running on CUDA.
kwargs = {'num_workers': 4, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
# The test loader now honours --test-batch-size; it previously reused
# --batch-size, which left the option without any effect.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)
class Net(nn.Module):
    """Small convolutional classifier with 10 output classes.

    Two 5x5 convolutions (each followed by 2x2 max pooling and ReLU) feed two
    linear layers. The first linear layer expects 320 features per sample,
    which is what the convolution stack yields for 1x28x28 inputs.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities with shape (batch, 10)."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Flatten the feature maps into one vector per sample.
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Normalise over the class dimension explicitly; relying on the
        # implicit dimension is deprecated and emits a warning.
        return F.log_softmax(x, dim=1)
# Build the network (moved to the GPU when CUDA is in use) and its SGD optimizer.
model = Net()
if args.cuda:
    # Module.cuda() moves the parameters in place and returns the module itself.
    model = model.cuda()
optimizer = optim.SGD(params=model.parameters(),
                      lr=args.lr,
                      momentum=args.momentum)
def train(epoch):
    """Run one training epoch over ``train_loader``.

    Every ``args.log_interval`` batches the current loss is printed and
    written to the summary writer as the 'Train/Loss' scalar.

    Args:
        epoch: Epoch number; shown in the console output and used to compute
            the global step of the logged scalar.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # Tensors are used directly; the Variable wrapper is a deprecated no-op.
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            loss_value = loss.item()
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss_value))
            niter = epoch * len(train_loader) + batch_idx
            writer.add_scalar('Train/Loss', loss_value, niter)
def test():
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.

    The mean loss of every batch is written to the summary writer as the
    'Test/Loss' scalar, using the batch index as the step.
    """
    model.eval()
    test_loss = 0
    correct = 0
    # No gradients are needed for evaluation (replaces the ignored
    # ``volatile=True`` flag).
    with torch.no_grad():
        for niter, (data, target) in enumerate(test_loader):
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # sum up batch loss
            batch_loss = F.nll_loss(output, target, reduction='sum').item()
            test_loss += batch_loss
            # get the index of the max log-probability
            pred = output.max(1)[1]
            batch_correct = pred.eq(target).cpu().sum().item()
            # The scalar named 'Test/Loss' used to receive the number of
            # correct predictions; it now receives the batch's mean loss.
            writer.add_scalar('Test/Loss', batch_loss / len(data), niter)
            correct += batch_correct
    test_loss /= len(test_loader.dataset)
    # ``correct`` is a plain int, so it prints as a number, not as tensor(..).
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
# Train for the requested number of epochs, saving the whole model object
# after each one.
for epoch in range(1, args.epochs + 1):
    train(epoch)
    checkpoint_path = '/tmp/model{}'.format(epoch)
    torch.save(model, checkpoint_path)
# NOTE(review): evaluation is assumed to run once, after the last epoch - confirm.
test()

View File

@@ -0,0 +1,126 @@
# TRAINS - Example of pytorch with tensorboardX
#
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from tensorboardX import SummaryWriter
from trains import Task
# Create the TRAINS task before the summary writer and the argument parser.
task = Task.init(project_name='examples', task_name='pytroch with tensorboardX')

# TensorBoard event files are written under ./runs
writer = SummaryWriter('runs')
writer.add_text('lstm', 'This is an lstm', 0)

# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=2, metavar='N',
                    help='number of epochs to train (default: 2)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
args = parser.parse_args()
# CUDA is used only when it is not disabled on the command line and is available.
args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Extra DataLoader options, passed only when running on the GPU.
kwargs = {'num_workers': 4, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
# The evaluation loader honours --test-batch-size; it used to reuse
# --batch-size, which left the option without any effect.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)
class Net(nn.Module):
    """Small convolutional classifier with 10 output classes.

    Two 5x5 convolutions (each followed by 2x2 max pooling and ReLU) feed two
    linear layers. The first linear layer expects 320 features per sample,
    which is what the convolution stack yields for 1x28x28 inputs.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities with shape (batch, 10)."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Flatten the feature maps into one vector per sample.
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Normalise over the class dimension explicitly; relying on the
        # implicit dimension is deprecated and emits a warning.
        return F.log_softmax(x, dim=1)
# Build the network (moved to the GPU when CUDA is in use) and its SGD optimizer.
model = Net()
if args.cuda:
    # Module.cuda() moves the parameters in place and returns the module itself.
    model = model.cuda()
optimizer = optim.SGD(params=model.parameters(),
                      lr=args.lr,
                      momentum=args.momentum)
def train(epoch):
    """Run one training epoch over ``train_loader``.

    Every ``args.log_interval`` batches the current loss is printed and
    written to the summary writer as the 'Train/Loss' scalar.

    Args:
        epoch: Epoch number; shown in the console output and used to compute
            the global step of the logged scalar.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # Tensors are used directly; the Variable wrapper is a deprecated no-op.
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            loss_value = loss.item()
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss_value))
            niter = epoch * len(train_loader) + batch_idx
            writer.add_scalar('Train/Loss', loss_value, niter)
def test():
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.

    The mean loss of every batch is written to the summary writer as the
    'Test/Loss' scalar, using the batch index as the step.
    """
    model.eval()
    test_loss = 0
    correct = 0
    # No gradients are needed for evaluation (replaces the ignored
    # ``volatile=True`` flag).
    with torch.no_grad():
        for niter, (data, target) in enumerate(test_loader):
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # sum up batch loss
            batch_loss = F.nll_loss(output, target, reduction='sum').item()
            test_loss += batch_loss
            # get the index of the max log-probability
            pred = output.max(1)[1]
            batch_correct = pred.eq(target).cpu().sum().item()
            # The scalar named 'Test/Loss' used to receive the number of
            # correct predictions; it now receives the batch's mean loss.
            writer.add_scalar('Test/Loss', batch_loss / len(data), niter)
            correct += batch_correct
    test_loss /= len(test_loader.dataset)
    # ``correct`` is a plain int, so it prints as a number, not as tensor(..).
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
# Train for the requested number of epochs, saving the whole model object
# after each one.
for epoch in range(1, args.epochs + 1):
    train(epoch)
    checkpoint_path = '/tmp/model{}'.format(epoch)
    torch.save(model, checkpoint_path)
# NOTE(review): evaluation is assumed to run once, after the last epoch - confirm.
test()

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

View File

@@ -0,0 +1,237 @@
# TRAINS - Example of new tensorboard pr_curves model
#
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Create sample PR curve summary data.
We have 3 classes: R, G, and B. We generate colors within RGB space from 3
normal distributions (1 at each corner of the color triangle: [255, 0, 0],
[0, 255, 0], and [0, 0, 255]).
The true label of each random color is associated with the normal distribution
that generated it.
Using 3 other normal distributions (over the distance each color is from a
corner of the color triangle - RGB), we then compute the probability that each
color belongs to the class. We use those probabilities to generate PR curves.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
from absl import app
from absl import flags
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
from tensorboard.plugins.pr_curve import summary
from trains import Task
# Create the TRAINS task first, before any TensorFlow graph or flag is set up.
task = Task.init(project_name='examples', task_name='tensorboard pr_curve')
# The demo below builds a graph with placeholders and a Session, so TF2
# behaviour is switched off.
tf.compat.v1.disable_v2_behavior()
# Command-line flags: output directory and number of steps per PR curve.
FLAGS = flags.FLAGS
flags.DEFINE_string('logdir', '/tmp/pr_curve_demo', 'Directory into which to write TensorBoard data.')
flags.DEFINE_integer('steps', 10,
'Number of steps to generate for each PR curve.')
def start_runs(
    logdir,
    steps,
    run_name,
    thresholds,
    mask_every_other_prediction=False):
  """Generate a PR curve with precision and recall evenly weighted.

  Builds a fresh default graph, writes one PR curve summary per color class
  (red, green, blue) for each step, and stores the events under
  ``os.path.join(logdir, run_name)``.

  Arguments:
    logdir: The directory into which to store all the runs' data.
    steps: The number of steps to run for. Also sets how wide the predictor
      distributions start out (they narrow by 1 per step).
    run_name: The name of the run.
    thresholds: The number of thresholds to use for PR curves.
    mask_every_other_prediction: Whether to mask every other prediction by
      alternating weights between 0 and 1.
  """
  tf.compat.v1.reset_default_graph()
  tf.compat.v1.set_random_seed(42)

  # Create a normal distribution layer used to generate true color labels.
  distribution = tf.compat.v1.distributions.Normal(loc=0., scale=142.)

  # Sample the distribution to generate colors. Lets generate different numbers
  # of each color. The first dimension is the count of examples.

  # The calls to sample() are given fixed random seed values that are "magic"
  # in that they correspond to the default seeds for those ops when the PR
  # curve test (which depends on this code) was written. We've pinned these
  # instead of continuing to use the defaults since the defaults are based on
  # node IDs from the sequence of nodes added to the graph, which can silently
  # change when this code or any TF op implementations it uses are modified.

  # TODO(nickfelt): redo the PR curve test to avoid reliance on random seeds.

  # Generate reds.
  number_of_reds = 100
  true_reds = tf.clip_by_value(
      tf.concat([
          255 - tf.abs(distribution.sample([number_of_reds, 1], seed=11)),
          tf.abs(distribution.sample([number_of_reds, 2], seed=34))
      ], axis=1),
      0, 255)

  # Generate greens.
  number_of_greens = 200
  true_greens = tf.clip_by_value(
      tf.concat([
          tf.abs(distribution.sample([number_of_greens, 1], seed=61)),
          255 - tf.abs(distribution.sample([number_of_greens, 1], seed=82)),
          tf.abs(distribution.sample([number_of_greens, 1], seed=105))
      ], axis=1),
      0, 255)

  # Generate blues.
  number_of_blues = 150
  true_blues = tf.clip_by_value(
      tf.concat([
          tf.abs(distribution.sample([number_of_blues, 2], seed=132)),
          255 - tf.abs(distribution.sample([number_of_blues, 1], seed=153))
      ], axis=1),
      0, 255)

  # Assign each color a vector of 3 booleans based on its true label.
  labels = tf.concat([
      tf.tile(tf.constant([[True, False, False]]), (number_of_reds, 1)),
      tf.tile(tf.constant([[False, True, False]]), (number_of_greens, 1)),
      tf.tile(tf.constant([[False, False, True]]), (number_of_blues, 1)),
  ], axis=0)

  # We introduce 3 normal distributions. They are used to predict whether a
  # color falls under a certain class (based on distances from corners of the
  # color triangle). The distributions vary per color. We have the distributions
  # narrow over time.
  # The starting widths are derived from the ``steps`` argument (not the global
  # flag), so the scale stays positive for every step this call actually runs.
  initial_standard_deviations = [v + steps for v in (158, 200, 242)]
  iteration = tf.compat.v1.placeholder(tf.int32, shape=[])
  red_predictor = tf.compat.v1.distributions.Normal(
      loc=0.,
      scale=tf.cast(
          initial_standard_deviations[0] - iteration,
          dtype=tf.float32))
  green_predictor = tf.compat.v1.distributions.Normal(
      loc=0.,
      scale=tf.cast(
          initial_standard_deviations[1] - iteration,
          dtype=tf.float32))
  blue_predictor = tf.compat.v1.distributions.Normal(
      loc=0.,
      scale=tf.cast(
          initial_standard_deviations[2] - iteration,
          dtype=tf.float32))

  # Make predictions (assign 3 probabilities to each color based on each color's
  # distance to each of the 3 corners). We seek double the area in the right
  # tail of the normal distribution.
  examples = tf.concat([true_reds, true_greens, true_blues], axis=0)
  probabilities_colors_are_red = (1 - red_predictor.cdf(
      tf.norm(tensor=examples - tf.constant([255., 0, 0]), axis=1))) * 2
  probabilities_colors_are_green = (1 - green_predictor.cdf(
      tf.norm(tensor=examples - tf.constant([0, 255., 0]), axis=1))) * 2
  probabilities_colors_are_blue = (1 - blue_predictor.cdf(
      tf.norm(tensor=examples - tf.constant([0, 0, 255.]), axis=1))) * 2

  predictions = (
      probabilities_colors_are_red,
      probabilities_colors_are_green,
      probabilities_colors_are_blue
  )

  # This is the crucial piece. We write data required for generating PR curves.
  # We create 1 summary per class because we create 1 PR curve per class.
  for i, color in enumerate(('red', 'green', 'blue')):
    description = ('The probabilities used to create this PR curve are '
                   'generated from a normal distribution. Its standard '
                   'deviation is initially %0.0f and decreases over time.' %
                   initial_standard_deviations[i])

    weights = None
    if mask_every_other_prediction:
      # Assign a weight of 0 to every even-indexed prediction. Odd-indexed
      # predictions are assigned a default weight of 1.
      consecutive_indices = tf.reshape(
          tf.range(tf.size(input=predictions[i])), tf.shape(input=predictions[i]))
      weights = tf.cast(consecutive_indices % 2, dtype=tf.float32)

    summary.op(
        name=color,
        labels=labels[:, i],
        predictions=predictions[i],
        num_thresholds=thresholds,
        weights=weights,
        display_name='classifying %s' % color,
        description=description)

  merged_summary_op = tf.compat.v1.summary.merge_all()
  events_directory = os.path.join(logdir, run_name)
  # The context manager closes the session; ``finally`` closes the writer even
  # when a step fails.
  with tf.compat.v1.Session() as sess:
    writer = tf.compat.v1.summary.FileWriter(events_directory, sess.graph)
    try:
      for step in xrange(steps):
        feed_dict = {
            iteration: step,
        }
        merged_summary = sess.run(merged_summary_op, feed_dict=feed_dict)
        writer.add_summary(merged_summary, step)
    finally:
      writer.close()
def run_all(logdir, steps, thresholds, verbose=False):
  """Write both demo runs of PR curve summaries.

  The first run, 'colors', weights all predictions evenly; the second run,
  'mask_every_other_prediction', masks every other prediction.

  Arguments:
    logdir: The directory into which to store all the runs' data.
    steps: The number of steps to run for.
    thresholds: The number of thresholds to use for PR curves.
    verbose: Whether to print the names of runs into stdout during execution.
  """
  # (run name, whether every other prediction is masked)
  run_configs = (
      ('colors', False),
      ('mask_every_other_prediction', True),
  )
  for run_name, mask_predictions in run_configs:
    if verbose:
      print('--- Running: %s' % run_name)
    start_runs(
        logdir=logdir,
        steps=steps,
        run_name=run_name,
        thresholds=thresholds,
        mask_every_other_prediction=mask_predictions)
def main(_):
  """Entry point for ``app.run``: generate the demo runs under FLAGS.logdir.

  The positional argument supplied by ``app.run`` is ignored.
  """
  output_dir = FLAGS.logdir
  print('Saving output to %s.' % output_dir)
  run_all(logdir=output_dir, steps=FLAGS.steps, thresholds=50, verbose=True)
  print('Done. Output saved to %s.' % output_dir)


if __name__ == '__main__':
  app.run(main)

View File

@@ -0,0 +1,76 @@
# TRAINS - Example of tensorboard with tensorflow (without any actual training)
#
import tensorflow as tf
import numpy as np
import cv2
from time import sleep
#import tensorflow.compat.v1 as tf
#tf.disable_v2_behavior()
from trains import Task
task = Task.init(project_name='examples', task_name='tensorboard toy example')

# Scalar fed at every step; it drives the parameters of all distributions below.
k = tf.placeholder(tf.float32)

# Make a normal distribution, with a shifting mean
mean_moving_normal = tf.random_normal(shape=[1000], mean=(5*k), stddev=1)
# Record that distribution into a histogram summary
tf.summary.histogram("normal/moving_mean", mean_moving_normal)
tf.summary.scalar("normal/value", mean_moving_normal[-1])

# Make a normal distribution with shrinking variance
variance_shrinking_normal = tf.random_normal(shape=[1000], mean=0, stddev=1-(k))
# Record that distribution too
tf.summary.histogram("normal/shrinking_variance", variance_shrinking_normal)
tf.summary.scalar("normal/variance_shrinking_normal", variance_shrinking_normal[-1])

# Let's combine both of those distributions into one dataset
normal_combined = tf.concat([mean_moving_normal, variance_shrinking_normal], 0)
# We add another histogram summary to record the combined distribution
tf.summary.histogram("normal/bimodal", normal_combined)
tf.summary.scalar("normal/normal_combined", normal_combined[0])

# Add a gamma distribution
gamma = tf.random_gamma(shape=[1000], alpha=k)
tf.summary.histogram("gamma", gamma)

# And a poisson distribution
poisson = tf.random_poisson(shape=[1000], lam=k)
tf.summary.histogram("poisson", poisson)

# And a uniform distribution
uniform = tf.random_uniform(shape=[1000], maxval=k*10)
tf.summary.histogram("uniform", uniform)

# Finally, combine everything together!
all_distributions = [mean_moving_normal, variance_shrinking_normal, gamma, poisson, uniform]
all_combined = tf.concat(all_distributions, 0)
tf.summary.histogram("all_combined", all_combined)

# cv2.imread returns None (it does not raise) when the file cannot be read, so
# fail here with a clear message instead of a TypeError on the indexing below.
image = cv2.imread('./samples/picasso.jpg')
if image is None:
    raise IOError("Could not read image './samples/picasso.jpg' "
                  "(the path is relative to the current working directory)")
# Keep only the first channel and add batch/channel axes -> 4d array of
# shape [1, rows, cols, 1].
image = image[:, :, 0][np.newaxis, :, :, np.newaxis]
# Alternative that keeps all channels:
# image = image[np.newaxis, :, :, :]
tf.summary.image("test", image, max_outputs=10)

# Setup a session and summary writer
summaries = tf.summary.merge_all()
sess = tf.Session()

logger = task.get_logger()

# Use original FileWriter for comparison , run:
# % tensorboard --logdir=/tmp/histogram_example
writer = tf.summary.FileWriter("/tmp/histogram_example")

# Setup a loop and write the summaries to disk
N = 40
try:
    for step in range(N):
        k_val = step/float(N)
        summ = sess.run(summaries, feed_dict={k: k_val})
        writer.add_summary(summ, global_step=step)
finally:
    # Flush pending events to disk and release the session.
    writer.close()
    sess.close()

print('Done!')

View File

@@ -0,0 +1,358 @@
# TRAINS - Example of tensorflow eager mode, model logging and tensorboard
#
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A deep MNIST classifier using convolutional layers.
Sample usage:
python mnist.py --help
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import sys
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from trains import Task
# Switch TensorFlow to eager execution, then create the TRAINS task.
tf.enable_eager_execution()
task = Task.init(project_name='examples', task_name='Tensorflow eager mode')
# Two tf.app flags are defined here; nothing else in this file reads them.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('data_num', 100, """Flag of type integer""")
tf.app.flags.DEFINE_string('img_path', './img', """Flag of type string""")
# Short alias used by the model classes below.
layers = tf.keras.layers
# NOTE: this rebinds FLAGS, dropping the reference to the tf.app.flags object
# above. The __main__ section later stores the argparse namespace in FLAGS.
FLAGS = None
class Discriminator(tf.keras.Model):
  """GAN Discriminator.

  A network to differentiate between generated and real handwritten digits.
  """

  def __init__(self, data_format):
    """Creates a model for discriminating between real and generated digits.

    Args:
      data_format: Either 'channels_first' or 'channels_last'.
        'channels_first' is typically faster on GPUs while 'channels_last' is
        typically faster on CPUs. See
        https://www.tensorflow.org/performance/performance_guide#data_formats

    Raises:
      ValueError: If data_format is neither of the two supported values.
    """
    super(Discriminator, self).__init__(name='')
    # Validate explicitly: an ``assert`` would be stripped when Python runs
    # with -O and an unsupported value would then be accepted silently.
    if data_format == 'channels_first':
      self._input_shape = [-1, 1, 28, 28]
    elif data_format == 'channels_last':
      self._input_shape = [-1, 28, 28, 1]
    else:
      raise ValueError(
          "data_format must be 'channels_first' or 'channels_last', got %r" %
          (data_format,))
    self.conv1 = layers.Conv2D(
        64, 5, padding='SAME', data_format=data_format, activation=tf.tanh)
    self.pool1 = layers.AveragePooling2D(2, 2, data_format=data_format)
    self.conv2 = layers.Conv2D(
        128, 5, data_format=data_format, activation=tf.tanh)
    self.pool2 = layers.AveragePooling2D(2, 2, data_format=data_format)
    self.flatten = layers.Flatten()
    self.fc1 = layers.Dense(1024, activation=tf.tanh)
    self.fc2 = layers.Dense(1, activation=None)

  def call(self, inputs):
    """Return one logit per image estimating input authenticity.

    Users should invoke __call__ to run the network, which delegates to this
    method (and not call this method directly).

    Args:
      inputs: A batch of images as a Tensor with shape [batch_size, 28, 28, 1]
        or [batch_size, 1, 28, 28]

    Returns:
      A Tensor with shape [batch_size, 1] containing logits estimating
      the probability that corresponding digit is real.
    """
    x = tf.reshape(inputs, self._input_shape)
    x = self.conv1(x)
    x = self.pool1(x)
    x = self.conv2(x)
    x = self.pool2(x)
    x = self.flatten(x)
    x = self.fc1(x)
    x = self.fc2(x)
    return x
class Generator(tf.keras.Model):
  """Generator of handwritten digits similar to the ones in the MNIST dataset.
  """

  def __init__(self, data_format):
    """Creates a model for generating digit images from noise vectors.

    Args:
      data_format: Either 'channels_first' or 'channels_last'.
        'channels_first' is typically faster on GPUs while 'channels_last' is
        typically faster on CPUs. See
        https://www.tensorflow.org/performance/performance_guide#data_formats

    Raises:
      ValueError: If data_format is neither of the two supported values.
    """
    super(Generator, self).__init__(name='')
    self.data_format = data_format
    # We are using 128 6x6 channels as input to the first deconvolution layer
    # Validate explicitly: an ``assert`` would be stripped when Python runs
    # with -O and an unsupported value would then be accepted silently.
    if data_format == 'channels_first':
      self._pre_conv_shape = [-1, 128, 6, 6]
    elif data_format == 'channels_last':
      self._pre_conv_shape = [-1, 6, 6, 128]
    else:
      raise ValueError(
          "data_format must be 'channels_first' or 'channels_last', got %r" %
          (data_format,))
    self.fc1 = layers.Dense(6 * 6 * 128, activation=tf.tanh)

    # In call(), we reshape the output of fc1 to _pre_conv_shape

    # Deconvolution layer. Resulting image shape: (batch, 14, 14, 64)
    self.conv1 = layers.Conv2DTranspose(
        64, 4, strides=2, activation=None, data_format=data_format)

    # Deconvolution layer. Resulting image shape: (batch, 28, 28, 1)
    self.conv2 = layers.Conv2DTranspose(
        1, 2, strides=2, activation=tf.nn.sigmoid, data_format=data_format)

  def call(self, inputs):
    """Return a batch of generated images.

    Users should invoke __call__ to run the network, which delegates to this
    method (and not call this method directly).

    Args:
      inputs: A batch of noise vectors as a Tensor with shape
        [batch_size, length of noise vectors].

    Returns:
      A Tensor containing generated images. If data_format is 'channels_last',
      the shape of returned images is [batch_size, 28, 28, 1], else
      [batch_size, 1, 28, 28]
    """
    x = self.fc1(inputs)
    x = tf.reshape(x, shape=self._pre_conv_shape)
    x = self.conv1(x)
    x = self.conv2(x)
    return x
def discriminator_loss(discriminator_real_outputs, discriminator_gen_outputs):
  """Original discriminator loss for GANs, with label smoothing.

  Sums the sigmoid cross entropy of the real outputs against all-ones labels
  (smoothed by 0.25) and of the generated outputs against all-zeros labels,
  and records the sum as the 'discriminator_loss' summary scalar.

  See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661) for more
  details.

  Args:
    discriminator_real_outputs: Discriminator output on real data.
    discriminator_gen_outputs: Discriminator output on generated data. Expected
      to be in the range of (-inf, inf).

  Returns:
    A scalar loss Tensor.
  """
  # Real images should be classified as 1.
  real_targets = tf.ones_like(discriminator_real_outputs)
  real_term = tf.losses.sigmoid_cross_entropy(
      multi_class_labels=real_targets,
      logits=discriminator_real_outputs,
      label_smoothing=0.25)
  # Generated images should be classified as 0.
  fake_targets = tf.zeros_like(discriminator_gen_outputs)
  fake_term = tf.losses.sigmoid_cross_entropy(
      multi_class_labels=fake_targets,
      logits=discriminator_gen_outputs)
  total = real_term + fake_term
  tf.contrib.summary.scalar('discriminator_loss', total)
  return total
def generator_loss(discriminator_gen_outputs):
  """Original generator loss for GANs.

  L = -log(sigmoid(D(G(z))))

  The value is also recorded as the 'generator_loss' summary scalar.

  See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661)
  for more details.

  Args:
    discriminator_gen_outputs: Discriminator output on generated data. Expected
      to be in the range of (-inf, inf).

  Returns:
    A scalar loss Tensor.
  """
  # The generator is rewarded when its images are classified as real (1).
  wanted_labels = tf.ones_like(discriminator_gen_outputs)
  gen_loss = tf.losses.sigmoid_cross_entropy(
      multi_class_labels=wanted_labels,
      logits=discriminator_gen_outputs)
  tf.contrib.summary.scalar('generator_loss', gen_loss)
  return gen_loss
def train_one_epoch(generator, discriminator, generator_optimizer,
                    discriminator_optimizer, dataset, step_counter,
                    log_interval, noise_dim):
  """Train `generator` and `discriminator` models on `dataset`.

  Args:
    generator: Generator model.
    discriminator: Discriminator model.
    generator_optimizer: Optimizer to use for generator.
    discriminator_optimizer: Optimizer to use for discriminator.
    dataset: Dataset of images to train on.
    step_counter: An integer variable, used to write summaries regularly.
    log_interval: How many steps to wait between logging and collecting
      summaries.
    noise_dim: Dimension of noise vector to use.
  """
  total_generator_loss = 0.0
  total_discriminator_loss = 0.0
  for (batch_index, images) in enumerate(dataset):
    with tf.device('/cpu:0'):
      tf.assign_add(step_counter, 1)

    with tf.contrib.summary.record_summaries_every_n_global_steps(
        log_interval, global_step=step_counter):
      current_batch_size = images.shape[0]
      noise = tf.random_uniform(
          shape=[current_batch_size, noise_dim],
          minval=-1.,
          maxval=1.,
          seed=batch_index)

      # we can use 2 tapes or a single persistent tape.
      # Using two tapes is memory efficient since intermediate tensors can be
      # released between the two .gradient() calls below
      with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise)
        tf.contrib.summary.image(
            'generated_images',
            tf.reshape(generated_images, [-1, 28, 28, 1]),
            max_images=10)

        discriminator_gen_outputs = discriminator(generated_images)
        discriminator_real_outputs = discriminator(images)
        discriminator_loss_val = discriminator_loss(discriminator_real_outputs,
                                                    discriminator_gen_outputs)
        total_discriminator_loss += discriminator_loss_val

        generator_loss_val = generator_loss(discriminator_gen_outputs)
        total_generator_loss += generator_loss_val

      generator_grad = gen_tape.gradient(generator_loss_val,
                                         generator.variables)
      discriminator_grad = disc_tape.gradient(discriminator_loss_val,
                                              discriminator.variables)

      generator_optimizer.apply_gradients(
          zip(generator_grad, generator.variables))
      discriminator_optimizer.apply_gradients(
          zip(discriminator_grad, discriminator.variables))

      if log_interval and batch_index > 0 and batch_index % log_interval == 0:
        # batch_index is zero-based, so the running totals already cover
        # batch_index + 1 batches; divide by that count for a true average.
        batches_seen = batch_index + 1
        print('Batch #%d\tAverage Generator Loss: %.6f\t'
              'Average Discriminator Loss: %.6f' %
              (batch_index, total_generator_loss / batches_seen,
               total_discriminator_loss / batches_seen))
def main(_):
  """Train the GAN for three epochs, checkpointing after each one.

  Reads its settings from the module-level FLAGS namespace. The positional
  argument supplied by ``tf.app.run`` is ignored.
  """
  # Prefer the GPU (channels-first layout) unless it is disabled or absent.
  gpu_usable = not FLAGS.no_gpu and tf.contrib.eager.num_gpus() > 0
  if gpu_usable:
    device, data_format = '/gpu:0', 'channels_first'
  else:
    device, data_format = '/cpu:0', 'channels_last'
  print('Using device %s, and data format %s.' % (device, data_format))

  # Load the datasets
  data = input_data.read_data_sets(FLAGS.data_dir)
  train_images = data.train.images[:1280]
  dataset = tf.data.Dataset.from_tensor_slices(train_images)
  dataset = dataset.shuffle(60000).batch(FLAGS.batch_size)

  # Create the models and optimizers.
  model_objects = {
      'generator': Generator(data_format),
      'discriminator': Discriminator(data_format),
      'generator_optimizer': tf.train.AdamOptimizer(FLAGS.lr),
      'discriminator_optimizer': tf.train.AdamOptimizer(FLAGS.lr),
      'step_counter': tf.train.get_or_create_global_step(),
  }

  # Prepare summary writer and checkpoint info
  summary_writer = tf.contrib.summary.create_file_writer(
      FLAGS.output_dir, flush_millis=1000)
  checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
  latest_cpkt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
  if latest_cpkt:
    print('Using latest checkpoint at ' + latest_cpkt)
  checkpoint = tf.train.Checkpoint(**model_objects)
  # Restore variables on creation if a checkpoint exists.
  checkpoint.restore(latest_cpkt)

  with tf.device(device):
    for _epoch in range(3):
      started_at = time.time()
      with summary_writer.as_default():
        train_one_epoch(dataset=dataset, log_interval=FLAGS.log_interval,
                        noise_dim=FLAGS.noise, **model_objects)
      # Measure the training time only, before the checkpoint is written.
      elapsed = time.time() - started_at
      checkpoint.save(checkpoint_prefix)
      print('\nTrain time for epoch #%d (step %d): %f' %
            (checkpoint.save_counter.numpy(),
             checkpoint.step_counter.numpy(),
             elapsed))
if __name__ == '__main__':
  # Command-line options. The help texts state the actual default values.
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--data-dir',
      type=str,
      default='/tmp/tensorflow/mnist/input_data',
      help=('Directory for storing input data (default '
            '/tmp/tensorflow/mnist/input_data)'))
  parser.add_argument(
      '--batch-size',
      type=int,
      default=16,
      metavar='N',
      help='input batch size for training (default: 16)')
  parser.add_argument(
      '--log-interval',
      type=int,
      default=1,
      metavar='N',
      help=('number of batches between logging and writing summaries '
            '(default: 1)'))
  parser.add_argument(
      '--output_dir',
      type=str,
      default='/tmp/tensorflow/',
      metavar='DIR',
      help='Directory to write TensorBoard summaries (default /tmp/tensorflow/)')
  parser.add_argument(
      '--checkpoint_dir',
      type=str,
      default='/tmp/tensorflow/mnist/checkpoints/',
      metavar='DIR',
      help=('Directory to save checkpoints in (once per epoch) (default '
            '/tmp/tensorflow/mnist/checkpoints/)'))
  parser.add_argument(
      '--lr',
      type=float,
      default=0.001,
      metavar='LR',
      help='learning rate (default: 0.001)')
  parser.add_argument(
      '--noise',
      type=int,
      default=100,
      metavar='N',
      help='Length of noise vector for generator input (default: 100)')
  parser.add_argument(
      '--no-gpu',
      action='store_true',
      default=False,
      help='disables GPU usage even if a GPU is available')

  # Options argparse recognises go into FLAGS; the remaining arguments are
  # passed on to tf.app.run together with the program name.
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

View File

@@ -0,0 +1,171 @@
# TRAINS - Example of tensorflow mnist training model logging
#
# Save and Restore a model using TensorFlow.
# This example is using the MNIST database of handwritten digits
# (http://yann.lecun.com/exdb/mnist/)
#
# Author: Aymeric Damien
# Project: https://github.com/aymericdamien/TensorFlow-Examples/
from __future__ import print_function
from os.path import exists
import numpy as np
import tensorflow as tf
from trains import Task
MODEL_PATH = "/tmp/module_no_signatures"
task = Task.init(project_name='examples', task_name='Tensorflow mnist example')

## block
# Fit a tiny Keras model on random data for a single step.
X_train = np.random.rand(100, 3)
y_train = np.random.rand(100, 1)

model = tf.keras.models.Sequential([tf.keras.layers.Dense(1)])
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.SGD(),
              metrics=['accuracy'])
# ``epochs`` is the current name of the legacy ``nb_epoch`` argument.
model.fit(X_train, y_train, steps_per_epoch=1, epochs=1)

# If a saved model already exists at MODEL_PATH, try to load it in a session
# that has its own empty graph.
with tf.Session(graph=tf.Graph()) as sess:
    if exists(MODEL_PATH):
        try:
            tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], MODEL_PATH)
            m2 = tf.saved_model.load(sess, [tf.saved_model.tag_constants.SERVING], MODEL_PATH)
        except Exception as ex:
            # Loading stays best-effort, but the failure is reported instead
            # of being silently discarded.
            print('Could not load saved model from {}: {}'.format(MODEL_PATH, ex))
## block end
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
# Labels are loaded one-hot encoded, matching the n_classes-wide placeholder below.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Parameters
# All hyper-parameters live in one dict so they can be connected to the task below.
parameters = {
'learning_rate': 0.001,
'batch_size': 100,
'display_step': 1,
'model_path': "/tmp/model.ckpt",
# Network Parameters
'n_hidden_1': 256, # 1st layer number of features
'n_hidden_2': 256, # 2nd layer number of features
'n_input': 784, # MNIST data input (img shape: 28*28)
'n_classes': 10, # MNIST total classes (0-9 digits)
}
# TRAINS: connect parameters with the experiment/task for logging
# The object returned by connect() replaces the dict and is what the rest of
# the script reads from.
parameters = task.connect(parameters)
# tf Graph input
# x holds batches of flattened images, y the matching one-hot labels; the
# first dimension is left open (None).
x = tf.placeholder("float", [None, parameters['n_input']])
y = tf.placeholder("float", [None, parameters['n_classes']])
# Create model
def multilayer_perceptron(x, weights, biases):
    """Build a two-hidden-layer perceptron on top of `x`.

    :param x: input tensor, multiplied by weights['h1']
    :param weights: mapping with the entries 'h1', 'h2' and 'out'
    :param biases: mapping with the entries 'b1', 'b2' and 'out'
    :return: output of the last layer (linear, no activation applied)
    """
    # First hidden layer: affine transform followed by RELU
    hidden_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    # Second hidden layer: affine transform followed by RELU
    hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, weights['h2']), biases['b2']))
    # Output layer with linear activation
    return tf.matmul(hidden_2, weights['out']) + biases['out']
# Store layers weight & bias
def _random_normal_variable(shape):
    """Create a tf.Variable whose initial value comes from tf.random_normal(shape)."""
    return tf.Variable(tf.random_normal(shape))


# Weight matrices, keyed the way multilayer_perceptron() looks them up
weights = {
    'h1': _random_normal_variable([parameters['n_input'], parameters['n_hidden_1']]),
    'h2': _random_normal_variable([parameters['n_hidden_1'], parameters['n_hidden_2']]),
    'out': _random_normal_variable([parameters['n_hidden_2'], parameters['n_classes']]),
}
# Bias vectors, keyed the way multilayer_perceptron() looks them up
biases = {
    'b1': _random_normal_variable([parameters['n_hidden_1']]),
    'b2': _random_normal_variable([parameters['n_hidden_2']]),
    'out': _random_normal_variable([parameters['n_classes']]),
}

# Construct model
pred = multilayer_perceptron(x, weights, biases)

# Define loss and optimizer
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
cost = tf.reduce_mean(cross_entropy)
adam = tf.train.AdamOptimizer(learning_rate=parameters['learning_rate'])
optimizer = adam.minimize(cost)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# 'Saver' op to save and restore all the variables
saver = tf.train.Saver()
# Running first session
# Opens a session, trains for 3 epochs over mnist.train, prints the accuracy on
# mnist.test and writes the variables to parameters['model_path'] through `saver`.
# NOTE(review): this listing carries no indentation, so the nesting of the statements
# below cannot be read off the text - confirm against the real file before re-indenting.
print("Starting 1st session...")
with tf.Session() as sess:
# Run the initializer
sess.run(init)
# Training cycle
for epoch in range(3):
avg_cost = 0.
# Number of whole batches in the training set (int() drops any fractional remainder)
total_batch = int(mnist.train.num_examples/parameters['batch_size'])
# Loop over all batches
for i in range(total_batch):
batch_x, batch_y = mnist.train.next_batch(parameters['batch_size'])
# Run optimization op (backprop) and cost op (to get loss value)
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
y: batch_y})
# Compute average loss: every batch adds its cost divided by the number of batches
avg_cost += c / total_batch
# Display logs per epoch step
if epoch % parameters['display_step'] == 0:
print("Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.9f}".format(avg_cost))
# Write the variables to parameters['model_path']; save_path keeps what saver.save() returns
# NOTE(review): unclear whether this save belongs to the `if` above or runs on every epoch - confirm
save_path = saver.save(sess, parameters['model_path'])
print("First Optimization Finished!")
# Test model: compare the arg-max of pred with the arg-max of y along axis 1
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# Calculate accuracy: mean of the comparison results cast to float
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
# Save model weights to disk
save_path = saver.save(sess, parameters['model_path'])
print("Model saved in file: %s" % save_path)
# Running a new session
# Restores the variables written to parameters['model_path'], trains for 7 more
# epochs and prints the accuracy on mnist.test again.
print("Starting 2nd session...")
with tf.Session() as sess:
    # Initialize variables
    sess.run(init)

    # Restore model weights from previously saved model
    saver.restore(sess, parameters['model_path'])
    # save_path is the value bound by the earlier saver.save() call
    print("Model restored from file: %s" % save_path)

    # Resume training
    # Number of whole batches in the training set; the same for every epoch
    total_batch = int(mnist.train.num_examples / parameters['batch_size'])
    for epoch in range(7):
        avg_cost = 0.
        # Loop over all batches
        for _ in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(parameters['batch_size'])
            batch_feed = {x: batch_x, y: batch_y}
            # Run optimization op (backprop) and cost op (to get loss value)
            _, batch_cost = sess.run([optimizer, cost], feed_dict=batch_feed)
            # Compute average loss
            avg_cost += batch_cost / total_batch
        # Display logs per epoch step
        if epoch % parameters['display_step'] == 0:
            epoch_label = '%04d' % (epoch + 1)
            cost_label = "{:.9f}".format(avg_cost)
            print("Epoch:", epoch_label, "cost=", cost_label)
    print("Second Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    test_feed = {x: mnist.test.images, y: mnist.test.labels}
    print("Accuracy:", accuracy.eval(test_feed))

131
examples/trains.conf Normal file
View File

@@ -0,0 +1,131 @@
# TRAINS SDK configuration file
api {
host: http://localhost:8008
credentials {"access_key": "EGRTCO8JMSIGI6S39GTP43NFWXDQOW", "secret_key": "x!XTov_G-#vspE*Y(h$Anm&DIc5Ou-F)jsl$PdOyj5wG1&E!Z8"}
}
sdk {
# TRAINS - default SDK configuration
storage {
cache {
# Defaults to system temp folder / cache
default_base_dir: "~/.trains/cache"
}
}
metrics {
# History size for debug files per metric/variant. For each metric/variant combination with an attached file
# (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than
# X files are stored in the upload destination for each metric/variant combination.
file_history_size: 100
# Settings for generated debug images
images {
format: JPEG
quality: 87
subsampling: 0
}
}
network {
metrics {
# Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
# a specific iteration
file_upload_threads: 4
# Warn about upload starvation if no uploads were made in specified period while file-bearing events keep
# being sent for upload
file_upload_starvation_warning_sec: 120
}
iteration {
# Max number of retries when getting frames if the server returned an error (http code 500)
max_retries_on_server_error: 5
# Backoff factor for consecutive retry attempts.
# SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.
retry_backoff_factor_sec: 10
}
}
aws {
s3 {
# S3 credentials, used for read/write access by various SDK elements
# default, used for any bucket not specified below
key: ""
secret: ""
region: ""
credentials: [
# specifies key/secret credentials to use when handling s3 urls (read or write)
# {
# bucket: "my-bucket-name"
# key: "my-access-key"
# secret: "my-secret-key"
# },
# {
# # This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)
# host: "my-minio-host:9000"
# key: "12345678"
# secret: "12345678"
# multipart: false
# secure: false
# }
]
}
boto3 {
pool_connections: 512
max_multipart_concurrency: 16
}
}
google.storage {
# # Default project and credentials file
# # Will be used when no bucket configuration is found
# project: "trains"
# credentials_json: "/path/to/credentials.json"
# # Specific credentials per bucket and sub directory
# credentials = [
# {
# bucket: "my-bucket"
# subdir: "path/in/bucket" # Not required
# project: "trains"
# credentials_json: "/path/to/credentials.json"
# },
# ]
}
log {
# debugging feature: set this to true to make null log propagate messages to root logger (so they appear in stdout)
null_log_propagate: False
task_log_buffer_capacity: 66
# disable urllib3 log messages at info level and below
disable_urllib3_info: True
}
development {
# Development-mode options
# dev task reuse window
task_reuse_time_window_in_hours: 72.0
# Run VCS repository detection asynchronously
vcs_repo_detect_async: False
# Store uncommitted git/hg source code diff in experiment manifest when training in development mode
# This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
store_uncommitted_code_diff_on_train: True
# Support stopping an experiment in case it was externally stopped, status was changed or task was reset
support_stopping: True
# Development mode worker
worker {
# Status report period in seconds
report_period_sec: 2
# Log all stdout & stderr
log_stdout: True
}
}
}