diff --git a/examples/frameworks/pytorch/notebooks/audio/audio_classifier_UrbanSound8K.ipynb b/examples/frameworks/pytorch/notebooks/audio/audio_classification_UrbanSound8K.ipynb similarity index 97% rename from examples/frameworks/pytorch/notebooks/audio/audio_classifier_UrbanSound8K.ipynb rename to examples/frameworks/pytorch/notebooks/audio/audio_classification_UrbanSound8K.ipynb index a11be7a0..782d2d72 100644 --- a/examples/frameworks/pytorch/notebooks/audio/audio_classifier_UrbanSound8K.ipynb +++ b/examples/frameworks/pytorch/notebooks/audio/audio_classification_UrbanSound8K.ipynb @@ -15,7 +15,7 @@ "! pip install -U torchaudio==0.5.0\n", "! pip install -U torchvision==0.6.0\n", "! pip install -U matplotlib==3.2.1\n", - "! pip install -U trains==0.15.0\n", + "! pip install -U trains>=0.15.0\n", "! pip install -U pandas==1.0.4\n", "! pip install -U numpy==1.18.4\n", "! pip install -U tensorboard==2.2.1" @@ -283,10 +283,10 @@ " tensorboard_writer.add_scalar('learning rate/lr', optimizer.param_groups[0]['lr'], iteration)\n", " \n", " \n", - " if batch_idx % debug_interval == 0: # report debug image every 500 mini-batches\n", + " if batch_idx % debug_interval == 0: # report debug image every \"debug_interval\" mini-batches\n", " for n, (inp, pred, label) in enumerate(zip(inputs, predicted, labels)):\n", " series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()])\n", - " tensorboard_writer.add_image('Train MelSpectrogram samples/{}'.format(n), \n", + " tensorboard_writer.add_image('Train MelSpectrogram samples/{}_{}_{}'.format(batch_idx, n, series), \n", " plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration)" ] }, @@ -319,12 +319,12 @@ " class_total[label] += 1\n", " \n", " iteration = (epoch + 1) * len(train_loader)\n", - " if idx % debug_interval == 0: # report debug image every 100 mini-batches\n", + " if idx % debug_interval == 0: # report debug image every \"debug_interval\" mini-batches\n", " for n, (sound, inp, pred, label) in enumerate(zip(sounds, inputs, predicted, labels)):\n", " series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()])\n", - " tensorboard_writer.add_audio('Test audio samples/{}'.format(n), \n", + " tensorboard_writer.add_audio('Test audio samples/{}_{}_{}'.format(idx, n, series), \n", " sound, iteration, int(sample_rate[n]))\n", - " tensorboard_writer.add_image('Test MelSpectrogram samples/{}_{}'.format(idx, n), \n", + " tensorboard_writer.add_image('Test MelSpectrogram samples/{}_{}_{}'.format(idx, n, series), \n", " plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration)\n", "\n", " total_accuracy = 100 * sum(class_correct)/sum(class_total)\n", @@ -377,4 +377,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/examples/frameworks/pytorch/notebooks/audio/audio_preprocessing_example.ipynb b/examples/frameworks/pytorch/notebooks/audio/audio_preprocessing_example.ipynb index 123954ae..6baae9fe 100644 --- a/examples/frameworks/pytorch/notebooks/audio/audio_preprocessing_example.ipynb +++ b/examples/frameworks/pytorch/notebooks/audio/audio_preprocessing_example.ipynb @@ -12,7 +12,7 @@ "! pip install -U torch==1.5.0\n", "! pip install -U torchaudio==0.5.0\n", "! pip install -U matplotlib==3.2.1\n", - "! pip install -U trains==0.15.0\n", + "! pip install -U trains>=0.15.0\n", "! pip install -U tensorboard==2.2.1" ] }, @@ -87,10 +87,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "scrolled": true, "pycharm": { "name": "#%%\n" - } + }, + "scrolled": true }, "outputs": [], "source": [ @@ -125,4 +125,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/examples/frameworks/pytorch/notebooks/image/hyperparameter_search.ipynb b/examples/frameworks/pytorch/notebooks/image/hyperparameter_search.ipynb index 8d529909..8baf115a 100644 --- a/examples/frameworks/pytorch/notebooks/image/hyperparameter_search.ipynb +++ b/examples/frameworks/pytorch/notebooks/image/hyperparameter_search.ipynb @@ -133,4 +133,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/frameworks/pytorch/notebooks/image/image_classification_CIFAR10.ipynb b/examples/frameworks/pytorch/notebooks/image/image_classification_CIFAR10.ipynb index 736b4f69..c4c41703 100644 --- a/examples/frameworks/pytorch/notebooks/image/image_classification_CIFAR10.ipynb +++ b/examples/frameworks/pytorch/notebooks/image/image_classification_CIFAR10.ipynb @@ -45,7 +45,7 @@ "metadata": {}, "outputs": [], "source": [ - "task = Task.init(project_name='Hyper-Parameter Search', task_name='image_classification_CIFAR10')\n", + "task = Task.init(project_name='Image Example', task_name='image_classification_CIFAR10')\n", "configuration_dict = {'number_of_epochs': 3, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n", "configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n", "print(configuration_dict) # printing actual configuration (after override in remote mode)" diff --git a/examples/frameworks/pytorch/notebooks/text/text_classification_AG_NEWS.ipynb b/examples/frameworks/pytorch/notebooks/text/text_classification_AG_NEWS.ipynb new file mode 100644 index 00000000..1990f94d --- /dev/null +++ b/examples/frameworks/pytorch/notebooks/text/text_classification_AG_NEWS.ipynb @@ -0,0 +1,317 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! pip install -U pip\n", + "! pip install -U torch==1.5.0\n", + "! pip install -U torchtext==0.6.0\n", + "! pip install -U matplotlib==3.2.1\n", + "! pip install -U trains>=0.15.0\n", + "! pip install -U tensorboard==2.2.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torchtext\n", + "from torchtext.datasets import text_classification\n", + "from torch.utils.data import DataLoader\n", + "from torch.utils.tensorboard import SummaryWriter\n", + "\n", + "from trains import Task\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "task = Task.init(project_name='Text Example', task_name='text classifier')\n", + "configuration_dict = {'number_of_epochs': 6, 'batch_size': 16, 'ngrams': 2, 'base_lr': 1.0}\n", + "configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n", + "print(configuration_dict) # printing actual configuration (after override in remote mode)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "if not os.path.isdir('./data'):\n", + " os.mkdir('./data')\n", + "train_dataset, test_dataset = text_classification.DATASETS['AG_NEWS'](root='./data', \n", + " ngrams=configuration_dict.get('ngrams', 2))\n", + "vocabulary = train_dataset.get_vocab()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def generate_batch(batch):\n", + " label = torch.tensor([entry[0] for entry in batch])\n", + " # original data batch input are packed into a list and concatenated as a single tensor\n", + " text = [entry[1] for entry in batch]\n", + " # offsets is a tensor of delimiters to represent the beginning index of each sequence in the text tensor.\n", + " offsets = [0] + [len(entry) for entry in text] \n", + " \n", + " # torch.Tensor.cumsum returns the cumulative sum of elements in the dimension dim.\n", + " offsets = torch.tensor(offsets[:-1]).cumsum(dim=0)\n", + " text = torch.cat(text)\n", + " return text, offsets, label\n", + "\n", + "train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = configuration_dict.get('batch_size', 16), \n", + " shuffle = True, pin_memory=True, collate_fn=generate_batch)\n", + "test_loader = torch.utils.data.DataLoader(test_dataset, batch_size = configuration_dict.get('batch_size', 16), \n", + " shuffle = False, pin_memory=True, collate_fn=generate_batch)\n", + "\n", + "classes = (\"World\", \"Sports\", \"Business\", \"Sci/Tec\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class TextSentiment(nn.Module):\n", + " def __init__(self, vocab_size, embed_dim, num_class):\n", + " super().__init__()\n", + " self.embedding = nn.EmbeddingBag(vocab_size, embed_dim, sparse=True)\n", + " self.fc = nn.Linear(embed_dim, num_class)\n", + " self.init_weights()\n", + "\n", + " def init_weights(self):\n", + " initrange = 0.5\n", + " self.embedding.weight.data.uniform_(-initrange, initrange)\n", + " self.fc.weight.data.uniform_(-initrange, initrange)\n", + " self.fc.bias.data.zero_()\n", + "\n", + " def forward(self, text, offsets):\n", + " embedded = self.embedding(text, offsets)\n", + " return self.fc(embedded)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "VOCAB_SIZE = len(train_dataset.get_vocab())\n", + "EMBED_DIM = 32\n", + "NUN_CLASS = len(train_dataset.get_labels())\n", + "model = TextSentiment(VOCAB_SIZE, EMBED_DIM, NUN_CLASS)\n", + "\n", + "device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu')\n", + "print('Device to use: {}'.format(device))\n", + "model.to(device)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "criterion = torch.nn.CrossEntropyLoss().to(device)\n", + "optimizer = torch.optim.SGD(model.parameters(), lr=configuration_dict.get('base_lr', 1.0))\n", + "scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 2, gamma=0.9)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "tensorboard_writer = SummaryWriter('./tensorboard_logs')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def train_func(data, epoch):\n", + " # Train the model\n", + " train_loss = 0\n", + " train_acc = 0\n", + " for batch_idx, (text, offsets, cls) in enumerate(data):\n", + " optimizer.zero_grad()\n", + " text, offsets, cls = text.to(device), offsets.to(device), cls.to(device)\n", + " output = model(text, offsets)\n", + " loss = criterion(output, cls)\n", + " train_loss += loss.item()\n", + " loss.backward()\n", + " optimizer.step()\n", + " train_acc += (output.argmax(1) == cls).sum().item()\n", + " \n", + " iteration = epoch * len(train_loader) + batch_idx\n", + " if batch_idx % log_interval == 0: \n", + " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'\n", + " .format(epoch, batch_idx * len(cls), len(train_dataset), \n", + " 100. * batch_idx / len(train_loader), loss))\n", + " tensorboard_writer.add_scalar('training loss/loss', loss, iteration)\n", + " tensorboard_writer.add_scalar('learning rate/lr', optimizer.param_groups[0]['lr'], iteration)\n", + "\n", + " # Adjust the learning rate\n", + " scheduler.step()\n", + "\n", + " return train_loss / len(train_dataset), train_acc / len(train_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def test(data, epoch):\n", + " loss = 0\n", + " acc = 0\n", + " for idx, (text, offsets, cls) in enumerate(data):\n", + " text, offsets, cls = text.to(device), offsets.to(device), cls.to(device)\n", + " with torch.no_grad():\n", + " output = model(text, offsets)\n", + " predicted = output.argmax(1)\n", + " loss = criterion(output, cls)\n", + " loss += loss.item()\n", + " acc += (predicted == cls).sum().item()\n", + " \n", + " iteration = (epoch + 1) * len(train_loader)\n", + " if idx % debug_interval == 0: # report debug text every \"debug_interval\" mini-batches\n", + " offsets = offsets.tolist() + [len(text)]\n", + " for n, (pred, label) in enumerate(zip(predicted, cls)):\n", + " ids_to_text = [vocabulary.itos[id] for id in text[offsets[n]:offsets[n+1]]]\n", + " series = '{}_{}_label_{}_pred_{}'.format(idx, n, classes[label], classes[pred])\n", + " tensorboard_writer.add_text('Test text samples/{}'.format(series), \n", + " ' '.join(ids_to_text), iteration)\n", + "\n", + " return loss / len(test_dataset), acc / len(test_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "log_interval = 200\n", + "debug_interval = 500\n", + "for epoch in range(configuration_dict.get('number_of_epochs', 6)):\n", + " start_time = time.time()\n", + " \n", + " train_loss, train_acc = train_func(train_loader, epoch)\n", + " test_loss, test_acc = test(test_loader, epoch)\n", + " \n", + " secs = int(time.time() - start_time)\n", + "\n", + " print('Epoch: %d' %(epoch + 1), \" | time in %d minutes, %d seconds\" %(secs / 60, secs % 60))\n", + " print(f'\\tLoss: {train_loss:.4f}(train)\\t|\\tAcc: {train_acc * 100:.1f}%(train)')\n", + " print(f'\\tLoss: {test_loss:.4f}(test)\\t|\\tAcc: {test_acc * 100:.1f}%(test)')\n", + " tensorboard_writer.add_scalar('accuracy/train', train_acc, (epoch + 1) * len(train_loader))\n", + " tensorboard_writer.add_scalar('accuracy/test', test_acc, (epoch + 1) * len(train_loader))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import re\n", + "from torchtext.data.utils import ngrams_iterator\n", + "from torchtext.data.utils import get_tokenizer\n", + "\n", + "def predict(text, model, vocab, ngrams):\n", + " tokenizer = get_tokenizer(\"basic_english\")\n", + " with torch.no_grad():\n", + " text = torch.tensor([vocab[token]\n", + " for token in ngrams_iterator(tokenizer(text), ngrams)])\n", + " output = model(text, torch.tensor([0]))\n", + " return output.argmax(1).item()\n", + "\n", + "ex_text_str = \"MEMPHIS, Tenn. – Four days ago, Jon Rahm was \\\n", + " enduring the season’s worst weather conditions on Sunday at The \\\n", + " Open on his way to a closing 75 at Royal Portrush, which \\\n", + " considering the wind and the rain was a respectable showing. \\\n", + " Thursday’s first round at the WGC-FedEx St. Jude Invitational \\\n", + " was another story. With temperatures in the mid-80s and hardly any \\\n", + " wind, the Spaniard was 13 strokes better in a flawless round. \\\n", + " Thanks to his best putting performance on the PGA Tour, Rahm \\\n", + " finished with an 8-under 62 for a three-stroke lead, which \\\n", + " was even more impressive considering he’d never played the \\\n", + " front nine at TPC Southwind.\"\n", + "\n", + "ans = predict(ex_text_str, model.to(\"cpu\"), vocabulary, configuration_dict.get('ngrams', 2))\n", + "print(\"This is a %s news\" %classes[ans])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file