mirror of
https://github.com/clearml/clearml
synced 2025-06-23 01:55:38 +00:00
Merge pull request #150 from danmalowany-allegro/master
Update Jupiter notebook example scripts
This commit is contained in:
commit
f036213d91
@ -15,7 +15,7 @@
|
||||
"! pip install -U torchaudio==0.5.0\n",
|
||||
"! pip install -U torchvision==0.6.0\n",
|
||||
"! pip install -U matplotlib==3.2.1\n",
|
||||
"! pip install -U trains==0.15.0\n",
|
||||
"! pip install -U trains>=0.15.0\n",
|
||||
"! pip install -U pandas==1.0.4\n",
|
||||
"! pip install -U numpy==1.18.4\n",
|
||||
"! pip install -U tensorboard==2.2.1"
|
||||
@ -283,10 +283,10 @@
|
||||
" tensorboard_writer.add_scalar('learning rate/lr', optimizer.param_groups[0]['lr'], iteration)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" if batch_idx % debug_interval == 0: # report debug image every 500 mini-batches\n",
|
||||
" if batch_idx % debug_interval == 0: # report debug image every \"debug_interval\" mini-batches\n",
|
||||
" for n, (inp, pred, label) in enumerate(zip(inputs, predicted, labels)):\n",
|
||||
" series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()])\n",
|
||||
" tensorboard_writer.add_image('Train MelSpectrogram samples/{}'.format(n), \n",
|
||||
" tensorboard_writer.add_image('Train MelSpectrogram samples/{}_{}_{}'.format(batch_idx, n, series), \n",
|
||||
" plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration)"
|
||||
]
|
||||
},
|
||||
@ -319,12 +319,12 @@
|
||||
" class_total[label] += 1\n",
|
||||
" \n",
|
||||
" iteration = (epoch + 1) * len(train_loader)\n",
|
||||
" if idx % debug_interval == 0: # report debug image every 100 mini-batches\n",
|
||||
" if idx % debug_interval == 0: # report debug image every \"debug_interval\" mini-batches\n",
|
||||
" for n, (sound, inp, pred, label) in enumerate(zip(sounds, inputs, predicted, labels)):\n",
|
||||
" series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()])\n",
|
||||
" tensorboard_writer.add_audio('Test audio samples/{}'.format(n), \n",
|
||||
" tensorboard_writer.add_audio('Test audio samples/{}_{}_{}'.format(idx, n, series), \n",
|
||||
" sound, iteration, int(sample_rate[n]))\n",
|
||||
" tensorboard_writer.add_image('Test MelSpectrogram samples/{}_{}'.format(idx, n), \n",
|
||||
" tensorboard_writer.add_image('Test MelSpectrogram samples/{}_{}_{}'.format(idx, n, series), \n",
|
||||
" plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration)\n",
|
||||
"\n",
|
||||
" total_accuracy = 100 * sum(class_correct)/sum(class_total)\n",
|
||||
@ -377,4 +377,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
}
|
@ -12,7 +12,7 @@
|
||||
"! pip install -U torch==1.5.0\n",
|
||||
"! pip install -U torchaudio==0.5.0\n",
|
||||
"! pip install -U matplotlib==3.2.1\n",
|
||||
"! pip install -U trains==0.15.0\n",
|
||||
"! pip install -U trains>=0.15.0\n",
|
||||
"! pip install -U tensorboard==2.2.1"
|
||||
]
|
||||
},
|
||||
@ -87,10 +87,10 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -125,4 +125,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
}
|
||||
|
@ -133,4 +133,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
}
|
||||
|
@ -45,7 +45,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"task = Task.init(project_name='Hyper-Parameter Search', task_name='image_classification_CIFAR10')\n",
|
||||
"task = Task.init(project_name='Image Example', task_name='image_classification_CIFAR10')\n",
|
||||
"configuration_dict = {'number_of_epochs': 3, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
|
||||
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
|
||||
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
|
||||
|
@ -0,0 +1,317 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install -U pip\n",
|
||||
"! pip install -U torch==1.5.0\n",
|
||||
"! pip install -U torchtext==0.6.0\n",
|
||||
"! pip install -U matplotlib==3.2.1\n",
|
||||
"! pip install -U trains>=0.15.0\n",
|
||||
"! pip install -U tensorboard==2.2.1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"import torch\n",
|
||||
"import torch.nn as nn\n",
|
||||
"import torch.nn.functional as F\n",
|
||||
"import torchtext\n",
|
||||
"from torchtext.datasets import text_classification\n",
|
||||
"from torch.utils.data import DataLoader\n",
|
||||
"from torch.utils.tensorboard import SummaryWriter\n",
|
||||
"\n",
|
||||
"from trains import Task\n",
|
||||
"\n",
|
||||
"%matplotlib inline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"task = Task.init(project_name='Text Example', task_name='text classifier')\n",
|
||||
"configuration_dict = {'number_of_epochs': 6, 'batch_size': 16, 'ngrams': 2, 'base_lr': 1.0}\n",
|
||||
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
|
||||
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if not os.path.isdir('./data'):\n",
|
||||
" os.mkdir('./data')\n",
|
||||
"train_dataset, test_dataset = text_classification.DATASETS['AG_NEWS'](root='./data', \n",
|
||||
" ngrams=configuration_dict.get('ngrams', 2))\n",
|
||||
"vocabulary = train_dataset.get_vocab()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def generate_batch(batch):\n",
|
||||
" label = torch.tensor([entry[0] for entry in batch])\n",
|
||||
" # original data batch input are packed into a list and concatenated as a single tensor\n",
|
||||
" text = [entry[1] for entry in batch]\n",
|
||||
" # offsets is a tensor of delimiters to represent the beginning index of each sequence in the text tensor.\n",
|
||||
" offsets = [0] + [len(entry) for entry in text] \n",
|
||||
" \n",
|
||||
" # torch.Tensor.cumsum returns the cumulative sum of elements in the dimension dim.\n",
|
||||
" offsets = torch.tensor(offsets[:-1]).cumsum(dim=0)\n",
|
||||
" text = torch.cat(text)\n",
|
||||
" return text, offsets, label\n",
|
||||
"\n",
|
||||
"train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = configuration_dict.get('batch_size', 16), \n",
|
||||
" shuffle = True, pin_memory=True, collate_fn=generate_batch)\n",
|
||||
"test_loader = torch.utils.data.DataLoader(test_dataset, batch_size = configuration_dict.get('batch_size', 16), \n",
|
||||
" shuffle = False, pin_memory=True, collate_fn=generate_batch)\n",
|
||||
"\n",
|
||||
"classes = (\"World\", \"Sports\", \"Business\", \"Sci/Tec\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class TextSentiment(nn.Module):\n",
|
||||
" def __init__(self, vocab_size, embed_dim, num_class):\n",
|
||||
" super().__init__()\n",
|
||||
" self.embedding = nn.EmbeddingBag(vocab_size, embed_dim, sparse=True)\n",
|
||||
" self.fc = nn.Linear(embed_dim, num_class)\n",
|
||||
" self.init_weights()\n",
|
||||
"\n",
|
||||
" def init_weights(self):\n",
|
||||
" initrange = 0.5\n",
|
||||
" self.embedding.weight.data.uniform_(-initrange, initrange)\n",
|
||||
" self.fc.weight.data.uniform_(-initrange, initrange)\n",
|
||||
" self.fc.bias.data.zero_()\n",
|
||||
"\n",
|
||||
" def forward(self, text, offsets):\n",
|
||||
" embedded = self.embedding(text, offsets)\n",
|
||||
" return self.fc(embedded)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"VOCAB_SIZE = len(train_dataset.get_vocab())\n",
|
||||
"EMBED_DIM = 32\n",
|
||||
"NUN_CLASS = len(train_dataset.get_labels())\n",
|
||||
"model = TextSentiment(VOCAB_SIZE, EMBED_DIM, NUN_CLASS)\n",
|
||||
"\n",
|
||||
"device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu')\n",
|
||||
"print('Device to use: {}'.format(device))\n",
|
||||
"model.to(device)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"criterion = torch.nn.CrossEntropyLoss().to(device)\n",
|
||||
"optimizer = torch.optim.SGD(model.parameters(), lr=configuration_dict.get('base_lr', 1.0))\n",
|
||||
"scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 2, gamma=0.9)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tensorboard_writer = SummaryWriter('./tensorboard_logs')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def train_func(data, epoch):\n",
|
||||
" # Train the model\n",
|
||||
" train_loss = 0\n",
|
||||
" train_acc = 0\n",
|
||||
" for batch_idx, (text, offsets, cls) in enumerate(data):\n",
|
||||
" optimizer.zero_grad()\n",
|
||||
" text, offsets, cls = text.to(device), offsets.to(device), cls.to(device)\n",
|
||||
" output = model(text, offsets)\n",
|
||||
" loss = criterion(output, cls)\n",
|
||||
" train_loss += loss.item()\n",
|
||||
" loss.backward()\n",
|
||||
" optimizer.step()\n",
|
||||
" train_acc += (output.argmax(1) == cls).sum().item()\n",
|
||||
" \n",
|
||||
" iteration = epoch * len(train_loader) + batch_idx\n",
|
||||
" if batch_idx % log_interval == 0: \n",
|
||||
" print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'\n",
|
||||
" .format(epoch, batch_idx * len(cls), len(train_dataset), \n",
|
||||
" 100. * batch_idx / len(train_loader), loss))\n",
|
||||
" tensorboard_writer.add_scalar('training loss/loss', loss, iteration)\n",
|
||||
" tensorboard_writer.add_scalar('learning rate/lr', optimizer.param_groups[0]['lr'], iteration)\n",
|
||||
"\n",
|
||||
" # Adjust the learning rate\n",
|
||||
" scheduler.step()\n",
|
||||
"\n",
|
||||
" return train_loss / len(train_dataset), train_acc / len(train_dataset)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def test(data, epoch):\n",
|
||||
" loss = 0\n",
|
||||
" acc = 0\n",
|
||||
" for idx, (text, offsets, cls) in enumerate(data):\n",
|
||||
" text, offsets, cls = text.to(device), offsets.to(device), cls.to(device)\n",
|
||||
" with torch.no_grad():\n",
|
||||
" output = model(text, offsets)\n",
|
||||
" predicted = output.argmax(1)\n",
|
||||
" loss = criterion(output, cls)\n",
|
||||
" loss += loss.item()\n",
|
||||
" acc += (predicted == cls).sum().item()\n",
|
||||
" \n",
|
||||
" iteration = (epoch + 1) * len(train_loader)\n",
|
||||
" if idx % debug_interval == 0: # report debug text every \"debug_interval\" mini-batches\n",
|
||||
" offsets = offsets.tolist() + [len(text)]\n",
|
||||
" for n, (pred, label) in enumerate(zip(predicted, cls)):\n",
|
||||
" ids_to_text = [vocabulary.itos[id] for id in text[offsets[n]:offsets[n+1]]]\n",
|
||||
" series = '{}_{}_label_{}_pred_{}'.format(idx, n, classes[label], classes[pred])\n",
|
||||
" tensorboard_writer.add_text('Test text samples/{}'.format(series), \n",
|
||||
" ' '.join(ids_to_text), iteration)\n",
|
||||
"\n",
|
||||
" return loss / len(test_dataset), acc / len(test_dataset)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"log_interval = 200\n",
|
||||
"debug_interval = 500\n",
|
||||
"for epoch in range(configuration_dict.get('number_of_epochs', 6)):\n",
|
||||
" start_time = time.time()\n",
|
||||
" \n",
|
||||
" train_loss, train_acc = train_func(train_loader, epoch)\n",
|
||||
" test_loss, test_acc = test(test_loader, epoch)\n",
|
||||
" \n",
|
||||
" secs = int(time.time() - start_time)\n",
|
||||
"\n",
|
||||
" print('Epoch: %d' %(epoch + 1), \" | time in %d minutes, %d seconds\" %(secs / 60, secs % 60))\n",
|
||||
" print(f'\\tLoss: {train_loss:.4f}(train)\\t|\\tAcc: {train_acc * 100:.1f}%(train)')\n",
|
||||
" print(f'\\tLoss: {test_loss:.4f}(test)\\t|\\tAcc: {test_acc * 100:.1f}%(test)')\n",
|
||||
" tensorboard_writer.add_scalar('accuracy/train', train_acc, (epoch + 1) * len(train_loader))\n",
|
||||
" tensorboard_writer.add_scalar('accuracy/test', test_acc, (epoch + 1) * len(train_loader))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"from torchtext.data.utils import ngrams_iterator\n",
|
||||
"from torchtext.data.utils import get_tokenizer\n",
|
||||
"\n",
|
||||
"def predict(text, model, vocab, ngrams):\n",
|
||||
" tokenizer = get_tokenizer(\"basic_english\")\n",
|
||||
" with torch.no_grad():\n",
|
||||
" text = torch.tensor([vocab[token]\n",
|
||||
" for token in ngrams_iterator(tokenizer(text), ngrams)])\n",
|
||||
" output = model(text, torch.tensor([0]))\n",
|
||||
" return output.argmax(1).item()\n",
|
||||
"\n",
|
||||
"ex_text_str = \"MEMPHIS, Tenn. – Four days ago, Jon Rahm was \\\n",
|
||||
" enduring the season’s worst weather conditions on Sunday at The \\\n",
|
||||
" Open on his way to a closing 75 at Royal Portrush, which \\\n",
|
||||
" considering the wind and the rain was a respectable showing. \\\n",
|
||||
" Thursday’s first round at the WGC-FedEx St. Jude Invitational \\\n",
|
||||
" was another story. With temperatures in the mid-80s and hardly any \\\n",
|
||||
" wind, the Spaniard was 13 strokes better in a flawless round. \\\n",
|
||||
" Thanks to his best putting performance on the PGA Tour, Rahm \\\n",
|
||||
" finished with an 8-under 62 for a three-stroke lead, which \\\n",
|
||||
" was even more impressive considering he’d never played the \\\n",
|
||||
" front nine at TPC Southwind.\"\n",
|
||||
"\n",
|
||||
"ans = predict(ex_text_str, model.to(\"cpu\"), vocabulary, configuration_dict.get('ngrams', 2))\n",
|
||||
"print(\"This is a %s news\" %classes[ans])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
Loading…
Reference in New Issue
Block a user