Added text classification example and updated image and audio examples

This commit is contained in:
danmalowany-allegro 2020-06-22 14:13:03 +03:00
parent 53d0da373f
commit 8cb7c8130a
4 changed files with 6 additions and 386 deletions

View File

@@ -1,380 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "e-YsQrBjzNdX"
},
"outputs": [],
"source": [
"! pip install -U pip\n",
"! pip install -U torch==1.5.0\n",
"! pip install -U torchaudio==0.5.0\n",
"! pip install -U torchvision==0.6.0\n",
"! pip install -U matplotlib==3.2.1\n",
"! pip install -U trains==0.15.0\n",
"! pip install -U pandas==1.0.4\n",
"! pip install -U numpy==1.18.4\n",
"! pip install -U tensorboard==2.2.1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "T7T0Rf26zNdm"
},
"outputs": [],
"source": [
"import PIL\n",
"import io\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"from pathlib2 import Path\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"from torch.utils.data import Dataset\n",
"from torch.utils.tensorboard import SummaryWriter\n",
"\n",
"import torchaudio\n",
"from torchvision.transforms import ToTensor\n",
"\n",
"from trains import Task\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"task = Task.init(project_name='Audio Example', task_name='audio classifier')\n",
"configuration_dict = {'number_of_epochs': 10, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "msiz7QdvzNeA",
"scrolled": true
},
"outputs": [],
"source": [
"# Download UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html)\n",
"path_to_UrbanSound8K = './data/UrbanSound8K'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "wXtmZe7yzNeS"
},
"outputs": [],
"source": [
"class UrbanSoundDataset(Dataset):\n",
"#rapper for the UrbanSound8K dataset\n",
" def __init__(self, csv_path, file_path, folderList):\n",
" self.file_path = file_path\n",
" self.file_names = []\n",
" self.labels = []\n",
" self.folders = []\n",
" \n",
" #loop through the csv entries and only add entries from folders in the folder list\n",
" csvData = pd.read_csv(csv_path)\n",
" for i in range(0,len(csvData)):\n",
" if csvData.iloc[i, 5] in folderList:\n",
" self.file_names.append(csvData.iloc[i, 0])\n",
" self.labels.append(csvData.iloc[i, 6])\n",
" self.folders.append(csvData.iloc[i, 5])\n",
" \n",
" def __getitem__(self, index):\n",
" #format the file path and load the file\n",
" path = self.file_path / (\"fold\" + str(self.folders[index])) / self.file_names[index]\n",
" sound, sample_rate = torchaudio.load(path, out = None, normalization = True)\n",
"\n",
" # UrbanSound8K uses two channels, this will convert them to one\n",
" soundData = torch.mean(sound, dim=0, keepdim=True)\n",
" \n",
" #Make sure all files are the same size\n",
" if soundData.numel() < 160000:\n",
" fixedsize_data = torch.nn.functional.pad(soundData, (0, 160000 - soundData.numel()))\n",
" else:\n",
" fixedsize_data = soundData[0,:160000].reshape(1,160000)\n",
" \n",
" #downsample the audio\n",
" downsample_data = fixedsize_data[::5]\n",
" \n",
" melspectogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate)\n",
" melspectogram = melspectogram_transform(downsample_data)\n",
" melspectogram_db = torchaudio.transforms.AmplitudeToDB()(melspectogram)\n",
"\n",
" return fixedsize_data, sample_rate, melspectogram_db, self.labels[index]\n",
" \n",
" def __len__(self):\n",
" return len(self.file_names)\n",
"\n",
"\n",
"csv_path = Path(path_to_UrbanSound8K) / 'metadata' / 'UrbanSound8K.csv'\n",
"file_path = Path(path_to_UrbanSound8K) / 'audio'\n",
"\n",
"train_set = UrbanSoundDataset(csv_path, file_path, range(1,10))\n",
"test_set = UrbanSoundDataset(csv_path, file_path, [10])\n",
"print(\"Train set size: \" + str(len(train_set)))\n",
"print(\"Test set size: \" + str(len(test_set)))\n",
"\n",
"train_loader = torch.utils.data.DataLoader(train_set, batch_size = configuration_dict.get('batch_size', 4), \n",
" shuffle = True, pin_memory=True, num_workers=1)\n",
"test_loader = torch.utils.data.DataLoader(test_set, batch_size = configuration_dict.get('batch_size', 4), \n",
" shuffle = False, pin_memory=True, num_workers=1)\n",
"\n",
"classes = ('air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 'engine_idling', \n",
" 'gun_shot', 'jackhammer', 'siren', 'street_music')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "ylblw-k1zNeZ"
},
"outputs": [],
"source": [
"class Net(nn.Module):\n",
" def __init__(self, num_classes, dropout_value):\n",
" super(Net,self).__init__()\n",
" self.num_classes = num_classes\n",
" self.dropout_value = dropout_value\n",
" \n",
" self.C1 = nn.Conv2d(1,16,3)\n",
" self.C2 = nn.Conv2d(16,32,3)\n",
" self.C3 = nn.Conv2d(32,64,3)\n",
" self.C4 = nn.Conv2d(64,128,3)\n",
" self.maxpool1 = nn.MaxPool2d(2,2) \n",
" self.fc1 = nn.Linear(128*29*197,128)\n",
" self.fc2 = nn.Linear(128,self.num_classes)\n",
" self.dropout = nn.Dropout(self.dropout_value)\n",
" \n",
" def forward(self,x):\n",
" # add sequence of convolutional and max pooling layers\n",
" x = F.relu(self.C1(x))\n",
" x = self.maxpool1(F.relu(self.C2(x)))\n",
" x = F.relu(self.C3(x))\n",
" x = self.maxpool1(F.relu(self.C4(x)))\n",
" # flatten image input\n",
" x = x.view(-1,128*29*197)\n",
" x = F.relu(self.fc1(self.dropout(x)))\n",
" x = self.fc2(self.dropout(x))\n",
" return x\n",
" \n",
" \n",
"model = Net(len(classes), configuration_dict.get('dropout', 0.25))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "3yKYru14zNef"
},
"outputs": [],
"source": [
"optimizer = optim.SGD(model.parameters(), lr = configuration_dict.get('base_lr', 0.001), momentum = 0.9)\n",
"scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 3, gamma = 0.1)\n",
"criterion = nn.CrossEntropyLoss()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu')\n",
"print('Device to use: {}'.format(device))\n",
"model.to(device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tensorboard_writer = SummaryWriter('./tensorboard_logs')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def plot_signal(signal, title, cmap=None):\n",
" fig = plt.figure()\n",
" if signal.ndim == 1:\n",
" plt.plot(signal)\n",
" else:\n",
" plt.imshow(signal, cmap=cmap) \n",
" plt.title(title)\n",
" \n",
" plot_buf = io.BytesIO()\n",
" plt.savefig(plot_buf, format='jpeg')\n",
" plot_buf.seek(0)\n",
" plt.close(fig)\n",
" return ToTensor()(PIL.Image.open(plot_buf))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "Vdthqz3JzNem"
},
"outputs": [],
"source": [
"def train(model, epoch):\n",
" model.train()\n",
" for batch_idx, (sounds, sample_rate, inputs, labels) in enumerate(train_loader):\n",
" inputs = inputs.to(device)\n",
" labels = labels.to(device)\n",
"\n",
" # zero the parameter gradients\n",
" optimizer.zero_grad()\n",
"\n",
" # forward + backward + optimize\n",
" outputs = model(inputs)\n",
" _, predicted = torch.max(outputs, 1)\n",
" loss = criterion(outputs, labels)\n",
" loss.backward()\n",
" optimizer.step()\n",
" \n",
" iteration = epoch * len(train_loader) + batch_idx\n",
" if batch_idx % log_interval == 0: #print training stats\n",
" print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'\n",
" .format(epoch, batch_idx * len(inputs), len(train_loader.dataset), \n",
" 100. * batch_idx / len(train_loader), loss))\n",
" tensorboard_writer.add_scalar('training loss/loss', loss, iteration)\n",
" tensorboard_writer.add_scalar('learning rate/lr', optimizer.param_groups[0]['lr'], iteration)\n",
" \n",
" \n",
" if batch_idx % debug_interval == 0: # report debug image every 500 mini-batches\n",
" for n, (inp, pred, label) in enumerate(zip(inputs, predicted, labels)):\n",
" series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()])\n",
" tensorboard_writer.add_image('Train MelSpectrogram samples/{}'.format(n), \n",
" plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "LBWoj7u5zNes"
},
"outputs": [],
"source": [
"def test(model, epoch):\n",
" model.eval()\n",
" class_correct = list(0. for i in range(10))\n",
" class_total = list(0. for i in range(10))\n",
" with torch.no_grad():\n",
" for idx, (sounds, sample_rate, inputs, labels) in enumerate(test_loader):\n",
" inputs = inputs.to(device)\n",
" labels = labels.to(device)\n",
"\n",
" outputs = model(inputs)\n",
"\n",
" _, predicted = torch.max(outputs, 1)\n",
" c = (predicted == labels)\n",
" for i in range(len(inputs)):\n",
" label = labels[i].item()\n",
" class_correct[label] += c[i].item()\n",
" class_total[label] += 1\n",
" \n",
" iteration = (epoch + 1) * len(train_loader)\n",
" if idx % debug_interval == 0: # report debug image every 100 mini-batches\n",
" for n, (sound, inp, pred, label) in enumerate(zip(sounds, inputs, predicted, labels)):\n",
" series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()])\n",
" tensorboard_writer.add_audio('Test audio samples/{}'.format(n), \n",
" sound, iteration, int(sample_rate[n]))\n",
" tensorboard_writer.add_image('Test MelSpectrogram samples/{}_{}'.format(idx, n), \n",
" plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration)\n",
"\n",
" total_accuracy = 100 * sum(class_correct)/sum(class_total)\n",
" print('[Iteration {}] Accuracy on the {} test images: {}%\\n'.format(epoch, sum(class_total), total_accuracy))\n",
" tensorboard_writer.add_scalar('accuracy/total', total_accuracy, iteration)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "X5lx3g_5zNey",
"scrolled": false
},
"outputs": [],
"source": [
"log_interval = 100\n",
"debug_interval = 200\n",
"for epoch in range(configuration_dict.get('number_of_epochs', 10)):\n",
" train(model, epoch)\n",
" test(model, epoch)\n",
" scheduler.step()"
]
}
],
"metadata": {
"colab": {
"name": "audio_classifier_tutorial.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 1
}

View File

@@ -12,7 +12,7 @@
 "! pip install -U torch==1.5.0\n",
 "! pip install -U torchaudio==0.5.0\n",
 "! pip install -U matplotlib==3.2.1\n",
-"! pip install -U trains==0.15.0\n",
+"! pip install -U trains>=0.15.0\n",
 "! pip install -U tensorboard==2.2.1"
 ]
 },
@@ -87,10 +87,10 @@
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
-"scrolled": true,
 "pycharm": {
 "name": "#%%\n"
-}
+},
+"scrolled": true
 },
 "outputs": [],
 "source": [
@@ -125,4 +125,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 1
 }

View File

@@ -133,4 +133,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }

View File

@@ -45,7 +45,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"task = Task.init(project_name='Hyper-Parameter Search', task_name='image_classification_CIFAR10')\n",
+"task = Task.init(project_name='Image Example', task_name='image_classification_CIFAR10')\n",
 "configuration_dict = {'number_of_epochs': 3, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
 "configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
 "print(configuration_dict) # printing actual configuration (after override in remote mode)"