mirror of https://github.com/clearml/clearml
synced 2025-03-11 22:31:14 +00:00
Added text classification example and updated image and audio examples
This commit is contained in:
parent
53d0da373f
commit
8cb7c8130a
@@ -1,380 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "e-YsQrBjzNdX"
},
"outputs": [],
"source": [
"! pip install -U pip\n",
"! pip install -U torch==1.5.0\n",
"! pip install -U torchaudio==0.5.0\n",
"! pip install -U torchvision==0.6.0\n",
"! pip install -U matplotlib==3.2.1\n",
"! pip install -U trains==0.15.0\n",
"! pip install -U pandas==1.0.4\n",
"! pip install -U numpy==1.18.4\n",
"! pip install -U tensorboard==2.2.1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "T7T0Rf26zNdm"
},
"outputs": [],
"source": [
"import PIL\n",
"import io\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"from pathlib2 import Path\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"from torch.utils.data import Dataset\n",
"from torch.utils.tensorboard import SummaryWriter\n",
"\n",
"import torchaudio\n",
"from torchvision.transforms import ToTensor\n",
"\n",
"from trains import Task\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"task = Task.init(project_name='Audio Example', task_name='audio classifier')\n",
"configuration_dict = {'number_of_epochs': 10, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "msiz7QdvzNeA",
"scrolled": true
},
"outputs": [],
"source": [
"# Download UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html)\n",
"path_to_UrbanSound8K = './data/UrbanSound8K'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "wXtmZe7yzNeS"
},
"outputs": [],
"source": [
"class UrbanSoundDataset(Dataset):\n",
"    # Wrapper for the UrbanSound8K dataset\n",
"    def __init__(self, csv_path, file_path, folderList):\n",
"        self.file_path = file_path\n",
"        self.file_names = []\n",
"        self.labels = []\n",
"        self.folders = []\n",
"        \n",
"        # loop through the csv entries and only add entries from folders in the folder list\n",
"        csvData = pd.read_csv(csv_path)\n",
"        for i in range(0,len(csvData)):\n",
"            if csvData.iloc[i, 5] in folderList:\n",
"                self.file_names.append(csvData.iloc[i, 0])\n",
"                self.labels.append(csvData.iloc[i, 6])\n",
"                self.folders.append(csvData.iloc[i, 5])\n",
"        \n",
"    def __getitem__(self, index):\n",
"        # format the file path and load the file\n",
"        path = self.file_path / (\"fold\" + str(self.folders[index])) / self.file_names[index]\n",
"        sound, sample_rate = torchaudio.load(path, out = None, normalization = True)\n",
"\n",
"        # UrbanSound8K uses two channels, this will convert them to one\n",
"        soundData = torch.mean(sound, dim=0, keepdim=True)\n",
"        \n",
"        # make sure all files are the same size\n",
"        if soundData.numel() < 160000:\n",
"            fixedsize_data = torch.nn.functional.pad(soundData, (0, 160000 - soundData.numel()))\n",
"        else:\n",
"            fixedsize_data = soundData[0,:160000].reshape(1,160000)\n",
"        \n",
"        # NOTE: this slice steps over the channel dimension (size 1), so it is\n",
"        # effectively a no-op; the layer sizes below assume all 160000 samples\n",
"        downsample_data = fixedsize_data[::5]\n",
"        \n",
"        melspectrogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate)\n",
"        melspectrogram = melspectrogram_transform(downsample_data)\n",
"        melspectrogram_db = torchaudio.transforms.AmplitudeToDB()(melspectrogram)\n",
"\n",
"        return fixedsize_data, sample_rate, melspectrogram_db, self.labels[index]\n",
"    \n",
"    def __len__(self):\n",
"        return len(self.file_names)\n",
"\n",
"\n",
"csv_path = Path(path_to_UrbanSound8K) / 'metadata' / 'UrbanSound8K.csv'\n",
"file_path = Path(path_to_UrbanSound8K) / 'audio'\n",
"\n",
"train_set = UrbanSoundDataset(csv_path, file_path, range(1,10))\n",
"test_set = UrbanSoundDataset(csv_path, file_path, [10])\n",
"print(\"Train set size: \" + str(len(train_set)))\n",
"print(\"Test set size: \" + str(len(test_set)))\n",
"\n",
"train_loader = torch.utils.data.DataLoader(train_set, batch_size = configuration_dict.get('batch_size', 4), \n",
"                                           shuffle = True, pin_memory=True, num_workers=1)\n",
"test_loader = torch.utils.data.DataLoader(test_set, batch_size = configuration_dict.get('batch_size', 4), \n",
"                                          shuffle = False, pin_memory=True, num_workers=1)\n",
"\n",
"classes = ('air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 'engine_idling', \n",
"           'gun_shot', 'jackhammer', 'siren', 'street_music')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "ylblw-k1zNeZ"
},
"outputs": [],
"source": [
"class Net(nn.Module):\n",
"    def __init__(self, num_classes, dropout_value):\n",
"        super(Net,self).__init__()\n",
"        self.num_classes = num_classes\n",
"        self.dropout_value = dropout_value\n",
"        \n",
"        self.C1 = nn.Conv2d(1,16,3)\n",
"        self.C2 = nn.Conv2d(16,32,3)\n",
"        self.C3 = nn.Conv2d(32,64,3)\n",
"        self.C4 = nn.Conv2d(64,128,3)\n",
"        self.maxpool1 = nn.MaxPool2d(2,2)\n",
"        self.fc1 = nn.Linear(128*29*197,128)\n",
"        self.fc2 = nn.Linear(128,self.num_classes)\n",
"        self.dropout = nn.Dropout(self.dropout_value)\n",
"        \n",
"    def forward(self,x):\n",
"        # add sequence of convolutional and max pooling layers\n",
"        x = F.relu(self.C1(x))\n",
"        x = self.maxpool1(F.relu(self.C2(x)))\n",
"        x = F.relu(self.C3(x))\n",
"        x = self.maxpool1(F.relu(self.C4(x)))\n",
"        # flatten image input\n",
"        x = x.view(-1,128*29*197)\n",
"        x = F.relu(self.fc1(self.dropout(x)))\n",
"        x = self.fc2(self.dropout(x))\n",
"        return x\n",
"    \n",
"    \n",
"model = Net(len(classes), configuration_dict.get('dropout', 0.25))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "3yKYru14zNef"
},
"outputs": [],
"source": [
"optimizer = optim.SGD(model.parameters(), lr = configuration_dict.get('base_lr', 0.001), momentum = 0.9)\n",
"scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 3, gamma = 0.1)\n",
"criterion = nn.CrossEntropyLoss()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu')\n",
"print('Device to use: {}'.format(device))\n",
"model.to(device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tensorboard_writer = SummaryWriter('./tensorboard_logs')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def plot_signal(signal, title, cmap=None):\n",
"    fig = plt.figure()\n",
"    if signal.ndim == 1:\n",
"        plt.plot(signal)\n",
"    else:\n",
"        plt.imshow(signal, cmap=cmap)\n",
"    plt.title(title)\n",
"    \n",
"    plot_buf = io.BytesIO()\n",
"    plt.savefig(plot_buf, format='jpeg')\n",
"    plot_buf.seek(0)\n",
"    plt.close(fig)\n",
"    return ToTensor()(PIL.Image.open(plot_buf))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "Vdthqz3JzNem"
},
"outputs": [],
"source": [
"def train(model, epoch):\n",
"    model.train()\n",
"    for batch_idx, (sounds, sample_rate, inputs, labels) in enumerate(train_loader):\n",
"        inputs = inputs.to(device)\n",
"        labels = labels.to(device)\n",
"\n",
"        # zero the parameter gradients\n",
"        optimizer.zero_grad()\n",
"\n",
"        # forward + backward + optimize\n",
"        outputs = model(inputs)\n",
"        _, predicted = torch.max(outputs, 1)\n",
"        loss = criterion(outputs, labels)\n",
"        loss.backward()\n",
"        optimizer.step()\n",
"        \n",
"        iteration = epoch * len(train_loader) + batch_idx\n",
"        if batch_idx % log_interval == 0: # print training stats\n",
"            print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'\n",
"                  .format(epoch, batch_idx * len(inputs), len(train_loader.dataset), \n",
"                          100. * batch_idx / len(train_loader), loss))\n",
"            tensorboard_writer.add_scalar('training loss/loss', loss, iteration)\n",
"            tensorboard_writer.add_scalar('learning rate/lr', optimizer.param_groups[0]['lr'], iteration)\n",
"        \n",
"        \n",
"        if batch_idx % debug_interval == 0: # report debug images every debug_interval mini-batches\n",
"            for n, (inp, pred, label) in enumerate(zip(inputs, predicted, labels)):\n",
"                series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()])\n",
"                tensorboard_writer.add_image('Train MelSpectrogram samples/{}'.format(n), \n",
"                                             plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "LBWoj7u5zNes"
},
"outputs": [],
"source": [
"def test(model, epoch):\n",
"    model.eval()\n",
"    class_correct = list(0. for i in range(10))\n",
"    class_total = list(0. for i in range(10))\n",
"    with torch.no_grad():\n",
"        for idx, (sounds, sample_rate, inputs, labels) in enumerate(test_loader):\n",
"            inputs = inputs.to(device)\n",
"            labels = labels.to(device)\n",
"\n",
"            outputs = model(inputs)\n",
"\n",
"            _, predicted = torch.max(outputs, 1)\n",
"            c = (predicted == labels)\n",
"            for i in range(len(inputs)):\n",
"                label = labels[i].item()\n",
"                class_correct[label] += c[i].item()\n",
"                class_total[label] += 1\n",
"            \n",
"            iteration = (epoch + 1) * len(train_loader)\n",
"            if idx % debug_interval == 0: # report debug images every debug_interval mini-batches\n",
"                for n, (sound, inp, pred, label) in enumerate(zip(sounds, inputs, predicted, labels)):\n",
"                    series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()])\n",
"                    tensorboard_writer.add_audio('Test audio samples/{}'.format(n), \n",
"                                                 sound, iteration, int(sample_rate[n]))\n",
"                    tensorboard_writer.add_image('Test MelSpectrogram samples/{}_{}'.format(idx, n), \n",
"                                                 plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration)\n",
"\n",
"    total_accuracy = 100 * sum(class_correct)/sum(class_total)\n",
"    print('[Epoch {}] Accuracy on the {} test images: {}%\\n'.format(epoch, sum(class_total), total_accuracy))\n",
"    tensorboard_writer.add_scalar('accuracy/total', total_accuracy, iteration)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "X5lx3g_5zNey",
"scrolled": false
},
"outputs": [],
"source": [
"log_interval = 100\n",
"debug_interval = 200\n",
"for epoch in range(configuration_dict.get('number_of_epochs', 10)):\n",
"    train(model, epoch)\n",
"    test(model, epoch)\n",
"    scheduler.step()"
]
}
],
"metadata": {
"colab": {
"name": "audio_classifier_tutorial.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
@@ -12,7 +12,7 @@
 "! pip install -U torch==1.5.0\n",
 "! pip install -U torchaudio==0.5.0\n",
 "! pip install -U matplotlib==3.2.1\n",
-"! pip install -U trains==0.15.0\n",
+"! pip install -U trains>=0.15.0\n",
 "! pip install -U tensorboard==2.2.1"
 ]
 },
@@ -87,10 +87,10 @@
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
-"scrolled": true
-},
+"scrolled": true,
+"pycharm": {
+"name": "#%%\n"
+}
+},
 "outputs": [],
 "source": [
@@ -125,4 +125,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 1
-}
+}
@@ -133,4 +133,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 4
-}
+}
@@ -45,7 +45,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"task = Task.init(project_name='Hyper-Parameter Search', task_name='image_classification_CIFAR10')\n",
+"task = Task.init(project_name='Image Example', task_name='image_classification_CIFAR10')\n",
 "configuration_dict = {'number_of_epochs': 3, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
 "configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
 "print(configuration_dict) # printing actual configuration (after override in remote mode)"