Fix examples

allegroai 2020-08-27 15:00:42 +03:00
parent 838c9cb0d2
commit 0d533e9013
6 changed files with 95 additions and 94 deletions

View File

@@ -91,7 +91,12 @@ model.compile(loss='categorical_crossentropy',
 # Connecting TRAINS
 task = Task.init(project_name='examples', task_name='Keras with TensorBoard example')
-task.connect_configuration({'test': 1337, 'nested': {'key': 'value', 'number': 1}})
+# To set your own configuration:
+# task.connect_configuration(
+#     name="MyConfig",
+#     configuration={'test': 1337, 'nested': {'key': 'value', 'number': 1}}
+# )
 # Advanced: setting model class enumeration
 labels = dict(('digit_%d' % i, i) for i in range(10))
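
Note: the replacement lines switch connect_configuration to the keyword form, which attaches a name to the configuration object. A minimal standalone sketch of that call un-commented, assuming trains >= 0.16 (the project and task names here are placeholders):

    from trains import Task

    task = Task.init(project_name='examples', task_name='configuration sketch')
    # The returned dict reflects any override applied from the trains UI
    config = task.connect_configuration(
        name="MyConfig",
        configuration={'test': 1337, 'nested': {'key': 'value', 'number': 1}})
    print(config)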

View File

@@ -11,11 +11,11 @@
 "outputs": [],
 "source": [
 "! pip install -U pip\n",
-"! pip install -U torch==1.5.0\n",
-"! pip install -U torchaudio==0.5.0\n",
-"! pip install -U torchvision==0.6.0\n",
+"! pip install -U torch==1.5.1\n",
+"! pip install -U torchaudio==0.5.1\n",
+"! pip install -U torchvision==0.6.1\n",
 "! pip install -U matplotlib==3.2.1\n",
-"! pip install -U trains>=0.15.0\n",
+"! pip install -U trains>=0.16.0\n",
 "! pip install -U pandas==1.0.4\n",
 "! pip install -U numpy==1.18.4\n",
 "! pip install -U tensorboard==2.2.1"
@@ -35,6 +35,7 @@
 "import io\n",
 "\n",
 "import pandas as pd\n",
+"import numpy as np\n",
 "from pathlib2 import Path\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
@@ -47,6 +48,7 @@
 "\n",
 "import torchaudio\n",
 "from torchvision.transforms import ToTensor\n",
+"from torchvision import models\n",
 "\n",
 "from trains import Task\n",
 "from trains.storage import StorageManager\n",
@@ -61,7 +63,8 @@
 "outputs": [],
 "source": [
 "task = Task.init(project_name='Audio Example', task_name='audio classification UrbanSound8K')\n",
-"configuration_dict = {'number_of_epochs': 10, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
+"configuration_dict = {'number_of_epochs': 6, 'batch_size': 8, 'dropout': 0.25, 'base_lr': 0.005, \n",
+"                      'number_of_mel_filters': 64, 'resample_freq': 22050}\n",
 "configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
 "print(configuration_dict) # printing actual configuration (after override in remote mode)"
@@ -77,8 +80,12 @@
 },
 "outputs": [],
 "source": [
-"# Download a sample dataset (https://allegro-datasets.s3.amazonaws.com/trains/UrbanSound8K.zip)based on UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html)\n",
-"path_to_UrbanSound8K = StorageManager.get_local_copy(\"https://allegro-datasets.s3.amazonaws.com/trains/UrbanSound8K.zip\", extract_archive=True, )"
+"# Download UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html)\n",
+"# For simplicity we will use here a subset of that dataset using trains StorageManager\n",
+"path_to_UrbanSound8K = StorageManager.get_local_copy(\"https://allegro-datasets.s3.amazonaws.com/trains/UrbanSound8K.zip\", \n",
+"                                                     extract_archive=True)\n",
+"path_to_UrbanSound8K_csv = Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'metadata' / 'UrbanSound8K.csv'\n",
+"path_to_UrbanSound8K_audio = Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'audio'"
 ]
},
{
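
Note: StorageManager.get_local_copy fetches a remote object into the local trains cache and, with extract_archive=True, unpacks it; repeated calls reuse the cached copy. A minimal sketch using the dataset URL from the diff (pathlib2 is the import the notebook itself uses):

    from pathlib2 import Path
    from trains.storage import StorageManager

    path_to_UrbanSound8K = StorageManager.get_local_copy(
        "https://allegro-datasets.s3.amazonaws.com/trains/UrbanSound8K.zip",
        extract_archive=True)
    print(Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'metadata' / 'UrbanSound8K.csv')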
@@ -92,12 +99,14 @@
 "outputs": [],
 "source": [
 "class UrbanSoundDataset(Dataset):\n",
-"#rapper for the UrbanSound8K dataset\n",
-"    def __init__(self, csv_path, file_path, folderList):\n",
+"    def __init__(self, csv_path, file_path, folderList, resample_freq=0, return_audio=False):\n",
 "        self.file_path = file_path\n",
 "        self.file_names = []\n",
 "        self.labels = []\n",
 "        self.folders = []\n",
+"        self.n_mels = configuration_dict.get('number_of_mel_filters', 64)\n",
+"        self.return_audio = return_audio\n",
+"        self.resample = resample_freq\n",
 "        \n",
 "        #loop through the csv entries and only add entries from folders in the folder list\n",
 "        csvData = pd.read_csv(csv_path)\n",
@@ -110,35 +119,45 @@
 "    def __getitem__(self, index):\n",
 "        #format the file path and load the file\n",
 "        path = self.file_path / (\"fold\" + str(self.folders[index])) / self.file_names[index]\n",
-"        sound, sample_rate = torchaudio.load(path, out = None, normalization = True)\n",
+"        soundData, sample_rate = torchaudio.load(path, out = None, normalization = True)\n",
 "\n",
-"        # UrbanSound8K uses two channels, this will convert them to one\n",
-"        soundData = torch.mean(sound, dim=0, keepdim=True)\n",
+"        if self.resample > 0:\n",
+"            resample_transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=self.resample)\n",
+"            soundData = resample_transform(soundData)\n",
 "        \n",
-"        #Make sure all files are the same size\n",
-"        if soundData.numel() < 160000:\n",
-"            fixedsize_data = torch.nn.functional.pad(soundData, (0, 160000 - soundData.numel()))\n",
+"        # This will convert audio files with two channels into one\n",
+"        soundData = torch.mean(soundData, dim=0, keepdim=True)\n",
+"        \n",
+"        # Convert audio to log-scale Mel spectrogram\n",
+"        melspectrogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=self.resample, n_mels=self.n_mels)\n",
+"        melspectrogram = melspectrogram_transform(soundData)\n",
+"        melspectogram_db = torchaudio.transforms.AmplitudeToDB()(melspectrogram)\n",
+"        \n",
+"        #Make sure all spectrograms are the same size\n",
+"        fixed_length = 3 * (self.resample//200)\n",
+"        if melspectogram_db.shape[2] < fixed_length:\n",
+"            melspectogram_db = torch.nn.functional.pad(melspectogram_db, (0, fixed_length - melspectogram_db.shape[2]))\n",
 "        else:\n",
-"            fixedsize_data = soundData[0,:160000].reshape(1,160000)\n",
+"            melspectogram_db = melspectogram_db[:, :, :fixed_length]\n",
 "        \n",
-"        #downsample the audio\n",
-"        downsample_data = fixedsize_data[::5]\n",
-"        \n",
-"        melspectogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate)\n",
-"        melspectogram = melspectogram_transform(downsample_data)\n",
-"        melspectogram_db = torchaudio.transforms.AmplitudeToDB()(melspectogram)\n",
+"        if self.return_audio:\n",
+"            fixed_length = 2 * self.resample\n",
+"            if soundData.numel() < fixed_length:\n",
+"                soundData = torch.nn.functional.pad(soundData, (0, fixed_length - soundData.numel()))\n",
+"            else:\n",
+"                soundData = soundData[0,:fixed_length].reshape(1,fixed_length)\n",
+"        else:\n",
+"            soundData = []\n",
 "\n",
-"        return fixedsize_data, sample_rate, melspectogram_db, self.labels[index]\n",
+"        return soundData, self.resample, melspectogram_db, self.labels[index]\n",
 "        \n",
 "    def __len__(self):\n",
 "        return len(self.file_names)\n",
 "\n",
-"\n",
-"csv_path = Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'metadata' / 'UrbanSound8K.csv'\n",
-"file_path = Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'audio'\n",
-"\n",
-"train_set = UrbanSoundDataset(csv_path, file_path, range(1,10))\n",
-"test_set = UrbanSoundDataset(csv_path, file_path, [10])\n",
+"train_set = UrbanSoundDataset(path_to_UrbanSound8K_csv, path_to_UrbanSound8K_audio, range(1,10), \n",
+"                              resample_freq=configuration_dict.get('resample_freq', 0), return_audio=False)\n",
+"test_set = UrbanSoundDataset(path_to_UrbanSound8K_csv, path_to_UrbanSound8K_audio, [10], \n",
+"                             resample_freq=configuration_dict.get('resample_freq', 0), return_audio=True)\n",
 "print(\"Train set size: \" + str(len(train_set)))\n",
 "print(\"Test set size: \" + str(len(test_set)))\n",
 "\n",
@@ -154,42 +173,14 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"colab": {},
-"colab_type": "code",
-"id": "ylblw-k1zNeZ"
-},
+"metadata": {},
 "outputs": [],
 "source": [
-"class Net(nn.Module):\n",
-"    def __init__(self, num_classes, dropout_value):\n",
-"        super(Net,self).__init__()\n",
-"        self.num_classes = num_classes\n",
-"        self.dropout_value = dropout_value\n",
-"        \n",
-"        self.C1 = nn.Conv2d(1,16,3)\n",
-"        self.C2 = nn.Conv2d(16,32,3)\n",
-"        self.C3 = nn.Conv2d(32,64,3)\n",
-"        self.C4 = nn.Conv2d(64,128,3)\n",
-"        self.maxpool1 = nn.MaxPool2d(2,2) \n",
-"        self.fc1 = nn.Linear(128*29*197,128)\n",
-"        self.fc2 = nn.Linear(128,self.num_classes)\n",
-"        self.dropout = nn.Dropout(self.dropout_value)\n",
-"        \n",
-"    def forward(self,x):\n",
-"        # add sequence of convolutional and max pooling layers\n",
-"        x = F.relu(self.C1(x))\n",
-"        x = self.maxpool1(F.relu(self.C2(x)))\n",
-"        x = F.relu(self.C3(x))\n",
-"        x = self.maxpool1(F.relu(self.C4(x)))\n",
-"        # flatten image input\n",
-"        x = x.view(-1,128*29*197)\n",
-"        x = F.relu(self.fc1(self.dropout(x)))\n",
-"        x = self.fc2(self.dropout(x))\n",
-"        return x\n",
-"        \n",
-"        \n",
-"model = Net(len(classes), configuration_dict.get('dropout', 0.25))"
+"model = models.resnet18(pretrained=True)\n",
+"model.conv1=nn.Conv2d(1, model.conv1.out_channels, kernel_size=model.conv1.kernel_size[0], \n",
+"                      stride=model.conv1.stride[0], padding=model.conv1.padding[0])\n",
+"num_ftrs = model.fc.in_features\n",
+"model.fc = nn.Sequential(*[nn.Dropout(p=configuration_dict.get('dropout', 0.25)), nn.Linear(num_ftrs, len(classes))])"
 ]
},
{
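
Note: the custom Net class gives way to transfer learning: a pretrained ResNet-18 whose first convolution is rebuilt for single-channel spectrogram input and whose classifier head is resized. The same adaptation as a standalone sketch (num_classes and the dropout value are placeholders; torchvision 0.6.x API):

    import torch.nn as nn
    from torchvision import models

    num_classes, dropout = 10, 0.25
    model = models.resnet18(pretrained=True)
    # Spectrograms have one input channel, not three; keep the remaining conv1 hyperparameters
    model.conv1 = nn.Conv2d(1, model.conv1.out_channels,
                            kernel_size=model.conv1.kernel_size[0],
                            stride=model.conv1.stride[0],
                            padding=model.conv1.padding[0])
    # The right-hand side reads the old head's in_features before the assignment replaces it
    model.fc = nn.Sequential(nn.Dropout(p=dropout),
                             nn.Linear(model.fc.in_features, num_classes))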
@@ -203,7 +194,7 @@
 "outputs": [],
 "source": [
 "optimizer = optim.SGD(model.parameters(), lr = configuration_dict.get('base_lr', 0.001), momentum = 0.9)\n",
-"scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 3, gamma = 0.1)\n",
+"scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = configuration_dict.get('number_of_epochs')//3, gamma = 0.1)\n",
 "criterion = nn.CrossEntropyLoss()"
 ]
},
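
Note: deriving step_size from the epoch count means the learning rate drops by gamma twice over any run length, instead of being tuned for a fixed 10-epoch run. A tiny sketch of the resulting schedule (the single parameter is a stand-in for a real model):

    import torch
    from torch import nn, optim

    param = nn.Parameter(torch.zeros(1))
    number_of_epochs = 6
    optimizer = optim.SGD([param], lr=0.005, momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=number_of_epochs // 3, gamma=0.1)
    for epoch in range(number_of_epochs):
        print(epoch, scheduler.get_last_lr())  # 0.005, 0.005, 0.0005, 0.0005, 5e-05, 5e-05
        optimizer.step()
        scheduler.step()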
@@ -343,19 +334,12 @@
 "outputs": [],
 "source": [
 "log_interval = 10\n",
-"debug_interval = 20\n",
+"debug_interval = 25\n",
 "for epoch in range(configuration_dict.get('number_of_epochs', 10)):\n",
 "    train(model, epoch)\n",
 "    test(model, epoch)\n",
 "    scheduler.step()"
 ]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
 }
 ],
 "metadata": {

View File

@@ -9,10 +9,10 @@
 "outputs": [],
 "source": [
 "! pip install -U pip\n",
-"! pip install -U torch==1.5.0\n",
-"! pip install -U torchaudio==0.5.0\n",
+"! pip install -U torch==1.5.1\n",
+"! pip install -U torchaudio==0.5.1\n",
 "! pip install -U matplotlib==3.2.1\n",
-"! pip install -U trains>=0.15.0\n",
+"! pip install -U trains>=0.16.0\n",
 "! pip install -U tensorboard==2.2.1"
 ]
},
@@ -23,6 +23,7 @@
 "outputs": [],
 "source": [
 "import os\n",
+"import torch\n",
 "import torchaudio\n",
 "from torch.utils.tensorboard import SummaryWriter\n",
 "import matplotlib.pyplot as plt\n",
@@ -39,7 +40,7 @@
 "outputs": [],
 "source": [
 "task = Task.init(project_name='Audio Example', task_name='data pre-processing')\n",
-"configuration_dict = {'number_of_smaples': 3}\n",
+"configuration_dict = {'number_of_samples': 3}\n",
 "configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
 "print(configuration_dict) # printing actual configuration (after override in remote mode)"
 ]
@@ -86,20 +87,27 @@
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
-"scrolled": true,
 "pycharm": {
 "name": "#%%\n"
-}
+},
+"scrolled": true
 },
 "outputs": [],
 "source": [
-"for n in range(configuration_dict.get('number_of_smaples', 3)):\n",
-"    waveform, sample_rate, labels = yesno_data[n]\n",
-"    melspectogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate)\n",
-"    plot_signal(waveform[0,:], 'Original waveform')\n",
-"    tensorboard_writer.add_audio('Audio samples/{}'.format(n), waveform, n, sample_rate)\n",
-"    plot_signal(melspectogram_transform(waveform.squeeze()).numpy(), 'Mel spectogram', 'hot')\n",
-"    plot_signal(torchaudio.transforms.AmplitudeToDB()(melspectogram_transform(waveform.squeeze())).numpy(), 'Mel spectogram DB', 'hot')"
+"fixed_sample_rate = 22050\n",
+"for n in range(configuration_dict.get('number_of_samples', 3)):\n",
+"    audio, sample_rate, labels = yesno_data[n]\n",
+"    tensorboard_writer.add_audio('Audio samples/{}'.format(n), audio, n, sample_rate)\n",
+"    \n",
+"    resample_transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=fixed_sample_rate)\n",
+"    melspectogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=fixed_sample_rate, n_mels=128)\n",
+"    \n",
+"    audio_mono = torch.mean(resample_transform(audio), dim=0, keepdim=True)\n",
+"    plot_signal(audio_mono[0,:], 'Original waveform')\n",
+"    \n",
+"    melspectogram = melspectogram_transform(audio_mono)\n",
+"    plot_signal(melspectogram.squeeze().numpy(), 'Mel spectogram', 'hot')\n",
+"    plot_signal(torchaudio.transforms.AmplitudeToDB()(melspectogram).squeeze().numpy(), 'Mel spectogram DB', 'hot')"
 ]
}
],
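
Note: the loop now logs each raw clip to TensorBoard with add_audio before plotting its spectrograms. A minimal standalone add_audio sketch with a synthetic tone (the log directory name is arbitrary):

    import math
    import torch
    from torch.utils.tensorboard import SummaryWriter

    writer = SummaryWriter('runs/audio_demo')
    sample_rate = 8000
    t = torch.arange(0, 1, 1.0 / sample_rate)
    tone = torch.sin(2 * math.pi * 440 * t).unsqueeze(0)  # 1 s of 440 Hz, shape (1, N)
    writer.add_audio('Audio samples/0', tone, 0, sample_rate)
    writer.close()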
@@ -123,5 +131,5 @@
 }
 },
 "nbformat": 4,
-"nbformat_minor": 1
+"nbformat_minor": 4
 }

View File

@@ -154,6 +154,10 @@ if __name__ == "__main__":
     # otherwise, the `rank` will always be set to the original value.
     task = Task.init("examples", "test torch distributed", auto_connect_arg_parser={'rank': False})
+    if not dist.is_available():
+        print("torch.distributed is not supported for this platform")
+        exit(0)
     if os.environ.get('MASTER_ADDR'):
         dist.init_process_group(backend='gloo', rank=args.rank, world_size=args.nodes)
         run(args.workers_in_node)
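
Note: the added guard exits cleanly when the torch build ships without distributed support (for example, some Windows builds of that era). A minimal sketch of the same check plus a single-process gloo group (the address and port values are placeholders):

    import os
    import torch.distributed as dist

    if not dist.is_available():
        print("torch.distributed is not supported for this platform")
        exit(0)
    os.environ.setdefault('MASTER_ADDR', '127.0.0.1')
    os.environ.setdefault('MASTER_PORT', '29500')
    dist.init_process_group(backend='gloo', rank=0, world_size=1)
    print('rank', dist.get_rank(), 'of', dist.get_world_size())
    dist.destroy_process_group()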

View File

@@ -9,13 +9,13 @@ from __future__ import print_function
 import tempfile
 import os
-from keras.callbacks import TensorBoard, ModelCheckpoint
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers.core import Dense, Activation
-from keras.optimizers import RMSprop
-from keras.utils import np_utils
 import tensorflow as tf  # noqa: F401
+from tensorflow.keras import utils as np_utils
+from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
+from tensorflow.keras.datasets import mnist
+from tensorflow.keras.layers import Activation, Dense
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.optimizers import RMSprop
 from trains import Task, Logger
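
Note: the standalone keras package is replaced by the tf.keras namespace bundled with TensorFlow; module paths move (keras.layers.core becomes tensorflow.keras.layers, keras.utils.np_utils becomes tensorflow.keras.utils) while the class names stay the same. A quick smoke test of the new imports:

    import tensorflow as tf  # noqa: F401
    from tensorflow.keras.layers import Activation, Dense
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.optimizers import RMSprop

    model = Sequential([Dense(10, input_shape=(784,)), Activation('softmax')])
    model.compile(loss='categorical_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])
    model.summary()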

View File

@@ -1,5 +1,5 @@
 absl-py>=0.7.1
-bokeh
+bokeh>=1.4.0
 numpy
 pandas
 pillow>=4.0