Fix examples

This commit is contained in:
allegroai 2020-08-27 15:00:42 +03:00
parent 838c9cb0d2
commit 0d533e9013
6 changed files with 95 additions and 94 deletions

View File

@ -91,7 +91,12 @@ model.compile(loss='categorical_crossentropy',
# Connecting TRAINS
task = Task.init(project_name='examples', task_name='Keras with TensorBoard example')
task.connect_configuration({'test': 1337, 'nested': {'key': 'value', 'number': 1}})
# To set your own configuration:
# task.connect_configuration(
# name="MyConfig",
# configuration={'test': 1337, 'nested': {'key': 'value', 'number': 1}}
# )
# Advanced: setting model class enumeration
labels = dict(('digit_%d' % i, i) for i in range(10))

View File

@ -11,11 +11,11 @@
"outputs": [],
"source": [
"! pip install -U pip\n",
"! pip install -U torch==1.5.0\n",
"! pip install -U torchaudio==0.5.0\n",
"! pip install -U torchvision==0.6.0\n",
"! pip install -U torch==1.5.1\n",
"! pip install -U torchaudio==0.5.1\n",
"! pip install -U torchvision==0.6.1\n",
"! pip install -U matplotlib==3.2.1\n",
"! pip install -U trains>=0.15.0\n",
"! pip install -U trains>=0.16.0\n",
"! pip install -U pandas==1.0.4\n",
"! pip install -U numpy==1.18.4\n",
"! pip install -U tensorboard==2.2.1"
@ -35,6 +35,7 @@
"import io\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"from pathlib2 import Path\n",
"import matplotlib.pyplot as plt\n",
"\n",
@ -47,6 +48,7 @@
"\n",
"import torchaudio\n",
"from torchvision.transforms import ToTensor\n",
"from torchvision import models\n",
"\n",
"from trains import Task\n",
"from trains.storage import StorageManager\n",
@ -61,7 +63,8 @@
"outputs": [],
"source": [
"task = Task.init(project_name='Audio Example', task_name='audio classification UrbanSound8K')\n",
"configuration_dict = {'number_of_epochs': 10, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
"configuration_dict = {'number_of_epochs': 6, 'batch_size': 8, 'dropout': 0.25, 'base_lr': 0.005, \n",
" 'number_of_mel_filters': 64, 'resample_freq': 22050}\n",
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
]
@ -77,8 +80,12 @@
},
"outputs": [],
"source": [
"# Download a sample dataset (https://allegro-datasets.s3.amazonaws.com/trains/UrbanSound8K.zip) based on the UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html)\n",
"path_to_UrbanSound8K = StorageManager.get_local_copy(\"https://allegro-datasets.s3.amazonaws.com/trains/UrbanSound8K.zip\", extract_archive=True, )"
"# Download UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html)\n",
"# For simplicity, we use a subset of that dataset here, downloaded with the trains StorageManager\n",
"path_to_UrbanSound8K = StorageManager.get_local_copy(\"https://allegro-datasets.s3.amazonaws.com/trains/UrbanSound8K.zip\", \n",
" extract_archive=True)\n",
"path_to_UrbanSound8K_csv = Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'metadata' / 'UrbanSound8K.csv'\n",
"path_to_UrbanSound8K_audio = Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'audio'"
]
},
{
@ -92,12 +99,14 @@
"outputs": [],
"source": [
"class UrbanSoundDataset(Dataset):\n",
"#wrapper for the UrbanSound8K dataset\n",
" def __init__(self, csv_path, file_path, folderList):\n",
" def __init__(self, csv_path, file_path, folderList, resample_freq=0, return_audio=False):\n",
" self.file_path = file_path\n",
" self.file_names = []\n",
" self.labels = []\n",
" self.folders = []\n",
" self.n_mels = configuration_dict.get('number_of_mel_filters', 64)\n",
" self.return_audio = return_audio\n",
" self.resample = resample_freq\n",
" \n",
" #loop through the csv entries and only add entries from folders in the folder list\n",
" csvData = pd.read_csv(csv_path)\n",
@ -110,35 +119,45 @@
" def __getitem__(self, index):\n",
" #format the file path and load the file\n",
" path = self.file_path / (\"fold\" + str(self.folders[index])) / self.file_names[index]\n",
" sound, sample_rate = torchaudio.load(path, out = None, normalization = True)\n",
" soundData, sample_rate = torchaudio.load(path, out = None, normalization = True)\n",
"\n",
" # UrbanSound8K uses two channels, this will convert them to one\n",
" soundData = torch.mean(sound, dim=0, keepdim=True)\n",
" if self.resample > 0:\n",
" resample_transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=self.resample)\n",
" soundData = resample_transform(soundData)\n",
" \n",
" #Make sure all files are the same size\n",
" if soundData.numel() < 160000:\n",
" fixedsize_data = torch.nn.functional.pad(soundData, (0, 160000 - soundData.numel()))\n",
" # This will convert audio files with two channels into one\n",
" soundData = torch.mean(soundData, dim=0, keepdim=True)\n",
" \n",
" # Convert audio to log-scale Mel spectrogram\n",
" melspectrogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=self.resample, n_mels=self.n_mels)\n",
" melspectrogram = melspectrogram_transform(soundData)\n",
" melspectogram_db = torchaudio.transforms.AmplitudeToDB()(melspectrogram)\n",
" \n",
" #Make sure all spectrograms are the same size\n",
" fixed_length = 3 * (self.resample//200)\n",
" if melspectogram_db.shape[2] < fixed_length:\n",
" melspectogram_db = torch.nn.functional.pad(melspectogram_db, (0, fixed_length - melspectogram_db.shape[2]))\n",
" else:\n",
" fixedsize_data = soundData[0,:160000].reshape(1,160000)\n",
" melspectogram_db = melspectogram_db[:, :, :fixed_length]\n",
" \n",
" #downsample the audio\n",
" downsample_data = fixedsize_data[::5]\n",
" \n",
" melspectogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate)\n",
" melspectogram = melspectogram_transform(downsample_data)\n",
" melspectogram_db = torchaudio.transforms.AmplitudeToDB()(melspectogram)\n",
" if self.return_audio:\n",
" fixed_length = 2 * self.resample\n",
" if soundData.numel() < fixed_length:\n",
" soundData = torch.nn.functional.pad(soundData, (0, fixed_length - soundData.numel()))\n",
" else:\n",
" soundData = soundData[0,:fixed_length].reshape(1,fixed_length)\n",
" else:\n",
" soundData = []\n",
"\n",
" return fixedsize_data, sample_rate, melspectogram_db, self.labels[index]\n",
" return soundData, self.resample, melspectogram_db, self.labels[index]\n",
" \n",
" def __len__(self):\n",
" return len(self.file_names)\n",
"\n",
"\n",
"csv_path = Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'metadata' / 'UrbanSound8K.csv'\n",
"file_path = Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'audio'\n",
"\n",
"train_set = UrbanSoundDataset(csv_path, file_path, range(1,10))\n",
"test_set = UrbanSoundDataset(csv_path, file_path, [10])\n",
"train_set = UrbanSoundDataset(path_to_UrbanSound8K_csv, path_to_UrbanSound8K_audio, range(1,10), \n",
" resample_freq=configuration_dict.get('resample_freq', 0), return_audio=False)\n",
"test_set = UrbanSoundDataset(path_to_UrbanSound8K_csv, path_to_UrbanSound8K_audio, [10], \n",
" resample_freq=configuration_dict.get('resample_freq', 0), return_audio=True)\n",
"print(\"Train set size: \" + str(len(train_set)))\n",
"print(\"Test set size: \" + str(len(test_set)))\n",
"\n",
@ -154,42 +173,14 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "ylblw-k1zNeZ"
},
"metadata": {},
"outputs": [],
"source": [
"class Net(nn.Module):\n",
" def __init__(self, num_classes, dropout_value):\n",
" super(Net,self).__init__()\n",
" self.num_classes = num_classes\n",
" self.dropout_value = dropout_value\n",
" \n",
" self.C1 = nn.Conv2d(1,16,3)\n",
" self.C2 = nn.Conv2d(16,32,3)\n",
" self.C3 = nn.Conv2d(32,64,3)\n",
" self.C4 = nn.Conv2d(64,128,3)\n",
" self.maxpool1 = nn.MaxPool2d(2,2) \n",
" self.fc1 = nn.Linear(128*29*197,128)\n",
" self.fc2 = nn.Linear(128,self.num_classes)\n",
" self.dropout = nn.Dropout(self.dropout_value)\n",
" \n",
" def forward(self,x):\n",
" # add sequence of convolutional and max pooling layers\n",
" x = F.relu(self.C1(x))\n",
" x = self.maxpool1(F.relu(self.C2(x)))\n",
" x = F.relu(self.C3(x))\n",
" x = self.maxpool1(F.relu(self.C4(x)))\n",
" # flatten image input\n",
" x = x.view(-1,128*29*197)\n",
" x = F.relu(self.fc1(self.dropout(x)))\n",
" x = self.fc2(self.dropout(x))\n",
" return x\n",
" \n",
" \n",
"model = Net(len(classes), configuration_dict.get('dropout', 0.25))"
"model = models.resnet18(pretrained=True)\n",
"model.conv1=nn.Conv2d(1, model.conv1.out_channels, kernel_size=model.conv1.kernel_size[0], \n",
" stride=model.conv1.stride[0], padding=model.conv1.padding[0])\n",
"num_ftrs = model.fc.in_features\n",
"model.fc = nn.Sequential(*[nn.Dropout(p=configuration_dict.get('dropout', 0.25)), nn.Linear(num_ftrs, len(classes))])"
]
},
{
@ -203,7 +194,7 @@
"outputs": [],
"source": [
"optimizer = optim.SGD(model.parameters(), lr = configuration_dict.get('base_lr', 0.001), momentum = 0.9)\n",
"scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 3, gamma = 0.1)\n",
"scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = configuration_dict.get('number_of_epochs')//3, gamma = 0.1)\n",
"criterion = nn.CrossEntropyLoss()"
]
},
@ -343,19 +334,12 @@
"outputs": [],
"source": [
"log_interval = 10\n",
"debug_interval = 20\n",
"debug_interval = 25\n",
"for epoch in range(configuration_dict.get('number_of_epochs', 10)):\n",
" train(model, epoch)\n",
" test(model, epoch)\n",
" scheduler.step()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

View File

@ -9,10 +9,10 @@
"outputs": [],
"source": [
"! pip install -U pip\n",
"! pip install -U torch==1.5.0\n",
"! pip install -U torchaudio==0.5.0\n",
"! pip install -U torch==1.5.1\n",
"! pip install -U torchaudio==0.5.1\n",
"! pip install -U matplotlib==3.2.1\n",
"! pip install -U trains>=0.15.0\n",
"! pip install -U trains>=0.16.0\n",
"! pip install -U tensorboard==2.2.1"
]
},
@ -23,6 +23,7 @@
"outputs": [],
"source": [
"import os\n",
"import torch\n",
"import torchaudio\n",
"from torch.utils.tensorboard import SummaryWriter\n",
"import matplotlib.pyplot as plt\n",
@ -39,7 +40,7 @@
"outputs": [],
"source": [
"task = Task.init(project_name='Audio Example', task_name='data pre-processing')\n",
"configuration_dict = {'number_of_smaples': 3}\n",
"configuration_dict = {'number_of_samples': 3}\n",
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
]
@ -86,20 +87,27 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true,
"pycharm": {
"name": "#%%\n"
}
},
"scrolled": true
},
"outputs": [],
"source": [
"for n in range(configuration_dict.get('number_of_smaples', 3)):\n",
" waveform, sample_rate, labels = yesno_data[n]\n",
" melspectogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate)\n",
" plot_signal(waveform[0,:], 'Original waveform')\n",
" tensorboard_writer.add_audio('Audio samples/{}'.format(n), waveform, n, sample_rate)\n",
" plot_signal(melspectogram_transform(waveform.squeeze()).numpy(), 'Mel spectogram', 'hot')\n",
" plot_signal(torchaudio.transforms.AmplitudeToDB()(melspectogram_transform(waveform.squeeze())).numpy(), 'Mel spectogram DB', 'hot')"
"fixed_sample_rate = 22050\n",
"for n in range(configuration_dict.get('number_of_samples', 3)):\n",
" audio, sample_rate, labels = yesno_data[n]\n",
" tensorboard_writer.add_audio('Audio samples/{}'.format(n), audio, n, sample_rate)\n",
" \n",
" resample_transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=fixed_sample_rate)\n",
" melspectogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=fixed_sample_rate, n_mels=128)\n",
" \n",
" audio_mono = torch.mean(resample_transform(audio), dim=0, keepdim=True)\n",
" plot_signal(audio_mono[0,:], 'Original waveform')\n",
" \n",
" melspectogram = melspectogram_transform(audio_mono)\n",
" plot_signal(melspectogram.squeeze().numpy(), 'Mel spectogram', 'hot')\n",
" plot_signal(torchaudio.transforms.AmplitudeToDB()(melspectogram).squeeze().numpy(), 'Mel spectogram DB', 'hot')"
]
}
],
@ -123,5 +131,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 1
"nbformat_minor": 4
}

View File

@ -154,6 +154,10 @@ if __name__ == "__main__":
# otherwise, the `rank` will always be set to the original value.
task = Task.init("examples", "test torch distributed", auto_connect_arg_parser={'rank': False})
if not dist.is_available():
print("torch.distributed is not supported for this platform")
exit(0)
if os.environ.get('MASTER_ADDR'):
dist.init_process_group(backend='gloo', rank=args.rank, world_size=args.nodes)
run(args.workers_in_node)

View File

@ -9,13 +9,13 @@ from __future__ import print_function
import tempfile
import os
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import RMSprop
from keras.utils import np_utils
import tensorflow as tf # noqa: F401
from tensorflow.keras import utils as np_utils
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from trains import Task, Logger

View File

@ -1,5 +1,5 @@
absl-py>=0.7.1
bokeh
bokeh>=1.4.0
numpy
pandas
pillow>=4.0