Update pytorch examples

This commit is contained in:
allegroai 2020-08-10 08:04:58 +03:00
parent 73bd8c2714
commit f74c89a25d
8 changed files with 640 additions and 33 deletions

View File

@ -0,0 +1 @@
The `audio_classifier_UrbanSound8K.ipynb` example uses a small dataset based on the [UrbanSound8K dataset](https://urbansounddataset.weebly.com/urbansound8k.html).

View File

@ -35,7 +35,6 @@
"import io\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"from pathlib2 import Path\n",
"import matplotlib.pyplot as plt\n",
"\n",
@ -50,6 +49,7 @@
"from torchvision.transforms import ToTensor\n",
"\n",
"from trains import Task\n",
"from trains.storage import StorageManager\n",
"\n",
"%matplotlib inline"
]
@ -60,7 +60,7 @@
"metadata": {},
"outputs": [],
"source": [
"task = Task.init(project_name='Audio Example', task_name='audio classifier')\n",
"task = Task.init(project_name='Audio Example', task_name='audio classification UrbanSound8K')\n",
"configuration_dict = {'number_of_epochs': 10, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
@ -77,8 +77,8 @@
},
"outputs": [],
"source": [
"# Download UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html)\n",
"path_to_UrbanSound8K = './data/UrbanSound8K'"
"# Download a sample dataset (https://allegro-datasets.s3.amazonaws.com/trains/UrbanSound8K.zip)based on UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html)\n",
"path_to_UrbanSound8K = StorageManager.get_local_copy(\"https://allegro-datasets.s3.amazonaws.com/trains/UrbanSound8K.zip\", extract_archive=True, )"
]
},
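
The StorageManager call above downloads the zip once into the local trains cache and returns the path of the extracted copy, so repeated runs reuse the download. A minimal sketch of the resulting usage (assuming default cache settings; the inner 'UrbanSound8K' folder comes from the archive itself, which is why it reappears in the paths further below):

path_to_UrbanSound8K = StorageManager.get_local_copy(
    "https://allegro-datasets.s3.amazonaws.com/trains/UrbanSound8K.zip",
    extract_archive=True)  # cached locally; returns the extraction folder
csv_path = Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'metadata' / 'UrbanSound8K.csv'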
{
@ -134,8 +134,8 @@
" return len(self.file_names)\n",
"\n",
"\n",
"csv_path = Path(path_to_UrbanSound8K) / 'metadata' / 'UrbanSound8K.csv'\n",
"file_path = Path(path_to_UrbanSound8K) / 'audio'\n",
"csv_path = Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'metadata' / 'UrbanSound8K.csv'\n",
"file_path = Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'audio'\n",
"\n",
"train_set = UrbanSoundDataset(csv_path, file_path, range(1,10))\n",
"test_set = UrbanSoundDataset(csv_path, file_path, [10])\n",
@ -338,18 +338,24 @@
"metadata": {
"colab": {},
"colab_type": "code",
"id": "X5lx3g_5zNey",
"scrolled": false
"id": "X5lx3g_5zNey"
},
"outputs": [],
"source": [
"log_interval = 100\n",
"debug_interval = 200\n",
"log_interval = 10\n",
"debug_interval = 20\n",
"for epoch in range(configuration_dict.get('number_of_epochs', 10)):\n",
" train(model, epoch)\n",
" test(model, epoch)\n",
" scheduler.step()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@ -376,5 +382,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 1
"nbformat_minor": 4
}

View File

@ -23,7 +23,6 @@
"outputs": [],
"source": [
"import os\n",
"import torch\n",
"import torchaudio\n",
"from torch.utils.tensorboard import SummaryWriter\n",
"import matplotlib.pyplot as plt\n",
@ -87,10 +86,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true,
"pycharm": {
"name": "#%%\n"
},
"scrolled": true
}
},
"outputs": [],
"source": [

View File

@ -12,8 +12,8 @@
"\n",
"# pip install with locked versions\n",
"! pip install -U pandas==1.0.3\n",
"! pip install -U trains==0.15.0\n",
"! pip install -U hpbandster==0.7.4 # Needed only for Bayesian optimization Hyper-Band"
"! pip install -U trains>=0.15.0\n",
"! pip install -U optuna==2.0.0rc0"
]
},
{
@ -23,8 +23,8 @@
"outputs": [],
"source": [
"from trains.automation import UniformParameterRange, UniformIntegerParameterRange\n",
"from trains.automation import RandomSearch, HyperParameterOptimizer\n",
"from trains.automation.hpbandster import OptimizerBOHB # Needed only for Bayesian optimization Hyper-Band\n",
"from trains.automation import HyperParameterOptimizer\n",
"from trains.automation.optuna import OptimizerOptuna\n",
"\n",
"from trains import Task"
]
@ -35,7 +35,7 @@
"metadata": {},
"outputs": [],
"source": [
"task = Task.init(project_name='Hyper-Parameter Search', task_name='Hyper-Parameter Optimization')"
"task = Task.init(project_name='Hyper-Parameter Search', task_name='Hyper-Parameter Optimization')\n"
]
},
{
@ -47,7 +47,7 @@
"#####################################################################\n",
"### Don't forget to replace this default id with your own task id ###\n",
"#####################################################################\n",
"TEMPLATE_TASK_ID = 'd8e928460f98437c998f3597768597f8'"
"TEMPLATE_TASK_ID = 'd551a9990cb5451c9c744cc58201c612'"
]
},
{
@ -71,7 +71,7 @@
" objective_metric_sign='max', # maximize or minimize the objective metric\n",
" max_number_of_concurrent_tasks=3, # number of concurrent experiments\n",
" # setting optimizer - trains supports GridSearch, RandomSearch or OptimizerBOHB\n",
" optimizer_class=OptimizerBOHB, # can be replaced with OptimizerBOHB\n",
" optimizer_class=OptimizerOptuna, # can be replaced with OptimizerBOHB\n",
" execution_queue='default', # queue to schedule the experiments for execution\n",
" optimization_time_limit=30., # time limit for each experiment (optional, ignored by OptimizerBOHB)\n",
" pool_period_min=1, # Check the experiments every x minutes\n",
@ -90,7 +90,7 @@
"metadata": {},
"outputs": [],
"source": [
"optimizer.set_time_limit(in_minutes=120.0) # set the time limit for the optimization process\n",
"optimizer.set_time_limit(in_minutes=90.0) # set the time limit for the optimization process\n",
"optimizer.start() \n",
"optimizer.wait() # wait until process is done\n",
"optimizer.stop() # make sure background optimization stopped"

View File

@ -12,10 +12,10 @@
"# jupyter nbextension enable --py widgetsnbextension\n",
"\n",
"# pip install with locked versions\n",
"! pip install -U torch==1.5.0\n",
"! pip install -U torchvision==0.6.0\n",
"! pip install -U torch==1.5.1\n",
"! pip install -U torchvision==0.6.1\n",
"! pip install -U numpy==1.18.4\n",
"! pip install -U trains==0.15.0\n",
"! pip install -U trains>=0.15.0\n",
"! pip install -U tensorboard==2.2.1"
]
},
@ -45,7 +45,7 @@
"metadata": {},
"outputs": [],
"source": [
"task = Task.init(project_name='Image Example', task_name='image_classification_CIFAR10')\n",
"task = Task.init(project_name='Image Example', task_name='image classification CIFAR10')\n",
"configuration_dict = {'number_of_epochs': 3, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
@ -240,4 +240,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

View File

@ -0,0 +1,310 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"! pip install -U pip\n",
"! pip install -U torch==1.5.1\n",
"! pip install -U trains>=0.15.1\n",
"! pip install -U pandas==1.0.4\n",
"! pip install -U numpy==1.18.4\n",
"! pip install -U pathlib2==2.3.5\n",
"! pip install -U scikit-learn==0.23.1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from collections import Counter\n",
"from sklearn.model_selection import train_test_split\n",
"import torch\n",
"from datetime import datetime\n",
"from pathlib2 import Path\n",
"from trains import Task"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"task = Task.init(project_name='Table Example', task_name='tabular preprocessing')\n",
"logger = task.get_logger()\n",
"configuration_dict = {'test_size': 0.1, 'split_random_state': 0}\n",
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Download shelter-animal-outcomes dataset (https://www.kaggle.com/c/shelter-animal-outcomes)\n",
"# This dataset aims to improve understanding trends in animal outcome,\n",
"# Which could help shelters focus their energy on specific animals who need extra help finding a new home. \n",
"path_to_ShelterAnimal = './data'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_set = pd.read_csv(Path(path_to_ShelterAnimal) / 'train.csv')\n",
"logger.report_table(title='Trainset - raw',series='pandas DataFrame',iteration=0, table_plot=train_set.head())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# **Pre-processing**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Remove hour and year from DateTime data\n",
"timestamp = pd.to_datetime(train_set['DateTime'])\n",
"months = [d.month for d in timestamp]\n",
"train_set['Month'] = pd.DataFrame(months).astype('object')"
]
},
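
The same month extraction can be written as one pandas expression; a sketch, assuming the default RangeIndex that read_csv produces:

train_set['Month'] = pd.to_datetime(train_set['DateTime']).dt.month.astype('object')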
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"age = train_set['AgeuponOutcome']\n",
"months_age = []\n",
"for val in age:\n",
" if pd.isnull(val):\n",
" months_age.append(val)\n",
" else:\n",
" amount, time_type = val.split(' ')\n",
" if 'day' in time_type:\n",
" mult = 1./30\n",
" if 'week' in time_type:\n",
" mult = 1./4\n",
" if 'month' in time_type:\n",
" mult = 1.\n",
" if 'year' in time_type:\n",
" mult = 12.\n",
" months_age.append(int(amount) * mult)\n",
"train_set['Age'] = pd.DataFrame(months_age).astype(np.float32)"
]
},
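
The unit conversion above can also be table-driven; a sketch under the same assumption that every non-null value is an '<amount> <unit>' string using one of the four units:

unit_to_months = {'day': 1./30, 'week': 1./4, 'month': 1., 'year': 12.}

def age_in_months(val):
    if pd.isnull(val):
        return val
    amount, time_type = val.split(' ')
    # 'in' also matches plural forms such as 'weeks' or 'years'
    mult = next(m for unit, m in unit_to_months.items() if unit in time_type)
    return int(amount) * mult

train_set['Age'] = train_set['AgeuponOutcome'].map(age_in_months).astype(np.float32)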
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sex_neutered = train_set['SexuponOutcome']\n",
"sex = []\n",
"neutered = []\n",
"for val in sex_neutered:\n",
" if pd.isnull(val):\n",
" sex.append(val)\n",
" neutered.append(val)\n",
" elif 'Unknown' in val:\n",
" sex.append(np.nan)\n",
" neutered.append(np.nan)\n",
" else:\n",
" n, s = val.split(' ')\n",
" if n in ['Neutered', 'Spayed']:\n",
" neutered.append('Yes')\n",
" else:\n",
" neutered.append('No')\n",
" sex.append(s)\n",
"\n",
"train_set['Sex'] = pd.DataFrame(sex)\n",
"train_set['Neutered'] = pd.DataFrame(neutered)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Remove irrelevant columns\n",
"train_set.drop(columns= ['Name', 'OutcomeSubtype', 'AnimalID', 'DateTime', 'AgeuponOutcome', 'SexuponOutcome'], inplace=True)\n",
"logger.report_table(title='Trainset - after preprocessing',series='pandas DataFrame',iteration=0, table_plot=train_set.head())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## *Fill NA Values*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"object_columns = train_set.select_dtypes(include=['object']).copy()\n",
"numerical_columns = train_set.select_dtypes(include=['number']).copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for col in object_columns.columns:\n",
" if object_columns[col].isnull().sum() > 0:\n",
" most_common = Counter(object_columns[col]).most_common(1)[0][0]\n",
" print('Column \"{}\": replacing null values with \"{}\"'.format(col, most_common))\n",
" train_set[col].fillna(most_common, inplace=True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for col in numerical_columns.columns:\n",
" if numerical_columns[col].isnull().sum() > 0:\n",
" median_val = numerical_columns[col].median()\n",
" print('Column \"{}\": replacing null values with \"{}\"'.format(col, median_val))\n",
" train_set[col].fillna(median_val, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"logger.report_table(title='Trainset - after filling missing values',series='pandas DataFrame',iteration=0, table_plot=train_set.head())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## *Labels Encoding*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"out_encoding = train_set['OutcomeType'].astype('category').cat.categories\n",
"outcome_dict = {key: val for val,key in enumerate(out_encoding)}\n",
"task.upload_artifact('Outcome dictionary', outcome_dict)"
]
},
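
The uploaded dictionary is what the companion prediction notebook (added later in this commit) reads back; a minimal sketch of the retrieval side, with a placeholder task id:

from trains import Task

preprocess_task = Task.get_task(task_id='<preprocessing task id>')  # placeholder
outcome_dict = preprocess_task.artifacts['Outcome dictionary'].get()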
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for col in object_columns.columns:\n",
" train_set[col] = train_set[col].astype('category').cat.codes\n",
"logger.report_table(title='Trainset - after labels encoding',series='pandas DataFrame',iteration=0, table_plot=train_set.head())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## *Splitting dataset*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X = train_set.drop(columns= ['OutcomeType'])\n",
"Y = train_set['OutcomeType']\n",
"X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=configuration_dict.get('test_size', 0.1), \n",
" random_state=configuration_dict.get('split_random_state', 0))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# making all variables categorical\n",
"object_columns_names = object_columns.drop(columns= ['OutcomeType']).columns\n",
"for col in object_columns_names:\n",
" X[col] = X[col].astype('category')\n",
"columns_categries = {col: len(X[col].cat.categories) for col in object_columns_names}\n",
"task.upload_artifact('Categries per column', columns_categries)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_df = X_train.join(Y_train)\n",
"train_df.to_csv(Path(path_to_ShelterAnimal) / 'train_processed.csv', index=False)\n",
"val_df = X_val.join(Y_val)\n",
"val_df.to_csv(Path(path_to_ShelterAnimal) / 'val_processed.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"paths = {'train_data': str(Path(path_to_ShelterAnimal) / 'train_processed.csv'), 'val_data': str(Path(path_to_ShelterAnimal) / 'val_processed.csv')}\n",
"task.upload_artifact('Processed data', paths)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -0,0 +1,295 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"! pip install -U pip\n",
"! pip install -U torch==1.5.1\n",
"! pip install -U trains>=0.15.1\n",
"! pip install -U pandas==1.0.4\n",
"! pip install -U numpy==1.18.4\n",
"! pip install -U tensorboard==2.2.1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"from torch.utils.data import Dataset\n",
"from torch.utils.tensorboard import SummaryWriter\n",
"\n",
"from trains import Task"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"task = Task.init(project_name='Table Example', task_name='tabular prediction')\n",
"logger = task.get_logger()\n",
"configuration_dict = {'number_of_epochs': 30, 'batch_size': 100, 'dropout': 0.3, 'base_lr': 0.1}\n",
"configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n",
"print(configuration_dict) # printing actual configuration (after override in remote mode)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"previous_task = Task.get_task('ed7570e1e12d41e5a06557c81fdf1046')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"preprocessed_data = previous_task.artifacts['Processed data'].get()\n",
"train_set = pd.read_csv(preprocessed_data['train_data'])\n",
"test_set = pd.read_csv(preprocessed_data['val_data'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"columns_categories = previous_task.artifacts['Categries per column'].get()\n",
"columns_categories_ordered = {key: columns_categories[key] for key in train_set.columns if key in columns_categories.keys()}\n",
"columns_numerical = [key for key in train_set.drop(columns= ['OutcomeType']).drop(columns=columns_categories_ordered).keys()]\n",
"embedding_sizes = [(n_categories, min(32, (n_categories+1)//2)) for _,n_categories in columns_categories_ordered.items()]"
]
},
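
The embedding-size rule above uses roughly half the category count, capped at 32 dimensions. A quick worked example:

# 3 categories  -> min(32, (3 + 1) // 2)  = 2  -> embedding (3, 2)
# 44 categories -> min(32, (44 + 1) // 2) = 22 -> embedding (44, 22)
# 90 categories -> min(32, (90 + 1) // 2) = 32 -> embedding (90, 32)
assert min(32, (44 + 1) // 2) == 22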
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"outcome_dict = previous_task.artifacts['Outcome dictionary'].get()\n",
"reveresed_outcome_dict = {val: key for key, val in outcome_dict.items()}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class ShelterDataset(Dataset):\n",
" def __init__(self, X, Y, embedded_col_names):\n",
" X = X.copy()\n",
" self.X1 = X.loc[:,embedded_col_names].copy().values.astype(np.int64) #categorical columns\n",
" self.X2 = X.drop(columns=embedded_col_names).copy().values.astype(np.float32) #numerical columns\n",
" self.y = Y\n",
" \n",
" def __len__(self):\n",
" return len(self.y)\n",
" \n",
" def __getitem__(self, idx):\n",
" return self.X1[idx], self.X2[idx], self.y[idx]\n",
"\n",
"#creating train and valid datasets\n",
"train_ds = ShelterDataset(train_set.drop(columns= ['OutcomeType']), train_set['OutcomeType'], columns_categories_ordered.keys())\n",
"valid_ds = ShelterDataset(test_set.drop(columns= ['OutcomeType']), test_set['OutcomeType'], columns_categories_ordered.keys())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class ShelterModel(nn.Module):\n",
" def __init__(self, embedding_sizes, n_cont):\n",
" super().__init__()\n",
" self.embeddings = nn.ModuleList([nn.Embedding(categories, size) for categories,size in embedding_sizes])\n",
" n_emb = sum(e.embedding_dim for e in self.embeddings)\n",
" self.n_emb, self.n_cont = n_emb, n_cont\n",
" self.lin1 = nn.Linear(self.n_emb + self.n_cont, 200)\n",
" self.lin2 = nn.Linear(200, 70)\n",
" self.lin3 = nn.Linear(70, 5)\n",
" self.bn1 = nn.BatchNorm1d(self.n_cont)\n",
" self.bn2 = nn.BatchNorm1d(200)\n",
" self.bn3 = nn.BatchNorm1d(70)\n",
" self.emb_drop = nn.Dropout(0.6)\n",
" self.drops = nn.Dropout(configuration_dict.get('dropout', 0.25))\n",
"\n",
" def forward(self, x_cat, x_cont):\n",
" x = [e(x_cat[:,i]) for i,e in enumerate(self.embeddings)]\n",
" x = torch.cat(x, 1)\n",
" x = self.emb_drop(x)\n",
" x2 = self.bn1(x_cont)\n",
" x = torch.cat([x, x2], 1)\n",
" x = F.relu(self.lin1(x))\n",
" x = self.drops(x)\n",
" x = self.bn2(x)\n",
" x = F.relu(self.lin2(x))\n",
" x = self.drops(x)\n",
" x = self.bn3(x)\n",
" x = self.lin3(x)\n",
" return x\n",
"\n",
"model = ShelterModel(embedding_sizes, 1)"
]
},
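
A quick sanity check of the tensor shapes through the network; a sketch assuming a dummy batch of 8, the single continuous feature ('Age'), and the 5 outcome classes fixed by lin3:

x_cat = torch.stack([torch.randint(0, n, (8,)) for n, _ in embedding_sizes], dim=1)
x_cont = torch.randn(8, 1)  # one continuous column
assert model(x_cat, x_cont).shape == (8, 5)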
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"optimizer = torch.optim.SGD(model.parameters(), lr = configuration_dict.get('base_lr', 0.1), momentum = 0.9)\n",
"scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = configuration_dict.get('number_of_epochs', 15)//3, gamma = 0.1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu')\n",
"print('Device to use: {}'.format(device))\n",
"model.to(device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tensorboard_writer = SummaryWriter('./tensorboard_logs')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def train_model(model, optim, train_dl):\n",
" model.train()\n",
" total = 0\n",
" sum_loss = 0\n",
" for x1, x2, y in train_dl:\n",
" batch = y.shape[0]\n",
" output = model(x1.to(device), x2.to(device))\n",
" loss = F.cross_entropy(output, y.to(device)) \n",
" optim.zero_grad()\n",
" loss.backward()\n",
" optim.step()\n",
" total += batch\n",
" sum_loss += batch*(loss.item())\n",
" return sum_loss/total"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def val_loss(model, valid_dl, epoch):\n",
" model.eval()\n",
" total = 0\n",
" sum_loss = 0\n",
" correct = 0\n",
" with torch.no_grad():\n",
" for x1, x2, y in valid_dl:\n",
" current_batch_size = y.shape[0]\n",
" out = model(x1.to(device), x2.to(device))\n",
" loss = F.cross_entropy(out, y.to(device))\n",
" sum_loss += current_batch_size*(loss.item())\n",
" total += current_batch_size\n",
" pred = torch.max(out, 1)[1]\n",
" correct += (pred.cpu() == y).float().sum().item()\n",
" print(\"\\t valid loss %.3f and accuracy %.3f\" % (sum_loss/total, correct/total))\n",
" tensorboard_writer.add_scalar('accuracy/total', correct/total, epoch)\n",
" \n",
" debug_categories = pd.DataFrame(x1.numpy(), columns=columns_categories_ordered.keys())\n",
" debug_numercal = pd.DataFrame(x2.numpy(), columns=columns_numerical)\n",
" debug_gt = pd.DataFrame(np.array([reveresed_outcome_dict[int(e)] for e in y]), columns=['GT'])\n",
" debug_pred = pd.DataFrame(np.array([reveresed_outcome_dict[int(e)] for e in pred.cpu()]), columns=['Pred'])\n",
" debug_table = debug_categories.join([debug_numercal, debug_gt, debug_pred])\n",
" logger.report_table(title='Trainset - after labels encoding',series='pandas DataFrame',iteration=epoch, table_plot=debug_table.head())\n",
" return sum_loss/total, correct/total"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def train_loop(model, epochs):\n",
" for i in range(epochs): \n",
" loss = train_model(model, optimizer, train_dl)\n",
" print(\"Epoch {}: training loss {}\".format(i, loss))\n",
" tensorboard_writer.add_scalar('training loss/loss', loss, i)\n",
" tensorboard_writer.add_scalar('learning rate/lr', optimizer.param_groups[0]['lr'], i)\n",
" \n",
" val_loss(model, valid_dl, i)\n",
" scheduler.step()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_dl = torch.utils.data.DataLoader(train_ds, batch_size=configuration_dict.get('batch_size', 100), shuffle=True, pin_memory=True, num_workers=1)\n",
"valid_dl = torch.utils.data.DataLoader(valid_ds, batch_size=configuration_dict.get('batch_size', 100), shuffle=False, pin_memory=True, num_workers=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_loop(model, epochs=configuration_dict.get('number_of_epochs', 30))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -25,10 +25,7 @@
"\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torchtext\n",
"from torchtext.datasets import text_classification\n",
"from torch.utils.data import DataLoader\n",
"from torch.utils.tensorboard import SummaryWriter\n",
"\n",
"from trains import Task\n",
@ -264,7 +261,6 @@
},
"outputs": [],
"source": [
"import re\n",
"from torchtext.data.utils import ngrams_iterator\n",
"from torchtext.data.utils import get_tokenizer\n",
"\n",
@ -313,5 +309,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 1
}
"nbformat_minor": 4
}