From f74c89a25dd5e835eaf0fb9aa981fb98de62ef9a Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Mon, 10 Aug 2020 08:04:58 +0300 Subject: [PATCH] Update pytorch examples --- .../pytorch/notebooks/audio/README.md | 1 + ...nb => audio_classifier_UrbanSound8K.ipynb} | 28 +- .../audio/audio_preprocessing_example.ipynb | 5 +- .../image/hyperparameter_search.ipynb | 16 +- .../image/image_classification_CIFAR10.ipynb | 10 +- .../table/download_and_preprocessing.ipynb | 310 ++++++++++++++++++ .../table/train_tabular_predictor.ipynb | 295 +++++++++++++++++ .../text/text_classification_AG_NEWS.ipynb | 8 +- 8 files changed, 640 insertions(+), 33 deletions(-) create mode 100644 examples/frameworks/pytorch/notebooks/audio/README.md rename examples/frameworks/pytorch/notebooks/audio/{audio_classification_UrbanSound8K.ipynb => audio_classifier_UrbanSound8K.ipynb} (94%) create mode 100644 examples/frameworks/pytorch/notebooks/table/download_and_preprocessing.ipynb create mode 100644 examples/frameworks/pytorch/notebooks/table/train_tabular_predictor.ipynb diff --git a/examples/frameworks/pytorch/notebooks/audio/README.md b/examples/frameworks/pytorch/notebooks/audio/README.md new file mode 100644 index 00000000..18d23aa4 --- /dev/null +++ b/examples/frameworks/pytorch/notebooks/audio/README.md @@ -0,0 +1 @@ +The `audio_classifier_UrbanSound8K.ipynb` example uses a small dataset based on [UrbanSound8K dataset](https://urbansounddataset.weebly.com/urbansound8k.html). \ No newline at end of file diff --git a/examples/frameworks/pytorch/notebooks/audio/audio_classification_UrbanSound8K.ipynb b/examples/frameworks/pytorch/notebooks/audio/audio_classifier_UrbanSound8K.ipynb similarity index 94% rename from examples/frameworks/pytorch/notebooks/audio/audio_classification_UrbanSound8K.ipynb rename to examples/frameworks/pytorch/notebooks/audio/audio_classifier_UrbanSound8K.ipynb index 782d2d72..6220025b 100644 --- a/examples/frameworks/pytorch/notebooks/audio/audio_classification_UrbanSound8K.ipynb +++ b/examples/frameworks/pytorch/notebooks/audio/audio_classifier_UrbanSound8K.ipynb @@ -35,7 +35,6 @@ "import io\n", "\n", "import pandas as pd\n", - "import numpy as np\n", "from pathlib2 import Path\n", "import matplotlib.pyplot as plt\n", "\n", @@ -50,6 +49,7 @@ "from torchvision.transforms import ToTensor\n", "\n", "from trains import Task\n", + "from trains.storage import StorageManager\n", "\n", "%matplotlib inline" ] @@ -60,7 +60,7 @@ "metadata": {}, "outputs": [], "source": [ - "task = Task.init(project_name='Audio Example', task_name='audio classifier')\n", + "task = Task.init(project_name='Audio Example', task_name='audio classification UrbanSound8K')\n", "configuration_dict = {'number_of_epochs': 10, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n", "configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n", "print(configuration_dict) # printing actual configuration (after override in remote mode)" @@ -77,8 +77,8 @@ }, "outputs": [], "source": [ - "# Download UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html)\n", - "path_to_UrbanSound8K = './data/UrbanSound8K'" + "# Download a sample dataset (https://allegro-datasets.s3.amazonaws.com/trains/UrbanSound8K.zip)based on UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html)\n", + "path_to_UrbanSound8K = StorageManager.get_local_copy(\"https://allegro-datasets.s3.amazonaws.com/trains/UrbanSound8K.zip\", extract_archive=True, )" ] }, { @@ -134,8 +134,8 @@ " return 
len(self.file_names)\n", "\n", "\n", - "csv_path = Path(path_to_UrbanSound8K) / 'metadata' / 'UrbanSound8K.csv'\n", - "file_path = Path(path_to_UrbanSound8K) / 'audio'\n", + "csv_path = Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'metadata' / 'UrbanSound8K.csv'\n", + "file_path = Path(path_to_UrbanSound8K) / 'UrbanSound8K' / 'audio'\n", "\n", "train_set = UrbanSoundDataset(csv_path, file_path, range(1,10))\n", "test_set = UrbanSoundDataset(csv_path, file_path, [10])\n", @@ -338,18 +338,24 @@ "metadata": { "colab": {}, "colab_type": "code", - "id": "X5lx3g_5zNey", - "scrolled": false + "id": "X5lx3g_5zNey" }, "outputs": [], "source": [ - "log_interval = 100\n", - "debug_interval = 200\n", + "log_interval = 10\n", + "debug_interval = 20\n", "for epoch in range(configuration_dict.get('number_of_epochs', 10)):\n", " train(model, epoch)\n", " test(model, epoch)\n", " scheduler.step()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -376,5 +382,5 @@ } }, "nbformat": 4, - "nbformat_minor": 1 + "nbformat_minor": 4 } diff --git a/examples/frameworks/pytorch/notebooks/audio/audio_preprocessing_example.ipynb b/examples/frameworks/pytorch/notebooks/audio/audio_preprocessing_example.ipynb index 6baae9fe..cd56102e 100644 --- a/examples/frameworks/pytorch/notebooks/audio/audio_preprocessing_example.ipynb +++ b/examples/frameworks/pytorch/notebooks/audio/audio_preprocessing_example.ipynb @@ -23,7 +23,6 @@ "outputs": [], "source": [ "import os\n", - "import torch\n", "import torchaudio\n", "from torch.utils.tensorboard import SummaryWriter\n", "import matplotlib.pyplot as plt\n", @@ -87,10 +86,10 @@ "cell_type": "code", "execution_count": null, "metadata": { + "scrolled": true, "pycharm": { "name": "#%%\n" - }, - "scrolled": true + } }, "outputs": [], "source": [ diff --git a/examples/frameworks/pytorch/notebooks/image/hyperparameter_search.ipynb b/examples/frameworks/pytorch/notebooks/image/hyperparameter_search.ipynb index 8baf115a..9cf3aeb8 100644 --- a/examples/frameworks/pytorch/notebooks/image/hyperparameter_search.ipynb +++ b/examples/frameworks/pytorch/notebooks/image/hyperparameter_search.ipynb @@ -12,8 +12,8 @@ "\n", "# pip install with locked versions\n", "! pip install -U pandas==1.0.3\n", - "! pip install -U trains==0.15.0\n", - "! pip install -U hpbandster==0.7.4 # Needed only for Bayesian optimization Hyper-Band" + "! pip install -U trains>=0.15.0\n", + "! 
pip install -U optuna==2.0.0rc0" ] }, { @@ -23,8 +23,8 @@ "outputs": [], "source": [ "from trains.automation import UniformParameterRange, UniformIntegerParameterRange\n", - "from trains.automation import RandomSearch, HyperParameterOptimizer\n", - "from trains.automation.hpbandster import OptimizerBOHB # Needed only for Bayesian optimization Hyper-Band\n", + "from trains.automation import HyperParameterOptimizer\n", + "from trains.automation.optuna import OptimizerOptuna\n", "\n", "from trains import Task" ] @@ -35,7 +35,7 @@ "metadata": {}, "outputs": [], "source": [ - "task = Task.init(project_name='Hyper-Parameter Search', task_name='Hyper-Parameter Optimization')" + "task = Task.init(project_name='Hyper-Parameter Search', task_name='Hyper-Parameter Optimization')\n" ] }, { @@ -47,7 +47,7 @@ "#####################################################################\n", "### Don't forget to replace this default id with your own task id ###\n", "#####################################################################\n", - "TEMPLATE_TASK_ID = 'd8e928460f98437c998f3597768597f8'" + "TEMPLATE_TASK_ID = 'd551a9990cb5451c9c744cc58201c612'" ] }, { @@ -71,7 +71,7 @@ " objective_metric_sign='max', # maximize or minimize the objective metric\n", " max_number_of_concurrent_tasks=3, # number of concurrent experiments\n", " # setting optimizer - trains supports GridSearch, RandomSearch or OptimizerBOHB\n", - " optimizer_class=OptimizerBOHB, # can be replaced with OptimizerBOHB\n", + " optimizer_class=OptimizerOptuna, # can be replaced with OptimizerBOHB\n", " execution_queue='default', # queue to schedule the experiments for execution\n", " optimization_time_limit=30., # time limit for each experiment (optional, ignored by OptimizerBOHB)\n", " pool_period_min=1, # Check the experiments every x minutes\n", @@ -90,7 +90,7 @@ "metadata": {}, "outputs": [], "source": [ - "optimizer.set_time_limit(in_minutes=120.0) # set the time limit for the optimization process\n", + "optimizer.set_time_limit(in_minutes=90.0) # set the time limit for the optimization process\n", "optimizer.start() \n", "optimizer.wait() # wait until process is done\n", "optimizer.stop() # make sure background optimization stopped" diff --git a/examples/frameworks/pytorch/notebooks/image/image_classification_CIFAR10.ipynb b/examples/frameworks/pytorch/notebooks/image/image_classification_CIFAR10.ipynb index c4c41703..00329805 100644 --- a/examples/frameworks/pytorch/notebooks/image/image_classification_CIFAR10.ipynb +++ b/examples/frameworks/pytorch/notebooks/image/image_classification_CIFAR10.ipynb @@ -12,10 +12,10 @@ "# jupyter nbextension enable --py widgetsnbextension\n", "\n", "# pip install with locked versions\n", - "! pip install -U torch==1.5.0\n", - "! pip install -U torchvision==0.6.0\n", + "! pip install -U torch==1.5.1\n", + "! pip install -U torchvision==0.6.1\n", "! pip install -U numpy==1.18.4\n", - "! pip install -U trains==0.15.0\n", + "! pip install -U trains>=0.15.0\n", "! 
pip install -U tensorboard==2.2.1"
    ]
   },
@@ -45,7 +45,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "task = Task.init(project_name='Image Example', task_name='image_classification_CIFAR10')\n",
+    "task = Task.init(project_name='Image Example', task_name='image classification CIFAR10')\n",
     "configuration_dict = {'number_of_epochs': 3, 'batch_size': 4, 'dropout': 0.25, 'base_lr': 0.001}\n",
     "configuration_dict = task.connect(configuration_dict)  # enabling configuration override by trains\n",
     "print(configuration_dict)  # printing actual configuration (after override in remote mode)"
@@ -240,4 +240,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/examples/frameworks/pytorch/notebooks/table/download_and_preprocessing.ipynb b/examples/frameworks/pytorch/notebooks/table/download_and_preprocessing.ipynb
new file mode 100644
index 00000000..f57a3ccc
--- /dev/null
+++ b/examples/frameworks/pytorch/notebooks/table/download_and_preprocessing.ipynb
@@ -0,0 +1,310 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! pip install -U pip\n",
+    "! pip install -U torch==1.5.1\n",
+    "! pip install -U trains>=0.15.1\n",
+    "! pip install -U pandas==1.0.4\n",
+    "! pip install -U numpy==1.18.4\n",
+    "! pip install -U pathlib2==2.3.5\n",
+    "! pip install -U scikit-learn==0.23.1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from collections import Counter\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "import torch\n",
+    "from datetime import datetime\n",
+    "from pathlib2 import Path\n",
+    "from trains import Task"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "task = Task.init(project_name='Table Example', task_name='tabular preprocessing')\n",
+    "logger = task.get_logger()\n",
+    "configuration_dict = {'test_size': 0.1, 'split_random_state': 0}\n",
+    "configuration_dict = task.connect(configuration_dict)  # enabling configuration override by trains\n",
+    "print(configuration_dict)  # printing actual configuration (after override in remote mode)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download the shelter-animal-outcomes dataset (https://www.kaggle.com/c/shelter-animal-outcomes)\n",
+    "# This dataset aims to improve understanding of trends in animal outcomes,\n",
+    "# which could help shelters focus their energy on specific animals who need extra help finding a new home. 
\n", + "path_to_ShelterAnimal = './data'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_set = pd.read_csv(Path(path_to_ShelterAnimal) / 'train.csv')\n", + "logger.report_table(title='Trainset - raw',series='pandas DataFrame',iteration=0, table_plot=train_set.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# **Pre-processing**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove hour and year from DateTime data\n", + "timestamp = pd.to_datetime(train_set['DateTime'])\n", + "months = [d.month for d in timestamp]\n", + "train_set['Month'] = pd.DataFrame(months).astype('object')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "age = train_set['AgeuponOutcome']\n", + "months_age = []\n", + "for val in age:\n", + " if pd.isnull(val):\n", + " months_age.append(val)\n", + " else:\n", + " amount, time_type = val.split(' ')\n", + " if 'day' in time_type:\n", + " mult = 1./30\n", + " if 'week' in time_type:\n", + " mult = 1./4\n", + " if 'month' in time_type:\n", + " mult = 1.\n", + " if 'year' in time_type:\n", + " mult = 12.\n", + " months_age.append(int(amount) * mult)\n", + "train_set['Age'] = pd.DataFrame(months_age).astype(np.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sex_neutered = train_set['SexuponOutcome']\n", + "sex = []\n", + "neutered = []\n", + "for val in sex_neutered:\n", + " if pd.isnull(val):\n", + " sex.append(val)\n", + " neutered.append(val)\n", + " elif 'Unknown' in val:\n", + " sex.append(np.nan)\n", + " neutered.append(np.nan)\n", + " else:\n", + " n, s = val.split(' ')\n", + " if n in ['Neutered', 'Spayed']:\n", + " neutered.append('Yes')\n", + " else:\n", + " neutered.append('No')\n", + " sex.append(s)\n", + "\n", + "train_set['Sex'] = pd.DataFrame(sex)\n", + "train_set['Neutered'] = pd.DataFrame(neutered)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove irrelevant columns\n", + "train_set.drop(columns= ['Name', 'OutcomeSubtype', 'AnimalID', 'DateTime', 'AgeuponOutcome', 'SexuponOutcome'], inplace=True)\n", + "logger.report_table(title='Trainset - after preprocessing',series='pandas DataFrame',iteration=0, table_plot=train_set.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## *Fill NA Values*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "object_columns = train_set.select_dtypes(include=['object']).copy()\n", + "numerical_columns = train_set.select_dtypes(include=['number']).copy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for col in object_columns.columns:\n", + " if object_columns[col].isnull().sum() > 0:\n", + " most_common = Counter(object_columns[col]).most_common(1)[0][0]\n", + " print('Column \"{}\": replacing null values with \"{}\"'.format(col, most_common))\n", + " train_set[col].fillna(most_common, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for col in numerical_columns.columns:\n", + " if numerical_columns[col].isnull().sum() > 0:\n", + " median_val = numerical_columns[col].median()\n", + " 
print('Column \"{}\": replacing null values with \"{}\"'.format(col, median_val))\n", + " train_set[col].fillna(median_val, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "logger.report_table(title='Trainset - after filling missing values',series='pandas DataFrame',iteration=0, table_plot=train_set.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## *Labels Encoding*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "out_encoding = train_set['OutcomeType'].astype('category').cat.categories\n", + "outcome_dict = {key: val for val,key in enumerate(out_encoding)}\n", + "task.upload_artifact('Outcome dictionary', outcome_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for col in object_columns.columns:\n", + " train_set[col] = train_set[col].astype('category').cat.codes\n", + "logger.report_table(title='Trainset - after labels encoding',series='pandas DataFrame',iteration=0, table_plot=train_set.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## *Splitting dataset*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X = train_set.drop(columns= ['OutcomeType'])\n", + "Y = train_set['OutcomeType']\n", + "X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=configuration_dict.get('test_size', 0.1), \n", + " random_state=configuration_dict.get('split_random_state', 0))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# making all variables categorical\n", + "object_columns_names = object_columns.drop(columns= ['OutcomeType']).columns\n", + "for col in object_columns_names:\n", + " X[col] = X[col].astype('category')\n", + "columns_categries = {col: len(X[col].cat.categories) for col in object_columns_names}\n", + "task.upload_artifact('Categries per column', columns_categries)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_df = X_train.join(Y_train)\n", + "train_df.to_csv(Path(path_to_ShelterAnimal) / 'train_processed.csv', index=False)\n", + "val_df = X_val.join(Y_val)\n", + "val_df.to_csv(Path(path_to_ShelterAnimal) / 'val_processed.csv', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "paths = {'train_data': str(Path(path_to_ShelterAnimal) / 'train_processed.csv'), 'val_data': str(Path(path_to_ShelterAnimal) / 'val_processed.csv')}\n", + "task.upload_artifact('Processed data', paths)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/frameworks/pytorch/notebooks/table/train_tabular_predictor.ipynb b/examples/frameworks/pytorch/notebooks/table/train_tabular_predictor.ipynb new file mode 100644 index 00000000..78c49e0b --- /dev/null +++ b/examples/frameworks/pytorch/notebooks/table/train_tabular_predictor.ipynb @@ -0,0 +1,295 
@@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! pip install -U pip\n", + "! pip install -U torch==1.5.1\n", + "! pip install -U trains>=0.15.1\n", + "! pip install -U pandas==1.0.4\n", + "! pip install -U numpy==1.18.4\n", + "! pip install -U tensorboard==2.2.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "from torch.utils.data import Dataset\n", + "from torch.utils.tensorboard import SummaryWriter\n", + "\n", + "from trains import Task" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "task = Task.init(project_name='Table Example', task_name='tabular prediction')\n", + "logger = task.get_logger()\n", + "configuration_dict = {'number_of_epochs': 30, 'batch_size': 100, 'dropout': 0.3, 'base_lr': 0.1}\n", + "configuration_dict = task.connect(configuration_dict) # enabling configuration override by trains\n", + "print(configuration_dict) # printing actual configuration (after override in remote mode)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "previous_task = Task.get_task('ed7570e1e12d41e5a06557c81fdf1046')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "preprocessed_data = previous_task.artifacts['Processed data'].get()\n", + "train_set = pd.read_csv(preprocessed_data['train_data'])\n", + "test_set = pd.read_csv(preprocessed_data['val_data'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "columns_categories = previous_task.artifacts['Categries per column'].get()\n", + "columns_categories_ordered = {key: columns_categories[key] for key in train_set.columns if key in columns_categories.keys()}\n", + "columns_numerical = [key for key in train_set.drop(columns= ['OutcomeType']).drop(columns=columns_categories_ordered).keys()]\n", + "embedding_sizes = [(n_categories, min(32, (n_categories+1)//2)) for _,n_categories in columns_categories_ordered.items()]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "outcome_dict = previous_task.artifacts['Outcome dictionary'].get()\n", + "reveresed_outcome_dict = {val: key for key, val in outcome_dict.items()}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class ShelterDataset(Dataset):\n", + " def __init__(self, X, Y, embedded_col_names):\n", + " X = X.copy()\n", + " self.X1 = X.loc[:,embedded_col_names].copy().values.astype(np.int64) #categorical columns\n", + " self.X2 = X.drop(columns=embedded_col_names).copy().values.astype(np.float32) #numerical columns\n", + " self.y = Y\n", + " \n", + " def __len__(self):\n", + " return len(self.y)\n", + " \n", + " def __getitem__(self, idx):\n", + " return self.X1[idx], self.X2[idx], self.y[idx]\n", + "\n", + "#creating train and valid datasets\n", + "train_ds = ShelterDataset(train_set.drop(columns= ['OutcomeType']), train_set['OutcomeType'], columns_categories_ordered.keys())\n", + "valid_ds = ShelterDataset(test_set.drop(columns= ['OutcomeType']), test_set['OutcomeType'], columns_categories_ordered.keys())" + ] + }, + { 
+ "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class ShelterModel(nn.Module):\n", + " def __init__(self, embedding_sizes, n_cont):\n", + " super().__init__()\n", + " self.embeddings = nn.ModuleList([nn.Embedding(categories, size) for categories,size in embedding_sizes])\n", + " n_emb = sum(e.embedding_dim for e in self.embeddings)\n", + " self.n_emb, self.n_cont = n_emb, n_cont\n", + " self.lin1 = nn.Linear(self.n_emb + self.n_cont, 200)\n", + " self.lin2 = nn.Linear(200, 70)\n", + " self.lin3 = nn.Linear(70, 5)\n", + " self.bn1 = nn.BatchNorm1d(self.n_cont)\n", + " self.bn2 = nn.BatchNorm1d(200)\n", + " self.bn3 = nn.BatchNorm1d(70)\n", + " self.emb_drop = nn.Dropout(0.6)\n", + " self.drops = nn.Dropout(configuration_dict.get('dropout', 0.25))\n", + "\n", + " def forward(self, x_cat, x_cont):\n", + " x = [e(x_cat[:,i]) for i,e in enumerate(self.embeddings)]\n", + " x = torch.cat(x, 1)\n", + " x = self.emb_drop(x)\n", + " x2 = self.bn1(x_cont)\n", + " x = torch.cat([x, x2], 1)\n", + " x = F.relu(self.lin1(x))\n", + " x = self.drops(x)\n", + " x = self.bn2(x)\n", + " x = F.relu(self.lin2(x))\n", + " x = self.drops(x)\n", + " x = self.bn3(x)\n", + " x = self.lin3(x)\n", + " return x\n", + "\n", + "model = ShelterModel(embedding_sizes, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "optimizer = torch.optim.SGD(model.parameters(), lr = configuration_dict.get('base_lr', 0.1), momentum = 0.9)\n", + "scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = configuration_dict.get('number_of_epochs', 15)//3, gamma = 0.1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu')\n", + "print('Device to use: {}'.format(device))\n", + "model.to(device)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tensorboard_writer = SummaryWriter('./tensorboard_logs')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def train_model(model, optim, train_dl):\n", + " model.train()\n", + " total = 0\n", + " sum_loss = 0\n", + " for x1, x2, y in train_dl:\n", + " batch = y.shape[0]\n", + " output = model(x1.to(device), x2.to(device))\n", + " loss = F.cross_entropy(output, y.to(device)) \n", + " optim.zero_grad()\n", + " loss.backward()\n", + " optim.step()\n", + " total += batch\n", + " sum_loss += batch*(loss.item())\n", + " return sum_loss/total" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def val_loss(model, valid_dl, epoch):\n", + " model.eval()\n", + " total = 0\n", + " sum_loss = 0\n", + " correct = 0\n", + " with torch.no_grad():\n", + " for x1, x2, y in valid_dl:\n", + " current_batch_size = y.shape[0]\n", + " out = model(x1.to(device), x2.to(device))\n", + " loss = F.cross_entropy(out, y.to(device))\n", + " sum_loss += current_batch_size*(loss.item())\n", + " total += current_batch_size\n", + " pred = torch.max(out, 1)[1]\n", + " correct += (pred.cpu() == y).float().sum().item()\n", + " print(\"\\t valid loss %.3f and accuracy %.3f\" % (sum_loss/total, correct/total))\n", + " tensorboard_writer.add_scalar('accuracy/total', correct/total, epoch)\n", + " \n", + " debug_categories = pd.DataFrame(x1.numpy(), 
columns=columns_categories_ordered.keys())\n",
+    "        debug_numerical = pd.DataFrame(x2.numpy(), columns=columns_numerical)\n",
+    "        debug_gt = pd.DataFrame(np.array([reveresed_outcome_dict[int(e)] for e in y]), columns=['GT'])\n",
+    "        debug_pred = pd.DataFrame(np.array([reveresed_outcome_dict[int(e)] for e in pred.cpu()]), columns=['Pred'])\n",
+    "        debug_table = debug_categories.join([debug_numerical, debug_gt, debug_pred])\n",
+    "        logger.report_table(title='Validation batch - ground truth vs. predictions',series='pandas DataFrame',iteration=epoch, table_plot=debug_table.head())\n",
+    "    return sum_loss/total, correct/total"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train_loop(model, epochs):\n",
+    "    for i in range(epochs): \n",
+    "        loss = train_model(model, optimizer, train_dl)\n",
+    "        print(\"Epoch {}: training loss {}\".format(i, loss))\n",
+    "        tensorboard_writer.add_scalar('training loss/loss', loss, i)\n",
+    "        tensorboard_writer.add_scalar('learning rate/lr', optimizer.param_groups[0]['lr'], i)\n",
+    "        \n",
+    "        val_loss(model, valid_dl, i)\n",
+    "        scheduler.step()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_dl = torch.utils.data.DataLoader(train_ds, batch_size=configuration_dict.get('batch_size', 100), shuffle=True, pin_memory=True, num_workers=1)\n",
+    "valid_dl = torch.utils.data.DataLoader(valid_ds, batch_size=configuration_dict.get('batch_size', 100), shuffle=False, pin_memory=True, num_workers=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_loop(model, epochs=configuration_dict.get('number_of_epochs', 30))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/frameworks/pytorch/notebooks/text/text_classification_AG_NEWS.ipynb b/examples/frameworks/pytorch/notebooks/text/text_classification_AG_NEWS.ipynb
index 1990f94d..5ab63d2b 100644
--- a/examples/frameworks/pytorch/notebooks/text/text_classification_AG_NEWS.ipynb
+++ b/examples/frameworks/pytorch/notebooks/text/text_classification_AG_NEWS.ipynb
@@ -25,10 +25,7 @@
     "\n",
     "import torch\n",
     "import torch.nn as nn\n",
-    "import torch.nn.functional as F\n",
-    "import torchtext\n",
     "from torchtext.datasets import text_classification\n",
-    "from torch.utils.data import DataLoader\n",
     "from torch.utils.tensorboard import SummaryWriter\n",
     "\n",
     "from trains import Task\n",
@@ -264,7 +261,6 @@
    },
    "outputs": [],
    "source": [
-    "import re\n",
     "from torchtext.data.utils import ngrams_iterator\n",
     "from torchtext.data.utils import get_tokenizer\n",
     "\n",
@@ -313,5 +309,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 1
-}
\ No newline at end of file
+ "nbformat_minor": 4
+}