tree-of-thought-llm/quant_experiments/tot_torch_quant.ipynb

{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "5c-auaj_0WiH"
      },
      "outputs": [],
      "source": [
        "#imports and dependencies\n",
        "import torch\n",
        "import torch.quantization\n",
        "# !pip install datasets\n",
        "from datasets import load_dataset\n",
        "from torch.utils.data import DataLoader\n",
        "import random\n",
        "import multiprocessing\n",
        "import os\n",
        "from transformers import AutoTokenizer, AutoModelForCausalLM"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "vTkIU9zh0Xfc"
      },
      "source": [
        "# Setup and utilities"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "dFE5cL2wil5L",
        "outputId": "2fa7a10b-ef28-4a93-b933-4a5b63cd844b"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|\n",
            "    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|\n",
            "    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|\n",
            "    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|\n",
            "    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|\n",
            "\n",
            "    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.\n",
            "    Setting a new token will erase the existing one.\n",
            "    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n",
            "Enter your token (input will not be visible): \n",
            "Add token as git credential? (Y/n) n\n",
            "Token is valid (permission: write).\n",
            "The token `hpml` has been saved to /root/.cache/huggingface/stored_tokens\n",
            "Your token has been saved to /root/.cache/huggingface/token\n",
            "Login successful.\n",
            "The current active token is: `hpml`\n"
          ]
        }
      ],
      "source": [
        "!huggingface-cli login"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 176,
          "referenced_widgets": [
            "a389778f36d44ff48c54dcb49c83558c",
            "de8e2f0737d3424d8df23fc154e34348",
            "179afcfbf805433eb127212e40a6f140",
            "2542f7ec25cd454cb01bad65f46e9301",
            "55ab70bad9484de3aff788767ccf0ac5",
            "b8d3f99951804b64a26e82cb78cf6a7f",
            "0335aa27984b45de9127c6cc6936f40f",
            "64e7ae2225ab44049080312914519756",
            "ca17213c0b464f3e88416dfafd06bf5c",
            "09e55b7734ea474bbf227a5f047562ba",
            "91952d933ef64beb8b92b6b19dcfb15c"
          ]
        },
        "id": "7B1HhSH1TK73",
        "outputId": "0fb8b17f-e0ff-482f-e030-ccc348df9c20"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
            "You will be able to reuse this secret in all of your notebooks.\n",
            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "a389778f36d44ff48c54dcb49c83558c"
            }
          },
          "metadata": {}
        }
      ],
      "source": [
        "# Load model directly\n",
        "tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Llama-3.2-3B-Instruct\")\n",
        "model = AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-3.2-3B-Instruct\")\n",
        "\n",
        "tokenizer.pad_token = tokenizer.eos_token"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_qdH50KBH2IC",
        "outputId": "26144009-7cfa-4677-9285-be2e00f1502f"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Size (MB): 12851.09892\n"
          ]
        }
      ],
      "source": [
        "#original memory usage for llama 3.2 3b\n",
        "torch.save(model.state_dict(), \"temp.p\")\n",
        "print('Size (MB):', os.path.getsize(\"temp.p\")/1e6)\n",
        "os.remove('temp.p')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "as3IaCEeZDKb",
        "outputId": "2c3e7b8b-7a9f-42a2-81fc-7e5444c5ddbd"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "cuda\n"
          ]
        }
      ],
      "source": [
        "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
        "# device = 'cpu'\n",
        "model.to(device)\n",
        "print(device)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "id": "Ne5lpRHMIYx_"
      },
      "outputs": [],
      "source": [
        "class benchmark_dataset(torch.utils.data.Dataset):\n",
        "  '''formats the data for dataloader'''\n",
        "\n",
        "  def __init__(self, input, labels, tokenizer, filter_n=150):\n",
        "    '''constructor. input samples and output labels'''\n",
        "\n",
        "    self.input = input\n",
        "    self.labels = labels\n",
        "    self.tokenizer = tokenizer\n",
        "\n",
        "    self.filter_len(filter_n)\n",
        "\n",
        "  def filter_len(self, n):\n",
        "\n",
        "    new_input = []\n",
        "    new_label = []\n",
        "\n",
        "    for q, a in zip(self.input, self.labels):\n",
        "      tk_len_q = len(tokenizer(str(q), return_tensors='pt')['input_ids'][0])\n",
        "      tk_len_a = len(tokenizer(str(a), return_tensors='pt')['input_ids'][0])\n",
        "\n",
        "      if tk_len_q <= n and tk_len_a <= n:\n",
        "        new_input.append(q)\n",
        "        new_label.append(a)\n",
        "\n",
        "    print(f\"\"\"\n",
        "    Len of Original Input: {len(self.input)}\n",
        "    Len of Original Labels: {len(self.labels)}\n",
        "    Len of New_Input: {len(new_input)}\n",
        "    Len of New_Label: {len(new_label)}\n",
        "\n",
        "    Sample Input, Label: {new_input[0], new_label[0]}\n",
        "\n",
        "    \"\"\")\n",
        "\n",
        "    self.input = new_input\n",
        "    self.labels = new_label\n",
        "\n",
        "  def __len__(self):\n",
        "    return len(self.input)\n",
        "\n",
        "  def __getitem__(self, idx):\n",
        "\n",
        "    return {\"question\": self.input[idx], \"answer\": self.labels[idx]}\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "id": "Y3wsylpvajPZ"
      },
      "outputs": [],
      "source": [
        "def format_for_mm(question, choices):\n",
        "  '''\n",
        "  Formats questions and choices into one multiple-choice-question string\n",
        "  '''\n",
        "  return [f\"\"\"Choose the choice that best answer the following question:\n",
        "  Question:\n",
        "  {q.strip()}\n",
        "  Choices:\n",
        "  {c}\n",
        "  \"\"\"\n",
        "  for q, c in zip(question, choices)]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 216
        },
        "id": "aHQmNki7SV-G",
        "outputId": "bed40047-571a-40e0-fe3d-e1166d63cdfc"
      },
      "outputs": [
        {
          "output_type": "error",
          "ename": "NameError",
          "evalue": "name 'math_train' is not defined",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
            "\u001b[0;32m<ipython-input-7-942b6603e6a6>\u001b[0m in \u001b[0;36m<cell line: 28>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     26\u001b[0m   \u001b[0;32mreturn\u001b[0m \u001b[0mtk_len\u001b[0m \u001b[0;31m#return tk_len as courtesy for further examination if needed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 28\u001b[0;31m \u001b[0mmath_tklen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplot_data_dist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmath_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     29\u001b[0m \u001b[0mgpqa_tklen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplot_data_dist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgpqa\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     30\u001b[0m \u001b[0mmmlu_tklen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplot_data_dist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmmlu_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mNameError\u001b[0m: name 'math_train' is not defined"
          ]
        }
      ],
      "source": [
        "#to determine the max length of each sample token sequence (below), i'm going to take a quick look\n",
        "#at the distributions\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "def plot_data_dist(ds, input=True):\n",
        "  '''\n",
        "  plot the token length distribution of inputs in a dataset to understand where to truncate\n",
        "  bc for qat, i dont have enough memory to feed the entire input\n",
        "  esp. in a left skew case, which most of these datasets entail, makes sense to cut off the long right tail\n",
        "  '''\n",
        "  tk_len = []\n",
        "  if input:\n",
        "    for sample in ds:\n",
        "      tk_len.append(len(tokenizer(str(sample['question']), return_tensors='pt')['input_ids'][0]))\n",
        "\n",
        "    plt.hist(tk_len)\n",
        "    plt.show()\n",
        "\n",
        "  else:\n",
        "    for sample in ds:\n",
        "      tk_len.append(len(tokenizer(str(sample['answer']), return_tensors='pt')['input_ids'][0]))\n",
        "\n",
        "    plt.hist(tk_len)\n",
        "    plt.show()\n",
        "\n",
        "  return tk_len #return tk_len as courtesy for further examination if needed\n",
        "\n",
        "math_tklen = plot_data_dist(math_train)\n",
        "gpqa_tklen = plot_data_dist(gpqa)\n",
        "mmlu_tklen = plot_data_dist(mmlu_train)\n",
        "\n",
        "math_tklen = plot_data_dist(math_train, input=False)\n",
        "gpqa_tklen = plot_data_dist(gpqa, input=False)\n",
        "mmlu_tklen = plot_data_dist(mmlu_train, input=False)"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "plt.hist(math_tklen)\n",
        "plt.xlim((100, 400)) #if we zoom in we can see that most samples are within\n",
        "\n",
        "plt.show()\n",
        "\n",
        "plt.hist(mmlu_tklen)\n",
        "plt.xlim((100, 550)) #if we zoom in we can see that most samples are within\n",
        "\n",
        "plt.show()\n",
        "\n",
        "plt.hist(gpqa_tklen)\n",
        "plt.xlim((100, 600)) #if we zoom in we can see that most samples are within\n",
        "\n",
        "plt.show()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "a1P7L4Ehc1gX",
        "outputId": "b26dd1e6-2579-46a2-926a-ac4a177a54fb"
      },
      "execution_count": 41,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<Figure size 640x480 with 1 Axes>"
            ],
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAj0AAAGdCAYAAAD5ZcJyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAArz0lEQVR4nO3df3BUVZ7//1dCSBN+dEd+pJsUAePgCllBJbjQpbKDZNMw7ZQOcWrQjGbkV8EG1yQKmB034zBTA4WlDJQC4+AaqkZGYWtQSRYwgoRVmgjRjDFK1h9xghM6ccV0A5IfkPv5w2/ul4agJgQSOM9H1a2i73nfk3NOnTIvL7cvUZZlWQIAALjCRff0AAAAAC4FQg8AADACoQcAABiB0AMAAIxA6AEAAEYg9AAAACMQegAAgBEIPQAAwAgxPT2Ai6WtrU11dXUaNGiQoqKieno4AADge7AsS8eOHVNiYqKio7v33swVG3rq6uqUlJTU08MAAABdcPjwYY0YMaJb+7xiQ8+gQYMkfbNoTqezh0cDAAC+j3A4rKSkJPv3eHe6YkNP+19pOZ1OQg8AAJeZi/FoCg8yAwAAIxB6AACAEToVeq6++mpFRUWdc2RnZ0uSmpqalJ2drSFDhmjgwIHKyMhQfX19RB+1tbXy+/3q37+/EhIStHjxYp06dSqiZs+ePZowYYIcDodGjx6twsLCC5slAAAwXqdCz4EDB3TkyBH7KCkpkST99Kc/lSTl5uZq27Zt2rJli0pLS1VXV6eZM2fa158+fVp+v18tLS3at2+fNm7cqMLCQhUUFNg1NTU18vv9mjp1qioqKpSTk6O5c+dq586d3TFfAABgqCjLsqyuXpyTk6OioiJ99NFHCofDGjZsmDZt2qS7775bknTo0CGNHTtWgUBAkydP1vbt23XHHXeorq5ObrdbkrR+/XotXbpUX3zxhWJjY7V06VIVFxfr/ffft3/OrFmz1NjYqB07dnzvsYXDYblcLoVCIR5kBgDgMnExf393+ZmelpYW/elPf9Ls2bMVFRWl8vJytba2Ki0tza4ZM2aMRo4cqUAgIEkKBAIaN26cHXgkyefzKRwOq6qqyq45s4/2mvY+AAAAuqLLX1l/+eWX1djYqF/84heSpGAwqNjYWMXHx0fUud1uBYNBu+bMwNPe3t72bTXhcFgnT55UXFxch+Npbm5Wc3Oz/TkcDnd1agAA4ArU5Ts9zz33nGbMmKHExMTuHE+XLV++XC6Xyz54GzMAADhTl0LP3/72N73++uuaO3eufc7j8ailpUWNjY0RtfX19fJ4PHbN2d/mav/8XTVOp/O8d3kkKT8/X6FQyD4OHz7clakBAIArVJdCz/PPP6+EhAT5/X77XGpqqvr27atdu3bZ56qrq1VbWyuv1ytJ8nq9qqysVENDg11TUlIip9OplJQUu+bMPtpr2vs4H4fDYb99mbcwAwCAs3U69LS1ten5559XVlaWYmL+/0eCXC6X5syZo7y8PL3xxhsqLy/XAw88IK/Xq8mTJ0uS0tPTlZKSovvuu09//etftXPnTj322GPKzs6Ww+GQJC1YsECffvqplixZokOHDmnt2rXavHmzcnNzu2nKAADARJ1+kPn1119XbW2tZs+efU7bqlWrFB0drYyMDDU3N8vn82nt2rV2e58+fVRUVKSFCxfK6/VqwIABysrK0rJly+ya5ORkFRcXKzc3V6tXr9aIESO0YcMG+Xy+Lk4RAADgAt/T05vxnh4AAC4/vfI9PQAAAJeTLr+n53Jx/a92KtrRv6eHgTN8tsL/3UUAAHQz7vQAAAAjEHoAAIARCD0AAMAIhB4AAGAEQg8AADACoQcAABiB0AMAAIxA6AEAAEYg9AAAACMQegAAgBEIPQAAwAiEHgAAYARCDwAAMAKhBwAAGIHQAwAAjEDoAQAARiD0AAAAIxB6AACAEQg9AADACIQeAABgBEIPAAAwAqEHAAAYgdADAACMQOgBAABGIPQAAAAjEHoAAIARCD0AAMAIhB4AAGAEQg8AADACoQcAABiB0AMAAIxA6AEAAEYg9AAAACMQegAAgBEIPQAAwAiEHgAAYARCDwAAMAKhBwAAGIHQAwAAjNDp0PP3v/9dP//5zzVkyBDFxcVp3LhxOnjwoN1uWZYKCgo0fPhwxcXFKS0tTR999FFEH0ePHlVmZqacTqfi4+M1Z84cHT9+PKLmvffe02233aZ+/fopKSlJK1eu7OIUAQAAOhl6vvrqK91yyy3q27evtm/frg8++EBPPvmkrrrqKrtm5cqVWrNmjdavX6+ysjINGDBAPp9PTU1Ndk1mZqaqqqpUUlKioqIi7d27V/Pnz7fbw+Gw0tPTNWrUKJWXl+uJJ57Q448/rmeffbYbpgwAAEwUZVmW9X2LH330Ub311lv6n//5nw7bLctSYmKiHn74YT3yyCOSpFAoJLfbrcLCQs2aNUsffvihUlJSdODAAU2cOFGStGPHDv3oRz/S559/rsTERK1bt06//OUvFQwGFRsba//sl19+WYcOHfpeYw2Hw3K5XErK2axoR//vO0VcAp+t8Pf0EAAAvVT77+9QKCSn09mtfXfqTs+rr76qiRMn6qc//akSEhJ000036Y9//KPdXlNTo2AwqLS0NPucy+XSpEmTFAgEJEmBQEDx8fF24JGktLQ0RUdHq6yszK6ZMmWKHXgkyefzqbq6Wl999VWHY2tublY4HI44AAAA2nUq9Hz66adat26drr32Wu3cuVMLFy7Uv/3bv2njxo2SpGAwKElyu90R17ndbrstGAwqISEhoj0mJkaDBw+OqOmojzN/xtmWL18ul8tlH0lJSZ2ZGgAAuMJ1KvS0tbVpwoQJ+t3vfqebbrpJ8+fP17x587R+/fqLNb7vLT8/X6FQyD4OHz7c00MCAAC9SKdCz/Dhw5WSkhJxbuzYsaqtrZUkeTweSVJ9fX1ETX19vd3m8XjU0NAQ0X7q1CkdPXo0oqajPs78GWdzOBxyOp0RBwAAQLtOhZ5bbrlF1dXVEef+93//V6NGjZIkJScny+PxaNeuXXZ7OBxWWVmZvF6vJMnr9aqxsVHl5eV2ze7du9XW1qZJkybZNXv37lVra6tdU1JSouuuuy7im2IAAADfV6dCT25urvbv36/f/e53+vjjj7Vp0yY9++yzys7OliRFRUUpJydHv/3tb/Xqq6+qsrJS999/vxITE3XXXXdJ+ubO0PTp0zVv3jy9/fbbeuutt7Ro0SLNmjVLiYmJkqR7771XsbGxmjNnjqqqqvTSSy9p9erVysvL697ZAwAAY8R0pvjmm2/W1q1blZ+fr2XLlik5OVm///3vlZmZadcsWbJEJ06c0Pz589XY2Khbb71VO3bsUL9+/eyaF154QYsWLdK0adMUHR2tjIwMrVmzxm53uVx67bXXlJ2drdTUVA0dOlQFBQUR7/IBAADojE69p+dywnt6ei/e0wMAOJ9e854eAACAyxWhBwAAGIHQAwAAjEDoAQAARiD0AAAAIxB6AACAEQg9AADACIQeAABgBEIPAAAwAqEHAAAYgdADAACMQOgBAABGIPQAAAAjEHoAAIARCD0AAMAIhB4AAGAEQg8AADACoQcAABiB0AMAAIxA6AEAAEYg9AAAACMQegAAgBEIPQAAwAiEHgAAYARCDwAAMAKhBwAAGIHQAwAAjEDoAQAARiD0AAAAIxB6AACAEQg9AADACIQeAABgBEIPAAAwAqEHAAAYgdADAACMQOgBAABGIPQAAAAjEHoAAIARCD0AAMAIhB4AAGCEToWexx9/XFFRURHHmDFj7PampiZlZ2dryJAhGjhwoDIyMlRfXx/RR21trfx+v/r376+EhAQtXrxYp06diqjZs2ePJkyYIIfDodGjR6uwsLDrMwQAAFAX7vT84z/+o44cOWIfb775pt2Wm5urbdu2acuWLSotLVVdXZ1mzpxpt58+fVp+v18tLS3at2+fNm7cqMLCQhUUFNg1NTU18vv9mjp1qioqKpSTk6O5c+dq586dFzhVAABgsphOXxATI4/Hc875UCik5557Tps2bdLtt98uSXr++ec1duxY7d+/X5MnT9Zrr72mDz74QK+//rrcbrduvPFG/eY3v9HSpUv1+OOPKzY2VuvXr1dycrKefPJJSdLYsWP15ptvatWqVfL5fBc4XQAAYKpO3+n56KOPlJiYqGuuuUaZmZmqra2VJJWXl6u1tVVpaWl27ZgxYzRy5EgFAgFJUiAQ0Lhx4+R2u+0an8+ncDisqqoqu+bMPtpr2vs4n+bmZoXD4YgDAACgXadCz6RJk1RYWKgdO3Zo3bp1qqmp0W233aZjx44pGAwqNjZW8fHxEde43W4Fg0FJUjAYjAg87e3tbd9WEw6HdfLkyfOObfny5XK5XPaRlJTUmakBAIArXKf+emvGjBn2n8ePH69JkyZp1KhR2rx5s+Li4rp9cJ2Rn5+vvLw8+3M4HCb4AAAA2wV9ZT0+Pl7/8A//oI8//lgej0ctLS1qbGyMqKmvr7efAfJ4POd8m6v983fVOJ3Obw1WDodDTqcz4gAAAGh3QaHn+PHj+uSTTzR8+HClpqaqb9++2rVrl91eXV2t2tpaeb1eSZLX61VlZaUaGhrsmpKSEjmdTqWkpNg1Z/bRXtPeBwAAQFd0KvQ88sgjKi0t1WeffaZ9+/bpJz/5ifr06aN77rlHLpdLc+bMUV5ent544w2Vl5frgQcekNfr1eTJkyVJ6enpSklJ0X333ae//vWv2rlzpx577DFlZ2fL4XBIkhYsWKBPP/1US5Ys0aFDh7R27Vpt3rxZubm53T97AABgjE490/P555/rnnvu0Zdffqlhw4bp1ltv1f79+zVs2DBJ0qpVqxQdHa2MjAw1NzfL5/Np7dq19vV9+vRRUVGRFi5cKK/XqwEDBigrK0vLli2za5KTk1VcXKzc3FytXr1aI0aM0IYNG/i6OgAAuCBRlmVZPT2IiyEcDn/zLa6czYp29O/p4eAMn63w9/QQAAC9VPvv71Ao1O3P5/JvbwEAACMQegAAgBEIPQAAwAiEHgAAYARCDwAAMAKhBwAAGIHQAwAAjEDoAQAARiD0AAAAIxB6AACAEQg9AADACIQeAABgBEIPAAAwAqEHAAAYgdADAACMQOgBAABGIPQAAAAjEHoAAIARCD0AAMAIhB4AAGAEQg8AADACoQcAABiB0AMAAIxA6AEAAEYg9AAAACMQegAAgBEIPQAAwAiEHgAAYARCDwAAMAKhBwAAGIHQAwAAjEDoAQAARiD0AAAAIxB6AACAEQg9AADACIQeAABgBEIPAAAwAqEHAAAYgdADAACMQOgBAABGIPQAAAAjXFDoWbFihaKiopSTk2Ofa2pqUnZ2toYMGaKBAwcqIyND9fX1EdfV1tbK7/erf//+SkhI0OLFi3Xq1KmImj179mjChAlyOBwaPXq0CgsLL2SoAADAcF0OPQcOHNAf/vAHjR8/PuJ8bm6utm3bpi1btqi0tFR1dXWaOXOm3X769Gn5/X61tLRo37592rhxowoLC1VQUGDX1NTUyO/3a+rUqaqoqFBOTo7mzp2rnTt3dnW4AADAcF0KPcePH1dmZqb++Mc/6qqrrrLPh0IhPffcc3rqqad0++23KzU1Vc8//7z27dun/fv3S5Jee+01ffDBB/rTn/6kG2+8UTNmzNBvfvMbPfPMM2ppaZEkrV+/XsnJyXryySc1duxYLVq0SHfffbdWrVrVDVMGAAAm6lLoyc7Olt/vV1paWsT58vJytba2RpwfM2aMRo4cqUAgIEkKBAIaN26c3G63XePz+RQOh1VVVWXXnN23z+ez++hIc3OzwuFwxAEAANAuprMXvPjii3rnnXd04MCBc9qCwaBiY2MVHx8fcd7tdisYDNo1Zwae9vb2tm+rCYfDOnnypOLi4s752cuXL9evf/3rzk4HAAAYolN3eg4fPqyHHnpIL7zwgvr163exxtQl+fn5CoVC9nH48OGeHhIAAOhFOhV6ysvL1dDQoAkTJigmJkYxMTEqLS3VmjVrFBMTI7fbrZaWFjU2NkZcV19fL4/HI0nyeDznfJur/fN31Tidzg7v8kiSw+GQ0+mMOAAAANp1KvRMmzZNlZWVqqiosI+JEycqMzPT/nPfvn21a9cu+5rq6mrV1tbK6/VKkrxeryorK9XQ0GDXlJSUyOl0KiUlxa45s4/2mvY+AAAAOqtTz/QMGjRI119/fcS5AQMGaMiQIfb5OXPmKC8vT4MHD5bT6dSDDz4or9eryZMnS5LS09OVkpKi++67TytXrlQwGNRjjz2m7OxsORwOSdKCBQv09NNPa8mSJZo9e7Z2796tzZs3q7i4uDvmDAAADNTpB5m/y6pVqxQdHa2MjAw1NzfL5/Np7dq1dnufPn1UVFSkhQsXyuv1asCAAcrKytKyZcvsmuTkZBUXFys3N1erV6/WiBEjtGHDBvl8vu4eLgAAMESUZVlWTw/iYgiHw3K5XErK2axoR/+eHg7O8NkKf08PAQDQS7X//g6FQt3+fC7/9hYAADACoQcAABiB0AMAAIxA6AEAAEYg9AAAACMQegAAgBEIPQAAwAiEHgAAYARCDwAAMAKhBwAAGIHQAwAAjEDoAQAARiD0AAAAIxB6AACAEQg9AADACIQeAABgBEIPAAAwAqEHAAAYgdADAACMQOgBAABGIPQAAAAjEHoAAIARCD0AAMAIhB4AAGAEQg8AADACoQcAABiB0AMAAIxA6AEAAEYg9AAAACMQegAAgBEIPQAAwAiEHgAAYARCDwAAMAKhBwAAGIHQAwAAjEDoAQAARiD0AAAAIxB6AACAEQg9AADACIQeAABghE6FnnXr1mn8+PFyOp1yOp3yer3avn273d7U1KTs7GwNGTJEAwcOVEZGhurr6yP6qK2tld/vV//+/ZWQkKDFixfr1KlTETV79uzRhAkT5HA4NHr0aBUWFnZ9hgAAAOpk6BkxYoRWrFih8vJyHTx4ULfffrvuvPNOVVVVSZJyc3O1bds2bdmyRaWlpaqrq9PMmTPt60+fPi2/36+Wlhbt27dPGzduVGFhoQoKCuyampoa+f1+TZ06VRUVFcrJydHcuXO1c+fObpoyAAAwUZRlWdaFdDB48GA98cQTuvvuuzVs2DBt2rRJd999tyTp0KFDGjt2rAKBgCZPnqzt27frjjvuUF1dndxutyRp/fr1Wrp0qb744gvFxsZq6dKlKi4u1vvvv2//jFmzZqmxsVE7duz43uMKh8NyuVxKytmsaEf/C5kiutlnK/w9PQQAQC/V/vs7FArJ6XR2a99dfqbn9OnTevHFF3XixAl5vV6Vl5ertbVVaWlpds2YMWM0cuRIBQIBSVIgENC4cePswCNJPp9P4XDYvlsUCAQi+mivae/jfJqbmxUOhyMOAACAdp0OPZWVlRo4cKAcDocWLFigrVu3KiUlRcFgULGxsYqPj4+od7vdCgaDkqRgMBgReNrb29u+rSYcDuvkyZPnHdfy5cvlcrnsIykpqbNTAwAAV7BOh57rrrtOFRUVKisr08KFC5WVlaUPPvjgYoytU/Lz8xUKhezj8OHDPT0kAADQi8R09oLY2FiNHj1akpSamqoDBw5o9erV+tnPfqaWlhY1NjZG3O2pr6+Xx+ORJHk8Hr399tsR/bV/u+vMmrO/8VVfXy+n06m4uLjzjsvhcMjhcHR2OgAAwBAX/J6etrY2NTc3KzU1VX379tWuXbvsturqatXW1srr9UqSvF6vKisr1dDQYNeUlJTI6XQqJSXFrjmzj/aa9j4AAAC6olN3evLz8zVjxgyNHDlSx44d06ZNm7Rnzx7t3LlTLpdLc+bMUV5engYPHiyn06kHH3xQXq9XkydPliSlp6crJSVF9913n1auXKlgMKjHHntM2dnZ9l2aBQsW6Omnn9aSJUs0e/Zs7d69W5s3b1ZxcXH3zx4AABijU6GnoaFB999/v44cOSKXy6Xx48dr586d+pd/+RdJ0qpVqxQdHa2MjAw1NzfL5/Np7dq19vV9+vRRUVGRFi5cKK/XqwEDBigrK0vLli2za5KTk1VcXKzc3FytXr1aI0aM0IYNG+Tz+bppygAAwEQX/J6e3or39PRevKcHAHA+vfI9PQAAAJcTQg8AADACoQcAABiB0AMAAIxA6AEAAEYg9AAAACMQegAAgBEIPQAAwAiEHgAAYARCDwAAMAKhBwAAGIHQAwAAjEDoAQAARiD0AAAAIxB6AACAEQg9AADACIQeAABgBEIPAAAwAqEHAAAYgdADAACMQOgBAABGIPQAAAAjEHoAAIARCD0AAMAIhB4AAGAEQg8AADACoQcAABiB0AMAAIxA6AEAAEYg9AAAACMQegAAgBEIPQAAwAiEHgAAYARCDwAAMAKhBwAAGIHQAwAAjEDoAQAARiD0AAAAIxB6AACAEQg9AADACJ0KPcuXL9fNN9+sQYMGKSEhQXfddZeqq6sjapqampSdna0hQ4Zo4MCBysjIUH19fURNbW2t/H6/+vfvr4SEBC1evFinTp2KqNmzZ48mTJggh8Oh0aNHq7CwsGszBAAAUCdDT2lpqbKzs7V//36VlJSotbVV6enpOnHihF2Tm5urbdu2acuWLSotLVVdXZ1mzpxpt58+fVp+v18tLS3at2+fNm7cqMLCQhUUFNg1NTU18vv9mjp1qioqKpSTk6O5c+dq586d3TBlAABgoijLsqyuXvzFF18oISFBpaWlmjJlikKhkIYNG6ZNmzbp7rvvliQdOnRIY8eOVSAQ0OTJk7V9+3bdcccdqqurk9vtliStX79eS5cu1RdffKHY2FgtXbpUxcXFev/99+2fNWvWLDU2NmrHjh3fa2zhcFgul0tJOZsV7ejf1SniIvhshb+nhwAA6KXaf3+HQiE5nc5u7fuCnukJhUKSpMGDB0uSysvL1draqrS0NLtmzJgxGjlypAKBgCQpEAho3LhxduCRJJ/Pp3A4rKqqKrvmzD7aa9r7AAAA6KyYrl7Y1tamnJwc3XLLLbr++uslScFgULGxsYqPj4+odbvdCgaDds2Zgae9vb3t22rC4bBOnjypuLi4c8bT3Nys5uZm+3M4HO7q1AAAwBWoy3d6srOz9f777+vFF1/szvF02fLly+VyuewjKSmpp4cEAAB6kS6FnkWLFqmoqEhvvPGGRowYYZ/3eDxqaWlRY2NjRH19fb08Ho9dc/a3udo/f1eN0+ns8C6PJOXn5ysUCtnH4cOHuzI1AABwhepU6LEsS4sWLdLWrVu1e/duJScnR7Snpqaqb9++2rVrl32uurpatbW18nq9kiSv16vKyko1NDTYNSUlJXI6nUpJSbFrzuyjvaa9j444HA45nc6IAwAAoF2nnunJzs7Wpk2b9Morr2jQoEH2Mzgul0txcXFyuVyaM2eO8vLyNHjwYDmdTj344IPyer2aPHmyJCk9PV0pKSm67777tHLlSgWDQT322GPKzs6Ww+GQJC1YsEBPP/20lixZotmzZ2v37t3avHmziouLu3n6AADAFJ2607Nu3TqFQiH98Ic/1PDhw+3jpZdesmtWrVqlO+64QxkZGZoyZYo8Ho/+8pe/2O19+vRRUVGR+vTpI6/Xq5///Oe6//77tWzZMrsmOTlZxcXFKikp0Q033KAnn3xSGzZskM/n64YpAwAAE13Qe3p6M97T03vxnh4AwPn02vf0AAAAXC4IPQAAwAiEHgAAYARCDwAAMAKhBwAAGIHQAwAAjEDoAQAARiD0AAAAIxB6AACAEQg9AADACIQeAABgBEIPAAAwAqEHAAAYgdADAACMQOgBAABGIPQAAAAjEHoAAIARCD0AAMAIhB4AAGAEQg8AADACoQcAABiB0AMAAIxA6AEAAEYg9AAAACMQegAAgBEIPQAAwAiEHgAAYARCDwAAMAKhBwAAGIHQAwAAjEDoAQAARiD0AAAAIxB6AACAEQg9AADACIQeAABgBEIPAAAwAqEHAAAYgdADAACMQOgBAABGIPQAAAAjEHoAAIAROh169u7dqx//+MdKTExUVFSUXn755Yh2y7JUUFCg4cOHKy4uTmlpafroo48iao4eParMzEw5nU7Fx8drzpw5On78eETNe++9p9tuu039+vVTUlKSVq5c2fnZAQAA/H86HXpOnDihG264Qc8880yH7StXrtSaNWu0fv16lZWVacCAAfL5fGpqarJrMjMzVVVVpZKSEhUVFWnv3r2aP3++3R4Oh5Wenq5Ro0apvLxcTzzxhB5//HE9++yzXZgiAACAFGVZltXli6OitHXrVt11112SvrnLk5iYqIcffliPPPKIJCkUCsntdquwsFCzZs3Shx9+qJSUFB04cEATJ06UJO3YsUM/+tGP9PnnnysxMVHr1q3TL3/5SwWDQcXGxkqSHn30Ub388ss6dOjQ9xpbOByWy+VSUs5mRTv6d3WKuAg+W+Hv6SEAAHqp9t/foVBITqezW/vu1md6ampqFAwGlZaWZp9zuVyaNGmSAoGAJCkQCCg+Pt4OPJKUlpam6OholZWV2TVTpkyxA48k+Xw+VVdX66uvvurwZzc3NyscDkccAAAA7bo19ASDQUmS2+2OOO92u+22YDCohISEiPaYmBgNHjw4oqajPs78GWdbvny5XC6XfSQlJV34hAAAwBXjivn2Vn5+vkKhkH0cPny4p4cEAAB6kW4NPR6PR5JUX18fcb6+vt5u83g8amhoiGg/deqUjh49GlHTUR9n/oyzORwOOZ3OiAMAAKBdt4ae5ORkeTwe7dq1yz4XDodVVlYmr9crSfJ6vWpsbFR5eblds3v3brW1tWnSpEl2zd69e9Xa2mrXlJSU6LrrrtNVV13VnUMGAACG6HToOX78uCoqKlRRUSHpm4eXKyoqVFtbq6ioKOXk5Oi3v/2tXn31VVVWVur+++9XYmKi/Q2vsWPHavr06Zo3b57efvttvfXWW1q0aJFmzZqlxMRESdK9996r2NhYzZkzR1VVVXrppZe0evVq5eXlddvEAQCAWWI6e8HBgwc1depU+3N7EMnKylJhYaGWLFmiEydOaP78+WpsbNStt96qHTt2qF+/fvY1L7zwghYtWqRp06YpOjpaGRkZWrNmjd3ucrn02muvKTs7W6mpqRo6dKgKCgoi3uUDAADQGRf0np7ejPf09F68pwcAcD6XzXt6AAAAeitCDwAAMAKhBwAAGIHQAwAAjEDoAQAARiD0AAAAIxB6AACAEQg9AADACIQeAABgBEIPAAAwAqEHAAAYgdADAACMQOgBAABGIPQAAAAjEHoAAIARCD0AAMAIhB4AAGAEQg8AADACoQcAABiB0AMAAIwQ09MDgHmufrS4p4cAoId8tsLf00OAwbjTAwAAjEDoAQAARiD0AAAAIxB6AACAEQg9AADACIQeAABgBEIPAAAwAqEHAAAYgdADAACMQOgBAABGIPQAAAAjEHoAAIARCD0AAMAIhB4AAGAEQg8AADACoQcAABiB0AMAAIxA6AEAAEaI6ekBAADMcfWjxT09BPRybc1fX7S+e/WdnmeeeUZXX321+vXrp0mTJuntt9/u6SEBAIDLVK8NPS+99JLy8vL0q1/9Su+8845uuOEG+Xw+NTQ09PTQAADAZajXhp6nnnpK8+bN0wMPPKCUlBStX79e/fv313/+53/29NAAAMBlqFc+09PS0qLy8nLl5+fb56Kjo5WWlqZAINDhNc3NzWpubrY/h0IhSRf37wYBAED3av+9bVlWt/fdK0PP//3f/+n06dNyu90R591utw4dOtThNcuXL9evf/3rc87/fd0vLsYQAQDARfTll1/K5XJ1a5+9MvR0RX5+vvLy8uzPjY2NGjVqlGpra7t90UwTDoeVlJSkw4cPy+l09vRwLlusY/dhLbsPa9k9WMfuEwqFNHLkSA0ePLjb++6VoWfo0KHq06eP6uvrI87X19fL4/F0eI3D4ZDD4TjnvMvlYgN2E6fTyVp2A9ax+7CW3Ye17B6sY/eJju7+x4575YPMsbGxSk1N1a5du+xzbW1t2rVrl7xebw+ODAAAXK565Z0eScrLy1NWVpYmTpyof/qnf9Lvf/97nThxQg888EBPDw0AAFyGem3o+dnPfqYvvvhCBQUFCgaDuvHGG7Vjx45zHm4+H4fDoV/96lcd/pUXOoe17B6sY/dhLbsPa9k9WMfuczHXMsq6GN8JAwAA6GV65TM9AAAA3Y3QAwAAjEDoAQAARiD0AAAAI1xWoWfv3r368Y9/rMTEREVFRenll1+OaLcsSwUFBRo+fLji4uKUlpamjz76KKLm6NGjyszMlNPpVHx8vObMmaPjx49fwln0Dt+1lr/4xS8UFRUVcUyfPj2ihrX85p8/ufnmmzVo0CAlJCTorrvuUnV1dURNU1OTsrOzNWTIEA0cOFAZGRnnvHiztrZWfr9f/fv3V0JCghYvXqxTp05dyqn0uO+zlj/84Q/P2ZcLFiyIqDF9LdetW6fx48fbL8nzer3avn273c5+/P6+ay3Zj12zYsUKRUVFKScnxz53qfblZRV6Tpw4oRtuuEHPPPNMh+0rV67UmjVrtH79epWVlWnAgAHy+XxqamqyazIzM1VVVaWSkhIVFRVp7969mj9//qWaQq/xXWspSdOnT9eRI0fs489//nNEO2splZaWKjs7W/v371dJSYlaW1uVnp6uEydO2DW5ubnatm2btmzZotLSUtXV1WnmzJl2++nTp+X3+9XS0qJ9+/Zp48aNKiwsVEFBQU9Mqcd8n7WUpHnz5kXsy5UrV9ptrKU0YsQIrVixQuXl5Tp48KBuv/123XnnnaqqqpLEfuyM71pLif3YWQcOHNAf/vAHjR8/PuL8JduX1mVKkrV161b7c1tbm+XxeKwnnnjCPtfY2Gg5HA7rz3/+s2VZlvXBBx9YkqwDBw7YNdu3b7eioqKsv//975ds7L3N2WtpWZaVlZVl3Xnnnee9hrXsWENDgyXJKi0ttSzrmz3Yt29fa8uWLXbNhx9+aEmyAoGAZVmW9d///d9WdHS0FQwG7Zp169ZZTqfTam5uvrQT6EXOXkvLsqx//ud/th566KHzXsNaduyqq66yNmzYwH7sBu1raVnsx846duyYde2111olJSURa3cp9+Vldafn29TU1CgYDCotLc0+53K5NGnSJAUCAUlSIBBQfHy8Jk6caNekpaUpOjpaZWVll3zMvd2ePXuUkJCg6667TgsXLtSXX35pt7GWHQuFQpJk/0N55eXlam1tjdiXY8aM0ciRIyP25bhx4yJevOnz+RQOhyP+j9I0Z69luxdeeEFDhw7V9ddfr/z8fH399dd2G2sZ6fTp03rxxRd14sQJeb1e9uMFOHst27Efv7/s7Gz5/f6I/Sdd2v9O9to3MndWMBiUpHPe2Ox2u+22YDCohISEiPaYmBgNHjzYrsE3pk+frpkzZyo5OVmffPKJ/v3f/10zZsxQIBBQnz59WMsOtLW1KScnR7fccouuv/56Sd/sudjYWMXHx0fUnr0vO9q37W0m6mgtJenee+/VqFGjlJiYqPfee09Lly5VdXW1/vKXv0hiLdtVVlbK6/WqqalJAwcO1NatW5WSkqKKigr2Yyedby0l9mNnvPjii3rnnXd04MCBc9ou5X8nr5jQg+41a9Ys+8/jxo3T+PHj9YMf/EB79uzRtGnTenBkvVd2drbef/99vfnmmz09lMve+dbyzGfGxo0bp+HDh2vatGn65JNP9IMf/OBSD7PXuu6661RRUaFQKKT/+q//UlZWlkpLS3t6WJel861lSkoK+/F7Onz4sB566CGVlJSoX79+PTqWK+avtzwejySd87R3fX293ebxeNTQ0BDRfurUKR09etSuQceuueYaDR06VB9//LEk1vJsixYtUlFRkd544w2NGDHCPu/xeNTS0qLGxsaI+rP3ZUf7tr3NNOdby45MmjRJkiL2JWspxcbGavTo0UpNTdXy5ct1ww03aPXq1ezHLjjfWnaE/dix8vJyNTQ0aMKECYqJiVFMTIxKS0u1Zs0axcTEyO12X7J9ecWEnuTkZHk8Hu3atcs+Fw6HVVZWZv/9q9frVWNjo8rLy+2a3bt3q62tzd6s6Njnn3+uL7/8UsOHD5fEWrazLEuLFi3S1q1btXv3biUnJ0e0p6amqm/fvhH7srq6WrW1tRH7srKyMiJElpSUyOl02rfRTfBda9mRiooKSYrYl6zludra2tTc3Mx+7Abta9kR9mPHpk2bpsrKSlVUVNjHxIkTlZmZaf/5ku3L7ngi+1I5duyY9e6771rvvvuuJcl66qmnrHfffdf629/+ZlmWZa1YscKKj4+3XnnlFeu9996z7rzzTis5Odk6efKk3cf06dOtm266ySorK7PefPNN69prr7XuueeenppSj/m2tTx27Jj1yCOPWIFAwKqpqbFef/11a8KECda1115rNTU12X2wlpa1cOFCy+VyWXv27LGOHDliH19//bVds2DBAmvkyJHW7t27rYMHD1per9fyer12+6lTp6zrr7/eSk9PtyoqKqwdO3ZYw4YNs/Lz83tiSj3mu9by448/tpYtW2YdPHjQqqmpsV555RXrmmuusaZMmWL3wVpa1qOPPmqVlpZaNTU11nvvvWc9+uijVlRUlPXaa69ZlsV+7IxvW0v244U5+5tvl2pfXlah54033rAknXNkZWVZlvXN19b/4z/+w3K73ZbD4bCmTZtmVVdXR/Tx5ZdfWvfcc481cOBAy+l0Wg888IB17NixHphNz/q2tfz666+t9PR0a9iwYVbfvn2tUaNGWfPmzYv4qqBlsZaWZXW4hpKs559/3q45efKk9a//+q/WVVddZfXv39/6yU9+Yh05ciSin88++8yaMWOGFRcXZw0dOtR6+OGHrdbW1ks8m571XWtZW1trTZkyxRo8eLDlcDis0aNHW4sXL7ZCoVBEP6av5ezZs61Ro0ZZsbGx1rBhw6xp06bZgcey2I+d8W1ryX68MGeHnku1L6Msy7I6fa8KAADgMnPFPNMDAADwbQg9AADACIQeAABgBEIPAAAwAqEHAAAYgdADAACMQOgBAABGIPQAAAAjEHoAAIARCD0AAMAIhB4AAGAEQg8AADDC/wNjZPJpwR6UXwAAAABJRU5ErkJggg==\n"
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<Figure size 640x480 with 1 Axes>"
            ],
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkYAAAGgCAYAAABPKKhuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAApkUlEQVR4nO3de3BUZZ7/8U8udCdcuiOEJEQSwMEBI7chQOiZ1R2XLC0TZ3XE+oFDaUTUggmUEAYhjgtqTW0orF2BBWG3rDL+ISuwNehIJEwmCK7acglkBZSsuHGTGegEZZIGhASS5/eHT87QEC7hljS8X1WnKn2e7zn9fPsR86nuPidRxhgjAAAAKLqjJwAAANBZEIwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAACsdgWjF198UVFRUWHb4MGDnfFTp04pLy9PvXr1Uvfu3TVx4kTV1taGnaO6ulo5OTnq2rWrkpKSNG/ePJ05cyasZuvWrRo5cqTcbrcGDhyooqKi8+aycuVK9e/fX3FxccrKytKOHTva0woAAMB5Ytt7wN13360//vGPfz1B7F9PMWfOHBUXF2v9+vXyer2aOXOmHn74YX388ceSpObmZuXk5CglJUWffPKJDh8+rMcff1xdunTRP/3TP0mSqqqqlJOTo+nTp+utt95SWVmZnnrqKfXp00d+v1+StHbtWuXn52v16tXKysrS0qVL5ff7VVlZqaSkpMvupaWlRYcOHVKPHj0UFRXV3pcCAAB0AGOMjh07ptTUVEVHX+MPv0w7LFq0yAwfPrzNsfr6etOlSxezfv16Z98XX3xhJJlAIGCMMeb999830dHRJhgMOjWrVq0yHo/HNDY2GmOMee6558zdd98ddu5JkyYZv9/vPB4zZozJy8tzHjc3N5vU1FRTWFjYnnZMTU2NkcTGxsbGxsYWgVtNTU27fu9fjna/Y/Tll18qNTVVcXFx8vl8KiwsVHp6usrLy3X69GllZ2c7tYMHD1Z6eroCgYDGjh2rQCCgoUOHKjk52anx+/2aMWOG9u/frx/96EcKBAJh52itmT17tiSpqalJ5eXlKigocMajo6OVnZ2tQCBw0bk3NjaqsbHReWyMkSTV1NTI4/G096UAAAAdIBQKKS0tTT169Ljm525XMMrKylJRUZEGDRqkw4cP66WXXtI999yjffv2KRgMyuVyKSEhIeyY5ORkBYNBSVIwGAwLRa3jrWMXqwmFQjp58qT+8pe/qLm5uc2aAwcOXHT+hYWFeumll87b7/F4CEYAAESY6/E1mHYFowkTJjg/Dxs2TFlZWerXr5/WrVun+Pj4az65a62goED5+fnO49bECQAAIF3l5foJCQn64Q9/qIMHDyolJUVNTU2qr68Pq6mtrVVKSookKSUl5byr1FofX6rG4/EoPj5eiYmJiomJabOm9RwX4na7nXeHeJcIAACc66qC0fHjx/XVV1+pT58+yszMVJcuXVRWVuaMV1ZWqrq6Wj6fT5Lk8/m0d+9e1dXVOTWlpaXyeDzKyMhwas4+R2tN6zlcLpcyMzPDalpaWlRWVubUAAAAXJH2fFN77ty5ZuvWraaqqsp8/PHHJjs72yQmJpq6ujpjjDHTp0836enpZsuWLWbXrl3G5/MZn8/nHH/mzBkzZMgQM378eFNRUWFKSkpM7969TUFBgVPzv//7v6Zr165m3rx55osvvjArV640MTExpqSkxKl5++23jdvtNkVFRebzzz83zzzzjElISAi72u1yNDQ0GEmmoaGhXccBAICOcz1/f7crGE2aNMn06dPHuFwuc/vtt5tJkyaZgwcPOuMnT540v/rVr8xtt91munbtan7xi1+Yw4cPh53j66+/NhMmTDDx8fEmMTHRzJ0715w+fTqs5oMPPjAjRowwLpfL3HHHHeaNN944by7/+q//atLT043L5TJjxowxn376aXtaMcYQjAAAiETX8/d3lDH2mvVbUCgUktfrVUNDA983AgAgQlzP39/8rTQAAACLYAQAAGARjAAAACyCEQAAgEUwAgAAsAhGAAAAFsEIAADAIhgBAABYsR09AQDoLPovKO7oKQBhvl6c09FTuOXwjhEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAA66qC0eLFixUVFaXZs2c7+06dOqW8vDz16tVL3bt318SJE1VbWxt2XHV1tXJyctS1a1clJSVp3rx5OnPmTFjN1q1bNXLkSLndbg0cOFBFRUXnPf/KlSvVv39/xcXFKSsrSzt27LiadgAAwC3uioPRzp079W//9m8aNmxY2P45c+bovffe0/r167Vt2zYdOnRIDz/8sDPe3NysnJwcNTU16ZNPPtGbb76poqIiLVy40KmpqqpSTk6O7rvvPlVUVGj27Nl66qmntHnzZqdm7dq1ys/P16JFi7R7924NHz5cfr9fdXV1V9oSAAC4xUUZY0x7Dzp+/LhGjhyp1157Tb/97W81YsQILV26VA0NDerdu7fWrFmjRx55RJJ04MAB3XXXXQoEAho7dqw2bdqkBx54QIcOHVJycrIkafXq1Zo/f76OHDkil8ul+fPnq7i4WPv27XOec/Lkyaqvr1dJSYkkKSsrS6NHj9aKFSskSS0tLUpLS9OsWbO0YMGCNufd2NioxsZG53EoFFJaWpoaGhrk8Xja+zIAuMn0X1Dc0VMAwny9OKejp9AphUIheb3e6/L7+4reMcrLy1NOTo6ys7PD9peXl+v06dNh+wcPHqz09HQFAgFJUiAQ0NChQ51QJEl+v1+hUEj79+93as49t9/vd87R1NSk8vLysJro6GhlZ2c7NW0pLCyU1+t1trS0tCtpHwAA3KTaHYzefvtt7d69W4WFheeNBYNBuVwuJSQkhO1PTk5WMBh0as4ORa3jrWMXqwmFQjp58qS++eYbNTc3t1nTeo62FBQUqKGhwdlqamour2kAAHBLiG1PcU1NjZ599lmVlpYqLi7ues3punG73XK73R09DQAA0Em16x2j8vJy1dXVaeTIkYqNjVVsbKy2bdum5cuXKzY2VsnJyWpqalJ9fX3YcbW1tUpJSZEkpaSknHeVWuvjS9V4PB7Fx8crMTFRMTExbda0ngMAAKC92hWMxo0bp71796qiosLZRo0apSlTpjg/d+nSRWVlZc4xlZWVqq6uls/nkyT5fD7t3bs37Oqx0tJSeTweZWRkODVnn6O1pvUcLpdLmZmZYTUtLS0qKytzagAAANqrXR+l9ejRQ0OGDAnb161bN/Xq1cvZP23aNOXn56tnz57yeDyaNWuWfD6fxo4dK0kaP368MjIy9Nhjj2nJkiUKBoN64YUXlJeX53zMNX36dK1YsULPPfecnnzySW3ZskXr1q1TcfFfrxjJz89Xbm6uRo0apTFjxmjp0qU6ceKEpk6delUvCAAAuHW1KxhdjldffVXR0dGaOHGiGhsb5ff79dprrznjMTEx2rhxo2bMmCGfz6du3bopNzdXL7/8slMzYMAAFRcXa86cOVq2bJn69u2r119/XX6/36mZNGmSjhw5ooULFyoYDGrEiBEqKSk57wvZAAAAl+uK7mN0s7ie90EAEHm4jxE6G+5j1LZOdx8jAACAmxHBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAACsdgWjVatWadiwYfJ4PPJ4PPL5fNq0aZMzfurUKeXl5alXr17q3r27Jk6cqNra2rBzVFdXKycnR127dlVSUpLmzZunM2fOhNVs3bpVI0eOlNvt1sCBA1VUVHTeXFauXKn+/fsrLi5OWVlZ2rFjR3taAQAAOE+7glHfvn21ePFilZeXa9euXfq7v/s7Pfjgg9q/f78kac6cOXrvvfe0fv16bdu2TYcOHdLDDz/sHN/c3KycnBw1NTXpk08+0ZtvvqmioiItXLjQqamqqlJOTo7uu+8+VVRUaPbs2Xrqqae0efNmp2bt2rXKz8/XokWLtHv3bg0fPlx+v191dXVX+3oAAIBbWJQxxlzNCXr27KlXXnlFjzzyiHr37q01a9bokUcekSQdOHBAd911lwKBgMaOHatNmzbpgQce0KFDh5ScnCxJWr16tebPn68jR47I5XJp/vz5Ki4u1r59+5znmDx5surr61VSUiJJysrK0ujRo7VixQpJUktLi9LS0jRr1iwtWLDgsuceCoXk9XrV0NAgj8dzNS8DgJtA/wXFHT0FIMzXi3M6egqd0vX8/X3F3zFqbm7W22+/rRMnTsjn86m8vFynT59Wdna2UzN48GClp6crEAhIkgKBgIYOHeqEIkny+/0KhULOu06BQCDsHK01redoampSeXl5WE10dLSys7OdmgtpbGxUKBQK2wAAAFq1Oxjt3btX3bt3l9vt1vTp07VhwwZlZGQoGAzK5XIpISEhrD45OVnBYFCSFAwGw0JR63jr2MVqQqGQTp48qW+++UbNzc1t1rSe40IKCwvl9XqdLS0trb3tAwCAm1i7g9GgQYNUUVGh7du3a8aMGcrNzdXnn39+PeZ2zRUUFKihocHZampqOnpKAACgE4lt7wEul0sDBw6UJGVmZmrnzp1atmyZJk2apKamJtXX14e9a1RbW6uUlBRJUkpKynlXj7VetXZ2zblXstXW1srj8Sg+Pl4xMTGKiYlps6b1HBfidrvldrvb2zIAALhFXPV9jFpaWtTY2KjMzEx16dJFZWVlzlhlZaWqq6vl8/kkST6fT3v37g27eqy0tFQej0cZGRlOzdnnaK1pPYfL5VJmZmZYTUtLi8rKypwaAACAK9Gud4wKCgo0YcIEpaen69ixY1qzZo22bt2qzZs3y+v1atq0acrPz1fPnj3l8Xg0a9Ys+Xw+jR07VpI0fvx4ZWRk6LHHHtOSJUsUDAb1wgsvKC8vz3knZ/r06VqxYoWee+45Pfnkk9qyZYvWrVun4uK/Xi2Sn5+v3NxcjRo1SmPGjNHSpUt14sQJTZ069Rq+NAAA4FbTrmBUV1enxx9/XIcPH5bX69WwYcO0efNm/f3f/70k6dVXX1V0dLQmTpyoxsZG+f1+vfbaa87xMTEx2rhxo2bMmCGfz6du3bopNzdXL7/8slMzYMAAFRcXa86cOVq2bJn69u2r119/XX6/36mZNGmSjhw5ooULFyoYDGrEiBEqKSk57wvZAAAA7XHV9zGKZNzHCMDZuI8ROhvuY9S2TnkfIwAAgJsNwQgAAMAiGAEAAFgEIwAAAItgBAAAYBGMAAAALIIRAACARTACAACwCEYAAAAWwQgAAMAiGAEAAFgEIwAAAItgBAAAYBGMAAAALIIRAACARTACAACwCEYAAAAWwQgAAMAiGAEAAFgEIwAAAItgBAAAYBGMAAAALIIRAACARTACAACwCEYAAAAWwQgAAMAiGAEAAFgEIwAAAItgBAAAYBGMAAAALIIRAACARTACAACwCEYAAAAWwQgAAMAiGAEAAFgEIwAAAItgBAAAYBGMAAAALIIRAACARTACAACwCEYAAAAWwQgAAMAiGAEAAFgEIwAAAItgBAAAYBGMAAAALIIRAACARTACAACwCEYAAAAWwQgAAMAiGAEAAFixHT0B3Lr6Lyju6CkAABCGd4wAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALDaFYwKCws1evRo9ejRQ0lJSXrooYdUWVkZVnPq1Cnl5eWpV69e6t69uyZOnKja2tqwmurqauXk5Khr165KSkrSvHnzdObMmbCarVu3auTIkXK73Ro4cKCKiorOm8/KlSvVv39/xcXFKSsrSzt27GhPOwAAAGHaFYy2bdumvLw8ffrppyotLdXp06c1fvx4nThxwqmZM2eO3nvvPa1fv17btm3ToUOH9PDDDzvjzc3NysnJUVNTkz755BO9+eabKioq0sKFC52aqqoq5eTk6L777lNFRYVmz56tp556Sps3b3Zq1q5dq/z8fC1atEi7d+/W8OHD5ff7VVdXdzWvBwAAuIVFGWPMlR585MgRJSUladu2bbr33nvV0NCg3r17a82aNXrkkUckSQcOHNBdd92lQCCgsWPHatOmTXrggQd06NAhJScnS5JWr16t+fPn68iRI3K5XJo/f76Ki4u1b98+57kmT56s+vp6lZSUSJKysrI0evRorVixQpLU0tKitLQ0zZo1SwsWLGhzvo2NjWpsbHQeh0IhpaWlqaGhQR6P50pfBlyh/guKO3oKANCpfb04p6On0CmFQiF5vd7r8vv7qr5j1NDQIEnq2bOnJKm8vFynT59Wdna2UzN48GClp6crEAhIkgKBgIYOHeqEIkny+/0KhULav3+/U3P2OVprWs/R1NSk8vLysJro6GhlZ2c7NW0pLCyU1+t1trS0tKtpHwAA3GSuOBi1tLRo9uzZ+slPfqIhQ4ZIkoLBoFwulxISEsJqk5OTFQwGnZqzQ1HreOvYxWpCoZBOnjypb775Rs3NzW3WtJ6jLQUFBWpoaHC2mpqa9jcOAABuWrFXemBeXp727dunjz766FrO57pyu91yu90dPQ0AANBJXdE7RjNnztTGjRv1wQcfqG/fvs7+lJQUNTU1qb6+Pqy+trZWKSkpTs25V6m1Pr5UjcfjUXx8vBITExUTE9NmTes5AAAA2qtdwcgYo5kzZ2rDhg3asmWLBgwYEDaemZmpLl26qKyszNlXWVmp6upq+Xw+SZLP59PevXvDrh4rLS2Vx+NRRkaGU3P2OVprWs/hcrmUmZkZVtPS0qKysjKnBgAAoL3a9VFaXl6e1qxZo3fffVc9evRwvs/j9XoVHx8vr9eradOmKT8/Xz179pTH49GsWbPk8/k0duxYSdL48eOVkZGhxx57TEuWLFEwGNQLL7ygvLw852Ou6dOna8WKFXruuef05JNPasuWLVq3bp2Ki/96FVN+fr5yc3M1atQojRkzRkuXLtWJEyc0derUa/XaAACAW0y7gtGqVaskST/96U/D9r/xxht64oknJEmvvvqqoqOjNXHiRDU2Nsrv9+u1115zamNiYrRx40bNmDFDPp9P3bp1U25url5++WWnZsCAASouLtacOXO0bNky9e3bV6+//rr8fr9TM2nSJB05ckQLFy5UMBjUiBEjVFJSct4XsgEAAC7XVd3HKNJdz/sg4NK4jxEAXBz3MWpbp72PEQAAwM2EYAQAAGARjAAAACyCEQAAgEUwAgAAsAhGAAAAFsEIAADAIhgBAABYBCMAAACLYAQAAGARjAAAACyCEQAAgEUwAgAAsAhGAAAAFsEIAADAIhgBAABYBCMAAACLYAQAAGARjAAAACyCEQAAgEUwAgAAsAhGAAAAFsEIAADAIhgBAABYBCMAAACLYAQAAGARjAAAACyCEQAAgEUwAgAAsAhGAAAAFsEIAADAIhgBAABYBCMAAAArtqMnAAAA2tZ/QXFHT6FTamn87rqdm3eMAAAALIIRAACARTACAACwCEYAAAAWwQgAAMAiGAEAAFgEIwAAAItgBAAAYBGMAAAALIIRAACAxZ8EkTRk0WZFu7t29DQAAEAH4x0jAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAq93B6MMPP9TPf/5zpaamKioqSu+8807YuDFGCxcuVJ8+fRQfH6/s7Gx9+eWXYTVHjx7VlClT5PF4lJCQoGnTpun48eNhNZ999pnuuecexcXFKS0tTUuWLDlvLuvXr9fgwYMVFxenoUOH6v33329vOwAAAI52B6MTJ05o+PDhWrlyZZvjS5Ys0fLly7V69Wpt375d3bp1k9/v16lTp5yaKVOmaP/+/SotLdXGjRv14Ycf6plnnnHGQ6GQxo8fr379+qm8vFyvvPKKXnzxRf37v/+7U/PJJ5/o0Ucf1bRp07Rnzx499NBDeuihh7Rv3772tgQAACBJijLGmCs+OCpKGzZs0EMPPSTp+3eLUlNTNXfuXP3617+WJDU0NCg5OVlFRUWaPHmyvvjiC2VkZGjnzp0aNWqUJKmkpEQ/+9nP9Kc//UmpqalatWqVfvOb3ygYDMrlckmSFixYoHfeeUcHDhyQJE2aNEknTpzQxo0bnfmMHTtWI0aM0OrVqy9r/qFQSF6vV2mz1yna3fVKXwYAAHADtTR+p5ql/08NDQ3yeDzX9NzX9DtGVVVVCgaDys7OdvZ5vV5lZWUpEAhIkgKBgBISEpxQJEnZ2dmKjo7W9u3bnZp7773XCUWS5Pf7VVlZqb/85S9OzdnP01rT+jxtaWxsVCgUCtsAAABaXdNgFAwGJUnJyclh+5OTk52xYDCopKSksPHY2Fj17NkzrKatc5z9HBeqaR1vS2Fhobxer7OlpaW1t0UAAHATu6WuSisoKFBDQ4Oz1dTUdPSUAABAJ3JNg1FKSookqba2Nmx/bW2tM5aSkqK6urqw8TNnzujo0aNhNW2d4+znuFBN63hb3G63PB5P2AYAANDqmgajAQMGKCUlRWVlZc6+UCik7du3y+fzSZJ8Pp/q6+tVXl7u1GzZskUtLS3Kyspyaj788EOdPn3aqSktLdWgQYN02223OTVnP09rTevzAAAAtFe7g9Hx48dVUVGhiooKSd9/4bqiokLV1dWKiorS7Nmz9dvf/la///3vtXfvXj3++ONKTU11rly76667dP/99+vpp5/Wjh079PHHH2vmzJmaPHmyUlNTJUm//OUv5XK5NG3aNO3fv19r167VsmXLlJ+f78zj2WefVUlJif75n/9ZBw4c0Isvvqhdu3Zp5syZV/+qAACAW1Jsew/YtWuX7rvvPudxa1jJzc1VUVGRnnvuOZ04cULPPPOM6uvr9Td/8zcqKSlRXFycc8xbb72lmTNnaty4cYqOjtbEiRO1fPlyZ9zr9eoPf/iD8vLylJmZqcTERC1cuDDsXkc//vGPtWbNGr3wwgt6/vnndeedd+qdd97RkCFDruiFAAAAuKr7GEU67mMEAEDkiZj7GAEAAEQyghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAK+KD0cqVK9W/f3/FxcUpKytLO3bs6OgpAQCACBXRwWjt2rXKz8/XokWLtHv3bg0fPlx+v191dXUdPTUAABCBoowxpqMncaWysrI0evRorVixQpLU0tKitLQ0zZo1SwsWLDivvrGxUY2Njc7jhoYGpaen6/YZRYp2d71h8wYAAFeupfE7/XnVE6qvr5fX672m5469pme7gZqamlReXq6CggJnX3R0tLKzsxUIBNo8prCwUC+99NJ5+/+86onrNU0AAHCdfPvttwSjVt98842am5uVnJwctj85OVkHDhxo85iCggLl5+c7j+vr69WvXz9VV1df8xe2o4VCIaWlpammpkYej6ejp3NN0VtkorfIRG+R6WbuTfrrJz49e/a85ueO2GB0Jdxut9xu93n7vV7vTfkfjiR5PB56i0D0FpnoLTLRW+SKjr72X5WO2C9fJyYmKiYmRrW1tWH7a2trlZKS0kGzAgAAkSxig5HL5VJmZqbKysqcfS0tLSorK5PP5+vAmQEAgEgV0R+l5efnKzc3V6NGjdKYMWO0dOlSnThxQlOnTr2s491utxYtWtTmx2uRjt4iE71FJnqLTPQWua5nfxF9ub4krVixQq+88oqCwaBGjBih5cuXKysrq6OnBQAAIlDEByMAAIBrJWK/YwQAAHCtEYwAAAAsghEAAIBFMAIAALBuumD04Ycf6uc//7lSU1MVFRWld955J2zcGKOFCxeqT58+io+PV3Z2tr788suwmqNHj2rKlCnyeDxKSEjQtGnTdPz48RvYRdsu1dsTTzyhqKiosO3+++8Pq+msvRUWFmr06NHq0aOHkpKS9NBDD6mysjKs5tSpU8rLy1OvXr3UvXt3TZw48bwbfFZXVysnJ0ddu3ZVUlKS5s2bpzNnztzIVs5zOb399Kc/PW/tpk+fHlbTGXtbtWqVhg0b5txd1+fzadOmTc54pK6ZdOneInXN2rJ48WJFRUVp9uzZzr5IXruztdVbpK7diy++eN68Bw8e7IxH+ppdqr8btm7mJvP++++b3/zmN+Z3v/udkWQ2bNgQNr548WLj9XrNO++8Y/77v//b/MM//IMZMGCAOXnypFNz//33m+HDh5tPP/3U/Nd//ZcZOHCgefTRR29wJ+e7VG+5ubnm/vvvN4cPH3a2o0ePhtV01t78fr954403zL59+0xFRYX52c9+ZtLT083x48edmunTp5u0tDRTVlZmdu3aZcaOHWt+/OMfO+NnzpwxQ4YMMdnZ2WbPnj3m/fffN4mJiaagoKAjWnJcTm9/+7d/a55++umwtWtoaHDGO2tvv//9701xcbH5n//5H1NZWWmef/5506VLF7Nv3z5jTOSumTGX7i1S1+xcO3bsMP379zfDhg0zzz77rLM/kteu1YV6i9S1W7Rokbn77rvD5n3kyBFnPNLX7FL93ah1u+mC0dnODQ8tLS0mJSXFvPLKK86++vp643a7zX/8x38YY4z5/PPPjSSzc+dOp2bTpk0mKirK/PnPf75hc7+UCwWjBx988ILHREpvxhhTV1dnJJlt27YZY75fpy5dupj169c7NV988YWRZAKBgDHm++AYHR1tgsGgU7Nq1Srj8XhMY2PjjW3gIs7tzZjv/8Gf/T/uc0VKb8YYc9ttt5nXX3/9plqzVq29GXNzrNmxY8fMnXfeaUpLS8P6uRnW7kK9GRO5a7do0SIzfPjwNsduhjW7WH/G3Lh1u+k+SruYqqoqBYNBZWdnO/u8Xq+ysrIUCAQkSYFAQAkJCRo1apRTk52drejoaG3fvv2Gz7m9tm7dqqSkJA0aNEgzZszQt99+64xFUm8NDQ2S5Pzl5PLycp0+fTps7QYPHqz09PSwtRs6dKiSk5OdGr/fr1AopP3799/A2V/cub21euutt5SYmKghQ4aooKBA3333nTMWCb01Nzfr7bff1okTJ+Tz+W6qNTu3t1aRvmZ5eXnKyckJWyPp5vj3dqHeWkXq2n355ZdKTU3VHXfcoSlTpqi6ulrSzbFm0oX7a3Uj1i2i/yRIewWDQUkKe9FaH7eOBYNBJSUlhY3HxsaqZ8+eTk1ndf/99+vhhx/WgAED9NVXX+n555/XhAkTFAgEFBMTEzG9tbS0aPbs2frJT36iIUOGSPp+XVwulxISEsJqz127tta2dawzaKs3SfrlL3+pfv36KTU1VZ999pnmz5+vyspK/e53v5PUuXvbu3evfD6fTp06pe7du2vDhg3KyMhQRUVFxK/ZhXqTInvNJOntt9/W7t27tXPnzvPGIv3f28V6kyJ37bKyslRUVKRBgwbp8OHDeumll3TPPfdo3759Eb9m0sX769Gjxw1bt1sqGN3sJk+e7Pw8dOhQDRs2TD/4wQ+0detWjRs3rgNn1j55eXnat2+fPvroo46eyjV3od6eeeYZ5+ehQ4eqT58+GjdunL766iv94Ac/uNHTbJdBgwapoqJCDQ0N+s///E/l5uZq27ZtHT2ta+JCvWVkZET0mtXU1OjZZ59VaWmp4uLiOno619Tl9BapazdhwgTn52HDhikrK0v9+vXTunXrFB8f34EzuzYu1t+0adNu2LrdUh+lpaSkSNJ539Kvra11xlJSUlRXVxc2fubMGR09etSpiRR33HGHEhMTdfDgQUmR0dvMmTO1ceNGffDBB+rbt6+zPyUlRU1NTaqvrw+rP3ft2lrb1rGOdqHe2tL69/7OXrvO2pvL5dLAgQOVmZmpwsJCDR8+XMuWLbsp1uxCvbUlktasvLxcdXV1GjlypGJjYxUbG6tt27Zp+fLlio2NVXJycsSu3aV6a25uPu+YSFq7syUkJOiHP/yhDh48eFP8ezvX2f215Xqt2y0VjAYMGKCUlBSVlZU5+0KhkLZv3+58b8Dn86m+vl7l5eVOzZYtW9TS0hJxf5z2T3/6k7799lv16dNHUufuzRijmTNnasOGDdqyZYsGDBgQNp6ZmakuXbqErV1lZaWqq6vD1m7v3r1h4a+0tFQej8f5+KMjXKq3tlRUVEhS2Np1xt7a0tLSosbGxoheswtp7a0tkbRm48aN0969e1VRUeFso0aN0pQpU5yfI3XtLtVbTEzMecdE0tqd7fjx4/rqq6/Up0+fm/Lf29n9teW6rdtlf007Qhw7dszs2bPH7Nmzx0gy//Iv/2L27Nlj/u///s8Y8/3l+gkJCebdd981n332mXnwwQfbvFz/Rz/6kdm+fbv56KOPzJ133tkpLmm/WG/Hjh0zv/71r00gEDBVVVXmj3/8oxk5cqS58847zalTp5xzdNbeZsyYYbxer9m6dWvYpZjfffedUzN9+nSTnp5utmzZYnbt2mV8Pp/x+XzOeOulmuPHjzcVFRWmpKTE9O7du8MvRb1UbwcPHjQvv/yy2bVrl6mqqjLvvvuuueOOO8y9997rnKOz9rZgwQKzbds2U1VVZT777DOzYMECExUVZf7whz8YYyJ3zYy5eG+RvGYXcu4VP5G8duc6u7dIXru5c+earVu3mqqqKvPxxx+b7Oxsk5iYaOrq6owxkb9mF+vvRq7bTReMPvjgAyPpvC03N9cY8/0l+//4j/9okpOTjdvtNuPGjTOVlZVh5/j222/No48+arp37248Ho+ZOnWqOXbsWAd0E+5ivX333Xdm/Pjxpnfv3qZLly6mX79+5umnnw67bNGYzttbW31JMm+88YZTc/LkSfOrX/3K3HbbbaZr167mF7/4hTl8+HDYeb7++mszYcIEEx8fbxITE83cuXPN6dOnb3A34S7VW3V1tbn33ntNz549jdvtNgMHDjTz5s0Luz+HMZ2ztyeffNL069fPuFwu07t3bzNu3DgnFBkTuWtmzMV7i+Q1u5Bzg1Ekr925zu4tktdu0qRJpk+fPsblcpnbb7/dTJo0yRw8eNAZj/Q1u1h/N3Ldoowx5vLfXwIAALh53VLfMQIAALgYghEAAIBFMAIAALAIRgAAABbBCAAAwCIYAQAAWAQjAAAAi2AEAABgEYwAAAAsghEAAIBFMAIAALD+P+/w2XpXdXbCAAAAAElFTkSuQmCC\n"
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<Figure size 640x480 with 1 Axes>"
            ],
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjUAAAGdCAYAAADqsoKGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAiPUlEQVR4nO3dfXBU1eH/8c/GmIVodmOAZJO6hAcfAJGIoDHVKpRUCAzWmlrBOAVlUCyoEK2YVgWctkm1tdaWwtgK2BFE7AAqKpYHCVJD5MEUsTYSDAKSgIVJlgRZEnK+f/jj/lgT1OBulpy8XzN3hr3n7s1Zjsy+3b27cRljjAAAANq5mGhPAAAAIByIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABgBaIGAABYgagBAABWiI32BE5HU1OT9u3bp4SEBLlcrmhPBwAAfAPGGB0+fFhpaWmKiQn/6yrtMmr27dsnv98f7WkAAIDTsGfPHp1//vlhP2+7jJqEhARJX/yleDyeKM8GAAB8E4FAQH6/33keD7d2GTUn3nLyeDxEDQAA7UykLh3hQmEAAGAFogYAAFiBqAEAAFYgagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABgBaIGAABYgagBAABWIGoAAIAVWh0169ev1+jRo5WWliaXy6Xly5eHjLtcrha3J554wjmmR48ezcaLioq+9YMBAAAdV6ujpr6+XhkZGZo9e3aL41VVVSHbvHnz5HK5lJubG3LcY489FnLcPffcc3qPAAAAQFJsa++Qk5OjnJycU477fL6Q2y+//LKGDh2qXr16hexPSEhodmxr9Z/xpmLc8d/qHAC+nV1Fo6I9BQCQFOFravbv36/XXntNEyZMaDZWVFSkLl26aODAgXriiSfU2Nh4yvMEg0EFAoGQDQAA4GStfqWmNZ577jklJCTopptuCtl/77336vLLL1dSUpLeeecdFRQUqKqqSk8++WSL5yksLNSsWbMiOVUAANDORTRq5s2bp7y8PHXq1Clkf35+vvPnAQMGKC4uTnfddZcKCwvldrubnaegoCDkPoFAQH6/P3ITBwAA7U7Eoubtt99WeXm5Xnzxxa89NjMzU42Njdq1a5cuvvjiZuNut7vF2AEAADghYtfUPPvssxo0aJAyMjK+9tiysjLFxMQoOTk5UtMBAACWa/UrNXV1daqoqHBuV1ZWqqysTElJSerevbukL94eeumll/T73/++2f1LSkpUWlqqoUOHKiEhQSUlJZo2bZpuu+02nXfeed/ioQAAgI6s1VGzefNmDR061Ll94lqXcePGacGCBZKkxYsXyxijsWPHNru/2+3W4sWLNXPmTAWDQfXs2VPTpk0LuWYGAACgtVzGGBPtSbRWIBCQ1+uVf+oSvqcGiDK+pwbAN3Xi+bu2tlYejyfs5+d3PwEAACsQNQAAwApEDQAAsAJRAwAArEDUAAAAKxA1AADACkQNAACwAlEDAACsQNQAAAArEDUAAMAKRA0AALACUQMAAKxA1AAAACsQNQAAwApEDQAAsAJRAwAArEDUAAAAKxA1AADACkQNAACwAlEDAACsQNQAAAArEDUAAMAKRA0AALACUQMAAKxA1AAAACsQNQAAwApEDQAAsAJRAwAArEDUAAAAKxA1AADACkQNAACwAlEDAACsQNQAAAArEDUAAMAKRA0AALACUQMAAKxA1AAAACsQNQAAwApEDQAAsEKro2b9+vUaPXq00tLS5HK5tHz58pDx8ePHy+VyhWwjRowIOebQoUPKy8uTx+NRYmKiJkyYoLq6um/1QAAAQMfW6qipr69XRkaGZs+efcpjRowYoaqqKmd74YUXQsbz8vL0wQcfaNWqVVqxYoXWr1+vO++8s/WzBwAA+H9iW3uHnJwc5eTkfOUxbrdbPp+vxbEPP/xQK1eu1KZNmzR48GBJ0p/+9CeNHDlSv/vd75SWltbaKQEAAETmmpp169YpOTlZF198se6++24dPHjQGSspKVFiYqITNJKUnZ2tmJgYlZaWtni+YDCoQCAQsgEAAJws7FEzYsQI/f3vf9eaNWv029/+VsXFxcrJydHx48clSdXV1UpOTg65T2xsrJKSklRdXd3iOQsLC+X1ep3N7/eHe9oAAKCda/XbT19nzJgxzp8vvfRSDRgwQL1799a6des0bNiw0zpnQUGB8vPznduBQICwAQAAISL+ke5evXqpa9euqqiokCT5fD4dOHAg5JjGxkYdOnTolNfhuN1ueTyekA0AAOBkEY+avXv36uDBg0pNTZUkZWVlqaamRlu2bHGOWbt2rZqampSZmRnp6QAAAEu1+u2nuro651UXSaqsrFRZWZmSkpKUlJSkWbNmKTc3Vz6fTzt37tSDDz6oCy64QMOHD5ck9e3bVyNGjNDEiRM1d+5cNTQ0aMqUKRozZgyffAIAAKet1a/UbN68WQMHDtTAgQMlSfn5+Ro4cKAeffRRnXXWWdq2bZtuuOEGXXTRRZowYYIGDRqkt99+W2632znHwoUL1adPHw0bNkwjR47UNddco2eeeSZ8jwoAAHQ4rX6lZsiQITLGnHL8zTff/NpzJCUladGiRa390QAAAKfE734CAABWIGoAAIAViBoAAGAFogYAAFiBqAEAAFYgagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABgBaIGAABYgagBAABWIGoAAIAViBoAAGAFogYAAFiBqAEAAFYgagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABgBaIGAABYgagBAABWIGoAAIAViBoAAGAFogYAAFiBqAEAAFYgagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABgBaIGAABYgagBAABWIGoAAIAViBoAAGCFVkfN+vXrNXr0aKWlpcnlcmn58uXOWENDg6ZPn65LL71U55xzjtLS0vTTn/5U+/btCzlHjx495HK5QraioqJv/WAAAEDH1eqoqa+vV0ZGhmbPnt1s7MiRI9q6daseeeQRbd26VUuXLlV5ebluuOGGZsc+9thjqqqqcrZ77rnn9B4BAACApNjW3iEnJ0c5OTktjnm9Xq1atSpk35///GddeeWV2r17t7p37+7sT0hIkM/na+2PBwAAaFHEr6mpra2Vy+VSYmJiyP6ioiJ16dJFAwcO1BNPPKHGxsZTniMYDCoQCIRsAAAAJ2v1KzWtcfToUU2fPl1jx46Vx+Nx9t977726/PLLlZSUpHfeeUcFBQWqqqrSk08+2eJ5CgsLNWvWrEhOFQAAtHMuY4w57Tu7XFq2bJluvPHGZmMNDQ3Kzc3V3r17tW7dupCo+bJ58+bprrvuUl1dndxud7PxYDCoYDDo3A4EAvL7/fJPXaIYd/zpTh9AGOwqGhXtKQBoJwKBgLxer2pra7+yC05XRF6paWho0E9+8hN98sknWrt27ddOPDMzU42Njdq1a5cuvvjiZuNut7vF2AEAADgh7FFzImh27Niht956S126dPna+5SVlSkmJkbJycnhng4AAOggWh01dXV1qqiocG5XVlaqrKxMSUlJSk1N1Y9//GNt3bpVK1as0PHjx1VdXS1JSkpKUlxcnEpKSlRaWqqhQ4cqISFBJSUlmjZtmm677Tadd9554XtkAACgQ2l11GzevFlDhw51bufn50uSxo0bp5kzZ+qVV16RJF122WUh93vrrbc0ZMgQud1uLV68WDNnzlQwGFTPnj01bdo05zwAAACno9VRM2TIEH3VtcVfd93x5Zdfro0bN7b2xwIAAHwlfvcTAACwAlEDAACsQNQAAAArEDUAAMAKRA0AALACUQMAAKxA1AAAACsQNQAAwApEDQAAsAJRAwAArEDUAAAAKxA1AADACkQNAACwAlEDAACsQNQAAAArEDUAAMAKRA0AALACUQMAAKxA1AAAACsQNQAAwApEDQAAsAJRAwAArEDUAAAAKxA1AADACkQNAACwAlEDAACsQNQAAAArEDUAAMAKRA0AALACUQMAAKxA1AAAACsQNQAAwApEDQAAsAJRAwAArEDUAAAAKxA1AADACkQNAACwAlEDAACsQNQAAAArtDpq1q9fr9GjRystLU0ul0vLly8PGTfG6NFHH1Vqaqo6d+6s7Oxs7dixI+SYQ4cOKS8vTx6PR4mJiZowYYLq6uq+1QMBAAAdW6ujpr6+XhkZGZo9e3aL448//riefvppzZ07V6WlpTrnnHM0fPhwHT161DkmLy9PH3zwgVatWqUVK1Zo/fr1uvPOO0//UQAAgA7PZYwxp31nl0vLli3TjTfeKOmLV2nS0tJ0//3364EHHpAk1dbWKiUlRQsWLNCYMWP04Ycfql+/ftq0aZMGDx4sSVq5cqVGjhypvXv3Ki0t7Wt/biAQkNfrlX/qEsW44093+gDCYFfRqGhPAUA7ceL5u7a2Vh6PJ+znD+s1NZWVlaqurlZ2drazz+v1KjMzUyUlJZKkkpISJSYmOkEjSdnZ2YqJiVFpaWmL5w0GgwoEAiEbAADAycIaNdXV1ZKklJSUkP0pKSnOWHV1tZKTk0PGY2NjlZSU5BzzZYWFhfJ6vc7m9/vDOW0AAGCBdvHpp4KCAtXW1jrbnj17oj0lAABwhglr1Ph8PknS/v37Q/bv37/fGfP5fDpw4EDIeGNjow4dOuQc82Vut1sejydkAwAAOFlYo6Znz57y+Xxas2aNsy8QCKi0tFRZWVmSpKysLNXU1GjLli3OMWvXrlVTU5MyMzPDOR0AANCBxLb2DnV1daqoqHBuV1ZWqqysTElJSerevbumTp2qX/3qV7rwwgvVs2dPPfLII0pLS3M+IdW3b1+NGDFCEydO1Ny5c9XQ0KApU6ZozJgx3+iTTwAAAC1pddRs3rxZQ4cOdW7n5+dLksaNG6cFCxbowQcfVH19ve68807V1NTommuu0cqVK9WpUyfnPgsXLtSUKVM0bNgwxcTEKDc3V08//XQYHg4AAOiovtX31EQL31MDnDn4nhoA31S7+p4aAACAaCFqAACAFYgaAABgBaIGAABYgagBAABWIGoAAIAViBoAAGAFogYAAFiBqAEAAFYgagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABgBaIGAABYgagBAABWIGoAAIAViBoAAGAFogYAAFiBqAEAAFYgagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABgBaIGAABYgagBAABWIGoAAIAViBoAAGAFogYAAFiBqAEAAFYgagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABghbBHTY8ePeRyuZptkydPliQNGTKk2dikSZPCPQ0AANDBxIb7hJs2bdLx48ed29u3b9cPfvAD3Xzzzc6+iRMn6rHHHnNux8fHh3saAACggwl71HTr1i3kdlFRkXr37q3rrrvO2RcfHy+fzxfuHw0AADqwiF5Tc+zYMT3//PO644475HK5nP0LFy5U165d1b9/fxUUFOjIkSNfeZ5gMKhAIBCyAQAAnCzsr9ScbPny5aqpqdH48eOdfbfeeqvS09OVlpambdu2afr06SovL9fSpUtPeZ7CwkLNmjUrklMFAADtnMsYYyJ18uHDhysuLk6vvvrqKY9Zu3athg0bpoqKCvXu3bvFY4LBoILBoHM7EAjI7/fLP3WJYtxcjwNE066iUdGeAoB2IhAIyOv1qra2Vh6PJ+znj9grNZ988olWr179la/ASFJmZqYkfWXUuN1uud3usM8RAADYI2LX1MyfP1/JyckaNeqr/y+urKxMkpSamhqpqQAAgA4gIq/UNDU1af78+Ro3bpxiY///j9i5c6cWLVqkkSNHqkuXLtq2bZumTZuma6+9VgMGDIjEVAAAQAcRkahZvXq1du/erTvuuCNkf1xcnFavXq2nnnpK9fX18vv9ys3N1cMPPxyJaQAAgA4kIlFz/fXXq6Xrj/1+v4qLiyPxIwEAQAfH734CAABWIGoAAIAViBoAAGAFogYAAFiBqAEAAFYgagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABgBaIGAABYgagBAABWIGoAAIAViBoAAGAFogYAAFiBqAEAAFYgagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABgBaIGAABYgagBAABWIGoAAIAViBoAAGAFogYAAFiBqAEAAFYgagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABgBaIGAABYgagBAABWIGoAAIAViBoAAGCFsEfNzJkz5XK5QrY+ffo440ePHtXkyZPVpUsXnXvuucrNzdX+/fvDPQ0AANDBROSVmksuuURVVVXOtmHDBmds2rRpevXVV/XSSy+puLhY+/bt00033RSJaQAAgA4kNiInjY2Vz+drtr+2tlbPPvusFi1apO9///uSpPnz56tv377auHGjrrrqqkhMBwAAdAAReaVmx44dSktLU69evZSXl6fdu3dLkrZs2aKGhgZlZ2c7x/bp00fdu3dXSUnJKc8XDAYVCARCNgAAgJOFPWoyMzO1YMECrVy5UnPmzFFlZaW+973v6fDhw6qurlZcXJwSExND7pOSkqLq6upTnrOwsFBer9fZ/H5/uKcNAADaubC//ZSTk+P8ecCAAcrMzFR6erqWLFmizp07n9Y5CwoKlJ+f79wOBAKEDQAACBHxj3QnJibqoosuUkVFhXw+n44dO6aampqQY/bv39/iNTgnuN1ueTyekA0AAOBkEY+auro67dy5U6mpqRo0aJDOPvtsrVmzxhkvLy/X7t27lZWVFempAAAAi4X97acHHnhAo0ePVnp6uvbt26cZM2borLPO0tixY+X1ejVhwgTl5+crKSlJHo9H99xzj7KysvjkEwAA+FbCHjV79+7V2LFjdfDgQXXr1k3XXHONNm7cqG7dukmS/vCHPygmJka5ubkKBoMaPny4/vKXv4R7GgAAoINxGWNMtCfRWoFA4ItPQU1dohh3fLSnA3Rou4pGRXsKANqJE8/ftbW1Ebk+lt/9BAAArEDUAAAAKxA1AADACkQNAACwAlEDAACsQNQAAAArEDUAAMAKRA0AALACUQMAAKxA1AAAACsQNQAAwApEDQAAsAJRAwAArEDUAAAAKxA1AADACkQNAACwAlEDAACsQNQAAAArEDUAAMAKRA0AALACUQMAAKxA1AAAACsQNQAAwApEDQAAsAJRAwAArEDUAAAAKxA1AADACkQNAACwAlEDAACsQNQAAAArEDUAAMAKRA0AALACUQMAAKxA1AAAACsQNQAAwApEDQAAsAJRAwAArEDUAAAAKxA1AADACmGPmsLCQl1xxRVKSEhQcnKybrzxRpWXl4ccM2TIELlcrpBt0qRJ4Z4KAADoQMIeNcXFxZo8ebI2btyoVatWqaGhQddff73q6+tDjps4caKqqqqc7fHHHw/3VAAAQAcSG+4Trly5MuT2ggULlJycrC1btujaa6919sfHx8vn84X7xwMAgA4q4tfU1NbWSpKSkpJC9i9cuFBdu3ZV//79VVBQoCNHjpzyHMFgUIFAIGQDAAA4WdhfqTlZU1OTpk6dqquvvlr9+/d39t96661KT09XWlqatm3bpunTp6u8vFxLly5t8TyFhYWaNWtWJKcKAADaOZcxxkTq5HfffbfeeOMNbdiwQeeff/4pj1u7dq2GDRumiooK9e7du9l4MBhUMBh0bgcCAfn9fvmnLlGMOz4icwfwzewqGhXtKQBoJwKBgLxer2pra+XxeMJ+/oi9UjNlyhStWLFC69ev/8qgkaTMzExJOmXUuN1uud3uiMwTAADYIexRY4zRPffco2XLlmndunXq2bPn196nrKxMkpSamhru6QAAgA4i7FEzefJkLVq0SC+//LISEhJUXV0tSfJ6vercubN27typRYsWaeTIkerSpYu2bdumadOm6dprr9WAAQPCPR0AANBBhD1q5syZI+mLL9g72fz58zV+/HjFxcVp9erVeuqpp1RfXy+/36/c3Fw9/PDD4Z4KAADoQCLy9tNX8fv9Ki4uDvePBQAAHRy/+wkAAFiBqAEAAFYgagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFSL6W7oB2K/HQ69FewoA2omm4JGInp9XagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABgBaIGAABYgagBAABWIGoAAIAViBoAAGAFogYAAFiBqAEAAFYgagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABgBaIGAABYgagBAABWIGoAAIAViBoAAGAFogYAAFiBqAEAAFYgagAAgBWIGgAAYAWiBgAAWIGoAQAAViBqAACAFYgaAABgBaIGAABYIapRM3v2bPXo0UOdOnVSZmam3n333WhOBwAAtGNRi5oXX3xR+fn5mjFjhrZu3aqMjAwNHz5cBw4ciNaUAABAOxa1qHnyySc1ceJE3X777erXr5/mzp2r+Ph4zZs3L1pTAgAA7VhsNH7osWPHtGXLFhUUFDj7YmJilJ2drZKSkmbHB4NBBYNB53Ztba0kqSl4JPKTBQAAYXHiedsYE5HzRyVq/ve//+n48eNKSUkJ2Z+SkqL//ve/zY4vLCzUrFmzmu3/dM74SE0RAABEyMGDB+X1esN+3qhETWsVFBQoPz/fuV1TU6P09HTt3r07In8p+OYCgYD8fr/27Nkjj8cT7el0aKzFmYX1OHOwFmeO2tpade/eXUlJSRE5f1SipmvXrjrrrLO0f//+kP379++Xz+drdrzb7Zbb7W623+v18h/oGcLj8bAWZwjW4szCepw5WIszR0xMZC7pjcqFwnFxcRo0aJDWrFnj7GtqatKaNWuUlZUVjSkBAIB2LmpvP+Xn52vcuHEaPHiwrrzySj311FOqr6/X7bffHq0pAQCAdixqUXPLLbfos88+06OPPqrq6mpddtllWrlyZbOLh1vidrs1Y8aMFt+SQttiLc4crMWZhfU4c7AWZ45Ir4XLROpzVQAAAG2I3/0EAACsQNQAAAArEDUAAMAKRA0AALDCGRM169ev1+jRo5WWliaXy6Xly5eHjBtj9Oijjyo1NVWdO3dWdna2duzYEXLMoUOHlJeXJ4/Ho8TERE2YMEF1dXVt+CjsUFhYqCuuuEIJCQlKTk7WjTfeqPLy8pBjjh49qsmTJ6tLly4699xzlZub2+zLFHfv3q1Ro0YpPj5eycnJ+vnPf67Gxsa2fCjt3pw5czRgwADnS8OysrL0xhtvOOOsQ/QUFRXJ5XJp6tSpzj7Wo+3MnDlTLpcrZOvTp48zzlq0rU8//VS33XabunTpos6dO+vSSy/V5s2bnfE2ew43Z4jXX3/d/PKXvzRLly41ksyyZctCxouKiozX6zXLly83//73v80NN9xgevbsaT7//HPnmBEjRpiMjAyzceNG8/bbb5sLLrjAjB07to0fSfs3fPhwM3/+fLN9+3ZTVlZmRo4cabp3727q6uqcYyZNmmT8fr9Zs2aN2bx5s7nqqqvMd7/7XWe8sbHR9O/f32RnZ5v33nvPvP7666Zr166moKAgGg+p3XrllVfMa6+9Zj766CNTXl5ufvGLX5izzz7bbN++3RjDOkTLu+++a3r06GEGDBhg7rvvPmc/69F2ZsyYYS655BJTVVXlbJ999pkzzlq0nUOHDpn09HQzfvx4U1paaj7++GPz5ptvmoqKCueYtnoOP2Oi5mRfjpqmpibj8/nME0884eyrqakxbrfbvPDCC8YYY/7zn/8YSWbTpk3OMW+88YZxuVzm008/bbO52+jAgQNGkikuLjbGfPF3f/bZZ5uXXnrJOebDDz80kkxJSYkx5otIjYmJMdXV1c4xc+bMMR6PxwSDwbZ9AJY577zzzN/+9jfWIUoOHz5sLrzwQrNq1Spz3XXXOVHDerStGTNmmIyMjBbHWIu2NX36dHPNNdeccrwtn8PPmLefvkplZaWqq6uVnZ3t7PN6vcrMzFRJSYkkqaSkRImJiRo8eLBzTHZ2tmJiYlRaWtrmc7ZJbW2tJDm/gGzLli1qaGgIWY8+ffqoe/fuIetx6aWXhnyZ4vDhwxUIBPTBBx+04eztcfz4cS1evFj19fXKyspiHaJk8uTJGjVqVMjfu8S/i2jYsWOH0tLS1KtXL+Xl5Wn37t2SWIu29sorr2jw4MG6+eablZycrIEDB+qvf/2rM96Wz+HtImqqq6slqdm3DaekpDhj1dXVSk5ODhmPjY1VUlKScwxar6mpSVOnTtXVV1+t/v37S/ri7zouLk6JiYkhx355PVparxNj+Obef/99nXvuuXK73Zo0aZKWLVumfv36sQ5RsHjxYm3dulWFhYXNxliPtpWZmakFCxZo5cqVmjNnjiorK/W9731Phw8fZi3a2Mcff6w5c+bowgsv1Jtvvqm7775b9957r5577jlJbfscHrVfk4D2YfLkydq+fbs2bNgQ7al0WBdffLHKyspUW1urf/zjHxo3bpyKi4ujPa0OZ8+ePbrvvvu0atUqderUKdrT6fBycnKcPw8YMECZmZlKT0/XkiVL1Llz5yjOrONpamrS4MGD9Zvf/EaSNHDgQG3fvl1z587VuHHj2nQu7eKVGp/PJ0nNrlzfv3+/M+bz+XTgwIGQ8cbGRh06dMg5Bq0zZcoUrVixQm+99ZbOP/98Z7/P59OxY8dUU1MTcvyX16Ol9Toxhm8uLi5OF1xwgQYNGqTCwkJlZGToj3/8I+vQxrZs2aIDBw7o8ssvV2xsrGJjY1VcXKynn35asbGxSklJYT2iKDExURdddJEqKir4t9HGUlNT1a9fv5B9ffv2dd4ObMvn8HYRNT179pTP59OaNWucfYFAQKWlpcrKypIkZWVlqaamRlu2bHGOWbt2rZqampSZmdnmc27PjDGaMmWKli1bprVr16pnz54h44MGDdLZZ58dsh7l5eXavXt3yHq8//77If+Rrlq1Sh6Pp9l//GidpqYmBYNB1qGNDRs2TO+//77KysqcbfDgwcrLy3P+zHpET11dnXbu3KnU1FT+bbSxq6++utnXfnz00UdKT0+X1MbP4a2/zjkyDh8+bN577z3z3nvvGUnmySefNO+995755JNPjDFffBwsMTHRvPzyy2bbtm3mhz/8YYsfBxs4cKApLS01GzZsMBdeeCEf6T4Nd999t/F6vWbdunUhH5c8cuSIc8ykSZNM9+7dzdq1a83mzZtNVlaWycrKcsZPfFzy+uuvN2VlZWblypWmW7dufFyylR566CFTXFxsKisrzbZt28xDDz1kXC6X+ec//2mMYR2i7eRPPxnDerSl+++/36xbt85UVlaaf/3rXyY7O9t07drVHDhwwBjDWrSld99918TGxppf//rXZseOHWbhwoUmPj7ePP/8884xbfUcfsZEzVtvvWUkNdvGjRtnjPniI2GPPPKISUlJMW632wwbNsyUl5eHnOPgwYNm7Nix5txzzzUej8fcfvvt5vDhw1F4NO1bS+sgycyfP9855vPPPzc/+9nPzHnnnWfi4+PNj370I1NVVRVynl27dpmcnBzTuXNn07VrV3P//febhoaGNn407dsdd9xh0tPTTVxcnOnWrZsZNmyYEzTGsA7R9uWoYT3azi233GJSU1NNXFyc+c53vmNuueWWkO9FYS3a1quvvmr69+9v3G636dOnj3nmmWdCxtvqOdxljDGtfKUJAADgjNMurqkBAAD4OkQNAACwAlEDAACsQNQAAAArEDUAAMAKRA0AALACUQMAAKxA1AAAACsQNQAAwApEDQAAsAJRAwAArEDUAAAAK/wf4S2F1QgLMecAAAAASUVORK5CYII=\n"
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "id": "Yt_Pyc70jWAR"
      },
      "outputs": [],
      "source": [
        "def collate_fn(batch):\n",
        "\n",
        "    # Now collate into mini-batches\n",
        "    inputs = tokenizer([i['question'] for i in batch], return_tensors='pt', padding='max_length', truncation=True, max_length=300)\n",
        "    labels = [i['answer'] for i in batch]\n",
        "\n",
        "    return {'input_ids': inputs['input_ids'], 'attention_mask': inputs['attention_mask'], 'label': labels}\n",
        "\n",
        "def collate_fn_qat(batch):\n",
        "\n",
        "    # Now collate into mini-batches\n",
        "    inputs = tokenizer([i['question'] for i in batch], return_tensors='pt', padding='max_length', truncation=True, max_length=150)\n",
        "    # labels = tokenizer([str(i['answer']) for i in batch], return_tensors='pt', padding='max_length', truncation=True, max_length=65)\n",
        "    labels = tokenizer([str(i['answer']) for i in batch], return_tensors='pt', padding='max_length', truncation=True, max_length=150)\n",
        "\n",
        "    # labels = [ele[-100:] for ele in labels['input_ids']]\n",
        "\n",
        "    return {'input_ids': inputs['input_ids'], 'attention_mask': inputs['attention_mask'], 'label': labels['input_ids']}\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "id": "pHBPP0U3KJC6",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "7a32e64a-e612-45fe-a59a-b2057375d4e1"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "    Len of Original Input: 107500\n",
            "    Len of Original Labels: 107500\n",
            "    Len of New_Input: 14203\n",
            "    Len of New_Label: 14203\n",
            "          \n",
            "    Sample Input, Label: (\"Choose the choice that best answer the following question:\\n  Question:\\n  Two quantum states with energies E1 and E2 have a lifetime of 10^-9 sec and 10^-8 sec, respectively. We want to clearly distinguish these two energy levels. Which one of the following options could be their energy difference so that they can be clearly resolved?\\n  Choices:\\n  ['10^-4 eV', '10^-9 eV', '10^-8 eV\\\\n', '10^-11 eV']\\n  \", '10^-4 eV')\n",
            "    \n",
            "    \n",
            "\n",
            "    Len of Original Input: 19082\n",
            "    Len of Original Labels: 19082\n",
            "    Len of New_Input: 13706\n",
            "    Len of New_Label: 13706\n",
            "          \n",
            "    Sample Input, Label: (\"Choose the choice that best answer the following question:\\n  Question:\\n  Consider the following metric:\\n\\nds^{2}=\\\\frac{32}{\\\\left(4-x^{2}-y^{2}\\\\right)}\\\\left(dx^{2}+dy^{2}\\\\right)\\n\\nWhat is the area of the pseudosphere of radius r=2?\\n\\nPS: for the maths use a LaTeX editor.\\n  Choices:\\n  ['+\\\\\\\\infty', '4\\\\\\\\pi\\\\\\\\left(x^{2}-y^{2}\\\\\\\\right)', '4\\\\\\\\pi\\\\\\\\left(x^{2}+y^{2}\\\\\\\\right)', '0']\\n  \", '+\\\\infty')\n",
            "    \n",
            "    \n"
          ]
        }
      ],
      "source": [
        "#CREATE SETS FOR 1) PTQ CALIBRATION/QAT FINETUNING and 2) EVAL\n",
        "\n",
        "#note: there is no test set for gpqa, so i take a subset of the train set instead (80%), leaving the other subset for testing\n",
        "\n",
        "#gpqa (for reasoning)\n",
        "gpqa_raw = load_dataset(\"Idavidrein/gpqa\", \"gpqa_diamond\")\n",
        "gpqa_choices = [[a, b, c, d] for a, b, c, d in\n",
        "                zip(gpqa_raw['train']['Correct Answer'], gpqa_raw['train']['Incorrect Answer 1'],\n",
        "                    gpqa_raw['train']['Incorrect Answer 2'], gpqa_raw['train']['Incorrect Answer 3'])]\n",
        "for choices in gpqa_choices:\n",
        "  random.shuffle(choices)\n",
        "\n",
        "gpqa_questions_proc = format_for_mm(gpqa_raw['train']['Question'], gpqa_choices)\n",
        "# gpqa = benchmark_dataset(gpqa_questions_proc, gpqa_raw['train']['Correct Answer'])\n",
        "\n",
        "# gpqa_dl_train = DataLoader(gpqa[:158], batch_size=64, shuffle=True, collate_fn=collate_fn)\n",
        "# gpqa_dl_test = DataLoader(gpqa[158:], batch_size=64, shuffle=True, collate_fn=collate_fn)\n",
        "\n",
        "#math (for math)\n",
        "math_raw = load_dataset(\"lighteval/MATH\", \"all\")\n",
        "# math_train = benchmark_dataset(math_raw['train']['problem'], math_raw['train']['solution'])\n",
        "# math_dl_train = DataLoader(math_train, batch_size=64, shuffle=True, collate_fn=collate_fn)\n",
        "\n",
        "# math_test = benchmark_dataset(math_raw['test']['problem'], math_raw['test']['solution'])\n",
        "# math_dl_test = DataLoader(math_test, batch_size=64, shuffle=True, collate_fn=collate_fn)\n",
        "\n",
        "# #mmlu (for gen knowledge + reasoning)\n",
        "mmlu_raw = load_dataset(\"cais/mmlu\", \"all\")\n",
        "mmlu_questions_proc_train = format_for_mm(mmlu_raw['auxiliary_train']['question'], mmlu_raw['auxiliary_train']['choices'])\n",
        "# mmlu_train = benchmark_dataset(mmlu_questions_proc_train, mmlu_raw['auxiliary_train']['answer'])\n",
        "# mmlu_dl_train = DataLoader(mmlu_train, batch_size=64, shuffle=True, collate_fn=collate_fn)\n",
        "\n",
        "mmlu_questions_proc_test = format_for_mm(mmlu_raw['test']['question'], mmlu_raw['test']['choices'])\n",
        "# mmlu_test = benchmark_dataset(mmlu_questions_proc_test, mmlu_raw['test']['answer'])\n",
        "# mmlu_dl_test = DataLoader(mmlu_test, batch_size=64, shuffle=True, collate_fn=collate_fn)\n",
        "\n",
        "\n",
        "#master list - train\n",
        "sublist_input_train = gpqa_questions_proc[:158] + math_raw['train']['problem'] + mmlu_questions_proc_train\n",
        "sublist_answer_train = gpqa_raw['train']['Correct Answer'][:158] + math_raw['train']['solution'] + mmlu_raw['auxiliary_train']['answer']\n",
        "agg_train_set = benchmark_dataset(sublist_input_train, sublist_answer_train, tokenizer)\n",
        "\n",
        "agg_dl_train = DataLoader(agg_train_set, batch_size=4, shuffle=True, collate_fn=collate_fn_qat)\n",
        "\n",
        "#master list - test\n",
        "sublist_input_test = gpqa_questions_proc[158:] + math_raw['test']['problem'] + mmlu_questions_proc_test\n",
        "sublist_answer_test = gpqa_raw['train']['Correct Answer'][158:] + math_raw['test']['solution'] + mmlu_raw['test']['answer']\n",
        "agg_test_set = benchmark_dataset(sublist_input_test, sublist_answer_test, tokenizer)\n",
        "\n",
        "agg_dl_test = DataLoader(agg_test_set, batch_size=4, shuffle=True, collate_fn=collate_fn_qat)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "DCQJlZHd1LrR",
        "outputId": "5983369a-c3af-428d-9db4-d6283a48e155"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Length of Labels\n",
            "4\n",
            "Shape of Input Ids\n",
            "torch.Size([4, 150])\n",
            "Label Sample\n",
            "tensor([[128000,     16, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009],\n",
            "        [128000,     16, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009],\n",
            "        [128000,     17, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009],\n",
            "        [128000,     16, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,\n",
            "         128009, 128009, 128009, 128009, 128009, 128009]])\n"
          ]
        }
      ],
      "source": [
        "test_check = next(iter(agg_dl_test))\n",
        "\n",
        "print(\"Length of Labels\")\n",
        "print(len(test_check['label']))\n",
        "print(\"Shape of Input Ids\")\n",
        "print(test_check['input_ids'].shape)\n",
        "print(\"Label Sample\")\n",
        "print(test_check['label'][:5]) #quick check on the shapes (printed in the collate function) and samples of the data to ensure that everything looks as expected"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### PTQ using torchao"
      ],
      "metadata": {
        "id": "fVnfQ13dNaQ0"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install --upgrade torch torchao\n",
        "# Install PyTorch, torchvision, torchao nightlies - for hf torch ao integration use\n",
        "!pip install --pre --upgrade torch torchvision torchao --index-url https://download.pytorch.org/whl/nightly/cu121 # full options are cpu/cu118/cu121/cu124\n",
        "!pip install --pre --upgrade torchtune --extra-index-url https://download.pytorch.org/whl/nightly/cpu"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "9BW5wL6zOkov",
        "outputId": "1fbc68fb-7954-4a43-9be6-258e0bf135c7"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.5.1+cu121)\n",
            "Collecting torchao\n",
            "  Downloading torchao-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.16.1)\n",
            "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.12.2)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.4.2)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2024.9.0)\n",
            "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch) (1.13.1)\n",
            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (3.0.2)\n",
            "Downloading torchao-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m69.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: torchao\n",
            "Successfully installed torchao-0.6.1\n",
            "Looking in indexes: https://download.pytorch.org/whl/nightly/cu121\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.5.1+cu121)\n",
            "Collecting torch\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/torch-2.6.0.dev20241112%2Bcu121-cp310-cp310-linux_x86_64.whl (767.9 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m767.9/767.9 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (0.20.1+cu121)\n",
            "Requirement already satisfied: torchao in /usr/local/lib/python3.10/dist-packages (0.6.1)\n",
            "Collecting torchao\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/torchao-0.7.0.dev20241112%2Bcu121-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m79.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.16.1)\n",
            "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.12.2)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.4.2)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2024.9.0)\n",
            "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m85.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m50.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m108.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cudnn-cu12==9.1.0.70 (from torch)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cublas-cu12==12.1.3.1 (from torch)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cufft-cu12==11.0.2.54 (from torch)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m17.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-curand-cu12==10.3.2.106 (from torch)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m37.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cusolver-cu12==11.4.5.107 (from torch)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m17.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cusparse-cu12==12.1.0.106 (from torch)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cusparselt-cu12==0.6.2 (from torch)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_x86_64.whl (150.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m150.1/150.1 MB\u001b[0m \u001b[31m15.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-nccl-cu12==2.21.5 (from torch)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl (188.7 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m188.7/188.7 MB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-nvtx-cu12==12.1.105 (from torch)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting pytorch-triton==3.1.0+cf34004b8a (from torch)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/pytorch_triton-3.1.0%2Bcf34004b8a-cp310-cp310-linux_x86_64.whl (239.6 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m239.6/239.6 MB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch) (1.13.1)\n",
            "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch) (12.6.77)\n",
            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision) (1.26.4)\n",
            "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision) (11.0.0)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (3.0.2)\n",
            "Installing collected packages: torchao\n",
            "  Attempting uninstall: torchao\n",
            "    Found existing installation: torchao 0.6.1\n",
            "    Uninstalling torchao-0.6.1:\n",
            "      Successfully uninstalled torchao-0.6.1\n",
            "Successfully installed torchao-0.7.0.dev20241112+cu121\n",
            "Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/nightly/cpu\n",
            "Collecting torchtune\n",
            "  Downloading https://download.pytorch.org/whl/nightly/cpu/torchtune-0.5.0.dev20241204%2Bcpu-py3-none-any.whl (744 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m744.9/744.9 kB\u001b[0m \u001b[31m43.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (from torchtune) (3.1.0)\n",
            "Requirement already satisfied: huggingface_hub[hf_transfer] in /usr/local/lib/python3.10/dist-packages (from torchtune) (0.26.2)\n",
            "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from torchtune) (0.4.5)\n",
            "Requirement already satisfied: kagglehub in /usr/local/lib/python3.10/dist-packages (from torchtune) (0.3.4)\n",
            "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from torchtune) (0.2.0)\n",
            "Collecting tiktoken (from torchtune)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m63.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting blobfile>=2 (from torchtune)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/blobfile-3.0.0-py3-none-any.whl (75 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchtune) (1.26.4)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from torchtune) (4.66.6)\n",
            "Collecting omegaconf (from torchtune)\n",
            "  Downloading omegaconf-2.4.0.dev3-py3-none-any.whl.metadata (4.2 kB)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from torchtune) (5.9.5)\n",
            "Requirement already satisfied: Pillow>=9.4.0 in /usr/local/lib/python3.10/dist-packages (from torchtune) (11.0.0)\n",
            "Collecting pycryptodomex>=3.8 (from blobfile>=2->torchtune)\n",
            "  Downloading https://download.pytorch.org/whl/nightly/pycryptodomex-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m52.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: urllib3<3,>=1.25.3 in /usr/local/lib/python3.10/dist-packages (from blobfile>=2->torchtune) (2.2.3)\n",
            "Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.10/dist-packages (from blobfile>=2->torchtune) (5.3.0)\n",
            "Requirement already satisfied: filelock>=3.0 in /usr/local/lib/python3.10/dist-packages (from blobfile>=2->torchtune) (3.16.1)\n",
            "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (17.0.0)\n",
            "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (0.3.8)\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (2.2.2)\n",
            "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (2.32.3)\n",
            "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (3.5.0)\n",
            "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (0.70.16)\n",
            "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets->torchtune) (2024.9.0)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (3.11.2)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (24.2)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (6.0.2)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub[hf_transfer]->torchtune) (4.12.2)\n",
            "Collecting hf-transfer>=0.1.4 (from huggingface_hub[hf_transfer]->torchtune)\n",
            "  Downloading hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n",
            "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken->torchtune) (2024.9.11)\n",
            "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (2.4.3)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (1.3.1)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (24.2.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (1.5.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (6.1.0)\n",
            "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (0.2.0)\n",
            "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (1.17.2)\n",
            "Requirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (4.0.3)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets->torchtune) (3.4.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets->torchtune) (3.10)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets->torchtune) (2024.8.30)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->torchtune) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->torchtune) (2024.2)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->torchtune) (2024.2)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets->torchtune) (1.16.0)\n",
            "Downloading omegaconf-2.4.0.dev3-py3-none-any.whl (224 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m224.4/224.4 kB\u001b[0m \u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m98.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: pycryptodomex, omegaconf, hf-transfer, tiktoken, blobfile, torchtune\n",
            "Successfully installed blobfile-3.0.0 hf-transfer-0.1.8 omegaconf-2.4.0.dev3 pycryptodomex-3.21.0 tiktoken-0.8.0 torchtune-0.5.0.dev20241204+cpu\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.colab-display-data+json": {
              "pip_warning": {
                "packages": [
                  "pydevd_plugins"
                ]
              },
              "id": "1b4e350aae104314b73766c2a2f6aa64"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import torchao\n",
        "import copy\n",
        "from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer"
      ],
      "metadata": {
        "id": "QrBY9U5hu7Dc"
      },
      "execution_count": 12,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#subapproach #1 - using torchao directly\n",
        "from torchao.quantization.quant_api import (\n",
        "    quantize_,\n",
        "    int8_dynamic_activation_int8_weight,\n",
        "    int4_weight_only,\n",
        "    int8_weight_only\n",
        ")\n",
        "model.to(device)\n",
        "quantize_(model, int8_weight_only())\n",
        "\n",
        "#double check that we see some expected memory savings\n",
        "torch.save(model.state_dict(), \"temp.p\")\n",
        "print('Size (MB):', os.path.getsize(\"temp.p\")/1e6)\n",
        "os.remove('temp.p')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "jTineU_UqvoL",
        "outputId": "3af5fec1-9e98-43b1-8f4e-70e2f886d9ab"
      },
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Size (MB): 4800.333753\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#double check that generation works\n",
        "input = tokenizer(\"hello\", return_tensors='pt')['input_ids'].to('cuda')\n",
        "tokenizer.decode(model.generate(input)[0]) #quick test check"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 145
        },
        "id": "IKkDlrViq7x5",
        "outputId": "8089ecb6-d9a0-434e-b4d2-75feb9caa358"
      },
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1375: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "\"<|begin_of_text|>hello! I'm new to this forum and I'm excited to learn and share my knowledge with\""
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 14
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#save the model\n",
        "model.save_pretrained(\"./ptq_int8\", safe_serialization=False)"
      ],
      "metadata": {
        "id": "ldgxUVapq2jt"
      },
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#double check that the model can be loaded in\n",
        "model = AutoModelForCausalLM.from_pretrained(\"./ptq_int8\", device_map=\"cuda\")\n",
        "model = torch.compile(model, mode=\"max-autotune\")\n",
        "\n",
        "#and fits the expected memory requirements\n",
        "torch.save(model.state_dict(), \"temp.p\")\n",
        "print('Size (MB):', os.path.getsize(\"temp.p\")/1e6)\n",
        "os.remove('temp.p')\n",
        "\n",
        "#double check generatiom works\n",
        "input = tokenizer(\"hello\", return_tensors='pt')['input_ids'].to('cuda')\n",
        "tokenizer.decode(model.generate(input)[0]) #quick test check"
      ],
      "metadata": {
        "id": "VJo2NciJrDhG",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 145
        },
        "outputId": "ba63020a-6625-42ec-f37c-e7927d0a9da5"
      },
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Size (MB): 4404.79734\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1375: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'<|begin_of_text|>hello everyone, welcome back to my channel. today i want to talk about the importance of self'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 16
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#subapproach #2 - using what hf recommends\n",
        "\n",
        "quantization_config = TorchAoConfig(\"int4_weight_only\", group_size=128)\n",
        "quantized_model = AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-3.2-3B-Instruct\", device_map=\"auto\", quantization_config=quantization_config)\n",
        "\n",
        "torchao.quantization.utils.recommended_inductor_config_setter()\n",
        "quantized_model = torch.compile(quantized_model, mode=\"max-autotune\")\n",
        "\n",
        "quantized_model.save_pretrained(\"./hf_quant_int4\", safe_serialization=False)\n",
        "\n",
        "loaded_quantized_model = AutoModelForCausalLM.from_pretrained(\"./hf_quant_int4\", device_map=\"cuda\")\n",
        "\n",
        "loaded_quantized_model = torch.compile(loaded_quantized_model, mode=\"max-autotune\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 87,
          "referenced_widgets": [
            "a6122a0554324bde96f9e81dcaadccd9",
            "887c3b5bea9b42138d0d4a38917d507c",
            "02ad42a7e19d4c77b893283023af8866",
            "687c41c14d0b4470aeed781e3f02f01e",
            "5388e0720cea4a878d993c711f19b7da",
            "233a67f4e6794cfcb20554569c142d16",
            "f256314e9b6a49eca22b0ff1a8b139ae",
            "ba13ed834465493eadcf0aa00d2441b2",
            "69cdc31b4647483d922bf381137a6f5a",
            "2c5d8a1b076b4f9ebf839cb1ec816186",
            "64fb4299c5bd47439ff0509adb46b1f1"
          ]
        },
        "id": "dedSoTctfQNZ",
        "outputId": "fe61c9e0-c603-45de-c5e2-9a0832dd7ea0"
      },
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Setting torch_dtype to torch.bfloat16 for int4_weight_only quantization since only bfloat16 is supported right now. Please set torch_dtype=torch.bfloat16 to remove this warning.\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "a6122a0554324bde96f9e81dcaadccd9"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#double check that the quant was applied\n",
        "# loaded_quantized_model.model.layers[0].self_attn.q_proj.weight.data #we can see that torchao autoquant determined that the best quant approach was to use int8weights"
      ],
      "metadata": {
        "id": "h1PXBfYGSFzX"
      },
      "execution_count": 18,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#double check that we see some expected memory savings\n",
        "torch.save(loaded_quantized_model.state_dict(), \"temp.p\")\n",
        "print('Size (MB):', os.path.getsize(\"temp.p\")/1e6)\n",
        "os.remove('temp.p')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "rP5d3M0cOjNq",
        "outputId": "d4a24ef8-8bb7-434a-b0c0-856aca064c40"
      },
      "execution_count": 19,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Size (MB): 2285.908892\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#double check that generation works\n",
        "input = tokenizer(\"hello\", return_tensors='pt')['input_ids'].to('cuda')\n",
        "tokenizer.decode(loaded_quantized_model.generate(input)[0]) #quick test check"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 127
        },
        "id": "MAmgNpCZOfMo",
        "outputId": "ac626fee-8f22-492d-e14a-64818604c7f6"
      },
      "execution_count": 20,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1375: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'<|begin_of_text|>hello = \"world\"\\n\\ndef hello_world():\\n    global hello\\n    print(hello)\\n\\nhello'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 20
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### QAT with torchao"
      ],
      "metadata": {
        "id": "HGK2mfAoULCk"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install torchao torchtune #if ptq section was run earlier, no need to pip install again"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "lPByoHkqaoX3",
        "outputId": "cb6dcbc7-ff58-4205-fd0e-e4755068ed59"
      },
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: torchao in /usr/local/lib/python3.10/dist-packages (0.6.1)\n",
            "Collecting torchtune\n",
            "  Downloading torchtune-0.4.0-py3-none-any.whl.metadata (19 kB)\n",
            "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (from torchtune) (3.1.0)\n",
            "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from torchtune) (0.26.2)\n",
            "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from torchtune) (0.4.5)\n",
            "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from torchtune) (0.2.0)\n",
            "Collecting tiktoken (from torchtune)\n",
            "  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
            "Collecting blobfile>=2 (from torchtune)\n",
            "  Downloading blobfile-3.0.0-py3-none-any.whl.metadata (15 kB)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchtune) (1.26.4)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from torchtune) (4.66.6)\n",
            "Collecting omegaconf (from torchtune)\n",
            "  Downloading omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from torchtune) (5.9.5)\n",
            "Requirement already satisfied: Pillow>=9.4.0 in /usr/local/lib/python3.10/dist-packages (from torchtune) (11.0.0)\n",
            "Collecting pycryptodomex>=3.8 (from blobfile>=2->torchtune)\n",
            "  Downloading pycryptodomex-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)\n",
            "Requirement already satisfied: urllib3<3,>=1.25.3 in /usr/local/lib/python3.10/dist-packages (from blobfile>=2->torchtune) (2.2.3)\n",
            "Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.10/dist-packages (from blobfile>=2->torchtune) (5.3.0)\n",
            "Requirement already satisfied: filelock>=3.0 in /usr/local/lib/python3.10/dist-packages (from blobfile>=2->torchtune) (3.16.1)\n",
            "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (17.0.0)\n",
            "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (0.3.8)\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (2.2.2)\n",
            "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (2.32.3)\n",
            "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (3.5.0)\n",
            "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (0.70.16)\n",
            "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets->torchtune) (2024.9.0)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (3.11.2)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (24.2)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets->torchtune) (6.0.2)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->torchtune) (4.12.2)\n",
            "Collecting antlr4-python3-runtime==4.9.* (from omegaconf->torchtune)\n",
            "  Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken->torchtune) (2024.9.11)\n",
            "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (2.4.3)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (1.3.1)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (24.2.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (1.5.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (6.1.0)\n",
            "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (0.2.0)\n",
            "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (1.17.2)\n",
            "Requirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->torchtune) (4.0.3)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets->torchtune) (3.4.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets->torchtune) (3.10)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets->torchtune) (2024.8.30)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->torchtune) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->torchtune) (2024.2)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->torchtune) (2024.2)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets->torchtune) (1.16.0)\n",
            "Downloading torchtune-0.4.0-py3-none-any.whl (686 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m686.9/686.9 kB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading blobfile-3.0.0-py3-none-any.whl (75 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m43.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading pycryptodomex-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m79.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hBuilding wheels for collected packages: antlr4-python3-runtime\n",
            "  Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144555 sha256=243d34073d4cdea9e5a8a1b63a76485a053a149bf78a06a273afd71882130e44\n",
            "  Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n",
            "Successfully built antlr4-python3-runtime\n",
            "Installing collected packages: antlr4-python3-runtime, pycryptodomex, omegaconf, tiktoken, blobfile, torchtune\n",
            "Successfully installed antlr4-python3-runtime-4.9.3 blobfile-3.0.0 omegaconf-2.3.0 pycryptodomex-3.21.0 tiktoken-0.8.0 torchtune-0.4.0\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.colab-display-data+json": {
              "pip_warning": {
                "packages": [
                  "pydevd_plugins"
                ]
              },
              "id": "d849c0c6aa404ac9802742cda0cbf51f"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#IF PTQ was run before, need a fresh unquant instruct base model again -- otherwise, skip\n",
        "\n",
        "tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Llama-3.2-3B-Instruct\")\n",
        "model = AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-3.2-3B-Instruct\")\n",
        "\n",
        "tokenizer.pad_token = tokenizer.eos_token"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 49,
          "referenced_widgets": [
            "cda09fe730404d64b2d1046fc0ff0972",
            "d5fb009b64f64870b23604a6df388d31",
            "d785ea9c32854641bd216fa3cdc0f366",
            "17ac6f96e8a142828bff4d4b86080b47",
            "b18188da875c49eca69c4fe4a2b37d2f",
            "5e0980136ad848c490aa5518751e46e3",
            "e87fcc34901449ac97380d3504f7352a",
            "2402c90e45294d9385ccac1401d9b1c5",
            "3fad5bb47ad342af80beaa6a480ba394",
            "7e1d389498ed404b8f288d7653219206",
            "3d7a3f940b4b44cd85c9187c78972d67"
          ]
        },
        "id": "6Muppldq2Vzq",
        "outputId": "de3304b0-2bc7-44aa-e855-facad488d002"
      },
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "cda09fe730404d64b2d1046fc0ff0972"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import torchao\n",
        "from torchtune.training.quantization import Int8DynActInt4WeightQATQuantizer\n",
        "import time\n",
        "\n",
        "# model.to('cuda')\n",
        "\n",
        "#prep for quantization\n",
        "qat_quantizer = Int8DynActInt4WeightQATQuantizer()\n",
        "model = qat_quantizer.prepare(model)\n",
        "\n",
        "#hyperparams\n",
        "optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) #im keeping the lr small bc this is for fine-tuning\n",
        "criterion = torch.nn.CrossEntropyLoss()\n",
        "num_epochs = 2\n",
        "artificial_len = 150\n",
        "\n",
        "#times to track (key=epoch, value=time in sec)\n",
        "total_runtime = {}\n",
        "t_train_time = {}\n",
        "t_data_time = {}\n",
        "t_epoch_losses = {}\n",
        "\n",
        "#training mode\n",
        "model.train()\n",
        "\n",
        "###TRAINING LOOP ###\n",
        "for epoch in range(num_epochs):\n",
        "\n",
        "  training_time = 0\n",
        "  dataloading_time = 0\n",
        "  epoch_loss = 0\n",
        "\n",
        "  ### START TIMING FOR TOTAL_RUNTIME\n",
        "  torch.cuda.synchronize()\n",
        "  start_totalruntime_timer = time.perf_counter()\n",
        "\n",
        "  ### START DATALOADING TIME\n",
        "  start_dataloading_timer = time.perf_counter()\n",
        "\n",
        "  for idx, sample in enumerate(agg_dl_train):\n",
        "\n",
        "    ### END DATALOADING timer and accumulate total\n",
        "    dataloading_time += time.perf_counter()-start_dataloading_timer\n",
        "\n",
        "    inputs = sample['input_ids'][:, :artificial_len].to(device)\n",
        "    mask = sample['attention_mask'][:, :artificial_len].to(device)\n",
        "    labels = torch.Tensor(sample['label'][:, :artificial_len]).to(device).long()\n",
        "\n",
        "    ### START TIMING FOR TRAINING TIME\n",
        "    torch.cuda.synchronize()\n",
        "    start_training_timer = time.perf_counter()\n",
        "    ###\n",
        "\n",
        "    optimizer.zero_grad()\n",
        "\n",
        "    outputs = model(inputs, attention_mask=mask, labels=labels)\n",
        "    loss = outputs.loss\n",
        "\n",
        "    loss.backward()\n",
        "    optimizer.step()\n",
        "\n",
        "    ### END TIMING FOR TRAINING TIME\n",
        "    torch.cuda.synchronize()\n",
        "    training_time += time.perf_counter()-start_training_timer\n",
        "\n",
        "    #print for myself\n",
        "    if idx%10000 == 0:\n",
        "      print(f\"Epoch {epoch}, Iteration {idx}, Loss: \", loss.item())\n",
        "\n",
        "    epoch_loss += loss.item()\n",
        "\n",
        "    #start dataloading timer again for the next batch load\n",
        "    start_dataloading_timer = time.perf_counter()\n",
        "\n",
        "  #print for myself (after each epoch)\n",
        "  print(f\"Epoch {epoch}, Loss -- {epoch_loss}\")\n",
        "\n",
        "  #END TIMING FOR TOTAL RUNTIME\n",
        "  torch.cuda.synchronize()\n",
        "  total_runtime[epoch] = time.perf_counter()-start_totalruntime_timer\n",
        "\n",
        "  #log other times\n",
        "  t_train_time[epoch] = training_time\n",
        "  t_data_time[epoch] = dataloading_time\n",
        "  t_epoch_losses[epoch] = epoch_loss\n",
        "\n",
        "#print stats\n",
        "print(f\"\"\"\n",
        "\n",
        "total run time: {total_runtime.items()}\n",
        "train time: {t_train_time.items()}\n",
        "dataloading time: {t_data_time.items()}\n",
        "\n",
        "loss/epoch: {t_epoch_losses.items()}\n",
        "\n",
        "\"\"\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ZosfgxzXUKwe",
        "outputId": "166963cc-ce65-4f0a-94ac-64749f827b60"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 0, Iteration 0, Loss:  12.502869606018066\n",
            "Epoch 0, Loss -- 3370.356109796092\n",
            "Epoch 1, Iteration 0, Loss:  0.012146401219069958\n",
            "Epoch 1, Loss -- 3118.979062772356\n",
            "\n",
            "\n",
            "total run time: dict_items([(0, 1811.076236846), (1, 1810.0278724829996)])\n",
            "train time: dict_items([(0, 1797.903542860025), (1, 1796.727398568015)])\n",
            "dataloading time: dict_items([(0, 11.912824163006917), (1, 12.030576425999243)])\n",
            "\n",
            "loss/epoch: dict_items([(0, 3370.356109796092), (1, 3118.979062772356)])\n",
            "\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Convert fake quantize to actual quantize operations\n",
        "model = qat_quantizer.convert(model)"
      ],
      "metadata": {
        "id": "XgHfGtJIQ9Ha"
      },
      "execution_count": 12,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#double check that we see some expected memory savings\n",
        "torch.save(model.state_dict(), \"temp.p\")\n",
        "print('Size (MB):', os.path.getsize(\"temp.p\")/1e6)\n",
        "os.remove('temp.p')\n",
        "\n",
        "#double check that generation works\n",
        "input = tokenizer(\"what is 5*50+6*2\", return_tensors='pt')['input_ids'].to('cuda')\n",
        "tokenizer.decode(model.generate(input, max_new_tokens=200)[0]) #quick test check\n",
        "\n",
        "#save the model\n",
        "model.save_pretrained(\"./qat_int8\", safe_serialization=False)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "FAk-fXWZV5vB",
        "outputId": "46b5640e-4923-429c-a8fd-3c61696853f8"
      },
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Size (MB): 4889.921905\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "### RELOAD AND CHECK AGAIN ###\n",
        "\n",
        "#double check that the model can be loaded in\n",
        "model = AutoModelForCausalLM.from_pretrained(\"./qat_int8\", device_map=\"cuda\")\n",
        "model = torch.compile(model, mode=\"max-autotune\")\n",
        "\n",
        "#and fits the expected memory requirements\n",
        "torch.save(model.state_dict(), \"temp.p\")\n",
        "print('Size (MB):', os.path.getsize(\"temp.p\")/1e6)\n",
        "os.remove('temp.p')\n",
        "\n",
        "#double check generatiom works\n",
        "input = tokenizer(\"hello\", return_tensors='pt')['input_ids'].to('cuda')\n",
        "tokenizer.decode(model.generate(input)[0]) #quick test check"
      ],
      "metadata": {
        "id": "VlLXkSzfcSHu",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 145
        },
        "outputId": "cc26ce93-03ba-4fc8-d594-fbb7145e131b"
      },
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Size (MB): 4483.612316\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1375: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'<|begin_of_text|>hello! the{\\\\<|eot_id|>'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 14
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "7dHnFVtiYYCQ"
      },
      "source": [
        "### PTQ (this attempt didn't work. see above for what works)"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!cat /proc/cpuinfo"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "16RW2xvcskOk",
        "outputId": "fedeb920-8cd5-42c9-cd71-3d393bdc757c"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "processor\t: 0\n",
            "vendor_id\t: GenuineIntel\n",
            "cpu family\t: 6\n",
            "model\t\t: 85\n",
            "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n",
            "stepping\t: 7\n",
            "microcode\t: 0xffffffff\n",
            "cpu MHz\t\t: 2200.144\n",
            "cache size\t: 39424 KB\n",
            "physical id\t: 0\n",
            "siblings\t: 12\n",
            "core id\t\t: 0\n",
            "cpu cores\t: 6\n",
            "apicid\t\t: 0\n",
            "initial apicid\t: 0\n",
            "fpu\t\t: yes\n",
            "fpu_exception\t: yes\n",
            "cpuid level\t: 13\n",
            "wp\t\t: yes\n",
            "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat avx512_vnni md_clear arch_capabilities\n",
            "bugs\t\t: spectre_v1 spectre_v2 spec_store_bypass swapgs taa mmio_stale_data retbleed eibrs_pbrsb bhi\n",
            "bogomips\t: 4400.28\n",
            "clflush size\t: 64\n",
            "cache_alignment\t: 64\n",
            "address sizes\t: 46 bits physical, 48 bits virtual\n",
            "power management:\n",
            "\n",
            "processor\t: 1\n",
            "vendor_id\t: GenuineIntel\n",
            "cpu family\t: 6\n",
            "model\t\t: 85\n",
            "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n",
            "stepping\t: 7\n",
            "microcode\t: 0xffffffff\n",
            "cpu MHz\t\t: 2200.144\n",
            "cache size\t: 39424 KB\n",
            "physical id\t: 0\n",
            "siblings\t: 12\n",
            "core id\t\t: 1\n",
            "cpu cores\t: 6\n",
            "apicid\t\t: 2\n",
            "initial apicid\t: 2\n",
            "fpu\t\t: yes\n",
            "fpu_exception\t: yes\n",
            "cpuid level\t: 13\n",
            "wp\t\t: yes\n",
            "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat avx512_vnni md_clear arch_capabilities\n",
            "bugs\t\t: spectre_v1 spectre_v2 spec_store_bypass swapgs taa mmio_stale_data retbleed eibrs_pbrsb bhi\n",
            "bogomips\t: 4400.28\n",
            "clflush size\t: 64\n",
            "cache_alignment\t: 64\n",
            "address sizes\t: 46 bits physical, 48 bits virtual\n",
            "power management:\n",
            "\n",
            "processor\t: 2\n",
            "vendor_id\t: GenuineIntel\n",
            "cpu family\t: 6\n",
            "model\t\t: 85\n",
            "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n",
            "stepping\t: 7\n",
            "microcode\t: 0xffffffff\n",
            "cpu MHz\t\t: 2200.144\n",
            "cache size\t: 39424 KB\n",
            "physical id\t: 0\n",
            "siblings\t: 12\n",
            "core id\t\t: 2\n",
            "cpu cores\t: 6\n",
            "apicid\t\t: 4\n",
            "initial apicid\t: 4\n",
            "fpu\t\t: yes\n",
            "fpu_exception\t: yes\n",
            "cpuid level\t: 13\n",
            "wp\t\t: yes\n",
            "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat avx512_vnni md_clear arch_capabilities\n",
            "bugs\t\t: spectre_v1 spectre_v2 spec_store_bypass swapgs taa mmio_stale_data retbleed eibrs_pbrsb bhi\n",
            "bogomips\t: 4400.28\n",
            "clflush size\t: 64\n",
            "cache_alignment\t: 64\n",
            "address sizes\t: 46 bits physical, 48 bits virtual\n",
            "power management:\n",
            "\n",
            "processor\t: 3\n",
            "vendor_id\t: GenuineIntel\n",
            "cpu family\t: 6\n",
            "model\t\t: 85\n",
            "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n",
            "stepping\t: 7\n",
            "microcode\t: 0xffffffff\n",
            "cpu MHz\t\t: 2200.144\n",
            "cache size\t: 39424 KB\n",
            "physical id\t: 0\n",
            "siblings\t: 12\n",
            "core id\t\t: 3\n",
            "cpu cores\t: 6\n",
            "apicid\t\t: 6\n",
            "initial apicid\t: 6\n",
            "fpu\t\t: yes\n",
            "fpu_exception\t: yes\n",
            "cpuid level\t: 13\n",
            "wp\t\t: yes\n",
            "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat avx512_vnni md_clear arch_capabilities\n",
            "bugs\t\t: spectre_v1 spectre_v2 spec_store_bypass swapgs taa mmio_stale_data retbleed eibrs_pbrsb bhi\n",
            "bogomips\t: 4400.28\n",
            "clflush size\t: 64\n",
            "cache_alignment\t: 64\n",
            "address sizes\t: 46 bits physical, 48 bits virtual\n",
            "power management:\n",
            "\n",
            "processor\t: 4\n",
            "vendor_id\t: GenuineIntel\n",
            "cpu family\t: 6\n",
            "model\t\t: 85\n",
            "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n",
            "stepping\t: 7\n",
            "microcode\t: 0xffffffff\n",
            "cpu MHz\t\t: 2200.144\n",
            "cache size\t: 39424 KB\n",
            "physical id\t: 0\n",
            "siblings\t: 12\n",
            "core id\t\t: 4\n",
            "cpu cores\t: 6\n",
            "apicid\t\t: 8\n",
            "initial apicid\t: 8\n",
            "fpu\t\t: yes\n",
            "fpu_exception\t: yes\n",
            "cpuid level\t: 13\n",
            "wp\t\t: yes\n",
            "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat avx512_vnni md_clear arch_capabilities\n",
            "bugs\t\t: spectre_v1 spectre_v2 spec_store_bypass swapgs taa mmio_stale_data retbleed eibrs_pbrsb bhi\n",
            "bogomips\t: 4400.28\n",
            "clflush size\t: 64\n",
            "cache_alignment\t: 64\n",
            "address sizes\t: 46 bits physical, 48 bits virtual\n",
            "power management:\n",
            "\n",
            "processor\t: 5\n",
            "vendor_id\t: GenuineIntel\n",
            "cpu family\t: 6\n",
            "model\t\t: 85\n",
            "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n",
            "stepping\t: 7\n",
            "microcode\t: 0xffffffff\n",
            "cpu MHz\t\t: 2200.144\n",
            "cache size\t: 39424 KB\n",
            "physical id\t: 0\n",
            "siblings\t: 12\n",
            "core id\t\t: 5\n",
            "cpu cores\t: 6\n",
            "apicid\t\t: 10\n",
            "initial apicid\t: 10\n",
            "fpu\t\t: yes\n",
            "fpu_exception\t: yes\n",
            "cpuid level\t: 13\n",
            "wp\t\t: yes\n",
            "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat avx512_vnni md_clear arch_capabilities\n",
            "bugs\t\t: spectre_v1 spectre_v2 spec_store_bypass swapgs taa mmio_stale_data retbleed eibrs_pbrsb bhi\n",
            "bogomips\t: 4400.28\n",
            "clflush size\t: 64\n",
            "cache_alignment\t: 64\n",
            "address sizes\t: 46 bits physical, 48 bits virtual\n",
            "power management:\n",
            "\n",
            "processor\t: 6\n",
            "vendor_id\t: GenuineIntel\n",
            "cpu family\t: 6\n",
            "model\t\t: 85\n",
            "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n",
            "stepping\t: 7\n",
            "microcode\t: 0xffffffff\n",
            "cpu MHz\t\t: 2200.144\n",
            "cache size\t: 39424 KB\n",
            "physical id\t: 0\n",
            "siblings\t: 12\n",
            "core id\t\t: 0\n",
            "cpu cores\t: 6\n",
            "apicid\t\t: 1\n",
            "initial apicid\t: 1\n",
            "fpu\t\t: yes\n",
            "fpu_exception\t: yes\n",
            "cpuid level\t: 13\n",
            "wp\t\t: yes\n",
            "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat avx512_vnni md_clear arch_capabilities\n",
            "bugs\t\t: spectre_v1 spectre_v2 spec_store_bypass swapgs taa mmio_stale_data retbleed eibrs_pbrsb bhi\n",
            "bogomips\t: 4400.28\n",
            "clflush size\t: 64\n",
            "cache_alignment\t: 64\n",
            "address sizes\t: 46 bits physical, 48 bits virtual\n",
            "power management:\n",
            "\n",
            "processor\t: 7\n",
            "vendor_id\t: GenuineIntel\n",
            "cpu family\t: 6\n",
            "model\t\t: 85\n",
            "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n",
            "stepping\t: 7\n",
            "microcode\t: 0xffffffff\n",
            "cpu MHz\t\t: 2200.144\n",
            "cache size\t: 39424 KB\n",
            "physical id\t: 0\n",
            "siblings\t: 12\n",
            "core id\t\t: 1\n",
            "cpu cores\t: 6\n",
            "apicid\t\t: 3\n",
            "initial apicid\t: 3\n",
            "fpu\t\t: yes\n",
            "fpu_exception\t: yes\n",
            "cpuid level\t: 13\n",
            "wp\t\t: yes\n",
            "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat avx512_vnni md_clear arch_capabilities\n",
            "bugs\t\t: spectre_v1 spectre_v2 spec_store_bypass swapgs taa mmio_stale_data retbleed eibrs_pbrsb bhi\n",
            "bogomips\t: 4400.28\n",
            "clflush size\t: 64\n",
            "cache_alignment\t: 64\n",
            "address sizes\t: 46 bits physical, 48 bits virtual\n",
            "power management:\n",
            "\n",
            "processor\t: 8\n",
            "vendor_id\t: GenuineIntel\n",
            "cpu family\t: 6\n",
            "model\t\t: 85\n",
            "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n",
            "stepping\t: 7\n",
            "microcode\t: 0xffffffff\n",
            "cpu MHz\t\t: 2200.144\n",
            "cache size\t: 39424 KB\n",
            "physical id\t: 0\n",
            "siblings\t: 12\n",
            "core id\t\t: 2\n",
            "cpu cores\t: 6\n",
            "apicid\t\t: 5\n",
            "initial apicid\t: 5\n",
            "fpu\t\t: yes\n",
            "fpu_exception\t: yes\n",
            "cpuid level\t: 13\n",
            "wp\t\t: yes\n",
            "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat avx512_vnni md_clear arch_capabilities\n",
            "bugs\t\t: spectre_v1 spectre_v2 spec_store_bypass swapgs taa mmio_stale_data retbleed eibrs_pbrsb bhi\n",
            "bogomips\t: 4400.28\n",
            "clflush size\t: 64\n",
            "cache_alignment\t: 64\n",
            "address sizes\t: 46 bits physical, 48 bits virtual\n",
            "power management:\n",
            "\n",
            "processor\t: 9\n",
            "vendor_id\t: GenuineIntel\n",
            "cpu family\t: 6\n",
            "model\t\t: 85\n",
            "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n",
            "stepping\t: 7\n",
            "microcode\t: 0xffffffff\n",
            "cpu MHz\t\t: 2200.144\n",
            "cache size\t: 39424 KB\n",
            "physical id\t: 0\n",
            "siblings\t: 12\n",
            "core id\t\t: 3\n",
            "cpu cores\t: 6\n",
            "apicid\t\t: 7\n",
            "initial apicid\t: 7\n",
            "fpu\t\t: yes\n",
            "fpu_exception\t: yes\n",
            "cpuid level\t: 13\n",
            "wp\t\t: yes\n",
            "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat avx512_vnni md_clear arch_capabilities\n",
            "bugs\t\t: spectre_v1 spectre_v2 spec_store_bypass swapgs taa mmio_stale_data retbleed eibrs_pbrsb bhi\n",
            "bogomips\t: 4400.28\n",
            "clflush size\t: 64\n",
            "cache_alignment\t: 64\n",
            "address sizes\t: 46 bits physical, 48 bits virtual\n",
            "power management:\n",
            "\n",
            "processor\t: 10\n",
            "vendor_id\t: GenuineIntel\n",
            "cpu family\t: 6\n",
            "model\t\t: 85\n",
            "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n",
            "stepping\t: 7\n",
            "microcode\t: 0xffffffff\n",
            "cpu MHz\t\t: 2200.144\n",
            "cache size\t: 39424 KB\n",
            "physical id\t: 0\n",
            "siblings\t: 12\n",
            "core id\t\t: 4\n",
            "cpu cores\t: 6\n",
            "apicid\t\t: 9\n",
            "initial apicid\t: 9\n",
            "fpu\t\t: yes\n",
            "fpu_exception\t: yes\n",
            "cpuid level\t: 13\n",
            "wp\t\t: yes\n",
            "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat avx512_vnni md_clear arch_capabilities\n",
            "bugs\t\t: spectre_v1 spectre_v2 spec_store_bypass swapgs taa mmio_stale_data retbleed eibrs_pbrsb bhi\n",
            "bogomips\t: 4400.28\n",
            "clflush size\t: 64\n",
            "cache_alignment\t: 64\n",
            "address sizes\t: 46 bits physical, 48 bits virtual\n",
            "power management:\n",
            "\n",
            "processor\t: 11\n",
            "vendor_id\t: GenuineIntel\n",
            "cpu family\t: 6\n",
            "model\t\t: 85\n",
            "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n",
            "stepping\t: 7\n",
            "microcode\t: 0xffffffff\n",
            "cpu MHz\t\t: 2200.144\n",
            "cache size\t: 39424 KB\n",
            "physical id\t: 0\n",
            "siblings\t: 12\n",
            "core id\t\t: 5\n",
            "cpu cores\t: 6\n",
            "apicid\t\t: 11\n",
            "initial apicid\t: 11\n",
            "fpu\t\t: yes\n",
            "fpu_exception\t: yes\n",
            "cpuid level\t: 13\n",
            "wp\t\t: yes\n",
            "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat avx512_vnni md_clear arch_capabilities\n",
            "bugs\t\t: spectre_v1 spectre_v2 spec_store_bypass swapgs taa mmio_stale_data retbleed eibrs_pbrsb bhi\n",
            "bogomips\t: 4400.28\n",
            "clflush size\t: 64\n",
            "cache_alignment\t: 64\n",
            "address sizes\t: 46 bits physical, 48 bits virtual\n",
            "power management:\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "h33_E9pg3NrM",
        "outputId": "bb2f24e4-0bdd-42a5-caa1-e38812c0ebe8"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/torch/ao/quantization/observer.py:229: UserWarning: Please use quant_min and quant_max to specify the range for observers.                     reduce_range will be deprecated in a future release of PyTorch.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "LlamaForCausalLM(\n",
              "  (model): LlamaModel(\n",
              "    (embed_tokens): Embedding(\n",
              "      128256, 3072\n",
              "      (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)\n",
              "    )\n",
              "    (layers): ModuleList(\n",
              "      (0-27): 28 x LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): Linear(\n",
              "            in_features=3072, out_features=3072, bias=False\n",
              "            (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)\n",
              "          )\n",
              "          (k_proj): Linear(\n",
              "            in_features=3072, out_features=1024, bias=False\n",
              "            (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)\n",
              "          )\n",
              "          (v_proj): Linear(\n",
              "            in_features=3072, out_features=1024, bias=False\n",
              "            (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)\n",
              "          )\n",
              "          (o_proj): Linear(\n",
              "            in_features=3072, out_features=3072, bias=False\n",
              "            (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)\n",
              "          )\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): Linear(\n",
              "            in_features=3072, out_features=8192, bias=False\n",
              "            (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)\n",
              "          )\n",
              "          (up_proj): Linear(\n",
              "            in_features=3072, out_features=8192, bias=False\n",
              "            (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)\n",
              "          )\n",
              "          (down_proj): Linear(\n",
              "            in_features=8192, out_features=3072, bias=False\n",
              "            (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)\n",
              "          )\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "    )\n",
              "    (norm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "    (rotary_emb): LlamaRotaryEmbedding()\n",
              "  )\n",
              "  (lm_head): Linear(\n",
              "    in_features=3072, out_features=128256, bias=False\n",
              "    (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)\n",
              "  )\n",
              ")"
            ]
          },
          "metadata": {},
          "execution_count": 11
        }
      ],
      "source": [
        "#prep the model for quant\n",
        "model.qconfig = torch.quantization.get_default_qconfig('x86')\n",
        "torch.quantization.prepare(model, inplace=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "FvzSvfQQsd39",
        "outputId": "076dee6f-5d8c-43aa-e272-880146a0d45a"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Tue Dec  3 21:52:38 2024       \n",
            "+---------------------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n",
            "|-----------------------------------------+----------------------+----------------------+\n",
            "| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n",
            "|                                         |                      |               MIG M. |\n",
            "|=========================================+======================+======================|\n",
            "|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |\n",
            "| N/A   28C    P0              50W / 400W |  40009MiB / 40960MiB |      0%      Default |\n",
            "|                                         |                      |             Disabled |\n",
            "+-----------------------------------------+----------------------+----------------------+\n",
            "                                                                                         \n",
            "+---------------------------------------------------------------------------------------+\n",
            "| Processes:                                                                            |\n",
            "|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n",
            "|        ID   ID                                                             Usage      |\n",
            "|=======================================================================================|\n",
            "+---------------------------------------------------------------------------------------+\n"
          ]
        }
      ],
      "source": [
        "!nvidia-smi"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "yHPsurhXY6N0"
      },
      "outputs": [],
      "source": [
        "import gc\n",
        "gc.collect()\n",
        "torch.cuda.empty_cache()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "TuJnfoKp3l2m",
        "outputId": "16bc3c41-4bf1-451d-dcf3-a1ddd1099e52"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Count: 0 ================================================================================== \n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Count: 1000 ================================================================================== \n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
            "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n"
          ]
        }
      ],
      "source": [
        "def calibrate_model(model, ds):\n",
        "  '''\n",
        "  Function to calibrate the model for post training quantization\n",
        "  '''\n",
        "  model.eval()\n",
        "  with torch.no_grad():\n",
        "    for idx, samples in enumerate(ds):\n",
        "      input = samples['input_ids'].to(device)\n",
        "      mask = samples['attention_mask'].to(device)\n",
        "      model.generate(input, attention_mask=mask, max_new_tokens=10) #both model() and model.generate() should work here bc both involve calling the forward pass\n",
        "\n",
        "      if idx % 1000 == 0:\n",
        "        print(f\"Count: {idx} ================================================================================== \")\n",
        "\n",
        "calibrate_model(model, agg_dl_train)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Kkf0d-9d4P1r"
      },
      "outputs": [],
      "source": [
        "for _, mod in model.named_modules():\n",
        "    if isinstance(mod, torch.nn.Embedding):\n",
        "        mod.qconfig = torch.ao.quantization.float_qparams_weight_only_qconfig\n",
        "quantized_model = torch.quantization.convert(model.to('cpu'), inplace=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Z_sGtWat2WQe",
        "outputId": "edaef183-cd59-4a18-87de-765da9c579b5"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Size (MB): 3623.055082\n"
          ]
        }
      ],
      "source": [
        "torch.save(quantized_model.state_dict(), \"temp.p\")\n",
        "print('Size (MB):', os.path.getsize(\"temp.p\")/1e6)\n",
        "os.remove('temp.p')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "CEEebhcVXA9L",
        "outputId": "196d3cf4-8ec9-47c6-8084-c96e57fb5bc5"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-17-6a61cfe0e0f4>:2: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
            "  quantized_model.load_state_dict(torch.load('quantized_model.pth'))\n",
            "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:413: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
            "  device=storage.device,\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "LlamaForCausalLM(\n",
              "  (model): LlamaModel(\n",
              "    (embed_tokens): QuantizedEmbedding(num_embeddings=128256, embedding_dim=3072, dtype=torch.quint8, qscheme=torch.per_channel_affine_float_qparams)\n",
              "    (layers): ModuleList(\n",
              "      (0): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.30233249068260193, zero_point=60, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.24663104116916656, zero_point=80, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.009504689835011959, zero_point=68, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.006590045522898436, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.0317545011639595, zero_point=86, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.026954399421811104, zero_point=85, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.041970327496528625, zero_point=12, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (1): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.12022610008716583, zero_point=69, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.14462462067604065, zero_point=71, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.01963285356760025, zero_point=71, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.004564878065139055, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.048889096826314926, zero_point=96, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.027223482728004456, zero_point=80, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.07703574001789093, zero_point=71, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (2): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.15539230406284332, zero_point=68, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.1808716505765915, zero_point=66, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.026354162022471428, zero_point=59, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.008104337379336357, zero_point=50, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.029910624027252197, zero_point=82, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.026910312473773956, zero_point=58, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.007480632979422808, zero_point=70, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (3): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.16439606249332428, zero_point=66, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.22867362201213837, zero_point=60, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.03201160207390785, zero_point=64, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.007924872450530529, zero_point=36, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.036199573427438736, zero_point=83, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.031108034774661064, zero_point=47, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.0071415225975215435, zero_point=83, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (4): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.16357927024364471, zero_point=69, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.19531211256980896, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.02516152523458004, zero_point=64, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.007947683334350586, zero_point=36, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.029193395748734474, zero_point=72, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.02524014748632908, zero_point=66, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.009098226204514503, zero_point=88, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (5): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.17876912653446198, zero_point=61, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.16659782826900482, zero_point=59, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.03116017021238804, zero_point=60, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.010329177603125572, zero_point=32, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.03966416418552399, zero_point=76, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.03746285289525986, zero_point=45, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.01123795285820961, zero_point=84, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (6): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.18309517204761505, zero_point=70, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.19555725157260895, zero_point=59, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.0295467060059309, zero_point=67, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.009125002659857273, zero_point=34, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.04394380375742912, zero_point=86, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.043053071945905685, zero_point=47, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.013798939064145088, zero_point=94, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (7): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.17466047406196594, zero_point=59, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.20138557255268097, zero_point=75, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.029704943299293518, zero_point=66, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.013110083527863026, zero_point=45, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.03614325821399689, zero_point=78, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.04011939465999603, zero_point=43, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.016378073021769524, zero_point=69, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (8): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.15469898283481598, zero_point=67, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.21351762115955353, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.028963912278413773, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.015278217382729053, zero_point=72, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.036861665546894073, zero_point=76, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.04025080427527428, zero_point=46, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.01705673523247242, zero_point=57, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (9): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.16184231638908386, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.2204965054988861, zero_point=80, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.03539949283003807, zero_point=64, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.01570466347038746, zero_point=76, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.04213990271091461, zero_point=71, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.04345555230975151, zero_point=81, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.01832427829504013, zero_point=42, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (10): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.15380555391311646, zero_point=77, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.19217796623706818, zero_point=67, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.03384840860962868, zero_point=64, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.012145768851041794, zero_point=96, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.048197027295827866, zero_point=82, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.04764566943049431, zero_point=88, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.013675554655492306, zero_point=45, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (11): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.18324363231658936, zero_point=60, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.22383351624011993, zero_point=69, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.0310151819139719, zero_point=64, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.012005197815597057, zero_point=59, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.0419195219874382, zero_point=73, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.05880364403128624, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.02455121837556362, zero_point=20, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (12): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.14416882395744324, zero_point=58, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.18097136914730072, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.030468912795186043, zero_point=60, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.023037143051624298, zero_point=96, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.04567498341202736, zero_point=81, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.0641784742474556, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.016651522368192673, zero_point=44, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (13): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.1291762888431549, zero_point=52, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.22279565036296844, zero_point=56, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.03203562647104263, zero_point=58, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.011880146339535713, zero_point=91, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.060838181525468826, zero_point=76, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.035114649683237076, zero_point=64, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.01639888621866703, zero_point=48, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (14): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.17186500132083893, zero_point=57, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.22464025020599365, zero_point=58, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.029095763340592384, zero_point=62, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.01572681963443756, zero_point=55, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.06909291446208954, zero_point=70, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.03622816875576973, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.025854559615254402, zero_point=32, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (15): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.16770873963832855, zero_point=61, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.18545114994049072, zero_point=58, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.028815651312470436, zero_point=64, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.014174523763358593, zero_point=90, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.06377580761909485, zero_point=69, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.0459066666662693, zero_point=51, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.02492685243487358, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (16): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.14963984489440918, zero_point=60, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.1939123123884201, zero_point=60, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.04826967045664787, zero_point=80, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.013360698707401752, zero_point=76, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.06254632025957108, zero_point=72, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.0482216402888298, zero_point=83, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.016450032591819763, zero_point=66, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (17): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.15050272643566132, zero_point=62, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.19792769849300385, zero_point=72, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.03669416159391403, zero_point=50, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.008166614919900894, zero_point=50, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.06074392795562744, zero_point=69, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.03830232098698616, zero_point=57, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.024337170645594597, zero_point=91, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (18): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.1639506220817566, zero_point=58, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.1678740531206131, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.03505965694785118, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.010155822150409222, zero_point=67, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.0588216632604599, zero_point=68, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.05680190026760101, zero_point=75, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.013378864154219627, zero_point=83, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (19): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.2506491541862488, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.22369664907455444, zero_point=67, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.04058235138654709, zero_point=53, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.013839371502399445, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.06579774618148804, zero_point=66, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.056200869381427765, zero_point=72, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.021891005337238312, zero_point=97, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (20): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.19794079661369324, zero_point=66, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.20492981374263763, zero_point=66, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.038418401032686234, zero_point=72, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.008207863196730614, zero_point=74, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.055098261684179306, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.06253641843795776, zero_point=81, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.02295970730483532, zero_point=98, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (21): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.23890964686870575, zero_point=59, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.21387821435928345, zero_point=61, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.04284908249974251, zero_point=62, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.010317398235201836, zero_point=82, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.0649033933877945, zero_point=68, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.06227144971489906, zero_point=90, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.014790414832532406, zero_point=79, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (22): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.29855412244796753, zero_point=64, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.1918814480304718, zero_point=52, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.042813755571842194, zero_point=58, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.01541436742991209, zero_point=89, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.07004866003990173, zero_point=70, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.04653427004814148, zero_point=66, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.020340057089924812, zero_point=69, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (23): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.1687212735414505, zero_point=49, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.21465052664279938, zero_point=62, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.038973305374383926, zero_point=67, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.026865024119615555, zero_point=99, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.07327080518007278, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.05279293656349182, zero_point=62, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.022862866520881653, zero_point=88, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (24): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.18166789412498474, zero_point=70, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.2087077498435974, zero_point=77, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.04673730209469795, zero_point=67, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.021907713264226913, zero_point=94, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.07771436125040054, zero_point=68, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.07448659092187881, zero_point=88, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.02715042419731617, zero_point=68, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (25): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.1680346578359604, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.2666929066181183, zero_point=56, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.05743082985281944, zero_point=62, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.03464660048484802, zero_point=82, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.08233989775180817, zero_point=67, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.1039862260222435, zero_point=68, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.03800978884100914, zero_point=52, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (26): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.19529137015342712, zero_point=75, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.18781012296676636, zero_point=58, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.04905257374048233, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.0686446949839592, zero_point=87, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.09255537390708923, zero_point=66, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.11804821342229843, zero_point=54, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.07662862539291382, zero_point=46, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "      (27): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.17824216187000275, zero_point=57, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.22375498712062836, zero_point=69, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=3072, out_features=1024, scale=0.06567756831645966, zero_point=80, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=3072, out_features=3072, scale=0.07441261410713196, zero_point=78, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.2229662984609604, zero_point=79, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=3072, out_features=8192, scale=0.20525285601615906, zero_point=77, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=3072, scale=0.2653174102306366, zero_point=64, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "      )\n",
              "    )\n",
              "    (norm): LlamaRMSNorm((3072,), eps=1e-05)\n",
              "    (rotary_emb): LlamaRotaryEmbedding()\n",
              "  )\n",
              "  (lm_head): QuantizedLinear(in_features=3072, out_features=128256, scale=0.20514997839927673, zero_point=46, qscheme=torch.per_channel_affine)\n",
              ")"
            ]
          },
          "metadata": {},
          "execution_count": 17
        }
      ],
      "source": [
        "torch.save(quantized_model.state_dict(), 'quantized_model.pth')\n",
        "quantized_model.load_state_dict(torch.load('quantized_model.pth'))\n",
        "quantized_model.eval()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ZZgYPRXtYgXT"
      },
      "source": [
        "### QAT (this attempt didn't work. see above for what works)"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#prep the model for qat\n",
        "model.train()\n",
        "# model.fuse_model()\n",
        "model.qconfig = torch.quantization.get_default_qat_qconfig('x86')\n",
        "\n",
        "torch.quantization.prepare_qat(model, inplace=True)\n"
      ],
      "metadata": {
        "id": "gaaA5g8Xu8p4"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "test_check = next(iter(agg_dl_test))\n",
        "\n",
        "print(\"Length of Labels\")\n",
        "print(len(test_check['label']))\n",
        "print(\"Shape of Input Ids\")\n",
        "print(test_check['input_ids'].shape)\n",
        "print(\"Label Sample\")\n",
        "print(test_check['label'][:5]) #quick check on the shapes (printed in the collate function) and samples of the data to ensure that everything looks as expected"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "XBdAC0UXP_QG",
        "outputId": "5952440b-14db-43a1-cd25-f589f37a475d"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Length of Labels\n",
            "4\n",
            "Shape of Input Ids\n",
            "torch.Size([4, 560])\n",
            "Label Sample\n",
            "tensor([[128000,     17, 128009,  ..., 128009, 128009, 128009],\n",
            "        [128000,     15, 128009,  ..., 128009, 128009, 128009],\n",
            "        [128000,   1687,   1390,  ..., 128009, 128009, 128009],\n",
            "        [128000,     16, 128009,  ..., 128009, 128009, 128009]])\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import gc\n",
        "gc.collect()\n",
        "torch.cuda.empty_cache()\n",
        "gc.collect()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "6aCQUWlpqYeh",
        "outputId": "1e02c442-2f25-40df-844f-61d2e2b9d884"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0"
            ]
          },
          "metadata": {},
          "execution_count": 13
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "kIvquEVaj0Xo",
        "outputId": "87fe61f2-fe2e-4903-e685-1fd919b8b564"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Loss:  tensor(13.2147, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0714, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.0061, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0326, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.7808, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0481, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0217, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0199, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0166, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0162, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.1999, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9100, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0111, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0086, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0080, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0093, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4584, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0054, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0071, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4945, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0054, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3224, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0087, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.1345, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.7322, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.0990, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0052, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0068, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.0448, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0043, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0045, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0040, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0042, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2504, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5076, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0043, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0043, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0042, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6983, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2168, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(2.6929, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3528, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(2.0817, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6512, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5162, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8193, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.4479, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6705, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8209, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5287, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5286, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5654, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2471, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6843, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4922, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2265, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4904, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3833, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.9316, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6323, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3342, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0024, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.3168, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(2.3497, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5300, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3778, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4501, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.9710, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2549, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8470, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4394, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0024, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4671, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3578, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(3.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2796, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4679, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2168, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0023, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5398, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2205, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3156, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2985, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.0377, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.1428, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4333, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(2.0876, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9303, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3355, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6698, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5123, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.3666, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1531, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6455, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3717, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4499, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5482, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6042, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2146, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4130, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4154, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4552, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0024, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0574, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5781, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3872, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.3164, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2759, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.0965, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6222, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3596, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6185, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0024, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.1658, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3383, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2538, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8636, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2391, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.6424, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8784, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1795, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8010, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3489, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3051, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2819, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(2.3406, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2140, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.1971, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.8413, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8578, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4962, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3109, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.5521, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6528, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.7115, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4352, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2230, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4247, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1486, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3807, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0024, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8172, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3869, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.0764, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2572, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1994, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3996, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2407, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1944, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4327, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4748, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5664, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1854, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0023, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9184, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6132, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1770, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2024, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3619, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8336, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4224, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9566, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.2523, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8889, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.3202, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3114, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0136, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6280, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.2457, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.7693, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.1390, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0025, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2270, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3552, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.7793, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.7638, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3265, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0027, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0040, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0040, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9132, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0070, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0308, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0098, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0054, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0103, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0182, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0252, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6872, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0549, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.0516, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2799, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0469, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0154, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0383, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5250, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0156, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0277, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0209, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0323, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0165, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0241, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0220, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0321, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0405, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.3829, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0393, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3091, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0196, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0267, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5773, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0261, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.1669, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2177, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.0855, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0345, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0362, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0199, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0226, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8758, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0267, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0365, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0245, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1948, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0529, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0303, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5721, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(2.1420, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0279, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0353, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0175, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0087, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5217, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0191, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0182, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0203, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2193, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0367, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0212, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0199, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0234, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0225, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0188, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0195, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0260, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(3.8059, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0185, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0201, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0209, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.0008, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0093, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5756, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0369, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0100, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0106, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0453, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.5099, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0141, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0149, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0125, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0105, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0102, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0203, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0267, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0174, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8278, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0146, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0274, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0092, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0180, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0187, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0131, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0155, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0165, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3554, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0371, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0134, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0190, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0291, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6446, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.1508, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0149, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0088, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3368, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0086, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(2.7049, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0090, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.4083, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2728, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0114, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0122, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4725, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0246, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0071, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0095, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4083, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0073, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0151, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0238, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0081, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8442, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0167, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0282, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0194, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0532, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0129, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0191, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0198, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0182, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0130, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0291, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0173, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0094, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0067, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0154, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0455, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0435, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.3673, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3773, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0109, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0257, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.9006, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0190, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.2126, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0079, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0100, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.1435, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0133, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0077, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0072, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0375, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9076, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0104, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0298, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5211, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0163, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0057, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0112, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0104, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0057, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0140, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1986, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0051, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0151, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0049, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0042, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0081, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3347, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5501, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0221, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0046, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0046, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.8796, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6125, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0059, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0051, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9391, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(2.4312, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0056, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0075, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0066, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0043, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0060, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0043, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0085, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0054, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0054, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0093, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4523, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2410, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(2.2455, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3262, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0062, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0063, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0070, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0054, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.7662, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.7287, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0088, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.0837, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0086, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0179, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0072, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8720, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0084, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5818, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0083, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0067, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0049, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0062, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.6854, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3303, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0096, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0096, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0062, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.1878, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2611, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0066, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0074, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0074, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1565, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0137, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2550, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0068, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0120, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0087, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4473, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0093, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0091, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0068, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0085, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3352, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0072, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(2.3072, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0089, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0066, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0111, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0112, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0179, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0134, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0135, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0073, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0057, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0077, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0067, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0082, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0079, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0103, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0051, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0057, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.7202, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1826, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0082, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0085, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2695, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.7852, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0062, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0054, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3069, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0077, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0053, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0068, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.4291, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0080, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0094, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0051, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0078, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8400, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0068, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0045, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0080, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0052, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9244, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0067, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(2.1984, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0093, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0047, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6644, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0097, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.8225, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0054, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0049, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0065, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5358, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9552, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0044, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9081, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0051, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0055, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0057, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0054, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0100, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0095, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0089, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0129, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0071, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.0256, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0052, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0071, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9138, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0128, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0081, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0065, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0070, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0047, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0527, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0044, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0044, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0069, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2207, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4768, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0053, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0052, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0049, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3702, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0052, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0067, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0057, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0042, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3351, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9732, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1485, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.7280, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4160, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0052, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5664, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0054, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0047, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0053, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0049, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0042, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0044, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0114, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0047, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.4019, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0045, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0053, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0055, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0051, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0052, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0049, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6750, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0068, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0043, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3781, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0049, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0054, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0051, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0040, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0040, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3252, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.5732, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0040, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0078, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0069, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0057, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0092, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0042, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0053, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5088, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.7851, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0046, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0040, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0049, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0057, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4529, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0055, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0043, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(2.1383, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0044, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0049, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.3941, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2419, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0045, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3906, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0065, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3987, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9229, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0044, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3794, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0045, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.1836, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0043, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2338, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9199, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3204, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0052, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0053, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.7334, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0045, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0052, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6060, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.9843, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0059, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0046, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3330, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6356, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0053, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0062, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0057, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0044, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1954, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0043, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.1046, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0042, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.1928, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5536, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0131, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0053, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9422, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0065, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3877, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.4643, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.7348, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0070, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0043, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.9434, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5221, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0123, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(2.0854, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6618, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0053, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4682, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0071, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4253, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3828, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0496, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0056, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0104, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0057, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2717, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0046, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.4408, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.2067, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0047, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0054, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0107, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0070, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0040, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(1.3285, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0046, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.5901, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0049, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0117, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0043, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0042, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0044, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0047, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0120, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0043, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0035, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0032, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0049, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0036, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0047, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0166, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.3901, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0102, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0082, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.0040, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
            "Loss:  tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward0>)\n"
          ]
        },
        {
          "output_type": "error",
          "ename": "KeyboardInterrupt",
          "evalue": "",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
            "\u001b[0;32m<ipython-input-14-89823fe3272f>\u001b[0m in \u001b[0;36m<cell line: 11>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     30\u001b[0m     \u001b[0;31m# loss = criterion(outputs[:135], labels)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     31\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m     \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     33\u001b[0m     \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     34\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/_tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m    579\u001b[0m                 \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    580\u001b[0m             )\n\u001b[0;32m--> 581\u001b[0;31m         torch.autograd.backward(\n\u001b[0m\u001b[1;32m    582\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    583\u001b[0m         )\n",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m    345\u001b[0m     \u001b[0;31m# some Python versions print out the first line of a multi-line function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    346\u001b[0m     \u001b[0;31m# calls in the traceback and some print out the last line\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 347\u001b[0;31m     _engine_run_backward(\n\u001b[0m\u001b[1;32m    348\u001b[0m         \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    349\u001b[0m         \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py\u001b[0m in \u001b[0;36m_engine_run_backward\u001b[0;34m(t_outputs, *args, **kwargs)\u001b[0m\n\u001b[1;32m    823\u001b[0m         \u001b[0munregister_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_register_logging_hooks_on_whole_graph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt_outputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    824\u001b[0m     \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 825\u001b[0;31m         return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass\n\u001b[0m\u001b[1;32m    826\u001b[0m             \u001b[0mt_outputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    827\u001b[0m         )  # Calls into the C++ engine to run the backward pass\n",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
          ]
        }
      ],
      "source": [
        "#train\n",
        "\n",
        "optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) #im keeping the lr small bc this is for fine-tuning\n",
        "criterion = torch.nn.CrossEntropyLoss()\n",
        "num_epochs = 5\n",
        "\n",
        "model.to(device)\n",
        "\n",
        "artificial_len = 560\n",
        "\n",
        "for epoch in range(num_epochs):\n",
        "\n",
        "  for idx, sample in enumerate(agg_dl_train):\n",
        "\n",
        "    inputs = sample['input_ids'][:, :artificial_len].to(device)\n",
        "    mask = sample['attention_mask'][:, :artificial_len].to(device)\n",
        "    labels = torch.Tensor(sample['label'][:, :artificial_len]).to(device).long()\n",
        "\n",
        "    optimizer.zero_grad()\n",
        "\n",
        "    outputs = model(inputs, attention_mask=mask, labels=labels)\n",
        "    loss = outputs.loss\n",
        "\n",
        "    if idx%50 == 0:\n",
        "      print(\"Loss: \", loss)\n",
        "\n",
        "    # print(outputs.shape)\n",
        "    # print(labels.shape)\n",
        "\n",
        "    # loss = criterion(outputs[:135], labels)\n",
        "\n",
        "    loss.backward()\n",
        "    optimizer.step()\n",
        "\n",
        "# model.eval()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "OBY91_wZetuZ"
      },
      "outputs": [],
      "source": [
        "outputs[0].shape"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import copy\n",
        "model2 = copy.deepcopy(model)"
      ],
      "metadata": {
        "id": "OLoYt0XuKJq7"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "VHxJsNdmrb-q",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "5f3468ee-53b7-4fb9-b0bf-2fa6828f935d"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-19-21b4a27b8782>:9: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
            "  quantized_model.load_state_dict(torch.load('qat_model.pth'))\n",
            "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:413: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
            "  device=storage.device,\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "LlamaForCausalLM(\n",
              "  (model): LlamaModel(\n",
              "    (embed_tokens): QuantizedEmbedding(num_embeddings=128256, embedding_dim=2048, dtype=torch.quint8, qscheme=torch.per_channel_affine_float_qparams)\n",
              "    (layers): ModuleList(\n",
              "      (0): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.31701186299324036, zero_point=70, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.1922328621149063, zero_point=54, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.01628422923386097, zero_point=72, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.009655891917645931, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.07631822675466537, zero_point=75, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.03359270095825195, zero_point=70, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.05023924633860588, zero_point=44, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (1): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.13805238902568817, zero_point=52, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.15776652097702026, zero_point=60, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.03161122649908066, zero_point=34, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.007619784213602543, zero_point=71, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.23326125741004944, zero_point=31, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.46461352705955505, zero_point=114, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=6.2376708984375, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (2): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.1305798441171646, zero_point=67, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.12747929990291595, zero_point=71, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.032829128205776215, zero_point=74, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.01895715296268463, zero_point=42, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.07305552065372467, zero_point=60, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.03974951431155205, zero_point=69, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.012219389900565147, zero_point=56, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (3): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.14034973084926605, zero_point=74, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.17263036966323853, zero_point=72, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.02904871664941311, zero_point=60, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.008709244430065155, zero_point=74, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.04360383003950119, zero_point=78, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.0480324849486351, zero_point=37, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.014903663657605648, zero_point=47, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (4): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.15473605692386627, zero_point=61, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.18037556111812592, zero_point=59, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.014598781242966652, zero_point=64, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.009733655489981174, zero_point=59, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.04816940799355507, zero_point=80, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.04994359239935875, zero_point=86, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.018921468406915665, zero_point=51, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (5): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.14324717223644257, zero_point=61, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.22314751148223877, zero_point=69, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.019725706428289413, zero_point=58, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.009052897803485394, zero_point=71, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.044827282428741455, zero_point=85, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.06396540999412537, zero_point=52, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.014721620827913284, zero_point=53, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (6): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.15567262470722198, zero_point=58, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.24330493807792664, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.019611241295933723, zero_point=67, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.009436138905584812, zero_point=80, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.060413457453250885, zero_point=84, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.03792325034737587, zero_point=50, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.015196369029581547, zero_point=68, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (7): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.14517270028591156, zero_point=73, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.22805632650852203, zero_point=59, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.01671365462243557, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.01771656610071659, zero_point=91, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.06519144028425217, zero_point=84, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.045925453305244446, zero_point=90, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.01325978059321642, zero_point=72, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (8): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.1391000896692276, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.21637581288814545, zero_point=68, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.017320899292826653, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.01049901731312275, zero_point=65, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.0675107091665268, zero_point=76, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.04929600656032562, zero_point=38, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.018513265997171402, zero_point=84, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (9): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.16671966016292572, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.2319967895746231, zero_point=61, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.02032259851694107, zero_point=58, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.014058471657335758, zero_point=52, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.07045481353998184, zero_point=83, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.062100160866975784, zero_point=43, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.01701277121901512, zero_point=84, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (10): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.18677900731563568, zero_point=74, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.17365947365760803, zero_point=69, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.026005607098340988, zero_point=66, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.011404228396713734, zero_point=76, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.06304419785737991, zero_point=82, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.07537255436182022, zero_point=86, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.010813701897859573, zero_point=71, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (11): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.18630267679691315, zero_point=64, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.26545605063438416, zero_point=59, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.022774094715714455, zero_point=62, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.017639460042119026, zero_point=57, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.09043470770120621, zero_point=88, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.10928525030612946, zero_point=86, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.016825584694743156, zero_point=79, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (12): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.21200743317604065, zero_point=51, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.25319522619247437, zero_point=61, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.027644505724310875, zero_point=75, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.022432168945670128, zero_point=47, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.08884824067354202, zero_point=76, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.06461600214242935, zero_point=69, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.020088614895939827, zero_point=72, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (13): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.22792071104049683, zero_point=71, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.23003895580768585, zero_point=53, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.044164374470710754, zero_point=76, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.02935781516134739, zero_point=101, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.11889562010765076, zero_point=56, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.08737265318632126, zero_point=59, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.0410793274641037, zero_point=74, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (14): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.249503493309021, zero_point=61, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.18525458872318268, zero_point=53, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.06224101036787033, zero_point=66, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.03727683052420616, zero_point=75, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.25919973850250244, zero_point=49, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.22699691355228424, zero_point=63, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=0.1121382862329483, zero_point=40, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "      (15): LlamaDecoderLayer(\n",
              "        (self_attn): LlamaSdpaAttention(\n",
              "          (q_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.29254186153411865, zero_point=60, qscheme=torch.per_channel_affine)\n",
              "          (k_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.259794145822525, zero_point=57, qscheme=torch.per_channel_affine)\n",
              "          (v_proj): QuantizedLinear(in_features=2048, out_features=512, scale=0.0648101419210434, zero_point=70, qscheme=torch.per_channel_affine)\n",
              "          (o_proj): QuantizedLinear(in_features=2048, out_features=2048, scale=0.11098157614469528, zero_point=66, qscheme=torch.per_channel_affine)\n",
              "          (rotary_emb): LlamaRotaryEmbedding()\n",
              "        )\n",
              "        (mlp): LlamaMLP(\n",
              "          (gate_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.49932554364204407, zero_point=94, qscheme=torch.per_channel_affine)\n",
              "          (up_proj): QuantizedLinear(in_features=2048, out_features=8192, scale=0.4446257948875427, zero_point=84, qscheme=torch.per_channel_affine)\n",
              "          (down_proj): QuantizedLinear(in_features=8192, out_features=2048, scale=4.449953079223633, zero_point=64, qscheme=torch.per_channel_affine)\n",
              "          (act_fn): SiLU()\n",
              "        )\n",
              "        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "      )\n",
              "    )\n",
              "    (norm): LlamaRMSNorm((2048,), eps=1e-05)\n",
              "    (rotary_emb): LlamaRotaryEmbedding()\n",
              "  )\n",
              "  (lm_head): QuantizedLinear(in_features=2048, out_features=128256, scale=0.31616729497909546, zero_point=31, qscheme=torch.per_channel_affine)\n",
              ")"
            ]
          },
          "metadata": {},
          "execution_count": 19
        }
      ],
      "source": [
        "#convert\n",
        "for _, mod in model.named_modules():\n",
        "    if isinstance(mod, torch.nn.Embedding):\n",
        "        mod.qconfig = torch.ao.quantization.float_qparams_weight_only_qconfig\n",
        "quantized_model = torch.quantization.convert(model.to('cpu'), inplace=False)\n",
        "\n",
        "#save\n",
        "torch.save(quantized_model.state_dict(), 'qat_model.pth')\n",
        "quantized_model.load_state_dict(torch.load('qat_model.pth'))\n",
        "\n",
        "quantized_model.eval()"
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "RlSIFEuCwB-n"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "VdUBf_-iLyg4"
      },
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "A100",
      "machine_shape": "hm",
      "provenance": [],
      "collapsed_sections": [
        "fVnfQ13dNaQ0",
        "7dHnFVtiYYCQ"
      ]
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "a389778f36d44ff48c54dcb49c83558c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_de8e2f0737d3424d8df23fc154e34348",
              "IPY_MODEL_179afcfbf805433eb127212e40a6f140",
              "IPY_MODEL_2542f7ec25cd454cb01bad65f46e9301"
            ],
            "layout": "IPY_MODEL_55ab70bad9484de3aff788767ccf0ac5"
          }
        },
        "de8e2f0737d3424d8df23fc154e34348": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b8d3f99951804b64a26e82cb78cf6a7f",
            "placeholder": "",
            "style": "IPY_MODEL_0335aa27984b45de9127c6cc6936f40f",
            "value": "Loading checkpoint shards: 100%"
          }
        },
        "179afcfbf805433eb127212e40a6f140": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_64e7ae2225ab44049080312914519756",
            "max": 2,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_ca17213c0b464f3e88416dfafd06bf5c",
            "value": 2
          }
        },
        "2542f7ec25cd454cb01bad65f46e9301": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_09e55b7734ea474bbf227a5f047562ba",
            "placeholder": "",
            "style": "IPY_MODEL_91952d933ef64beb8b92b6b19dcfb15c",
            "value": " 2/2 [00:02&lt;00:00,  1.04s/it]"
          }
        },
        "55ab70bad9484de3aff788767ccf0ac5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b8d3f99951804b64a26e82cb78cf6a7f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0335aa27984b45de9127c6cc6936f40f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "64e7ae2225ab44049080312914519756": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ca17213c0b464f3e88416dfafd06bf5c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "09e55b7734ea474bbf227a5f047562ba": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "91952d933ef64beb8b92b6b19dcfb15c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "a6122a0554324bde96f9e81dcaadccd9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_887c3b5bea9b42138d0d4a38917d507c",
              "IPY_MODEL_02ad42a7e19d4c77b893283023af8866",
              "IPY_MODEL_687c41c14d0b4470aeed781e3f02f01e"
            ],
            "layout": "IPY_MODEL_5388e0720cea4a878d993c711f19b7da"
          }
        },
        "887c3b5bea9b42138d0d4a38917d507c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_233a67f4e6794cfcb20554569c142d16",
            "placeholder": "",
            "style": "IPY_MODEL_f256314e9b6a49eca22b0ff1a8b139ae",
            "value": "Loading checkpoint shards: 100%"
          }
        },
        "02ad42a7e19d4c77b893283023af8866": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ba13ed834465493eadcf0aa00d2441b2",
            "max": 2,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_69cdc31b4647483d922bf381137a6f5a",
            "value": 2
          }
        },
        "687c41c14d0b4470aeed781e3f02f01e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_2c5d8a1b076b4f9ebf839cb1ec816186",
            "placeholder": "",
            "style": "IPY_MODEL_64fb4299c5bd47439ff0509adb46b1f1",
            "value": " 2/2 [00:02&lt;00:00,  1.36s/it]"
          }
        },
        "5388e0720cea4a878d993c711f19b7da": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "233a67f4e6794cfcb20554569c142d16": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f256314e9b6a49eca22b0ff1a8b139ae": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "ba13ed834465493eadcf0aa00d2441b2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "69cdc31b4647483d922bf381137a6f5a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "2c5d8a1b076b4f9ebf839cb1ec816186": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "64fb4299c5bd47439ff0509adb46b1f1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "cda09fe730404d64b2d1046fc0ff0972": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_d5fb009b64f64870b23604a6df388d31",
              "IPY_MODEL_d785ea9c32854641bd216fa3cdc0f366",
              "IPY_MODEL_17ac6f96e8a142828bff4d4b86080b47"
            ],
            "layout": "IPY_MODEL_b18188da875c49eca69c4fe4a2b37d2f"
          }
        },
        "d5fb009b64f64870b23604a6df388d31": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_5e0980136ad848c490aa5518751e46e3",
            "placeholder": "",
            "style": "IPY_MODEL_e87fcc34901449ac97380d3504f7352a",
            "value": "Loading checkpoint shards: 100%"
          }
        },
        "d785ea9c32854641bd216fa3cdc0f366": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_2402c90e45294d9385ccac1401d9b1c5",
            "max": 2,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_3fad5bb47ad342af80beaa6a480ba394",
            "value": 2
          }
        },
        "17ac6f96e8a142828bff4d4b86080b47": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7e1d389498ed404b8f288d7653219206",
            "placeholder": "",
            "style": "IPY_MODEL_3d7a3f940b4b44cd85c9187c78972d67",
            "value": " 2/2 [00:02&lt;00:00,  1.26s/it]"
          }
        },
        "b18188da875c49eca69c4fe4a2b37d2f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5e0980136ad848c490aa5518751e46e3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e87fcc34901449ac97380d3504f7352a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "2402c90e45294d9385ccac1401d9b1c5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3fad5bb47ad342af80beaa6a480ba394": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "7e1d389498ed404b8f288d7653219206": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3d7a3f940b4b44cd85c9187c78972d67": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}