From dafa0892e3e4f6612a0a778eec753061b070b4b2 Mon Sep 17 00:00:00 2001
From: Sergey Shvets <recycletechno@gmail.com>
Date: Tue, 31 Oct 2023 11:53:21 +0300
Subject: [PATCH] Additional assert statement for dims alignment

---
 week05_transfer/seminar.ipynb | 4380 ++++++++++++++++-----------------
 1 file changed, 2190 insertions(+), 2190 deletions(-)

diff --git a/week05_transfer/seminar.ipynb b/week05_transfer/seminar.ipynb
index f92684c0..9caa6dbc 100644
--- a/week05_transfer/seminar.ipynb
+++ b/week05_transfer/seminar.ipynb
@@ -1,2243 +1,2243 @@
 {
-  "cells": [
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "zriTdjauH8iQ",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "e25d0f3f-2879-4d80-8cad-f949cf5b8453"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m22.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m48.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m25.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25h"
-          ]
-        }
-      ],
-      "source": [
-        "%pip install -q transformers huggingface_hub\n",
-        "import math\n",
-        "import numpy as np\n",
-        "import torch\n",
-        "import torch.nn as nn\n",
-        "import torch.nn.functional as F"
-      ]
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "zriTdjauH8iQ",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "outputId": "e25d0f3f-2879-4d80-8cad-f949cf5b8453"
+   },
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "xQiRPWWHlSgv"
-      },
-      "source": [
-        "### Using pre-trained transformers (2 points)\n",
-        "_for fun and profit_\n",
-        "\n",
-        "There are many toolkits that let you access pre-trained transformer models, but the most powerful and convenient by far is [`huggingface/transformers`](https://github.com/huggingface/transformers). In this week's practice, you'll learn how to download, apply and modify pre-trained transformers for a range of tasks. Buckle up, we're going in!\n",
-        "\n",
-        "\n",
-        "__Pipelines:__ if all you want is to apply a pre-trained model, you can do that in one line of code using pipeline. Huggingface/transformers has a selection of pre-configured pipelines for masked language modelling, sentiment classification, question aswering, etc. ([see full list here](https://huggingface.co/transformers/main_classes/pipelines.html))\n",
-        "\n",
-        "A typical pipeline includes:\n",
-        "* pre-processing, e.g. tokenization, subword segmentation\n",
-        "* a backbone model, e.g. bert finetuned for classification\n",
-        "* output post-processing\n",
-        "\n",
-        "Let's see it in action:"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m7.7/7.7 MB\u001B[0m \u001B[31m24.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m302.0/302.0 kB\u001B[0m \u001B[31m22.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m3.8/3.8 MB\u001B[0m \u001B[31m51.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m1.3/1.3 MB\u001B[0m \u001B[31m48.7 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m295.0/295.0 kB\u001B[0m \u001B[31m25.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25h"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install -q transformers huggingface_hub\n",
+    "import math\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "xQiRPWWHlSgv"
+   },
+   "source": [
+    "### Using pre-trained transformers (2 points)\n",
+    "_for fun and profit_\n",
+    "\n",
+    "There are many toolkits that let you access pre-trained transformer models, but the most powerful and convenient by far is [`huggingface/transformers`](https://github.com/huggingface/transformers). In this week's practice, you'll learn how to download, apply and modify pre-trained transformers for a range of tasks. Buckle up, we're going in!\n",
+    "\n",
+    "\n",
+    "__Pipelines:__ if all you want is to apply a pre-trained model, you can do that in one line of code using pipeline. Huggingface/transformers has a selection of pre-configured pipelines for masked language modelling, sentiment classification, question answering, etc. ([see full list here](https://huggingface.co/transformers/main_classes/pipelines.html))\n",
+    "\n",
+    "A typical pipeline includes:\n",
+    "* pre-processing, e.g. tokenization, subword segmentation\n",
+    "* a backbone model, e.g. bert finetuned for classification\n",
+    "* output post-processing\n",
+    "\n",
+    "Let's see it in action:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "rP1KFtvLlJHR",
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 162,
+     "referenced_widgets": [
+      "e85584c419aa445285eecb482778c7ba",
+      "5694548560b548dabed4b111766da60e",
+      "7fceb5ec7dec415a9dfcd4261aeb4a3d",
+      "1fd2361180bd49aa8e2012a5e379d0c1",
+      "77441a81d25c40819ae5af99ad70f3c8",
+      "48c3bc239f16407bbba2d36ac6e5aaae",
+      "5c87b856ffeb4c1dbf2d4637065d26ef",
+      "fbbe93e1d4a5493782b7a246f3cb1760",
+      "c3e31f6d90a84af08ee33023be184849",
+      "6099e17176dd4eef829ac7c45badc9e2",
+      "7ea9a125c76543419007fd5083ed63cd",
+      "b2786e8af69e490cba5048fca7a0a51e",
+      "f07e54fd856f4efc99bedb44abcece1b",
+      "e0d2ca01fd7042a88a5493323a61409c",
+      "e07d888d412d43f8ac4cc5285de07772",
+      "15227ca6cc5a4d219e0113da09926da4",
+      "cf83aaa31a1a47419db2127456beae69",
+      "1e463fce356b46859493cbd975163ef1",
+      "4ecb5a9efc3c42dd8faf4af9424ad5cf",
+      "3af4702b95484b8cabd891b18ffc71b1",
+      "e9210c257b8a4091b86123271c8a5486",
+      "7eb0bf256b8d4975b0615aa96f00bbe1",
+      "fc132bfb7d6447ad9900af7b80806aa1",
+      "ab6d124394214415a2580983c88bbb11",
+      "8578b4d2279f40dc846fcdfc2df55742",
+      "7213248ebe1f4e28a00a93eb33169170",
+      "b1497c0d7c3b4c04a91302a2cbc96db1",
+      "60e971374e5448d08e9698a5594ff60a",
+      "6c8113bb76024debaf3256431cbefe35",
+      "73d8ad019f044a6998e81273a922745a",
+      "a445a35355424cb0b4c7abf3d4e6f32d",
+      "d71a81e25f5b4f88b8361c62e2ab9f44",
+      "4f084bbe71bb433ba0e7ee50fccb71cc",
+      "5cff2bc12fec494e9e19b8a956186df6",
+      "787781c894bd46c6990f2513d9f2c79c",
+      "3f4eae6f79054676976f87b9524c4cf5",
+      "237d34743ee64428a9d22ee51e0423f3",
+      "b095428dcb8a4408854f854de54d5692",
+      "1b76512711884ffb81d3c3505b8fd137",
+      "2f53a3d1d0f44192beeb6ed8d40e762f",
+      "1b194af3033342b0b04dc12b3e055ff2",
+      "391b8f44109544328617fbc3a3c3b9fc",
+      "6f36db95f6fb4b4687c7130fc23c4348",
+      "82a35a7ae5e441a1b66534a3eab5e765"
+     ]
     },
+    "outputId": "f4e9f426-6445-4d69-b7b7-dd083d7bdebf"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "rP1KFtvLlJHR",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 162,
-          "referenced_widgets": [
-            "e85584c419aa445285eecb482778c7ba",
-            "5694548560b548dabed4b111766da60e",
-            "7fceb5ec7dec415a9dfcd4261aeb4a3d",
-            "1fd2361180bd49aa8e2012a5e379d0c1",
-            "77441a81d25c40819ae5af99ad70f3c8",
-            "48c3bc239f16407bbba2d36ac6e5aaae",
-            "5c87b856ffeb4c1dbf2d4637065d26ef",
-            "fbbe93e1d4a5493782b7a246f3cb1760",
-            "c3e31f6d90a84af08ee33023be184849",
-            "6099e17176dd4eef829ac7c45badc9e2",
-            "7ea9a125c76543419007fd5083ed63cd",
-            "b2786e8af69e490cba5048fca7a0a51e",
-            "f07e54fd856f4efc99bedb44abcece1b",
-            "e0d2ca01fd7042a88a5493323a61409c",
-            "e07d888d412d43f8ac4cc5285de07772",
-            "15227ca6cc5a4d219e0113da09926da4",
-            "cf83aaa31a1a47419db2127456beae69",
-            "1e463fce356b46859493cbd975163ef1",
-            "4ecb5a9efc3c42dd8faf4af9424ad5cf",
-            "3af4702b95484b8cabd891b18ffc71b1",
-            "e9210c257b8a4091b86123271c8a5486",
-            "7eb0bf256b8d4975b0615aa96f00bbe1",
-            "fc132bfb7d6447ad9900af7b80806aa1",
-            "ab6d124394214415a2580983c88bbb11",
-            "8578b4d2279f40dc846fcdfc2df55742",
-            "7213248ebe1f4e28a00a93eb33169170",
-            "b1497c0d7c3b4c04a91302a2cbc96db1",
-            "60e971374e5448d08e9698a5594ff60a",
-            "6c8113bb76024debaf3256431cbefe35",
-            "73d8ad019f044a6998e81273a922745a",
-            "a445a35355424cb0b4c7abf3d4e6f32d",
-            "d71a81e25f5b4f88b8361c62e2ab9f44",
-            "4f084bbe71bb433ba0e7ee50fccb71cc",
-            "5cff2bc12fec494e9e19b8a956186df6",
-            "787781c894bd46c6990f2513d9f2c79c",
-            "3f4eae6f79054676976f87b9524c4cf5",
-            "237d34743ee64428a9d22ee51e0423f3",
-            "b095428dcb8a4408854f854de54d5692",
-            "1b76512711884ffb81d3c3505b8fd137",
-            "2f53a3d1d0f44192beeb6ed8d40e762f",
-            "1b194af3033342b0b04dc12b3e055ff2",
-            "391b8f44109544328617fbc3a3c3b9fc",
-            "6f36db95f6fb4b4687c7130fc23c4348",
-            "82a35a7ae5e441a1b66534a3eab5e765"
-          ]
-        },
-        "outputId": "f4e9f426-6445-4d69-b7b7-dd083d7bdebf"
-      },
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading (…)lve/main/config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "e85584c419aa445285eecb482778c7ba"
-            }
-          },
-          "metadata": {}
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "b2786e8af69e490cba5048fca7a0a51e"
-            }
-          },
-          "metadata": {}
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading (…)okenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "fc132bfb7d6447ad9900af7b80806aa1"
-            }
-          },
-          "metadata": {}
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "5cff2bc12fec494e9e19b8a956186df6"
-            }
-          },
-          "metadata": {}
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "[{'label': 'POSITIVE', 'score': 0.9998860359191895}]\n"
-          ]
-        }
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "Downloading (…)lve/main/config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]"
       ],
-      "source": [
-        "import transformers\n",
-        "classifier = transformers.pipeline('sentiment-analysis', model=\"distilbert-base-uncased-finetuned-sst-2-english\")\n",
-        "\n",
-        "print(classifier(\"BERT is amazing!\"))"
-      ]
+      "application/vnd.jupyter.widget-view+json": {
+       "version_major": 2,
+       "version_minor": 0,
+       "model_id": "e85584c419aa445285eecb482778c7ba"
+      }
+     },
+     "metadata": {}
     },
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "nYUNuyXMn5l9"
-      },
-      "outputs": [],
-      "source": [
-        "import base64\n",
-        "data = {\n",
-        "    'arryn': 'As High as Honor.',\n",
-        "    'baratheon': 'Ours is the fury.',\n",
-        "    'stark': 'Winter is coming.',\n",
-        "    'tyrell': 'Growing strong.'\n",
-        "}\n",
-        "\n",
-        "# YOUR CODE: predict sentiment for each noble house and create outputs dict\n",
-        "<...>\n",
-        "outputs = <YOUR CODE: dict (house name) : True if positive, False if negative>\n",
-        "\n",
-        "assert sum(outputs.values()) == 3 and outputs[base64.decodebytes(b'YmFyYXRoZW9u\\n').decode()] == False\n",
-        "print(\"Well done!\")"
-      ]
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "Downloading model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]"
+      ],
+      "application/vnd.jupyter.widget-view+json": {
+       "version_major": 2,
+       "version_minor": 0,
+       "model_id": "b2786e8af69e490cba5048fca7a0a51e"
+      }
+     },
+     "metadata": {}
     },
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "BRDhIH-XpSNo"
-      },
-      "source": [
-        "You can also access vanilla Masked Language Model that was trained to predict masked words. Here's how:"
-      ]
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "Downloading (…)okenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]"
+      ],
+      "application/vnd.jupyter.widget-view+json": {
+       "version_major": 2,
+       "version_minor": 0,
+       "model_id": "fc132bfb7d6447ad9900af7b80806aa1"
+      }
+     },
+     "metadata": {}
     },
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "pa-8noIllRbZ"
-      },
-      "outputs": [],
-      "source": [
-        "mlm_model = transformers.pipeline('fill-mask', model=\"bert-base-uncased\")\n",
-        "MASK = mlm_model.tokenizer.mask_token\n",
-        "\n",
-        "for hypo in mlm_model(f\"Donald {MASK} is the president of the united states.\"):\n",
-        "  print(f\"P={hypo['score']:.5f}\", hypo['sequence'])"
-      ]
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]"
+      ],
+      "application/vnd.jupyter.widget-view+json": {
+       "version_major": 2,
+       "version_minor": 0,
+       "model_id": "5cff2bc12fec494e9e19b8a956186df6"
+      }
+     },
+     "metadata": {}
     },
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "9NxeG1Y5pwX1"
-      },
-      "outputs": [],
-      "source": [
-        "# Your turn: use bert to recall what year was the Soviet Union founded in\n",
-        "mlm_model(<YOUR PROMPT>)"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "[{'label': 'POSITIVE', 'score': 0.9998860359191895}]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import transformers\n",
+    "classifier = transformers.pipeline('sentiment-analysis', model=\"distilbert-base-uncased-finetuned-sst-2-english\")\n",
+    "\n",
+    "print(classifier(\"BERT is amazing!\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "nYUNuyXMn5l9"
+   },
+   "outputs": [],
+   "source": [
+    "import base64\n",
+    "data = {\n",
+    "    'arryn': 'As High as Honor.',\n",
+    "    'baratheon': 'Ours is the fury.',\n",
+    "    'stark': 'Winter is coming.',\n",
+    "    'tyrell': 'Growing strong.'\n",
+    "}\n",
+    "\n",
+    "# YOUR CODE: predict sentiment for each noble house and create outputs dict\n",
+    "<...>\n",
+    "outputs = <YOUR CODE: dict (house name) : True if positive, False if negative>\n",
+    "\n",
+    "assert sum(outputs.values()) == 3 and outputs[base64.decodebytes(b'YmFyYXRoZW9u\\n').decode()] == False\n",
+    "print(\"Well done!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "BRDhIH-XpSNo"
+   },
+   "source": [
+    "You can also access vanilla Masked Language Model that was trained to predict masked words. Here's how:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "pa-8noIllRbZ"
+   },
+   "outputs": [],
+   "source": [
+    "mlm_model = transformers.pipeline('fill-mask', model=\"bert-base-uncased\")\n",
+    "MASK = mlm_model.tokenizer.mask_token\n",
+    "\n",
+    "for hypo in mlm_model(f\"Donald {MASK} is the president of the united states.\"):\n",
+    "  print(f\"P={hypo['score']:.5f}\", hypo['sequence'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "9NxeG1Y5pwX1"
+   },
+   "outputs": [],
+   "source": [
+    "# Your turn: use bert to recall what year was the Soviet Union founded in\n",
+    "mlm_model(<YOUR PROMPT>)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "YJxRFzCSq903"
+   },
+   "source": [
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "\n",
+    "Huggingface offers hundreds of pre-trained models that specialize in different tasks. You can quickly find the model you need using [this list](https://huggingface.co/models).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "HRux8Qp2hkXr"
+   },
+   "outputs": [],
+   "source": [
+    "text = \"\"\"Almost two-thirds of the 1.5 million people who viewed this liveblog had Googled to discover\n",
+    " the latest on the Rosetta mission. They were treated to this detailed account by the Guardian’s science editor,\n",
+    " Ian Sample, and astronomy writer Stuart Clark of the moment scientists landed a robotic spacecraft on a comet\n",
+    " for the first time in history, and the delirious reaction it provoked at their headquarters in Germany.\n",
+    "  “We are there. We are sitting on the surface. Philae is talking to us,” said one scientist.\n",
+    "\"\"\"\n",
+    "\n",
+    "# Task: create a pipeline for named entity recognition, use task name 'ner' and search for the right model in the list\n",
+    "ner_model = <YOUR CODE>\n",
+    "\n",
+    "named_entities = ner_model(text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "hf57MRzSiSON"
+   },
+   "outputs": [],
+   "source": [
+    "print('OUTPUT:', named_entities)\n",
+    "word_to_entity = {item['word']: item['entity'] for item in named_entities}\n",
+    "assert 'org' in word_to_entity.get('Guardian').lower() and 'per' in word_to_entity.get('Stuart').lower()\n",
+    "print(\"All tests passed\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "ULMownz6sP9n"
+   },
+   "source": [
+    "### The building blocks of a pipeline\n",
+    "\n",
+    "Huggingface also allows you to access its pipelines on a lower level. There are two main abstractions for you:\n",
+    "* `Tokenizer` - converts from strings to token ids and back\n",
+    "* `Model` - a pytorch `nn.Module` with pre-trained weights\n",
+    "\n",
+    "You can use such models as part of your regular pytorch code: insert it as a layer in your model, apply it to a batch of data, backpropagate, optimize, etc."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "KMJbV0QVsO0Q"
+   },
+   "outputs": [],
+   "source": [
+    "tokenizer = transformers.AutoTokenizer.from_pretrained('bert-base-uncased')\n",
+    "model = transformers.AutoModel.from_pretrained('bert-base-uncased')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "ZgSPHKPRxG6U"
+   },
+   "outputs": [],
+   "source": [
+    "lines = [\n",
+    "    \"Luke, I am your father.\",\n",
+    "    \"Life is what happens when you're busy making other plans.\",\n",
+    "    ]\n",
+    "\n",
+    "# tokenize a batch of inputs. \"pt\" means [p]y[t]orch tensors\n",
+    "tokens_info = tokenizer(lines, padding=True, truncation=True, return_tensors=\"pt\")\n",
+    "\n",
+    "for key in tokens_info:\n",
+    "    print(key, tokens_info[key])\n",
+    "\n",
+    "print(\"Detokenized:\")\n",
+    "for i in range(2):\n",
+    "    print(tokenizer.decode(tokens_info['input_ids'][i]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "MJkbHxERyfL4"
+   },
+   "outputs": [],
+   "source": [
+    "# You can now apply the model to get embeddings\n",
+    "with torch.no_grad():\n",
+    "    out = model(**tokens_info)\n",
+    "\n",
+    "print(out['pooler_output'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "_Vij7Gc1wOaq"
+   },
+   "source": [
+    "Transformers knowledge hub: https://huggingface.co/transformers/"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "bwmTTyjUGqol"
+   },
+   "source": [
+    "### Build-a-transformer (2 points)\n",
+    "\n",
+    "In this section, you will implement a transformer language model layer by layer, then use it to generate (hopefully) coherent text.\n",
+    "\n",
+    "To understand how these layers work, please check out our guide to transformers from [nlp course for you -> transformers](https://lena-voita.github.io/nlp_course/seq2seq_and_attention.html#transformer_intro).\n",
+    "\n",
+    "\n",
+    "First, we download pre-trained weights for the [GPT2 model by OpenAI](https://openai.com/research/better-language-models) - a prominent model from 2019.\n",
+    "\n",
+    "\n",
+    "\n",
+    "Idea & code by: Ilya Beletsky"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "vOcK0lGTGqol",
+    "outputId": "131fbc38-d4af-4e3b-b87b-f4c1b15d3162"
+   },
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "YJxRFzCSq903"
-      },
-      "source": [
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "\n",
-        "Huggingface offers hundreds of pre-trained models that specialize on different tasks. You can quickly find the model you need using [this list](https://huggingface.co/models).\n"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Weights: ['h.0.attn.c_attn.bias', 'h.0.attn.c_attn.weight', 'h.0.attn.c_proj.bias', 'h.0.attn.c_proj.weight', 'h.0.ln_1.bias', 'h.0.ln_1.weight', 'h.0.ln_2.bias', 'h.0.ln_2.weight', 'h.0.mlp.c_fc.bias', 'h.0.mlp.c_fc.weight', 'h.0.mlp.c_proj.bias', 'h.0.mlp.c_proj.weight', 'h.1.attn.c_attn.bias', 'h.1.attn.c_attn.weight', 'h.1. ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "from huggingface_hub import hf_hub_download\n",
+    "state_dict = torch.load(hf_hub_download(\"gpt2\", filename=\"pytorch_model.bin\"))\n",
+    "for key, value in tuple(state_dict.items()):\n",
+    "    if key.startswith('h.') and key.endswith('.weight') and value.ndim == 2:\n",
+    "        value.transpose_(1, 0)  # <-- for compatibility with modern PyTorch modules\n",
+    "    if key.startswith('h.') and key.endswith('.attn.bias') and value.ndim == 4:\n",
+    "        state_dict.pop(key)  # <-- triangular binar masks, not needed in this code\n",
+    "\n",
+    "print('Weights:', repr(sorted(state_dict.keys()))[:320], '...')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "mr0SUtQnGqom"
+   },
+   "source": [
+    "In the next few cells, we shall implement the model layer by layer to make use of those weights.\n",
+    "\n",
+    "As you might recall, transformers contain two main layer types: attention and fully-connected layers.\n",
+    "\n",
+    "The fully connected layers are by far easier to understand, so we shall begin there:\n",
+    "\n",
+    "Please implement fully-connected layer __without residual or layer normalization__ (we'll add those in a bit)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "3Rh-6DX9Gqom"
+   },
+   "outputs": [],
+   "source": [
+    "class GeLUThatWasUsedInGPT2(nn.Module):\n",
+    "    def forward(self, x):\n",
+    "        return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))\n",
+    "\n",
+    "class FullyConnected(nn.Module):\n",
+    "    def __init__(self, dim: int):\n",
+    "        super().__init__()\n",
+    "        self.c_fc = nn.Linear(dim, 4  * dim)\n",
+    "        self.gelu = GeLUThatWasUsedInGPT2()\n",
+    "        self.c_proj = nn.Linear(4 * dim, dim)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        # x.shape = [batch_size, seq_length, dim]\n",
+    "        <YOUR CODE HERE - COMPUTE LAYER OUTPUTS>\n",
+    "        return <MLP OUTPUTS>\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "iSVGKnHBGqom"
+   },
+   "source": [
+    "Now, let's test that it works with GPT-2 weights:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "CoWjZwZkGqom"
+   },
+   "outputs": [],
+   "source": [
+    "mlp = FullyConnected(dim=768)\n",
+    "mlp.load_state_dict({'c_fc.weight': state_dict['h.0.mlp.c_fc.weight'],\n",
+    "                     'c_fc.bias': state_dict['h.0.mlp.c_fc.bias'],\n",
+    "                     'c_proj.weight': state_dict['h.0.mlp.c_proj.weight'],\n",
+    "                     'c_proj.bias': state_dict['h.0.mlp.c_proj.bias']})\n",
+    "\n",
+    "torch.manual_seed(1337)\n",
+    "x = torch.randn(1, 2, 768)  # [batch_size, sequence_length, dim]\n",
+    "checksum = torch.sum(mlp(x) * x)\n",
+    "assert abs(checksum.item() - 1282.3315) < 0.1, \"layer outputs do not match reference\"\n",
+    "assert torch.allclose(mlp(x[:, (1, 0), :])[:, (1, 0), :], mlp(x)), \"mlp must be permutation-invariant\"\n",
+    "print(\"Seems legit!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "VbfCevRwGqom"
+   },
+   "source": [
+    "Now, let's get to attention layers.\n",
+    "\n",
+    "Since GPT-2 needs to generate text from left to right, each generated token can only attend to tokens on the left (and itself). This kind of attention is called \"Masked\" self-attention, because it hides tokens to the right.\n",
+    "\n",
+    "As before, please implement masked self-attention __without layernorm or residual connections.__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "T6j7M4hLGqon"
+   },
+   "outputs": [],
+   "source": [
+    "class MaskedSelfAttention(nn.Module):\n",
+    "    def __init__(self, dim: int, num_heads: int):\n",
+    "        super().__init__()\n",
+    "        self.c_attn = nn.Linear(dim, dim * 3)  # query + key + value, combined\n",
+    "        self.c_proj = nn.Linear(dim, dim)  # output projection\n",
+    "        self.dim, self.num_heads = dim, num_heads\n",
+    "        self.head_size = dim // num_heads\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        q, k, v = self.c_attn(x).split(dim=-1, split_size=self.dim)\n",
+    "        assert q.shape == k.shape == v.shape == x.shape, \"q, k and v must have the same shape as x\"\n",
+    "        assert self.dim % self.num_heads == 0, \"self.dim must be divisible by self.num_heads without a remainder\"\n",
+    "\n",
+    "        # Note: this is an inefficient implementation that uses a for-loop.\n",
+    "        # To get the full grade during homework, please re-implement this code:\n",
+    "        # 1) do not use for-loops (or other loops). Compute everything in parallel with vectorized operations\n",
+    "        # 2) do not use F.scaled_dot_product_attention - write your own attention code using basic PyTorch ops\n",
+    "        head_outputs = []\n",
+    "        for head_index in range(self.num_heads):\n",
+    "            head_selector = range(self.head_size * head_index, self.head_size * (head_index + 1))\n",
+    "\n",
+    "            head_queries = q[..., head_selector]\n",
+    "            head_keys = k[..., head_selector]\n",
+    "            head_values = v[..., head_selector]\n",
+    "\n",
+    "            single_head_output = F.scaled_dot_product_attention(\n",
+    "                <YOUR CODE HERE - fill in the missing parameters; see docs below>\n",
+    "                is_causal=True)\n",
+    "            # docs: https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html\n",
+    "            head_outputs.append(single_head_output)\n",
+    "\n",
+    "        combined_head_outputs = torch.cat(head_outputs, dim=-1)\n",
+    "        return self.c_proj(combined_head_outputs)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "Test that it works"
+   ],
+   "metadata": {
+    "id": "umZpcpIkJva7"
+   }
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "attn = MaskedSelfAttention(dim=768, num_heads=12)\n",
+    "attn.load_state_dict({'c_attn.weight': state_dict['h.0.attn.c_attn.weight'],\n",
+    "                      'c_attn.bias': state_dict['h.0.attn.c_attn.bias'],\n",
+    "                      'c_proj.weight': state_dict['h.0.attn.c_proj.weight'],\n",
+    "                      'c_proj.bias': state_dict['h.0.attn.c_proj.bias']})\n",
+    "\n",
+    "torch.manual_seed(1337)\n",
+    "x = torch.randn(1, 10, 768)  # [batch_size, sequence_length, dim]\n",
+    "checksum = torch.sum(attn(x) * x)\n",
+    "assert abs(checksum.item() - 2703.6772) < 0.1, \"layer outputs do not match reference\"\n",
+    "assert not torch.allclose(attn(x[:, (1, 0), :])[:, (1, 0), :], attn(x[:, (0, 1), :])), \"masked attention must *not* be permutation-invariant\"\n",
+    "print(\"It works!\")"
+   ],
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "tg5Oj_PPM6hj",
+    "outputId": "ebeddb50-d805-47ae-cc3a-4d68d900b3a3"
+   },
+   "execution_count": null,
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "HRux8Qp2hkXr"
-      },
-      "outputs": [],
-      "source": [
-        "text = \"\"\"Almost two-thirds of the 1.5 million people who viewed this liveblog had Googled to discover\n",
-        " the latest on the Rosetta mission. They were treated to this detailed account by the Guardian’s science editor,\n",
-        " Ian Sample, and astronomy writer Stuart Clark of the moment scientists landed a robotic spacecraft on a comet\n",
-        " for the first time in history, and the delirious reaction it provoked at their headquarters in Germany.\n",
-        "  “We are there. We are sitting on the surface. Philae is talking to us,” said one scientist.\n",
-        "\"\"\"\n",
-        "\n",
-        "# Task: create a pipeline for named entity recognition, use task name 'ner' and search for the right model in the list\n",
-        "ner_model = <YOUR CODE>\n",
-        "\n",
-        "named_entities = ner_model(text)"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "It works!\n"
+     ]
+    }
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "We can now combine attention and MLP to build the full transformer layer:\n",
+    "\n",
+    "![img](https://i.imgur.com/1sq2vHO.png)"
+   ],
+   "metadata": {
+    "id": "rn6tgTHzOK4l"
+   }
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "class TransformerLayer(nn.Module):\n",
+    "    def __init__(self, dim: int, num_heads: int):\n",
+    "        super().__init__()\n",
+    "        self.ln_1 = nn.LayerNorm(dim)\n",
+    "        self.attn = MaskedSelfAttention(dim, num_heads)\n",
+    "        self.ln_2 = nn.LayerNorm(dim)\n",
+    "        self.mlp = FullyConnected(dim)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        <YOUR CODE - apply attention, mlp and layer normalization as shown in figure above>\n",
+    "        return <...>"
+   ],
+   "metadata": {
+    "id": "p3AH7YQvRpvU"
+   },
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "layer = TransformerLayer(dim=768, num_heads=12)\n",
+    "layer.load_state_dict({k[5:]: v for k, v in state_dict.items() if k.startswith('h.10.')})\n",
+    "assert abs(torch.sum(layer(x) * x).item() - 9874.7383) < 0.1\n",
+    "print(\"Good job!\")"
+   ],
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "Qzo_QeFVSNZa",
+    "outputId": "15613968-b4d7-4391-dfff-3b490951a125"
+   },
+   "execution_count": null,
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "hf57MRzSiSON"
-      },
-      "outputs": [],
-      "source": [
-        "print('OUTPUT:', named_entities)\n",
-        "word_to_entity = {item['word']: item['entity'] for item in named_entities}\n",
-        "assert 'org' in word_to_entity.get('Guardian').lower() and 'per' in word_to_entity.get('Stuart').lower()\n",
-        "print(\"All tests passed\")"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Good job!\n"
+     ]
+    }
+   ]
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "class GPT2(nn.Module):\n",
+    "    def __init__(self, vocab_size: int, dim: int, num_heads: int, num_layers: int, max_position_embeddings: int = 1024):\n",
+    "        super().__init__()\n",
+    "        self.wte = nn.Embedding(vocab_size, dim)  # token embeddings\n",
+    "        self.wpe = nn.Embedding(max_position_embeddings, dim)  # position embeddings\n",
+    "        self.ln_f = nn.LayerNorm(dim)   # final layer norm - goes after all transformer layers, but before logits\n",
+    "\n",
+    "        self.h = nn.Sequential(*(TransformerLayer(dim, num_heads) for layer in range(num_layers)))\n",
+    "\n",
+    "    def forward(self, input_ids):\n",
+    "        # input_ids.shape: [batch_size, sequence_length], int64 token ids\n",
+    "        position_ids = torch.arange(input_ids.shape[1], device=input_ids.device).unsqueeze(0)\n",
+    "\n",
+    "        token_embeddings = self.wte(input_ids)\n",
+    "        position_embeddings = self.wpe(position_ids)\n",
+    "        full_embeddings = token_embeddings + position_embeddings\n",
+    "\n",
+    "        transformer_output = self.h(full_embeddings)\n",
+    "        transformer_output_ln = self.ln_f(transformer_output)\n",
+    "\n",
+    "        # final layer: we predict logits by re-using token embeddings as linear weights\n",
+    "        output_logits = transformer_output_ln @ self.wte.weight.T\n",
+    "        return output_logits\n"
+   ],
+   "metadata": {
+    "id": "Mbqw9iuaSrYy"
+   },
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "tokenizer = transformers.AutoTokenizer.from_pretrained('gpt2', add_prefix_space=True)\n",
+    "model = GPT2(vocab_size=50257, dim=768, num_heads=12, num_layers=12)\n",
+    "model.load_state_dict(state_dict)\n",
+    "\n",
+    "input_ids = tokenizer(\"A quick\", return_tensors='pt')['input_ids']\n",
+    "\n",
+    "predicted_logits = model(input_ids)\n",
+    "most_likely_token_id = predicted_logits[:, -1].argmax().item()\n",
+    "\n",
+    "print(\"Prediction:\", tokenizer.decode(most_likely_token_id))"
+   ],
+   "metadata": {
+    "id": "p0m8jt66aDIh"
+   },
+   "execution_count": 7,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "text = \"The Fermi paradox \"\n",
+    "tokens = tokenizer.encode(text)\n",
+    "print(end=tokenizer.decode(tokens))\n",
+    "line_length = len(tokenizer.decode(tokens))\n",
+    "\n",
+    "for i in range(500):\n",
+    "    # Predict logits with your model\n",
+    "    with torch.no_grad():\n",
+    "        logits = model(torch.as_tensor([tokens]))\n",
+    "\n",
+    "    # Sample with probabilities\n",
+    "    p_next = torch.softmax(logits[0, -1, :], dim=-1).data.cpu().numpy()\n",
+    "    next_token_index = np.random.choice(len(p_next), p=p_next)\n",
+    "\n",
+    "    tokens.append(int(next_token_index))\n",
+    "    print(end=tokenizer.decode(tokens[-1]))\n",
+    "    line_length += len(tokenizer.decode(tokens[-1]))\n",
+    "    if line_length > 120:\n",
+    "      line_length = 0\n",
+    "      print()\n",
+    "\n"
+   ],
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "R8ql3Lo7dXZ2",
+    "outputId": "8db86d13-d16b-4f97-db87-0385c0d91426"
+   },
+   "execution_count": null,
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "ULMownz6sP9n"
-      },
-      "source": [
-        "### The building blocks of a pipeline\n",
-        "\n",
-        "Huggingface also allows you to access its pipelines on a lower level. There are two main abstractions for you:\n",
-        "* `Tokenizer` - converts from strings to token ids and back\n",
-        "* `Model` - a pytorch `nn.Module` with pre-trained weights\n",
-        "\n",
-        "You can use such models as part of your regular pytorch code: insert is as a layer in your model, apply it to a batch of data, backpropagate, optimize, etc."
-      ]
-    },
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      " The Fermi paradox  occurred when some comments about the way particle systems are integrated and decided to \"Shine\" by tearing\n",
+      " a mirror toward them from many faces to small ones. Kriegers reactions shows how much Kriegers can follow the dynamical rules\n",
+      " of flow. We then see a throwing of the matrix into the triangle, but I am not sure why this very generating results in depth\n",
+      " physics. I am certainly better interested in what is going on that the mirror riff is actually all about. Kriegers replied\n",
+      ": \"Third one takes up where the half dots of theCrystal meet Tron Braun:How have theFermi Eardrums made or left the fermi\n",
+      " magnetism, an observable capability of the mirror? This can be seen in all ways, e.g. everybody is talking about the fact\n",
+      " there is a Giant thima at the"
+     ]
+    }
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "V3NJ0ocgGqop"
+   },
+   "source": [
+    "__Reminder:__ after class, please go back to `MaskedSelfAttention.forward` above and finish the job: re-implement it without loops and without `F.scaled_dot_product_attention`!\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "\n",
+    "\n",
+    "### Here's how you can do the same with the `transformers` library"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "NTOHu124Gqop",
+    "outputId": "5bb38785-a7d9-47e1-a887-c03634945c0b"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "KMJbV0QVsO0Q"
-      },
-      "outputs": [],
-      "source": [
-        "tokenizer = transformers.AutoTokenizer.from_pretrained('bert-base-uncased')\n",
-        "model = transformers.AutoModel.from_pretrained('bert-base-uncased')\n"
-      ]
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
+     ]
     },
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "ZgSPHKPRxG6U"
-      },
-      "outputs": [],
-      "source": [
-        "lines = [\n",
-        "    \"Luke, I am your father.\",\n",
-        "    \"Life is what happens when you're busy making other plans.\",\n",
-        "    ]\n",
-        "\n",
-        "# tokenize a batch of inputs. \"pt\" means [p]y[t]orch tensors\n",
-        "tokens_info = tokenizer(lines, padding=True, truncation=True, return_tensors=\"pt\")\n",
-        "\n",
-        "for key in tokens_info:\n",
-        "    print(key, tokens_info[key])\n",
-        "\n",
-        "print(\"Detokenized:\")\n",
-        "for i in range(2):\n",
-        "    print(tokenizer.decode(tokens_info['input_ids'][i]))"
-      ]
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generated continuation:  The Fermi paradox  (with its paradoxical consequences which, if any, may also be taken to be the paradox of the Big Bang. If an explosion can only happen after the collapse of the matter in one of three states  or after the collapse of a\n"
+     ]
+    }
+   ],
+   "source": [
+    "tokenizer = transformers.AutoTokenizer.from_pretrained('gpt2', add_prefix_space=True)\n",
+    "model = transformers.AutoModelForCausalLM.from_pretrained('gpt2')\n",
+    "print('Generated text:', tokenizer.decode(\n",
+    "    model.generate(\n",
+    "        **tokenizer(\"The Fermi paradox \", return_tensors='pt'),\n",
+    "        do_sample=True, max_new_tokens=50\n",
+    "    ).flatten().numpy()\n",
+    "))\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "py38",
+   "language": "python",
+   "name": "py38"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.1"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "e85584c419aa445285eecb482778c7ba": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HBoxModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_5694548560b548dabed4b111766da60e",
+       "IPY_MODEL_7fceb5ec7dec415a9dfcd4261aeb4a3d",
+       "IPY_MODEL_1fd2361180bd49aa8e2012a5e379d0c1"
+      ],
+      "layout": "IPY_MODEL_77441a81d25c40819ae5af99ad70f3c8"
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "MJkbHxERyfL4"
-      },
-      "outputs": [],
-      "source": [
-        "# You can now apply the model to get embeddings\n",
-        "with torch.no_grad():\n",
-        "    out = model(**tokens_info)\n",
-        "\n",
-        "print(out['pooler_output'])"
-      ]
+    "5694548560b548dabed4b111766da60e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_48c3bc239f16407bbba2d36ac6e5aaae",
+      "placeholder": "​",
+      "style": "IPY_MODEL_5c87b856ffeb4c1dbf2d4637065d26ef",
+      "value": "Downloading (…)lve/main/config.json: 100%"
+     }
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "_Vij7Gc1wOaq"
-      },
-      "source": [
-        "Transformers knowledge hub: https://huggingface.co/transformers/"
-      ]
+    "7fceb5ec7dec415a9dfcd4261aeb4a3d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "FloatProgressModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_fbbe93e1d4a5493782b7a246f3cb1760",
+      "max": 629,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_c3e31f6d90a84af08ee33023be184849",
+      "value": 629
+     }
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "bwmTTyjUGqol"
-      },
-      "source": [
-        "### Build-a-transformer (2 points)\n",
-        "\n",
-        "In this section, you will implement a transformer language model layer by layer, then use it to generate (hopefully) coherent text.\n",
-        "\n",
-        "To understand how these layers work, please check out our guide to transformers from [nlp course for you -> transformers](https://lena-voita.github.io/nlp_course/seq2seq_and_attention.html#transformer_intro).\n",
-        "\n",
-        "\n",
-        "First, we download pre-trained weights for the [GPT2 model by OpenAI](https://openai.com/research/better-language-models) - a prominent model from 2019.\n",
-        "\n",
-        "\n",
-        "\n",
-        "Idea & code by: Ilya Beletsky"
-      ]
+    "1fd2361180bd49aa8e2012a5e379d0c1": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_6099e17176dd4eef829ac7c45badc9e2",
+      "placeholder": "​",
+      "style": "IPY_MODEL_7ea9a125c76543419007fd5083ed63cd",
+      "value": " 629/629 [00:00&lt;00:00, 12.4kB/s]"
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "vOcK0lGTGqol",
-        "outputId": "131fbc38-d4af-4e3b-b87b-f4c1b15d3162"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Weights: ['h.0.attn.c_attn.bias', 'h.0.attn.c_attn.weight', 'h.0.attn.c_proj.bias', 'h.0.attn.c_proj.weight', 'h.0.ln_1.bias', 'h.0.ln_1.weight', 'h.0.ln_2.bias', 'h.0.ln_2.weight', 'h.0.mlp.c_fc.bias', 'h.0.mlp.c_fc.weight', 'h.0.mlp.c_proj.bias', 'h.0.mlp.c_proj.weight', 'h.1.attn.c_attn.bias', 'h.1.attn.c_attn.weight', 'h.1. ...\n"
-          ]
-        }
-      ],
-      "source": [
-        "from huggingface_hub import hf_hub_download\n",
-        "state_dict = torch.load(hf_hub_download(\"gpt2\", filename=\"pytorch_model.bin\"))\n",
-        "for key, value in tuple(state_dict.items()):\n",
-        "    if key.startswith('h.') and key.endswith('.weight') and value.ndim == 2:\n",
-        "        value.transpose_(1, 0)  # <-- for compatibility with modern PyTorch modules\n",
-        "    if key.startswith('h.') and key.endswith('.attn.bias') and value.ndim == 4:\n",
-        "        state_dict.pop(key)  # <-- triangular binar masks, not needed in this code\n",
-        "\n",
-        "print('Weights:', repr(sorted(state_dict.keys()))[:320], '...')"
-      ]
+    "77441a81d25c40819ae5af99ad70f3c8": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "mr0SUtQnGqom"
-      },
-      "source": [
-        "In the next few cells, we shall implement the model layer by layer to make use of those weights.\n",
-        "\n",
-        "As you might recall, transformers contain two main layer types: attention and fully-connected layers.\n",
-        "\n",
-        "The fully connected layers are by far easier to understand, so we shall begin there:\n",
-        "\n",
-        "Please implement fully-connected layer __without residual or layer normalization__ (we'll add those in a bit)."
-      ]
+    "48c3bc239f16407bbba2d36ac6e5aaae": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "3Rh-6DX9Gqom"
-      },
-      "outputs": [],
-      "source": [
-        "class GeLUThatWasUsedInGPT2(nn.Module):\n",
-        "    def forward(self, x):\n",
-        "        return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))\n",
-        "\n",
-        "class FullyConnected(nn.Module):\n",
-        "    def __init__(self, dim: int):\n",
-        "        super().__init__()\n",
-        "        self.c_fc = nn.Linear(dim, 4  * dim)\n",
-        "        self.gelu = GeLUThatWasUsedInGPT2()\n",
-        "        self.c_proj = nn.Linear(4 * dim, dim)\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        # x.shape = [batch_size, seq_length, dim]\n",
-        "        <YOUR CODE HERE - COMPUTE LAYER OUTPUTS>\n",
-        "        return <MLP OUTPUTS>\n"
-      ]
+    "5c87b856ffeb4c1dbf2d4637065d26ef": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "iSVGKnHBGqom"
-      },
-      "source": [
-        "Now, let's test that it works with GPT-2 weights:"
-      ]
+    "fbbe93e1d4a5493782b7a246f3cb1760": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "CoWjZwZkGqom"
-      },
-      "outputs": [],
-      "source": [
-        "mlp = FullyConnected(dim=768)\n",
-        "mlp.load_state_dict({'c_fc.weight': state_dict['h.0.mlp.c_fc.weight'],\n",
-        "                     'c_fc.bias': state_dict['h.0.mlp.c_fc.bias'],\n",
-        "                     'c_proj.weight': state_dict['h.0.mlp.c_proj.weight'],\n",
-        "                     'c_proj.bias': state_dict['h.0.mlp.c_proj.bias']})\n",
-        "\n",
-        "torch.manual_seed(1337)\n",
-        "x = torch.randn(1, 2, 768)  # [batch_size, sequence_length, dim]\n",
-        "checksum = torch.sum(mlp(x) * x)\n",
-        "assert abs(checksum.item() - 1282.3315) < 0.1, \"layer outputs do not match reference\"\n",
-        "assert torch.allclose(mlp(x[:, (1, 0), :])[:, (1, 0), :], mlp(x)), \"mlp must be permutation-invariant\"\n",
-        "print(\"Seems legit!\")"
-      ]
+    "c3e31f6d90a84af08ee33023be184849": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "ProgressStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "VbfCevRwGqom"
-      },
-      "source": [
-        "Now, let's get to attention layers.\n",
-        "\n",
-        "Since GPT-2 needs to generate text from left to right, each generated token can only attend to tokens on the left (and itself). This kid of attention is called \"Masked\" self-attention, because it hides tokens to the right.\n",
-        "\n",
-        "As before, please implement masked self-attention __without layernorm or residual connections.__"
-      ]
+    "6099e17176dd4eef829ac7c45badc9e2": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "T6j7M4hLGqon"
-      },
-      "outputs": [],
-      "source": [
-        "class MaskedSelfAttention(nn.Module):\n",
-        "    def __init__(self, dim: int, num_heads: int):\n",
-        "        super().__init__()\n",
-        "        self.c_attn = nn.Linear(dim, dim * 3)  # query + key + value, combined\n",
-        "        self.c_proj = nn.Linear(dim, dim)  # output projection\n",
-        "        self.dim, self.num_heads = dim, num_heads\n",
-        "        self.head_size = dim // num_heads\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        q, k, v = self.c_attn(x).split(dim=-1, split_size=self.dim)\n",
-        "        assert q.shape == k.shape == v.shape == x.shape, \"q, k and v must have the same shape as x\"\n",
-        "\n",
-        "\n",
-        "        # Note: this is an inefficient implementation that uses a for-loop.\n",
-        "        # To get the full grade during homework, please re-implement this code:\n",
-        "        # 1) do not use for-loops (or other loops). Compute everything in parallel with vectorized operations\n",
-        "        # 2) do not use F.scaled_dot_product_attention - write your own attention code using basic PyTorch ops\n",
-        "        head_outputs = []\n",
-        "        for head_index in range(self.num_heads):\n",
-        "            head_selector = range(self.head_size * head_index, self.head_size * (head_index + 1))\n",
-        "\n",
-        "            head_queries = q[..., head_selector]\n",
-        "            head_keys = k[..., head_selector]\n",
-        "            head_values = v[..., head_selector]\n",
-        "\n",
-        "            single_head_output = F.scaled_dot_product_attention(\n",
-        "                <YOUR CODE HERE - fill in the missing parameters; see docs below>\n",
-        "                is_causal=True)\n",
-        "            # docs: https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html\n",
-        "            head_outputs.append(single_head_output)\n",
-        "\n",
-        "        combined_head_outputs = torch.cat(head_outputs, dim=-1)\n",
-        "        return self.c_proj(combined_head_outputs)\n"
-      ]
+    "7ea9a125c76543419007fd5083ed63cd": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
     },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "Test that it works"
+    "b2786e8af69e490cba5048fca7a0a51e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HBoxModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_f07e54fd856f4efc99bedb44abcece1b",
+       "IPY_MODEL_e0d2ca01fd7042a88a5493323a61409c",
+       "IPY_MODEL_e07d888d412d43f8ac4cc5285de07772"
       ],
-      "metadata": {
-        "id": "umZpcpIkJva7"
-      }
+      "layout": "IPY_MODEL_15227ca6cc5a4d219e0113da09926da4"
+     }
     },
-    {
-      "cell_type": "code",
-      "source": [
-        "attn = MaskedSelfAttention(dim=768, num_heads=12)\n",
-        "attn.load_state_dict({'c_attn.weight': state_dict['h.0.attn.c_attn.weight'],\n",
-        "                      'c_attn.bias': state_dict['h.0.attn.c_attn.bias'],\n",
-        "                      'c_proj.weight': state_dict['h.0.attn.c_proj.weight'],\n",
-        "                      'c_proj.bias': state_dict['h.0.attn.c_proj.bias']})\n",
-        "\n",
-        "torch.manual_seed(1337)\n",
-        "x = torch.randn(1, 10, 768)  # [batch_size, sequence_length, dim]\n",
-        "checksum = torch.sum(attn(x) * x)\n",
-        "assert abs(checksum.item() - 2703.6772) < 0.1, \"layer outputs do not match reference\"\n",
-        "assert not torch.allclose(attn(x[:, (1, 0), :])[:, (1, 0), :], attn(x[:, (0, 1), :])), \"masked attention must *not* be permutation-invariant\"\n",
-        "print(\"It works!\")"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "tg5Oj_PPM6hj",
-        "outputId": "ebeddb50-d805-47ae-cc3a-4d68d900b3a3"
-      },
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "It works!\n"
-          ]
-        }
-      ]
+    "f07e54fd856f4efc99bedb44abcece1b": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_cf83aaa31a1a47419db2127456beae69",
+      "placeholder": "​",
+      "style": "IPY_MODEL_1e463fce356b46859493cbd975163ef1",
+      "value": "Downloading model.safetensors: 100%"
+     }
     },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "We can now combine attention and MLP to build the full transformer layer:\n",
-        "\n",
-        "![img](https://i.imgur.com/1sq2vHO.png)"
-      ],
-      "metadata": {
-        "id": "rn6tgTHzOK4l"
-      }
+    "e0d2ca01fd7042a88a5493323a61409c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "FloatProgressModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_4ecb5a9efc3c42dd8faf4af9424ad5cf",
+      "max": 267832558,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_3af4702b95484b8cabd891b18ffc71b1",
+      "value": 267832558
+     }
     },
-    {
-      "cell_type": "code",
-      "source": [
-        "class TransformerLayer(nn.Module):\n",
-        "    def __init__(self, dim: int, num_heads: int):\n",
-        "        super().__init__()\n",
-        "        self.ln_1 = nn.LayerNorm(dim)\n",
-        "        self.attn = MaskedSelfAttention(dim, num_heads)\n",
-        "        self.ln_2 = nn.LayerNorm(dim)\n",
-        "        self.mlp = FullyConnected(dim)\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        <YOUR CODE - apply attention, mlp and layer normalization as shown in figure above>\n",
-        "        return <...>"
-      ],
-      "metadata": {
-        "id": "p3AH7YQvRpvU"
-      },
-      "execution_count": null,
-      "outputs": []
+    "e07d888d412d43f8ac4cc5285de07772": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_e9210c257b8a4091b86123271c8a5486",
+      "placeholder": "​",
+      "style": "IPY_MODEL_7eb0bf256b8d4975b0615aa96f00bbe1",
+      "value": " 268M/268M [00:01&lt;00:00, 172MB/s]"
+     }
     },
-    {
-      "cell_type": "code",
-      "source": [
-        "layer = TransformerLayer(dim=768, num_heads=12)\n",
-        "layer.load_state_dict({k[5:]: v for k, v in state_dict.items() if k.startswith('h.10.')})\n",
-        "assert abs(torch.sum(layer(x) * x).item() - 9874.7383) < 0.1\n",
-        "print(\"Good job!\")"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "Qzo_QeFVSNZa",
-        "outputId": "15613968-b4d7-4391-dfff-3b490951a125"
-      },
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Good job!\n"
-          ]
-        }
-      ]
+    "15227ca6cc5a4d219e0113da09926da4": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "code",
-      "source": [
-        "class GPT2(nn.Module):\n",
-        "    def __init__(self, vocab_size: int, dim: int, num_heads: int, num_layers: int, max_position_embeddings: int = 1024):\n",
-        "        super().__init__()\n",
-        "        self.wte = nn.Embedding(vocab_size, dim)  # token embeddings\n",
-        "        self.wpe = nn.Embedding(max_position_embeddings, dim)  # position embeddings\n",
-        "        self.ln_f = nn.LayerNorm(dim)   # final layer norm - goes after all transformer layers, but before logits\n",
-        "\n",
-        "        self.h = nn.Sequential(*(TransformerLayer(dim, num_heads) for layer in range(num_layers)))\n",
-        "\n",
-        "    def forward(self, input_ids):\n",
-        "        # input_ids.shape: [batch_size, sequence_length], int64 token ids\n",
-        "        position_ids = torch.arange(input_ids.shape[1], device=input_ids.device).unsqueeze(0)\n",
-        "\n",
-        "        token_embeddings = self.wte(input_ids)\n",
-        "        position_embeddings = self.wpe(position_ids)\n",
-        "        full_embeddings = token_embeddings + position_embeddings\n",
-        "\n",
-        "        transformer_output = self.h(full_embeddings)\n",
-        "        transformer_output_ln = self.ln_f(transformer_output)\n",
-        "\n",
-        "        # final layer: we predict logits by re-using token embeddings as linear weights\n",
-        "        output_logits = transformer_output_ln @ self.wte.weight.T\n",
-        "        return output_logits\n"
-      ],
-      "metadata": {
-        "id": "Mbqw9iuaSrYy"
-      },
-      "execution_count": null,
-      "outputs": []
+    "cf83aaa31a1a47419db2127456beae69": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "code",
-      "source": [
-        "tokenizer = transformers.AutoTokenizer.from_pretrained('gpt2', add_prefix_space=True)\n",
-        "model = GPT2(vocab_size=50257, dim=768, num_heads=12, num_layers=12)\n",
-        "model.load_state_dict(state_dict)\n",
-        "\n",
-        "input_ids = tokenizer(\"A quick\", return_tensors='pt')['input_ids']\n",
-        "\n",
-        "predicted_logits = model(input_ids)\n",
-        "most_likely_token_id = predicted_logits[:, -1].argmax().item()\n",
-        "\n",
-        "print(\"Prediction:\", tokenizer.decode(most_likely_token_id))"
-      ],
-      "metadata": {
-        "id": "p0m8jt66aDIh"
-      },
-      "execution_count": 7,
-      "outputs": []
+    "1e463fce356b46859493cbd975163ef1": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
     },
-    {
-      "cell_type": "code",
-      "source": [
-        "text = \"The Fermi paradox \"\n",
-        "tokens = tokenizer.encode(text)\n",
-        "print(end=tokenizer.decode(tokens))\n",
-        "line_length = len(tokenizer.decode(tokens))\n",
-        "\n",
-        "for i in range(500):\n",
-        "    # Predict logits with your model\n",
-        "    with torch.no_grad():\n",
-        "        logits = model(torch.as_tensor([tokens]))\n",
-        "\n",
-        "    # Sample with probabilities\n",
-        "    p_next = torch.softmax(logits[0, -1, :], dim=-1).data.cpu().numpy()\n",
-        "    next_token_index = np.random.choice(len(p_next), p=p_next)\n",
-        "\n",
-        "    tokens.append(int(next_token_index))\n",
-        "    print(end=tokenizer.decode(tokens[-1]))\n",
-        "    line_length += len(tokenizer.decode(tokens[-1]))\n",
-        "    if line_length > 120:\n",
-        "      line_length = 0\n",
-        "      print()\n",
-        "\n"
+    "4ecb5a9efc3c42dd8faf4af9424ad5cf": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "3af4702b95484b8cabd891b18ffc71b1": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "ProgressStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "e9210c257b8a4091b86123271c8a5486": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "7eb0bf256b8d4975b0615aa96f00bbe1": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "fc132bfb7d6447ad9900af7b80806aa1": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HBoxModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_ab6d124394214415a2580983c88bbb11",
+       "IPY_MODEL_8578b4d2279f40dc846fcdfc2df55742",
+       "IPY_MODEL_7213248ebe1f4e28a00a93eb33169170"
       ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "R8ql3Lo7dXZ2",
-        "outputId": "8db86d13-d16b-4f97-db87-0385c0d91426"
-      },
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            " The Fermi paradox  occurred when some comments about the way particle systems are integrated and decided to \"Shine\" by tearing\n",
-            " a mirror toward them from many faces to small ones. Kriegers reactions shows how much Kriegers can follow the dynamical rules\n",
-            " of flow. We then see a throwing of the matrix into the triangle, but I am not sure why this very generating results in depth\n",
-            " physics. I am certainly better interested in what is going on that the mirror riff is actually all about. Kriegers replied\n",
-            ": \"Third one takes up where the half dots of theCrystal meet Tron Braun:How have theFermi Eardrums made or left the fermi\n",
-            " magnetism, an observable capability of the mirror? This can be seen in all ways, e.g. everybody is talking about the fact\n",
-            " there is a Giant thima at the"
-          ]
-        }
-      ]
+      "layout": "IPY_MODEL_b1497c0d7c3b4c04a91302a2cbc96db1"
+     }
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "V3NJ0ocgGqop"
-      },
-      "source": [
-        "__Reminder:__ after class, please go to `MaskedSelfAttention.forward` above and finish the job!\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "```\n",
-        "\n",
-        "\n",
-        "### Here's how you can do the same with transformers library"
-      ]
+    "ab6d124394214415a2580983c88bbb11": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_60e971374e5448d08e9698a5594ff60a",
+      "placeholder": "​",
+      "style": "IPY_MODEL_6c8113bb76024debaf3256431cbefe35",
+      "value": "Downloading (…)okenizer_config.json: 100%"
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "NTOHu124Gqop",
-        "outputId": "5bb38785-a7d9-47e1-a887-c03634945c0b"
-      },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
-          ]
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Generated continuation:  The Fermi paradox  (with its paradoxical consequences which, if any, may also be taken to be the paradox of the Big Bang. If an explosion can only happen after the collapse of the matter in one of three states  or after the collapse of a\n"
-          ]
-        }
+    "8578b4d2279f40dc846fcdfc2df55742": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "FloatProgressModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_73d8ad019f044a6998e81273a922745a",
+      "max": 48,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_a445a35355424cb0b4c7abf3d4e6f32d",
+      "value": 48
+     }
+    },
+    "7213248ebe1f4e28a00a93eb33169170": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_d71a81e25f5b4f88b8361c62e2ab9f44",
+      "placeholder": "​",
+      "style": "IPY_MODEL_4f084bbe71bb433ba0e7ee50fccb71cc",
+      "value": " 48.0/48.0 [00:00&lt;00:00, 1.58kB/s]"
+     }
+    },
+    "b1497c0d7c3b4c04a91302a2cbc96db1": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "60e971374e5448d08e9698a5594ff60a": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "6c8113bb76024debaf3256431cbefe35": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "73d8ad019f044a6998e81273a922745a": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "a445a35355424cb0b4c7abf3d4e6f32d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "ProgressStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "d71a81e25f5b4f88b8361c62e2ab9f44": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "4f084bbe71bb433ba0e7ee50fccb71cc": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "5cff2bc12fec494e9e19b8a956186df6": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HBoxModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_787781c894bd46c6990f2513d9f2c79c",
+       "IPY_MODEL_3f4eae6f79054676976f87b9524c4cf5",
+       "IPY_MODEL_237d34743ee64428a9d22ee51e0423f3"
       ],
-      "source": [
-        "tokenizer = transformers.AutoTokenizer.from_pretrained('gpt2', add_prefix_space=True)\n",
-        "model = transformers.AutoModelForCausalLM.from_pretrained('gpt2')\n",
-        "print('Generated text:', tokenizer.decode(\n",
-        "    model.generate(\n",
-        "        **tokenizer(\"The Fermi paradox \", return_tensors='pt'),\n",
-        "        do_sample=True, max_new_tokens=50\n",
-        "    ).flatten().numpy()\n",
-        "))\n"
-      ]
-    }
-  ],
-  "metadata": {
-    "accelerator": "GPU",
-    "colab": {
-      "provenance": []
+      "layout": "IPY_MODEL_b095428dcb8a4408854f854de54d5692"
+     }
     },
-    "kernelspec": {
-      "display_name": "py38",
-      "language": "python",
-      "name": "py38"
+    "787781c894bd46c6990f2513d9f2c79c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_1b76512711884ffb81d3c3505b8fd137",
+      "placeholder": "​",
+      "style": "IPY_MODEL_2f53a3d1d0f44192beeb6ed8d40e762f",
+      "value": "Downloading (…)solve/main/vocab.txt: 100%"
+     }
     },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.8.1"
+    "3f4eae6f79054676976f87b9524c4cf5": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "FloatProgressModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_1b194af3033342b0b04dc12b3e055ff2",
+      "max": 231508,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_391b8f44109544328617fbc3a3c3b9fc",
+      "value": 231508
+     }
     },
-    "widgets": {
-      "application/vnd.jupyter.widget-state+json": {
-        "e85584c419aa445285eecb482778c7ba": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_5694548560b548dabed4b111766da60e",
-              "IPY_MODEL_7fceb5ec7dec415a9dfcd4261aeb4a3d",
-              "IPY_MODEL_1fd2361180bd49aa8e2012a5e379d0c1"
-            ],
-            "layout": "IPY_MODEL_77441a81d25c40819ae5af99ad70f3c8"
-          }
-        },
-        "5694548560b548dabed4b111766da60e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_48c3bc239f16407bbba2d36ac6e5aaae",
-            "placeholder": "​",
-            "style": "IPY_MODEL_5c87b856ffeb4c1dbf2d4637065d26ef",
-            "value": "Downloading (…)lve/main/config.json: 100%"
-          }
-        },
-        "7fceb5ec7dec415a9dfcd4261aeb4a3d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_fbbe93e1d4a5493782b7a246f3cb1760",
-            "max": 629,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_c3e31f6d90a84af08ee33023be184849",
-            "value": 629
-          }
-        },
-        "1fd2361180bd49aa8e2012a5e379d0c1": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_6099e17176dd4eef829ac7c45badc9e2",
-            "placeholder": "​",
-            "style": "IPY_MODEL_7ea9a125c76543419007fd5083ed63cd",
-            "value": " 629/629 [00:00&lt;00:00, 12.4kB/s]"
-          }
-        },
-        "77441a81d25c40819ae5af99ad70f3c8": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "48c3bc239f16407bbba2d36ac6e5aaae": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "5c87b856ffeb4c1dbf2d4637065d26ef": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "fbbe93e1d4a5493782b7a246f3cb1760": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "c3e31f6d90a84af08ee33023be184849": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "6099e17176dd4eef829ac7c45badc9e2": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "7ea9a125c76543419007fd5083ed63cd": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "b2786e8af69e490cba5048fca7a0a51e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_f07e54fd856f4efc99bedb44abcece1b",
-              "IPY_MODEL_e0d2ca01fd7042a88a5493323a61409c",
-              "IPY_MODEL_e07d888d412d43f8ac4cc5285de07772"
-            ],
-            "layout": "IPY_MODEL_15227ca6cc5a4d219e0113da09926da4"
-          }
-        },
-        "f07e54fd856f4efc99bedb44abcece1b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_cf83aaa31a1a47419db2127456beae69",
-            "placeholder": "​",
-            "style": "IPY_MODEL_1e463fce356b46859493cbd975163ef1",
-            "value": "Downloading model.safetensors: 100%"
-          }
-        },
-        "e0d2ca01fd7042a88a5493323a61409c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_4ecb5a9efc3c42dd8faf4af9424ad5cf",
-            "max": 267832558,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_3af4702b95484b8cabd891b18ffc71b1",
-            "value": 267832558
-          }
-        },
-        "e07d888d412d43f8ac4cc5285de07772": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_e9210c257b8a4091b86123271c8a5486",
-            "placeholder": "​",
-            "style": "IPY_MODEL_7eb0bf256b8d4975b0615aa96f00bbe1",
-            "value": " 268M/268M [00:01&lt;00:00, 172MB/s]"
-          }
-        },
-        "15227ca6cc5a4d219e0113da09926da4": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "cf83aaa31a1a47419db2127456beae69": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "1e463fce356b46859493cbd975163ef1": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "4ecb5a9efc3c42dd8faf4af9424ad5cf": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "3af4702b95484b8cabd891b18ffc71b1": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "e9210c257b8a4091b86123271c8a5486": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "7eb0bf256b8d4975b0615aa96f00bbe1": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "fc132bfb7d6447ad9900af7b80806aa1": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_ab6d124394214415a2580983c88bbb11",
-              "IPY_MODEL_8578b4d2279f40dc846fcdfc2df55742",
-              "IPY_MODEL_7213248ebe1f4e28a00a93eb33169170"
-            ],
-            "layout": "IPY_MODEL_b1497c0d7c3b4c04a91302a2cbc96db1"
-          }
-        },
-        "ab6d124394214415a2580983c88bbb11": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_60e971374e5448d08e9698a5594ff60a",
-            "placeholder": "​",
-            "style": "IPY_MODEL_6c8113bb76024debaf3256431cbefe35",
-            "value": "Downloading (…)okenizer_config.json: 100%"
-          }
-        },
-        "8578b4d2279f40dc846fcdfc2df55742": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_73d8ad019f044a6998e81273a922745a",
-            "max": 48,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_a445a35355424cb0b4c7abf3d4e6f32d",
-            "value": 48
-          }
-        },
-        "7213248ebe1f4e28a00a93eb33169170": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_d71a81e25f5b4f88b8361c62e2ab9f44",
-            "placeholder": "​",
-            "style": "IPY_MODEL_4f084bbe71bb433ba0e7ee50fccb71cc",
-            "value": " 48.0/48.0 [00:00&lt;00:00, 1.58kB/s]"
-          }
-        },
-        "b1497c0d7c3b4c04a91302a2cbc96db1": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "60e971374e5448d08e9698a5594ff60a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "6c8113bb76024debaf3256431cbefe35": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "73d8ad019f044a6998e81273a922745a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "a445a35355424cb0b4c7abf3d4e6f32d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "d71a81e25f5b4f88b8361c62e2ab9f44": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "4f084bbe71bb433ba0e7ee50fccb71cc": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "5cff2bc12fec494e9e19b8a956186df6": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_787781c894bd46c6990f2513d9f2c79c",
-              "IPY_MODEL_3f4eae6f79054676976f87b9524c4cf5",
-              "IPY_MODEL_237d34743ee64428a9d22ee51e0423f3"
-            ],
-            "layout": "IPY_MODEL_b095428dcb8a4408854f854de54d5692"
-          }
-        },
-        "787781c894bd46c6990f2513d9f2c79c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_1b76512711884ffb81d3c3505b8fd137",
-            "placeholder": "​",
-            "style": "IPY_MODEL_2f53a3d1d0f44192beeb6ed8d40e762f",
-            "value": "Downloading (…)solve/main/vocab.txt: 100%"
-          }
-        },
-        "3f4eae6f79054676976f87b9524c4cf5": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_1b194af3033342b0b04dc12b3e055ff2",
-            "max": 231508,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_391b8f44109544328617fbc3a3c3b9fc",
-            "value": 231508
-          }
-        },
-        "237d34743ee64428a9d22ee51e0423f3": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_6f36db95f6fb4b4687c7130fc23c4348",
-            "placeholder": "​",
-            "style": "IPY_MODEL_82a35a7ae5e441a1b66534a3eab5e765",
-            "value": " 232k/232k [00:00&lt;00:00, 3.06MB/s]"
-          }
-        },
-        "b095428dcb8a4408854f854de54d5692": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "1b76512711884ffb81d3c3505b8fd137": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "2f53a3d1d0f44192beeb6ed8d40e762f": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "1b194af3033342b0b04dc12b3e055ff2": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "391b8f44109544328617fbc3a3c3b9fc": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "6f36db95f6fb4b4687c7130fc23c4348": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "82a35a7ae5e441a1b66534a3eab5e765": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        }
-      }
+    "237d34743ee64428a9d22ee51e0423f3": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_6f36db95f6fb4b4687c7130fc23c4348",
+      "placeholder": "​",
+      "style": "IPY_MODEL_82a35a7ae5e441a1b66534a3eab5e765",
+      "value": " 232k/232k [00:00&lt;00:00, 3.06MB/s]"
+     }
+    },
+    "b095428dcb8a4408854f854de54d5692": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "1b76512711884ffb81d3c3505b8fd137": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "2f53a3d1d0f44192beeb6ed8d40e762f": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "1b194af3033342b0b04dc12b3e055ff2": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "391b8f44109544328617fbc3a3c3b9fc": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "ProgressStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "6f36db95f6fb4b4687c7130fc23c4348": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "82a35a7ae5e441a1b66534a3eab5e765": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
     }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 0
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
 }