From dafa0892e3e4f6612a0a778eec753061b070b4b2 Mon Sep 17 00:00:00 2001 From: Sergey Shvets Date: Tue, 31 Oct 2023 11:53:21 +0300 Subject: [PATCH] Additional assert statement for dims alignment --- week05_transfer/seminar.ipynb | 4380 ++++++++++++++++----------------- 1 file changed, 2190 insertions(+), 2190 deletions(-) diff --git a/week05_transfer/seminar.ipynb b/week05_transfer/seminar.ipynb index f92684c0..9caa6dbc 100644 --- a/week05_transfer/seminar.ipynb +++ b/week05_transfer/seminar.ipynb @@ -1,2243 +1,2243 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zriTdjauH8iQ", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "e25d0f3f-2879-4d80-8cad-f949cf5b8453" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m22.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m48.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m25.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h" - ] - } - ], - "source": [ - "%pip install -q transformers huggingface_hub\n", - "import math\n", - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F" - ] + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": 
"zriTdjauH8iQ", + "colab": { + "base_uri": "https://localhost:8080/" }, + "outputId": "e25d0f3f-2879-4d80-8cad-f949cf5b8453" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "xQiRPWWHlSgv" - }, - "source": [ - "### Using pre-trained transformers (2 points)\n", - "_for fun and profit_\n", - "\n", - "There are many toolkits that let you access pre-trained transformer models, but the most powerful and convenient by far is [`huggingface/transformers`](https://github.com/huggingface/transformers). In this week's practice, you'll learn how to download, apply and modify pre-trained transformers for a range of tasks. Buckle up, we're going in!\n", - "\n", - "\n", - "__Pipelines:__ if all you want is to apply a pre-trained model, you can do that in one line of code using pipeline. Huggingface/transformers has a selection of pre-configured pipelines for masked language modelling, sentiment classification, question aswering, etc. ([see full list here](https://huggingface.co/transformers/main_classes/pipelines.html))\n", - "\n", - "A typical pipeline includes:\n", - "* pre-processing, e.g. tokenization, subword segmentation\n", - "* a backbone model, e.g. 
bert finetuned for classification\n", - "* output post-processing\n", - "\n", - "Let's see it in action:" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m7.7/7.7 MB\u001B[0m \u001B[31m24.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m302.0/302.0 kB\u001B[0m \u001B[31m22.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m3.8/3.8 MB\u001B[0m \u001B[31m51.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m1.3/1.3 MB\u001B[0m \u001B[31m48.7 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m295.0/295.0 kB\u001B[0m \u001B[31m25.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25h" + ] + } + ], + "source": [ + "%pip install -q transformers huggingface_hub\n", + "import math\n", + "import numpy as np\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xQiRPWWHlSgv" + }, + "source": [ + "### Using pre-trained transformers (2 points)\n", + "_for fun and profit_\n", + "\n", + "There are many toolkits that let you access pre-trained transformer models, but the most powerful and convenient by far is [`huggingface/transformers`](https://github.com/huggingface/transformers). In this week's practice, you'll learn how to download, apply and modify pre-trained transformers for a range of tasks. Buckle up, we're going in!\n", + "\n", + "\n", + "__Pipelines:__ if all you want is to apply a pre-trained model, you can do that in one line of code using pipeline. 
Huggingface/transformers has a selection of pre-configured pipelines for masked language modelling, sentiment classification, question answering, etc. ([see full list here](https://huggingface.co/transformers/main_classes/pipelines.html))\n", + "\n", + "A typical pipeline includes:\n", + "* pre-processing, e.g. tokenization, subword segmentation\n", + "* a backbone model, e.g. bert finetuned for classification\n", + "* output post-processing\n", + "\n", + "Let's see it in action:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rP1KFtvLlJHR", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 162, + "referenced_widgets": [ + "e85584c419aa445285eecb482778c7ba", + "5694548560b548dabed4b111766da60e", + "7fceb5ec7dec415a9dfcd4261aeb4a3d", + "1fd2361180bd49aa8e2012a5e379d0c1", + "77441a81d25c40819ae5af99ad70f3c8", + "48c3bc239f16407bbba2d36ac6e5aaae", + "5c87b856ffeb4c1dbf2d4637065d26ef", + "fbbe93e1d4a5493782b7a246f3cb1760", + "c3e31f6d90a84af08ee33023be184849", + "6099e17176dd4eef829ac7c45badc9e2", + "7ea9a125c76543419007fd5083ed63cd", + "b2786e8af69e490cba5048fca7a0a51e", + "f07e54fd856f4efc99bedb44abcece1b", + "e0d2ca01fd7042a88a5493323a61409c", + "e07d888d412d43f8ac4cc5285de07772", + "15227ca6cc5a4d219e0113da09926da4", + "cf83aaa31a1a47419db2127456beae69", + "1e463fce356b46859493cbd975163ef1", + "4ecb5a9efc3c42dd8faf4af9424ad5cf", + "3af4702b95484b8cabd891b18ffc71b1", + "e9210c257b8a4091b86123271c8a5486", + "7eb0bf256b8d4975b0615aa96f00bbe1", + "fc132bfb7d6447ad9900af7b80806aa1", + "ab6d124394214415a2580983c88bbb11", + "8578b4d2279f40dc846fcdfc2df55742", + "7213248ebe1f4e28a00a93eb33169170", + "b1497c0d7c3b4c04a91302a2cbc96db1", + "60e971374e5448d08e9698a5594ff60a", + "6c8113bb76024debaf3256431cbefe35", + "73d8ad019f044a6998e81273a922745a", + "a445a35355424cb0b4c7abf3d4e6f32d", + "d71a81e25f5b4f88b8361c62e2ab9f44", + "4f084bbe71bb433ba0e7ee50fccb71cc", + "5cff2bc12fec494e9e19b8a956186df6", + 
"787781c894bd46c6990f2513d9f2c79c", + "3f4eae6f79054676976f87b9524c4cf5", + "237d34743ee64428a9d22ee51e0423f3", + "b095428dcb8a4408854f854de54d5692", + "1b76512711884ffb81d3c3505b8fd137", + "2f53a3d1d0f44192beeb6ed8d40e762f", + "1b194af3033342b0b04dc12b3e055ff2", + "391b8f44109544328617fbc3a3c3b9fc", + "6f36db95f6fb4b4687c7130fc23c4348", + "82a35a7ae5e441a1b66534a3eab5e765" + ] }, + "outputId": "f4e9f426-6445-4d69-b7b7-dd083d7bdebf" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rP1KFtvLlJHR", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 162, - "referenced_widgets": [ - "e85584c419aa445285eecb482778c7ba", - "5694548560b548dabed4b111766da60e", - "7fceb5ec7dec415a9dfcd4261aeb4a3d", - "1fd2361180bd49aa8e2012a5e379d0c1", - "77441a81d25c40819ae5af99ad70f3c8", - "48c3bc239f16407bbba2d36ac6e5aaae", - "5c87b856ffeb4c1dbf2d4637065d26ef", - "fbbe93e1d4a5493782b7a246f3cb1760", - "c3e31f6d90a84af08ee33023be184849", - "6099e17176dd4eef829ac7c45badc9e2", - "7ea9a125c76543419007fd5083ed63cd", - "b2786e8af69e490cba5048fca7a0a51e", - "f07e54fd856f4efc99bedb44abcece1b", - "e0d2ca01fd7042a88a5493323a61409c", - "e07d888d412d43f8ac4cc5285de07772", - "15227ca6cc5a4d219e0113da09926da4", - "cf83aaa31a1a47419db2127456beae69", - "1e463fce356b46859493cbd975163ef1", - "4ecb5a9efc3c42dd8faf4af9424ad5cf", - "3af4702b95484b8cabd891b18ffc71b1", - "e9210c257b8a4091b86123271c8a5486", - "7eb0bf256b8d4975b0615aa96f00bbe1", - "fc132bfb7d6447ad9900af7b80806aa1", - "ab6d124394214415a2580983c88bbb11", - "8578b4d2279f40dc846fcdfc2df55742", - "7213248ebe1f4e28a00a93eb33169170", - "b1497c0d7c3b4c04a91302a2cbc96db1", - "60e971374e5448d08e9698a5594ff60a", - "6c8113bb76024debaf3256431cbefe35", - "73d8ad019f044a6998e81273a922745a", - "a445a35355424cb0b4c7abf3d4e6f32d", - "d71a81e25f5b4f88b8361c62e2ab9f44", - "4f084bbe71bb433ba0e7ee50fccb71cc", - "5cff2bc12fec494e9e19b8a956186df6", - "787781c894bd46c6990f2513d9f2c79c", - 
"3f4eae6f79054676976f87b9524c4cf5", - "237d34743ee64428a9d22ee51e0423f3", - "b095428dcb8a4408854f854de54d5692", - "1b76512711884ffb81d3c3505b8fd137", - "2f53a3d1d0f44192beeb6ed8d40e762f", - "1b194af3033342b0b04dc12b3e055ff2", - "391b8f44109544328617fbc3a3c3b9fc", - "6f36db95f6fb4b4687c7130fc23c4348", - "82a35a7ae5e441a1b66534a3eab5e765" - ] - }, - "outputId": "f4e9f426-6445-4d69-b7b7-dd083d7bdebf" - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Downloading (…)lve/main/config.json: 0%| | 0.00/629 [00:00\n", - "outputs = \n", - "\n", - "assert sum(outputs.values()) == 3 and outputs[base64.decodebytes(b'YmFyYXRoZW9u\\n').decode()] == False\n", - "print(\"Well done!\")" - ] + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading model.safetensors: 0%| | 0.00/268M [00:00)" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "[{'label': 'POSITIVE', 'score': 0.9998860359191895}]\n" + ] + } + ], + "source": [ + "import transformers\n", + "classifier = transformers.pipeline('sentiment-analysis', model=\"distilbert-base-uncased-finetuned-sst-2-english\")\n", + "\n", + "print(classifier(\"BERT is amazing!\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nYUNuyXMn5l9" + }, + "outputs": [], + "source": [ + "import base64\n", + "data = {\n", + " 'arryn': 'As High as Honor.',\n", + " 'baratheon': 'Ours is the fury.',\n", + " 'stark': 'Winter is coming.',\n", + " 'tyrell': 'Growing strong.'\n", + "}\n", + "\n", + "# YOUR CODE: predict sentiment for each noble house and create outputs dict\n", + "<...>\n", + "outputs = \n", + "\n", + "assert sum(outputs.values()) == 3 and outputs[base64.decodebytes(b'YmFyYXRoZW9u\\n').decode()] == False\n", + "print(\"Well done!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BRDhIH-XpSNo" + }, + "source": [ + "You can also access vanilla Masked Language Model that was trained to predict masked words. 
Here's how:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pa-8noIllRbZ" + }, + "outputs": [], + "source": [ + "mlm_model = transformers.pipeline('fill-mask', model=\"bert-base-uncased\")\n", + "MASK = mlm_model.tokenizer.mask_token\n", + "\n", + "for hypo in mlm_model(f\"Donald {MASK} is the president of the united states.\"):\n", + " print(f\"P={hypo['score']:.5f}\", hypo['sequence'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9NxeG1Y5pwX1" + }, + "outputs": [], + "source": [ + "# Your turn: use bert to recall what year was the Soviet Union founded in\n", + "mlm_model()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YJxRFzCSq903" + }, + "source": [ + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "\n", + "Huggingface offers hundreds of pre-trained models that specialize on different tasks. You can quickly find the model you need using [this list](https://huggingface.co/models).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HRux8Qp2hkXr" + }, + "outputs": [], + "source": [ + "text = \"\"\"Almost two-thirds of the 1.5 million people who viewed this liveblog had Googled to discover\n", + " the latest on the Rosetta mission. They were treated to this detailed account by the Guardian’s science editor,\n", + " Ian Sample, and astronomy writer Stuart Clark of the moment scientists landed a robotic spacecraft on a comet\n", + " for the first time in history, and the delirious reaction it provoked at their headquarters in Germany.\n", + " “We are there. We are sitting on the surface. 
Philae is talking to us,” said one scientist.\n", + "\"\"\"\n", + "\n", + "# Task: create a pipeline for named entity recognition, use task name 'ner' and search for the right model in the list\n", + "ner_model = \n", + "\n", + "named_entities = ner_model(text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hf57MRzSiSON" + }, + "outputs": [], + "source": [ + "print('OUTPUT:', named_entities)\n", + "word_to_entity = {item['word']: item['entity'] for item in named_entities}\n", + "assert 'org' in word_to_entity.get('Guardian').lower() and 'per' in word_to_entity.get('Stuart').lower()\n", + "print(\"All tests passed\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ULMownz6sP9n" + }, + "source": [ + "### The building blocks of a pipeline\n", + "\n", + "Huggingface also allows you to access its pipelines on a lower level. There are two main abstractions for you:\n", + "* `Tokenizer` - converts from strings to token ids and back\n", + "* `Model` - a pytorch `nn.Module` with pre-trained weights\n", + "\n", + "You can use such models as part of your regular pytorch code: insert it as a layer in your model, apply it to a batch of data, backpropagate, optimize, etc." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KMJbV0QVsO0Q" + }, + "outputs": [], + "source": [ + "tokenizer = transformers.AutoTokenizer.from_pretrained('bert-base-uncased')\n", + "model = transformers.AutoModel.from_pretrained('bert-base-uncased')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZgSPHKPRxG6U" + }, + "outputs": [], + "source": [ + "lines = [\n", + " \"Luke, I am your father.\",\n", + " \"Life is what happens when you're busy making other plans.\",\n", + " ]\n", + "\n", + "# tokenize a batch of inputs. 
\"pt\" means [p]y[t]orch tensors\n", + "tokens_info = tokenizer(lines, padding=True, truncation=True, return_tensors=\"pt\")\n", + "\n", + "for key in tokens_info:\n", + " print(key, tokens_info[key])\n", + "\n", + "print(\"Detokenized:\")\n", + "for i in range(2):\n", + " print(tokenizer.decode(tokens_info['input_ids'][i]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MJkbHxERyfL4" + }, + "outputs": [], + "source": [ + "# You can now apply the model to get embeddings\n", + "with torch.no_grad():\n", + " out = model(**tokens_info)\n", + "\n", + "print(out['pooler_output'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_Vij7Gc1wOaq" + }, + "source": [ + "Transformers knowledge hub: https://huggingface.co/transformers/" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bwmTTyjUGqol" + }, + "source": [ + "### Build-a-transformer (2 points)\n", + "\n", + "In this section, you will implement a transformer language model layer by layer, then use it to generate (hopefully) coherent text.\n", + "\n", + "To understand how these layers work, please check out our guide to transformers from [nlp course for you -> transformers](https://lena-voita.github.io/nlp_course/seq2seq_and_attention.html#transformer_intro).\n", + "\n", + "\n", + "First, we download pre-trained weights for the [GPT2 model by OpenAI](https://openai.com/research/better-language-models) - a prominent model from 2019.\n", + "\n", + "\n", + "\n", + "Idea & code by: Ilya Beletsky" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "vOcK0lGTGqol", + "outputId": "131fbc38-d4af-4e3b-b87b-f4c1b15d3162" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "YJxRFzCSq903" - }, - "source": [ - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "\n", - "Huggingface offers hundreds of pre-trained models that 
specialize on different tasks. You can quickly find the model you need using [this list](https://huggingface.co/models).\n" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Weights: ['h.0.attn.c_attn.bias', 'h.0.attn.c_attn.weight', 'h.0.attn.c_proj.bias', 'h.0.attn.c_proj.weight', 'h.0.ln_1.bias', 'h.0.ln_1.weight', 'h.0.ln_2.bias', 'h.0.ln_2.weight', 'h.0.mlp.c_fc.bias', 'h.0.mlp.c_fc.weight', 'h.0.mlp.c_proj.bias', 'h.0.mlp.c_proj.weight', 'h.1.attn.c_attn.bias', 'h.1.attn.c_attn.weight', 'h.1. ...\n" + ] + } + ], + "source": [ + "from huggingface_hub import hf_hub_download\n", + "state_dict = torch.load(hf_hub_download(\"gpt2\", filename=\"pytorch_model.bin\"))\n", + "for key, value in tuple(state_dict.items()):\n", + " if key.startswith('h.') and key.endswith('.weight') and value.ndim == 2:\n", + " value.transpose_(1, 0) # <-- for compatibility with modern PyTorch modules\n", + " if key.startswith('h.') and key.endswith('.attn.bias') and value.ndim == 4:\n", + " state_dict.pop(key) # <-- triangular binar masks, not needed in this code\n", + "\n", + "print('Weights:', repr(sorted(state_dict.keys()))[:320], '...')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mr0SUtQnGqom" + }, + "source": [ + "In the next few cells, we shall implement the model layer by layer to make use of those weights.\n", + "\n", + "As you might recall, transformers contain two main layer types: attention and fully-connected layers.\n", + "\n", + "The fully connected layers are by far easier to understand, so we shall begin there:\n", + "\n", + "Please implement fully-connected layer __without residual or layer normalization__ (we'll add those in a bit)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3Rh-6DX9Gqom" + }, + "outputs": [], + "source": [ + "class GeLUThatWasUsedInGPT2(nn.Module):\n", + " def forward(self, x):\n", + " return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))\n", + "\n", + "class FullyConnected(nn.Module):\n", + " def __init__(self, dim: int):\n", + " super().__init__()\n", + " self.c_fc = nn.Linear(dim, 4 * dim)\n", + " self.gelu = GeLUThatWasUsedInGPT2()\n", + " self.c_proj = nn.Linear(4 * dim, dim)\n", + "\n", + " def forward(self, x):\n", + " # x.shape = [batch_size, seq_length, dim]\n", + " \n", + " return \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iSVGKnHBGqom" + }, + "source": [ + "Now, let's test that it works with GPT-2 weights:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CoWjZwZkGqom" + }, + "outputs": [], + "source": [ + "mlp = FullyConnected(dim=768)\n", + "mlp.load_state_dict({'c_fc.weight': state_dict['h.0.mlp.c_fc.weight'],\n", + " 'c_fc.bias': state_dict['h.0.mlp.c_fc.bias'],\n", + " 'c_proj.weight': state_dict['h.0.mlp.c_proj.weight'],\n", + " 'c_proj.bias': state_dict['h.0.mlp.c_proj.bias']})\n", + "\n", + "torch.manual_seed(1337)\n", + "x = torch.randn(1, 2, 768) # [batch_size, sequence_length, dim]\n", + "checksum = torch.sum(mlp(x) * x)\n", + "assert abs(checksum.item() - 1282.3315) < 0.1, \"layer outputs do not match reference\"\n", + "assert torch.allclose(mlp(x[:, (1, 0), :])[:, (1, 0), :], mlp(x)), \"mlp must be permutation-invariant\"\n", + "print(\"Seems legit!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VbfCevRwGqom" + }, + "source": [ + "Now, let's get to attention layers.\n", + "\n", + "Since GPT-2 needs to generate text from left to right, each generated token can only attend to tokens on the left (and itself). 
This kind of attention is called \"Masked\" self-attention, because it hides tokens to the right.\n", + "\n", + "As before, please implement masked self-attention __without layernorm or residual connections.__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T6j7M4hLGqon" + }, + "outputs": [], + "source": [ + "class MaskedSelfAttention(nn.Module):\n", + " def __init__(self, dim: int, num_heads: int):\n", + " super().__init__()\n", + " self.c_attn = nn.Linear(dim, dim * 3) # query + key + value, combined\n", + " self.c_proj = nn.Linear(dim, dim) # output projection\n", + " self.dim, self.num_heads = dim, num_heads\n", + " self.head_size = dim // num_heads\n", + "\n", + " def forward(self, x):\n", + " q, k, v = self.c_attn(x).split(dim=-1, split_size=self.dim)\n", + " assert q.shape == k.shape == v.shape == x.shape, \"q, k and v must have the same shape as x\"\n", + " assert self.dim % self.num_heads == 0, \"self.dim must be divisible by self.num_heads without a remainder\"\n", + "\n", + " # Note: this is an inefficient implementation that uses a for-loop.\n", + " # To get the full grade during homework, please re-implement this code:\n", + " # 1) do not use for-loops (or other loops). 
Compute everything in parallel with vectorized operations\n", + " # 2) do not use F.scaled_dot_product_attention - write your own attention code using basic PyTorch ops\n", + " head_outputs = []\n", + " for head_index in range(self.num_heads):\n", + " head_selector = range(self.head_size * head_index, self.head_size * (head_index + 1))\n", + "\n", + " head_queries = q[..., head_selector]\n", + " head_keys = k[..., head_selector]\n", + " head_values = v[..., head_selector]\n", + "\n", + " single_head_output = F.scaled_dot_product_attention(\n", + " \n", + " is_causal=True)\n", + " # docs: https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html\n", + " head_outputs.append(single_head_output)\n", + "\n", + " combined_head_outputs = torch.cat(head_outputs, dim=-1)\n", + " return self.c_proj(combined_head_outputs)\n" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Test that it works" + ], + "metadata": { + "id": "umZpcpIkJva7" + } + }, + { + "cell_type": "code", + "source": [ + "attn = MaskedSelfAttention(dim=768, num_heads=12)\n", + "attn.load_state_dict({'c_attn.weight': state_dict['h.0.attn.c_attn.weight'],\n", + " 'c_attn.bias': state_dict['h.0.attn.c_attn.bias'],\n", + " 'c_proj.weight': state_dict['h.0.attn.c_proj.weight'],\n", + " 'c_proj.bias': state_dict['h.0.attn.c_proj.bias']})\n", + "\n", + "torch.manual_seed(1337)\n", + "x = torch.randn(1, 10, 768) # [batch_size, sequence_length, dim]\n", + "checksum = torch.sum(attn(x) * x)\n", + "assert abs(checksum.item() - 2703.6772) < 0.1, \"layer outputs do not match reference\"\n", + "assert not torch.allclose(attn(x[:, (1, 0), :])[:, (1, 0), :], attn(x[:, (0, 1), :])), \"masked attention must *not* be permutation-invariant\"\n", + "print(\"It works!\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "tg5Oj_PPM6hj", + "outputId": "ebeddb50-d805-47ae-cc3a-4d68d900b3a3" + }, + "execution_count": null, + "outputs": [ { - "cell_type": 
"code", - "execution_count": null, - "metadata": { - "id": "HRux8Qp2hkXr" - }, - "outputs": [], - "source": [ - "text = \"\"\"Almost two-thirds of the 1.5 million people who viewed this liveblog had Googled to discover\n", - " the latest on the Rosetta mission. They were treated to this detailed account by the Guardian’s science editor,\n", - " Ian Sample, and astronomy writer Stuart Clark of the moment scientists landed a robotic spacecraft on a comet\n", - " for the first time in history, and the delirious reaction it provoked at their headquarters in Germany.\n", - " “We are there. We are sitting on the surface. Philae is talking to us,” said one scientist.\n", - "\"\"\"\n", - "\n", - "# Task: create a pipeline for named entity recognition, use task name 'ner' and search for the right model in the list\n", - "ner_model = \n", - "\n", - "named_entities = ner_model(text)" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "It works!\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "We can now combine attention and MLP to build the full transformer layer:\n", + "\n", + "![img](https://i.imgur.com/1sq2vHO.png)" + ], + "metadata": { + "id": "rn6tgTHzOK4l" + } + }, + { + "cell_type": "code", + "source": [ + "class TransformerLayer(nn.Module):\n", + " def __init__(self, dim: int, num_heads: int):\n", + " super().__init__()\n", + " self.ln_1 = nn.LayerNorm(dim)\n", + " self.attn = MaskedSelfAttention(dim, num_heads)\n", + " self.ln_2 = nn.LayerNorm(dim)\n", + " self.mlp = FullyConnected(dim)\n", + "\n", + " def forward(self, x):\n", + " \n", + " return <...>" + ], + "metadata": { + "id": "p3AH7YQvRpvU" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "layer = TransformerLayer(dim=768, num_heads=12)\n", + "layer.load_state_dict({k[5:]: v for k, v in state_dict.items() if k.startswith('h.10.')})\n", + "assert abs(torch.sum(layer(x) * x).item() - 9874.7383) < 0.1\n", + "print(\"Good job!\")" + 
], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "Qzo_QeFVSNZa", + "outputId": "15613968-b4d7-4391-dfff-3b490951a125" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hf57MRzSiSON" - }, - "outputs": [], - "source": [ - "print('OUTPUT:', named_entities)\n", - "word_to_entity = {item['word']: item['entity'] for item in named_entities}\n", - "assert 'org' in word_to_entity.get('Guardian').lower() and 'per' in word_to_entity.get('Stuart').lower()\n", - "print(\"All tests passed\")" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Good job!\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "class GPT2(nn.Module):\n", + " def __init__(self, vocab_size: int, dim: int, num_heads: int, num_layers: int, max_position_embeddings: int = 1024):\n", + " super().__init__()\n", + " self.wte = nn.Embedding(vocab_size, dim) # token embeddings\n", + " self.wpe = nn.Embedding(max_position_embeddings, dim) # position embeddings\n", + " self.ln_f = nn.LayerNorm(dim) # final layer norm - goes after all transformer layers, but before logits\n", + "\n", + " self.h = nn.Sequential(*(TransformerLayer(dim, num_heads) for layer in range(num_layers)))\n", + "\n", + " def forward(self, input_ids):\n", + " # input_ids.shape: [batch_size, sequence_length], int64 token ids\n", + " position_ids = torch.arange(input_ids.shape[1], device=input_ids.device).unsqueeze(0)\n", + "\n", + " token_embeddings = self.wte(input_ids)\n", + " position_embeddings = self.wpe(position_ids)\n", + " full_embeddings = token_embeddings + position_embeddings\n", + "\n", + " transformer_output = self.h(full_embeddings)\n", + " transformer_output_ln = self.ln_f(transformer_output)\n", + "\n", + " # final layer: we predict logits by re-using token embeddings as linear weights\n", + " output_logits = transformer_output_ln @ self.wte.weight.T\n", + " return output_logits\n" + ], + 
"metadata": { + "id": "Mbqw9iuaSrYy" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "tokenizer = transformers.AutoTokenizer.from_pretrained('gpt2', add_prefix_space=True)\n", + "model = GPT2(vocab_size=50257, dim=768, num_heads=12, num_layers=12)\n", + "model.load_state_dict(state_dict)\n", + "\n", + "input_ids = tokenizer(\"A quick\", return_tensors='pt')['input_ids']\n", + "\n", + "predicted_logits = model(input_ids)\n", + "most_likely_token_id = predicted_logits[:, -1].argmax().item()\n", + "\n", + "print(\"Prediction:\", tokenizer.decode(most_likely_token_id))" + ], + "metadata": { + "id": "p0m8jt66aDIh" + }, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "text = \"The Fermi paradox \"\n", + "tokens = tokenizer.encode(text)\n", + "print(end=tokenizer.decode(tokens))\n", + "line_length = len(tokenizer.decode(tokens))\n", + "\n", + "for i in range(500):\n", + " # Predict logits with your model\n", + " with torch.no_grad():\n", + " logits = model(torch.as_tensor([tokens]))\n", + "\n", + " # Sample with probabilities\n", + " p_next = torch.softmax(logits[0, -1, :], dim=-1).data.cpu().numpy()\n", + " next_token_index = np.random.choice(len(p_next), p=p_next)\n", + "\n", + " tokens.append(int(next_token_index))\n", + " print(end=tokenizer.decode(tokens[-1]))\n", + " line_length += len(tokenizer.decode(tokens[-1]))\n", + " if line_length > 120:\n", + " line_length = 0\n", + " print()\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "R8ql3Lo7dXZ2", + "outputId": "8db86d13-d16b-4f97-db87-0385c0d91426" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "ULMownz6sP9n" - }, - "source": [ - "### The building blocks of a pipeline\n", - "\n", - "Huggingface also allows you to access its pipelines on a lower level. 
There are two main abstractions for you:\n", - "* `Tokenizer` - converts from strings to token ids and back\n", - "* `Model` - a pytorch `nn.Module` with pre-trained weights\n", - "\n", - "You can use such models as part of your regular pytorch code: insert is as a layer in your model, apply it to a batch of data, backpropagate, optimize, etc." - ] - }, + "output_type": "stream", + "name": "stdout", + "text": [ + " The Fermi paradox  occurred when some comments about the way particle systems are integrated and decided to \"Shine\" by tearing\n", + " a mirror toward them from many faces to small ones. Kriegers reactions shows how much Kriegers can follow the dynamical rules\n", + " of flow. We then see a throwing of the matrix into the triangle, but I am not sure why this very generating results in depth\n", + " physics. I am certainly better interested in what is going on that the mirror riff is actually all about. Kriegers replied\n", + ": \"Third one takes up where the half dots of theCrystal meet Tron Braun:How have theFermi Eardrums made or left the fermi\n", + " magnetism, an observable capability of the mirror? This can be seen in all ways, e.g. 
everybody is talking about the fact\n", + " there is a Giant thima at the" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V3NJ0ocgGqop" + }, + "source": [ + "__Reminder:__ after class, please go to `MaskedSelfAttention.forward` above and finish the job!\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "```\n", + "\n", + "\n", + "### Here's how you can do the same with transformers library" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NTOHu124Gqop", + "outputId": "5bb38785-a7d9-47e1-a887-c03634945c0b" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KMJbV0QVsO0Q" - }, - "outputs": [], - "source": [ - "tokenizer = transformers.AutoTokenizer.from_pretrained('bert-base-uncased')\n", - "model = transformers.AutoModel.from_pretrained('bert-base-uncased')\n" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n" + ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZgSPHKPRxG6U" - }, - "outputs": [], - "source": [ - "lines = [\n", - " \"Luke, I am your father.\",\n", - " \"Life is what happens when you're busy making other plans.\",\n", - " ]\n", - "\n", - "# tokenize a batch of inputs. 
\"pt\" means [p]y[t]orch tensors\n", - "tokens_info = tokenizer(lines, padding=True, truncation=True, return_tensors=\"pt\")\n", - "\n", - "for key in tokens_info:\n", - " print(key, tokens_info[key])\n", - "\n", - "print(\"Detokenized:\")\n", - "for i in range(2):\n", - " print(tokenizer.decode(tokens_info['input_ids'][i]))" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated continuation: The Fermi paradox  (with its paradoxical consequences which, if any, may also be taken to be the paradox of the Big Bang. If an explosion can only happen after the collapse of the matter in one of three states  or after the collapse of a\n" + ] + } + ], + "source": [ + "tokenizer = transformers.AutoTokenizer.from_pretrained('gpt2', add_prefix_space=True)\n", + "model = transformers.AutoModelForCausalLM.from_pretrained('gpt2')\n", + "print('Generated text:', tokenizer.decode(\n", + " model.generate(\n", + " **tokenizer(\"The Fermi paradox \", return_tensors='pt'),\n", + " do_sample=True, max_new_tokens=50\n", + " ).flatten().numpy()\n", + "))\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "py38", + "language": "python", + "name": "py38" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.1" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "e85584c419aa445285eecb482778c7ba": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_5694548560b548dabed4b111766da60e", + "IPY_MODEL_7fceb5ec7dec415a9dfcd4261aeb4a3d", + "IPY_MODEL_1fd2361180bd49aa8e2012a5e379d0c1" + ], + "layout": "IPY_MODEL_77441a81d25c40819ae5af99ad70f3c8" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "MJkbHxERyfL4" - }, - "outputs": [], - "source": [ - "# You can now apply the model to get embeddings\n", - "with torch.no_grad():\n", - " out = model(**tokens_info)\n", - "\n", - "print(out['pooler_output'])" - ] + "5694548560b548dabed4b111766da60e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_48c3bc239f16407bbba2d36ac6e5aaae", + "placeholder": "​", + "style": "IPY_MODEL_5c87b856ffeb4c1dbf2d4637065d26ef", + "value": "Downloading (…)lve/main/config.json: 100%" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "_Vij7Gc1wOaq" - }, - "source": [ - "Transformers knowledge hub: https://huggingface.co/transformers/" - ] + "7fceb5ec7dec415a9dfcd4261aeb4a3d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fbbe93e1d4a5493782b7a246f3cb1760", + "max": 
629, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c3e31f6d90a84af08ee33023be184849", + "value": 629 + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "bwmTTyjUGqol" - }, - "source": [ - "### Build-a-transformer (2 points)\n", - "\n", - "In this section, you will implement a transformer language model layer by layer, then use it to generate (hopefully) coherent text.\n", - "\n", - "To understand how these layers work, please check out our guide to transformers from [nlp course for you -> transformers](https://lena-voita.github.io/nlp_course/seq2seq_and_attention.html#transformer_intro).\n", - "\n", - "\n", - "First, we download pre-trained weights for the [GPT2 model by OpenAI](https://openai.com/research/better-language-models) - a prominent model from 2019.\n", - "\n", - "\n", - "\n", - "Idea & code by: Ilya Beletsky" - ] + "1fd2361180bd49aa8e2012a5e379d0c1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6099e17176dd4eef829ac7c45badc9e2", + "placeholder": "​", + "style": "IPY_MODEL_7ea9a125c76543419007fd5083ed63cd", + "value": " 629/629 [00:00<00:00, 12.4kB/s]" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "vOcK0lGTGqol", - "outputId": "131fbc38-d4af-4e3b-b87b-f4c1b15d3162" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Weights: ['h.0.attn.c_attn.bias', 'h.0.attn.c_attn.weight', 'h.0.attn.c_proj.bias', 'h.0.attn.c_proj.weight', 'h.0.ln_1.bias', 'h.0.ln_1.weight', 'h.0.ln_2.bias', 
'h.0.ln_2.weight', 'h.0.mlp.c_fc.bias', 'h.0.mlp.c_fc.weight', 'h.0.mlp.c_proj.bias', 'h.0.mlp.c_proj.weight', 'h.1.attn.c_attn.bias', 'h.1.attn.c_attn.weight', 'h.1. ...\n" - ] - } - ], - "source": [ - "from huggingface_hub import hf_hub_download\n", - "state_dict = torch.load(hf_hub_download(\"gpt2\", filename=\"pytorch_model.bin\"))\n", - "for key, value in tuple(state_dict.items()):\n", - " if key.startswith('h.') and key.endswith('.weight') and value.ndim == 2:\n", - " value.transpose_(1, 0) # <-- for compatibility with modern PyTorch modules\n", - " if key.startswith('h.') and key.endswith('.attn.bias') and value.ndim == 4:\n", - " state_dict.pop(key) # <-- triangular binar masks, not needed in this code\n", - "\n", - "print('Weights:', repr(sorted(state_dict.keys()))[:320], '...')" - ] + "77441a81d25c40819ae5af99ad70f3c8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": 
null, + "visibility": null, + "width": null + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "mr0SUtQnGqom" - }, - "source": [ - "In the next few cells, we shall implement the model layer by layer to make use of those weights.\n", - "\n", - "As you might recall, transformers contain two main layer types: attention and fully-connected layers.\n", - "\n", - "The fully connected layers are by far easier to understand, so we shall begin there:\n", - "\n", - "Please implement fully-connected layer __without residual or layer normalization__ (we'll add those in a bit)." - ] + "48c3bc239f16407bbba2d36ac6e5aaae": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3Rh-6DX9Gqom" - }, - "outputs": [], - "source": [ - "class 
GeLUThatWasUsedInGPT2(nn.Module):\n", - " def forward(self, x):\n", - " return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))\n", - "\n", - "class FullyConnected(nn.Module):\n", - " def __init__(self, dim: int):\n", - " super().__init__()\n", - " self.c_fc = nn.Linear(dim, 4 * dim)\n", - " self.gelu = GeLUThatWasUsedInGPT2()\n", - " self.c_proj = nn.Linear(4 * dim, dim)\n", - "\n", - " def forward(self, x):\n", - " # x.shape = [batch_size, seq_length, dim]\n", - " \n", - " return \n" - ] + "5c87b856ffeb4c1dbf2d4637065d26ef": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "iSVGKnHBGqom" - }, - "source": [ - "Now, let's test that it works with GPT-2 weights:" - ] + "fbbe93e1d4a5493782b7a246f3cb1760": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CoWjZwZkGqom" - }, - "outputs": [], - "source": [ - "mlp = FullyConnected(dim=768)\n", - "mlp.load_state_dict({'c_fc.weight': state_dict['h.0.mlp.c_fc.weight'],\n", - " 'c_fc.bias': state_dict['h.0.mlp.c_fc.bias'],\n", - " 'c_proj.weight': state_dict['h.0.mlp.c_proj.weight'],\n", - " 'c_proj.bias': state_dict['h.0.mlp.c_proj.bias']})\n", - "\n", - "torch.manual_seed(1337)\n", - "x = torch.randn(1, 2, 768) # [batch_size, sequence_length, dim]\n", - "checksum = torch.sum(mlp(x) * x)\n", - "assert abs(checksum.item() - 1282.3315) < 0.1, \"layer outputs do not match reference\"\n", - "assert torch.allclose(mlp(x[:, (1, 0), :])[:, (1, 0), :], mlp(x)), \"mlp must be permutation-invariant\"\n", - "print(\"Seems legit!\")" - ] + "c3e31f6d90a84af08ee33023be184849": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "VbfCevRwGqom" - }, - "source": [ - "Now, let's get to attention layers.\n", - "\n", - "Since GPT-2 needs to generate text from left to right, each generated token can only attend to tokens on the left (and itself). 
This kid of attention is called \"Masked\" self-attention, because it hides tokens to the right.\n", - "\n", - "As before, please implement masked self-attention __without layernorm or residual connections.__" - ] + "6099e17176dd4eef829ac7c45badc9e2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "T6j7M4hLGqon" - }, - "outputs": [], - "source": [ - "class MaskedSelfAttention(nn.Module):\n", - " def __init__(self, dim: int, num_heads: int):\n", - " super().__init__()\n", - " self.c_attn = nn.Linear(dim, dim * 3) # query + key + value, combined\n", - " self.c_proj = nn.Linear(dim, dim) # output projection\n", - " self.dim, self.num_heads = dim, num_heads\n", - " self.head_size = dim // num_heads\n", - "\n", - " def forward(self, x):\n", - " q, k, v = 
self.c_attn(x).split(dim=-1, split_size=self.dim)\n", - " assert q.shape == k.shape == v.shape == x.shape, \"q, k and v must have the same shape as x\"\n", - "\n", - "\n", - " # Note: this is an inefficient implementation that uses a for-loop.\n", - " # To get the full grade during homework, please re-implement this code:\n", - " # 1) do not use for-loops (or other loops). Compute everything in parallel with vectorized operations\n", - " # 2) do not use F.scaled_dot_product_attention - write your own attention code using basic PyTorch ops\n", - " head_outputs = []\n", - " for head_index in range(self.num_heads):\n", - " head_selector = range(self.head_size * head_index, self.head_size * (head_index + 1))\n", - "\n", - " head_queries = q[..., head_selector]\n", - " head_keys = k[..., head_selector]\n", - " head_values = v[..., head_selector]\n", - "\n", - " single_head_output = F.scaled_dot_product_attention(\n", - " \n", - " is_causal=True)\n", - " # docs: https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html\n", - " head_outputs.append(single_head_output)\n", - "\n", - " combined_head_outputs = torch.cat(head_outputs, dim=-1)\n", - " return self.c_proj(combined_head_outputs)\n" - ] + "7ea9a125c76543419007fd5083ed63cd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "markdown", - "source": [ - "Test that it works" + "b2786e8af69e490cba5048fca7a0a51e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f07e54fd856f4efc99bedb44abcece1b", + "IPY_MODEL_e0d2ca01fd7042a88a5493323a61409c", + "IPY_MODEL_e07d888d412d43f8ac4cc5285de07772" ], - "metadata": { - "id": "umZpcpIkJva7" - } + "layout": "IPY_MODEL_15227ca6cc5a4d219e0113da09926da4" + } }, - { - "cell_type": "code", - "source": [ - "attn = MaskedSelfAttention(dim=768, num_heads=12)\n", - "attn.load_state_dict({'c_attn.weight': state_dict['h.0.attn.c_attn.weight'],\n", - " 'c_attn.bias': state_dict['h.0.attn.c_attn.bias'],\n", - " 'c_proj.weight': state_dict['h.0.attn.c_proj.weight'],\n", - " 'c_proj.bias': state_dict['h.0.attn.c_proj.bias']})\n", - "\n", - "torch.manual_seed(1337)\n", - "x = torch.randn(1, 10, 768) # [batch_size, sequence_length, dim]\n", - "checksum = torch.sum(attn(x) * x)\n", - "assert abs(checksum.item() - 2703.6772) < 0.1, \"layer outputs do not match reference\"\n", - "assert not torch.allclose(attn(x[:, (1, 0), :])[:, (1, 0), :], attn(x[:, (0, 1), :])), \"masked attention must *not* be permutation-invariant\"\n", - "print(\"It works!\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "tg5Oj_PPM6hj", - "outputId": "ebeddb50-d805-47ae-cc3a-4d68d900b3a3" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "It works!\n" - ] - } - ] + "f07e54fd856f4efc99bedb44abcece1b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": 
"1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cf83aaa31a1a47419db2127456beae69", + "placeholder": "​", + "style": "IPY_MODEL_1e463fce356b46859493cbd975163ef1", + "value": "Downloading model.safetensors: 100%" + } }, - { - "cell_type": "markdown", - "source": [ - "We can now combine attention and MLP to build the full transformer layer:\n", - "\n", - "![img](https://i.imgur.com/1sq2vHO.png)" - ], - "metadata": { - "id": "rn6tgTHzOK4l" - } + "e0d2ca01fd7042a88a5493323a61409c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4ecb5a9efc3c42dd8faf4af9424ad5cf", + "max": 267832558, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3af4702b95484b8cabd891b18ffc71b1", + "value": 267832558 + } }, - { - "cell_type": "code", - "source": [ - "class TransformerLayer(nn.Module):\n", - " def __init__(self, dim: int, num_heads: int):\n", - " super().__init__()\n", - " self.ln_1 = nn.LayerNorm(dim)\n", - " self.attn = MaskedSelfAttention(dim, num_heads)\n", - " self.ln_2 = nn.LayerNorm(dim)\n", - " self.mlp = FullyConnected(dim)\n", - "\n", - " def forward(self, x):\n", - " \n", - " return <...>" - ], - "metadata": { - "id": "p3AH7YQvRpvU" - }, - "execution_count": null, - "outputs": [] + "e07d888d412d43f8ac4cc5285de07772": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": 
"1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e9210c257b8a4091b86123271c8a5486", + "placeholder": "​", + "style": "IPY_MODEL_7eb0bf256b8d4975b0615aa96f00bbe1", + "value": " 268M/268M [00:01<00:00, 172MB/s]" + } }, - { - "cell_type": "code", - "source": [ - "layer = TransformerLayer(dim=768, num_heads=12)\n", - "layer.load_state_dict({k[5:]: v for k, v in state_dict.items() if k.startswith('h.10.')})\n", - "assert abs(torch.sum(layer(x) * x).item() - 9874.7383) < 0.1\n", - "print(\"Good job!\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Qzo_QeFVSNZa", - "outputId": "15613968-b4d7-4391-dfff-3b490951a125" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Good job!\n" - ] - } - ] + "15227ca6cc5a4d219e0113da09926da4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + 
"min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "source": [ - "class GPT2(nn.Module):\n", - " def __init__(self, vocab_size: int, dim: int, num_heads: int, num_layers: int, max_position_embeddings: int = 1024):\n", - " super().__init__()\n", - " self.wte = nn.Embedding(vocab_size, dim) # token embeddings\n", - " self.wpe = nn.Embedding(max_position_embeddings, dim) # position embeddings\n", - " self.ln_f = nn.LayerNorm(dim) # final layer norm - goes after all transformer layers, but before logits\n", - "\n", - " self.h = nn.Sequential(*(TransformerLayer(dim, num_heads) for layer in range(num_layers)))\n", - "\n", - " def forward(self, input_ids):\n", - " # input_ids.shape: [batch_size, sequence_length], int64 token ids\n", - " position_ids = torch.arange(input_ids.shape[1], device=input_ids.device).unsqueeze(0)\n", - "\n", - " token_embeddings = self.wte(input_ids)\n", - " position_embeddings = self.wpe(position_ids)\n", - " full_embeddings = token_embeddings + position_embeddings\n", - "\n", - " transformer_output = self.h(full_embeddings)\n", - " transformer_output_ln = self.ln_f(transformer_output)\n", - "\n", - " # final layer: we predict logits by re-using token embeddings as linear weights\n", - " output_logits = transformer_output_ln @ self.wte.weight.T\n", - " return output_logits\n" - ], - "metadata": { - "id": "Mbqw9iuaSrYy" - }, - "execution_count": null, - "outputs": [] + "cf83aaa31a1a47419db2127456beae69": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "source": [ - "tokenizer = transformers.AutoTokenizer.from_pretrained('gpt2', add_prefix_space=True)\n", - "model = GPT2(vocab_size=50257, dim=768, num_heads=12, num_layers=12)\n", - "model.load_state_dict(state_dict)\n", - "\n", - "input_ids = tokenizer(\"A quick\", return_tensors='pt')['input_ids']\n", - "\n", - "predicted_logits = model(input_ids)\n", - "most_likely_token_id = predicted_logits[:, -1].argmax().item()\n", - "\n", - "print(\"Prediction:\", tokenizer.decode(most_likely_token_id))" - ], - "metadata": { - "id": "p0m8jt66aDIh" - }, - "execution_count": 7, - "outputs": [] + "1e463fce356b46859493cbd975163ef1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "source": 
[ - "text = \"The Fermi paradox \"\n", - "tokens = tokenizer.encode(text)\n", - "print(end=tokenizer.decode(tokens))\n", - "line_length = len(tokenizer.decode(tokens))\n", - "\n", - "for i in range(500):\n", - " # Predict logits with your model\n", - " with torch.no_grad():\n", - " logits = model(torch.as_tensor([tokens]))\n", - "\n", - " # Sample with probabilities\n", - " p_next = torch.softmax(logits[0, -1, :], dim=-1).data.cpu().numpy()\n", - " next_token_index = np.random.choice(len(p_next), p=p_next)\n", - "\n", - " tokens.append(int(next_token_index))\n", - " print(end=tokenizer.decode(tokens[-1]))\n", - " line_length += len(tokenizer.decode(tokens[-1]))\n", - " if line_length > 120:\n", - " line_length = 0\n", - " print()\n", - "\n" + "4ecb5a9efc3c42dd8faf4af9424ad5cf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"3af4702b95484b8cabd891b18ffc71b1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e9210c257b8a4091b86123271c8a5486": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7eb0bf256b8d4975b0615aa96f00bbe1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fc132bfb7d6447ad9900af7b80806aa1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ab6d124394214415a2580983c88bbb11", + "IPY_MODEL_8578b4d2279f40dc846fcdfc2df55742", + "IPY_MODEL_7213248ebe1f4e28a00a93eb33169170" ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "R8ql3Lo7dXZ2", - "outputId": "8db86d13-d16b-4f97-db87-0385c0d91426" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - " The Fermi paradox  occurred when some comments about the way particle systems are integrated and decided to \"Shine\" by tearing\n", - " a mirror toward them from many faces to small ones. Kriegers reactions shows how much Kriegers can follow the dynamical rules\n", - " of flow. We then see a throwing of the matrix into the triangle, but I am not sure why this very generating results in depth\n", - " physics. I am certainly better interested in what is going on that the mirror riff is actually all about. Kriegers replied\n", - ": \"Third one takes up where the half dots of theCrystal meet Tron Braun:How have theFermi Eardrums made or left the fermi\n", - " magnetism, an observable capability of the mirror? This can be seen in all ways, e.g. 
everybody is talking about the fact\n", - " there is a Giant thima at the" - ] - } - ] + "layout": "IPY_MODEL_b1497c0d7c3b4c04a91302a2cbc96db1" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "V3NJ0ocgGqop" - }, - "source": [ - "__Reminder:__ after class, please go to `MaskedSelfAttention.forward` above and finish the job!\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "```\n", - "\n", - "\n", - "### Here's how you can do the same with transformers library" - ] + "ab6d124394214415a2580983c88bbb11": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_60e971374e5448d08e9698a5594ff60a", + "placeholder": "​", + "style": "IPY_MODEL_6c8113bb76024debaf3256431cbefe35", + "value": "Downloading (…)okenizer_config.json: 100%" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NTOHu124Gqop", - "outputId": "5bb38785-a7d9-47e1-a887-c03634945c0b" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated continuation: The Fermi paradox  (with its paradoxical consequences which, if any, 
may also be taken to be the paradox of the Big Bang. If an explosion can only happen after the collapse of the matter in one of three states  or after the collapse of a\n" - ] - } + "8578b4d2279f40dc846fcdfc2df55742": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_73d8ad019f044a6998e81273a922745a", + "max": 48, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a445a35355424cb0b4c7abf3d4e6f32d", + "value": 48 + } + }, + "7213248ebe1f4e28a00a93eb33169170": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d71a81e25f5b4f88b8361c62e2ab9f44", + "placeholder": "​", + "style": "IPY_MODEL_4f084bbe71bb433ba0e7ee50fccb71cc", + "value": " 48.0/48.0 [00:00<00:00, 1.58kB/s]" + } + }, + "b1497c0d7c3b4c04a91302a2cbc96db1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "60e971374e5448d08e9698a5594ff60a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6c8113bb76024debaf3256431cbefe35": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "73d8ad019f044a6998e81273a922745a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a445a35355424cb0b4c7abf3d4e6f32d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d71a81e25f5b4f88b8361c62e2ab9f44": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4f084bbe71bb433ba0e7ee50fccb71cc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + 
}, + "5cff2bc12fec494e9e19b8a956186df6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_787781c894bd46c6990f2513d9f2c79c", + "IPY_MODEL_3f4eae6f79054676976f87b9524c4cf5", + "IPY_MODEL_237d34743ee64428a9d22ee51e0423f3" ], - "source": [ - "tokenizer = transformers.AutoTokenizer.from_pretrained('gpt2', add_prefix_space=True)\n", - "model = transformers.AutoModelForCausalLM.from_pretrained('gpt2')\n", - "print('Generated text:', tokenizer.decode(\n", - " model.generate(\n", - " **tokenizer(\"The Fermi paradox \", return_tensors='pt'),\n", - " do_sample=True, max_new_tokens=50\n", - " ).flatten().numpy()\n", - "))\n" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "provenance": [] + "layout": "IPY_MODEL_b095428dcb8a4408854f854de54d5692" + } }, - "kernelspec": { - "display_name": "py38", - "language": "python", - "name": "py38" + "787781c894bd46c6990f2513d9f2c79c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1b76512711884ffb81d3c3505b8fd137", + "placeholder": "​", + "style": "IPY_MODEL_2f53a3d1d0f44192beeb6ed8d40e762f", + "value": "Downloading (…)solve/main/vocab.txt: 100%" + } }, - "language_info": { - "codemirror_mode": { - "name": "ipython", 
- "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.1" + "3f4eae6f79054676976f87b9524c4cf5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1b194af3033342b0b04dc12b3e055ff2", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_391b8f44109544328617fbc3a3c3b9fc", + "value": 231508 + } }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "e85584c419aa445285eecb482778c7ba": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_5694548560b548dabed4b111766da60e", - "IPY_MODEL_7fceb5ec7dec415a9dfcd4261aeb4a3d", - "IPY_MODEL_1fd2361180bd49aa8e2012a5e379d0c1" - ], - "layout": "IPY_MODEL_77441a81d25c40819ae5af99ad70f3c8" - } - }, - "5694548560b548dabed4b111766da60e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_48c3bc239f16407bbba2d36ac6e5aaae", - "placeholder": "​", - "style": "IPY_MODEL_5c87b856ffeb4c1dbf2d4637065d26ef", - "value": "Downloading (…)lve/main/config.json: 100%" - } - }, - "7fceb5ec7dec415a9dfcd4261aeb4a3d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fbbe93e1d4a5493782b7a246f3cb1760", - "max": 629, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c3e31f6d90a84af08ee33023be184849", - "value": 629 - } - }, - "1fd2361180bd49aa8e2012a5e379d0c1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6099e17176dd4eef829ac7c45badc9e2", - "placeholder": "​", - "style": "IPY_MODEL_7ea9a125c76543419007fd5083ed63cd", - "value": " 629/629 [00:00<00:00, 12.4kB/s]" - } - }, - "77441a81d25c40819ae5af99ad70f3c8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": 
"1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "48c3bc239f16407bbba2d36ac6e5aaae": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": 
null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5c87b856ffeb4c1dbf2d4637065d26ef": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "fbbe93e1d4a5493782b7a246f3cb1760": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"c3e31f6d90a84af08ee33023be184849": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "6099e17176dd4eef829ac7c45badc9e2": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7ea9a125c76543419007fd5083ed63cd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - 
"_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b2786e8af69e490cba5048fca7a0a51e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_f07e54fd856f4efc99bedb44abcece1b", - "IPY_MODEL_e0d2ca01fd7042a88a5493323a61409c", - "IPY_MODEL_e07d888d412d43f8ac4cc5285de07772" - ], - "layout": "IPY_MODEL_15227ca6cc5a4d219e0113da09926da4" - } - }, - "f07e54fd856f4efc99bedb44abcece1b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_cf83aaa31a1a47419db2127456beae69", - "placeholder": "​", - "style": "IPY_MODEL_1e463fce356b46859493cbd975163ef1", - "value": "Downloading model.safetensors: 100%" - } - }, - "e0d2ca01fd7042a88a5493323a61409c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - 
"_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4ecb5a9efc3c42dd8faf4af9424ad5cf", - "max": 267832558, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_3af4702b95484b8cabd891b18ffc71b1", - "value": 267832558 - } - }, - "e07d888d412d43f8ac4cc5285de07772": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e9210c257b8a4091b86123271c8a5486", - "placeholder": "​", - "style": "IPY_MODEL_7eb0bf256b8d4975b0615aa96f00bbe1", - "value": " 268M/268M [00:01<00:00, 172MB/s]" - } - }, - "15227ca6cc5a4d219e0113da09926da4": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - 
"min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "cf83aaa31a1a47419db2127456beae69": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1e463fce356b46859493cbd975163ef1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4ecb5a9efc3c42dd8faf4af9424ad5cf": { - 
"model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3af4702b95484b8cabd891b18ffc71b1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "e9210c257b8a4091b86123271c8a5486": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7eb0bf256b8d4975b0615aa96f00bbe1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "fc132bfb7d6447ad9900af7b80806aa1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ab6d124394214415a2580983c88bbb11", - "IPY_MODEL_8578b4d2279f40dc846fcdfc2df55742", - 
"IPY_MODEL_7213248ebe1f4e28a00a93eb33169170" - ], - "layout": "IPY_MODEL_b1497c0d7c3b4c04a91302a2cbc96db1" - } - }, - "ab6d124394214415a2580983c88bbb11": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_60e971374e5448d08e9698a5594ff60a", - "placeholder": "​", - "style": "IPY_MODEL_6c8113bb76024debaf3256431cbefe35", - "value": "Downloading (…)okenizer_config.json: 100%" - } - }, - "8578b4d2279f40dc846fcdfc2df55742": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_73d8ad019f044a6998e81273a922745a", - "max": 48, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_a445a35355424cb0b4c7abf3d4e6f32d", - "value": 48 - } - }, - "7213248ebe1f4e28a00a93eb33169170": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - 
"description_tooltip": null, - "layout": "IPY_MODEL_d71a81e25f5b4f88b8361c62e2ab9f44", - "placeholder": "​", - "style": "IPY_MODEL_4f084bbe71bb433ba0e7ee50fccb71cc", - "value": " 48.0/48.0 [00:00<00:00, 1.58kB/s]" - } - }, - "b1497c0d7c3b4c04a91302a2cbc96db1": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "60e971374e5448d08e9698a5594ff60a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - 
"flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6c8113bb76024debaf3256431cbefe35": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "73d8ad019f044a6998e81273a922745a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": 
null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a445a35355424cb0b4c7abf3d4e6f32d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "d71a81e25f5b4f88b8361c62e2ab9f44": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - 
"overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4f084bbe71bb433ba0e7ee50fccb71cc": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5cff2bc12fec494e9e19b8a956186df6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_787781c894bd46c6990f2513d9f2c79c", - "IPY_MODEL_3f4eae6f79054676976f87b9524c4cf5", - "IPY_MODEL_237d34743ee64428a9d22ee51e0423f3" - ], - "layout": "IPY_MODEL_b095428dcb8a4408854f854de54d5692" - } - }, - "787781c894bd46c6990f2513d9f2c79c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1b76512711884ffb81d3c3505b8fd137", - "placeholder": "​", - "style": "IPY_MODEL_2f53a3d1d0f44192beeb6ed8d40e762f", - "value": "Downloading (…)solve/main/vocab.txt: 100%" - } - }, - 
"3f4eae6f79054676976f87b9524c4cf5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1b194af3033342b0b04dc12b3e055ff2", - "max": 231508, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_391b8f44109544328617fbc3a3c3b9fc", - "value": 231508 - } - }, - "237d34743ee64428a9d22ee51e0423f3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6f36db95f6fb4b4687c7130fc23c4348", - "placeholder": "​", - "style": "IPY_MODEL_82a35a7ae5e441a1b66534a3eab5e765", - "value": " 232k/232k [00:00<00:00, 3.06MB/s]" - } - }, - "b095428dcb8a4408854f854de54d5692": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, 
- "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1b76512711884ffb81d3c3505b8fd137": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2f53a3d1d0f44192beeb6ed8d40e762f": { - "model_module": "@jupyter-widgets/controls", - 
"model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "1b194af3033342b0b04dc12b3e055ff2": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "391b8f44109544328617fbc3a3c3b9fc": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - 
"_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "6f36db95f6fb4b4687c7130fc23c4348": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "82a35a7ae5e441a1b66534a3eab5e765": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - } - } + "237d34743ee64428a9d22ee51e0423f3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + 
"_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6f36db95f6fb4b4687c7130fc23c4348", + "placeholder": "​", + "style": "IPY_MODEL_82a35a7ae5e441a1b66534a3eab5e765", + "value": " 232k/232k [00:00<00:00, 3.06MB/s]" + } + }, + "b095428dcb8a4408854f854de54d5692": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1b76512711884ffb81d3c3505b8fd137": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", 
+ "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2f53a3d1d0f44192beeb6ed8d40e762f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1b194af3033342b0b04dc12b3e055ff2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, 
+ "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "391b8f44109544328617fbc3a3c3b9fc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6f36db95f6fb4b4687c7130fc23c4348": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "82a35a7ae5e441a1b66534a3eab5e765": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } } - }, - "nbformat": 4, - "nbformat_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 }