Upload 9 files

Browse files

Files changed (9) hide show

Others/Beam_Search.ipynb +234 -0
Others/Colab_Train.ipynb +0 -0
Others/Inference.ipynb +93 -0
Others/Local_Train.ipynb +1832 -0
Others/attention_visual.ipynb +207 -0
Others/conda.txt +24 -0
Others/requirements.txt +12 -0
Others/train_wb.py +274 -0
Others/translate.py +79 -0

Others/Beam_Search.ipynb ADDED Viewed

	@@ -0,0 +1,234 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "from config import get_config, get_weights_file_path\n",
+    "from train import get_model, get_ds, run_validation, causal_mask"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using device: cuda\n",
+      "Max length of source sentence: 309\n",
+      "Max length of target sentence: 274\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<All keys matched successfully>"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Define the device\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "print(\"Using device:\", device)\n",
+    "config = get_config()\n",
+    "train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt = get_ds(config)\n",
+    "model = get_model(config, tokenizer_src.get_vocab_size(), tokenizer_tgt.get_vocab_size()).to(device)\n",
+    "\n",
+    "# Load the pretrained weights from the checkpoint tagged \"19\".\n",
+    "# map_location remaps the stored tensors onto `device`, so a checkpoint that was\n",
+    "# saved on a GPU still loads when this notebook falls back to the CPU.\n",
+    "model_filename = get_weights_file_path(config, \"19\")\n",
+    "state = torch.load(model_filename, map_location=device)\n",
+    "model.load_state_dict(state['model_state_dict'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--------------------------------------------------------------------------------\n",
+      "            SOURCE: Hence it is that for so long a time, and during so much fighting in the past twenty years, whenever there has been an army wholly Italian, it has always given a poor account of itself; the first witness to this is Il Taro, afterwards Allesandria, Capua, Genoa, Vaila, Bologna, Mestri.\n",
+      "            TARGET: Di qui nasce che, in tanto tempo, in tante guerre fatte ne' passati venti anni, quando elli è stato uno esercito tutto italiano, sempre ha fatto mala pruova. Di che è testimone prima el Taro, di poi Alessandria, Capua, Genova, Vailà, Bologna, Mestri.\n",
+      "  PREDICTED GREEDY: Di qui nasce che , in tanto , in tanto tempo , in tante guerre fatte ne ' passati\n",
+      "    PREDICTED BEAM: Di qui nasce che , in tanto tempo , in tante guerre fatte ne ' passati venti anni ,\n",
+      "--------------------------------------------------------------------------------\n",
+      "            SOURCE: She went out.\n",
+      "            TARGET: Aprì lo sportello e venne fuori.\n",
+      "  PREDICTED GREEDY: Aprì lo sportello e venne fuori .\n",
+      "    PREDICTED BEAM: Aprì lo sportello e venne fuori . — Ecco , poi uscì e andò via . — Ecco ,\n",
+      "--------------------------------------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "def beam_search_decode(model, beam_size, source, source_mask, tokenizer_src, tokenizer_tgt, max_len, device):\n",
+    "    \"\"\"Decode `source` with beam search and return the best-scoring token ids.\n",
+    "\n",
+    "    Up to `beam_size` hypotheses are kept. At every step each unfinished\n",
+    "    hypothesis is extended with its `beam_size` highest-scoring next tokens,\n",
+    "    while hypotheses that already end in [EOS] are carried over unchanged so\n",
+    "    they keep competing with the longer ones instead of being dropped.\n",
+    "    The search stops when every hypothesis ends in [EOS] or when any\n",
+    "    hypothesis is `max_len` tokens long.\n",
+    "\n",
+    "    Returns a 1-D tensor of target token ids that starts with [SOS].\n",
+    "    \"\"\"\n",
+    "    sos_idx = tokenizer_tgt.token_to_id('[SOS]')\n",
+    "    eos_idx = tokenizer_tgt.token_to_id('[EOS]')\n",
+    "\n",
+    "    # Precompute the encoder output and reuse it for every step\n",
+    "    encoder_output = model.encode(source, source_mask)\n",
+    "    # Initialize the decoder input with the sos token\n",
+    "    decoder_initial_input = torch.empty(1, 1).fill_(sos_idx).type_as(source).to(device)\n",
+    "\n",
+    "    # Each candidate is a (token ids of shape (1, length), cumulative score) pair.\n",
+    "    # Scores are summed, so the initial hypothesis starts from the additive identity.\n",
+    "    candidates = [(decoder_initial_input, 0.0)]\n",
+    "\n",
+    "    while True:\n",
+    "\n",
+    "        # If a candidate has reached the maximum length, stop the search\n",
+    "        if any(cand.size(1) == max_len for cand, _ in candidates):\n",
+    "            break\n",
+    "\n",
+    "        # Create a new list of candidates\n",
+    "        new_candidates = []\n",
+    "\n",
+    "        for candidate, score in candidates:\n",
+    "\n",
+    "            # A finished candidate is not expanded, but it must stay in the pool:\n",
+    "            # dropping it here would discard every complete translation unless all\n",
+    "            # beams happened to emit [EOS] on the very same step.\n",
+    "            if candidate[0][-1].item() == eos_idx:\n",
+    "                new_candidates.append((candidate, score))\n",
+    "                continue\n",
+    "\n",
+    "            # Build the candidate's mask\n",
+    "            candidate_mask = causal_mask(candidate.size(1)).type_as(source_mask).to(device)\n",
+    "            # calculate output\n",
+    "            out = model.decode(encoder_output, source_mask, candidate, candidate_mask)\n",
+    "            # Scores for the next token, taken from the last position only.\n",
+    "            # NOTE(review): assumes model.project returns log-probabilities (e.g. log_softmax) - confirm.\n",
+    "            prob = model.project(out[:, -1])\n",
+    "            # get the top k candidates\n",
+    "            topk_prob, topk_idx = torch.topk(prob, beam_size, dim=1)\n",
+    "            for i in range(beam_size):\n",
+    "                # for each of the top k candidates, get the token and its score\n",
+    "                token = topk_idx[0][i].unsqueeze(0).unsqueeze(0)\n",
+    "                token_prob = topk_prob[0][i].item()\n",
+    "                # create a new candidate by appending the token to the current candidate\n",
+    "                new_candidate = torch.cat([candidate, token], dim=1)\n",
+    "                # Scores are added because they are treated as log-probabilities\n",
+    "                new_candidates.append((new_candidate, score + token_prob))\n",
+    "\n",
+    "        # Sort the new candidates by their score and keep only the top k.\n",
+    "        # NOTE(review): scores are not length-normalized, so shorter finished\n",
+    "        # hypotheses are favoured over longer ones.\n",
+    "        candidates = sorted(new_candidates, key=lambda x: x[1], reverse=True)\n",
+    "        candidates = candidates[:beam_size]\n",
+    "\n",
+    "        # If all the candidates have reached the eos token, stop\n",
+    "        if all(cand[0][-1].item() == eos_idx for cand, _ in candidates):\n",
+    "            break\n",
+    "\n",
+    "    # Return the best candidate; squeeze(0) drops only the batch dimension,\n",
+    "    # matching greedy_decode, so a length-1 result stays 1-D.\n",
+    "    return candidates[0][0].squeeze(0)\n",
+    "\n",
+    "def greedy_decode(model, source, source_mask, tokenizer_src, tokenizer_tgt, max_len, device):\n",
+    "    \"\"\"Decode `source` by always appending the highest-scoring next token.\n",
+    "\n",
+    "    Generation stops right after [EOS] is appended, or once the sequence\n",
+    "    (including the leading [SOS]) is `max_len` tokens long. The token ids are\n",
+    "    returned with the batch dimension squeezed out.\n",
+    "    \"\"\"\n",
+    "    start_id = tokenizer_tgt.token_to_id('[SOS]')\n",
+    "    end_id = tokenizer_tgt.token_to_id('[EOS]')\n",
+    "\n",
+    "    # The encoder runs once; its output is reused at every decoding step\n",
+    "    memory = model.encode(source, source_mask)\n",
+    "    # Start from a (1, 1) sequence that holds only the sos token\n",
+    "    tokens = torch.empty(1, 1).fill_(start_id).type_as(source).to(device)\n",
+    "\n",
+    "    while tokens.size(1) != max_len:\n",
+    "        # Target mask sized to the tokens generated so far\n",
+    "        tgt_mask = causal_mask(tokens.size(1)).type_as(source_mask).to(device)\n",
+    "\n",
+    "        # Run the decoder and project only its last position\n",
+    "        decoded = model.decode(memory, source_mask, tokens, tgt_mask)\n",
+    "        scores = model.project(decoded[:, -1])\n",
+    "\n",
+    "        # Greedy choice: the index with the largest score becomes the next token\n",
+    "        _, next_word = torch.max(scores, dim=1)\n",
+    "        next_token = torch.empty(1, 1).type_as(source).fill_(next_word.item()).to(device)\n",
+    "        tokens = torch.cat([tokens, next_token], dim=1)\n",
+    "\n",
+    "        if next_word == end_id:\n",
+    "            break\n",
+    "\n",
+    "    return tokens.squeeze(0)\n",
+    "\n",
+    "def run_validation(model, validation_ds, tokenizer_src, tokenizer_tgt, max_len, device, print_msg, num_examples=2):\n",
+    "    \"\"\"Print greedy and beam-search translations for the first batches of `validation_ds`.\n",
+    "\n",
+    "    Each batch must hold exactly one sentence. For every batch the source text,\n",
+    "    the reference target text, the greedy prediction and the beam-search\n",
+    "    prediction (beam size 3) are sent to `print_msg`. The loop stops after\n",
+    "    `num_examples` batches.\n",
+    "    \"\"\"\n",
+    "    model.eval()\n",
+    "\n",
+    "    # Horizontal rule as wide as the assumed console (80 columns)\n",
+    "    separator = '-' * 80\n",
+    "\n",
+    "    with torch.no_grad():\n",
+    "        for shown, batch in enumerate(validation_ds, start=1):\n",
+    "            encoder_input = batch[\"encoder_input\"].to(device) # (b, seq_len)\n",
+    "            encoder_mask = batch[\"encoder_mask\"].to(device) # (b, 1, 1, seq_len)\n",
+    "\n",
+    "            # Both decoders handle a single sentence at a time\n",
+    "            assert encoder_input.size(0) == 1, \"Batch size must be 1 for validation\"\n",
+    "\n",
+    "            greedy_ids = greedy_decode(model, encoder_input, encoder_mask, tokenizer_src, tokenizer_tgt, max_len, device)\n",
+    "            beam_ids = beam_search_decode(model, 3, encoder_input, encoder_mask, tokenizer_src, tokenizer_tgt, max_len, device)\n",
+    "\n",
+    "            source_text = batch[\"src_text\"][0]\n",
+    "            target_text = batch[\"tgt_text\"][0]\n",
+    "            # Turn the predicted ids back into text\n",
+    "            beam_text = tokenizer_tgt.decode(beam_ids.detach().cpu().numpy())\n",
+    "            greedy_text = tokenizer_tgt.decode(greedy_ids.detach().cpu().numpy())\n",
+    "\n",
+    "            # Labels are right-aligned in a 20-character column\n",
+    "            print_msg(separator)\n",
+    "            print_msg(f\"{'SOURCE: ':>20}{source_text}\")\n",
+    "            print_msg(f\"{'TARGET: ':>20}{target_text}\")\n",
+    "            print_msg(f\"{'PREDICTED GREEDY: ':>20}{greedy_text}\")\n",
+    "            print_msg(f\"{'PREDICTED BEAM: ':>20}{beam_text}\")\n",
+    "\n",
+    "            if shown == num_examples:\n",
+    "                print_msg(separator)\n",
+    "                break\n",
+    "\n",
+    "run_validation(model, val_dataloader, tokenizer_src, tokenizer_tgt, 20, device, print_msg=print, num_examples=2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "transformer",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

Others/Colab_Train.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

Others/Inference.ipynb ADDED Viewed

	@@ -0,0 +1,93 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "from config import get_config, latest_weights_file_path\n",
+    "from train import get_model, get_ds, run_validation\n",
+    "from translate import translate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the device\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "print(\"Using device:\", device)\n",
+    "config = get_config()\n",
+    "train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt = get_ds(config)\n",
+    "model = get_model(config, tokenizer_src.get_vocab_size(), tokenizer_tgt.get_vocab_size()).to(device)\n",
+    "\n",
+    "# Load the pretrained weights.\n",
+    "# map_location remaps the stored tensors onto `device`, so a checkpoint that was\n",
+    "# saved on a GPU still loads when this notebook falls back to the CPU.\n",
+    "model_filename = latest_weights_file_path(config)\n",
+    "state = torch.load(model_filename, map_location=device)\n",
+    "model.load_state_dict(state['model_state_dict'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run_validation(model, val_dataloader, tokenizer_src, tokenizer_tgt, config['seq_len'], device, lambda msg: print(msg), 0, None, num_examples=10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t = translate(\"Why do I need to translate this?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t = translate(34)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "transformer",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.0"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

Others/Local_Train.ipynb ADDED Viewed

	@@ -0,0 +1,1832 @@

+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 198,
+          "referenced_widgets": [
+            "0ce327d5112b44dbb20e57752afc478a",
+            "423a3059ad1a4e01bd01095cf1b41e14",
+            "9cf2d2e2bfe24f2ab185165d79da8bdb",
+            "996ac47b200c427088ee7644fe886896",
+            "9b9addf13301466b9ef30b9d4b836a67",
+            "ec2051bf0e9343d394e8a0ecb4fd5ec8",
+            "56049bd375cd4512a0deaf69b7dae245",
+            "140f33387db341398bc39e9c47703df4",
+            "b3a8424c0b584a37ad2ede748085425c",
+            "cb7d88a70af746f2ae31416b4b670c63",
+            "4837276e5cf248449e287b1eeaef30ec",
+            "3ab0f2022e654458875c2c091908e8c9",
+            "f74bdeb79a224de8b1c85f4ca8657331",
+            "4eb62038f89d4a8cb2c46e6a7cc70150",
+            "9055fd09043642e0ae3d8a7a7c0ab31b",
+            "4a2ead337d5c4ded9f28c93a70db1f08",
+            "888a323362ae4daeac99915bcb3dcf10",
+            "4d0e364e9f274e8ea7447e4e01c7f28f",
+            "78a32764678a42f0a5a892f5275d88de",
+            "aa17c3a834694a978046808fc5d29da1",
+            "11e011e4acb24519bd41a054ddecbfb1",
+            "5d1a9518abd44c18b122e575a7548ed2",
+            "76e80fb236f5491597c992d1a809be33",
+            "f7359467b0214c5385de8ee4334f7ba3",
+            "a58ac736aa884eb9a27264cb04bb36ce",
+            "6e6f7b7cccaa4f0cbfc9311db257bea1",
+            "0656eee26364487f81580c3864e7a159",
+            "05240e68c55a458286f43967e7f90889",
+            "8cfa6df0ee654643bfdb4a3825e8fbbe",
+            "96baa91869eb478eb492754b98169470",
+            "bbda5260ca1c450386f9191e9f9dde97",
+            "6fc5bec49f17469db39e0d4b535b94e9",
+            "67822d28f8584e69abcb041b88377a9f",
+            "aa082ade829247dc8ea0d75cc8a5b2a7",
+            "83bc41f428b7492e9defdaa177f33a3e",
+            "7f168d0ea11c4ea1a96202d3a36ec389",
+            "ebb7ee3fd084466f9667771a99e6e3b2",
+            "1e3c2a94251b4e75af0413a88b53bfe1",
+            "a1188f80f78c49c7a822d71694e47074",
+            "068552491889440e8a66e61b9f013786",
+            "c88027eb3e1c4771ab57366070ecd553",
+            "df75b255bfb04057b553830b59f0a153",
+            "f0e5024d0d054c1eb8e01c4c8b027e79",
+            "937ee45f4d634d189c6d95c886e97bca",
+            "c2d14fa4280c48e0ae04859b73c80781",
+            "d3104837d9734834b7c87e87289b08df",
+            "02b02005adf241a4a0be8173ca3a4aee",
+            "b317ba38f2b145f9b0b49f523547684f",
+            "434340d109d1401d8868498a23b291cf",
+            "2c95f5b81fc84ad698fe77b52cb84076",
+            "ca588157678e4cc09c3fd760676efd39",
+            "c020b38c6d2c436e8b742fd87d3b8b89",
+            "3dc97a04373f484d9ccd1c46646d96cc",
+            "4aed1fa58b7342eba35c2106ec934019",
+            "60c72c47a8d84f0eab652822bed1ed09"
+          ]
+        },
+        "id": "gGDOaOoIwGc5",
+        "outputId": "4180e60a-8985-4795-8e72-373deabc1ebc"
+      },
+      "outputs": [],
+      "source": [
+        "from config import get_config\n",
+        "cfg = get_config()\n",
+        "cfg['batch_size'] = 6\n",
+        "cfg['preload'] = None\n",
+        "cfg['num_epochs'] = 30\n",
+        "\n",
+        "from train import train_model\n",
+        "\n",
+        "train_model(cfg)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "gpuType": "T4",
+      "provenance": []
+    },
+    "gpuClass": "standard",
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.10.6"
+    },
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "02b02005adf241a4a0be8173ca3a4aee": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_c020b38c6d2c436e8b742fd87d3b8b89",
+            "max": 32332,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_3dc97a04373f484d9ccd1c46646d96cc",
+            "value": 32332
+          }
+        },
+        "05240e68c55a458286f43967e7f90889": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "0656eee26364487f81580c3864e7a159": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "068552491889440e8a66e61b9f013786": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "0ce327d5112b44dbb20e57752afc478a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_423a3059ad1a4e01bd01095cf1b41e14",
+              "IPY_MODEL_9cf2d2e2bfe24f2ab185165d79da8bdb",
+              "IPY_MODEL_996ac47b200c427088ee7644fe886896"
+            ],
+            "layout": "IPY_MODEL_9b9addf13301466b9ef30b9d4b836a67"
+          }
+        },
+        "11e011e4acb24519bd41a054ddecbfb1": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "140f33387db341398bc39e9c47703df4": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "1e3c2a94251b4e75af0413a88b53bfe1": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "2c95f5b81fc84ad698fe77b52cb84076": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "3ab0f2022e654458875c2c091908e8c9": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_f74bdeb79a224de8b1c85f4ca8657331",
+              "IPY_MODEL_4eb62038f89d4a8cb2c46e6a7cc70150",
+              "IPY_MODEL_9055fd09043642e0ae3d8a7a7c0ab31b"
+            ],
+            "layout": "IPY_MODEL_4a2ead337d5c4ded9f28c93a70db1f08"
+          }
+        },
+        "3dc97a04373f484d9ccd1c46646d96cc": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "423a3059ad1a4e01bd01095cf1b41e14": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_ec2051bf0e9343d394e8a0ecb4fd5ec8",
+            "placeholder": "",
+            "style": "IPY_MODEL_56049bd375cd4512a0deaf69b7dae245",
+            "value": "Downloading builder script: 100%"
+          }
+        },
+        "434340d109d1401d8868498a23b291cf": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": "hidden",
+            "width": null
+          }
+        },
+        "4837276e5cf248449e287b1eeaef30ec": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "4a2ead337d5c4ded9f28c93a70db1f08": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "4aed1fa58b7342eba35c2106ec934019": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "4d0e364e9f274e8ea7447e4e01c7f28f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "4eb62038f89d4a8cb2c46e6a7cc70150": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_78a32764678a42f0a5a892f5275d88de",
+            "max": 161154,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_aa17c3a834694a978046808fc5d29da1",
+            "value": 161154
+          }
+        },
+        "56049bd375cd4512a0deaf69b7dae245": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "5d1a9518abd44c18b122e575a7548ed2": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "60c72c47a8d84f0eab652822bed1ed09": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "67822d28f8584e69abcb041b88377a9f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "6e6f7b7cccaa4f0cbfc9311db257bea1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_6fc5bec49f17469db39e0d4b535b94e9",
+            "placeholder": "",
+            "style": "IPY_MODEL_67822d28f8584e69abcb041b88377a9f",
+            "value": " 20.5k/20.5k [00:00&lt;00:00, 1.34MB/s]"
+          }
+        },
+        "6fc5bec49f17469db39e0d4b535b94e9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "76e80fb236f5491597c992d1a809be33": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_f7359467b0214c5385de8ee4334f7ba3",
+              "IPY_MODEL_a58ac736aa884eb9a27264cb04bb36ce",
+              "IPY_MODEL_6e6f7b7cccaa4f0cbfc9311db257bea1"
+            ],
+            "layout": "IPY_MODEL_0656eee26364487f81580c3864e7a159"
+          }
+        },
+        "78a32764678a42f0a5a892f5275d88de": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "7f168d0ea11c4ea1a96202d3a36ec389": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_c88027eb3e1c4771ab57366070ecd553",
+            "max": 3295251,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_df75b255bfb04057b553830b59f0a153",
+            "value": 3295251
+          }
+        },
+        "83bc41f428b7492e9defdaa177f33a3e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_a1188f80f78c49c7a822d71694e47074",
+            "placeholder": "",
+            "style": "IPY_MODEL_068552491889440e8a66e61b9f013786",
+            "value": "Downloading data: 100%"
+          }
+        },
+        "888a323362ae4daeac99915bcb3dcf10": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "8cfa6df0ee654643bfdb4a3825e8fbbe": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "9055fd09043642e0ae3d8a7a7c0ab31b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_11e011e4acb24519bd41a054ddecbfb1",
+            "placeholder": "",
+            "style": "IPY_MODEL_5d1a9518abd44c18b122e575a7548ed2",
+            "value": " 161k/161k [00:00&lt;00:00, 865kB/s]"
+          }
+        },
+        "937ee45f4d634d189c6d95c886e97bca": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "96baa91869eb478eb492754b98169470": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "996ac47b200c427088ee7644fe886896": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_cb7d88a70af746f2ae31416b4b670c63",
+            "placeholder": "",
+            "style": "IPY_MODEL_4837276e5cf248449e287b1eeaef30ec",
+            "value": " 6.08k/6.08k [00:00&lt;00:00, 279kB/s]"
+          }
+        },
+        "9b9addf13301466b9ef30b9d4b836a67": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "9cf2d2e2bfe24f2ab185165d79da8bdb": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_140f33387db341398bc39e9c47703df4",
+            "max": 6081,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_b3a8424c0b584a37ad2ede748085425c",
+            "value": 6081
+          }
+        },
+        "a1188f80f78c49c7a822d71694e47074": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "a58ac736aa884eb9a27264cb04bb36ce": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_96baa91869eb478eb492754b98169470",
+            "max": 20464,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_bbda5260ca1c450386f9191e9f9dde97",
+            "value": 20464
+          }
+        },
+        "aa082ade829247dc8ea0d75cc8a5b2a7": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_83bc41f428b7492e9defdaa177f33a3e",
+              "IPY_MODEL_7f168d0ea11c4ea1a96202d3a36ec389",
+              "IPY_MODEL_ebb7ee3fd084466f9667771a99e6e3b2"
+            ],
+            "layout": "IPY_MODEL_1e3c2a94251b4e75af0413a88b53bfe1"
+          }
+        },
+        "aa17c3a834694a978046808fc5d29da1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "b317ba38f2b145f9b0b49f523547684f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_4aed1fa58b7342eba35c2106ec934019",
+            "placeholder": "",
+            "style": "IPY_MODEL_60c72c47a8d84f0eab652822bed1ed09",
+            "value": " 32332/32332 [00:01&lt;00:00, 27628.23 examples/s]"
+          }
+        },
+        "b3a8424c0b584a37ad2ede748085425c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "bbda5260ca1c450386f9191e9f9dde97": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "c020b38c6d2c436e8b742fd87d3b8b89": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "c2d14fa4280c48e0ae04859b73c80781": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_d3104837d9734834b7c87e87289b08df",
+              "IPY_MODEL_02b02005adf241a4a0be8173ca3a4aee",
+              "IPY_MODEL_b317ba38f2b145f9b0b49f523547684f"
+            ],
+            "layout": "IPY_MODEL_434340d109d1401d8868498a23b291cf"
+          }
+        },
+        "c88027eb3e1c4771ab57366070ecd553": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "ca588157678e4cc09c3fd760676efd39": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "cb7d88a70af746f2ae31416b4b670c63": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "d3104837d9734834b7c87e87289b08df": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_2c95f5b81fc84ad698fe77b52cb84076",
+            "placeholder": "",
+            "style": "IPY_MODEL_ca588157678e4cc09c3fd760676efd39",
+            "value": "Generating train split: 100%"
+          }
+        },
+        "df75b255bfb04057b553830b59f0a153": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "ebb7ee3fd084466f9667771a99e6e3b2": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_f0e5024d0d054c1eb8e01c4c8b027e79",
+            "placeholder": "",
+            "style": "IPY_MODEL_937ee45f4d634d189c6d95c886e97bca",
+            "value": " 3.30M/3.30M [00:01&lt;00:00, 2.77MB/s]"
+          }
+        },
+        "ec2051bf0e9343d394e8a0ecb4fd5ec8": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f0e5024d0d054c1eb8e01c4c8b027e79": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f7359467b0214c5385de8ee4334f7ba3": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_05240e68c55a458286f43967e7f90889",
+            "placeholder": "",
+            "style": "IPY_MODEL_8cfa6df0ee654643bfdb4a3825e8fbbe",
+            "value": "Downloading readme: 100%"
+          }
+        },
+        "f74bdeb79a224de8b1c85f4ca8657331": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_888a323362ae4daeac99915bcb3dcf10",
+            "placeholder": "",
+            "style": "IPY_MODEL_4d0e364e9f274e8ea7447e4e01c7f28f",
+            "value": "Downloading metadata: 100%"
+          }
+        }
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

Others/attention_visual.ipynb ADDED Viewed

	@@ -0,0 +1,207 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "from model import Transformer\n",
+    "from config import get_config, get_weights_file_path\n",
+    "from train import get_model, get_ds, greedy_decode\n",
+    "import altair as alt\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import warnings\n",
+    "warnings.filterwarnings(\"ignore\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the device\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "print(\"Using device:\", device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = get_config()\n",
+    "train_dataloader, val_dataloader, vocab_src, vocab_tgt = get_ds(config)\n",
+    "model = get_model(config, vocab_src.get_vocab_size(), vocab_tgt.get_vocab_size()).to(device)\n",
+    "\n",
+    "# Load the pretrained weights\n",
+    "model_filename = get_weights_file_path(config, f\"29\")\n",
+    "state = torch.load(model_filename)\n",
+    "model.load_state_dict(state['model_state_dict'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_next_batch():\n",
+    "    # Load a sample batch from the validation set\n",
+    "    batch = next(iter(val_dataloader))\n",
+    "    encoder_input = batch[\"encoder_input\"].to(device)\n",
+    "    encoder_mask = batch[\"encoder_mask\"].to(device)\n",
+    "    decoder_input = batch[\"decoder_input\"].to(device)\n",
+    "    decoder_mask = batch[\"decoder_mask\"].to(device)\n",
+    "\n",
+    "    encoder_input_tokens = [vocab_src.id_to_token(idx) for idx in encoder_input[0].cpu().numpy()]\n",
+    "    decoder_input_tokens = [vocab_tgt.id_to_token(idx) for idx in decoder_input[0].cpu().numpy()]\n",
+    "\n",
+    "    # check that the batch size is 1\n",
+    "    assert encoder_input.size(\n",
+    "        0) == 1, \"Batch size must be 1 for validation\"\n",
+    "\n",
+    "    model_out = greedy_decode(\n",
+    "        model, encoder_input, encoder_mask, vocab_src, vocab_tgt, config['seq_len'], device)\n",
+    "    \n",
+    "    return batch, encoder_input_tokens, decoder_input_tokens"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def mtx2df(m, max_row, max_col, row_tokens, col_tokens):\n",
+    "    return pd.DataFrame(\n",
+    "        [\n",
+    "            (\n",
+    "                r,\n",
+    "                c,\n",
+    "                float(m[r, c]),\n",
+    "                \"%.3d %s\" % (r, row_tokens[r] if len(row_tokens) > r else \"<blank>\"),\n",
+    "                \"%.3d %s\" % (c, col_tokens[c] if len(col_tokens) > c else \"<blank>\"),\n",
+    "            )\n",
+    "            for r in range(m.shape[0])\n",
+    "            for c in range(m.shape[1])\n",
+    "            if r < max_row and c < max_col\n",
+    "        ],\n",
+    "        columns=[\"row\", \"column\", \"value\", \"row_token\", \"col_token\"],\n",
+    "    )\n",
+    "\n",
+    "def get_attn_map(attn_type: str, layer: int, head: int):\n",
+    "    if attn_type == \"encoder\":\n",
+    "        attn = model.encoder.layers[layer].self_attention_block.attention_scores\n",
+    "    elif attn_type == \"decoder\":\n",
+    "        attn = model.decoder.layers[layer].self_attention_block.attention_scores\n",
+    "    elif attn_type == \"encoder-decoder\":\n",
+    "        attn = model.decoder.layers[layer].cross_attention_block.attention_scores\n",
+    "    return attn[0, head].data\n",
+    "\n",
+    "def attn_map(attn_type, layer, head, row_tokens, col_tokens, max_sentence_len):\n",
+    "    df = mtx2df(\n",
+    "        get_attn_map(attn_type, layer, head),\n",
+    "        max_sentence_len,\n",
+    "        max_sentence_len,\n",
+    "        row_tokens,\n",
+    "        col_tokens,\n",
+    "    )\n",
+    "    return (\n",
+    "        alt.Chart(data=df)\n",
+    "        .mark_rect()\n",
+    "        .encode(\n",
+    "            x=alt.X(\"col_token\", axis=alt.Axis(title=\"\")),\n",
+    "            y=alt.Y(\"row_token\", axis=alt.Axis(title=\"\")),\n",
+    "            color=\"value\",\n",
+    "            tooltip=[\"row\", \"column\", \"value\", \"row_token\", \"col_token\"],\n",
+    "        )\n",
+    "        #.title(f\"Layer {layer} Head {head}\")\n",
+    "        .properties(height=400, width=400, title=f\"Layer {layer} Head {head}\")\n",
+    "        .interactive()\n",
+    "    )\n",
+    "\n",
+    "def get_all_attention_maps(attn_type: str, layers: list[int], heads: list[int], row_tokens: list, col_tokens, max_sentence_len: int):\n",
+    "    charts = []\n",
+    "    for layer in layers:\n",
+    "        rowCharts = []\n",
+    "        for head in heads:\n",
+    "            rowCharts.append(attn_map(attn_type, layer, head, row_tokens, col_tokens, max_sentence_len))\n",
+    "        charts.append(alt.hconcat(*rowCharts))\n",
+    "    return alt.vconcat(*charts)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch, encoder_input_tokens, decoder_input_tokens = load_next_batch()\n",
+    "print(f'Source: {batch[\"src_text\"][0]}')\n",
+    "print(f'Target: {batch[\"tgt_text\"][0]}')\n",
+    "sentence_len = encoder_input_tokens.index(\"[PAD]\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "layers = [0, 1, 2]\n",
+    "heads = [0, 1, 2, 3, 4, 5, 6, 7]\n",
+    "\n",
+    "# Encoder Self-Attention\n",
+    "get_all_attention_maps(\"encoder\", layers, heads, encoder_input_tokens, encoder_input_tokens, min(20, sentence_len))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Decoder Self-Attention\n",
+    "get_all_attention_maps(\"decoder\", layers, heads, decoder_input_tokens, decoder_input_tokens, min(20, sentence_len))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Encoder-Decoder Cross-Attention\n",
+    "get_all_attention_maps(\"encoder-decoder\", layers, heads, encoder_input_tokens, decoder_input_tokens, min(20, sentence_len))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "transformer",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

Others/conda.txt ADDED Viewed

	@@ -0,0 +1,24 @@

+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# platform: linux-64
+@EXPLICIT
+https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda
+https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.08.22-h06a4308_0.conda
+https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda
+https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda
+https://repo.anaconda.com/pkgs/main/noarch/tzdata-2023c-h04d1e81_0.conda
+https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda
+https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda
+https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda
+https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_0.conda
+https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda
+https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.12-h7f8727e_0.conda
+https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.5-h5eee18b_0.conda
+https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda
+https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda
+https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda
+https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.41.2-h5eee18b_0.conda
+https://repo.anaconda.com/pkgs/main/linux-64/python-3.9.18-h955ad1f_0.conda
+https://repo.anaconda.com/pkgs/main/linux-64/setuptools-68.0.0-py39h06a4308_0.conda
+https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.41.2-py39h06a4308_0.conda
+https://repo.anaconda.com/pkgs/main/linux-64/pip-23.3.1-py39h06a4308_0.conda

Others/requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+## Use python 3.9
+torch==2.0.1
+torchvision==0.15.2
+torchaudio==2.0.2
+torchtext==0.15.2
+datasets==2.15.0
+tokenizers==0.13.3
+torchmetrics==1.0.3
+tensorboard==2.13.0
+altair==5.1.1
+wandb==0.15.9

Others/train_wb.py ADDED Viewed

	@@ -0,0 +1,274 @@

+from model import build_transformer
+from dataset import BilingualDataset, causal_mask
+from config import get_config, get_weights_file_path
+import torchtext.datasets as datasets
+import torch
+import torch.nn as nn
+from torch.utils.data import Dataset, DataLoader, random_split
+from torch.optim.lr_scheduler import LambdaLR
+import warnings
+from tqdm import tqdm
+import os
+from pathlib import Path
+# Huggingface datasets and tokenizers
+from datasets import load_dataset
+from tokenizers import Tokenizer
+from tokenizers.models import WordLevel
+from tokenizers.trainers import WordLevelTrainer
+from tokenizers.pre_tokenizers import Whitespace
+import wandb
+import torchmetrics
+def greedy_decode(model, source, source_mask, tokenizer_src, tokenizer_tgt, max_len, device):
+    sos_idx = tokenizer_tgt.token_to_id('[SOS]')
+    eos_idx = tokenizer_tgt.token_to_id('[EOS]')
+    # Precompute the encoder output and reuse it for every step
+    encoder_output = model.encode(source, source_mask)
+    # Initialize the decoder input with the sos token
+    decoder_input = torch.empty(1, 1).fill_(sos_idx).type_as(source).to(device)
+    while True:
+        if decoder_input.size(1) == max_len:
+            break
+        # build mask for target
+        decoder_mask = causal_mask(decoder_input.size(1)).type_as(source_mask).to(device)
+        # calculate output
+        out = model.decode(encoder_output, source_mask, decoder_input, decoder_mask)
+        # get next token
+        prob = model.project(out[:, -1])
+        _, next_word = torch.max(prob, dim=1)
+        decoder_input = torch.cat(
+            [decoder_input, torch.empty(1, 1).type_as(source).fill_(next_word.item()).to(device)], dim=1
+        )
+        if next_word == eos_idx:
+            break
+    return decoder_input.squeeze(0)
+def run_validation(model, validation_ds, tokenizer_src, tokenizer_tgt, max_len, device, print_msg, global_step, num_examples=2):
+    model.eval()
+    count = 0
+    source_texts = []
+    expected = []
+    predicted = []
+    try:
+        # get the console window width
+        with os.popen('stty size', 'r') as console:
+            _, console_width = console.read().split()
+            console_width = int(console_width)
+    except:
+        # If we can't get the console width, use 80 as default
+        console_width = 80
+    with torch.no_grad():
+        for batch in validation_ds:
+            count += 1
+            encoder_input = batch["encoder_input"].to(device) # (b, seq_len)
+            encoder_mask = batch["encoder_mask"].to(device) # (b, 1, 1, seq_len)
+            # check that the batch size is 1
+            assert encoder_input.size(
+                0) == 1, "Batch size must be 1 for validation"
+            model_out = greedy_decode(model, encoder_input, encoder_mask, tokenizer_src, tokenizer_tgt, max_len, device)
+            source_text = batch["src_text"][0]
+            target_text = batch["tgt_text"][0]
+            model_out_text = tokenizer_tgt.decode(model_out.detach().cpu().numpy())
+            source_texts.append(source_text)
+            expected.append(target_text)
+            predicted.append(model_out_text)
+            # Print the source, target and model output
+            print_msg('-'*console_width)
+            print_msg(f"{f'SOURCE: ':>12}{source_text}")
+            print_msg(f"{f'TARGET: ':>12}{target_text}")
+            print_msg(f"{f'PREDICTED: ':>12}{model_out_text}")
+            if count == num_examples:
+                print_msg('-'*console_width)
+                break
+    # Evaluate the character error rate
+    # Compute the char error rate
+    metric = torchmetrics.CharErrorRate()
+    cer = metric(predicted, expected)
+    wandb.log({'validation/cer': cer, 'global_step': global_step})
+    # Compute the word error rate
+    metric = torchmetrics.WordErrorRate()
+    wer = metric(predicted, expected)
+    wandb.log({'validation/wer': wer, 'global_step': global_step})
+    # Compute the BLEU metric
+    metric = torchmetrics.BLEUScore()
+    bleu = metric(predicted, expected)
+    wandb.log({'validation/BLEU': bleu, 'global_step': global_step})
+def get_all_sentences(ds, lang):
+    for item in ds:
+        yield item['translation'][lang]
+def get_or_build_tokenizer(config, ds, lang):
+    tokenizer_path = Path(config['tokenizer_file'].format(lang))
+    if not Path.exists(tokenizer_path):
+        # Most code taken from: https://huggingface.co/docs/tokenizers/quicktour
+        tokenizer = Tokenizer(WordLevel(unk_token="[UNK]"))
+        tokenizer.pre_tokenizer = Whitespace()
+        trainer = WordLevelTrainer(special_tokens=["[UNK]", "[PAD]", "[SOS]", "[EOS]"], min_frequency=2)
+        tokenizer.train_from_iterator(get_all_sentences(ds, lang), trainer=trainer)
+        tokenizer.save(str(tokenizer_path))
+    else:
+        tokenizer = Tokenizer.from_file(str(tokenizer_path))
+    return tokenizer
+def get_ds(config):
+    # It only has the train split, so we divide it overselves
+    ds_raw = load_dataset('opus_books', f"{config['lang_src']}-{config['lang_tgt']}", split='train')
+    # Build tokenizers
+    tokenizer_src = get_or_build_tokenizer(config, ds_raw, config['lang_src'])
+    tokenizer_tgt = get_or_build_tokenizer(config, ds_raw, config['lang_tgt'])
+    # Keep 90% for training, 10% for validation
+    train_ds_size = int(0.9 * len(ds_raw))
+    val_ds_size = len(ds_raw) - train_ds_size
+    train_ds_raw, val_ds_raw = random_split(ds_raw, [train_ds_size, val_ds_size])
+    train_ds = BilingualDataset(train_ds_raw, tokenizer_src, tokenizer_tgt, config['lang_src'], config['lang_tgt'], config['seq_len'])
+    val_ds = BilingualDataset(val_ds_raw, tokenizer_src, tokenizer_tgt, config['lang_src'], config['lang_tgt'], config['seq_len'])
+    # Find the maximum length of each sentence in the source and target sentence
+    max_len_src = 0
+    max_len_tgt = 0
+    for item in ds_raw:
+        src_ids = tokenizer_src.encode(item['translation'][config['lang_src']]).ids
+        tgt_ids = tokenizer_tgt.encode(item['translation'][config['lang_tgt']]).ids
+        max_len_src = max(max_len_src, len(src_ids))
+        max_len_tgt = max(max_len_tgt, len(tgt_ids))
+    print(f'Max length of source sentence: {max_len_src}')
+    print(f'Max length of target sentence: {max_len_tgt}')
+    train_dataloader = DataLoader(train_ds, batch_size=config['batch_size'], shuffle=True)
+    val_dataloader = DataLoader(val_ds, batch_size=1, shuffle=True)
+    return train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt
+def get_model(config, vocab_src_len, vocab_tgt_len):
+    model = build_transformer(vocab_src_len, vocab_tgt_len, config["seq_len"], config['seq_len'], d_model=config['d_model'])
+    return model
+def train_model(config):
+    # Define the device
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print("Using device:", device)
+    # Make sure the weights folder exists
+    Path(config['model_folder']).mkdir(parents=True, exist_ok=True)
+    train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt = get_ds(config)
+    model = get_model(config, tokenizer_src.get_vocab_size(), tokenizer_tgt.get_vocab_size()).to(device)
+    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'], eps=1e-9)
+    # If the user specified a model to preload before training, load it
+    initial_epoch = 0
+    global_step = 0
+    if config['preload']:
+        model_filename = get_weights_file_path(config, config['preload'])
+        print(f'Preloading model {model_filename}')
+        state = torch.load(model_filename)
+        model.load_state_dict(state['model_state_dict'])
+        initial_epoch = state['epoch'] + 1
+        optimizer.load_state_dict(state['optimizer_state_dict'])
+        global_step = state['global_step']
+        del state
+    loss_fn = nn.CrossEntropyLoss(ignore_index=tokenizer_src.token_to_id('[PAD]'), label_smoothing=0.1).to(device)
+    # define our custom x axis metric
+    wandb.define_metric("global_step")
+    # define which metrics will be plotted against it
+    wandb.define_metric("validation/*", step_metric="global_step")
+    wandb.define_metric("train/*", step_metric="global_step")
+    for epoch in range(initial_epoch, config['num_epochs']):
+        torch.cuda.empty_cache()
+        model.train()
+        batch_iterator = tqdm(train_dataloader, desc=f"Processing Epoch {epoch:02d}")
+        for batch in batch_iterator:
+            encoder_input = batch['encoder_input'].to(device) # (b, seq_len)
+            decoder_input = batch['decoder_input'].to(device) # (B, seq_len)
+            encoder_mask = batch['encoder_mask'].to(device) # (B, 1, 1, seq_len)
+            decoder_mask = batch['decoder_mask'].to(device) # (B, 1, seq_len, seq_len)
+            # Run the tensors through the encoder, decoder and the projection layer
+            encoder_output = model.encode(encoder_input, encoder_mask) # (B, seq_len, d_model)
+            decoder_output = model.decode(encoder_output, encoder_mask, decoder_input, decoder_mask) # (B, seq_len, d_model)
+            proj_output = model.project(decoder_output) # (B, seq_len, vocab_size)
+            # Compare the output with the label
+            label = batch['label'].to(device) # (B, seq_len)
+            # Compute the loss using a simple cross entropy
+            loss = loss_fn(proj_output.view(-1, tokenizer_tgt.get_vocab_size()), label.view(-1))
+            batch_iterator.set_postfix({"loss": f"{loss.item():6.3f}"})
+            # Log the loss
+            wandb.log({'train/loss': loss.item(), 'global_step': global_step})
+            # Backpropagate the loss
+            loss.backward()
+            # Update the weights
+            optimizer.step()
+            optimizer.zero_grad(set_to_none=True)
+            global_step += 1
+        # Run validation at the end of every epoch
+        run_validation(model, val_dataloader, tokenizer_src, tokenizer_tgt, config['seq_len'], device, lambda msg: batch_iterator.write(msg), global_step)
+        # Save the model at the end of every epoch
+        model_filename = get_weights_file_path(config, f"{epoch:02d}")
+        torch.save({
+            'epoch': epoch,
+            'model_state_dict': model.state_dict(),
+            'optimizer_state_dict': optimizer.state_dict(),
+            'global_step': global_step
+        }, model_filename)
+if __name__ == '__main__':
+    warnings.filterwarnings("ignore")
+    config = get_config()
+    config['num_epochs'] = 30
+    config['preload'] = None
+    wandb.init(
+        # set the wandb project where this run will be logged
+        project="pytorch-transformer",
+        # track hyperparameters and run metadata
+        config=config
+    )
+    train_model(config)

Others/translate.py ADDED Viewed

	@@ -0,0 +1,79 @@

+from pathlib import Path
+from config import get_config, latest_weights_file_path
+from model import build_transformer
+from tokenizers import Tokenizer
+from datasets import load_dataset
+from dataset import BilingualDataset
+import torch
+import sys
+def translate(sentence: str):
+    # Define the device, tokenizers, and model
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print("Using device:", device)
+    config = get_config()
+    tokenizer_src = Tokenizer.from_file(str(Path(config['tokenizer_file'].format(config['lang_src']))))
+    tokenizer_tgt = Tokenizer.from_file(str(Path(config['tokenizer_file'].format(config['lang_tgt']))))
+    model = build_transformer(tokenizer_src.get_vocab_size(), tokenizer_tgt.get_vocab_size(), config["seq_len"], config['seq_len'], d_model=config['d_model']).to(device)
+    # Load the pretrained weights
+    model_filename = latest_weights_file_path(config)
+    state = torch.load(model_filename)
+    model.load_state_dict(state['model_state_dict'])
+    # if the sentence is a number use it as an index to the test set
+    label = ""
+    if type(sentence) == int or sentence.isdigit():
+        id = int(sentence)
+        ds = load_dataset(f"{config['datasource']}", f"{config['lang_src']}-{config['lang_tgt']}", split='all')
+        ds = BilingualDataset(ds, tokenizer_src, tokenizer_tgt, config['lang_src'], config['lang_tgt'], config['seq_len'])
+        sentence = ds[id]['src_text']
+        label = ds[id]["tgt_text"]
+    seq_len = config['seq_len']
+    # translate the sentence
+    model.eval()
+    with torch.no_grad():
+        # Precompute the encoder output and reuse it for every generation step
+        source = tokenizer_src.encode(sentence)
+        source = torch.cat([
+            torch.tensor([tokenizer_src.token_to_id('[SOS]')], dtype=torch.int64),
+            torch.tensor(source.ids, dtype=torch.int64),
+            torch.tensor([tokenizer_src.token_to_id('[EOS]')], dtype=torch.int64),
+            torch.tensor([tokenizer_src.token_to_id('[PAD]')] * (seq_len - len(source.ids) - 2), dtype=torch.int64)
+        ], dim=0).to(device)
+        source_mask = (source != tokenizer_src.token_to_id('[PAD]')).unsqueeze(0).unsqueeze(0).int().to(device)
+        encoder_output = model.encode(source, source_mask)
+        # Initialize the decoder input with the sos token
+        decoder_input = torch.empty(1, 1).fill_(tokenizer_tgt.token_to_id('[SOS]')).type_as(source).to(device)
+        # Print the source sentence and target start prompt
+        if label != "": print(f"{f'ID: ':>12}{id}")
+        print(f"{f'SOURCE: ':>12}{sentence}")
+        if label != "": print(f"{f'TARGET: ':>12}{label}")
+        print(f"{f'PREDICTED: ':>12}", end='')
+        # Generate the translation word by word
+        while decoder_input.size(1) < seq_len:
+            # build mask for target and calculate output
+            decoder_mask = torch.triu(torch.ones((1, decoder_input.size(1), decoder_input.size(1))), diagonal=1).type(torch.int).type_as(source_mask).to(device)
+            out = model.decode(encoder_output, source_mask, decoder_input, decoder_mask)
+            # project next token
+            prob = model.project(out[:, -1])
+            _, next_word = torch.max(prob, dim=1)
+            decoder_input = torch.cat([decoder_input, torch.empty(1, 1).type_as(source).fill_(next_word.item()).to(device)], dim=1)
+            # print the translated word
+            print(f"{tokenizer_tgt.decode([next_word.item()])}", end=' ')
+            # break if we predict the end of sentence token
+            if next_word == tokenizer_tgt.token_to_id('[EOS]'):
+                break
+    # convert ids to tokens
+    return tokenizer_tgt.decode(decoder_input[0].tolist())
+#read sentence from argument
+translate(sys.argv[1] if len(sys.argv) > 1 else "I am not a very good a student.")