diff --git "a/space/space/space/notebooks/Softmax_PhoBERT.ipynb" "b/space/space/space/notebooks/Softmax_PhoBERT.ipynb"
new file mode 100644--- /dev/null
+++ "b/space/space/space/notebooks/Softmax_PhoBERT.ipynb"
@@ -0,0 +1,5063 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 191
+        },
+        "id": "yRrmkevlCjXr",
+        "outputId": "b0abb114-925d-4ebf-f9ab-1abe0ce61723"
+      },
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.Javascript object>"
+            ],
+            "application/javascript": [
+              "\n",
+              "        window._wandbApiKey = new Promise((resolve, reject) => {\n",
+              "            function loadScript(url) {\n",
+              "            return new Promise(function(resolve, reject) {\n",
+              "                let newScript = document.createElement(\"script\");\n",
+              "                newScript.onerror = reject;\n",
+              "                newScript.onload = resolve;\n",
+              "                document.body.appendChild(newScript);\n",
+              "                newScript.src = url;\n",
+              "            });\n",
+              "            }\n",
+              "            loadScript(\"https://cdn.jsdelivr.net/npm/postmate/build/postmate.min.js\").then(() => {\n",
+              "            const iframe = document.createElement('iframe')\n",
+              "            iframe.style.cssText = \"width:0;height:0;border:none\"\n",
+              "            document.body.appendChild(iframe)\n",
+              "            const handshake = new Postmate({\n",
+              "                container: iframe,\n",
+              "                url: 'https://wandb.ai/authorize'\n",
+              "            });\n",
+              "            const timeout = setTimeout(() => reject(\"Couldn't auto authenticate\"), 5000)\n",
+              "            handshake.then(function(child) {\n",
+              "                child.on('authorize', data => {\n",
+              "                    clearTimeout(timeout)\n",
+              "                    resolve(data)\n",
+              "                });\n",
+              "            });\n",
+              "            })\n",
+              "        });\n",
+              "    "
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n",
+            "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n",
+            "wandb: Paste an API key from your profile and hit enter:"
+          ]
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            " ··········\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n",
+            "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n",
+            "\u001b[34m\u001b[1mwandb\u001b[0m: No netrc file found, creating one.\n",
+            "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n",
+            "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mlenguyenquocanh-vn-fptu\u001b[0m (\u001b[33mlenguyenquocanh-vn-fptu-fpt-university\u001b[0m) to \u001b[32mhttps://api.wandb.ai\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "True"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 1
+        }
+      ],
+      "source": [
+        "import wandb\n",
+        "# Authenticate this session with wandb; the saved output shows it prompting for an API key and appending it to /root/.netrc.\n",
+        "wandb.login()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "YY74yDYXID_a"
+      },
+      "source": [
+        "# Data Preparation"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "66m2J73nGXEV",
+        "outputId": "24173ca4-38fe-4f9e-f9a8-b39bdfdabe72"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
+            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+            "You will be able to reuse this secret in all of your notebooks.\n",
+            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+            "  warnings.warn(\n"
+          ]
+        }
+      ],
+      "source": [
+        "import pandas as pd\n",
+        "\n",
+        "# Parquet shard for each split, given as a path inside the Hugging Face dataset repo.\n",
+        "splits = {\n",
+        "    'train': 'data/train-00000-of-00001-b0417886a268b83a.parquet',\n",
+        "    'valid': 'data/valid-00000-of-00001-846411c236133ba3.parquet',\n",
+        "}\n",
+        "\n",
+        "# Read both shards through their hf:// URLs (train first, then valid).\n",
+        "df_train, df_valid = (\n",
+        "    pd.read_parquet(\"hf://datasets/datnth1709/VLSP2016-NER-data/\" + splits[split_name])\n",
+        "    for split_name in (\"train\", \"valid\")\n",
+        ")\n",
+        "\n",
+        "# Stack the two frames row-wise and renumber the rows 0..n-1.\n",
+        "df = pd.concat([df_train, df_valid], ignore_index=True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 424
+        },
+        "id": "U81OmhBeGmMM",
+        "outputId": "f6c51bb0-3b7b-4029-e1fc-ae8d9a75ef87"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                                  tokens  \\\n",
+              "0                        [Không_khí, thật, náo_nhiệt, .]   \n",
+              "1      [Chị, Lãnh, và, Xăng, ra, đi, ,, mình, đứng, n...   \n",
+              "2      [Suy_tính, mãi, ,, khóc, mãi, rồi, Phúc, lấy, ...   \n",
+              "3      [Hoà, bảo, hồi, mới, qua, đâu, có, biết, nấu_n...   \n",
+              "4                         [Nhật_ký, của, thuyền_viên, .]   \n",
+              "...                                                  ...   \n",
+              "16853  [Nghe, thấy, đã, ghê_ghê, nhưng, Nhiêu, chưa, ...   \n",
+              "16854        [Nhưng, mọi, chuyện, không, dừng, ở, đó, .]   \n",
+              "16855  [Hoà, bảo, thời_gian, đầu, mặc_cảm, lắm, ,, ở,...   \n",
+              "16856  [Biết_bao, người, đã, tình_nguyện, hiến_dâng, ...   \n",
+              "16857  [Trên, đây, mới, là, “, thành_tích, ”, tiêu, t...   \n",
+              "\n",
+              "                                                      id  \\\n",
+              "0                                           [0, 0, 0, 0]   \n",
+              "1      [0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   \n",
+              "2      [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...   \n",
+              "3      [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, ...   \n",
+              "4                                           [0, 0, 0, 0]   \n",
+              "...                                                  ...   \n",
+              "16853  [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...   \n",
+              "16854                           [0, 0, 0, 0, 0, 0, 0, 0]   \n",
+              "16855  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   \n",
+              "16856      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]   \n",
+              "16857  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   \n",
+              "\n",
+              "                                                seg_text  \\\n",
+              "0                             Không_khí thật náo_nhiệt .   \n",
+              "1      Chị Lãnh và Xăng ra đi , mình đứng nhìn hai ch...   \n",
+              "2      Suy_tính mãi , khóc mãi rồi Phúc lấy ra tờ giấ...   \n",
+              "3      Hoà bảo hồi mới qua đâu có biết nấu_nướng gì ,...   \n",
+              "4                              Nhật_ký của thuyền_viên .   \n",
+              "...                                                  ...   \n",
+              "16853  Nghe thấy đã ghê_ghê nhưng Nhiêu chưa được tườ...   \n",
+              "16854                 Nhưng mọi chuyện không dừng ở đó .   \n",
+              "16855  Hoà bảo thời_gian đầu mặc_cảm lắm , ở trong nh...   \n",
+              "16856  Biết_bao người đã tình_nguyện hiến_dâng cả cuộ...   \n",
+              "16857  Trên đây mới là “ thành_tích ” tiêu tiền của m...   \n",
+              "\n",
+              "                                                raw_text  \\\n",
+              "0                             Không khí thật náo nhiệt .   \n",
+              "1      Chị Lãnh và Xăng ra đi , mình đứng nhìn hai ch...   \n",
+              "2      Suy tính mãi , khóc mãi rồi Phúc lấy ra tờ giấ...   \n",
+              "3      Hoà bảo hồi mới qua đâu có biết nấu nướng gì ,...   \n",
+              "4                              Nhật ký của thuyền viên .   \n",
+              "...                                                  ...   \n",
+              "16853  Nghe thấy đã ghê ghê nhưng Nhiêu chưa được tườ...   \n",
+              "16854                 Nhưng mọi chuyện không dừng ở đó .   \n",
+              "16855  Hoà bảo thời gian đầu mặc cảm lắm , ở trong nh...   \n",
+              "16856  Biết bao người đã tình nguyện hiến dâng cả cuộ...   \n",
+              "16857  Trên đây mới là “ thành tích ” tiêu tiền của m...   \n",
+              "\n",
+              "                                                  labels  \n",
+              "0                                           [O, O, O, O]  \n",
+              "1      [O, B-PER, O, B-PER, O, O, O, O, O, O, O, O, O...  \n",
+              "2      [O, O, O, O, O, O, B-PER, O, O, O, O, O, O, O,...  \n",
+              "3      [B-PER, O, O, O, O, O, O, O, O, O, O, O, O, B-...  \n",
+              "4                                           [O, O, O, O]  \n",
+              "...                                                  ...  \n",
+              "16853  [O, O, O, O, O, B-PER, O, O, O, O, O, O, O, O,...  \n",
+              "16854                           [O, O, O, O, O, O, O, O]  \n",
+              "16855  [B-PER, O, O, O, O, O, O, O, O, O, O, O, O, O,...  \n",
+              "16856      [O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]  \n",
+              "16857  [O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...  \n",
+              "\n",
+              "[16858 rows x 5 columns]"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-80ed26fa-649c-4578-9b8d-9efd6c581c06\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>tokens</th>\n",
+              "      <th>id</th>\n",
+              "      <th>seg_text</th>\n",
+              "      <th>raw_text</th>\n",
+              "      <th>labels</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>[Không_khí, thật, náo_nhiệt, .]</td>\n",
+              "      <td>[0, 0, 0, 0]</td>\n",
+              "      <td>Không_khí thật náo_nhiệt .</td>\n",
+              "      <td>Không khí thật náo nhiệt .</td>\n",
+              "      <td>[O, O, O, O]</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>[Chị, Lãnh, và, Xăng, ra, đi, ,, mình, đứng, n...</td>\n",
+              "      <td>[0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+              "      <td>Chị Lãnh và Xăng ra đi , mình đứng nhìn hai ch...</td>\n",
+              "      <td>Chị Lãnh và Xăng ra đi , mình đứng nhìn hai ch...</td>\n",
+              "      <td>[O, B-PER, O, B-PER, O, O, O, O, O, O, O, O, O...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>[Suy_tính, mãi, ,, khóc, mãi, rồi, Phúc, lấy, ...</td>\n",
+              "      <td>[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+              "      <td>Suy_tính mãi , khóc mãi rồi Phúc lấy ra tờ giấ...</td>\n",
+              "      <td>Suy tính mãi , khóc mãi rồi Phúc lấy ra tờ giấ...</td>\n",
+              "      <td>[O, O, O, O, O, O, B-PER, O, O, O, O, O, O, O,...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>[Hoà, bảo, hồi, mới, qua, đâu, có, biết, nấu_n...</td>\n",
+              "      <td>[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, ...</td>\n",
+              "      <td>Hoà bảo hồi mới qua đâu có biết nấu_nướng gì ,...</td>\n",
+              "      <td>Hoà bảo hồi mới qua đâu có biết nấu nướng gì ,...</td>\n",
+              "      <td>[B-PER, O, O, O, O, O, O, O, O, O, O, O, O, B-...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>[Nhật_ký, của, thuyền_viên, .]</td>\n",
+              "      <td>[0, 0, 0, 0]</td>\n",
+              "      <td>Nhật_ký của thuyền_viên .</td>\n",
+              "      <td>Nhật ký của thuyền viên .</td>\n",
+              "      <td>[O, O, O, O]</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>16853</th>\n",
+              "      <td>[Nghe, thấy, đã, ghê_ghê, nhưng, Nhiêu, chưa, ...</td>\n",
+              "      <td>[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...</td>\n",
+              "      <td>Nghe thấy đã ghê_ghê nhưng Nhiêu chưa được tườ...</td>\n",
+              "      <td>Nghe thấy đã ghê ghê nhưng Nhiêu chưa được tườ...</td>\n",
+              "      <td>[O, O, O, O, O, B-PER, O, O, O, O, O, O, O, O,...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>16854</th>\n",
+              "      <td>[Nhưng, mọi, chuyện, không, dừng, ở, đó, .]</td>\n",
+              "      <td>[0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
+              "      <td>Nhưng mọi chuyện không dừng ở đó .</td>\n",
+              "      <td>Nhưng mọi chuyện không dừng ở đó .</td>\n",
+              "      <td>[O, O, O, O, O, O, O, O]</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>16855</th>\n",
+              "      <td>[Hoà, bảo, thời_gian, đầu, mặc_cảm, lắm, ,, ở,...</td>\n",
+              "      <td>[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+              "      <td>Hoà bảo thời_gian đầu mặc_cảm lắm , ở trong nh...</td>\n",
+              "      <td>Hoà bảo thời gian đầu mặc cảm lắm , ở trong nh...</td>\n",
+              "      <td>[B-PER, O, O, O, O, O, O, O, O, O, O, O, O, O,...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>16856</th>\n",
+              "      <td>[Biết_bao, người, đã, tình_nguyện, hiến_dâng, ...</td>\n",
+              "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
+              "      <td>Biết_bao người đã tình_nguyện hiến_dâng cả cuộ...</td>\n",
+              "      <td>Biết bao người đã tình nguyện hiến dâng cả cuộ...</td>\n",
+              "      <td>[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>16857</th>\n",
+              "      <td>[Trên, đây, mới, là, “, thành_tích, ”, tiêu, t...</td>\n",
+              "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+              "      <td>Trên đây mới là “ thành_tích ” tiêu tiền của m...</td>\n",
+              "      <td>Trên đây mới là “ thành tích ” tiêu tiền của m...</td>\n",
+              "      <td>[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>16858 rows × 5 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-80ed26fa-649c-4578-9b8d-9efd6c581c06')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-80ed26fa-649c-4578-9b8d-9efd6c581c06 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-80ed26fa-649c-4578-9b8d-9efd6c581c06');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "    <div id=\"df-234b0103-d887-40f3-916e-fe03774c73fe\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-234b0103-d887-40f3-916e-fe03774c73fe')\"\n",
+              "                title=\"Suggest charts\"\n",
+              "                style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "      </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "      <script>\n",
+              "        async function quickchart(key) {\n",
+              "          const quickchartButtonEl =\n",
+              "            document.querySelector('#' + key + ' button');\n",
+              "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "          try {\n",
+              "            const charts = await google.colab.kernel.invokeFunction(\n",
+              "                'suggestCharts', [key], {});\n",
+              "          } catch (error) {\n",
+              "            console.error('Error during call to suggestCharts:', error);\n",
+              "          }\n",
+              "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "        }\n",
+              "        (() => {\n",
+              "          let quickchartButtonEl =\n",
+              "            document.querySelector('#df-234b0103-d887-40f3-916e-fe03774c73fe button');\n",
+              "          quickchartButtonEl.style.display =\n",
+              "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "        })();\n",
+              "      </script>\n",
+              "    </div>\n",
+              "\n",
+              "  <div id=\"id_ec456ee6-f1c9-450a-be96-df5df4cff0d7\">\n",
+              "    <style>\n",
+              "      .colab-df-generate {\n",
+              "        background-color: #E8F0FE;\n",
+              "        border: none;\n",
+              "        border-radius: 50%;\n",
+              "        cursor: pointer;\n",
+              "        display: none;\n",
+              "        fill: #1967D2;\n",
+              "        height: 32px;\n",
+              "        padding: 0 0 0 0;\n",
+              "        width: 32px;\n",
+              "      }\n",
+              "\n",
+              "      .colab-df-generate:hover {\n",
+              "        background-color: #E2EBFA;\n",
+              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "        fill: #174EA6;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate {\n",
+              "        background-color: #3B4455;\n",
+              "        fill: #D2E3FC;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate:hover {\n",
+              "        background-color: #434B5C;\n",
+              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "        fill: #FFFFFF;\n",
+              "      }\n",
+              "    </style>\n",
+              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df')\"\n",
+              "            title=\"Generate code using this dataframe.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "    <script>\n",
+              "      (() => {\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#id_ec456ee6-f1c9-450a-be96-df5df4cff0d7 button.colab-df-generate');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      buttonEl.onclick = () => {\n",
+              "        google.colab.notebook.generateWithVariable('df');\n",
+              "      }\n",
+              "      })();\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "df",
+              "summary": "{\n  \"name\": \"df\",\n  \"rows\": 16858,\n  \"fields\": [\n    {\n      \"column\": \"tokens\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"id\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"seg_text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 16787,\n        \"samples\": [\n          \"T\\u00ednh th\\u00f4ng_minh , l\\u1ea1i t\\u00f2_m\\u00f2 , anh Ki\\u1ec7m b\\u1eaft_\\u0111\\u1ea7u \\u0111i \\u0111\\u1ebfn c\\u00e1c x\\u01b0\\u1edfng c\\u01a1_kh\\u00ed \\u0111\\u1ec3 quan_s\\u00e1t c\\u00e1c lo\\u1ea1i m\\u00e1y_m\\u00f3c , r\\u1ed3i v\\u1ec1 nh\\u00e0 suy_ngh\\u0129 v\\u00e0 c\\u1ea7m b\\u00fat v\\u1ebd ph\\u00e1c_ho\\u1ea1 ra c\\u00e1i m\\u00e1y v\\u00fat g\\u1ea1o .\",\n          \"V\\u1eady th\\u00ec , h\\u1ecd c\\u1ea7n ph\\u1ea3i \\u0111\\u01b0\\u1ee3c gi\\u00fap_\\u0111\\u1ee1 , ph\\u1ea3i \\u0111\\u01b0\\u1ee3c s\\u1ed1ng \\u0111\\u00e0ng_ho\\u00e0ng , ph\\u1ea3i \\u0111\\u01b0\\u1ee3c l\\u00e0m ng\\u01b0\\u1eddi d\\u00f9 ch\\u1ec9 l\\u00e0 nh\\u1eefng ng\\u00e0y cu\\u1ed1i_c\\u00f9ng .\",\n          \"Nhi\\u1ec1u ng\\u01b0\\u1eddi th\\u00f4ng_d\\u1ecbch c\\u00f9ng th\\u1eddi v\\u1edbi Nguy\\u1ec5n Trung Hi\\u1ebfu c\\u0169ng \\u0111\\u00e3 ch\\u1ebft trong khi th\\u1ef1c_hi\\u1ec7n nhi\\u1ec7m_v\\u1ee5 t\\u1ea1i chi\\u1ebfn_tr\\u01b0\\u1eddng ho\\u1eb7c tr\\u00ean \\u0111\\u01b0\\u1eddng h\\u00e0nh_qu\\u00e2n .\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"raw_text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 16785,\n        \"samples\": [\n          \"Trong kho\\u1ea3ng th\\u1eddi gian \\u0111\\u00f3 ch\\u1ecb 
c\\u1ed1 c\\u00f4ng t\\u1ef1 h\\u1ecdc ti\\u1ebfng Anh .\",\n          \"Sau \\u0111\\u00f3 , ch\\u00ednh b\\u00e0 Susan \\u0111\\u00e3 \\u0111\\u01b0a Mai l\\u00ean h\\u1ecdc \\u0111\\u1ea1i h\\u1ecdc , m\\u1ed7i n\\u0103m chu c\\u1ea5p cho c\\u00f4 30.000 USD .\",\n          \"T\\u1eeb r\\u1ea5t l\\u00e2u r\\u1ed3i t\\u00f4i v\\u1eabn ngh\\u0129 n\\u1ebfu nh\\u01b0 cu\\u1ed1n s\\u00e1ch \\u0111\\u01b0\\u1ee3c xu\\u1ea5t b\\u1ea3n , ho\\u1eb7c ng\\u01b0\\u1eddi ta l\\u00e0m phim v\\u1ec1 n\\u00f3 th\\u00ec t\\u00f4i s\\u1ebd d\\u00f9ng s\\u1ed1 ti\\u1ec1n b\\u00e1n s\\u00e1ch \\u0111\\u1ec3 thi\\u1ebft l\\u1eadp m\\u1ed9t s\\u1ed1 gi\\u01b0\\u1eddng b\\u1ec7nh t\\u1ea1i H\\u00e0 N\\u1ed9i .\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"labels\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 3
+        }
+      ],
+      "source": [
+        "# Build the additional derived columns\n",
+        "def join_tokens(tokens):\n",
+        "    \"\"\"Return the items of `tokens` joined by single spaces.\"\"\"\n",
+        "    return ' '.join(tokens)\n",
+        "\n",
+        "def reform_raw_text(tokens):\n",
+        "    \"\"\"Join `tokens` with single spaces, then turn every underscore into a space.\"\"\"\n",
+        "    return ' '.join(tokens).replace(\"_\", \" \")\n",
+        "\n",
+        "def label(x):\n",
+        "    \"\"\"Translate every tag id in `x` to its tag name using the notebook-level `id_tag` dict.\"\"\"\n",
+        "    names = []\n",
+        "    for raw_id in x:\n",
+        "        # int() lets non-builtin integer values be used as dict keys.\n",
+        "        names.append(id_tag[int(raw_id)])\n",
+        "    return names\n",
+        "\n",
+        "def replace_7_8(lst):\n",
+        "    \"\"\"Return a new list equal to `lst` except that every 7 or 8 becomes 0.\"\"\"\n",
+        "    remapped = []\n",
+        "    for value in lst:\n",
+        "        remapped.append(0 if value == 7 or value == 8 else value)\n",
+        "    return remapped\n",
+        "\n",
+        "\n",
+        "# Tag scheme: name -> id. The inverse map is derived from it so the two cannot drift apart.\n",
+        "tag_id = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6}\n",
+        "id_tag = {idx: name for name, idx in tag_id.items()}\n",
+        "\n",
+        "\n",
+        "# Fold tag ids 7 and 8 into 0 (the id of 'O' in tag_id) before mapping ids to names.\n",
+        "df['ner_tags'] = df['ner_tags'].apply(replace_7_8)\n",
+        "df['text_withseg'] = df['tokens'].apply(join_tokens)\n",
+        "df['text_raw'] = df['tokens'].apply(reform_raw_text)\n",
+        "df[\"ner_labels\"] = df.ner_tags.apply(label)\n",
+        "# Rename by column name instead of by position, so a different column order cannot\n",
+        "# silently mislabel the data; errors='raise' fails loudly if a column is missing.\n",
+        "df = df.rename(\n",
+        "    columns={\n",
+        "        'ner_tags': 'id',\n",
+        "        'text_withseg': 'seg_text',\n",
+        "        'text_raw': 'raw_text',\n",
+        "        'ner_labels': 'labels',\n",
+        "    },\n",
+        "    errors='raise',\n",
+        ")\n",
+        "df\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Sgpm-btsXxzt",
+        "outputId": "4275e090-0bb5-47a2-9b51-682d13bd7e45"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Chị Lãnh và Xăng ra đi , mình đứng nhìn hai chị quần xắn tròn trên vế , lặn_lội qua dòng suối nước chảy rần_rần , tự_nhiên nước_mắt mình rưng_rưng ...\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Show the space-joined tokens (seg_text) of row 1; underscores inside tokens are kept.\n",
+        "print(df['seg_text'][1])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "I_VaVTLfXxzu",
+        "outputId": "f5a568e0-0235-40b5-9a87-9df21b39af44"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Chị Lãnh và Xăng ra đi , mình đứng nhìn hai chị quần xắn tròn trên vế , lặn lội qua dòng suối nước chảy rần rần , tự nhiên nước mắt mình rưng rưng ...\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Show raw_text of row 1: the space-joined tokens with underscores replaced by spaces.\n",
+        "print(df['raw_text'][1])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 6,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Zum7uCLSXxzu",
+        "outputId": "44b2d50f-fec1-42c5-fd52-854d510ba13d"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "[np.int64(0), np.int64(1), np.int64(0), np.int64(1), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0)]\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Show the tag ids of row 1 (the 'id' column is the former ner_tags column after remapping 7/8 to 0).\n",
+        "print(df['id'][1])"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ooewb479FdqS"
+      },
+      "source": [
+        "# Get Embedding Vectors"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 7,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 889,
+          "referenced_widgets": [
+            "1d9aaa035056485c959f66b60cf41714",
+            "71054a4397e344a2a66e32892a37b59b",
+            "6c582399be1d42c3b5a4ef21743d1a26",
+            "26a6ef7f967a4504a698ff3152ccb24e",
+            "82669dd27686486588fdd7d11f49edd3",
+            "461c136b3eac4a9dadb8a3af7c11c98a",
+            "4fb68dcea1ac4e54b82c92ad64e9be95",
+            "df397db853874f6db911acb667785ec0",
+            "2de5befe0ab24de9a62ba076e5abf78e",
+            "d7da38e7c5e1484597bb1faae3c2d7f5",
+            "2ec3573cb04143a8ba5e555bfaf60165",
+            "67c557f2651b4e5c81e9af82531898fe",
+            "5a5a04069ebd41fdba7835e1b5da585e",
+            "6c5b30dc67d94071af4d4b14cf4be7e4",
+            "e9200c8269fa4ebd9c1157cc1b871005",
+            "50ab44e431a54c27b409dc74c068c392",
+            "9929425b4df94d3792dc454afe59b3fd",
+            "03efbd25cd4341cea6714ffc3585632a",
+            "ae22d9a4de574ce3905a6f6c82fac1aa",
+            "71fe669e2f68444ba4b81ffc14a39c03",
+            "4d4c131206f448c7ba6d5c4e41126d41",
+            "ec260d8279bd4a40ad2bbc3c1d7dadaa",
+            "08611ca37f8c462db079dc2883f06002",
+            "47eca14208cc4e5085d44cce42872a35",
+            "49d3e88f60e744e7b37bcbcca5bbb087",
+            "81b9bb3315e4402ea5b6768d0d189591",
+            "4b88329de5ed48738ca0da054a1f0131",
+            "1507e771a5ca4056b0605cd453d89c60",
+            "44946e0a5d31408a851b0e8ab5217c43",
+            "ee8c999b66e84cbda17702e916d48a3f",
+            "6c6228d416944599b110ffa97b20bd8c",
+            "c519c27334b742ee8f14e29da2ebdf9b",
+            "7d422740d70546559703cd0304be663f",
+            "371ab2b9d7c84402b3c4b934e89eca4b",
+            "7ead683f167c408f88ba72b2ee1599d1",
+            "737d3d1e5a3146de96c17ca8ec72d75e",
+            "7ec9beb535c9428a954367613fa7f4cd",
+            "3c51b3c0b7ba4a2eacb1b8b2be8e024c",
+            "50dc5fc6a7354394ace536241fa01714",
+            "78ba04a44d9e42dba9fb7617d28c91e9",
+            "ca87704af3bc4c7590eea8f8f0f50d94",
+            "f3870bce67da4affa8925d9d898638da",
+            "e6efac23366643dd861caf121a8a220b",
+            "1044cf40d7e54337859bab0057aa0b54",
+            "3a6bb86ac3db4f82ae139507f94607e0",
+            "04a96e9ef4774bd1a3cd3a1dd20fe194",
+            "6158b1f05f6c4851b492df312f0312ca",
+            "b81c4146511045e280ba4fa226074679",
+            "3676c8217c654790972189be8c1f4627",
+            "762b2c8d23824070b2eb115e151f0c73",
+            "d9d57b09b01846a4805882b4adf64e55",
+            "e804cb9279dd458aa8b661d28c4427ff",
+            "3169169dbabb4b1aa7906a0415eacdcb",
+            "408130d71bb74141906cbc1d2123bb63",
+            "4be265bd67e1470cbd856dd268908c00"
+          ]
+        },
+        "id": "b04c2Xq7IBac",
+        "outputId": "b09c7e51-baad-4dde-fea2-9d380a5988d9"
+      },
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "config.json:   0%|          | 0.00/557 [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "1d9aaa035056485c959f66b60cf41714"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "vocab.txt:   0%|          | 0.00/895k [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "67c557f2651b4e5c81e9af82531898fe"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "bpe.codes:   0%|          | 0.00/1.14M [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "08611ca37f8c462db079dc2883f06002"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "tokenizer.json:   0%|          | 0.00/3.13M [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "371ab2b9d7c84402b3c4b934e89eca4b"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "3a6bb86ac3db4f82ae139507f94607e0"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "RobertaModel(\n",
+              "  (embeddings): RobertaEmbeddings(\n",
+              "    (word_embeddings): Embedding(64001, 768, padding_idx=1)\n",
+              "    (position_embeddings): Embedding(258, 768, padding_idx=1)\n",
+              "    (token_type_embeddings): Embedding(1, 768)\n",
+              "    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "    (dropout): Dropout(p=0.1, inplace=False)\n",
+              "  )\n",
+              "  (encoder): RobertaEncoder(\n",
+              "    (layer): ModuleList(\n",
+              "      (0-11): 12 x RobertaLayer(\n",
+              "        (attention): RobertaAttention(\n",
+              "          (self): RobertaSdpaSelfAttention(\n",
+              "            (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "            (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "            (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "          (output): RobertaSelfOutput(\n",
+              "            (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "        (intermediate): RobertaIntermediate(\n",
+              "          (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          (intermediate_act_fn): GELUActivation()\n",
+              "        )\n",
+              "        (output): RobertaOutput(\n",
+              "          (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "          (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "          (dropout): Dropout(p=0.1, inplace=False)\n",
+              "        )\n",
+              "      )\n",
+              "    )\n",
+              "  )\n",
+              "  (pooler): RobertaPooler(\n",
+              "    (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "    (activation): Tanh()\n",
+              "  )\n",
+              ")"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 7
+        }
+      ],
+      "source": [
+        "import torch\n",
+        "from transformers import AutoTokenizer, AutoModel\n",
+        "from tqdm import tqdm\n",
+        "\n",
+        "# Load the PhoBERT tokenizer and model\n",
+        "tokenizer = AutoTokenizer.from_pretrained(\"vinai/phobert-base\", use_fast=False)\n",
+        "model = AutoModel.from_pretrained(\"vinai/phobert-base\")\n",
+        "# Put the model in evaluation mode (its Dropout layers become inactive); the returned\n",
+        "# module is the last expression of the cell, so its structure is displayed.\n",
+        "model.eval()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 8,
+      "metadata": {
+        "id": "Ejx_pVTzXxzv"
+      },
+      "outputs": [],
+      "source": [
+        "# Hand-written token list; 'học@@' carries the '@@' suffix that group_embeddings treats as a non-final word piece.\n",
+        "tokens_e1 = ['Tôi', 'thích', 'học', 'học@@', 'máy']\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 9,
+      "metadata": {
+        "id": "z-JZZ2VrJiQ6"
+      },
+      "outputs": [],
+      "source": [
+        "# Merge the embedding vectors of a word's sub-word pieces back into one vector per word\n",
+        "def group_embeddings(tokens, embeddings):\n",
+        "    \"\"\"Average sub-word embeddings into one embedding per word.\n",
+        "\n",
+        "    `tokens` and `embeddings` are walked in parallel. \"<s>\" and \"</s>\" are\n",
+        "    skipped. A token ending in \"@@\" is a non-final piece of a word; the next\n",
+        "    token without that suffix closes the word, and the mean over dim 0 of the\n",
+        "    stacked piece vectors becomes that word's embedding.\n",
+        "\n",
+        "    Returns a list with one tensor per word, in order of appearance.\n",
+        "    \"\"\"\n",
+        "    merged = []\n",
+        "    pending = []\n",
+        "\n",
+        "    for piece, vec in zip(tokens, embeddings):\n",
+        "        # Sentence-boundary markers do not belong to any word.\n",
+        "        if piece == \"<s>\" or piece == \"</s>\":\n",
+        "            continue\n",
+        "        pending.append(vec)\n",
+        "        # No \"@@\" suffix: this piece ends the current word.\n",
+        "        if not piece.endswith(\"@@\"):\n",
+        "            merged.append(torch.stack(pending).mean(dim=0))\n",
+        "            pending = []\n",
+        "\n",
+        "    # Flush pieces left open when the sequence ends on an \"@@\" piece.\n",
+        "    if pending:\n",
+        "        merged.append(torch.stack(pending).mean(dim=0))\n",
+        "\n",
+        "    return merged"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 10,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "FSAhQKN1Xxzw",
+        "outputId": "ddd0edd3-145e-4966-b78c-2f66f83bfd14"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Không khí thật náo nhiệt .\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Take raw_text of row 0 (underscores already replaced by spaces) as a sample sentence.\n",
+        "raw_e = df['raw_text'][0]\n",
+        "print(raw_e)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 11,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "OJ7ifS6wXxzw",
+        "outputId": "4908dbef-b495-4a17-e2f6-19a6b2b85eb3"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Số lượng từ trong câu là: 31\n",
+            "[0, 1108, 19703, 6, 28163, 40, 57, 4, 68, 414, 364, 82, 213, 2747, 20899, 2533, 34, 23798, 4, 13468, 89, 532, 3364, 58, 2181, 33151, 4, 1124, 2396, 68, 17865, 135, 2]\n",
+            "31\n",
+            "độ dài của tokens 33\n",
+            "<s>\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Sample word-segmented sentence and its per-word tag ids\n",
+        "sentence_e = 'Chị Lãnh và Xăng ra đi , mình đứng nhìn hai chị quần xắn tròn trên vế , lặn_lội qua dòng suối nước chảy rần_rần , tự_nhiên nước_mắt mình rưng_rưng ...'\n",
+        "id_e = [0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
+        "\n",
+        "\n",
+        "def dem_so_tu(cau):\n",
+        "    \"\"\"Return the number of whitespace-separated words in the sentence `cau`.\"\"\"\n",
+        "    return len(cau.split())\n",
+        "\n",
+        "\n",
+        "# Usage example\n",
+        "so_tu = dem_so_tu(sentence_e)\n",
+        "print(\"Số lượng từ trong câu là:\", so_tu)\n",
+        "input_e = tokenizer.encode(sentence_e)\n",
+        "# Pass the whole id list: input_e[0] is only the first id, which made tokens_e\n",
+        "# the single string '<s>' instead of the token sequence.\n",
+        "tokens_e = tokenizer.convert_ids_to_tokens(input_e)\n",
+        "print(input_e)\n",
+        "print(len(id_e))\n",
+        "print('độ dài của tokens',len(input_e))\n",
+        "print(tokens_e)\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 12,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 830,
+          "referenced_widgets": [
+            "fabfacd2a2964d23994070bcb6bc4b3c",
+            "09db5366452347dfa40bbd192d22d489",
+            "706046f4266a4ade9f5f1718fc0bbf4f",
+            "d39e9db3bbf84736b090e12ecf9bd1fd",
+            "654a5b527d0c4c51afce2e65fb3b36aa",
+            "fa1045a3003d4496a2a5c3055355120d",
+            "f00793fc0f1948fca2fda701eb461505",
+            "9994f44df6a0451792559446361557e9",
+            "cc2a9437c039472f8447f9d0194459dc",
+            "2c64cf42d35a4722a3b32b366d1dcd1b",
+            "a9886da1334d4df0b4c6536255df6420"
+          ]
+        },
+        "id": "3wpjBGK3JuwS",
+        "outputId": "ec11cd7f-84ca-402d-c7c0-b86db3ea555c"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "  0%|          | 18/16858 [00:00<08:06, 34.64it/s]"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "model.safetensors:   0%|          | 0.00/543M [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "fabfacd2a2964d23994070bcb6bc4b3c"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "  4%|▎         | 622/16858 [00:08<02:38, 102.31it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 610 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "  4%|▍         | 670/16858 [00:08<03:23, 79.66it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 659 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "  7%|▋         | 1204/16858 [00:14<02:24, 108.20it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 1187 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 13%|█▎        | 2204/16858 [00:24<02:14, 108.94it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 2190 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 15%|█▍        | 2527/16858 [00:27<02:11, 109.32it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 2507 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 20%|█▉        | 3367/16858 [00:35<02:32, 88.29it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 3348 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 21%|██▏       | 3606/16858 [00:38<02:04, 106.72it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 3589 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 22%|██▏       | 3629/16858 [00:38<02:03, 106.74it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 3611 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 22%|██▏       | 3754/16858 [00:39<02:01, 107.84it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 3739 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 23%|██▎       | 3948/16858 [00:41<01:58, 108.60it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 3932 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 24%|██▍       | 4049/16858 [00:42<02:00, 106.52it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 4036 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 28%|██▊       | 4645/16858 [00:48<02:05, 97.09it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 4624 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 28%|██▊       | 4772/16858 [00:49<01:54, 105.17it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 4759 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 37%|███▋      | 6174/16858 [01:03<01:38, 108.44it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6158 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 37%|███▋      | 6232/16858 [01:04<01:38, 107.73it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6215 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 37%|███▋      | 6301/16858 [01:04<01:36, 109.16it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6284 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 39%|███▊      | 6492/16858 [01:06<01:37, 106.80it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6479 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 40%|███▉      | 6699/16858 [01:08<01:34, 108.07it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6678 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 41%|████      | 6879/16858 [01:10<01:35, 105.01it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6865 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 41%|████      | 6935/16858 [01:10<01:49, 90.54it/s] "
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 6925 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 46%|████▌     | 7770/16858 [01:19<01:24, 107.15it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 7755 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 51%|█████     | 8557/16858 [01:27<01:15, 110.09it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 8546 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 53%|█████▎    | 8938/16858 [01:30<01:13, 107.63it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 8924 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 56%|█████▌    | 9454/16858 [01:35<01:17, 95.77it/s] "
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 9445 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 59%|█████▊    | 9880/16858 [01:40<01:07, 103.40it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 9868 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 61%|██████    | 10279/16858 [01:44<01:01, 107.54it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 10257 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 66%|██████▌   | 11126/16858 [01:52<00:54, 105.50it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 11107 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 69%|██████▊   | 11560/16858 [01:56<00:51, 103.44it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 11544 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 69%|██████▉   | 11615/16858 [01:57<00:49, 106.21it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 11595 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 70%|██████▉   | 11783/16858 [01:59<00:50, 101.44it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 11764 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 72%|███████▏  | 12064/16858 [02:02<00:57, 82.93it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 12047 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 72%|███████▏  | 12090/16858 [02:02<01:00, 78.95it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 12073 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 76%|███████▌  | 12765/16858 [02:08<00:37, 109.55it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 12750 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 78%|███████▊  | 13175/16858 [02:12<00:34, 105.44it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 13158 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 82%|████████▏ | 13878/16858 [02:20<00:27, 107.45it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 13866 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 83%|████████▎ | 14046/16858 [02:21<00:26, 106.94it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 14033 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 84%|████████▍ | 14148/16858 [02:22<00:25, 108.31it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 14128 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 85%|████████▌ | 14406/16858 [02:25<00:23, 105.36it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 14386 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 90%|████████▉ | 15092/16858 [02:32<00:16, 108.92it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 15078 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 91%|█████████ | 15339/16858 [02:34<00:14, 104.17it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 15324 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 92%|█████████▏| 15451/16858 [02:35<00:13, 105.05it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 15434 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            " 92%|█████████▏| 15529/16858 [02:36<00:12, 106.31it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 15507 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "100%|██████████| 16858/16858 [02:49<00:00, 99.47it/s] "
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Warning: Skipping row 16853 - length mismatch\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n"
+          ]
+        }
+      ],
+      "source": [
+        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+        "model.to(device)\n",
+        "# Inference only: switch to eval mode so train-only layers (e.g. dropout, if any) stay disabled.\n",
+        "model.eval()\n",
+        "\n",
+        "all_embeddings = []  # list of [seq_len_i, 768] tensors, one per kept row\n",
+        "all_labels = []  # list of [seq_len_i,] tensors, one per kept row\n",
+        "len_em = []\n",
+        "skipped_rows = []  # index labels of the rows dropped because of a length mismatch\n",
+        "\n",
+        "for i, row in tqdm(df.iterrows(), total=len(df)):\n",
+        "\n",
+        "    # Read the segmented sentence and its gold label ids from the current row\n",
+        "    sentence = row['seg_text']\n",
+        "    gold_labels = row[\"id\"]\n",
+        "\n",
+        "    # Run the sentence through the subword tokenizer\n",
+        "    input_ids = tokenizer.encode(sentence, return_tensors=\"pt\").to(device)\n",
+        "\n",
+        "    tokens = tokenizer.convert_ids_to_tokens(input_ids[0].cpu())\n",
+        "\n",
+        "    # Encode without tracking gradients; keep the per-token hidden states on CPU\n",
+        "    with torch.no_grad():\n",
+        "        outputs = model(input_ids)\n",
+        "        last_hidden_state = outputs.last_hidden_state.squeeze(0).cpu()\n",
+        "\n",
+        "    # Merge the embeddings of pieces that the tokenizer split apart\n",
+        "    # (presumably one vector per word - confirm against group_embeddings)\n",
+        "    word_embeds = group_embeddings(tokens, last_hidden_state)\n",
+        "\n",
+        "    # Keep the row only if embeddings and labels line up one-to-one\n",
+        "    if len(word_embeds) != len(gold_labels):\n",
+        "        # tqdm.write prints the message without breaking the progress bar\n",
+        "        tqdm.write(f\"Warning: Skipping row {i} - length mismatch\")\n",
+        "        skipped_rows.append(i)\n",
+        "        continue\n",
+        "\n",
+        "    # Append to the global lists; from here the data is ready for training\n",
+        "    all_embeddings.append(torch.stack(word_embeds))\n",
+        "    all_labels.append(torch.tensor(gold_labels))\n",
+        "\n",
+        "# One-line summary so the amount of dropped data is visible at a glance\n",
+        "print(f\"Kept {len(all_embeddings)}/{len(df)} rows; skipped {len(skipped_rows)} due to length mismatch\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "EWVmbe_GnW-8"
+      },
+      "source": [
+        "# Train Softmax with PyTorch"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 13,
+      "metadata": {
+        "id": "WeIlLLHsnRvO"
+      },
+      "outputs": [],
+      "source": [
+        "# from torch.utils.data import Dataset, DataLoader\n",
+        "\n",
+        "# class NERDataset(Dataset):\n",
+        "#     def __init__(self, embeddings, labels):\n",
+        "#         self.embeddings = embeddings\n",
+        "#         self.labels = labels\n",
+        "\n",
+        "#     def __len__(self):\n",
+        "#         return len(self.embeddings)\n",
+        "\n",
+        "#     def __getitem__(self, idx):\n",
+        "#         return self.embeddings[idx], self.labels[idx]\n",
+        "\n",
+        "# def collate_fn(batch): # Hàm này được dùng để \"gom\" các mẫu có độ dài khác nhau vào một batch, thông qua padding.\n",
+        "\n",
+        "#     embeddings, labels = zip(*batch)\n",
+        "#     lengths = [len(x) for x in embeddings]\n",
+        "\n",
+        "#     # Padding\n",
+        "#     max_len = max(lengths)\n",
+        "#     padded_embs = torch.stack([\n",
+        "#         torch.cat([e, torch.zeros(max_len - len(e), e.size(1))]) for e in embeddings\n",
+        "#     ])\n",
+        "#     padded_labels = torch.stack([\n",
+        "#         torch.cat([l, torch.full((max_len - len(l),), -1)]) for l in labels\n",
+        "#     ])\n",
+        "#     return padded_embs, padded_labels, lengths"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 14,
+      "metadata": {
+        "id": "gcsNGMB5Hwtw"
+      },
+      "outputs": [],
+      "source": [
+        "# import torch\n",
+        "# import torch.nn as nn\n",
+        "# from torch.utils.data import Dataset, DataLoader\n",
+        "# from sklearn.model_selection import train_test_split\n",
+        "# from sklearn.metrics import precision_recall_fscore_support, classification_report, accuracy_score\n",
+        "# from tqdm import tqdm\n",
+        "# # Initialize Weights & Biases\n",
+        "# wandb.init(\n",
+        "#     project=\"NER\",\n",
+        "#     name=\"Softmax_VLSP2016\",\n",
+        "#     config={\n",
+        "#         \"epochs\": 20,\n",
+        "#         \"batch_size\": 16,\n",
+        "#         \"learning_rate\": 1e-3,\n",
+        "#         \"input_dim\": 768,\n",
+        "#         \"test_size\": 0.2\n",
+        "#     }\n",
+        "# )\n",
+        "# # --- 1. Define the Softmax Model ---\n",
+        "# class SoftmaxTagger(nn.Module):\n",
+        "#     def __init__(self, input_dim, num_tags):\n",
+        "#         super().__init__()\n",
+        "#         # Một lớp tuyến tính duy nhất ánh xạ từ chiều nhúng đến số lượng thẻ\n",
+        "#         self.hidden2tag = nn.Linear(input_dim, num_tags)\n",
+        "\n",
+        "#     def forward(self, x):\n",
+        "#         # Truyền các embedding qua các lớp tuyến tính\n",
+        "#         emissions = self.hidden2tag(x)\n",
+        "#         return emissions\n",
+        "#     def save_model(self, path):\n",
+        "#         torch.save(self.state_dict(), path)\n",
+        "\n",
+        "#     def load_model(self, path):\n",
+        "#         self.load_state_dict(torch.load(path))\n",
+        "#         self.eval()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 15,
+      "metadata": {
+        "id": "AmGqMa5TH-8M"
+      },
+      "outputs": [],
+      "source": [
+        "# # --- 2. Define the Evaluation Function ---\n",
+        "# def evaluate_softmax(model, dataloader, loss_fn, device):\n",
+        "#     model.eval()  # Set the model to evaluation mode\n",
+        "#     total_loss = 0\n",
+        "#     all_preds = []\n",
+        "#     all_true = []\n",
+        "\n",
+        "#     with torch.no_grad():\n",
+        "#         for x, y, lengths in dataloader:\n",
+        "#             x, y = x.to(device), y.to(device)\n",
+        "\n",
+        "#             #Model output\n",
+        "#             emissions = model(x)\n",
+        "\n",
+        "#             # Calculate loss, ignoring padding -1\n",
+        "#             # Reshape for CrossEntropyLoss: (N, C) and (N)\n",
+        "#             loss = loss_fn(emissions.view(-1, model.hidden2tag.out_features), y.view(-1))\n",
+        "#             total_loss += loss.item()\n",
+        "\n",
+        "#             #Nhận dự đoán bằng cách tìm tag có điểm cao nhất\n",
+        "#             preds = torch.argmax(emissions, dim=2)\n",
+        "\n",
+        "#             #Làm phẳng các dự đoán và nhãn thực, loại bỏ padding\n",
+        "#             for i in range(len(lengths)):\n",
+        "#                 true_len = lengths[i]\n",
+        "#                 all_true.extend(y[i, :true_len].cpu().numpy())\n",
+        "#                 all_preds.extend(preds[i, :true_len].cpu().numpy())\n",
+        "\n",
+        "#     # Calculate metrics\n",
+        "#     precision, recall, f1, _ = precision_recall_fscore_support(\n",
+        "#         all_true, all_preds, average='macro', zero_division=0\n",
+        "#     )\n",
+        "#     accuracy = accuracy_score(all_true, all_preds)\n",
+        "\n",
+        "#     return total_loss / len(dataloader), precision, recall, f1, accuracy\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 16,
+      "metadata": {
+        "id": "tUcrgq0vIIxA"
+      },
+      "outputs": [],
+      "source": [
+        "# # --- 3. Prepare Data and Training Setup ---\n",
+        "\n",
+        "# device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+        "\n",
+        "# # Split data into training and testing sets\n",
+        "# train_embs, test_embs, train_labels, test_labels = train_test_split(\n",
+        "#     all_embeddings, all_labels, test_size=0.2, random_state=42\n",
+        "# )\n",
+        "\n",
+        "# # Create Datasets and DataLoaders\n",
+        "# train_dataset = NERDataset(train_embs, train_labels)\n",
+        "# test_dataset = NERDataset(test_embs, test_labels)\n",
+        "\n",
+        "# BATCH_SIZE = 32\n",
+        "# train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)\n",
+        "# test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)\n",
+        "\n",
+        "# # Model, Loss, and Optimizer Initialization\n",
+        "# INPUT_DIM = 768  # PhoBERT base embedding size\n",
+        "# NUM_TAGS = max(label.max().item() for label in all_labels) + 1\n",
+        "# LEARNING_RATE = 1e-3\n",
+        "# EPOCHS = 10\n",
+        "\n",
+        "# model = SoftmaxTagger(INPUT_DIM, NUM_TAGS).to(device)\n",
+        "# # Use CrossEntropyLoss, which combines LogSoftmax and NLLLoss.\n",
+        "# # ignore_index=-1 tells it to skip padded positions during loss calculation.\n",
+        "# loss_fn = nn.CrossEntropyLoss(ignore_index=-1)\n",
+        "# optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 17,
+      "metadata": {
+        "id": "TT_tOpTCILR9"
+      },
+      "outputs": [],
+      "source": [
+        "# # --- 4. Training Loop ---\n",
+        "\n",
+        "# print(\"Starting Softmax Model Training...\")\n",
+        "# for epoch in range(1, EPOCHS + 1):\n",
+        "#     model.train()\n",
+        "#     total_train_loss = 0\n",
+        "\n",
+        "#     train_bar = tqdm(train_loader, desc=f\"Epoch {epoch}/{EPOCHS}\")\n",
+        "#     for x, y, lengths in train_bar:\n",
+        "#         x, y = x.to(device), y.to(device)\n",
+        "\n",
+        "#         # Forward pass\n",
+        "#         emissions = model(x)\n",
+        "\n",
+        "#         # Calculate loss\n",
+        "#         # Reshape for CrossEntropyLoss: (N, C) and (N)\n",
+        "#         loss = loss_fn(emissions.view(-1, NUM_TAGS), y.view(-1))\n",
+        "\n",
+        "#         # Backward pass and optimization\n",
+        "#         optimizer.zero_grad()\n",
+        "#         loss.backward()\n",
+        "#         optimizer.step()\n",
+        "\n",
+        "#         total_train_loss += loss.item()\n",
+        "#         train_bar.set_postfix(loss=total_train_loss / len(train_bar))\n",
+        "\n",
+        "#     # Evaluate after each epoch\n",
+        "#     avg_test_loss, precision, recall, f1, accuracy = evaluate_softmax(model, test_loader, loss_fn, device)\n",
+        "\n",
+        "#     print(f\"Epoch {epoch}/{EPOCHS} -> Train Loss: {total_train_loss/len(train_loader):.4f} | \"\n",
+        "#           f\"Test Loss: {avg_test_loss:.4f} | test_f1: {f1:.4f} | test_acc: {accuracy:.4f}\")\n",
+        "\n",
+        "\n",
+        "# print(\"\\n--- Training Finished ---\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 18,
+      "metadata": {
+        "id": "xnPOKnLBJF7A"
+      },
+      "outputs": [],
+      "source": [
+        "# # --- 5. Final Evaluation Report ---\n",
+        "# print(\"\\nFinal Test Set Performance:\")\n",
+        "# model.eval()\n",
+        "# all_preds_final, all_true_final = [], []\n",
+        "# with torch.no_grad():\n",
+        "#     for x, y, lengths in tqdm(test_loader, desc=\"Generating Final Report\"):\n",
+        "#         x, y = x.to(device), y.to(device)\n",
+        "#         preds = torch.argmax(model(x), dim=2)\n",
+        "#         for i in range(len(lengths)):\n",
+        "#             true_len = lengths[i]\n",
+        "#             all_true_final.extend(y[i, :true_len].cpu().numpy())\n",
+        "#             all_preds_final.extend(preds[i, :true_len].cpu().numpy())\n",
+        "\n",
+        "# # Generate and print the classification report\n",
+        "# target_names = [id_tag[i] for i in range(NUM_TAGS)]\n",
+        "# report = classification_report(all_true_final, all_preds_final, target_names=target_names, digits=4)\n",
+        "# print(\"\\nClassification Report:\\n\", report)\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 19,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "CM4FYvo4IL3e",
+        "outputId": "a86b87f7-0f74-4c96-dd9a-9450ca3b905b"
+      },
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "Tracking run with wandb version 0.19.11"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "Run data is saved locally in <code>/content/wandb/run-20250610_121142-lmnb07kv</code>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "Syncing run <strong><a href='https://wandb.ai/lenguyenquocanh-vn-fptu-fpt-university/NER/runs/lmnb07kv' target=\"_blank\">Softmax_VLSP2016</a></strong> to <a href='https://wandb.ai/lenguyenquocanh-vn-fptu-fpt-university/NER' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              " View project at <a href='https://wandb.ai/lenguyenquocanh-vn-fptu-fpt-university/NER' target=\"_blank\">https://wandb.ai/lenguyenquocanh-vn-fptu-fpt-university/NER</a>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              " View run at <a href='https://wandb.ai/lenguyenquocanh-vn-fptu-fpt-university/NER/runs/lmnb07kv' target=\"_blank\">https://wandb.ai/lenguyenquocanh-vn-fptu-fpt-university/NER/runs/lmnb07kv</a>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Starting Softmax Model Training...\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 1/20: 100%|██████████| 841/841 [00:05<00:00, 149.09it/s, loss=0.122]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved improved model to checkpoints/best_epoch_1.pt\n",
+            "Epoch 1/20 -> Train Loss: 0.1224 | Val Loss: 0.0489 | Val F1: 0.7949 | Val Acc: 0.9847\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 2/20: 100%|██████████| 841/841 [00:04<00:00, 173.24it/s, loss=0.0419]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved improved model to checkpoints/best_epoch_2.pt\n",
+            "Epoch 2/20 -> Train Loss: 0.0419 | Val Loss: 0.0359 | Val F1: 0.8518 | Val Acc: 0.9885\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 3/20: 100%|██████████| 841/841 [00:05<00:00, 159.28it/s, loss=0.0338]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved improved model to checkpoints/best_epoch_3.pt\n",
+            "Epoch 3/20 -> Train Loss: 0.0338 | Val Loss: 0.0314 | Val F1: 0.8666 | Val Acc: 0.9898\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 4/20: 100%|██████████| 841/841 [00:04<00:00, 174.77it/s, loss=0.03]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved improved model to checkpoints/best_epoch_4.pt\n",
+            "Epoch 4/20 -> Train Loss: 0.0300 | Val Loss: 0.0295 | Val F1: 0.8674 | Val Acc: 0.9899\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 5/20: 100%|██████████| 841/841 [00:05<00:00, 157.59it/s, loss=0.0279]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved improved model to checkpoints/best_epoch_5.pt\n",
+            "Epoch 5/20 -> Train Loss: 0.0279 | Val Loss: 0.0281 | Val F1: 0.8838 | Val Acc: 0.9908\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 6/20: 100%|██████████| 841/841 [00:04<00:00, 172.50it/s, loss=0.026]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved improved model to checkpoints/best_epoch_6.pt\n",
+            "Epoch 6/20 -> Train Loss: 0.0260 | Val Loss: 0.0268 | Val F1: 0.8838 | Val Acc: 0.9909\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 7/20: 100%|██████████| 841/841 [00:05<00:00, 156.51it/s, loss=0.0249]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved improved model to checkpoints/best_epoch_7.pt\n",
+            "Epoch 7/20 -> Train Loss: 0.0249 | Val Loss: 0.0262 | Val F1: 0.8855 | Val Acc: 0.9910\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 8/20: 100%|██████████| 841/841 [00:04<00:00, 173.05it/s, loss=0.0238]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved improved model to checkpoints/best_epoch_8.pt\n",
+            "Epoch 8/20 -> Train Loss: 0.0238 | Val Loss: 0.0258 | Val F1: 0.8849 | Val Acc: 0.9912\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 9/20: 100%|██████████| 841/841 [00:05<00:00, 158.86it/s, loss=0.0228]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 9/20 -> Train Loss: 0.0228 | Val Loss: 0.0256 | Val F1: 0.8850 | Val Acc: 0.9912\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 10/20: 100%|██████████| 841/841 [00:04<00:00, 170.77it/s, loss=0.0224]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved improved model to checkpoints/best_epoch_10.pt\n",
+            "Epoch 10/20 -> Train Loss: 0.0224 | Val Loss: 0.0254 | Val F1: 0.8866 | Val Acc: 0.9914\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 11/20: 100%|██████████| 841/841 [00:05<00:00, 163.16it/s, loss=0.0218]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved improved model to checkpoints/best_epoch_11.pt\n",
+            "Epoch 11/20 -> Train Loss: 0.0218 | Val Loss: 0.0249 | Val F1: 0.8908 | Val Acc: 0.9916\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 12/20: 100%|██████████| 841/841 [00:04<00:00, 170.64it/s, loss=0.021]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 12/20 -> Train Loss: 0.0210 | Val Loss: 0.0252 | Val F1: 0.8885 | Val Acc: 0.9914\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 13/20: 100%|██████████| 841/841 [00:05<00:00, 161.40it/s, loss=0.0209]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 13/20 -> Train Loss: 0.0209 | Val Loss: 0.0250 | Val F1: 0.8902 | Val Acc: 0.9915\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 14/20: 100%|██████████| 841/841 [00:04<00:00, 170.76it/s, loss=0.0203]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 14/20 -> Train Loss: 0.0203 | Val Loss: 0.0251 | Val F1: 0.8895 | Val Acc: 0.9915\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 15/20: 100%|██████████| 841/841 [00:05<00:00, 162.77it/s, loss=0.0199]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 15/20 -> Train Loss: 0.0199 | Val Loss: 0.0250 | Val F1: 0.8868 | Val Acc: 0.9913\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 16/20: 100%|██████████| 841/841 [00:04<00:00, 171.25it/s, loss=0.0197]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 16/20 -> Train Loss: 0.0197 | Val Loss: 0.0253 | Val F1: 0.8888 | Val Acc: 0.9912\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 17/20: 100%|██████████| 841/841 [00:05<00:00, 160.48it/s, loss=0.0195]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 17/20 -> Train Loss: 0.0195 | Val Loss: 0.0250 | Val F1: 0.8900 | Val Acc: 0.9915\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 18/20: 100%|██████████| 841/841 [00:04<00:00, 168.69it/s, loss=0.0192]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 18/20 -> Train Loss: 0.0192 | Val Loss: 0.0250 | Val F1: 0.8893 | Val Acc: 0.9914\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 19/20: 100%|██████████| 841/841 [00:05<00:00, 163.39it/s, loss=0.0188]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved improved model to checkpoints/best_epoch_19.pt\n",
+            "Epoch 19/20 -> Train Loss: 0.0188 | Val Loss: 0.0253 | Val F1: 0.8926 | Val Acc: 0.9915\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Epoch 20/20: 100%|██████████| 841/841 [00:04<00:00, 168.43it/s, loss=0.0188]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved improved model to checkpoints/best_epoch_20.pt\n",
+            "Epoch 20/20 -> Train Loss: 0.0188 | Val Loss: 0.0249 | Val F1: 0.8936 | Val Acc: 0.9918\n",
+            "\n",
+            "--- Training Finished ---\n",
+            "\n",
+            "Final Test Set Performance:\n",
+            "\n",
+            "Classification Report:\n",
+            "               precision    recall  f1-score   support\n",
+            "\n",
+            "           O     0.9973    0.9973    0.9973     68476\n",
+            "       B-PER     0.9869    0.9768    0.9818      1464\n",
+            "       I-PER     0.9810    0.9767    0.9788       686\n",
+            "       B-ORG     0.7709    0.8249    0.7970       257\n",
+            "       I-ORG     0.7981    0.7721    0.7849       430\n",
+            "       B-LOC     0.8809    0.9001    0.8904      1241\n",
+            "       I-LOC     0.8339    0.8159    0.8248       554\n",
+            "\n",
+            "    accuracy                         0.9918     73108\n",
+            "   macro avg     0.8927    0.8948    0.8936     73108\n",
+            "weighted avg     0.9918    0.9918    0.9918     73108\n",
+            "\n"
+          ]
+        }
+      ],
+      "source": [
+        "import os\n",
+        "import torch\n",
+        "import torch.nn as nn\n",
+        "from torch.utils.data import Dataset, DataLoader\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from sklearn.metrics import precision_recall_fscore_support, classification_report, accuracy_score\n",
+        "from tqdm import tqdm\n",
+        "import wandb\n",
+        "\n",
+        "# Make sure the folder for saved checkpoints exists before training starts\n",
+        "os.makedirs(\"checkpoints\", exist_ok=True)\n",
+        "\n",
+        "# Start the Weights & Biases run; the hyperparameters below are logged as its config\n",
+        "wandb.init(\n",
+        "    project=\"NER\",\n",
+        "    name=\"Softmax_VLSP2016\",\n",
+        "    config=dict(\n",
+        "        epochs=20,\n",
+        "        batch_size=16,\n",
+        "        learning_rate=1e-3,\n",
+        "        input_dim=768,\n",
+        "        test_size=0.2,\n",
+        "    ),\n",
+        ")\n",
+        "\n",
+        "# --- Dataset ---\n",
+        "class NERDataset(Dataset):\n",
+        "    \"\"\"Pairs each sentence's embeddings with its labels.\n",
+        "\n",
+        "    `embeddings` and `labels` are parallel sequences; item `idx` is the\n",
+        "    tuple `(embeddings[idx], labels[idx])`.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, embeddings, labels):\n",
+        "        self.embeddings, self.labels = embeddings, labels\n",
+        "\n",
+        "    def __len__(self):\n",
+        "        # The dataset size is taken from the embeddings sequence\n",
+        "        return len(self.embeddings)\n",
+        "\n",
+        "    def __getitem__(self, idx):\n",
+        "        sample = self.embeddings[idx]\n",
+        "        target = self.labels[idx]\n",
+        "        return sample, target\n",
+        "\n",
+        "def collate_fn(batch):\n",
+        "    \"\"\"Pad a batch of variable-length sentences to a common length.\n",
+        "\n",
+        "    Args:\n",
+        "        batch: sequence of `(embeddings, labels)` pairs, where `embeddings`\n",
+        "            is a `[seq_len, dim]` tensor and `labels` a `[seq_len]` tensor.\n",
+        "\n",
+        "    Returns:\n",
+        "        `(padded_embs, padded_labels, lengths)`: embeddings padded with zeros\n",
+        "        to `[batch, max_len, dim]`, labels padded with -1 to `[batch, max_len]`,\n",
+        "        and the list of original sentence lengths.\n",
+        "    \"\"\"\n",
+        "    # Function-scope import keeps this cell independent of the import block\n",
+        "    from torch.nn.utils.rnn import pad_sequence\n",
+        "\n",
+        "    embeddings, labels = zip(*batch)\n",
+        "    lengths = [len(x) for x in embeddings]\n",
+        "\n",
+        "    # pad_sequence allocates the padding with the dtype/device of its inputs,\n",
+        "    # so half-precision or GPU-resident tensors are padded without upcasting or errors\n",
+        "    padded_embs = pad_sequence(list(embeddings), batch_first=True, padding_value=0.0)\n",
+        "    # -1 marks padded label positions\n",
+        "    padded_labels = pad_sequence(list(labels), batch_first=True, padding_value=-1)\n",
+        "    return padded_embs, padded_labels, lengths\n",
+        "\n",
+        "# --- Model ---\n",
+        "class SoftmaxTagger(nn.Module):\n",
+        "    \"\"\"Token classifier made of a single linear layer from embeddings to tag scores.\"\"\"\n",
+        "\n",
+        "    def __init__(self, input_dim, num_tags):\n",
+        "        super().__init__()\n",
+        "        self.hidden2tag = nn.Linear(input_dim, num_tags)\n",
+        "\n",
+        "    def forward(self, x):\n",
+        "        \"\"\"Return the unnormalized tag scores for `x` (last dim must be `input_dim`).\"\"\"\n",
+        "        return self.hidden2tag(x)\n",
+        "\n",
+        "    def save_model(self, path):\n",
+        "        \"\"\"Write this module's state dict to `path`.\"\"\"\n",
+        "        torch.save(self.state_dict(), path)\n",
+        "\n",
+        "    def load_model(self, path, map_location=\"cpu\"):\n",
+        "        \"\"\"Load a state dict saved by `save_model` and switch to eval mode.\n",
+        "\n",
+        "        Args:\n",
+        "            path: checkpoint file to read.\n",
+        "            map_location: forwarded to `torch.load`. The default \"cpu\" lets a\n",
+        "                checkpoint saved on a GPU be loaded on a CPU-only machine;\n",
+        "                `load_state_dict` then copies the values into this module's\n",
+        "                existing parameters.\n",
+        "        \"\"\"\n",
+        "        # NOTE(review): torch.load unpickles the file - only load trusted checkpoints\n",
+        "        state_dict = torch.load(path, map_location=map_location)\n",
+        "        self.load_state_dict(state_dict)\n",
+        "        self.eval()\n",
+        "\n",
+        "# --- Evaluation ---\n",
+        "def evaluate_softmax(model, dataloader, loss_fn, device):\n",
+        "    \"\"\"Score `model` on every batch of `dataloader` without updating it.\n",
+        "\n",
+        "    Returns a 7-tuple: mean per-batch loss, macro precision, macro recall,\n",
+        "    macro F1, accuracy, and the flat lists of predicted and true tag ids\n",
+        "    restricted to the first `lengths[i]` positions of each sentence.\n",
+        "    \"\"\"\n",
+        "    model.eval()\n",
+        "    running_loss = 0\n",
+        "    all_preds, all_true = [], []\n",
+        "\n",
+        "    with torch.no_grad():\n",
+        "        for x, y, lengths in dataloader:\n",
+        "            x = x.to(device)\n",
+        "            y = y.to(device)\n",
+        "            emissions = model(x)\n",
+        "            # Flatten to (N, C) scores and (N,) targets for the loss\n",
+        "            flat_scores = emissions.view(-1, model.hidden2tag.out_features)\n",
+        "            batch_loss = loss_fn(flat_scores, y.view(-1))\n",
+        "            running_loss += batch_loss.item()\n",
+        "            preds = torch.argmax(emissions, dim=2)\n",
+        "            # Drop the padded tail of every row before collecting labels\n",
+        "            for row, n_valid in enumerate(lengths):\n",
+        "                all_true.extend(y[row, :n_valid].cpu().numpy())\n",
+        "                all_preds.extend(preds[row, :n_valid].cpu().numpy())\n",
+        "\n",
+        "    precision, recall, f1, _ = precision_recall_fscore_support(\n",
+        "        all_true, all_preds, average='macro', zero_division=0\n",
+        "    )\n",
+        "    accuracy = accuracy_score(all_true, all_preds)\n",
+        "    mean_loss = running_loss / len(dataloader)\n",
+        "\n",
+        "    return mean_loss, precision, recall, f1, accuracy, all_preds, all_true\n",
+        "\n",
+        "# Train/test split: hold out 20% of the sequences; random_state fixes the split.\n",
+        "# NOTE(review): this held-out set is also what the training loop below uses to\n",
+        "# pick checkpoints, so the final report is not measured on untouched data.\n",
+        "train_embs, test_embs, train_labels, test_labels = train_test_split(\n",
+        "    all_embeddings, all_labels, test_size=0.2, random_state=42\n",
+        ")\n",
+        "\n",
+        "train_dataset = NERDataset(train_embs, train_labels)\n",
+        "test_dataset = NERDataset(test_embs, test_labels)\n",
+        "\n",
+        "# Batch size comes from the wandb run config; only the training loader shuffles.\n",
+        "BATCH_SIZE = wandb.config.batch_size\n",
+        "train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)\n",
+        "test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)\n",
+        "\n",
+        "INPUT_DIM = wandb.config.input_dim\n",
+        "# Largest label id seen in the data, plus one (assumes tag ids start at 0 -- confirm).\n",
+        "NUM_TAGS = max(label.max().item() for label in all_labels) + 1\n",
+        "LEARNING_RATE = wandb.config.learning_rate\n",
+        "EPOCHS = wandb.config.epochs\n",
+        "\n",
+        "model = SoftmaxTagger(INPUT_DIM, NUM_TAGS).to(device)\n",
+        "# ignore_index=-1 matches the -1 value collate_fn uses to pad label sequences.\n",
+        "loss_fn = nn.CrossEntropyLoss(ignore_index=-1)\n",
+        "optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)\n",
+        "\n",
+        "# Best validation scores seen so far; the training loop updates them.\n",
+        "best_f1 = 0\n",
+        "best_acc = 0\n",
+        "\n",
+        "# --- Training Loop ---\n",
+        "import os\n",
+        "\n",
+        "# torch.save does not create missing parent directories, so make sure the\n",
+        "# checkpoint folder exists before the first save.\n",
+        "os.makedirs(\"checkpoints\", exist_ok=True)\n",
+        "\n",
+        "print(\"Starting Softmax Model Training...\")\n",
+        "for epoch in range(1, EPOCHS + 1):\n",
+        "    model.train()\n",
+        "    total_train_loss = 0\n",
+        "    all_train_preds, all_train_true = [], []\n",
+        "\n",
+        "    train_bar = tqdm(train_loader, desc=f\"Epoch {epoch}/{EPOCHS}\")\n",
+        "    for step, (x, y, lengths) in enumerate(train_bar, start=1):\n",
+        "        x, y = x.to(device), y.to(device)\n",
+        "        emissions = model(x)\n",
+        "        loss = loss_fn(emissions.view(-1, NUM_TAGS), y.view(-1))\n",
+        "        optimizer.zero_grad()\n",
+        "        loss.backward()\n",
+        "        optimizer.step()\n",
+        "        total_train_loss += loss.item()\n",
+        "        # Running mean over the batches seen so far. Dividing by the total number\n",
+        "        # of batches would under-report the loss until the very last step.\n",
+        "        train_bar.set_postfix(loss=total_train_loss / step)\n",
+        "\n",
+        "        # Keep predictions and gold labels for the first lengths[i] positions only.\n",
+        "        preds = torch.argmax(emissions, dim=2)\n",
+        "        for i in range(len(lengths)):\n",
+        "            true_len = lengths[i]\n",
+        "            all_train_true.extend(y[i, :true_len].cpu().numpy())\n",
+        "            all_train_preds.extend(preds[i, :true_len].cpu().numpy())\n",
+        "\n",
+        "    train_precision, train_recall, train_f1, _ = precision_recall_fscore_support(\n",
+        "        all_train_true, all_train_preds, average='macro', zero_division=0\n",
+        "    )\n",
+        "    train_acc = accuracy_score(all_train_true, all_train_preds)\n",
+        "\n",
+        "    # Validation (runs on test_loader, the held-out split)\n",
+        "    val_loss, val_precision, val_recall, val_f1, val_acc, _, _ = evaluate_softmax(model, test_loader, loss_fn, device)\n",
+        "\n",
+        "    # Logging to wandb\n",
+        "    wandb.log({\n",
+        "        \"epoch\": epoch,\n",
+        "        \"avg_train_loss\": total_train_loss / len(train_loader),\n",
+        "        \"train_precision\": train_precision,\n",
+        "        \"train_recall\": train_recall,\n",
+        "        \"train_f1\": train_f1,\n",
+        "        \"train_acc\": train_acc,\n",
+        "        \"val_loss\": val_loss,\n",
+        "        \"val_precision\": val_precision,\n",
+        "        \"val_recall\": val_recall,\n",
+        "        \"val_f1\": val_f1,\n",
+        "        \"val_acc\": val_acc,\n",
+        "    })\n",
+        "\n",
+        "    # Save a checkpoint whenever either validation metric sets a new best.\n",
+        "    if val_f1 > best_f1 or val_acc > best_acc:\n",
+        "        best_f1 = max(val_f1, best_f1)\n",
+        "        best_acc = max(val_acc, best_acc)\n",
+        "        ckpt_path = f\"checkpoints/best_epoch_{epoch}.pt\"\n",
+        "        model.save_model(ckpt_path)\n",
+        "        wandb.save(ckpt_path)\n",
+        "        print(f\"Saved improved model to {ckpt_path}\")\n",
+        "\n",
+        "    print(f\"Epoch {epoch}/{EPOCHS} -> Train Loss: {total_train_loss/len(train_loader):.4f} | \"\n",
+        "          f\"Val Loss: {val_loss:.4f} | Val F1: {val_f1:.4f} | Val Acc: {val_acc:.4f}\")\n",
+        "\n",
+        "print(\"\\n--- Training Finished ---\")\n",
+        "\n",
+        "# --- Final Evaluation Report ---\n",
+        "# NOTE(review): this scores the weights left in `model` after the last epoch,\n",
+        "# not one of the checkpoints written to checkpoints/ during training.\n",
+        "print(\"\\nFinal Test Set Performance:\")\n",
+        "model.eval()\n",
+        "_, _, _, _, _, all_preds_final, all_true_final = evaluate_softmax(model, test_loader, loss_fn, device)\n",
+        "\n",
+        "# Classification report table\n",
+        "# Pass the label ids explicitly: without `labels`, classification_report raises\n",
+        "# when the number of tags present in the data differs from len(target_names).\n",
+        "tag_ids = list(range(NUM_TAGS))\n",
+        "target_names = [id_tag[i] for i in tag_ids]\n",
+        "report = classification_report(\n",
+        "    all_true_final, all_preds_final, labels=tag_ids, target_names=target_names,\n",
+        "    digits=4, output_dict=True, zero_division=0\n",
+        ")\n",
+        "\n",
+        "# Log report as wandb table, one row per tag\n",
+        "table = wandb.Table(columns=[\"Label\", \"Precision\", \"Recall\", \"F1-score\", \"Support\"])\n",
+        "for label in target_names:\n",
+        "    row = report[label]\n",
+        "    table.add_data(label, row[\"precision\"], row[\"recall\"], row[\"f1-score\"], row[\"support\"])\n",
+        "\n",
+        "wandb.log({\"Test Classification Report\": table})\n",
+        "print(\"\\nClassification Report:\\n\", classification_report(\n",
+        "    all_true_final, all_preds_final, labels=tag_ids, target_names=target_names,\n",
+        "    digits=4, zero_division=0\n",
+        "))\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "4Ppa-bdT8r2v"
+      },
+      "source": [
+        "# Lưu data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 20,
+      "metadata": {
+        "id": "s9GulKoGqx6d"
+      },
+      "outputs": [],
+      "source": [
+        "def save_tensors(all_embeddings, all_labels, embed_path='embeddings.pt', label_path='labels.pt'):\n",
+        "    \"\"\"Serialize the embeddings and the labels with torch.save, one file each.\"\"\"\n",
+        "    for payload, destination in ((all_embeddings, embed_path), (all_labels, label_path)):\n",
+        "        torch.save(payload, destination)\n",
+        "    print(f\"Saved embeddings to {embed_path} and labels to {label_path}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 22,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 70
+        },
+        "id": "AGAJZH_h8ve6",
+        "outputId": "1893fbfa-dbcc-48f9-b6e3-ef17f9eef51c"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Saved embeddings to embeddings.pt and labels to labels.pt\n",
+            "Mounted at /content/drive\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "'/content/drive/My Drive/labels.pt'"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            }
+          },
+          "metadata": {},
+          "execution_count": 22
+        }
+      ],
+      "source": [
+        "from google.colab import drive\n",
+        "import shutil\n",
+        "\n",
+        "# Write embeddings.pt and labels.pt to the working directory with save_tensors\n",
+        "save_tensors(all_embeddings, all_labels)\n",
+        "\n",
+        "# Mount Google Drive and copy both files into it\n",
+        "drive.mount('/content/drive')\n",
+        "shutil.copy('embeddings.pt', '/content/drive/My Drive')\n",
+        "shutil.copy('labels.pt', '/content/drive/My Drive')\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 23,
+      "metadata": {
+        "id": "ESWu8QI59dwl"
+      },
+      "outputs": [],
+      "source": [
+        "# Persist the weights currently held by `model`. NOTE(review): presumably the\n",
+        "# state after the last training epoch rather than the best checkpoint -- confirm.\n",
+        "torch.save(model.state_dict(), \"softmax_tagger.pth\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 24,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "p_iixQwcVuum",
+        "outputId": "9d4875b8-f67a-46aa-c5c4-a001836e9cb6"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "SoftmaxTagger(\n",
+              "  (hidden2tag): Linear(in_features=768, out_features=7, bias=True)\n",
+              ")"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 24
+        }
+      ],
+      "source": [
+        "# Rebuild the tagger directly on `device` and restore the saved weights.\n",
+        "# map_location lets a file written on a GPU load on a CPU-only runtime, and\n",
+        "# placing the model on `device` keeps it alongside the inputs built at inference.\n",
+        "model = SoftmaxTagger(INPUT_DIM, NUM_TAGS).to(device)\n",
+        "model.load_state_dict(torch.load(\"softmax_tagger.pth\", map_location=device))\n",
+        "model.eval()  # evaluation mode; as the last expression it also displays the module\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 25,
+      "metadata": {
+        "id": "vM2lbEBkXxzy"
+      },
+      "outputs": [],
+      "source": [
+        "from transformers import AutoModel, AutoTokenizer\n",
+        "\n",
+        "tokenizer = AutoTokenizer.from_pretrained(\"vinai/phobert-base\")\n",
+        "model_bert = AutoModel.from_pretrained(\"vinai/phobert-base\").to(device)  # PhoBERT, used to produce the embeddings\n",
+        "\n",
+        "# `model` is the SoftmaxTagger that has already been trained\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 26,
+      "metadata": {
+        "id": "InnYf_SyXxzy"
+      },
+      "outputs": [],
+      "source": [
+        "def predict_ner(text):\n",
+        "    \"\"\"Tag `text` with the trained SoftmaxTagger on top of PhoBERT embeddings.\n",
+        "\n",
+        "    Returns `(preds, label_names, final_tokens)`: the predicted tag ids, their\n",
+        "    names looked up in `id_tag`, and the words rebuilt from PhoBERT's subword\n",
+        "    pieces. Blank input returns three empty lists.\n",
+        "\n",
+        "    NOTE(review): `preds` has one entry per embedding returned by\n",
+        "    `group_embeddings`. This assumes that helper skips <s>/</s> and merges\n",
+        "    \"@@\" pieces the same way `final_tokens` is rebuilt below -- confirm.\n",
+        "    \"\"\"\n",
+        "    # Nothing to tag; also avoids stacking an empty embedding list further down.\n",
+        "    if not text or not text.strip():\n",
+        "        return [], [], []\n",
+        "\n",
+        "    # Follow each model's actual placement instead of assuming CUDA, so a tagger\n",
+        "    # left on the CPU still works while PhoBERT runs on the GPU.\n",
+        "    bert_device = next(model_bert.parameters()).device\n",
+        "    tagger_device = next(model.parameters()).device\n",
+        "    model.eval()\n",
+        "\n",
+        "    # Tokenize the input text. PhoBERT accepts at most 256 tokens, so longer\n",
+        "    # input is truncated instead of failing inside the position embeddings.\n",
+        "    input_ids = tokenizer.encode(\n",
+        "        text, return_tensors=\"pt\", truncation=True, max_length=256\n",
+        "    ).to(bert_device)\n",
+        "    tokens = tokenizer.convert_ids_to_tokens(input_ids[0].cpu())\n",
+        "\n",
+        "    # Take PhoBERT's last hidden state as the subword embeddings\n",
+        "    with torch.no_grad():\n",
+        "        outputs = model_bert(input_ids)\n",
+        "        last_hidden_state = outputs.last_hidden_state.squeeze(0).cpu()\n",
+        "\n",
+        "    # Merge the embeddings of the pieces that belong to one split word\n",
+        "    word_embeds = group_embeddings(tokens, last_hidden_state)\n",
+        "\n",
+        "    # Stack into a batch of one\n",
+        "    x_tensor = torch.stack(word_embeds).unsqueeze(0).to(tagger_device)  # (1, seq_len, 768)\n",
+        "\n",
+        "    # Predict one tag id per position\n",
+        "    with torch.no_grad():\n",
+        "        emissions = model(x_tensor)\n",
+        "        preds = torch.argmax(emissions, dim=2).squeeze(0).cpu().tolist()\n",
+        "\n",
+        "    # Rebuild the original words: a trailing '@@' marks a piece continued by the next one\n",
+        "    final_tokens = []\n",
+        "    current_token = \"\"\n",
+        "    for tok in tokens:\n",
+        "        if tok in [\"<s>\", \"</s>\"]:\n",
+        "            continue\n",
+        "        if tok.endswith(\"@@\"):\n",
+        "            current_token += tok[:-2]\n",
+        "        else:\n",
+        "            current_token += tok\n",
+        "            final_tokens.append(current_token)\n",
+        "            current_token = \"\"\n",
+        "\n",
+        "    # Map tag ids to label names\n",
+        "    label_names = [id_tag[i] for i in preds]\n",
+        "\n",
+        "    return preds, label_names, final_tokens\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 27,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "collapsed": true,
+        "id": "aSj0-dl1Xxzy",
+        "outputId": "26bbf3fe-b011-4a59-bb3e-03d9511be21a"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Requirement already satisfied: gradio in /usr/local/lib/python3.11/dist-packages (5.31.0)\n",
+            "Requirement already satisfied: aiofiles<25.0,>=22.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (24.1.0)\n",
+            "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (4.9.0)\n",
+            "Requirement already satisfied: fastapi<1.0,>=0.115.2 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.115.12)\n",
+            "Requirement already satisfied: ffmpy in /usr/local/lib/python3.11/dist-packages (from gradio) (0.6.0)\n",
+            "Requirement already satisfied: gradio-client==1.10.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (1.10.1)\n",
+            "Requirement already satisfied: groovy~=0.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.1.2)\n",
+            "Requirement already satisfied: httpx>=0.24.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.28.1)\n",
+            "Requirement already satisfied: huggingface-hub>=0.28.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.32.4)\n",
+            "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.1.6)\n",
+            "Requirement already satisfied: markupsafe<4.0,>=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.0.2)\n",
+            "Requirement already satisfied: numpy<3.0,>=1.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.0.2)\n",
+            "Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.10.18)\n",
+            "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from gradio) (24.2)\n",
+            "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.2.2)\n",
+            "Requirement already satisfied: pillow<12.0,>=8.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (11.2.1)\n",
+            "Requirement already satisfied: pydantic<2.12,>=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.11.5)\n",
+            "Requirement already satisfied: pydub in /usr/local/lib/python3.11/dist-packages (from gradio) (0.25.1)\n",
+            "Requirement already satisfied: python-multipart>=0.0.18 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.0.20)\n",
+            "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (6.0.2)\n",
+            "Requirement already satisfied: ruff>=0.9.3 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.11.12)\n",
+            "Requirement already satisfied: safehttpx<0.2.0,>=0.1.6 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.1.6)\n",
+            "Requirement already satisfied: semantic-version~=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.10.0)\n",
+            "Requirement already satisfied: starlette<1.0,>=0.40.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.46.2)\n",
+            "Requirement already satisfied: tomlkit<0.14.0,>=0.12.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.13.2)\n",
+            "Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.16.0)\n",
+            "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (4.14.0)\n",
+            "Requirement already satisfied: uvicorn>=0.14.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.34.3)\n",
+            "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from gradio-client==1.10.1->gradio) (2025.3.2)\n",
+            "Requirement already satisfied: websockets<16.0,>=10.0 in /usr/local/lib/python3.11/dist-packages (from gradio-client==1.10.1->gradio) (15.0.1)\n",
+            "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio) (3.10)\n",
+            "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio) (1.3.1)\n",
+            "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx>=0.24.1->gradio) (2025.4.26)\n",
+            "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx>=0.24.1->gradio) (1.0.9)\n",
+            "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx>=0.24.1->gradio) (0.16.0)\n",
+            "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.28.1->gradio) (3.18.0)\n",
+            "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.28.1->gradio) (2.32.3)\n",
+            "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.28.1->gradio) (4.67.1)\n",
+            "Requirement already satisfied: hf-xet<2.0.0,>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.28.1->gradio) (1.1.2)\n",
+            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2.9.0.post0)\n",
+            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2025.2)\n",
+            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2025.2)\n",
+            "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio) (0.7.0)\n",
+            "Requirement already satisfied: pydantic-core==2.33.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio) (2.33.2)\n",
+            "Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio) (0.4.1)\n",
+            "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (8.2.1)\n",
+            "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (1.5.4)\n",
+            "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (13.9.4)\n",
+            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio) (1.17.0)\n",
+            "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n",
+            "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.19.1)\n",
+            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub>=0.28.1->gradio) (3.4.2)\n",
+            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub>=0.28.1->gradio) (2.4.0)\n",
+            "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n"
+          ]
+        }
+      ],
+      "source": [
+        "%pip install gradio"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 28,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "SshnFWQzj6aS",
+        "outputId": "f25a7aa5-f179-472f-a79c-11df166497be"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "SoftmaxTagger(\n",
+              "  (hidden2tag): Linear(in_features=768, out_features=7, bias=True)\n",
+              ")"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 28
+        }
+      ],
+      "source": [
+        "# INPUT_DIM, NUM_TAGS and device must already be defined by earlier cells.\n",
+        "model = SoftmaxTagger(INPUT_DIM, NUM_TAGS)\n",
+        "# map_location lets a checkpoint written on a GPU load on a CPU-only runtime.\n",
+        "model.load_state_dict(torch.load(\"softmax_tagger.pth\", map_location=device))\n",
+        "model.eval()  # switch to evaluation mode\n",
+        "model.to(device)  # move the model to `device`; as the last expression it also displays the module"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 29,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 625
+        },
+        "id": "wIK-QRWmXxzz",
+        "outputId": "2547cdda-a687-46a9-9243-cea7a8916de6"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n",
+            "\n",
+            "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
+            "* Running on public URL: https://c3f739dbf40a0a0681.gradio.live\n",
+            "\n",
+            "This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "<div><iframe src=\"https://c3f739dbf40a0a0681.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": []
+          },
+          "metadata": {},
+          "execution_count": 29
+        }
+      ],
+      "source": [
+        "import gradio as gr\n",
+        "import json\n",
+        "import tempfile\n",
+        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+        "\n",
+        "\n",
+        "def ner_interface(text):\n",
+        "    \"\"\"Gradio callback: run predict_ner on `text` and shape the result for the UI.\n",
+        "\n",
+        "    Returns `(rows, payload)`. `rows` is a list of [token, label, id] entries for\n",
+        "    the table; `payload` is a dict with the keys \"tokens\", \"labels\" and\n",
+        "    \"label_ids\". If anything raises, the error is printed and a single error row\n",
+        "    plus an {\"error\": message} dict are returned instead.\n",
+        "    \"\"\"\n",
+        "    try:\n",
+        "        ids, labels, tokens = predict_ner(text)\n",
+        "        rows = [list(triple) for triple in zip(tokens, labels, ids)]\n",
+        "        payload = dict(tokens=tokens, labels=labels, label_ids=ids)\n",
+        "        return rows, payload\n",
+        "    except Exception as e:\n",
+        "        print(\"Error:\", e)\n",
+        "        message = str(e)\n",
+        "        return [[\"Lỗi\", message, \"\"]], {\"error\": message}\n",
+        "\n",
+        "def json_to_file(json_data):\n",
+        "    \"\"\"Write `json_data` to a new temporary .json file and return the file's path.\n",
+        "\n",
+        "    The file is created with delete=False so it still exists after this function\n",
+        "    returns and gr.File can offer it for download; nothing here removes it later.\n",
+        "    \"\"\"\n",
+        "    # The with-block closes the handle even when json.dump raises.\n",
+        "    with tempfile.NamedTemporaryFile(\n",
+        "        mode='w', suffix='.json', delete=False, encoding='utf-8'\n",
+        "    ) as tmp:\n",
+        "        json.dump(json_data, tmp, ensure_ascii=False, indent=2)\n",
+        "    return tmp.name\n",
+        "\n",
+        "\n",
+        "# Build the Gradio UI: text input on the left, results table on the right,\n",
+        "# and a second row for exporting the last result as a JSON file.\n",
+        "with gr.Blocks(title=\"Nhận dạng Thực thể (NER) với PhoBERT\") as demo:\n",
+        "    gr.Markdown(\"## 📌 Hệ thống Nhận dạng Thực thể Tên (NER) sử dụng PhoBERT + Softmax\")\n",
+        "\n",
+        "    with gr.Row():\n",
+        "        with gr.Column(scale=3):\n",
+        "            input_text = gr.Textbox(\n",
+        "                lines=4,\n",
+        "                label=\"✍️ Nhập văn bản đầu vào\",\n",
+        "                placeholder=\"Ví dụ: Nguyễn Văn A sinh ra ở Hà Nội.\"\n",
+        "            )\n",
+        "            btn = gr.Button(\"🚀 Nhận dạng Thực thể\")\n",
+        "\n",
+        "        with gr.Column(scale=5):\n",
+        "            output_table = gr.Dataframe(\n",
+        "                headers=[\"Token\", \"Label\", \"ID\"],\n",
+        "                label=\"📄 Kết quả nhận dạng thực thể\",\n",
+        "                wrap=True\n",
+        "            )\n",
+        "            output_json = gr.JSON(visible=False)  # hidden; set visible=True to show the raw JSON\n",
+        "\n",
+        "    with gr.Row():\n",
+        "        download_trigger = gr.Button(\"💾 Tải kết quả dưới dạng JSON\")\n",
+        "        download_file = gr.File(label=\"📥 File JSON đã xử lý\")\n",
+        "\n",
+        "    # Wire the buttons: the first runs NER and fills the table and the hidden JSON;\n",
+        "    # the second writes that JSON to a file offered for download.\n",
+        "    btn.click(fn=ner_interface, inputs=input_text, outputs=[output_table, output_json])\n",
+        "    download_trigger.click(fn=json_to_file, inputs=output_json, outputs=download_file)\n",
+        "\n",
+        "demo.launch()\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 29,
+      "metadata": {
+        "id": "mvQgpNetjo02"
+      },
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "collapsed_sections": [
+        "DiCxlUcHQ9NJ"
+      ],
+      "gpuType": "T4",
+      "provenance": []
+    },
+    "kaggle": {
+      "accelerator": "nvidiaTeslaT4",
+      "dataSources": [],
+      "dockerImageVersionId": 31040,
+      "isGpuEnabled": true,
+      "isInternetEnabled": true,
+      "language": "python",
+      "sourceType": "notebook"
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.11.11"
+    },
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "1d9aaa035056485c959f66b60cf41714": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_71054a4397e344a2a66e32892a37b59b",
+              "IPY_MODEL_6c582399be1d42c3b5a4ef21743d1a26",
+              "IPY_MODEL_26a6ef7f967a4504a698ff3152ccb24e"
+            ],
+            "layout": "IPY_MODEL_82669dd27686486588fdd7d11f49edd3"
+          }
+        },
+        "71054a4397e344a2a66e32892a37b59b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_461c136b3eac4a9dadb8a3af7c11c98a",
+            "placeholder": "​",
+            "style": "IPY_MODEL_4fb68dcea1ac4e54b82c92ad64e9be95",
+            "value": "config.json: 100%"
+          }
+        },
+        "6c582399be1d42c3b5a4ef21743d1a26": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_df397db853874f6db911acb667785ec0",
+            "max": 557,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_2de5befe0ab24de9a62ba076e5abf78e",
+            "value": 557
+          }
+        },
+        "26a6ef7f967a4504a698ff3152ccb24e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_d7da38e7c5e1484597bb1faae3c2d7f5",
+            "placeholder": "​",
+            "style": "IPY_MODEL_2ec3573cb04143a8ba5e555bfaf60165",
+            "value": " 557/557 [00:00&lt;00:00, 52.7kB/s]"
+          }
+        },
+        "82669dd27686486588fdd7d11f49edd3": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "461c136b3eac4a9dadb8a3af7c11c98a": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "4fb68dcea1ac4e54b82c92ad64e9be95": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "df397db853874f6db911acb667785ec0": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "2de5befe0ab24de9a62ba076e5abf78e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "d7da38e7c5e1484597bb1faae3c2d7f5": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "2ec3573cb04143a8ba5e555bfaf60165": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "67c557f2651b4e5c81e9af82531898fe": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_5a5a04069ebd41fdba7835e1b5da585e",
+              "IPY_MODEL_6c5b30dc67d94071af4d4b14cf4be7e4",
+              "IPY_MODEL_e9200c8269fa4ebd9c1157cc1b871005"
+            ],
+            "layout": "IPY_MODEL_50ab44e431a54c27b409dc74c068c392"
+          }
+        },
+        "5a5a04069ebd41fdba7835e1b5da585e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_9929425b4df94d3792dc454afe59b3fd",
+            "placeholder": "​",
+            "style": "IPY_MODEL_03efbd25cd4341cea6714ffc3585632a",
+            "value": "vocab.txt: 100%"
+          }
+        },
+        "6c5b30dc67d94071af4d4b14cf4be7e4": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_ae22d9a4de574ce3905a6f6c82fac1aa",
+            "max": 895321,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_71fe669e2f68444ba4b81ffc14a39c03",
+            "value": 895321
+          }
+        },
+        "e9200c8269fa4ebd9c1157cc1b871005": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_4d4c131206f448c7ba6d5c4e41126d41",
+            "placeholder": "​",
+            "style": "IPY_MODEL_ec260d8279bd4a40ad2bbc3c1d7dadaa",
+            "value": " 895k/895k [00:00&lt;00:00, 1.91MB/s]"
+          }
+        },
+        "50ab44e431a54c27b409dc74c068c392": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "9929425b4df94d3792dc454afe59b3fd": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "03efbd25cd4341cea6714ffc3585632a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "ae22d9a4de574ce3905a6f6c82fac1aa": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "71fe669e2f68444ba4b81ffc14a39c03": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "4d4c131206f448c7ba6d5c4e41126d41": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "ec260d8279bd4a40ad2bbc3c1d7dadaa": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "08611ca37f8c462db079dc2883f06002": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_47eca14208cc4e5085d44cce42872a35",
+              "IPY_MODEL_49d3e88f60e744e7b37bcbcca5bbb087",
+              "IPY_MODEL_81b9bb3315e4402ea5b6768d0d189591"
+            ],
+            "layout": "IPY_MODEL_4b88329de5ed48738ca0da054a1f0131"
+          }
+        },
+        "47eca14208cc4e5085d44cce42872a35": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_1507e771a5ca4056b0605cd453d89c60",
+            "placeholder": "​",
+            "style": "IPY_MODEL_44946e0a5d31408a851b0e8ab5217c43",
+            "value": "bpe.codes: 100%"
+          }
+        },
+        "49d3e88f60e744e7b37bcbcca5bbb087": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_ee8c999b66e84cbda17702e916d48a3f",
+            "max": 1135173,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_6c6228d416944599b110ffa97b20bd8c",
+            "value": 1135173
+          }
+        },
+        "81b9bb3315e4402ea5b6768d0d189591": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_c519c27334b742ee8f14e29da2ebdf9b",
+            "placeholder": "​",
+            "style": "IPY_MODEL_7d422740d70546559703cd0304be663f",
+            "value": " 1.14M/1.14M [00:00&lt;00:00, 1.74MB/s]"
+          }
+        },
+        "4b88329de5ed48738ca0da054a1f0131": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "1507e771a5ca4056b0605cd453d89c60": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "44946e0a5d31408a851b0e8ab5217c43": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "ee8c999b66e84cbda17702e916d48a3f": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "6c6228d416944599b110ffa97b20bd8c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "c519c27334b742ee8f14e29da2ebdf9b": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "7d422740d70546559703cd0304be663f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "371ab2b9d7c84402b3c4b934e89eca4b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_7ead683f167c408f88ba72b2ee1599d1",
+              "IPY_MODEL_737d3d1e5a3146de96c17ca8ec72d75e",
+              "IPY_MODEL_7ec9beb535c9428a954367613fa7f4cd"
+            ],
+            "layout": "IPY_MODEL_3c51b3c0b7ba4a2eacb1b8b2be8e024c"
+          }
+        },
+        "7ead683f167c408f88ba72b2ee1599d1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_50dc5fc6a7354394ace536241fa01714",
+            "placeholder": "​",
+            "style": "IPY_MODEL_78ba04a44d9e42dba9fb7617d28c91e9",
+            "value": "tokenizer.json: 100%"
+          }
+        },
+        "737d3d1e5a3146de96c17ca8ec72d75e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_ca87704af3bc4c7590eea8f8f0f50d94",
+            "max": 3132320,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_f3870bce67da4affa8925d9d898638da",
+            "value": 3132320
+          }
+        },
+        "7ec9beb535c9428a954367613fa7f4cd": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_e6efac23366643dd861caf121a8a220b",
+            "placeholder": "​",
+            "style": "IPY_MODEL_1044cf40d7e54337859bab0057aa0b54",
+            "value": " 3.13M/3.13M [00:00&lt;00:00, 6.78MB/s]"
+          }
+        },
+        "3c51b3c0b7ba4a2eacb1b8b2be8e024c": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "50dc5fc6a7354394ace536241fa01714": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "78ba04a44d9e42dba9fb7617d28c91e9": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "ca87704af3bc4c7590eea8f8f0f50d94": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f3870bce67da4affa8925d9d898638da": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "e6efac23366643dd861caf121a8a220b": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "1044cf40d7e54337859bab0057aa0b54": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "3a6bb86ac3db4f82ae139507f94607e0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_04a96e9ef4774bd1a3cd3a1dd20fe194",
+              "IPY_MODEL_6158b1f05f6c4851b492df312f0312ca",
+              "IPY_MODEL_b81c4146511045e280ba4fa226074679"
+            ],
+            "layout": "IPY_MODEL_3676c8217c654790972189be8c1f4627"
+          }
+        },
+        "04a96e9ef4774bd1a3cd3a1dd20fe194": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_762b2c8d23824070b2eb115e151f0c73",
+            "placeholder": "​",
+            "style": "IPY_MODEL_d9d57b09b01846a4805882b4adf64e55",
+            "value": "pytorch_model.bin: 100%"
+          }
+        },
+        "6158b1f05f6c4851b492df312f0312ca": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_e804cb9279dd458aa8b661d28c4427ff",
+            "max": 542923308,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_3169169dbabb4b1aa7906a0415eacdcb",
+            "value": 542923308
+          }
+        },
+        "b81c4146511045e280ba4fa226074679": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_408130d71bb74141906cbc1d2123bb63",
+            "placeholder": "​",
+            "style": "IPY_MODEL_4be265bd67e1470cbd856dd268908c00",
+            "value": " 543M/543M [00:01&lt;00:00, 366MB/s]"
+          }
+        },
+        "3676c8217c654790972189be8c1f4627": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "762b2c8d23824070b2eb115e151f0c73": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "d9d57b09b01846a4805882b4adf64e55": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "e804cb9279dd458aa8b661d28c4427ff": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "3169169dbabb4b1aa7906a0415eacdcb": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "408130d71bb74141906cbc1d2123bb63": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "4be265bd67e1470cbd856dd268908c00": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "fabfacd2a2964d23994070bcb6bc4b3c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_09db5366452347dfa40bbd192d22d489",
+              "IPY_MODEL_706046f4266a4ade9f5f1718fc0bbf4f",
+              "IPY_MODEL_d39e9db3bbf84736b090e12ecf9bd1fd"
+            ],
+            "layout": "IPY_MODEL_654a5b527d0c4c51afce2e65fb3b36aa"
+          }
+        },
+        "09db5366452347dfa40bbd192d22d489": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_fa1045a3003d4496a2a5c3055355120d",
+            "placeholder": "​",
+            "style": "IPY_MODEL_f00793fc0f1948fca2fda701eb461505",
+            "value": "model.safetensors: 100%"
+          }
+        },
+        "706046f4266a4ade9f5f1718fc0bbf4f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_9994f44df6a0451792559446361557e9",
+            "max": 542900336,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_cc2a9437c039472f8447f9d0194459dc",
+            "value": 542900336
+          }
+        },
+        "d39e9db3bbf84736b090e12ecf9bd1fd": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_2c64cf42d35a4722a3b32b366d1dcd1b",
+            "placeholder": "​",
+            "style": "IPY_MODEL_a9886da1334d4df0b4c6536255df6420",
+            "value": " 543M/543M [00:02&lt;00:00, 213MB/s]"
+          }
+        },
+        "654a5b527d0c4c51afce2e65fb3b36aa": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "fa1045a3003d4496a2a5c3055355120d": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f00793fc0f1948fca2fda701eb461505": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "9994f44df6a0451792559446361557e9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "cc2a9437c039472f8447f9d0194459dc": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "2c64cf42d35a4722a3b32b366d1dcd1b": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "a9886da1334d4df0b4c6536255df6420": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        }
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file