Training in progress, step 600

Files changed (4) hide show

fine-tune-whisper-non-streaming-zh.ipynb → .ipynb_checkpoints/fine-tune-whisper-non-streaming-zh-TW-checkpoint.ipynb +15 -5
.ipynb_checkpoints/fine-tune-whisper-non-streaming-zh-checkpoint.ipynb → fine-tune-whisper-non-streaming-zh-TW.ipynb +112 -406
pytorch_model.bin +1 -1
runs/Dec20_16-48-49_DANDAN/events.out.tfevents.1671526137.DANDAN.29004.0 +2 -2

fine-tune-whisper-non-streaming-zh.ipynb → .ipynb_checkpoints/fine-tune-whisper-non-streaming-zh-TW-checkpoint.ipynb RENAMED Viewed

@@ -1134,8 +1134,8 @@
        "\n",
        "    <div>\n",
        "      \n",
-       "      <progress value='401' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-       "      [ 401/1000 1:53:27 < 2:50:19, 0.06 it/s, Epoch 1.14/3]\n",
        "    </div>\n",
        "    <table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -1160,7 +1160,13 @@
        "      <td>7.590416</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
-       "</table><p>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -1190,7 +1196,11 @@
       "Configuration saved in ./checkpoint-400/config.json\n",
       "Model weights saved in ./checkpoint-400/pytorch_model.bin\n",
       "Feature extractor saved in ./checkpoint-400/preprocessor_config.json\n",
-      "Feature extractor saved in ./preprocessor_config.json\n"
      ]
     }
    ],
@@ -1221,7 +1231,7 @@
     "    \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
     "    \"dataset\": \"mozilla-foundation/common_voice_11_0\",  # a 'pretty' name for the training dataset\n",
     "    \"language\": \"zh-TW\",\n",
-    "    \"model_name\": \"Whisper Medium MS - Augmented\",  # a 'pretty' name for your model\n",
     "    \"finetuned_from\": \"openai/whisper-medium\",\n",
     "    \"tasks\": \"automatic-speech-recognition\",\n",
     "    \"tags\": \"whisper-event\",\n",

        "\n",
        "    <div>\n",
        "      \n",
+       "      <progress value='601' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [ 601/1000 3:16:05 < 2:10:37, 0.05 it/s, Epoch 1.70/3]\n",
        "    </div>\n",
        "    <table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "      <td>7.590416</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
+       "</table><p>\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='330' max='2355' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [ 330/2355 07:22 < 45:21, 0.74 it/s]\n",
+       "    </div>\n",
+       "    "
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
       "Configuration saved in ./checkpoint-400/config.json\n",
       "Model weights saved in ./checkpoint-400/pytorch_model.bin\n",
       "Feature extractor saved in ./checkpoint-400/preprocessor_config.json\n",
+      "Feature extractor saved in ./preprocessor_config.json\n",
+      "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.\n",
+      "***** Running Evaluation *****\n",
+      "  Num examples = 4709\n",
+      "  Batch size = 2\n"
      ]
     }
    ],
     "    \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
     "    \"dataset\": \"mozilla-foundation/common_voice_11_0\",  # a 'pretty' name for the training dataset\n",
     "    \"language\": \"zh-TW\",\n",
+    "    \"model_name\": \"Whisper Medium TW - Augmented\",  # a 'pretty' name for your model\n",
     "    \"finetuned_from\": \"openai/whisper-medium\",\n",
     "    \"tasks\": \"automatic-speech-recognition\",\n",
     "    \"tags\": \"whisper-event\",\n",

.ipynb_checkpoints/fine-tune-whisper-non-streaming-zh-checkpoint.ipynb → fine-tune-whisper-non-streaming-zh-TW.ipynb RENAMED Viewed

@@ -145,354 +145,11 @@
     "id": "a2787582-554f-44ce-9f38-4180a5ed6b44"
    },
    "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Downloading and preparing dataset common_voice_11_0/zh-TW to /home/daniel/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/zh-TW/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f...\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "2b102b9d1660420cabb06bfe54c35c25",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading data files:   0%|          | 0/5 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "bfd9db96bd5c47fbb8847a7211b71c1b",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading data:   0%|          | 0.00/152M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "0f0161d0a1d04428bf295e1ce0bfc7d8",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading data:   0%|          | 0.00/120M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "d85a133c5b5c4f8883faada1fcee9ddf",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading data:   0%|          | 0.00/134M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b2b0ecba08154555a3f1940c2d7e3f18",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading data:   0%|          | 0.00/995M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "689229ba2edf47bea236f231fdd845b8",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading data:   0%|          | 0.00/18.6M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "728521ae7d68422c92c2c29cc9aaff7a",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading data:   0%|          | 0.00/128M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e6c78710e8dd4f00a856ba8c17325ca9",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Extracting data files:   0%|          | 0/5 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "cc7cdaf4c35b4784990b949d05f43b47",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading data files:   0%|          | 0/5 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "628b00ae5c7a4ac082453b11bd3fb68a",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading data:   0%|          | 0.00/1.49M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "34941d3d98554e4dbac5e27e20cc84df",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading data:   0%|          | 0.00/1.02M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5e5886ccc15a497380aae6559f11abd1",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading data:   0%|          | 0.00/983k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "830007d441bd44109a015d89d27e0ba9",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading data:   0%|          | 0.00/8.75M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "8df8ab2f3cdd4f51bc4f7bdc31f9d2a7",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading data:   0%|          | 0.00/995k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "fa78caff99dc442b9612c266eda880f3",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Extracting data files:   0%|          | 0/5 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Generating train split: 0 examples [00:00, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Reading metadata...: 6568it [00:00, 242101.00it/s]\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Generating validation split: 0 examples [00:00, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\n",
-      "Reading metadata...: 4709it [00:00, 244282.56it/s]\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Generating test split: 0 examples [00:00, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\n",
-      "\n",
-      "Reading metadata...: 4709it [00:00, 253653.42it/s]\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Generating other split: 0 examples [00:00, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "Reading metadata...: 0it [00:00, ?it/s]\u001b[A\u001b[A\u001b[A\u001b[A\n",
-      "\n",
-      "\n",
-      "\n",
-      "Reading metadata...: 40630it [00:00, 258238.72it/s]\u001b[A\u001b[A\u001b[A\u001b[A\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Generating invalidated split: 0 examples [00:00, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Reading metadata...: 4596it [00:00, 253281.76it/s]\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Dataset common_voice_11_0 downloaded and prepared to /home/daniel/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/zh-TW/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f. Subsequent calls will reuse this data.\n"
-     ]
-    },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
       "Found cached dataset common_voice_11_0 (/home/daniel/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/zh-TW/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f)\n"
      ]
     },
@@ -526,20 +183,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
    "id": "79731fc3",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "{'audio': {'path': None,\n",
-       "  'array': array([0., 0., 0., ..., 0., 0., 0.]),\n",
-       "  'sampling_rate': 16000},\n",
        " 'sentence': '爸爸們父親節快樂！'}"
       ]
      },
-     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -759,7 +416,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
    "id": "b27e4720",
    "metadata": {},
    "outputs": [],
@@ -784,7 +441,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
    "id": "b459b0c5",
    "metadata": {},
    "outputs": [
@@ -798,7 +455,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "2db072c14c9a419d87f37e6747e62703",
        "version_major": 2,
        "version_minor": 0
       },
@@ -827,7 +484,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
    "id": "d041650e-1c48-4439-87b3-5b6f4a514107",
    "metadata": {},
    "outputs": [],
@@ -838,7 +495,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
    "id": "c085911c-a10a-41ef-8874-306e0503e9bb",
    "metadata": {},
    "outputs": [],
@@ -865,7 +522,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
    "id": "90965caa",
    "metadata": {},
    "outputs": [
@@ -879,7 +536,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ee8d88c2e3ef46039c2db32818ab08d7",
        "version_major": 2,
        "version_minor": 0
       },
@@ -891,18 +548,11 @@
      "output_type": "display_data"
     },
     {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "48fce3473f7a42a29723b4148fb549c4",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/4709 [00:00<?, ?ex/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
     }
    ],
    "source": [
@@ -912,7 +562,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
    "id": "bde2118b",
    "metadata": {},
    "outputs": [
@@ -931,7 +581,7 @@
        "         34131,   220, 42117,   220, 27694, 42598,   220, 34043,   220, 50257])}"
       ]
      },
-     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -950,7 +600,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
    "id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
    "metadata": {},
    "outputs": [],
@@ -971,14 +621,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
    "id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "cbb3a2b5bb1a43a6a9acf13fa6ddf6b9",
        "version_major": 2,
        "version_minor": 0
       },
@@ -999,7 +649,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
    "id": "53945dcb",
    "metadata": {},
    "outputs": [],
@@ -1077,7 +727,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
    "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
    "metadata": {
     "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5"
@@ -1129,7 +779,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
    "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
    "metadata": {
     "id": "fc834702-c0d3-4a96-b101-7b87be32bf42"
@@ -1162,7 +812,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
    "id": "b22b4011-f31f-4b57-b684-c52332f92890",
    "metadata": {
     "id": "b22b4011-f31f-4b57-b684-c52332f92890"
@@ -1195,7 +845,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
    "id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52",
    "metadata": {
     "id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52"
@@ -1248,7 +898,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
    "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
    "metadata": {
     "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f"
@@ -1272,7 +922,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
    "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
    "metadata": {
     "id": "62038ba3-88ed-4fce-84db-338f50dcd04f"
@@ -1306,7 +956,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
    "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
    "metadata": {
     "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a"
@@ -1364,7 +1014,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
    "id": "d546d7fe-0543-479a-b708-2ebabec19493",
    "metadata": {
     "id": "d546d7fe-0543-479a-b708-2ebabec19493",
@@ -1407,7 +1057,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
    "id": "-2zQwMfEOBJq",
    "metadata": {
     "id": "-2zQwMfEOBJq"
@@ -1456,7 +1106,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
    "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
    "metadata": {
     "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
@@ -1468,8 +1118,6 @@
      "output_type": "stream",
      "text": [
       "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.\n",
-      "/home/daniel/whisper/lib/python3.8/site-packages/bitsandbytes/cextension.py:127: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers and GPU quantization are unavailable.\n",
-      "  warn(\"The installed version of bitsandbytes was compiled without GPU support. \"\n",
       "***** Running training *****\n",
       "  Num examples = 11277\n",
       "  Num Epochs = 3\n",
@@ -1481,25 +1129,83 @@
      ]
     },
     {
-     "ename": "NameError",
-     "evalue": "name 'str2optimizer8bit_blockwise' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[39], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/transformers/trainer.py:1535\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m   1530\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_wrapped \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\n\u001b[1;32m   1532\u001b[0m inner_training_loop \u001b[38;5;241m=\u001b[39m find_executable_batch_size(\n\u001b[1;32m   1533\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inner_training_loop, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_batch_size, args\u001b[38;5;241m.\u001b[39mauto_find_batch_size\n\u001b[1;32m   1534\u001b[0m )\n\u001b[0;32m-> 1535\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1536\u001b[0m \u001b[43m    \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1537\u001b[0m \u001b[43m    \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1538\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1539\u001b[0m \u001b[43m    \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1540\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/transformers/trainer.py:1845\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m   1843\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdo_grad_scaling:\n\u001b[1;32m   1844\u001b[0m     scale_before \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscaler\u001b[38;5;241m.\u001b[39mget_scale()\n\u001b[0;32m-> 1845\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscaler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptimizer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1846\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscaler\u001b[38;5;241m.\u001b[39mupdate()\n\u001b[1;32m   1847\u001b[0m     scale_after \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscaler\u001b[38;5;241m.\u001b[39mget_scale()\n",
-      "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/torch/cuda/amp/grad_scaler.py:341\u001b[0m, in \u001b[0;36mGradScaler.step\u001b[0;34m(self, optimizer, *args, **kwargs)\u001b[0m\n\u001b[1;32m    337\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39munscale_(optimizer)\n\u001b[1;32m    339\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(optimizer_state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfound_inf_per_device\u001b[39m\u001b[38;5;124m\"\u001b[39m]) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo inf checks were recorded for this optimizer.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 341\u001b[0m retval \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_maybe_opt_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43moptimizer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptimizer_state\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    343\u001b[0m optimizer_state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstage\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m OptState\u001b[38;5;241m.\u001b[39mSTEPPED\n\u001b[1;32m    345\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m retval\n",
-      "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/torch/cuda/amp/grad_scaler.py:288\u001b[0m, in \u001b[0;36mGradScaler._maybe_opt_step\u001b[0;34m(self, optimizer, optimizer_state, *args, **kwargs)\u001b[0m\n\u001b[1;32m    286\u001b[0m retval \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    287\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28msum\u001b[39m(v\u001b[38;5;241m.\u001b[39mitem() \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m optimizer_state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfound_inf_per_device\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mvalues()):\n\u001b[0;32m--> 288\u001b[0m     retval \u001b[38;5;241m=\u001b[39m \u001b[43moptimizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    289\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m retval\n",
-      "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/torch/optim/lr_scheduler.py:68\u001b[0m, in \u001b[0;36m_LRScheduler.__init__.<locals>.with_counter.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     66\u001b[0m instance\u001b[38;5;241m.\u001b[39m_step_count \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m     67\u001b[0m wrapped \u001b[38;5;241m=\u001b[39m func\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__get__\u001b[39m(instance, \u001b[38;5;28mcls\u001b[39m)\n\u001b[0;32m---> 68\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mwrapped\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/torch/optim/optimizer.py:140\u001b[0m, in \u001b[0;36mOptimizer._hook_for_profile.<locals>.profile_hook_step.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    138\u001b[0m profile_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOptimizer.step#\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m.step\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(obj\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m)\n\u001b[1;32m    139\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mautograd\u001b[38;5;241m.\u001b[39mprofiler\u001b[38;5;241m.\u001b[39mrecord_function(profile_name):\n\u001b[0;32m--> 140\u001b[0m     out \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    141\u001b[0m     obj\u001b[38;5;241m.\u001b[39m_optimizer_step_code()\n\u001b[1;32m    142\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m out\n",
-      "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/torch/autograd/grad_mode.py:27\u001b[0m, in \u001b[0;36m_DecoratorContextManager.__call__.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     24\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m     25\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m     26\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclone():\n\u001b[0;32m---> 27\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/bitsandbytes/optim/optimizer.py:265\u001b[0m, in \u001b[0;36mOptimizer8bit.step\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m    262\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(state) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m    263\u001b[0m             \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minit_state(group, p, gindex, pindex)\n\u001b[0;32m--> 265\u001b[0m         \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupdate_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgroup\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgindex\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpindex\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    267\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m loss\n",
-      "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/torch/autograd/grad_mode.py:27\u001b[0m, in \u001b[0;36m_DecoratorContextManager.__call__.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     24\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m     25\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m     26\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclone():\n\u001b[0;32m---> 27\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/bitsandbytes/optim/optimizer.py:506\u001b[0m, in \u001b[0;36mOptimizer2State.update_step\u001b[0;34m(self, group, p, gindex, pindex)\u001b[0m\n\u001b[1;32m    504\u001b[0m     state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax2\u001b[39m\u001b[38;5;124m\"\u001b[39m], state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnew_max2\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnew_max2\u001b[39m\u001b[38;5;124m\"\u001b[39m], state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax2\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m    505\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstate1\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m torch\u001b[38;5;241m.\u001b[39muint8 \u001b[38;5;129;01mand\u001b[39;00m config[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mblock_wise\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[0;32m--> 506\u001b[0m     \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptimizer_update_8bit_blockwise\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    507\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptimizer_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    508\u001b[0m \u001b[43m        \u001b[49m\u001b[43mgrad\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    509\u001b[0m \u001b[43m        \u001b[49m\u001b[43mp\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    510\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstate1\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    511\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstate2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    512\u001b[0m \u001b[43m        \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbetas\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    513\u001b[0m \u001b[43m        \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbetas\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    514\u001b[0m \u001b[43m        \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43meps\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    515\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstep\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    516\u001b[0m \u001b[43m        \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    517\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mqmap1\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    518\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mqmap2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    519\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mabsmax1\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    520\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mabsmax2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    521\u001b[0m \u001b[43m        \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mweight_decay\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    522\u001b[0m \u001b[43m        \u001b[49m\u001b[43mgnorm_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgnorm_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    523\u001b[0m \u001b[43m        \u001b[49m\u001b[43mskip_zeros\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mskip_zeros\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    524\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/bitsandbytes/functional.py:858\u001b[0m, in \u001b[0;36moptimizer_update_8bit_blockwise\u001b[0;34m(optimizer_name, g, p, state1, state2, beta1, beta2, eps, step, lr, qmap1, qmap2, absmax1, absmax2, weight_decay, gnorm_scale, skip_zeros)\u001b[0m\n\u001b[1;32m    837\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21moptimizer_update_8bit_blockwise\u001b[39m(\n\u001b[1;32m    838\u001b[0m     optimizer_name: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m    839\u001b[0m     g: Tensor,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    854\u001b[0m     skip_zeros\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m    855\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    857\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m g\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m torch\u001b[38;5;241m.\u001b[39mfloat32 \u001b[38;5;129;01mand\u001b[39;00m state1\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m torch\u001b[38;5;241m.\u001b[39muint8:\n\u001b[0;32m--> 858\u001b[0m         \u001b[43mstr2optimizer8bit_blockwise\u001b[49m[optimizer_name][\u001b[38;5;241m0\u001b[39m](\n\u001b[1;32m    859\u001b[0m             get_ptr(p),\n\u001b[1;32m    860\u001b[0m             get_ptr(g),\n\u001b[1;32m    861\u001b[0m             get_ptr(state1),\n\u001b[1;32m    862\u001b[0m             get_ptr(state2),\n\u001b[1;32m    863\u001b[0m             ct\u001b[38;5;241m.\u001b[39mc_float(beta1),\n\u001b[1;32m    864\u001b[0m             ct\u001b[38;5;241m.\u001b[39mc_float(beta2),\n\u001b[1;32m    865\u001b[0m             ct\u001b[38;5;241m.\u001b[39mc_float(eps),\n\u001b[1;32m    866\u001b[0m             ct\u001b[38;5;241m.\u001b[39mc_int32(step),\n\u001b[1;32m    867\u001b[0m             ct\u001b[38;5;241m.\u001b[39mc_float(lr),\n\u001b[1;32m    868\u001b[0m             get_ptr(qmap1),\n\u001b[1;32m    869\u001b[0m             get_ptr(qmap2),\n\u001b[1;32m    870\u001b[0m             get_ptr(absmax1),\n\u001b[1;32m    871\u001b[0m             get_ptr(absmax2),\n\u001b[1;32m    872\u001b[0m             ct\u001b[38;5;241m.\u001b[39mc_float(weight_decay),\n\u001b[1;32m    873\u001b[0m             ct\u001b[38;5;241m.\u001b[39mc_float(gnorm_scale),\n\u001b[1;32m    874\u001b[0m             ct\u001b[38;5;241m.\u001b[39mc_bool(skip_zeros),\n\u001b[1;32m    875\u001b[0m             ct\u001b[38;5;241m.\u001b[39mc_int32(g\u001b[38;5;241m.\u001b[39mnumel()),\n\u001b[1;32m    876\u001b[0m         )\n\u001b[1;32m    877\u001b[0m     \u001b[38;5;28;01melif\u001b[39;00m g\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m torch\u001b[38;5;241m.\u001b[39mfloat16 \u001b[38;5;129;01mand\u001b[39;00m state1\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m torch\u001b[38;5;241m.\u001b[39muint8:\n\u001b[1;32m    878\u001b[0m         str2optimizer8bit_blockwise[optimizer_name][\u001b[38;5;241m1\u001b[39m](\n\u001b[1;32m    879\u001b[0m             get_ptr(p),\n\u001b[1;32m    880\u001b[0m             get_ptr(g),\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    895\u001b[0m             ct\u001b[38;5;241m.\u001b[39mc_int32(g\u001b[38;5;241m.\u001b[39mnumel()),\n\u001b[1;32m    896\u001b[0m         )\n",
-      "\u001b[0;31mNameError\u001b[0m: name 'str2optimizer8bit_blockwise' is not defined"
      ]
     }
    ],
@@ -1530,7 +1236,7 @@
     "    \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
     "    \"dataset\": \"mozilla-foundation/common_voice_11_0\",  # a 'pretty' name for the training dataset\n",
     "    \"language\": \"zh-TW\",\n",
-    "    \"model_name\": \"Whisper Medium MS - Augmented\",  # a 'pretty' name for your model\n",
     "    \"finetuned_from\": \"openai/whisper-medium\",\n",
     "    \"tasks\": \"automatic-speech-recognition\",\n",
     "    \"tags\": \"whisper-event\",\n",

     "id": "a2787582-554f-44ce-9f38-4180a5ed6b44"
    },
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "Found cached dataset common_voice_11_0 (/home/daniel/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/zh-TW/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f)\n",
       "Found cached dataset common_voice_11_0 (/home/daniel/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/zh-TW/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f)\n"
      ]
     },
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "id": "79731fc3",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
+       "{'audio': {'path': '/home/daniel/.cache/huggingface/datasets/downloads/extracted/8d1722ebe07713de78ba2ed06286baa9fb33c24f19cb47ef1a3d6cb0774ad391/common_voice_zh-TW_18013265.mp3',\n",
+       "  'array': array([0., 0., 0., ..., 0., 0., 0.], dtype=float32),\n",
+       "  'sampling_rate': 48000},\n",
        " 'sentence': '爸爸們父親節快樂！'}"
       ]
      },
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "id": "b27e4720",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "id": "b459b0c5",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e7f849f56879427995d5de3d75585606",
        "version_major": 2,
        "version_minor": 0
       },
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "id": "d041650e-1c48-4439-87b3-5b6f4a514107",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
    "id": "c085911c-a10a-41ef-8874-306e0503e9bb",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 10,
    "id": "90965caa",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "91de26e5528241e895f883a394bdab2a",
        "version_major": 2,
        "version_minor": 0
       },
      "output_type": "display_data"
     },
     {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Loading cached processed dataset at /home/daniel/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/zh-TW/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-e493134b08029cc3.arrow\n"
+     ]
     }
    ],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 11,
    "id": "bde2118b",
    "metadata": {},
    "outputs": [
        "         34131,   220, 42117,   220, 27694, 42598,   220, 34043,   220, 50257])}"
       ]
      },
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 12,
    "id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 13,
    "id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "bcef97571c7548c081de85f242931b21",
        "version_major": 2,
        "version_minor": 0
       },
   },
   {
    "cell_type": "code",
+   "execution_count": 14,
    "id": "53945dcb",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 15,
    "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
    "metadata": {
     "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5"
   },
   {
    "cell_type": "code",
+   "execution_count": 16,
    "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
    "metadata": {
     "id": "fc834702-c0d3-4a96-b101-7b87be32bf42"
   },
   {
    "cell_type": "code",
+   "execution_count": 17,
    "id": "b22b4011-f31f-4b57-b684-c52332f92890",
    "metadata": {
     "id": "b22b4011-f31f-4b57-b684-c52332f92890"
   },
   {
    "cell_type": "code",
+   "execution_count": 18,
    "id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52",
    "metadata": {
     "id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52"
   },
   {
    "cell_type": "code",
+   "execution_count": 19,
    "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
    "metadata": {
     "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f"
   },
   {
    "cell_type": "code",
+   "execution_count": 20,
    "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
    "metadata": {
     "id": "62038ba3-88ed-4fce-84db-338f50dcd04f"
   },
   {
    "cell_type": "code",
+   "execution_count": 21,
    "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
    "metadata": {
     "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a"
   },
   {
    "cell_type": "code",
+   "execution_count": 22,
    "id": "d546d7fe-0543-479a-b708-2ebabec19493",
    "metadata": {
     "id": "d546d7fe-0543-479a-b708-2ebabec19493",
   },
   {
    "cell_type": "code",
+   "execution_count": 23,
    "id": "-2zQwMfEOBJq",
    "metadata": {
     "id": "-2zQwMfEOBJq"
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
    "metadata": {
     "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
      "output_type": "stream",
      "text": [
       "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.\n",
       "***** Running training *****\n",
       "  Num examples = 11277\n",
       "  Num Epochs = 3\n",
      ]
     },
     {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='601' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [ 601/1000 3:16:05 < 2:10:37, 0.05 it/s, Epoch 1.70/3]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       " <tr style=\"text-align: left;\">\n",
+       "      <th>Step</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "      <th>Wer</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>200</td>\n",
+       "      <td>0.216900</td>\n",
+       "      <td>0.119112</td>\n",
+       "      <td>9.333440</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>400</td>\n",
+       "      <td>0.082300</td>\n",
+       "      <td>0.101187</td>\n",
+       "      <td>7.590416</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>600</td>\n",
+       "      <td>0.078400</td>\n",
+       "      <td>0.095110</td>\n",
+       "      <td>7.486474</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.\n",
+      "***** Running Evaluation *****\n",
+      "  Num examples = 4709\n",
+      "  Batch size = 2\n",
+      "Saving model checkpoint to ./checkpoint-200\n",
+      "Configuration saved in ./checkpoint-200/config.json\n",
+      "Model weights saved in ./checkpoint-200/pytorch_model.bin\n",
+      "Feature extractor saved in ./checkpoint-200/preprocessor_config.json\n",
+      "Feature extractor saved in ./preprocessor_config.json\n",
+      "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.\n",
+      "***** Running Evaluation *****\n",
+      "  Num examples = 4709\n",
+      "  Batch size = 2\n",
+      "Saving model checkpoint to ./checkpoint-400\n",
+      "Configuration saved in ./checkpoint-400/config.json\n",
+      "Model weights saved in ./checkpoint-400/pytorch_model.bin\n",
+      "Feature extractor saved in ./checkpoint-400/preprocessor_config.json\n",
+      "Feature extractor saved in ./preprocessor_config.json\n",
+      "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.\n",
+      "***** Running Evaluation *****\n",
+      "  Num examples = 4709\n",
+      "  Batch size = 2\n",
+      "Saving model checkpoint to ./checkpoint-600\n",
+      "Configuration saved in ./checkpoint-600/config.json\n",
+      "Model weights saved in ./checkpoint-600/pytorch_model.bin\n",
+      "Feature extractor saved in ./checkpoint-600/preprocessor_config.json\n",
+      "Feature extractor saved in ./preprocessor_config.json\n"
      ]
     }
    ],
     "    \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
     "    \"dataset\": \"mozilla-foundation/common_voice_11_0\",  # a 'pretty' name for the training dataset\n",
     "    \"language\": \"zh-TW\",\n",
+    "    \"model_name\": \"Whisper Medium TW - Augmented\",  # a 'pretty' name for your model\n",
     "    \"finetuned_from\": \"openai/whisper-medium\",\n",
     "    \"tasks\": \"automatic-speech-recognition\",\n",
     "    \"tags\": \"whisper-event\",\n",

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c59231bec50bf7e63c665d3feb4951821ecb964b915a1be521482c50b1e826e8
 size 3055754841

 version https://git-lfs.github.com/spec/v1
+oid sha256:6411d501e4303ddf9b86289a5d217422eb9512dbcc64a08c30ef1eb0eacffd82
 size 3055754841

runs/Dec20_16-48-49_DANDAN/events.out.tfevents.1671526137.DANDAN.29004.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdd159d93587e66eb3149250140df4f3a7bf7e881249d76236bd2e9630445a9d
-size 7418

 version https://git-lfs.github.com/spec/v1
+oid sha256:79731c0e98643c8665dadac50b68886f182f977725bc91193254b8e577a064f0
+size 8992