kimbochen committed on
Commit
3538413
1 Parent(s): 143aad4

Training in progress, step 200

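For reference, a minimal sketch (Python, assuming the standard transformers Seq2SeqTrainingArguments API) of the training configuration the updated notebook runs with; every value below is taken from the Seq2SeqTrainingArguments cell shown in the diff that follows, so this is illustrative only.

from transformers import Seq2SeqTrainingArguments

# Sketch only: mirrors the updated notebook cell in the diff below.
training_args = Seq2SeqTrainingArguments(
    output_dir="./",
    per_device_train_batch_size=64,
    gradient_accumulation_steps=1,  # increase by 2x for every 2x decrease in batch size
    learning_rate=1e-5,
    warmup_steps=200,
    max_steps=1000,
    gradient_checkpointing=True,
    fp16=True,
    evaluation_strategy="steps",
    per_device_eval_batch_size=8,
    predict_with_generate=True,
    generation_max_length=225,
    save_steps=200,                 # checkpoint every 200 steps
    eval_steps=200,                 # evaluate every 200 steps
    logging_steps=25,
    report_to=["tensorboard"],
    load_best_model_at_end=True,
    metric_for_best_model="wer",
    greater_is_better=False,
    push_to_hub=True,
)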
.ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb CHANGED
@@ -108,7 +108,7 @@
108
  },
109
  {
110
  "cell_type": "code",
111
- "execution_count": 1,
112
  "id": "065a8cf7-e54f-4ac3-900e-609c80714fca",
113
  "metadata": {},
114
  "outputs": [],
@@ -142,7 +142,7 @@
142
  },
143
  {
144
  "cell_type": "code",
145
- "execution_count": 3,
146
  "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
147
  "metadata": {},
148
  "outputs": [],
@@ -151,7 +151,7 @@
151
  "\n",
152
  "raw_datasets = IterableDatasetDict()\n",
153
  "\n",
154
- "raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"ja\", split=\"train\", use_auth_token=True) # set split=\"train+validation\" for low-resource\n",
155
  "raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"ja\", split=\"test\", use_auth_token=True)"
156
  ]
157
  },
@@ -185,109 +185,10 @@
185
  },
186
  {
187
  "cell_type": "code",
188
- "execution_count": 4,
189
  "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
190
  "metadata": {},
191
- "outputs": [
192
- {
193
- "data": {
194
- "application/vnd.jupyter.widget-view+json": {
195
- "model_id": "ab8ef1fb2f284e2abd43a1b1bde55882",
196
- "version_major": 2,
197
- "version_minor": 0
198
- },
199
- "text/plain": [
200
- "Downloading: 0%| | 0.00/185k [00:00<?, ?B/s]"
201
- ]
202
- },
203
- "metadata": {},
204
- "output_type": "display_data"
205
- },
206
- {
207
- "data": {
208
- "application/vnd.jupyter.widget-view+json": {
209
- "model_id": "e0c2142f48224f1582e6457dbb8e5276",
210
- "version_major": 2,
211
- "version_minor": 0
212
- },
213
- "text/plain": [
214
- "Downloading: 0%| | 0.00/829 [00:00<?, ?B/s]"
215
- ]
216
- },
217
- "metadata": {},
218
- "output_type": "display_data"
219
- },
220
- {
221
- "data": {
222
- "application/vnd.jupyter.widget-view+json": {
223
- "model_id": "55aa8ea93e924389b339aefec864805d",
224
- "version_major": 2,
225
- "version_minor": 0
226
- },
227
- "text/plain": [
228
- "Downloading: 0%| | 0.00/1.04M [00:00<?, ?B/s]"
229
- ]
230
- },
231
- "metadata": {},
232
- "output_type": "display_data"
233
- },
234
- {
235
- "data": {
236
- "application/vnd.jupyter.widget-view+json": {
237
- "model_id": "5cc4483a4d234f73914d26f285588949",
238
- "version_major": 2,
239
- "version_minor": 0
240
- },
241
- "text/plain": [
242
- "Downloading: 0%| | 0.00/494k [00:00<?, ?B/s]"
243
- ]
244
- },
245
- "metadata": {},
246
- "output_type": "display_data"
247
- },
248
- {
249
- "data": {
250
- "application/vnd.jupyter.widget-view+json": {
251
- "model_id": "806dfeffeb1a4d6ba3a042cadee13450",
252
- "version_major": 2,
253
- "version_minor": 0
254
- },
255
- "text/plain": [
256
- "Downloading: 0%| | 0.00/52.7k [00:00<?, ?B/s]"
257
- ]
258
- },
259
- "metadata": {},
260
- "output_type": "display_data"
261
- },
262
- {
263
- "data": {
264
- "application/vnd.jupyter.widget-view+json": {
265
- "model_id": "b93cdf2091424615927adaefb032132f",
266
- "version_major": 2,
267
- "version_minor": 0
268
- },
269
- "text/plain": [
270
- "Downloading: 0%| | 0.00/2.11k [00:00<?, ?B/s]"
271
- ]
272
- },
273
- "metadata": {},
274
- "output_type": "display_data"
275
- },
276
- {
277
- "data": {
278
- "application/vnd.jupyter.widget-view+json": {
279
- "model_id": "cdb5621656934de2a60214f67530212c",
280
- "version_major": 2,
281
- "version_minor": 0
282
- },
283
- "text/plain": [
284
- "Downloading: 0%| | 0.00/2.06k [00:00<?, ?B/s]"
285
- ]
286
- },
287
- "metadata": {},
288
- "output_type": "display_data"
289
- }
290
- ],
291
  "source": [
292
  "from transformers import WhisperProcessor\n",
293
  "\n",
@@ -312,7 +213,7 @@
312
  },
313
  {
314
  "cell_type": "code",
315
- "execution_count": 5,
316
  "id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
317
  "metadata": {},
318
  "outputs": [
@@ -332,7 +233,7 @@
332
  " 'segment': Value(dtype='string', id=None)}"
333
  ]
334
  },
335
- "execution_count": 5,
336
  "metadata": {},
337
  "output_type": "execute_result"
338
  }
@@ -358,7 +259,7 @@
358
  },
359
  {
360
  "cell_type": "code",
361
- "execution_count": 6,
362
  "id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
363
  "metadata": {},
364
  "outputs": [],
@@ -378,7 +279,7 @@
378
  },
379
  {
380
  "cell_type": "code",
381
- "execution_count": 7,
382
  "id": "d041650e-1c48-4439-87b3-5b6f4a514107",
383
  "metadata": {},
384
  "outputs": [],
@@ -405,7 +306,7 @@
405
  },
406
  {
407
  "cell_type": "code",
408
- "execution_count": 8,
409
  "id": "c085911c-a10a-41ef-8874-306e0503e9bb",
410
  "metadata": {},
411
  "outputs": [],
@@ -441,7 +342,7 @@
441
  },
442
  {
443
  "cell_type": "code",
444
- "execution_count": 9,
445
  "id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
446
  "metadata": {},
447
  "outputs": [],
@@ -459,7 +360,7 @@
459
  },
460
  {
461
  "cell_type": "code",
462
- "execution_count": 10,
463
  "id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
464
  "metadata": {},
465
  "outputs": [],
@@ -480,7 +381,7 @@
480
  },
481
  {
482
  "cell_type": "code",
483
- "execution_count": 11,
484
  "id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
485
  "metadata": {},
486
  "outputs": [],
@@ -501,7 +402,7 @@
501
  },
502
  {
503
  "cell_type": "code",
504
- "execution_count": 12,
505
  "id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
506
  "metadata": {},
507
  "outputs": [],
@@ -571,7 +472,7 @@
571
  },
572
  {
573
  "cell_type": "code",
574
- "execution_count": 13,
575
  "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
576
  "metadata": {},
577
  "outputs": [],
@@ -619,7 +520,7 @@
619
  },
620
  {
621
  "cell_type": "code",
622
- "execution_count": 14,
623
  "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
624
  "metadata": {},
625
  "outputs": [],
@@ -646,14 +547,14 @@
646
  },
647
  {
648
  "cell_type": "code",
649
- "execution_count": 15,
650
  "id": "b22b4011-f31f-4b57-b684-c52332f92890",
651
  "metadata": {},
652
  "outputs": [
653
  {
654
  "data": {
655
  "application/vnd.jupyter.widget-view+json": {
656
- "model_id": "737faa61d325424ba4b395c4aeb9a58f",
657
  "version_major": 2,
658
  "version_minor": 0
659
  },
@@ -690,7 +591,7 @@
690
  },
691
  {
692
  "cell_type": "code",
693
- "execution_count": 16,
694
  "id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
695
  "metadata": {},
696
  "outputs": [],
@@ -740,14 +641,14 @@
740
  },
741
  {
742
  "cell_type": "code",
743
- "execution_count": 17,
744
  "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
745
  "metadata": {},
746
  "outputs": [
747
  {
748
  "data": {
749
  "application/vnd.jupyter.widget-view+json": {
750
- "model_id": "a2e3bfa2e47241f193b2e9be28e184b4",
751
  "version_major": 2,
752
  "version_minor": 0
753
  },
@@ -761,7 +662,7 @@
761
  {
762
  "data": {
763
  "application/vnd.jupyter.widget-view+json": {
764
- "model_id": "22bccf0b5b46459ebfbdb17f4641b800",
765
  "version_major": 2,
766
  "version_minor": 0
767
  },
@@ -789,7 +690,7 @@
789
  },
790
  {
791
  "cell_type": "code",
792
- "execution_count": 18,
793
  "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
794
  "metadata": {},
795
  "outputs": [],
@@ -817,7 +718,7 @@
817
  },
818
  {
819
  "cell_type": "code",
820
- "execution_count": 19,
821
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
822
  "metadata": {},
823
  "outputs": [],
@@ -829,16 +730,16 @@
829
  " per_device_train_batch_size=64,\n",
830
  " gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
831
  " learning_rate=1e-5,\n",
832
- " warmup_steps=500,\n",
833
- " max_steps=5000,\n",
834
  " gradient_checkpointing=True,\n",
835
  " fp16=True,\n",
836
  " evaluation_strategy=\"steps\",\n",
837
  " per_device_eval_batch_size=8,\n",
838
  " predict_with_generate=True,\n",
839
  " generation_max_length=225,\n",
840
- " save_steps=1000,\n",
841
- " eval_steps=1000,\n",
842
  " logging_steps=25,\n",
843
  " report_to=[\"tensorboard\"],\n",
844
  " load_best_model_at_end=True,\n",
@@ -867,7 +768,7 @@
867
  },
868
  {
869
  "cell_type": "code",
870
- "execution_count": 20,
871
  "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
872
  "metadata": {},
873
  "outputs": [],
@@ -896,7 +797,7 @@
896
  },
897
  {
898
  "cell_type": "code",
899
- "execution_count": 21,
900
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
901
  "metadata": {},
902
  "outputs": [
@@ -935,7 +836,7 @@
935
  },
936
  {
937
  "cell_type": "code",
938
- "execution_count": 22,
939
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
940
  "metadata": {},
941
  "outputs": [
@@ -992,14 +893,15 @@
992
  "/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
993
  " warnings.warn(\n",
994
  "***** Running training *****\n",
995
- " Num examples = 320000\n",
996
  " Num Epochs = 9223372036854775807\n",
997
  " Instantaneous batch size per device = 64\n",
998
  " Total train batch size (w. parallel, distributed & accumulation) = 64\n",
999
  " Gradient Accumulation steps = 1\n",
1000
- " Total optimization steps = 5000\n",
1001
  " Number of trainable parameters = 241734912\n",
1002
- "Reading metadata...: 6505it [00:00, 21991.51it/s]\n",
 
1003
  "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
1004
  ]
1005
  },
@@ -1009,8 +911,8 @@
1009
  "\n",
1010
  " <div>\n",
1011
  " \n",
1012
- " <progress value='424' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1013
- " [ 424/5000 1:07:10 < 12:08:22, 0.10 it/s, Epoch 4.00/9223372036854775807]\n",
1014
  " </div>\n",
1015
  " <table border=\"1\" class=\"dataframe\">\n",
1016
  " <thead>\n",
@@ -1035,10 +937,13 @@
1035
  "name": "stderr",
1036
  "output_type": "stream",
1037
  "text": [
1038
- "Reading metadata...: 6505it [00:00, 24574.42it/s]\n",
1039
- "Reading metadata...: 6505it [00:00, 24420.15it/s]\n",
1040
- "Reading metadata...: 6505it [00:00, 36254.85it/s]\n",
1041
- "Reading metadata...: 6505it [00:00, 30794.80it/s]\n"
1042
  ]
1043
  }
1044
  ],
@@ -1068,7 +973,7 @@
1068
  },
1069
  {
1070
  "cell_type": "code",
1071
- "execution_count": null,
1072
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
1073
  "metadata": {},
1074
  "outputs": [],
@@ -1094,20 +999,282 @@
1094
  },
1095
  {
1096
  "cell_type": "code",
1097
- "execution_count": null,
1098
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
1099
  "metadata": {},
1100
- "outputs": [],
1101
  "source": [
1102
  "trainer.push_to_hub(**kwargs)"
1103
  ]
1104
  }
1105
  ],
1106
  "metadata": {
1107
  "kernelspec": {
1108
- "display_name": "hf",
1109
  "language": "python",
1110
- "name": "hf"
1111
  },
1112
  "language_info": {
1113
  "codemirror_mode": {
 
108
  },
109
  {
110
  "cell_type": "code",
111
+ "execution_count": 5,
112
  "id": "065a8cf7-e54f-4ac3-900e-609c80714fca",
113
  "metadata": {},
114
  "outputs": [],
 
142
  },
143
  {
144
  "cell_type": "code",
145
+ "execution_count": 6,
146
  "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
147
  "metadata": {},
148
  "outputs": [],
 
151
  "\n",
152
  "raw_datasets = IterableDatasetDict()\n",
153
  "\n",
154
+ "raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"ja\", split=\"train+validation\", use_auth_token=True) # set split=\"train+validation\" for low-resource\n",
155
  "raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"ja\", split=\"test\", use_auth_token=True)"
156
  ]
157
  },
 
185
  },
186
  {
187
  "cell_type": "code",
188
+ "execution_count": 7,
189
  "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
190
  "metadata": {},
191
+ "outputs": [],
192
  "source": [
193
  "from transformers import WhisperProcessor\n",
194
  "\n",
 
213
  },
214
  {
215
  "cell_type": "code",
216
+ "execution_count": 8,
217
  "id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
218
  "metadata": {},
219
  "outputs": [
 
233
  " 'segment': Value(dtype='string', id=None)}"
234
  ]
235
  },
236
+ "execution_count": 8,
237
  "metadata": {},
238
  "output_type": "execute_result"
239
  }
 
259
  },
260
  {
261
  "cell_type": "code",
262
+ "execution_count": 9,
263
  "id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
264
  "metadata": {},
265
  "outputs": [],
 
279
  },
280
  {
281
  "cell_type": "code",
282
+ "execution_count": 10,
283
  "id": "d041650e-1c48-4439-87b3-5b6f4a514107",
284
  "metadata": {},
285
  "outputs": [],
 
306
  },
307
  {
308
  "cell_type": "code",
309
+ "execution_count": 11,
310
  "id": "c085911c-a10a-41ef-8874-306e0503e9bb",
311
  "metadata": {},
312
  "outputs": [],
 
342
  },
343
  {
344
  "cell_type": "code",
345
+ "execution_count": 12,
346
  "id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
347
  "metadata": {},
348
  "outputs": [],
 
360
  },
361
  {
362
  "cell_type": "code",
363
+ "execution_count": 13,
364
  "id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
365
  "metadata": {},
366
  "outputs": [],
 
381
  },
382
  {
383
  "cell_type": "code",
384
+ "execution_count": 14,
385
  "id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
386
  "metadata": {},
387
  "outputs": [],
 
402
  },
403
  {
404
  "cell_type": "code",
405
+ "execution_count": 15,
406
  "id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
407
  "metadata": {},
408
  "outputs": [],
 
472
  },
473
  {
474
  "cell_type": "code",
475
+ "execution_count": 16,
476
  "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
477
  "metadata": {},
478
  "outputs": [],
 
520
  },
521
  {
522
  "cell_type": "code",
523
+ "execution_count": 17,
524
  "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
525
  "metadata": {},
526
  "outputs": [],
 
547
  },
548
  {
549
  "cell_type": "code",
550
+ "execution_count": 18,
551
  "id": "b22b4011-f31f-4b57-b684-c52332f92890",
552
  "metadata": {},
553
  "outputs": [
554
  {
555
  "data": {
556
  "application/vnd.jupyter.widget-view+json": {
557
+ "model_id": "bffdd7b1fed44295954d9eed41a9cfd5",
558
  "version_major": 2,
559
  "version_minor": 0
560
  },
 
591
  },
592
  {
593
  "cell_type": "code",
594
+ "execution_count": 19,
595
  "id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
596
  "metadata": {},
597
  "outputs": [],
 
641
  },
642
  {
643
  "cell_type": "code",
644
+ "execution_count": 20,
645
  "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
646
  "metadata": {},
647
  "outputs": [
648
  {
649
  "data": {
650
  "application/vnd.jupyter.widget-view+json": {
651
+ "model_id": "48fee2fd3b2a4a67b3a35666fda4dfe9",
652
  "version_major": 2,
653
  "version_minor": 0
654
  },
 
662
  {
663
  "data": {
664
  "application/vnd.jupyter.widget-view+json": {
665
+ "model_id": "51cdba284e8f44318868fbd013970280",
666
  "version_major": 2,
667
  "version_minor": 0
668
  },
 
690
  },
691
  {
692
  "cell_type": "code",
693
+ "execution_count": 21,
694
  "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
695
  "metadata": {},
696
  "outputs": [],
 
718
  },
719
  {
720
  "cell_type": "code",
721
+ "execution_count": 22,
722
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
723
  "metadata": {},
724
  "outputs": [],
 
730
  " per_device_train_batch_size=64,\n",
731
  " gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
732
  " learning_rate=1e-5,\n",
733
+ " warmup_steps=200,\n",
734
+ " max_steps=1000,\n",
735
  " gradient_checkpointing=True,\n",
736
  " fp16=True,\n",
737
  " evaluation_strategy=\"steps\",\n",
738
  " per_device_eval_batch_size=8,\n",
739
  " predict_with_generate=True,\n",
740
  " generation_max_length=225,\n",
741
+ " save_steps=200,\n",
742
+ " eval_steps=200,\n",
743
  " logging_steps=25,\n",
744
  " report_to=[\"tensorboard\"],\n",
745
  " load_best_model_at_end=True,\n",
 
768
  },
769
  {
770
  "cell_type": "code",
771
+ "execution_count": 23,
772
  "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
773
  "metadata": {},
774
  "outputs": [],
 
797
  },
798
  {
799
  "cell_type": "code",
800
+ "execution_count": 24,
801
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
802
  "metadata": {},
803
  "outputs": [
 
836
  },
837
  {
838
  "cell_type": "code",
839
+ "execution_count": 25,
840
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
841
  "metadata": {},
842
  "outputs": [
 
893
  "/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
894
  " warnings.warn(\n",
895
  "***** Running training *****\n",
896
+ " Num examples = 64000\n",
897
  " Num Epochs = 9223372036854775807\n",
898
  " Instantaneous batch size per device = 64\n",
899
  " Total train batch size (w. parallel, distributed & accumulation) = 64\n",
900
  " Gradient Accumulation steps = 1\n",
901
+ " Total optimization steps = 1000\n",
902
  " Number of trainable parameters = 241734912\n",
903
+ "Reading metadata...: 6505it [00:00, 31331.40it/s]\n",
904
+ "Reading metadata...: 4485it [00:00, 41376.86it/s]\n",
905
  "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
906
  ]
907
  },
 
911
  "\n",
912
  " <div>\n",
913
  " \n",
914
+ " <progress value='201' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
915
+ " [ 201/1000 22:31 < 1:30:27, 0.15 it/s, Epoch 1.06/9223372036854775807]\n",
916
  " </div>\n",
917
  " <table border=\"1\" class=\"dataframe\">\n",
918
  " <thead>\n",
 
937
  "name": "stderr",
938
  "output_type": "stream",
939
  "text": [
940
+ "Reading metadata...: 6505it [00:00, 64162.65it/s]\n",
941
+ "Reading metadata...: 4485it [00:00, 27834.06it/s]\n",
942
+ "***** Running Evaluation *****\n",
943
+ " Num examples: Unknown\n",
944
+ " Batch size = 8\n",
945
+ "Reading metadata...: 4604it [00:00, 27155.92it/s]\n",
946
+ "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
947
  ]
948
  }
949
  ],
 
973
  },
974
  {
975
  "cell_type": "code",
976
+ "execution_count": 24,
977
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
978
  "metadata": {},
979
  "outputs": [],
 
999
  },
1000
  {
1001
  "cell_type": "code",
1002
+ "execution_count": 31,
1003
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
1004
  "metadata": {},
1005
+ "outputs": [
1006
+ {
1007
+ "name": "stderr",
1008
+ "output_type": "stream",
1009
+ "text": [
1010
+ "Saving model checkpoint to ./\n",
1011
+ "Configuration saved in ./config.json\n",
1012
+ "Model weights saved in ./pytorch_model.bin\n",
1013
+ "Feature extractor saved in ./preprocessor_config.json\n",
1014
+ "tokenizer config file saved in ./tokenizer_config.json\n",
1015
+ "Special tokens file saved in ./special_tokens_map.json\n",
1016
+ "added tokens file saved in ./added_tokens.json\n"
1017
+ ]
1018
+ },
1019
+ {
1020
+ "data": {
1021
+ "application/vnd.jupyter.widget-view+json": {
1022
+ "model_id": "695c170663c94560a567be198b7181ff",
1023
+ "version_major": 2,
1024
+ "version_minor": 0
1025
+ },
1026
+ "text/plain": [
1027
+ "Upload file runs/Dec10_16-23-25_129-213-27-84/1670689420.7830398/events.out.tfevents.1670689420.129-213-27-84.…"
1028
+ ]
1029
+ },
1030
+ "metadata": {},
1031
+ "output_type": "display_data"
1032
+ },
1033
+ {
1034
+ "data": {
1035
+ "application/vnd.jupyter.widget-view+json": {
1036
+ "model_id": "2318836d6dd3405fabafca4370232e34",
1037
+ "version_major": 2,
1038
+ "version_minor": 0
1039
+ },
1040
+ "text/plain": [
1041
+ "Upload file training_args.bin: 100%|##########| 3.50k/3.50k [00:00<?, ?B/s]"
1042
+ ]
1043
+ },
1044
+ "metadata": {},
1045
+ "output_type": "display_data"
1046
+ },
1047
+ {
1048
+ "data": {
1049
+ "application/vnd.jupyter.widget-view+json": {
1050
+ "model_id": "9b673eb134984bdda227d23929b66479",
1051
+ "version_major": 2,
1052
+ "version_minor": 0
1053
+ },
1054
+ "text/plain": [
1055
+ "Upload file runs/Dec10_16-23-25_129-213-27-84/events.out.tfevents.1670689420.129-213-27-84.69598.2: 100%|#####…"
1056
+ ]
1057
+ },
1058
+ "metadata": {},
1059
+ "output_type": "display_data"
1060
+ },
1061
+ {
1062
+ "name": "stderr",
1063
+ "output_type": "stream",
1064
+ "text": [
1065
+ "remote: Scanning LFS files for validity, may be slow... \n",
1066
+ "remote: LFS file scan complete. \n",
1067
+ "To https://huggingface.co/kimbochen/whisper-small-jp\n",
1068
+ " 3a44fa5..05da956 main -> main\n",
1069
+ "\n",
1070
+ "To https://huggingface.co/kimbochen/whisper-small-jp\n",
1071
+ " 05da956..30906c5 main -> main\n",
1072
+ "\n"
1073
+ ]
1074
+ },
1075
+ {
1076
+ "data": {
1077
+ "text/plain": [
1078
+ "'https://huggingface.co/kimbochen/whisper-small-jp/commit/05da956fdc97e7c01112f45c20e56c8f6a127502'"
1079
+ ]
1080
+ },
1081
+ "execution_count": 31,
1082
+ "metadata": {},
1083
+ "output_type": "execute_result"
1084
+ }
1085
+ ],
1086
  "source": [
1087
  "trainer.push_to_hub(**kwargs)"
1088
  ]
1089
+ },
1090
+ {
1091
+ "cell_type": "code",
1092
+ "execution_count": 28,
1093
+ "id": "4df1603c-ef35-40f1-ae57-3214441073c8",
1094
+ "metadata": {},
1095
+ "outputs": [
1096
+ {
1097
+ "name": "stderr",
1098
+ "output_type": "stream",
1099
+ "text": [
1100
+ "PyTorch: setting up devices\n"
1101
+ ]
1102
+ }
1103
+ ],
1104
+ "source": [
1105
+ "training_args = Seq2SeqTrainingArguments(\n",
1106
+ " output_dir=\"./\",\n",
1107
+ " per_device_train_batch_size=64,\n",
1108
+ " gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
1109
+ " learning_rate=1e-5,\n",
1110
+ " max_steps=1000,\n",
1111
+ " num_train_epochs=-1,\n",
1112
+ " gradient_checkpointing=True,\n",
1113
+ " fp16=True,\n",
1114
+ " evaluation_strategy=\"steps\",\n",
1115
+ " per_device_eval_batch_size=8,\n",
1116
+ " predict_with_generate=True,\n",
1117
+ " generation_max_length=225,\n",
1118
+ " save_steps=1000,\n",
1119
+ " eval_steps=1000,\n",
1120
+ " logging_steps=25,\n",
1121
+ " report_to=[\"tensorboard\"],\n",
1122
+ " load_best_model_at_end=True,\n",
1123
+ " metric_for_best_model=\"wer\",\n",
1124
+ " greater_is_better=False,\n",
1125
+ " push_to_hub=True,\n",
1126
+ ")"
1127
+ ]
1128
+ },
1129
+ {
1130
+ "cell_type": "code",
1131
+ "execution_count": 29,
1132
+ "id": "afc2b554-7171-48c7-95aa-b7e61b70ab20",
1133
+ "metadata": {},
1134
+ "outputs": [
1135
+ {
1136
+ "name": "stderr",
1137
+ "output_type": "stream",
1138
+ "text": [
1139
+ "/home/ubuntu/whisper-small-jp/./ is already a clone of https://huggingface.co/kimbochen/whisper-small-jp. Make sure you pull the latest changes with `repo.git_pull()`.\n",
1140
+ "max_steps is given, it will override any value given in num_train_epochs\n",
1141
+ "Using cuda_amp half precision backend\n"
1142
+ ]
1143
+ }
1144
+ ],
1145
+ "source": [
1146
+ "trainer = Seq2SeqTrainer(\n",
1147
+ " args=training_args,\n",
1148
+ " model=model,\n",
1149
+ " train_dataset=vectorized_datasets[\"train\"],\n",
1150
+ " eval_dataset=vectorized_datasets[\"test\"],\n",
1151
+ " data_collator=data_collator,\n",
1152
+ " compute_metrics=compute_metrics,\n",
1153
+ " tokenizer=processor,\n",
1154
+ " callbacks=[ShuffleCallback()],\n",
1155
+ ")"
1156
+ ]
1157
+ },
1158
+ {
1159
+ "cell_type": "code",
1160
+ "execution_count": 30,
1161
+ "id": "b029a1d8-24de-46e7-b067-0f900b1db342",
1162
+ "metadata": {},
1163
+ "outputs": [
1164
+ {
1165
+ "name": "stderr",
1166
+ "output_type": "stream",
1167
+ "text": [
1168
+ "Loading model from checkpoint-4000.\n",
1169
+ "/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
1170
+ " warnings.warn(\n",
1171
+ "***** Running training *****\n",
1172
+ " Num examples = 64000\n",
1173
+ " Num Epochs = 9223372036854775807\n",
1174
+ " Instantaneous batch size per device = 64\n",
1175
+ " Total train batch size (w. parallel, distributed & accumulation) = 64\n",
1176
+ " Gradient Accumulation steps = 1\n",
1177
+ " Total optimization steps = 1000\n",
1178
+ " Number of trainable parameters = 241734912\n",
1179
+ " Continuing training from checkpoint, will skip to saved global_step\n",
1180
+ " Continuing training from epoch 4\n",
1181
+ " Continuing training from global step 4000\n",
1182
+ " Will skip the first 4 epochs then the first 0 batches in the first epoch. If this takes a lot of time, you can add the `--ignore_data_skip` flag to your launch command, but you will resume the training on data already seen by your model.\n"
1183
+ ]
1184
+ },
1185
+ {
1186
+ "data": {
1187
+ "application/vnd.jupyter.widget-view+json": {
1188
+ "model_id": "01337298313740d98d3cc75b6d5e3ff7",
1189
+ "version_major": 2,
1190
+ "version_minor": 0
1191
+ },
1192
+ "text/plain": [
1193
+ "0it [00:00, ?it/s]"
1194
+ ]
1195
+ },
1196
+ "metadata": {},
1197
+ "output_type": "display_data"
1198
+ },
1199
+ {
1200
+ "name": "stderr",
1201
+ "output_type": "stream",
1202
+ "text": [
1203
+ "\n",
1204
+ "Reading metadata...: 0it [00:00, ?it/s]\u001b[A\n",
1205
+ "Reading metadata...: 6505it [00:00, 34246.80it/s]\n",
1206
+ "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
1207
+ "\n",
1208
+ "Reading metadata...: 6505it [00:00, 84823.64it/s]\n",
1209
+ "\n",
1210
+ "Reading metadata...: 6505it [00:00, 88617.62it/s]\n",
1211
+ "\n",
1212
+ "Reading metadata...: 6505it [00:00, 90289.78it/s]\n",
1213
+ "\n",
1214
+ "Reading metadata...: 6505it [00:00, 91816.92it/s]\n"
1215
+ ]
1216
+ },
1217
+ {
1218
+ "data": {
1219
+ "text/html": [
1220
+ "\n",
1221
+ " <div>\n",
1222
+ " \n",
1223
+ " <progress value='4001' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1224
+ " [1000/1000 00:00, Epoch 4/9223372036854775807]\n",
1225
+ " </div>\n",
1226
+ " <table border=\"1\" class=\"dataframe\">\n",
1227
+ " <thead>\n",
1228
+ " <tr style=\"text-align: left;\">\n",
1229
+ " <th>Step</th>\n",
1230
+ " <th>Training Loss</th>\n",
1231
+ " <th>Validation Loss</th>\n",
1232
+ " </tr>\n",
1233
+ " </thead>\n",
1234
+ " <tbody>\n",
1235
+ " </tbody>\n",
1236
+ "</table><p>"
1237
+ ],
1238
+ "text/plain": [
1239
+ "<IPython.core.display.HTML object>"
1240
+ ]
1241
+ },
1242
+ "metadata": {},
1243
+ "output_type": "display_data"
1244
+ },
1245
+ {
1246
+ "name": "stderr",
1247
+ "output_type": "stream",
1248
+ "text": [
1249
+ "\n",
1250
+ "\n",
1251
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
1252
+ "\n",
1253
+ "\n",
1254
+ "Loading best model from ./checkpoint-4000 (score: 88.31039863810469).\n"
1255
+ ]
1256
+ },
1257
+ {
1258
+ "data": {
1259
+ "text/plain": [
1260
+ "TrainOutput(global_step=4001, training_loss=8.343380785802548e-08, metrics={'train_runtime': 169.0541, 'train_samples_per_second': 378.577, 'train_steps_per_second': 5.915, 'total_flos': 7.363747084345344e+19, 'train_loss': 8.343380785802548e-08, 'epoch': 4.0})"
1261
+ ]
1262
+ },
1263
+ "execution_count": 30,
1264
+ "metadata": {},
1265
+ "output_type": "execute_result"
1266
+ }
1267
+ ],
1268
+ "source": [
1269
+ "trainer.train(\"checkpoint-4000\")"
1270
+ ]
1271
  }
1272
  ],
1273
  "metadata": {
1274
  "kernelspec": {
1275
+ "display_name": "wspsr",
1276
  "language": "python",
1277
+ "name": "wspsr"
1278
  },
1279
  "language_info": {
1280
  "codemirror_mode": {
fine-tune-whisper-streaming.ipynb CHANGED
@@ -108,7 +108,7 @@
108
  },
109
  {
110
  "cell_type": "code",
111
- "execution_count": 1,
112
  "id": "065a8cf7-e54f-4ac3-900e-609c80714fca",
113
  "metadata": {},
114
  "outputs": [],
@@ -142,7 +142,7 @@
142
  },
143
  {
144
  "cell_type": "code",
145
- "execution_count": 3,
146
  "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
147
  "metadata": {},
148
  "outputs": [],
@@ -151,7 +151,7 @@
151
  "\n",
152
  "raw_datasets = IterableDatasetDict()\n",
153
  "\n",
154
- "raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"ja\", split=\"train\", use_auth_token=True) # set split=\"train+validation\" for low-resource\n",
155
  "raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"ja\", split=\"test\", use_auth_token=True)"
156
  ]
157
  },
@@ -185,109 +185,10 @@
185
  },
186
  {
187
  "cell_type": "code",
188
- "execution_count": 4,
189
  "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
190
  "metadata": {},
191
- "outputs": [
192
- {
193
- "data": {
194
- "application/vnd.jupyter.widget-view+json": {
195
- "model_id": "ab8ef1fb2f284e2abd43a1b1bde55882",
196
- "version_major": 2,
197
- "version_minor": 0
198
- },
199
- "text/plain": [
200
- "Downloading: 0%| | 0.00/185k [00:00<?, ?B/s]"
201
- ]
202
- },
203
- "metadata": {},
204
- "output_type": "display_data"
205
- },
206
- {
207
- "data": {
208
- "application/vnd.jupyter.widget-view+json": {
209
- "model_id": "e0c2142f48224f1582e6457dbb8e5276",
210
- "version_major": 2,
211
- "version_minor": 0
212
- },
213
- "text/plain": [
214
- "Downloading: 0%| | 0.00/829 [00:00<?, ?B/s]"
215
- ]
216
- },
217
- "metadata": {},
218
- "output_type": "display_data"
219
- },
220
- {
221
- "data": {
222
- "application/vnd.jupyter.widget-view+json": {
223
- "model_id": "55aa8ea93e924389b339aefec864805d",
224
- "version_major": 2,
225
- "version_minor": 0
226
- },
227
- "text/plain": [
228
- "Downloading: 0%| | 0.00/1.04M [00:00<?, ?B/s]"
229
- ]
230
- },
231
- "metadata": {},
232
- "output_type": "display_data"
233
- },
234
- {
235
- "data": {
236
- "application/vnd.jupyter.widget-view+json": {
237
- "model_id": "5cc4483a4d234f73914d26f285588949",
238
- "version_major": 2,
239
- "version_minor": 0
240
- },
241
- "text/plain": [
242
- "Downloading: 0%| | 0.00/494k [00:00<?, ?B/s]"
243
- ]
244
- },
245
- "metadata": {},
246
- "output_type": "display_data"
247
- },
248
- {
249
- "data": {
250
- "application/vnd.jupyter.widget-view+json": {
251
- "model_id": "806dfeffeb1a4d6ba3a042cadee13450",
252
- "version_major": 2,
253
- "version_minor": 0
254
- },
255
- "text/plain": [
256
- "Downloading: 0%| | 0.00/52.7k [00:00<?, ?B/s]"
257
- ]
258
- },
259
- "metadata": {},
260
- "output_type": "display_data"
261
- },
262
- {
263
- "data": {
264
- "application/vnd.jupyter.widget-view+json": {
265
- "model_id": "b93cdf2091424615927adaefb032132f",
266
- "version_major": 2,
267
- "version_minor": 0
268
- },
269
- "text/plain": [
270
- "Downloading: 0%| | 0.00/2.11k [00:00<?, ?B/s]"
271
- ]
272
- },
273
- "metadata": {},
274
- "output_type": "display_data"
275
- },
276
- {
277
- "data": {
278
- "application/vnd.jupyter.widget-view+json": {
279
- "model_id": "cdb5621656934de2a60214f67530212c",
280
- "version_major": 2,
281
- "version_minor": 0
282
- },
283
- "text/plain": [
284
- "Downloading: 0%| | 0.00/2.06k [00:00<?, ?B/s]"
285
- ]
286
- },
287
- "metadata": {},
288
- "output_type": "display_data"
289
- }
290
- ],
291
  "source": [
292
  "from transformers import WhisperProcessor\n",
293
  "\n",
@@ -312,7 +213,7 @@
312
  },
313
  {
314
  "cell_type": "code",
315
- "execution_count": 5,
316
  "id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
317
  "metadata": {},
318
  "outputs": [
@@ -332,7 +233,7 @@
332
  " 'segment': Value(dtype='string', id=None)}"
333
  ]
334
  },
335
- "execution_count": 5,
336
  "metadata": {},
337
  "output_type": "execute_result"
338
  }
@@ -358,7 +259,7 @@
358
  },
359
  {
360
  "cell_type": "code",
361
- "execution_count": 6,
362
  "id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
363
  "metadata": {},
364
  "outputs": [],
@@ -378,7 +279,7 @@
378
  },
379
  {
380
  "cell_type": "code",
381
- "execution_count": 7,
382
  "id": "d041650e-1c48-4439-87b3-5b6f4a514107",
383
  "metadata": {},
384
  "outputs": [],
@@ -405,7 +306,7 @@
405
  },
406
  {
407
  "cell_type": "code",
408
- "execution_count": 8,
409
  "id": "c085911c-a10a-41ef-8874-306e0503e9bb",
410
  "metadata": {},
411
  "outputs": [],
@@ -441,7 +342,7 @@
441
  },
442
  {
443
  "cell_type": "code",
444
- "execution_count": 9,
445
  "id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
446
  "metadata": {},
447
  "outputs": [],
@@ -459,7 +360,7 @@
459
  },
460
  {
461
  "cell_type": "code",
462
- "execution_count": 10,
463
  "id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
464
  "metadata": {},
465
  "outputs": [],
@@ -480,7 +381,7 @@
480
  },
481
  {
482
  "cell_type": "code",
483
- "execution_count": 11,
484
  "id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
485
  "metadata": {},
486
  "outputs": [],
@@ -501,7 +402,7 @@
501
  },
502
  {
503
  "cell_type": "code",
504
- "execution_count": 12,
505
  "id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
506
  "metadata": {},
507
  "outputs": [],
@@ -571,7 +472,7 @@
571
  },
572
  {
573
  "cell_type": "code",
574
- "execution_count": 13,
575
  "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
576
  "metadata": {},
577
  "outputs": [],
@@ -619,7 +520,7 @@
619
  },
620
  {
621
  "cell_type": "code",
622
- "execution_count": 14,
623
  "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
624
  "metadata": {},
625
  "outputs": [],
@@ -646,14 +547,14 @@
646
  },
647
  {
648
  "cell_type": "code",
649
- "execution_count": 15,
650
  "id": "b22b4011-f31f-4b57-b684-c52332f92890",
651
  "metadata": {},
652
  "outputs": [
653
  {
654
  "data": {
655
  "application/vnd.jupyter.widget-view+json": {
656
- "model_id": "737faa61d325424ba4b395c4aeb9a58f",
657
  "version_major": 2,
658
  "version_minor": 0
659
  },
@@ -690,7 +591,7 @@
690
  },
691
  {
692
  "cell_type": "code",
693
- "execution_count": 16,
694
  "id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
695
  "metadata": {},
696
  "outputs": [],
@@ -740,14 +641,14 @@
740
  },
741
  {
742
  "cell_type": "code",
743
- "execution_count": 17,
744
  "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
745
  "metadata": {},
746
  "outputs": [
747
  {
748
  "data": {
749
  "application/vnd.jupyter.widget-view+json": {
750
- "model_id": "a2e3bfa2e47241f193b2e9be28e184b4",
751
  "version_major": 2,
752
  "version_minor": 0
753
  },
@@ -761,7 +662,7 @@
761
  {
762
  "data": {
763
  "application/vnd.jupyter.widget-view+json": {
764
- "model_id": "22bccf0b5b46459ebfbdb17f4641b800",
765
  "version_major": 2,
766
  "version_minor": 0
767
  },
@@ -789,7 +690,7 @@
789
  },
790
  {
791
  "cell_type": "code",
792
- "execution_count": 18,
793
  "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
794
  "metadata": {},
795
  "outputs": [],
@@ -817,7 +718,7 @@
817
  },
818
  {
819
  "cell_type": "code",
820
- "execution_count": 19,
821
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
822
  "metadata": {},
823
  "outputs": [],
@@ -829,16 +730,16 @@
829
  " per_device_train_batch_size=64,\n",
830
  " gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
831
  " learning_rate=1e-5,\n",
832
- " warmup_steps=500,\n",
833
- " max_steps=5000,\n",
834
  " gradient_checkpointing=True,\n",
835
  " fp16=True,\n",
836
  " evaluation_strategy=\"steps\",\n",
837
  " per_device_eval_batch_size=8,\n",
838
  " predict_with_generate=True,\n",
839
  " generation_max_length=225,\n",
840
- " save_steps=1000,\n",
841
- " eval_steps=1000,\n",
842
  " logging_steps=25,\n",
843
  " report_to=[\"tensorboard\"],\n",
844
  " load_best_model_at_end=True,\n",
@@ -867,7 +768,7 @@
867
  },
868
  {
869
  "cell_type": "code",
870
- "execution_count": 20,
871
  "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
872
  "metadata": {},
873
  "outputs": [],
@@ -896,7 +797,7 @@
896
  },
897
  {
898
  "cell_type": "code",
899
- "execution_count": 21,
900
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
901
  "metadata": {},
902
  "outputs": [
@@ -935,7 +836,7 @@
935
  },
936
  {
937
  "cell_type": "code",
938
- "execution_count": 22,
939
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
940
  "metadata": {},
941
  "outputs": [
@@ -992,14 +893,15 @@
992
  "/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
993
  " warnings.warn(\n",
994
  "***** Running training *****\n",
995
- " Num examples = 320000\n",
996
  " Num Epochs = 9223372036854775807\n",
997
  " Instantaneous batch size per device = 64\n",
998
  " Total train batch size (w. parallel, distributed & accumulation) = 64\n",
999
  " Gradient Accumulation steps = 1\n",
1000
- " Total optimization steps = 5000\n",
1001
  " Number of trainable parameters = 241734912\n",
1002
- "Reading metadata...: 6505it [00:00, 21991.51it/s]\n",
 
1003
  "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
1004
  ]
1005
  },
@@ -1009,8 +911,8 @@
1009
  "\n",
1010
  " <div>\n",
1011
  " \n",
1012
- " <progress value='2231' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1013
- " [2231/5000 6:49:21 < 8:28:31, 0.09 it/s, Epoch 21.02/9223372036854775807]\n",
1014
  " </div>\n",
1015
  " <table border=\"1\" class=\"dataframe\">\n",
1016
  " <thead>\n",
@@ -1018,22 +920,9 @@
1018
  " <th>Step</th>\n",
1019
  " <th>Training Loss</th>\n",
1020
  " <th>Validation Loss</th>\n",
1021
- " <th>Wer</th>\n",
1022
  " </tr>\n",
1023
  " </thead>\n",
1024
  " <tbody>\n",
1025
- " <tr>\n",
1026
- " <td>1000</td>\n",
1027
- " <td>0.006600</td>\n",
1028
- " <td>0.468024</td>\n",
1029
- " <td>90.537665</td>\n",
1030
- " </tr>\n",
1031
- " <tr>\n",
1032
- " <td>2000</td>\n",
1033
- " <td>0.003000</td>\n",
1034
- " <td>0.512834</td>\n",
1035
- " <td>89.360193</td>\n",
1036
- " </tr>\n",
1037
  " </tbody>\n",
1038
  "</table><p>"
1039
  ],
@@ -1048,11 +937,13 @@
1048
  "name": "stderr",
1049
  "output_type": "stream",
1050
  "text": [
1051
- "Reading metadata...: 6505it [00:00, 24574.42it/s]\n",
1052
- "Reading metadata...: 6505it [00:00, 24420.15it/s]\n",
1053
- "Reading metadata...: 6505it [00:00, 36254.85it/s]\n",
1054
- "Reading metadata...: 6505it [00:00, 30794.80it/s]\n",
1055
- "Reading metadata...: 6505it [00:00, 27712.44it/s]\n"
1056
  ]
1057
  }
1058
  ],
@@ -1381,9 +1272,9 @@
1381
  ],
1382
  "metadata": {
1383
  "kernelspec": {
1384
- "display_name": "hf",
1385
  "language": "python",
1386
- "name": "hf"
1387
  },
1388
  "language_info": {
1389
  "codemirror_mode": {
 
108
  },
109
  {
110
  "cell_type": "code",
111
+ "execution_count": 5,
112
  "id": "065a8cf7-e54f-4ac3-900e-609c80714fca",
113
  "metadata": {},
114
  "outputs": [],
 
142
  },
143
  {
144
  "cell_type": "code",
145
+ "execution_count": 6,
146
  "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
147
  "metadata": {},
148
  "outputs": [],
 
151
  "\n",
152
  "raw_datasets = IterableDatasetDict()\n",
153
  "\n",
154
+ "raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"ja\", split=\"train+validation\", use_auth_token=True) # set split=\"train+validation\" for low-resource\n",
155
  "raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"ja\", split=\"test\", use_auth_token=True)"
156
  ]
157
  },
 
185
  },
186
  {
187
  "cell_type": "code",
188
+ "execution_count": 7,
189
  "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
190
  "metadata": {},
191
+ "outputs": [],
192
  "source": [
193
  "from transformers import WhisperProcessor\n",
194
  "\n",
 
213
  },
214
  {
215
  "cell_type": "code",
216
+ "execution_count": 8,
217
  "id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
218
  "metadata": {},
219
  "outputs": [
 
233
  " 'segment': Value(dtype='string', id=None)}"
234
  ]
235
  },
236
+ "execution_count": 8,
237
  "metadata": {},
238
  "output_type": "execute_result"
239
  }
 
259
  },
260
  {
261
  "cell_type": "code",
262
+ "execution_count": 9,
263
  "id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
264
  "metadata": {},
265
  "outputs": [],
 
279
  },
280
  {
281
  "cell_type": "code",
282
+ "execution_count": 10,
283
  "id": "d041650e-1c48-4439-87b3-5b6f4a514107",
284
  "metadata": {},
285
  "outputs": [],
 
306
  },
307
  {
308
  "cell_type": "code",
309
+ "execution_count": 11,
310
  "id": "c085911c-a10a-41ef-8874-306e0503e9bb",
311
  "metadata": {},
312
  "outputs": [],
 
342
  },
343
  {
344
  "cell_type": "code",
345
+ "execution_count": 12,
346
  "id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
347
  "metadata": {},
348
  "outputs": [],
 
360
  },
361
  {
362
  "cell_type": "code",
363
+ "execution_count": 13,
364
  "id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
365
  "metadata": {},
366
  "outputs": [],
 
381
  },
382
  {
383
  "cell_type": "code",
384
+ "execution_count": 14,
385
  "id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
386
  "metadata": {},
387
  "outputs": [],
 
402
  },
403
  {
404
  "cell_type": "code",
405
+ "execution_count": 15,
406
  "id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
407
  "metadata": {},
408
  "outputs": [],
 
472
  },
473
  {
474
  "cell_type": "code",
475
+ "execution_count": 16,
476
  "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
477
  "metadata": {},
478
  "outputs": [],
 
520
  },
521
  {
522
  "cell_type": "code",
523
+ "execution_count": 17,
524
  "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
525
  "metadata": {},
526
  "outputs": [],
 
547
  },
548
  {
549
  "cell_type": "code",
550
+ "execution_count": 18,
551
  "id": "b22b4011-f31f-4b57-b684-c52332f92890",
552
  "metadata": {},
553
  "outputs": [
554
  {
555
  "data": {
556
  "application/vnd.jupyter.widget-view+json": {
557
+ "model_id": "bffdd7b1fed44295954d9eed41a9cfd5",
558
  "version_major": 2,
559
  "version_minor": 0
560
  },
 
591
  },
592
  {
593
  "cell_type": "code",
594
+ "execution_count": 19,
595
  "id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
596
  "metadata": {},
597
  "outputs": [],
 
641
  },
642
  {
643
  "cell_type": "code",
644
+ "execution_count": 20,
645
  "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
646
  "metadata": {},
647
  "outputs": [
648
  {
649
  "data": {
650
  "application/vnd.jupyter.widget-view+json": {
651
+ "model_id": "48fee2fd3b2a4a67b3a35666fda4dfe9",
652
  "version_major": 2,
653
  "version_minor": 0
654
  },
 
662
  {
663
  "data": {
664
  "application/vnd.jupyter.widget-view+json": {
665
+ "model_id": "51cdba284e8f44318868fbd013970280",
666
  "version_major": 2,
667
  "version_minor": 0
668
  },
 
690
  },
691
  {
692
  "cell_type": "code",
693
+ "execution_count": 21,
694
  "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
695
  "metadata": {},
696
  "outputs": [],
 
718
  },
719
  {
720
  "cell_type": "code",
721
+ "execution_count": 22,
722
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
723
  "metadata": {},
724
  "outputs": [],
 
730
  " per_device_train_batch_size=64,\n",
731
  " gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
732
  " learning_rate=1e-5,\n",
733
+ " warmup_steps=200,\n",
734
+ " max_steps=1000,\n",
735
  " gradient_checkpointing=True,\n",
736
  " fp16=True,\n",
737
  " evaluation_strategy=\"steps\",\n",
738
  " per_device_eval_batch_size=8,\n",
739
  " predict_with_generate=True,\n",
740
  " generation_max_length=225,\n",
741
+ " save_steps=200,\n",
742
+ " eval_steps=200,\n",
743
  " logging_steps=25,\n",
744
  " report_to=[\"tensorboard\"],\n",
745
  " load_best_model_at_end=True,\n",
 
768
  },
769
  {
770
  "cell_type": "code",
771
+ "execution_count": 23,
772
  "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
773
  "metadata": {},
774
  "outputs": [],
 
797
  },
798
  {
799
  "cell_type": "code",
800
+ "execution_count": 24,
801
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
802
  "metadata": {},
803
  "outputs": [
 
836
  },
837
  {
838
  "cell_type": "code",
839
+ "execution_count": 25,
840
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
841
  "metadata": {},
842
  "outputs": [
 
893
  "/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
894
  " warnings.warn(\n",
895
  "***** Running training *****\n",
896
+ " Num examples = 64000\n",
897
  " Num Epochs = 9223372036854775807\n",
898
  " Instantaneous batch size per device = 64\n",
899
  " Total train batch size (w. parallel, distributed & accumulation) = 64\n",
900
  " Gradient Accumulation steps = 1\n",
901
+ " Total optimization steps = 1000\n",
902
  " Number of trainable parameters = 241734912\n",
903
+ "Reading metadata...: 6505it [00:00, 31331.40it/s]\n",
904
+ "Reading metadata...: 4485it [00:00, 41376.86it/s]\n",
905
  "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
906
  ]
907
  },
 
911
  "\n",
912
  " <div>\n",
913
  " \n",
914
+ " <progress value='201' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
915
+ " [ 201/1000 22:31 < 1:30:27, 0.15 it/s, Epoch 1.06/9223372036854775807]\n",
916
  " </div>\n",
917
  " <table border=\"1\" class=\"dataframe\">\n",
918
  " <thead>\n",
 
920
  " <th>Step</th>\n",
921
  " <th>Training Loss</th>\n",
922
  " <th>Validation Loss</th>\n",
 
923
  " </tr>\n",
924
  " </thead>\n",
925
  " <tbody>\n",
926
  " </tbody>\n",
927
  "</table><p>"
928
  ],
 
937
  "name": "stderr",
938
  "output_type": "stream",
939
  "text": [
940
+ "Reading metadata...: 6505it [00:00, 64162.65it/s]\n",
941
+ "Reading metadata...: 4485it [00:00, 27834.06it/s]\n",
942
+ "***** Running Evaluation *****\n",
943
+ " Num examples: Unknown\n",
944
+ " Batch size = 8\n",
945
+ "Reading metadata...: 4604it [00:00, 27155.92it/s]\n",
946
+ "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
947
  ]
948
  }
949
  ],
 
1272
  ],
1273
  "metadata": {
1274
  "kernelspec": {
1275
+ "display_name": "wspsr",
1276
  "language": "python",
1277
+ "name": "wspsr"
1278
  },
1279
  "language_info": {
1280
  "codemirror_mode": {
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3246529f086b22124c7901ea81e50c3e83cfe22009b2ee44ddc94f5bea88d86
3
  size 967102601
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56c4b0bb4897d70e1953cf26927fc51e19cecc3225658657daa32a0c0d1e1cb0
3
  size 967102601
runs/Dec12_04-37-47_150-136-44-233/1670819878.783822/events.out.tfevents.1670819878.150-136-44-233.69039.1 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d13318210207986e1d4965c6206a303c2bcd72da40d33ba3b859c8e3111cf764
3
+ size 5864
runs/Dec12_04-37-47_150-136-44-233/events.out.tfevents.1670819878.150-136-44-233.69039.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da946657c9377166580c41662af45f086a478b101a9862b40eb5174e55e6f75a
3
+ size 5844
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b15a0138008e0c490133c10ef48941adc9502d5f778b86dcc1d39f32d25062dc
3
  size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:728d6cd7b154a86029fc38c737217977eb35dd910ed073d6628129742d876d7e
3
  size 3579