End of training
Browse files- fine-tune-whisper-non-streaming-zh-TW.ipynb +23 -14
- pytorch_model.bin +1 -1
fine-tune-whisper-non-streaming-zh-TW.ipynb
CHANGED
@@ -1106,11 +1106,11 @@
|
|
1106 |
},
|
1107 |
{
|
1108 |
"cell_type": "code",
|
1109 |
-
"execution_count":
|
1110 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
1111 |
"metadata": {
|
1112 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
1113 |
-
"scrolled":
|
1114 |
},
|
1115 |
"outputs": [
|
1116 |
{
|
@@ -1134,8 +1134,8 @@
|
|
1134 |
"\n",
|
1135 |
" <div>\n",
|
1136 |
" \n",
|
1137 |
-
" <progress value='
|
1138 |
-
" [
|
1139 |
" </div>\n",
|
1140 |
" <table border=\"1\" class=\"dataframe\">\n",
|
1141 |
" <thead>\n",
|
@@ -1207,6 +1207,22 @@
|
|
1207 |
"Feature extractor saved in ./checkpoint-600/preprocessor_config.json\n",
|
1208 |
"Feature extractor saved in ./preprocessor_config.json\n"
|
1209 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1210 |
}
|
1211 |
],
|
1212 |
"source": [
|
@@ -1225,7 +1241,7 @@
|
|
1225 |
},
|
1226 |
{
|
1227 |
"cell_type": "code",
|
1228 |
-
"execution_count":
|
1229 |
"id": "c704f91e-241b-48c9-b8e0-f0da396a9663",
|
1230 |
"metadata": {
|
1231 |
"id": "c704f91e-241b-48c9-b8e0-f0da396a9663"
|
@@ -1255,7 +1271,7 @@
|
|
1255 |
},
|
1256 |
{
|
1257 |
"cell_type": "code",
|
1258 |
-
"execution_count":
|
1259 |
"id": "d7030622-caf7-4039-939b-6195cdaa2585",
|
1260 |
"metadata": {
|
1261 |
"id": "d7030622-caf7-4039-939b-6195cdaa2585"
|
@@ -1266,14 +1282,7 @@
|
|
1266 |
"output_type": "stream",
|
1267 |
"text": [
|
1268 |
"Saving model checkpoint to ./\n",
|
1269 |
-
"Configuration saved in ./config.json\n"
|
1270 |
-
"Model weights saved in ./pytorch_model.bin\n",
|
1271 |
-
"Feature extractor saved in ./preprocessor_config.json\n",
|
1272 |
-
"Several commits (3) will be pushed upstream.\n",
|
1273 |
-
"The progress bars may be unreliable.\n",
|
1274 |
-
"To https://huggingface.co/Scrya/whisper-medium-ms-augmented\n",
|
1275 |
-
" 1684a4c..b46b3bd main -> main\n",
|
1276 |
-
"\n"
|
1277 |
]
|
1278 |
}
|
1279 |
],
|
|
|
1106 |
},
|
1107 |
{
|
1108 |
"cell_type": "code",
|
1109 |
+
"execution_count": 24,
|
1110 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
1111 |
"metadata": {
|
1112 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
1113 |
+
"scrolled": true
|
1114 |
},
|
1115 |
"outputs": [
|
1116 |
{
|
|
|
1134 |
"\n",
|
1135 |
" <div>\n",
|
1136 |
" \n",
|
1137 |
+
" <progress value='606' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
1138 |
+
" [ 606/1000 4:07:06 < 2:41:11, 0.04 it/s, Epoch 1.72/3]\n",
|
1139 |
" </div>\n",
|
1140 |
" <table border=\"1\" class=\"dataframe\">\n",
|
1141 |
" <thead>\n",
|
|
|
1207 |
"Feature extractor saved in ./checkpoint-600/preprocessor_config.json\n",
|
1208 |
"Feature extractor saved in ./preprocessor_config.json\n"
|
1209 |
]
|
1210 |
+
},
|
1211 |
+
{
|
1212 |
+
"ename": "KeyboardInterrupt",
|
1213 |
+
"evalue": "",
|
1214 |
+
"output_type": "error",
|
1215 |
+
"traceback": [
|
1216 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
1217 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
1218 |
+
"Cell \u001b[0;32mIn[24], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
1219 |
+
"File \u001b[0;32m~/whisper/lib/python3.8/site-packages/transformers/trainer.py:1535\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1530\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_wrapped \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\n\u001b[1;32m 1532\u001b[0m inner_training_loop \u001b[38;5;241m=\u001b[39m find_executable_batch_size(\n\u001b[1;32m 1533\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inner_training_loop, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_batch_size, args\u001b[38;5;241m.\u001b[39mauto_find_batch_size\n\u001b[1;32m 1534\u001b[0m )\n\u001b[0;32m-> 1535\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1536\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1537\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1538\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1539\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
|
1220 |
+
"File \u001b[0;32m~/whisper/lib/python3.8/site-packages/transformers/trainer.py:1783\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1781\u001b[0m tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_step(model, inputs)\n\u001b[1;32m 1782\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1783\u001b[0m tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtraining_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1785\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1786\u001b[0m args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[1;32m 1787\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[1;32m 1788\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch\u001b[38;5;241m.\u001b[39misinf(tr_loss_step))\n\u001b[1;32m 1789\u001b[0m ):\n\u001b[1;32m 1790\u001b[0m \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[1;32m 1791\u001b[0m tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n",
|
1221 |
+
"File \u001b[0;32m~/whisper/lib/python3.8/site-packages/transformers/trainer.py:2541\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 2538\u001b[0m loss \u001b[38;5;241m=\u001b[39m loss \u001b[38;5;241m/\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mgradient_accumulation_steps\n\u001b[1;32m 2540\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdo_grad_scaling:\n\u001b[0;32m-> 2541\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscaler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscale\u001b[49m\u001b[43m(\u001b[49m\u001b[43mloss\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2542\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39muse_apex:\n\u001b[1;32m 2543\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m amp\u001b[38;5;241m.\u001b[39mscale_loss(loss, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptimizer) \u001b[38;5;28;01mas\u001b[39;00m scaled_loss:\n",
|
1222 |
+
"File \u001b[0;32m~/whisper/lib/python3.8/site-packages/torch/_tensor.py:487\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 477\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m 479\u001b[0m Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[1;32m 480\u001b[0m (\u001b[38;5;28mself\u001b[39m,),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 485\u001b[0m inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[1;32m 486\u001b[0m )\n\u001b[0;32m--> 487\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
|
1223 |
+
"File \u001b[0;32m~/whisper/lib/python3.8/site-packages/torch/autograd/__init__.py:197\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 192\u001b[0m retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[1;32m 194\u001b[0m \u001b[38;5;66;03m# The reason we repeat same the comment below is that\u001b[39;00m\n\u001b[1;32m 195\u001b[0m \u001b[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m 196\u001b[0m \u001b[38;5;66;03m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 197\u001b[0m \u001b[43mVariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execution_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_backward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[1;32m 198\u001b[0m \u001b[43m \u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_unreachable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maccumulate_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
|
1224 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
1225 |
+
]
|
1226 |
}
|
1227 |
],
|
1228 |
"source": [
|
|
|
1241 |
},
|
1242 |
{
|
1243 |
"cell_type": "code",
|
1244 |
+
"execution_count": 25,
|
1245 |
"id": "c704f91e-241b-48c9-b8e0-f0da396a9663",
|
1246 |
"metadata": {
|
1247 |
"id": "c704f91e-241b-48c9-b8e0-f0da396a9663"
|
|
|
1271 |
},
|
1272 |
{
|
1273 |
"cell_type": "code",
|
1274 |
+
"execution_count": null,
|
1275 |
"id": "d7030622-caf7-4039-939b-6195cdaa2585",
|
1276 |
"metadata": {
|
1277 |
"id": "d7030622-caf7-4039-939b-6195cdaa2585"
|
|
|
1282 |
"output_type": "stream",
|
1283 |
"text": [
|
1284 |
"Saving model checkpoint to ./\n",
|
1285 |
+
"Configuration saved in ./config.json\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1286 |
]
|
1287 |
}
|
1288 |
],
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3055754841
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2cb10116b77691d60116e9dcbd6abb617b0d87b0e3b40e1883895088c288a5cb
|
3 |
size 3055754841
|