Scrya commited on
Commit
7a6740e
1 Parent(s): f1dc7a1

End of training

Browse files
fine-tune-whisper-non-streaming-zh-TW.ipynb CHANGED
@@ -1106,11 +1106,11 @@
1106
  },
1107
  {
1108
  "cell_type": "code",
1109
- "execution_count": null,
1110
  "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
1111
  "metadata": {
1112
  "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
1113
- "scrolled": false
1114
  },
1115
  "outputs": [
1116
  {
@@ -1134,8 +1134,8 @@
1134
  "\n",
1135
  " <div>\n",
1136
  " \n",
1137
- " <progress value='601' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1138
- " [ 601/1000 3:16:05 < 2:10:37, 0.05 it/s, Epoch 1.70/3]\n",
1139
  " </div>\n",
1140
  " <table border=\"1\" class=\"dataframe\">\n",
1141
  " <thead>\n",
@@ -1207,6 +1207,22 @@
1207
  "Feature extractor saved in ./checkpoint-600/preprocessor_config.json\n",
1208
  "Feature extractor saved in ./preprocessor_config.json\n"
1209
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1210
  }
1211
  ],
1212
  "source": [
@@ -1225,7 +1241,7 @@
1225
  },
1226
  {
1227
  "cell_type": "code",
1228
- "execution_count": 28,
1229
  "id": "c704f91e-241b-48c9-b8e0-f0da396a9663",
1230
  "metadata": {
1231
  "id": "c704f91e-241b-48c9-b8e0-f0da396a9663"
@@ -1255,7 +1271,7 @@
1255
  },
1256
  {
1257
  "cell_type": "code",
1258
- "execution_count": 29,
1259
  "id": "d7030622-caf7-4039-939b-6195cdaa2585",
1260
  "metadata": {
1261
  "id": "d7030622-caf7-4039-939b-6195cdaa2585"
@@ -1266,14 +1282,7 @@
1266
  "output_type": "stream",
1267
  "text": [
1268
  "Saving model checkpoint to ./\n",
1269
- "Configuration saved in ./config.json\n",
1270
- "Model weights saved in ./pytorch_model.bin\n",
1271
- "Feature extractor saved in ./preprocessor_config.json\n",
1272
- "Several commits (3) will be pushed upstream.\n",
1273
- "The progress bars may be unreliable.\n",
1274
- "To https://huggingface.co/Scrya/whisper-medium-ms-augmented\n",
1275
- " 1684a4c..b46b3bd main -> main\n",
1276
- "\n"
1277
  ]
1278
  }
1279
  ],
 
1106
  },
1107
  {
1108
  "cell_type": "code",
1109
+ "execution_count": 24,
1110
  "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
1111
  "metadata": {
1112
  "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
1113
+ "scrolled": true
1114
  },
1115
  "outputs": [
1116
  {
 
1134
  "\n",
1135
  " <div>\n",
1136
  " \n",
1137
+ " <progress value='606' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1138
+ " [ 606/1000 4:07:06 < 2:41:11, 0.04 it/s, Epoch 1.72/3]\n",
1139
  " </div>\n",
1140
  " <table border=\"1\" class=\"dataframe\">\n",
1141
  " <thead>\n",
 
1207
  "Feature extractor saved in ./checkpoint-600/preprocessor_config.json\n",
1208
  "Feature extractor saved in ./preprocessor_config.json\n"
1209
  ]
1210
+ },
1211
+ {
1212
+ "ename": "KeyboardInterrupt",
1213
+ "evalue": "",
1214
+ "output_type": "error",
1215
+ "traceback": [
1216
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1217
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
1218
+ "Cell \u001b[0;32mIn[24], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
1219
+ "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/transformers/trainer.py:1535\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1530\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_wrapped \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\n\u001b[1;32m 1532\u001b[0m inner_training_loop \u001b[38;5;241m=\u001b[39m find_executable_batch_size(\n\u001b[1;32m 1533\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inner_training_loop, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_batch_size, args\u001b[38;5;241m.\u001b[39mauto_find_batch_size\n\u001b[1;32m 1534\u001b[0m )\n\u001b[0;32m-> 1535\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1536\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1537\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1538\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1539\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
1220
+ "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/transformers/trainer.py:1783\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1781\u001b[0m tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_step(model, inputs)\n\u001b[1;32m 1782\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1783\u001b[0m tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtraining_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1785\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1786\u001b[0m args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[1;32m 1787\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[1;32m 1788\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch\u001b[38;5;241m.\u001b[39misinf(tr_loss_step))\n\u001b[1;32m 1789\u001b[0m ):\n\u001b[1;32m 1790\u001b[0m \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[1;32m 1791\u001b[0m tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n",
1221
+ "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/transformers/trainer.py:2541\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 2538\u001b[0m loss \u001b[38;5;241m=\u001b[39m loss \u001b[38;5;241m/\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mgradient_accumulation_steps\n\u001b[1;32m 2540\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdo_grad_scaling:\n\u001b[0;32m-> 2541\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscaler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscale\u001b[49m\u001b[43m(\u001b[49m\u001b[43mloss\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2542\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39muse_apex:\n\u001b[1;32m 2543\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m amp\u001b[38;5;241m.\u001b[39mscale_loss(loss, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptimizer) \u001b[38;5;28;01mas\u001b[39;00m scaled_loss:\n",
1222
+ "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/torch/_tensor.py:487\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 477\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m 479\u001b[0m Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[1;32m 480\u001b[0m (\u001b[38;5;28mself\u001b[39m,),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 485\u001b[0m inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[1;32m 486\u001b[0m )\n\u001b[0;32m--> 487\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
1223
+ "File \u001b[0;32m~/whisper/lib/python3.8/site-packages/torch/autograd/__init__.py:197\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 192\u001b[0m retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[1;32m 194\u001b[0m \u001b[38;5;66;03m# The reason we repeat same the comment below is that\u001b[39;00m\n\u001b[1;32m 195\u001b[0m \u001b[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m 196\u001b[0m \u001b[38;5;66;03m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 197\u001b[0m \u001b[43mVariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execution_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_backward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[1;32m 198\u001b[0m \u001b[43m \u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_unreachable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maccumulate_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
1224
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
1225
+ ]
1226
  }
1227
  ],
1228
  "source": [
 
1241
  },
1242
  {
1243
  "cell_type": "code",
1244
+ "execution_count": 25,
1245
  "id": "c704f91e-241b-48c9-b8e0-f0da396a9663",
1246
  "metadata": {
1247
  "id": "c704f91e-241b-48c9-b8e0-f0da396a9663"
 
1271
  },
1272
  {
1273
  "cell_type": "code",
1274
+ "execution_count": null,
1275
  "id": "d7030622-caf7-4039-939b-6195cdaa2585",
1276
  "metadata": {
1277
  "id": "d7030622-caf7-4039-939b-6195cdaa2585"
 
1282
  "output_type": "stream",
1283
  "text": [
1284
  "Saving model checkpoint to ./\n",
1285
+ "Configuration saved in ./config.json\n"
 
 
 
 
 
 
 
1286
  ]
1287
  }
1288
  ],
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6411d501e4303ddf9b86289a5d217422eb9512dbcc64a08c30ef1eb0eacffd82
3
  size 3055754841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cb10116b77691d60116e9dcbd6abb617b0d87b0e3b40e1883895088c288a5cb
3
  size 3055754841