kimbochen committed on
Commit
05da956
1 parent: 3a44fa5

End of training

fine-tune-whisper-streaming.ipynb CHANGED
@@ -1082,7 +1082,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
    "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
    "metadata": {},
    "outputs": [],
@@ -1108,13 +1108,263 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 25,
    "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Saving model checkpoint to ./\n",
+      "Configuration saved in ./config.json\n",
+      "Model weights saved in ./pytorch_model.bin\n",
+      "Feature extractor saved in ./preprocessor_config.json\n",
+      "tokenizer config file saved in ./tokenizer_config.json\n",
+      "Special tokens file saved in ./special_tokens_map.json\n",
+      "added tokens file saved in ./added_tokens.json\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "16739dc58bd048408e8154a39dca4590",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Upload file pytorch_model.bin:   0%|          | 32.0k/922M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "df61c3286393482e9084f4f78f661525",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Upload file runs/Dec10_01-56-07_129-213-27-84/events.out.tfevents.1670637380.129-213-27-84.69598.0:  95%|#####…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "remote: Scanning LFS files for validity, may be slow... \n",
+      "remote: LFS file scan complete. \n",
+      "To https://huggingface.co/kimbochen/whisper-small-jp\n",
+      "   f4b374d..e216c5d  main -> main\n",
+      "\n",
+      "Dropping the following result as it does not have all the necessary fields:\n",
+      "{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'dataset': {'name': 'Common Voice 11.0', 'type': 'mozilla-foundation/common_voice_11_0', 'config': 'ja', 'split': 'test', 'args': 'ja'}}\n",
+      "To https://huggingface.co/kimbochen/whisper-small-jp\n",
+      "   e216c5d..3a44fa5  main -> main\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'https://huggingface.co/kimbochen/whisper-small-jp/commit/e216c5dfdb8e05855b7f8c0cb2778c7731a46633'"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "trainer.push_to_hub(**kwargs)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "4df1603c-ef35-40f1-ae57-3214441073c8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "PyTorch: setting up devices\n"
+     ]
+    }
+   ],
+   "source": [
+    "training_args = Seq2SeqTrainingArguments(\n",
+    "    output_dir=\"./\",\n",
+    "    per_device_train_batch_size=64,\n",
+    "    gradient_accumulation_steps=1,  # increase by 2x for every 2x decrease in batch size\n",
+    "    learning_rate=1e-5,\n",
+    "    max_steps=1000,\n",
+    "    num_train_epochs=-1,\n",
+    "    gradient_checkpointing=True,\n",
+    "    fp16=True,\n",
+    "    evaluation_strategy=\"steps\",\n",
+    "    per_device_eval_batch_size=8,\n",
+    "    predict_with_generate=True,\n",
+    "    generation_max_length=225,\n",
+    "    save_steps=1000,\n",
+    "    eval_steps=1000,\n",
+    "    logging_steps=25,\n",
+    "    report_to=[\"tensorboard\"],\n",
+    "    load_best_model_at_end=True,\n",
+    "    metric_for_best_model=\"wer\",\n",
+    "    greater_is_better=False,\n",
+    "    push_to_hub=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "afc2b554-7171-48c7-95aa-b7e61b70ab20",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ubuntu/whisper-small-jp/./ is already a clone of https://huggingface.co/kimbochen/whisper-small-jp. Make sure you pull the latest changes with `repo.git_pull()`.\n",
+      "max_steps is given, it will override any value given in num_train_epochs\n",
+      "Using cuda_amp half precision backend\n"
+     ]
+    }
+   ],
+   "source": [
+    "trainer = Seq2SeqTrainer(\n",
+    "    args=training_args,\n",
+    "    model=model,\n",
+    "    train_dataset=vectorized_datasets[\"train\"],\n",
+    "    eval_dataset=vectorized_datasets[\"test\"],\n",
+    "    data_collator=data_collator,\n",
+    "    compute_metrics=compute_metrics,\n",
+    "    tokenizer=processor,\n",
+    "    callbacks=[ShuffleCallback()],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "b029a1d8-24de-46e7-b067-0f900b1db342",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Loading model from checkpoint-4000.\n",
+      "/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+      "  warnings.warn(\n",
+      "***** Running training *****\n",
+      "  Num examples = 64000\n",
+      "  Num Epochs = 9223372036854775807\n",
+      "  Instantaneous batch size per device = 64\n",
+      "  Total train batch size (w. parallel, distributed & accumulation) = 64\n",
+      "  Gradient Accumulation steps = 1\n",
+      "  Total optimization steps = 1000\n",
+      "  Number of trainable parameters = 241734912\n",
+      "  Continuing training from checkpoint, will skip to saved global_step\n",
+      "  Continuing training from epoch 4\n",
+      "  Continuing training from global step 4000\n",
+      "  Will skip the first 4 epochs then the first 0 batches in the first epoch. If this takes a lot of time, you can add the `--ignore_data_skip` flag to your launch command, but you will resume the training on data already seen by your model.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "01337298313740d98d3cc75b6d5e3ff7",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "0it [00:00, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Reading metadata...: 0it [00:00, ?it/s]\u001b[A\n",
+      "Reading metadata...: 6505it [00:00, 34246.80it/s]\n",
+      "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
+      "\n",
+      "Reading metadata...: 6505it [00:00, 84823.64it/s]\n",
+      "\n",
+      "Reading metadata...: 6505it [00:00, 88617.62it/s]\n",
+      "\n",
+      "Reading metadata...: 6505it [00:00, 90289.78it/s]\n",
+      "\n",
+      "Reading metadata...: 6505it [00:00, 91816.92it/s]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='4001' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [1000/1000 00:00, Epoch 4/9223372036854775807]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: left;\">\n",
+       "      <th>Step</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+      "\n",
+      "\n",
+      "Loading best model from ./checkpoint-4000 (score: 88.31039863810469).\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "TrainOutput(global_step=4001, training_loss=8.343380785802548e-08, metrics={'train_runtime': 169.0541, 'train_samples_per_second': 378.577, 'train_steps_per_second': 5.915, 'total_flos': 7.363747084345344e+19, 'train_loss': 8.343380785802548e-08, 'epoch': 4.0})"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "trainer.train(\"checkpoint-4000\")"
+   ]
   }
  ],
  "metadata": {
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae23baa6d08377d69619c875f6ba0ad1ba99e737c1dafae82d54f122b0e881d3
+oid sha256:b3246529f086b22124c7901ea81e50c3e83cfe22009b2ee44ddc94f5bea88d86
 size 967102601
runs/Dec10_16-23-25_129-213-27-84/1670689420.7830398/events.out.tfevents.1670689420.129-213-27-84.69598.3 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:919f991c2b7b827b7bbfa43f46161f5f173d21c892703c4c7a1722f696dedfbb
+size 5863
runs/Dec10_16-23-25_129-213-27-84/events.out.tfevents.1670689420.129-213-27-84.69598.2 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:945311114191fd94d4d98afee3982d6e6ada989a6b38f2442c3c6e0217f1644d
+size 4637
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e47dc3dd345045f851c2adb1a18a18460c556d0e3f86131b6707ff1504ff767f
+oid sha256:004a4cf781ce4e3549410cee708eb390c3b675a56f1d039eff79f582955c901a
 size 3579