whisper-training-blog / trainer_state.json
marinone94's picture
End of training
309997b
raw history blame
No virus
15.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.093366093366093,
"global_step": 407,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 0.0,
"loss": 1.8118,
"step": 4
},
{
"epoch": 0.02,
"learning_rate": 3.6585365853658536e-07,
"loss": 1.8122,
"step": 8
},
{
"epoch": 0.03,
"learning_rate": 8.53658536585366e-07,
"loss": 1.8174,
"step": 12
},
{
"epoch": 0.04,
"learning_rate": 1.3414634146341465e-06,
"loss": 1.7616,
"step": 16
},
{
"epoch": 0.05,
"learning_rate": 1.8292682926829268e-06,
"loss": 1.6875,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 2.317073170731708e-06,
"loss": 1.5201,
"step": 24
},
{
"epoch": 0.07,
"learning_rate": 2.8048780487804884e-06,
"loss": 1.3982,
"step": 28
},
{
"epoch": 0.08,
"learning_rate": 3.292682926829269e-06,
"loss": 1.3541,
"step": 32
},
{
"epoch": 0.09,
"learning_rate": 3.780487804878049e-06,
"loss": 1.2092,
"step": 36
},
{
"epoch": 0.1,
"learning_rate": 4.268292682926829e-06,
"loss": 1.1599,
"step": 40
},
{
"epoch": 0.1,
"eval_loss": 1.142654299736023,
"eval_runtime": 101.9854,
"eval_samples_per_second": 3.863,
"eval_steps_per_second": 0.127,
"eval_wer": 15.213946117274169,
"step": 40
},
{
"epoch": 1.01,
"learning_rate": 4.75609756097561e-06,
"loss": 1.0124,
"step": 44
},
{
"epoch": 1.02,
"learning_rate": 5.243902439024391e-06,
"loss": 0.9171,
"step": 48
},
{
"epoch": 1.03,
"learning_rate": 5.731707317073171e-06,
"loss": 0.8027,
"step": 52
},
{
"epoch": 1.04,
"learning_rate": 6.219512195121951e-06,
"loss": 0.7284,
"step": 56
},
{
"epoch": 1.05,
"learning_rate": 6.707317073170733e-06,
"loss": 0.6185,
"step": 60
},
{
"epoch": 1.06,
"learning_rate": 7.1951219512195125e-06,
"loss": 0.57,
"step": 64
},
{
"epoch": 1.07,
"learning_rate": 7.682926829268293e-06,
"loss": 0.4985,
"step": 68
},
{
"epoch": 1.08,
"learning_rate": 8.170731707317073e-06,
"loss": 0.488,
"step": 72
},
{
"epoch": 1.09,
"learning_rate": 8.658536585365854e-06,
"loss": 0.4569,
"step": 76
},
{
"epoch": 1.1,
"learning_rate": 9.146341463414635e-06,
"loss": 0.4655,
"step": 80
},
{
"epoch": 1.1,
"eval_loss": 0.5613037943840027,
"eval_runtime": 91.9697,
"eval_samples_per_second": 4.284,
"eval_steps_per_second": 0.141,
"eval_wer": 17.591125198098258,
"step": 80
},
{
"epoch": 2.0,
"learning_rate": 9.634146341463415e-06,
"loss": 0.425,
"step": 84
},
{
"epoch": 2.01,
"learning_rate": 9.96923076923077e-06,
"loss": 0.4162,
"step": 88
},
{
"epoch": 2.02,
"learning_rate": 9.846153846153848e-06,
"loss": 0.3809,
"step": 92
},
{
"epoch": 2.03,
"learning_rate": 9.723076923076924e-06,
"loss": 0.3533,
"step": 96
},
{
"epoch": 2.04,
"learning_rate": 9.600000000000001e-06,
"loss": 0.3511,
"step": 100
},
{
"epoch": 2.05,
"learning_rate": 9.476923076923079e-06,
"loss": 0.3475,
"step": 104
},
{
"epoch": 2.06,
"learning_rate": 9.353846153846155e-06,
"loss": 0.321,
"step": 108
},
{
"epoch": 2.07,
"learning_rate": 9.230769230769232e-06,
"loss": 0.2859,
"step": 112
},
{
"epoch": 2.08,
"learning_rate": 9.107692307692308e-06,
"loss": 0.3191,
"step": 116
},
{
"epoch": 2.09,
"learning_rate": 8.984615384615386e-06,
"loss": 0.2753,
"step": 120
},
{
"epoch": 2.09,
"eval_loss": 0.5241264700889587,
"eval_runtime": 88.0526,
"eval_samples_per_second": 4.475,
"eval_steps_per_second": 0.148,
"eval_wer": 17.21321467755699,
"step": 120
},
{
"epoch": 3.0,
"learning_rate": 8.861538461538463e-06,
"loss": 0.3104,
"step": 124
},
{
"epoch": 3.01,
"learning_rate": 8.73846153846154e-06,
"loss": 0.2734,
"step": 128
},
{
"epoch": 3.02,
"learning_rate": 8.615384615384617e-06,
"loss": 0.2608,
"step": 132
},
{
"epoch": 3.03,
"learning_rate": 8.492307692307693e-06,
"loss": 0.2509,
"step": 136
},
{
"epoch": 3.04,
"learning_rate": 8.36923076923077e-06,
"loss": 0.2548,
"step": 140
},
{
"epoch": 3.05,
"learning_rate": 8.246153846153848e-06,
"loss": 0.2469,
"step": 144
},
{
"epoch": 3.06,
"learning_rate": 8.123076923076924e-06,
"loss": 0.2231,
"step": 148
},
{
"epoch": 3.07,
"learning_rate": 8.000000000000001e-06,
"loss": 0.2138,
"step": 152
},
{
"epoch": 3.08,
"learning_rate": 7.876923076923077e-06,
"loss": 0.2349,
"step": 156
},
{
"epoch": 3.09,
"learning_rate": 7.753846153846155e-06,
"loss": 0.2077,
"step": 160
},
{
"epoch": 3.09,
"eval_loss": 0.5241798758506775,
"eval_runtime": 88.5317,
"eval_samples_per_second": 4.45,
"eval_steps_per_second": 0.147,
"eval_wer": 17.26197732536877,
"step": 160
},
{
"epoch": 3.1,
"learning_rate": 7.630769230769232e-06,
"loss": 0.2322,
"step": 164
},
{
"epoch": 4.01,
"learning_rate": 7.507692307692308e-06,
"loss": 0.2036,
"step": 168
},
{
"epoch": 4.02,
"learning_rate": 7.384615384615386e-06,
"loss": 0.2058,
"step": 172
},
{
"epoch": 4.03,
"learning_rate": 7.261538461538462e-06,
"loss": 0.1797,
"step": 176
},
{
"epoch": 4.04,
"learning_rate": 7.1384615384615385e-06,
"loss": 0.186,
"step": 180
},
{
"epoch": 4.05,
"learning_rate": 7.015384615384616e-06,
"loss": 0.2035,
"step": 184
},
{
"epoch": 4.06,
"learning_rate": 6.892307692307693e-06,
"loss": 0.1794,
"step": 188
},
{
"epoch": 4.07,
"learning_rate": 6.76923076923077e-06,
"loss": 0.1589,
"step": 192
},
{
"epoch": 4.08,
"learning_rate": 6.646153846153846e-06,
"loss": 0.1879,
"step": 196
},
{
"epoch": 4.09,
"learning_rate": 6.523076923076923e-06,
"loss": 0.1636,
"step": 200
},
{
"epoch": 4.09,
"eval_loss": 0.5289868712425232,
"eval_runtime": 95.5188,
"eval_samples_per_second": 4.125,
"eval_steps_per_second": 0.136,
"eval_wer": 17.66426916981592,
"step": 200
},
{
"epoch": 4.1,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.1767,
"step": 204
},
{
"epoch": 5.01,
"learning_rate": 6.276923076923077e-06,
"loss": 0.1657,
"step": 208
},
{
"epoch": 5.02,
"learning_rate": 6.153846153846155e-06,
"loss": 0.1607,
"step": 212
},
{
"epoch": 5.03,
"learning_rate": 6.030769230769231e-06,
"loss": 0.1458,
"step": 216
},
{
"epoch": 5.04,
"learning_rate": 5.907692307692308e-06,
"loss": 0.1541,
"step": 220
},
{
"epoch": 5.05,
"learning_rate": 5.784615384615385e-06,
"loss": 0.1494,
"step": 224
},
{
"epoch": 5.06,
"learning_rate": 5.661538461538462e-06,
"loss": 0.144,
"step": 228
},
{
"epoch": 5.07,
"learning_rate": 5.538461538461539e-06,
"loss": 0.1311,
"step": 232
},
{
"epoch": 5.08,
"learning_rate": 5.415384615384615e-06,
"loss": 0.1411,
"step": 236
},
{
"epoch": 5.09,
"learning_rate": 5.292307692307693e-06,
"loss": 0.1322,
"step": 240
},
{
"epoch": 5.09,
"eval_loss": 0.5350630283355713,
"eval_runtime": 92.5111,
"eval_samples_per_second": 4.259,
"eval_steps_per_second": 0.141,
"eval_wer": 18.2128489576984,
"step": 240
},
{
"epoch": 5.1,
"learning_rate": 5.16923076923077e-06,
"loss": 0.1436,
"step": 244
},
{
"epoch": 6.0,
"learning_rate": 5.046153846153846e-06,
"loss": 0.1375,
"step": 248
},
{
"epoch": 6.01,
"learning_rate": 4.923076923076924e-06,
"loss": 0.1361,
"step": 252
},
{
"epoch": 6.02,
"learning_rate": 4.800000000000001e-06,
"loss": 0.129,
"step": 256
},
{
"epoch": 6.03,
"learning_rate": 4.676923076923077e-06,
"loss": 0.1127,
"step": 260
},
{
"epoch": 6.04,
"learning_rate": 4.553846153846154e-06,
"loss": 0.1266,
"step": 264
},
{
"epoch": 6.05,
"learning_rate": 4.430769230769232e-06,
"loss": 0.1193,
"step": 268
},
{
"epoch": 6.06,
"learning_rate": 4.307692307692308e-06,
"loss": 0.1127,
"step": 272
},
{
"epoch": 6.07,
"learning_rate": 4.184615384615385e-06,
"loss": 0.1064,
"step": 276
},
{
"epoch": 6.08,
"learning_rate": 4.061538461538462e-06,
"loss": 0.123,
"step": 280
},
{
"epoch": 6.08,
"eval_loss": 0.5429388284683228,
"eval_runtime": 91.5818,
"eval_samples_per_second": 4.302,
"eval_steps_per_second": 0.142,
"eval_wer": 18.907716689016212,
"step": 280
},
{
"epoch": 6.09,
"learning_rate": 3.938461538461539e-06,
"loss": 0.1057,
"step": 284
},
{
"epoch": 7.0,
"learning_rate": 3.815384615384616e-06,
"loss": 0.1258,
"step": 288
},
{
"epoch": 7.01,
"learning_rate": 3.692307692307693e-06,
"loss": 0.1108,
"step": 292
},
{
"epoch": 7.02,
"learning_rate": 3.5692307692307692e-06,
"loss": 0.1115,
"step": 296
},
{
"epoch": 7.03,
"learning_rate": 3.4461538461538464e-06,
"loss": 0.0998,
"step": 300
},
{
"epoch": 7.04,
"learning_rate": 3.323076923076923e-06,
"loss": 0.1106,
"step": 304
},
{
"epoch": 7.05,
"learning_rate": 3.2000000000000003e-06,
"loss": 0.1045,
"step": 308
},
{
"epoch": 7.06,
"learning_rate": 3.0769230769230774e-06,
"loss": 0.0908,
"step": 312
},
{
"epoch": 7.07,
"learning_rate": 2.953846153846154e-06,
"loss": 0.0931,
"step": 316
},
{
"epoch": 7.08,
"learning_rate": 2.830769230769231e-06,
"loss": 0.1074,
"step": 320
},
{
"epoch": 7.08,
"eval_loss": 0.5500437021255493,
"eval_runtime": 104.0907,
"eval_samples_per_second": 3.785,
"eval_steps_per_second": 0.125,
"eval_wer": 19.054004632451544,
"step": 320
},
{
"epoch": 7.09,
"learning_rate": 2.7076923076923076e-06,
"loss": 0.0937,
"step": 324
},
{
"epoch": 7.1,
"learning_rate": 2.584615384615385e-06,
"loss": 0.1091,
"step": 328
},
{
"epoch": 8.01,
"learning_rate": 2.461538461538462e-06,
"loss": 0.0951,
"step": 332
},
{
"epoch": 8.02,
"learning_rate": 2.3384615384615387e-06,
"loss": 0.1003,
"step": 336
},
{
"epoch": 8.03,
"learning_rate": 2.215384615384616e-06,
"loss": 0.0836,
"step": 340
},
{
"epoch": 8.04,
"learning_rate": 2.0923076923076926e-06,
"loss": 0.0907,
"step": 344
},
{
"epoch": 8.05,
"learning_rate": 1.9692307692307693e-06,
"loss": 0.1013,
"step": 348
},
{
"epoch": 8.06,
"learning_rate": 1.8461538461538465e-06,
"loss": 0.0891,
"step": 352
},
{
"epoch": 8.07,
"learning_rate": 1.7230769230769232e-06,
"loss": 0.077,
"step": 356
},
{
"epoch": 8.08,
"learning_rate": 1.6000000000000001e-06,
"loss": 0.1007,
"step": 360
},
{
"epoch": 8.08,
"eval_loss": 0.5552565455436707,
"eval_runtime": 88.458,
"eval_samples_per_second": 4.454,
"eval_steps_per_second": 0.147,
"eval_wer": 19.310008533463368,
"step": 360
},
{
"epoch": 8.09,
"learning_rate": 1.476923076923077e-06,
"loss": 0.0849,
"step": 364
},
{
"epoch": 8.1,
"learning_rate": 1.3538461538461538e-06,
"loss": 0.0971,
"step": 368
},
{
"epoch": 9.01,
"learning_rate": 1.230769230769231e-06,
"loss": 0.0876,
"step": 372
},
{
"epoch": 9.02,
"learning_rate": 1.107692307692308e-06,
"loss": 0.0879,
"step": 376
},
{
"epoch": 9.03,
"learning_rate": 9.846153846153847e-07,
"loss": 0.0805,
"step": 380
},
{
"epoch": 9.04,
"learning_rate": 8.615384615384616e-07,
"loss": 0.0888,
"step": 384
},
{
"epoch": 9.05,
"learning_rate": 7.384615384615385e-07,
"loss": 0.0858,
"step": 388
},
{
"epoch": 9.06,
"learning_rate": 6.153846153846155e-07,
"loss": 0.0825,
"step": 392
},
{
"epoch": 9.07,
"learning_rate": 4.923076923076923e-07,
"loss": 0.0748,
"step": 396
},
{
"epoch": 9.08,
"learning_rate": 3.6923076923076927e-07,
"loss": 0.0876,
"step": 400
},
{
"epoch": 9.08,
"eval_loss": 0.5568162202835083,
"eval_runtime": 89.7223,
"eval_samples_per_second": 4.391,
"eval_steps_per_second": 0.145,
"eval_wer": 19.3465805193222,
"step": 400
},
{
"epoch": 9.09,
"learning_rate": 2.4615384615384616e-07,
"loss": 0.0802,
"step": 404
},
{
"epoch": 9.09,
"step": 407,
"total_flos": 6.36398180352e+17,
"train_loss": 0.35074408769753995,
"train_runtime": 2707.3827,
"train_samples_per_second": 9.621,
"train_steps_per_second": 0.15
}
],
"max_steps": 407,
"num_train_epochs": 9223372036854775807,
"total_flos": 6.36398180352e+17,
"trial_name": null,
"trial_params": null
}