|
{ |
|
"best_metric": 0.923943661971831, |
|
"best_model_checkpoint": "./ssw-finetune/checkpoint-1150", |
|
"epoch": 115.0, |
|
"eval_steps": 25, |
|
"global_step": 1150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 5.561491012573242, |
|
"learning_rate": 2.9999999999999997e-06, |
|
"loss": 7.7799, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.18166971206665, |
|
"learning_rate": 6.749999999999999e-06, |
|
"loss": 7.4713, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 6.806884288787842, |
|
"learning_rate": 1.05e-05, |
|
"loss": 7.696, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
      "grad_norm": null,
|
"learning_rate": 1.3499999999999998e-05, |
|
"loss": 7.9462, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
      "grad_norm": null,
|
"learning_rate": 1.6499999999999998e-05, |
|
"loss": 8.0165, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 7.467132091522217, |
|
"eval_runtime": 1.054, |
|
"eval_samples_per_second": 22.77, |
|
"eval_steps_per_second": 0.949, |
|
"eval_wer": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.7979044914245605, |
|
"learning_rate": 2.025e-05, |
|
"loss": 6.5204, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 9.716986656188965, |
|
"learning_rate": 2.3999999999999997e-05, |
|
"loss": 7.6715, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.3519299030303955, |
|
"learning_rate": 2.7749999999999997e-05, |
|
"loss": 7.0161, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"grad_norm": 5.6749138832092285, |
|
"learning_rate": 3.149999999999999e-05, |
|
"loss": 8.0617, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 8.150848388671875, |
|
"learning_rate": 3.5249999999999996e-05, |
|
"loss": 6.3142, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 6.626723766326904, |
|
"eval_runtime": 1.071, |
|
"eval_samples_per_second": 22.409, |
|
"eval_steps_per_second": 0.934, |
|
"eval_wer": 1.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"grad_norm": 6.339476585388184, |
|
"learning_rate": 3.9e-05, |
|
"loss": 6.2643, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 13.012835502624512, |
|
"learning_rate": 4.2749999999999996e-05, |
|
"loss": 7.1655, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"grad_norm": 11.24893569946289, |
|
"learning_rate": 4.65e-05, |
|
"loss": 5.8178, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 24.677473068237305, |
|
"learning_rate": 5.025e-05, |
|
"loss": 5.5684, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"grad_norm": 8.214367866516113, |
|
"learning_rate": 5.399999999999999e-05, |
|
"loss": 4.3185, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_loss": 3.72790789604187, |
|
"eval_runtime": 1.0391, |
|
"eval_samples_per_second": 23.097, |
|
"eval_steps_per_second": 0.962, |
|
"eval_wer": 1.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 8.643641471862793, |
|
"learning_rate": 5.7749999999999994e-05, |
|
"loss": 4.1807, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"grad_norm": 10.54008674621582, |
|
"learning_rate": 6.149999999999999e-05, |
|
"loss": 3.7552, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 3.332289934158325, |
|
"learning_rate": 6.525e-05, |
|
"loss": 3.7053, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"grad_norm": 4.925398349761963, |
|
"learning_rate": 6.9e-05, |
|
"loss": 3.3661, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 5.291933536529541, |
|
"learning_rate": 7.274999999999999e-05, |
|
"loss": 3.1777, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 3.050647735595703, |
|
"eval_runtime": 1.0273, |
|
"eval_samples_per_second": 23.362, |
|
"eval_steps_per_second": 0.973, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"grad_norm": 1.6660319566726685, |
|
"learning_rate": 7.649999999999999e-05, |
|
"loss": 3.0435, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.681082546710968, |
|
"learning_rate": 8.025e-05, |
|
"loss": 3.254, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"grad_norm": 2.713016986846924, |
|
"learning_rate": 8.4e-05, |
|
"loss": 2.972, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 7.211615085601807, |
|
"learning_rate": 8.774999999999999e-05, |
|
"loss": 3.1145, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 3.3372182846069336, |
|
"learning_rate": 9.149999999999999e-05, |
|
"loss": 3.0587, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_loss": 2.936924695968628, |
|
"eval_runtime": 1.0424, |
|
"eval_samples_per_second": 23.023, |
|
"eval_steps_per_second": 0.959, |
|
"eval_wer": 1.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.895374059677124, |
|
"learning_rate": 9.525e-05, |
|
"loss": 2.9096, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"grad_norm": 8.356375694274902, |
|
"learning_rate": 9.9e-05, |
|
"loss": 3.3159, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 1.6825320720672607, |
|
"learning_rate": 0.00010275, |
|
"loss": 2.9022, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"grad_norm": 0.7314967513084412, |
|
"learning_rate": 0.00010649999999999999, |
|
"loss": 2.9058, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 3.183772563934326, |
|
"learning_rate": 0.00011024999999999998, |
|
"loss": 3.0633, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 2.9296257495880127, |
|
"eval_runtime": 1.0347, |
|
"eval_samples_per_second": 23.194, |
|
"eval_steps_per_second": 0.966, |
|
"eval_wer": 1.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"grad_norm": 1.4891362190246582, |
|
"learning_rate": 0.00011399999999999999, |
|
"loss": 2.9901, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 5.58284854888916, |
|
"learning_rate": 0.00011774999999999999, |
|
"loss": 2.9861, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"grad_norm": 1.3804000616073608, |
|
"learning_rate": 0.0001215, |
|
"loss": 2.9584, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 1.6562563180923462, |
|
"learning_rate": 0.00012524999999999998, |
|
"loss": 3.0194, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"grad_norm": 0.653541088104248, |
|
"learning_rate": 0.000129, |
|
"loss": 2.9639, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"eval_loss": 2.926556348800659, |
|
"eval_runtime": 1.0442, |
|
"eval_samples_per_second": 22.985, |
|
"eval_steps_per_second": 0.958, |
|
"eval_wer": 1.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 9.456038475036621, |
|
"learning_rate": 0.00013275, |
|
"loss": 2.8944, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 18.5, |
|
"grad_norm": 0.3759576082229614, |
|
"learning_rate": 0.00013649999999999998, |
|
"loss": 2.9149, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.0567305088043213, |
|
"learning_rate": 0.00014025, |
|
"loss": 3.0321, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"grad_norm": 8.436885833740234, |
|
"learning_rate": 0.00014399999999999998, |
|
"loss": 2.9683, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.9778860807418823, |
|
"learning_rate": 0.00014774999999999999, |
|
"loss": 2.9576, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 2.9644908905029297, |
|
"eval_runtime": 1.0268, |
|
"eval_samples_per_second": 23.374, |
|
"eval_steps_per_second": 0.974, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 20.5, |
|
"grad_norm": 0.6856608390808105, |
|
"learning_rate": 0.00014976923076923077, |
|
"loss": 2.9374, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 1.157402515411377, |
|
"learning_rate": 0.00014919230769230767, |
|
"loss": 2.875, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 21.5, |
|
"grad_norm": 0.42920613288879395, |
|
"learning_rate": 0.0001486153846153846, |
|
"loss": 2.9796, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 4.603660583496094, |
|
"learning_rate": 0.00014803846153846152, |
|
"loss": 2.9233, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"grad_norm": 1.3661619424819946, |
|
"learning_rate": 0.00014746153846153845, |
|
"loss": 2.8708, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"eval_loss": 2.9085776805877686, |
|
"eval_runtime": 1.0387, |
|
"eval_samples_per_second": 23.106, |
|
"eval_steps_per_second": 0.963, |
|
"eval_wer": 1.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 0.7445681691169739, |
|
"learning_rate": 0.00014688461538461537, |
|
"loss": 2.933, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 23.5, |
|
"grad_norm": 1.2040903568267822, |
|
"learning_rate": 0.0001463076923076923, |
|
"loss": 2.9217, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 4.538419246673584, |
|
"learning_rate": 0.00014573076923076923, |
|
"loss": 2.9043, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 24.5, |
|
"grad_norm": 0.36169031262397766, |
|
"learning_rate": 0.00014515384615384615, |
|
"loss": 2.8554, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 1.2133870124816895, |
|
"learning_rate": 0.00014457692307692305, |
|
"loss": 2.943, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 2.900446653366089, |
|
"eval_runtime": 1.0279, |
|
"eval_samples_per_second": 23.348, |
|
"eval_steps_per_second": 0.973, |
|
"eval_wer": 1.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 25.5, |
|
"grad_norm": 1.1455128192901611, |
|
"learning_rate": 0.00014399999999999998, |
|
"loss": 2.8775, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 3.7162177562713623, |
|
"learning_rate": 0.0001434230769230769, |
|
"loss": 2.9401, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 26.5, |
|
"grad_norm": 4.095553398132324, |
|
"learning_rate": 0.00014284615384615383, |
|
"loss": 2.9053, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"grad_norm": 2.0302634239196777, |
|
"learning_rate": 0.00014226923076923075, |
|
"loss": 2.975, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"grad_norm": 3.123234510421753, |
|
"learning_rate": 0.00014169230769230768, |
|
"loss": 2.9225, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"eval_loss": 2.9469966888427734, |
|
"eval_runtime": 1.023, |
|
"eval_samples_per_second": 23.46, |
|
"eval_steps_per_second": 0.978, |
|
"eval_wer": 1.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 0.886202871799469, |
|
"learning_rate": 0.0001411153846153846, |
|
"loss": 2.8783, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 28.5, |
|
"grad_norm": 0.48980531096458435, |
|
"learning_rate": 0.00014053846153846153, |
|
"loss": 2.8977, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"grad_norm": 2.4499869346618652, |
|
"learning_rate": 0.00013996153846153843, |
|
"loss": 2.9178, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 29.5, |
|
"grad_norm": 3.5155863761901855, |
|
"learning_rate": 0.00013938461538461536, |
|
"loss": 2.8955, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 3.8240697383880615, |
|
"learning_rate": 0.00013880769230769228, |
|
"loss": 2.9897, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 2.9530646800994873, |
|
"eval_runtime": 1.0334, |
|
"eval_samples_per_second": 23.224, |
|
"eval_steps_per_second": 0.968, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 30.5, |
|
"grad_norm": 1.4881560802459717, |
|
"learning_rate": 0.0001382307692307692, |
|
"loss": 2.8732, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"grad_norm": 5.950206756591797, |
|
"learning_rate": 0.00013765384615384613, |
|
"loss": 2.9688, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 31.5, |
|
"grad_norm": 0.8825148940086365, |
|
"learning_rate": 0.00013707692307692306, |
|
"loss": 2.869, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 1.6368755102157593, |
|
"learning_rate": 0.00013649999999999998, |
|
"loss": 2.8843, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"grad_norm": 1.556404709815979, |
|
"learning_rate": 0.0001359230769230769, |
|
"loss": 2.8514, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"eval_loss": 2.911478042602539, |
|
"eval_runtime": 1.021, |
|
"eval_samples_per_second": 23.505, |
|
"eval_steps_per_second": 0.979, |
|
"eval_wer": 1.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"grad_norm": 0.6802976131439209, |
|
"learning_rate": 0.00013534615384615384, |
|
"loss": 2.8542, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 33.5, |
|
"grad_norm": 0.5035978555679321, |
|
"learning_rate": 0.00013476923076923076, |
|
"loss": 2.9064, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"grad_norm": 1.6443456411361694, |
|
"learning_rate": 0.0001341923076923077, |
|
"loss": 2.8498, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 34.5, |
|
"grad_norm": 0.6262179017066956, |
|
"learning_rate": 0.0001336153846153846, |
|
"loss": 2.8368, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 0.8266497850418091, |
|
"learning_rate": 0.00013303846153846154, |
|
"loss": 2.8681, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 2.9094789028167725, |
|
"eval_runtime": 1.0369, |
|
"eval_samples_per_second": 23.145, |
|
"eval_steps_per_second": 0.964, |
|
"eval_wer": 1.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 35.5, |
|
"grad_norm": 0.33677324652671814, |
|
"learning_rate": 0.00013246153846153846, |
|
"loss": 2.8163, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"grad_norm": 0.6221341490745544, |
|
"learning_rate": 0.0001318846153846154, |
|
"loss": 2.8746, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 36.5, |
|
"grad_norm": 0.5015878677368164, |
|
"learning_rate": 0.00013130769230769232, |
|
"loss": 2.8477, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"grad_norm": 0.6005992889404297, |
|
"learning_rate": 0.00013073076923076921, |
|
"loss": 2.838, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"grad_norm": 0.4997330605983734, |
|
"learning_rate": 0.00013015384615384614, |
|
"loss": 2.8431, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"eval_loss": 2.90104603767395, |
|
"eval_runtime": 1.017, |
|
"eval_samples_per_second": 23.599, |
|
"eval_steps_per_second": 0.983, |
|
"eval_wer": 1.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"grad_norm": 1.342210292816162, |
|
"learning_rate": 0.00012957692307692307, |
|
"loss": 2.8672, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 38.5, |
|
"grad_norm": 1.2935914993286133, |
|
"learning_rate": 0.000129, |
|
"loss": 2.848, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"grad_norm": 0.41487249732017517, |
|
"learning_rate": 0.00012842307692307692, |
|
"loss": 2.8244, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 39.5, |
|
"grad_norm": 1.1988450288772583, |
|
"learning_rate": 0.00012784615384615384, |
|
"loss": 2.8328, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 2.2671468257904053, |
|
"learning_rate": 0.00012726923076923077, |
|
"loss": 2.8843, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 2.9156665802001953, |
|
"eval_runtime": 1.0579, |
|
"eval_samples_per_second": 22.686, |
|
"eval_steps_per_second": 0.945, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 40.5, |
|
"grad_norm": 1.003772497177124, |
|
"learning_rate": 0.0001266923076923077, |
|
"loss": 2.8312, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"grad_norm": 1.2402571439743042, |
|
"learning_rate": 0.00012611538461538462, |
|
"loss": 2.8291, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 41.5, |
|
"grad_norm": 0.29388442635536194, |
|
"learning_rate": 0.00012553846153846152, |
|
"loss": 2.8275, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"grad_norm": 0.9477460980415344, |
|
"learning_rate": 0.00012496153846153844, |
|
"loss": 2.8384, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 42.5, |
|
"grad_norm": 1.4519686698913574, |
|
"learning_rate": 0.00012438461538461537, |
|
"loss": 2.9357, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 42.5, |
|
"eval_loss": 2.902658462524414, |
|
"eval_runtime": 1.0363, |
|
"eval_samples_per_second": 23.158, |
|
"eval_steps_per_second": 0.965, |
|
"eval_wer": 1.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"grad_norm": 0.4391646087169647, |
|
"learning_rate": 0.0001238076923076923, |
|
"loss": 2.8395, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 43.5, |
|
"grad_norm": 2.1784377098083496, |
|
"learning_rate": 0.00012323076923076922, |
|
"loss": 2.8599, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"grad_norm": 0.9729048609733582, |
|
"learning_rate": 0.00012265384615384615, |
|
"loss": 2.8489, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 44.5, |
|
"grad_norm": 0.5243009328842163, |
|
"learning_rate": 0.00012207692307692307, |
|
"loss": 2.83, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"grad_norm": 0.7081323862075806, |
|
"learning_rate": 0.0001215, |
|
"loss": 2.8236, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 2.901521682739258, |
|
"eval_runtime": 1.0318, |
|
"eval_samples_per_second": 23.261, |
|
"eval_steps_per_second": 0.969, |
|
"eval_wer": 1.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 45.5, |
|
"grad_norm": 0.3105088770389557, |
|
"learning_rate": 0.00012092307692307691, |
|
"loss": 2.8189, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"grad_norm": 0.6120209097862244, |
|
"learning_rate": 0.00012034615384615384, |
|
"loss": 2.8075, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 46.5, |
|
"grad_norm": 0.996507465839386, |
|
"learning_rate": 0.00011976923076923076, |
|
"loss": 2.8318, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"grad_norm": 7.280458927154541, |
|
"learning_rate": 0.00011919230769230767, |
|
"loss": 2.871, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 47.5, |
|
"grad_norm": 0.8332684636116028, |
|
"learning_rate": 0.0001186153846153846, |
|
"loss": 2.8376, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 47.5, |
|
"eval_loss": 2.900068998336792, |
|
"eval_runtime": 1.0322, |
|
"eval_samples_per_second": 23.251, |
|
"eval_steps_per_second": 0.969, |
|
"eval_wer": 1.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"grad_norm": 0.6555355191230774, |
|
"learning_rate": 0.00011803846153846153, |
|
"loss": 2.7954, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 48.5, |
|
"grad_norm": 1.127866268157959, |
|
"learning_rate": 0.00011746153846153845, |
|
"loss": 2.8494, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"grad_norm": 0.7961714863777161, |
|
"learning_rate": 0.00011688461538461538, |
|
"loss": 2.8446, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 49.5, |
|
"grad_norm": 1.9832100868225098, |
|
"learning_rate": 0.00011630769230769229, |
|
"loss": 2.8353, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 0.9229313731193542, |
|
"learning_rate": 0.00011573076923076922, |
|
"loss": 2.8148, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 2.8878333568573, |
|
"eval_runtime": 1.0279, |
|
"eval_samples_per_second": 23.349, |
|
"eval_steps_per_second": 0.973, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 50.5, |
|
"grad_norm": 2.113555669784546, |
|
"learning_rate": 0.00011515384615384614, |
|
"loss": 2.816, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"grad_norm": 2.10042667388916, |
|
"learning_rate": 0.00011457692307692307, |
|
"loss": 2.8544, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 51.5, |
|
"grad_norm": 0.48272839188575745, |
|
"learning_rate": 0.00011399999999999999, |
|
"loss": 2.8207, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"grad_norm": 0.9009172320365906, |
|
"learning_rate": 0.00011342307692307692, |
|
"loss": 2.8008, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 52.5, |
|
"grad_norm": 1.0341640710830688, |
|
"learning_rate": 0.00011284615384615384, |
|
"loss": 2.8057, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 52.5, |
|
"eval_loss": 2.8624706268310547, |
|
"eval_runtime": 1.037, |
|
"eval_samples_per_second": 23.144, |
|
"eval_steps_per_second": 0.964, |
|
"eval_wer": 1.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"grad_norm": 1.3395497798919678, |
|
"learning_rate": 0.00011226923076923077, |
|
"loss": 2.7866, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 53.5, |
|
"grad_norm": 0.3619355261325836, |
|
"learning_rate": 0.00011169230769230768, |
|
"loss": 2.7779, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"grad_norm": 1.4029289484024048, |
|
"learning_rate": 0.0001111153846153846, |
|
"loss": 2.789, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 54.5, |
|
"grad_norm": 0.29736635088920593, |
|
"learning_rate": 0.00011053846153846152, |
|
"loss": 2.7452, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"grad_norm": 1.7570823431015015, |
|
"learning_rate": 0.00010996153846153845, |
|
"loss": 2.7268, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_loss": 2.819674253463745, |
|
"eval_runtime": 1.0343, |
|
"eval_samples_per_second": 23.205, |
|
"eval_steps_per_second": 0.967, |
|
"eval_wer": 1.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 55.5, |
|
"grad_norm": 0.3762887418270111, |
|
"learning_rate": 0.00010938461538461537, |
|
"loss": 2.7224, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"grad_norm": 1.0835281610488892, |
|
"learning_rate": 0.0001088076923076923, |
|
"loss": 2.7022, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 56.5, |
|
"grad_norm": 1.721433401107788, |
|
"learning_rate": 0.00010823076923076922, |
|
"loss": 2.6927, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"grad_norm": 2.9872403144836426, |
|
"learning_rate": 0.00010765384615384615, |
|
"loss": 2.7924, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 57.5, |
|
"grad_norm": 0.5493649840354919, |
|
"learning_rate": 0.00010707692307692306, |
|
"loss": 2.6252, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 57.5, |
|
"eval_loss": 2.807591676712036, |
|
"eval_runtime": 1.0323, |
|
"eval_samples_per_second": 23.25, |
|
"eval_steps_per_second": 0.969, |
|
"eval_wer": 1.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"grad_norm": 1.2353851795196533, |
|
"learning_rate": 0.00010649999999999999, |
|
"loss": 2.6458, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 58.5, |
|
"grad_norm": 0.7240511775016785, |
|
"learning_rate": 0.00010592307692307691, |
|
"loss": 2.5911, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"grad_norm": 0.9982340335845947, |
|
"learning_rate": 0.00010534615384615384, |
|
"loss": 2.6489, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 59.5, |
|
"grad_norm": 0.6784680485725403, |
|
"learning_rate": 0.00010476923076923076, |
|
"loss": 2.5169, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 1.9756778478622437, |
|
"learning_rate": 0.00010419230769230769, |
|
"loss": 2.5511, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_loss": 2.615316152572632, |
|
"eval_runtime": 1.0274, |
|
"eval_samples_per_second": 23.361, |
|
"eval_steps_per_second": 0.973, |
|
"eval_wer": 1.0056338028169014, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 60.5, |
|
"grad_norm": 1.3284317255020142, |
|
"learning_rate": 0.00010361538461538462, |
|
"loss": 2.4731, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"grad_norm": 1.3110464811325073, |
|
"learning_rate": 0.00010303846153846154, |
|
"loss": 2.4817, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 61.5, |
|
"grad_norm": 1.003812551498413, |
|
"learning_rate": 0.00010246153846153844, |
|
"loss": 2.3945, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"grad_norm": 1.148573398590088, |
|
"learning_rate": 0.00010188461538461537, |
|
"loss": 2.399, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"grad_norm": 0.5585479736328125, |
|
"learning_rate": 0.00010130769230769229, |
|
"loss": 2.323, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"eval_loss": 2.4444546699523926, |
|
"eval_runtime": 1.0272, |
|
"eval_samples_per_second": 23.365, |
|
"eval_steps_per_second": 0.974, |
|
"eval_wer": 1.0169014084507042, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"grad_norm": 2.2142958641052246, |
|
"learning_rate": 0.00010073076923076922, |
|
"loss": 2.2927, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 63.5, |
|
"grad_norm": 1.0168890953063965, |
|
"learning_rate": 0.00010015384615384614, |
|
"loss": 2.2108, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"grad_norm": 1.312639832496643, |
|
"learning_rate": 9.957692307692307e-05, |
|
"loss": 2.1866, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 64.5, |
|
"grad_norm": 0.5699294209480286, |
|
"learning_rate": 9.9e-05, |
|
"loss": 2.1114, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"grad_norm": 1.4273818731307983, |
|
"learning_rate": 9.842307692307692e-05, |
|
"loss": 2.1119, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_loss": 2.2476181983947754, |
|
"eval_runtime": 1.0519, |
|
"eval_samples_per_second": 22.815, |
|
"eval_steps_per_second": 0.951, |
|
"eval_wer": 1.1183098591549296, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 65.5, |
|
"grad_norm": 0.5214980244636536, |
|
"learning_rate": 9.784615384615383e-05, |
|
"loss": 2.0414, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"grad_norm": 2.480297803878784, |
|
"learning_rate": 9.726923076923076e-05, |
|
"loss": 2.0609, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 66.5, |
|
"grad_norm": 3.5270726680755615, |
|
"learning_rate": 9.669230769230768e-05, |
|
"loss": 1.9963, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"grad_norm": 14.827882766723633, |
|
"learning_rate": 9.611538461538461e-05, |
|
"loss": 1.9333, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 67.5, |
|
"grad_norm": 1.1005451679229736, |
|
"learning_rate": 9.553846153846153e-05, |
|
"loss": 1.8514, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 67.5, |
|
"eval_loss": 2.173093318939209, |
|
"eval_runtime": 1.033, |
|
"eval_samples_per_second": 23.233, |
|
"eval_steps_per_second": 0.968, |
|
"eval_wer": 1.095774647887324, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"grad_norm": 1.5897767543792725, |
|
"learning_rate": 9.496153846153846e-05, |
|
"loss": 1.9986, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 68.5, |
|
"grad_norm": 0.8863438963890076, |
|
"learning_rate": 9.438461538461539e-05, |
|
"loss": 1.8067, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"grad_norm": 1.305874228477478, |
|
"learning_rate": 9.380769230769231e-05, |
|
"loss": 1.7975, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 69.5, |
|
"grad_norm": 0.6541560292243958, |
|
"learning_rate": 9.323076923076921e-05, |
|
"loss": 1.7655, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"grad_norm": 1.056104063987732, |
|
"learning_rate": 9.265384615384614e-05, |
|
"loss": 1.7094, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_loss": 2.0642001628875732, |
|
"eval_runtime": 1.0377, |
|
"eval_samples_per_second": 23.129, |
|
"eval_steps_per_second": 0.964, |
|
"eval_wer": 1.0309859154929577, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 70.5, |
|
"grad_norm": 0.5228053331375122, |
|
"learning_rate": 9.207692307692306e-05, |
|
"loss": 1.6764, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"grad_norm": 6.9655256271362305, |
|
"learning_rate": 9.149999999999999e-05, |
|
"loss": 1.7414, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 71.5, |
|
"grad_norm": 0.6360809206962585, |
|
"learning_rate": 9.092307692307691e-05, |
|
"loss": 1.6232, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"grad_norm": 1.2141180038452148, |
|
"learning_rate": 9.034615384615384e-05, |
|
"loss": 1.6497, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 72.5, |
|
"grad_norm": 0.874902606010437, |
|
"learning_rate": 8.976923076923077e-05, |
|
"loss": 1.6069, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 72.5, |
|
"eval_loss": 2.0792412757873535, |
|
"eval_runtime": 1.0243, |
|
"eval_samples_per_second": 23.431, |
|
"eval_steps_per_second": 0.976, |
|
"eval_wer": 1.0788732394366196, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"grad_norm": 0.9335172176361084, |
|
"learning_rate": 8.919230769230769e-05, |
|
"loss": 1.4947, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 73.5, |
|
"grad_norm": 1.299177885055542, |
|
"learning_rate": 8.861538461538462e-05, |
|
"loss": 1.5304, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"grad_norm": 1.6317135095596313, |
|
"learning_rate": 8.803846153846153e-05, |
|
"loss": 1.5218, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 74.5, |
|
"grad_norm": 0.8083561062812805, |
|
"learning_rate": 8.746153846153845e-05, |
|
"loss": 1.5259, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"grad_norm": 1.805677890777588, |
|
"learning_rate": 8.688461538461538e-05, |
|
"loss": 1.4663, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_loss": 2.0323963165283203, |
|
"eval_runtime": 1.0407, |
|
"eval_samples_per_second": 23.062, |
|
"eval_steps_per_second": 0.961, |
|
"eval_wer": 1.036619718309859, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 75.5, |
|
"grad_norm": 0.8463692665100098, |
|
"learning_rate": 8.63076923076923e-05, |
|
"loss": 1.4244, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"grad_norm": 2.091686248779297, |
|
"learning_rate": 8.573076923076923e-05, |
|
"loss": 1.3791, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 76.5, |
|
"grad_norm": 0.7040625810623169, |
|
"learning_rate": 8.515384615384614e-05, |
|
"loss": 1.3495, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"grad_norm": 1.7725024223327637, |
|
"learning_rate": 8.457692307692307e-05, |
|
"loss": 1.3497, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 77.5, |
|
"grad_norm": 0.808942437171936, |
|
"learning_rate": 8.4e-05, |
|
"loss": 1.288, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 77.5, |
|
"eval_loss": 2.0642640590667725, |
|
"eval_runtime": 1.0443, |
|
"eval_samples_per_second": 22.982, |
|
"eval_steps_per_second": 0.958, |
|
"eval_wer": 1.0929577464788733, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"grad_norm": 3.843997001647949, |
|
"learning_rate": 8.342307692307691e-05, |
|
"loss": 1.2597, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 78.5, |
|
"grad_norm": 0.9082187414169312, |
|
"learning_rate": 8.284615384615383e-05, |
|
"loss": 1.2702, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"grad_norm": 1.4159339666366577, |
|
"learning_rate": 8.226923076923076e-05, |
|
"loss": 1.2833, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 79.5, |
|
"grad_norm": 1.0848701000213623, |
|
"learning_rate": 8.169230769230768e-05, |
|
"loss": 1.2117, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 2.275663137435913, |
|
"learning_rate": 8.111538461538461e-05, |
|
"loss": 1.262, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_loss": 2.084003210067749, |
|
"eval_runtime": 1.0408, |
|
"eval_samples_per_second": 23.059, |
|
"eval_steps_per_second": 0.961, |
|
"eval_wer": 1.076056338028169, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 80.5, |
|
"grad_norm": 0.9842613339424133, |
|
"learning_rate": 8.053846153846154e-05, |
|
"loss": 1.2799, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"grad_norm": 20.336593627929688, |
|
"learning_rate": 7.996153846153846e-05, |
|
"loss": 1.2903, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 81.5, |
|
"grad_norm": 0.8291641473770142, |
|
"learning_rate": 7.938461538461539e-05, |
|
"loss": 1.1215, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"grad_norm": 1.6971830129623413, |
|
"learning_rate": 7.88076923076923e-05, |
|
"loss": 1.1435, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 82.5, |
|
"grad_norm": 0.69861900806427, |
|
"learning_rate": 7.823076923076923e-05, |
|
"loss": 1.043, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 82.5, |
|
"eval_loss": 2.149214506149292, |
|
"eval_runtime": 1.0296, |
|
"eval_samples_per_second": 23.311, |
|
"eval_steps_per_second": 0.971, |
|
"eval_wer": 1.0901408450704226, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"grad_norm": 1.7208884954452515, |
|
"learning_rate": 7.776923076923076e-05, |
|
"loss": 1.203, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 83.5, |
|
"grad_norm": 0.8559800982475281, |
|
"learning_rate": 7.719230769230768e-05, |
|
"loss": 1.0825, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"grad_norm": 1.6605381965637207, |
|
"learning_rate": 7.661538461538461e-05, |
|
"loss": 1.1121, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 84.5, |
|
"grad_norm": 1.077573537826538, |
|
"learning_rate": 7.603846153846154e-05, |
|
"loss": 1.0145, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"grad_norm": 2.7091293334960938, |
|
"learning_rate": 7.546153846153846e-05, |
|
"loss": 1.0501, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_loss": 2.177476644515991, |
|
"eval_runtime": 1.0552, |
|
"eval_samples_per_second": 22.744, |
|
"eval_steps_per_second": 0.948, |
|
"eval_wer": 1.0591549295774647, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 85.5, |
|
"grad_norm": 1.3562541007995605, |
|
"learning_rate": 7.488461538461539e-05, |
|
"loss": 1.1098, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"grad_norm": 2.6526386737823486, |
|
"learning_rate": 7.43076923076923e-05, |
|
"loss": 0.8642, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 86.5, |
|
"grad_norm": 1.1710244417190552, |
|
"learning_rate": 7.373076923076922e-05, |
|
"loss": 0.9004, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"grad_norm": 2.9008164405822754, |
|
"learning_rate": 7.315384615384615e-05, |
|
"loss": 1.037, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 87.5, |
|
"grad_norm": 0.6306678056716919, |
|
"learning_rate": 7.257692307692308e-05, |
|
"loss": 0.9726, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 87.5, |
|
"eval_loss": 2.176731586456299, |
|
"eval_runtime": 1.0313, |
|
"eval_samples_per_second": 23.271, |
|
"eval_steps_per_second": 0.97, |
|
"eval_wer": 1.028169014084507, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"grad_norm": 1.6984366178512573, |
|
"learning_rate": 7.199999999999999e-05, |
|
"loss": 1.1201, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 88.5, |
|
"grad_norm": 0.803970992565155, |
|
"learning_rate": 7.142307692307691e-05, |
|
"loss": 0.908, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"grad_norm": 2.103391408920288, |
|
"learning_rate": 7.084615384615384e-05, |
|
"loss": 0.8684, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 89.5, |
|
"grad_norm": 0.9575273990631104, |
|
"learning_rate": 7.026923076923077e-05, |
|
"loss": 0.9791, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"grad_norm": 3.000880479812622, |
|
"learning_rate": 6.969230769230768e-05, |
|
"loss": 0.8079, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_loss": 2.1965668201446533, |
|
"eval_runtime": 1.0433, |
|
"eval_samples_per_second": 23.003, |
|
"eval_steps_per_second": 0.958, |
|
"eval_wer": 0.9943661971830986, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 90.5, |
|
"grad_norm": 0.6576473712921143, |
|
"learning_rate": 6.91153846153846e-05, |
|
"loss": 0.846, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"grad_norm": 2.2526416778564453, |
|
"learning_rate": 6.853846153846153e-05, |
|
"loss": 0.8868, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 91.5, |
|
"grad_norm": 0.5678216814994812, |
|
"learning_rate": 6.796153846153845e-05, |
|
"loss": 0.8925, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"grad_norm": 2.549266815185547, |
|
"learning_rate": 6.738461538461538e-05, |
|
"loss": 1.0163, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 92.5, |
|
"grad_norm": 0.7736966013908386, |
|
"learning_rate": 6.68076923076923e-05, |
|
"loss": 0.7198, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 92.5, |
|
"eval_loss": 2.2433066368103027, |
|
"eval_runtime": 1.0523, |
|
"eval_samples_per_second": 22.808, |
|
"eval_steps_per_second": 0.95, |
|
"eval_wer": 1.0028169014084507, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"grad_norm": 3.742175817489624, |
|
"learning_rate": 6.623076923076923e-05, |
|
"loss": 1.011, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 93.5, |
|
"grad_norm": 0.748150110244751, |
|
"learning_rate": 6.565384615384616e-05, |
|
"loss": 0.7659, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"grad_norm": 2.121845006942749, |
|
"learning_rate": 6.507692307692307e-05, |
|
"loss": 0.7862, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 94.5, |
|
"grad_norm": 0.7966519594192505, |
|
"learning_rate": 6.45e-05, |
|
"loss": 0.8271, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"grad_norm": 1.6206731796264648, |
|
"learning_rate": 6.392307692307692e-05, |
|
"loss": 0.6312, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_loss": 2.309884786605835, |
|
"eval_runtime": 1.062, |
|
"eval_samples_per_second": 22.599, |
|
"eval_steps_per_second": 0.942, |
|
"eval_wer": 0.9971830985915493, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 95.5, |
|
"grad_norm": 3.240893602371216, |
|
"learning_rate": 6.334615384615385e-05, |
|
"loss": 0.723, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"grad_norm": 1.4926756620407104, |
|
"learning_rate": 6.276923076923076e-05, |
|
"loss": 0.7344, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 96.5, |
|
"grad_norm": 0.8542086482048035, |
|
"learning_rate": 6.219230769230769e-05, |
|
"loss": 0.7649, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"grad_norm": 2.2014851570129395, |
|
"learning_rate": 6.161538461538461e-05, |
|
"loss": 0.6969, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 97.5, |
|
"grad_norm": 0.6612327694892883, |
|
"learning_rate": 6.103846153846154e-05, |
|
"loss": 0.6336, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 97.5, |
|
"eval_loss": 2.3546626567840576, |
|
"eval_runtime": 1.0484, |
|
"eval_samples_per_second": 22.893, |
|
"eval_steps_per_second": 0.954, |
|
"eval_wer": 0.9971830985915493, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"grad_norm": 2.117011547088623, |
|
"learning_rate": 6.0461538461538456e-05, |
|
"loss": 0.7537, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 98.5, |
|
"grad_norm": 8.142460823059082, |
|
"learning_rate": 5.988461538461538e-05, |
|
"loss": 0.6593, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"grad_norm": 2.6468851566314697, |
|
"learning_rate": 5.93076923076923e-05, |
|
"loss": 0.8069, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 99.5, |
|
"grad_norm": 1.392821192741394, |
|
"learning_rate": 5.8730769230769226e-05, |
|
"loss": 0.746, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 2.0805888175964355, |
|
"learning_rate": 5.8153846153846145e-05, |
|
"loss": 0.9073, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 2.350856304168701, |
|
"eval_runtime": 1.0707, |
|
"eval_samples_per_second": 22.414, |
|
"eval_steps_per_second": 0.934, |
|
"eval_wer": 0.9943661971830986, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 100.5, |
|
"grad_norm": 18.686534881591797, |
|
"learning_rate": 5.757692307692307e-05, |
|
"loss": 0.7907, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"grad_norm": 1.7688676118850708, |
|
"learning_rate": 5.6999999999999996e-05, |
|
"loss": 0.5693, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 101.5, |
|
"grad_norm": 0.9006216526031494, |
|
"learning_rate": 5.642307692307692e-05, |
|
"loss": 0.6408, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"grad_norm": 2.382704496383667, |
|
"learning_rate": 5.584615384615384e-05, |
|
"loss": 0.7203, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 102.5, |
|
"grad_norm": 0.8852857351303101, |
|
"learning_rate": 5.526923076923076e-05, |
|
"loss": 0.6431, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 102.5, |
|
"eval_loss": 2.4202942848205566, |
|
"eval_runtime": 1.0529, |
|
"eval_samples_per_second": 22.794, |
|
"eval_steps_per_second": 0.95, |
|
"eval_wer": 1.0056338028169014, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"grad_norm": 3.3610403537750244, |
|
"learning_rate": 5.4692307692307686e-05, |
|
"loss": 0.6476, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 103.5, |
|
"grad_norm": 0.8738270401954651, |
|
"learning_rate": 5.411538461538461e-05, |
|
"loss": 0.5492, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"grad_norm": 2.4251339435577393, |
|
"learning_rate": 5.353846153846153e-05, |
|
"loss": 0.6005, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 104.5, |
|
"grad_norm": 0.7935536503791809, |
|
"learning_rate": 5.2961538461538456e-05, |
|
"loss": 0.5855, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"grad_norm": 2.805385112762451, |
|
"learning_rate": 5.238461538461538e-05, |
|
"loss": 0.62, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_loss": 2.3933348655700684, |
|
"eval_runtime": 1.0674, |
|
"eval_samples_per_second": 22.485, |
|
"eval_steps_per_second": 0.937, |
|
"eval_wer": 0.9746478873239437, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 105.5, |
|
"grad_norm": 1.2249245643615723, |
|
"learning_rate": 5.180769230769231e-05, |
|
"loss": 0.652, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"grad_norm": 1.2247533798217773, |
|
"learning_rate": 5.123076923076922e-05, |
|
"loss": 0.6108, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 106.5, |
|
"grad_norm": 0.8812918663024902, |
|
"learning_rate": 5.0653846153846146e-05, |
|
"loss": 0.6453, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"grad_norm": 2.7638535499572754, |
|
"learning_rate": 5.007692307692307e-05, |
|
"loss": 0.568, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 107.5, |
|
"grad_norm": 1.3182368278503418, |
|
"learning_rate": 4.95e-05, |
|
"loss": 0.708, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 107.5, |
|
"eval_loss": 2.4381346702575684, |
|
"eval_runtime": 1.061, |
|
"eval_samples_per_second": 22.619, |
|
"eval_steps_per_second": 0.942, |
|
"eval_wer": 0.9690140845070423, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"grad_norm": 2.4760406017303467, |
|
"learning_rate": 4.8923076923076916e-05, |
|
"loss": 0.6171, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 108.5, |
|
"grad_norm": 0.5409008264541626, |
|
"learning_rate": 4.834615384615384e-05, |
|
"loss": 0.5542, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"grad_norm": 1.675410509109497, |
|
"learning_rate": 4.776923076923077e-05, |
|
"loss": 0.6491, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 109.5, |
|
"grad_norm": 0.8941754698753357, |
|
"learning_rate": 4.719230769230769e-05, |
|
"loss": 0.7266, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"grad_norm": 1.9851211309432983, |
|
"learning_rate": 4.6615384615384605e-05, |
|
"loss": 0.6729, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_loss": 2.474308967590332, |
|
"eval_runtime": 1.0636, |
|
"eval_samples_per_second": 22.564, |
|
"eval_steps_per_second": 0.94, |
|
"eval_wer": 1.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 110.5, |
|
"grad_norm": 0.677306592464447, |
|
"learning_rate": 4.603846153846153e-05, |
|
"loss": 0.7625, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"grad_norm": 2.572356700897217, |
|
"learning_rate": 4.546153846153846e-05, |
|
"loss": 0.5146, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 111.5, |
|
"grad_norm": 1.2789101600646973, |
|
"learning_rate": 4.488461538461538e-05, |
|
"loss": 0.5504, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"grad_norm": 2.3920390605926514, |
|
"learning_rate": 4.430769230769231e-05, |
|
"loss": 0.4821, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 112.5, |
|
"grad_norm": 1.219436764717102, |
|
"learning_rate": 4.373076923076923e-05, |
|
"loss": 0.5779, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 112.5, |
|
"eval_loss": 2.492933988571167, |
|
"eval_runtime": 1.0274, |
|
"eval_samples_per_second": 23.36, |
|
"eval_steps_per_second": 0.973, |
|
"eval_wer": 0.9549295774647887, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"grad_norm": 3.558155059814453, |
|
"learning_rate": 4.315384615384615e-05, |
|
"loss": 0.4743, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 113.5, |
|
"grad_norm": 0.9398171901702881, |
|
"learning_rate": 4.257692307692307e-05, |
|
"loss": 0.493, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"grad_norm": 4.514529705047607, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.4341, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 114.5, |
|
"grad_norm": 1.015120029449463, |
|
"learning_rate": 4.142307692307692e-05, |
|
"loss": 0.5069, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"grad_norm": 2.043063163757324, |
|
"learning_rate": 4.084615384615384e-05, |
|
"loss": 0.6303, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_loss": 2.5056331157684326, |
|
"eval_runtime": 1.0408, |
|
"eval_samples_per_second": 23.06, |
|
"eval_steps_per_second": 0.961, |
|
"eval_wer": 0.923943661971831, |
|
"step": 1150 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.771505223996499e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|