|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 198.91205211726384, |
|
"global_step": 91500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 5.988000000000001e-05, |
|
"loss": 6.0553, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_cer": 0.7651548792050215, |
|
"eval_loss": 3.0141708850860596, |
|
"eval_runtime": 204.505, |
|
"eval_samples_per_second": 21.858, |
|
"eval_steps_per_second": 1.369, |
|
"eval_wer": 0.9928897917724734, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 7.911466666666667e-05, |
|
"loss": 2.3839, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_cer": 0.19668038836815765, |
|
"eval_loss": 0.5387711524963379, |
|
"eval_runtime": 202.7107, |
|
"eval_samples_per_second": 22.051, |
|
"eval_steps_per_second": 1.381, |
|
"eval_wer": 0.45625045345715737, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 7.778133333333334e-05, |
|
"loss": 1.3597, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"eval_cer": 0.15966779880705206, |
|
"eval_loss": 0.4122415781021118, |
|
"eval_runtime": 202.9557, |
|
"eval_samples_per_second": 22.025, |
|
"eval_steps_per_second": 1.38, |
|
"eval_wer": 0.36247551331350214, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 7.644800000000001e-05, |
|
"loss": 1.2294, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"eval_cer": 0.15757353911885075, |
|
"eval_loss": 0.36426475644111633, |
|
"eval_runtime": 204.7716, |
|
"eval_samples_per_second": 21.829, |
|
"eval_steps_per_second": 1.367, |
|
"eval_wer": 0.3416164840745846, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 7.511644444444445e-05, |
|
"loss": 1.1672, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"eval_cer": 0.1442218835135557, |
|
"eval_loss": 0.34255123138427734, |
|
"eval_runtime": 202.492, |
|
"eval_samples_per_second": 22.075, |
|
"eval_steps_per_second": 1.383, |
|
"eval_wer": 0.3202314445331205, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 19.56, |
|
"learning_rate": 7.378311111111112e-05, |
|
"loss": 1.1288, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 19.56, |
|
"eval_cer": 0.1378970992403058, |
|
"eval_loss": 0.31974485516548157, |
|
"eval_runtime": 204.9353, |
|
"eval_samples_per_second": 21.812, |
|
"eval_steps_per_second": 1.366, |
|
"eval_wer": 0.3065551766669085, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 22.83, |
|
"learning_rate": 7.245155555555557e-05, |
|
"loss": 1.0864, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 22.83, |
|
"eval_cer": 0.13394260888350154, |
|
"eval_loss": 0.30638357996940613, |
|
"eval_runtime": 205.7125, |
|
"eval_samples_per_second": 21.729, |
|
"eval_steps_per_second": 1.361, |
|
"eval_wer": 0.29559965174490316, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"learning_rate": 7.111822222222222e-05, |
|
"loss": 1.0493, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"eval_cer": 0.1299701162944205, |
|
"eval_loss": 0.29015836119651794, |
|
"eval_runtime": 205.4201, |
|
"eval_samples_per_second": 21.76, |
|
"eval_steps_per_second": 1.363, |
|
"eval_wer": 0.2849162011173184, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 29.35, |
|
"learning_rate": 6.97848888888889e-05, |
|
"loss": 1.0193, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 29.35, |
|
"eval_cer": 0.12554156715432713, |
|
"eval_loss": 0.27857705950737, |
|
"eval_runtime": 204.6144, |
|
"eval_samples_per_second": 21.846, |
|
"eval_steps_per_second": 1.368, |
|
"eval_wer": 0.2710222738155699, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 32.61, |
|
"learning_rate": 6.845333333333334e-05, |
|
"loss": 0.991, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 32.61, |
|
"eval_cer": 0.12242118022634807, |
|
"eval_loss": 0.27255550026893616, |
|
"eval_runtime": 205.3956, |
|
"eval_samples_per_second": 21.763, |
|
"eval_steps_per_second": 1.363, |
|
"eval_wer": 0.2632046724225495, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 35.87, |
|
"learning_rate": 6.712000000000001e-05, |
|
"loss": 0.965, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 35.87, |
|
"eval_cer": 0.1227452204073305, |
|
"eval_loss": 0.2663341462612152, |
|
"eval_runtime": 207.401, |
|
"eval_samples_per_second": 21.552, |
|
"eval_steps_per_second": 1.35, |
|
"eval_wer": 0.2638395124428644, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 39.13, |
|
"learning_rate": 6.578755555555556e-05, |
|
"loss": 0.9443, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 39.13, |
|
"eval_cer": 0.11871272037732679, |
|
"eval_loss": 0.2498483508825302, |
|
"eval_runtime": 206.2679, |
|
"eval_samples_per_second": 21.671, |
|
"eval_steps_per_second": 1.357, |
|
"eval_wer": 0.2540629761300152, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 42.39, |
|
"learning_rate": 6.445422222222224e-05, |
|
"loss": 0.9258, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 42.39, |
|
"eval_cer": 0.11627641827586621, |
|
"eval_loss": 0.24673064053058624, |
|
"eval_runtime": 211.3131, |
|
"eval_samples_per_second": 21.153, |
|
"eval_steps_per_second": 1.325, |
|
"eval_wer": 0.24822244794311835, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 45.65, |
|
"learning_rate": 6.312088888888889e-05, |
|
"loss": 0.9062, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 45.65, |
|
"eval_cer": 0.11399013477671231, |
|
"eval_loss": 0.23752263188362122, |
|
"eval_runtime": 210.4679, |
|
"eval_samples_per_second": 21.238, |
|
"eval_steps_per_second": 1.33, |
|
"eval_wer": 0.24243633461510555, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 48.91, |
|
"learning_rate": 6.178844444444445e-05, |
|
"loss": 0.8797, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 48.91, |
|
"eval_cer": 0.11319803655653302, |
|
"eval_loss": 0.23775836825370789, |
|
"eval_runtime": 200.5119, |
|
"eval_samples_per_second": 22.293, |
|
"eval_steps_per_second": 1.396, |
|
"eval_wer": 0.24310745120800986, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 52.17, |
|
"learning_rate": 6.0456000000000005e-05, |
|
"loss": 0.8691, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 52.17, |
|
"eval_cer": 0.11229792494269289, |
|
"eval_loss": 0.23617656528949738, |
|
"eval_runtime": 200.8473, |
|
"eval_samples_per_second": 22.256, |
|
"eval_steps_per_second": 1.394, |
|
"eval_wer": 0.23674091271856634, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 55.43, |
|
"learning_rate": 5.912266666666667e-05, |
|
"loss": 0.8554, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 55.43, |
|
"eval_cer": 0.11031767939224464, |
|
"eval_loss": 0.23144488036632538, |
|
"eval_runtime": 200.5411, |
|
"eval_samples_per_second": 22.29, |
|
"eval_steps_per_second": 1.396, |
|
"eval_wer": 0.23295001088297176, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 58.69, |
|
"learning_rate": 5.778933333333334e-05, |
|
"loss": 0.8393, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 58.69, |
|
"eval_cer": 0.10892550676283859, |
|
"eval_loss": 0.22345688939094543, |
|
"eval_runtime": 199.926, |
|
"eval_samples_per_second": 22.358, |
|
"eval_steps_per_second": 1.401, |
|
"eval_wer": 0.22779873757527389, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 61.96, |
|
"learning_rate": 5.645688888888889e-05, |
|
"loss": 0.8268, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 61.96, |
|
"eval_cer": 0.10809140333401342, |
|
"eval_loss": 0.21886605024337769, |
|
"eval_runtime": 200.5151, |
|
"eval_samples_per_second": 22.293, |
|
"eval_steps_per_second": 1.396, |
|
"eval_wer": 0.2251324094899514, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 65.22, |
|
"learning_rate": 5.512355555555556e-05, |
|
"loss": 0.8116, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 65.22, |
|
"eval_cer": 0.10730530585792639, |
|
"eval_loss": 0.22185355424880981, |
|
"eval_runtime": 199.8759, |
|
"eval_samples_per_second": 22.364, |
|
"eval_steps_per_second": 1.401, |
|
"eval_wer": 0.2221395922513241, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 68.48, |
|
"learning_rate": 5.3791111111111116e-05, |
|
"loss": 0.7981, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 68.48, |
|
"eval_cer": 0.10677724037780685, |
|
"eval_loss": 0.21808896958827972, |
|
"eval_runtime": 201.4847, |
|
"eval_samples_per_second": 22.185, |
|
"eval_steps_per_second": 1.39, |
|
"eval_wer": 0.221831241384314, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 71.74, |
|
"learning_rate": 5.245777777777778e-05, |
|
"loss": 0.7839, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 71.74, |
|
"eval_cer": 0.10549308114206161, |
|
"eval_loss": 0.21510420739650726, |
|
"eval_runtime": 200.7972, |
|
"eval_samples_per_second": 22.261, |
|
"eval_steps_per_second": 1.394, |
|
"eval_wer": 0.2175143292461728, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 5.112533333333334e-05, |
|
"loss": 0.7714, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_cer": 0.10464097548095964, |
|
"eval_loss": 0.21727131307125092, |
|
"eval_runtime": 200.0575, |
|
"eval_samples_per_second": 22.344, |
|
"eval_steps_per_second": 1.4, |
|
"eval_wer": 0.21642603206849018, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 78.26, |
|
"learning_rate": 4.9792e-05, |
|
"loss": 0.7582, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 78.26, |
|
"eval_cer": 0.10387288023714941, |
|
"eval_loss": 0.21982263028621674, |
|
"eval_runtime": 201.7359, |
|
"eval_samples_per_second": 22.158, |
|
"eval_steps_per_second": 1.388, |
|
"eval_wer": 0.2148298628745556, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 81.52, |
|
"learning_rate": 4.845955555555556e-05, |
|
"loss": 0.7486, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 81.52, |
|
"eval_cer": 0.10329080806019947, |
|
"eval_loss": 0.21195770800113678, |
|
"eval_runtime": 199.1645, |
|
"eval_samples_per_second": 22.444, |
|
"eval_steps_per_second": 1.406, |
|
"eval_wer": 0.21250816222883262, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 84.78, |
|
"learning_rate": 4.7127111111111114e-05, |
|
"loss": 0.7326, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 84.78, |
|
"eval_cer": 0.10266072993051138, |
|
"eval_loss": 0.21248655021190643, |
|
"eval_runtime": 199.3582, |
|
"eval_samples_per_second": 22.422, |
|
"eval_steps_per_second": 1.405, |
|
"eval_wer": 0.2124537473699485, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 88.04, |
|
"learning_rate": 4.5793777777777784e-05, |
|
"loss": 0.7263, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 88.04, |
|
"eval_cer": 0.1012565558129208, |
|
"eval_loss": 0.20737077295780182, |
|
"eval_runtime": 206.4127, |
|
"eval_samples_per_second": 21.656, |
|
"eval_steps_per_second": 1.357, |
|
"eval_wer": 0.2079735906551549, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 91.3, |
|
"learning_rate": 4.446044444444445e-05, |
|
"loss": 0.7142, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 91.3, |
|
"eval_cer": 0.10114254167516772, |
|
"eval_loss": 0.21454070508480072, |
|
"eval_runtime": 204.8009, |
|
"eval_samples_per_second": 21.826, |
|
"eval_steps_per_second": 1.367, |
|
"eval_wer": 0.2069397083363564, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 94.56, |
|
"learning_rate": 4.312711111111111e-05, |
|
"loss": 0.7044, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 94.56, |
|
"eval_cer": 0.10105253051378371, |
|
"eval_loss": 0.20794141292572021, |
|
"eval_runtime": 205.8959, |
|
"eval_samples_per_second": 21.71, |
|
"eval_steps_per_second": 1.36, |
|
"eval_wer": 0.2077922077922078, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 97.83, |
|
"learning_rate": 4.179555555555556e-05, |
|
"loss": 0.6975, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 97.83, |
|
"eval_cer": 0.0996543571402854, |
|
"eval_loss": 0.20573750138282776, |
|
"eval_runtime": 205.1157, |
|
"eval_samples_per_second": 21.793, |
|
"eval_steps_per_second": 1.365, |
|
"eval_wer": 0.2029855619241094, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 101.09, |
|
"learning_rate": 4.0462222222222225e-05, |
|
"loss": 0.683, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 101.09, |
|
"eval_cer": 0.09906628421924318, |
|
"eval_loss": 0.20465880632400513, |
|
"eval_runtime": 205.9334, |
|
"eval_samples_per_second": 21.706, |
|
"eval_steps_per_second": 1.36, |
|
"eval_wer": 0.20209678589566857, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 104.35, |
|
"learning_rate": 3.913066666666667e-05, |
|
"loss": 0.6763, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 104.35, |
|
"eval_cer": 0.0991922998451808, |
|
"eval_loss": 0.20821230113506317, |
|
"eval_runtime": 207.1439, |
|
"eval_samples_per_second": 21.579, |
|
"eval_steps_per_second": 1.352, |
|
"eval_wer": 0.20189726474642675, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 107.61, |
|
"learning_rate": 3.779733333333333e-05, |
|
"loss": 0.6687, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 107.61, |
|
"eval_cer": 0.09791414135352784, |
|
"eval_loss": 0.20512840151786804, |
|
"eval_runtime": 206.0617, |
|
"eval_samples_per_second": 21.693, |
|
"eval_steps_per_second": 1.359, |
|
"eval_wer": 0.19807008633824277, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 110.87, |
|
"learning_rate": 3.6464e-05, |
|
"loss": 0.6573, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 110.87, |
|
"eval_cer": 0.09767411158983715, |
|
"eval_loss": 0.20361328125, |
|
"eval_runtime": 205.3176, |
|
"eval_samples_per_second": 21.771, |
|
"eval_steps_per_second": 1.364, |
|
"eval_wer": 0.197616629180875, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 114.13, |
|
"learning_rate": 3.513155555555556e-05, |
|
"loss": 0.6497, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 114.13, |
|
"eval_cer": 0.09658197616504446, |
|
"eval_loss": 0.2067527323961258, |
|
"eval_runtime": 206.5547, |
|
"eval_samples_per_second": 21.641, |
|
"eval_steps_per_second": 1.356, |
|
"eval_wer": 0.19573024740622505, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 117.39, |
|
"learning_rate": 3.379822222222222e-05, |
|
"loss": 0.6406, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 117.39, |
|
"eval_cer": 0.09563985934255848, |
|
"eval_loss": 0.20097151398658752, |
|
"eval_runtime": 205.4638, |
|
"eval_samples_per_second": 21.756, |
|
"eval_steps_per_second": 1.363, |
|
"eval_wer": 0.1919212072843358, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 120.65, |
|
"learning_rate": 3.2466666666666665e-05, |
|
"loss": 0.6313, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 120.65, |
|
"eval_cer": 0.09662398137369034, |
|
"eval_loss": 0.20263761281967163, |
|
"eval_runtime": 210.159, |
|
"eval_samples_per_second": 21.27, |
|
"eval_steps_per_second": 1.332, |
|
"eval_wer": 0.19520423710367846, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 123.91, |
|
"learning_rate": 3.1133333333333336e-05, |
|
"loss": 0.6261, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 123.91, |
|
"eval_cer": 0.09623993375178522, |
|
"eval_loss": 0.20339860022068024, |
|
"eval_runtime": 208.9616, |
|
"eval_samples_per_second": 21.391, |
|
"eval_steps_per_second": 1.34, |
|
"eval_wer": 0.1936080679097439, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 127.17, |
|
"learning_rate": 2.9800000000000003e-05, |
|
"loss": 0.6179, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 127.17, |
|
"eval_cer": 0.09569986678348115, |
|
"eval_loss": 0.20739464461803436, |
|
"eval_runtime": 215.3433, |
|
"eval_samples_per_second": 20.758, |
|
"eval_steps_per_second": 1.3, |
|
"eval_wer": 0.1929550896031343, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 130.43, |
|
"learning_rate": 2.846755555555556e-05, |
|
"loss": 0.6082, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 130.43, |
|
"eval_cer": 0.09512979609471574, |
|
"eval_loss": 0.20194286108016968, |
|
"eval_runtime": 213.4283, |
|
"eval_samples_per_second": 20.944, |
|
"eval_steps_per_second": 1.312, |
|
"eval_wer": 0.1902887615178118, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 133.69, |
|
"learning_rate": 2.7134222222222226e-05, |
|
"loss": 0.6039, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 133.69, |
|
"eval_cer": 0.09520780576791522, |
|
"eval_loss": 0.20227235555648804, |
|
"eval_runtime": 209.2948, |
|
"eval_samples_per_second": 21.357, |
|
"eval_steps_per_second": 1.338, |
|
"eval_wer": 0.1912682289777262, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 136.96, |
|
"learning_rate": 2.580177777777778e-05, |
|
"loss": 0.5948, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 136.96, |
|
"eval_cer": 0.09544183478751365, |
|
"eval_loss": 0.20598873496055603, |
|
"eval_runtime": 207.4517, |
|
"eval_samples_per_second": 21.547, |
|
"eval_steps_per_second": 1.35, |
|
"eval_wer": 0.19108684611477908, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 140.22, |
|
"learning_rate": 2.4468444444444447e-05, |
|
"loss": 0.5867, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 140.22, |
|
"eval_cer": 0.09470374326416475, |
|
"eval_loss": 0.20384302735328674, |
|
"eval_runtime": 209.3468, |
|
"eval_samples_per_second": 21.352, |
|
"eval_steps_per_second": 1.337, |
|
"eval_wer": 0.18905535804977147, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 143.48, |
|
"learning_rate": 2.3136000000000003e-05, |
|
"loss": 0.5804, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 143.48, |
|
"eval_cer": 0.09508779088606988, |
|
"eval_loss": 0.2093733698129654, |
|
"eval_runtime": 212.8045, |
|
"eval_samples_per_second": 21.005, |
|
"eval_steps_per_second": 1.316, |
|
"eval_wer": 0.19039759123558006, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 146.74, |
|
"learning_rate": 2.1803555555555557e-05, |
|
"loss": 0.5757, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 146.74, |
|
"eval_cer": 0.09395365025263133, |
|
"eval_loss": 0.20662076771259308, |
|
"eval_runtime": 210.0316, |
|
"eval_samples_per_second": 21.283, |
|
"eval_steps_per_second": 1.333, |
|
"eval_wer": 0.18718711456141623, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 2.0470222222222224e-05, |
|
"loss": 0.5684, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_cer": 0.09401965843764627, |
|
"eval_loss": 0.20758455991744995, |
|
"eval_runtime": 210.1895, |
|
"eval_samples_per_second": 21.267, |
|
"eval_steps_per_second": 1.332, |
|
"eval_wer": 0.1878038162954364, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 153.26, |
|
"learning_rate": 1.913777777777778e-05, |
|
"loss": 0.5622, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 153.26, |
|
"eval_cer": 0.09470974400825702, |
|
"eval_loss": 0.21354267001152039, |
|
"eval_runtime": 210.2099, |
|
"eval_samples_per_second": 21.264, |
|
"eval_steps_per_second": 1.332, |
|
"eval_wer": 0.1891641877675397, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 156.52, |
|
"learning_rate": 1.7805333333333334e-05, |
|
"loss": 0.5572, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 156.52, |
|
"eval_cer": 0.09412167108721481, |
|
"eval_loss": 0.21093089878559113, |
|
"eval_runtime": 208.6793, |
|
"eval_samples_per_second": 21.42, |
|
"eval_steps_per_second": 1.342, |
|
"eval_wer": 0.18854748603351956, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 159.78, |
|
"learning_rate": 1.647288888888889e-05, |
|
"loss": 0.5513, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 159.78, |
|
"eval_cer": 0.09389964355580092, |
|
"eval_loss": 0.20995952188968658, |
|
"eval_runtime": 211.9593, |
|
"eval_samples_per_second": 21.089, |
|
"eval_steps_per_second": 1.321, |
|
"eval_wer": 0.1872959442791845, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 163.04, |
|
"learning_rate": 1.5139555555555556e-05, |
|
"loss": 0.5463, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 163.04, |
|
"eval_cer": 0.09368961751257156, |
|
"eval_loss": 0.2099747508764267, |
|
"eval_runtime": 206.1662, |
|
"eval_samples_per_second": 21.682, |
|
"eval_steps_per_second": 1.358, |
|
"eval_wer": 0.18666110425886961, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 166.3, |
|
"learning_rate": 1.3807111111111111e-05, |
|
"loss": 0.5402, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 166.3, |
|
"eval_cer": 0.0937376234653097, |
|
"eval_loss": 0.2125527560710907, |
|
"eval_runtime": 206.4719, |
|
"eval_samples_per_second": 21.649, |
|
"eval_steps_per_second": 1.356, |
|
"eval_wer": 0.18675179569034317, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 169.56, |
|
"learning_rate": 1.2473777777777778e-05, |
|
"loss": 0.5361, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 169.56, |
|
"eval_cer": 0.09316155203245202, |
|
"eval_loss": 0.20593136548995972, |
|
"eval_runtime": 204.7532, |
|
"eval_samples_per_second": 21.831, |
|
"eval_steps_per_second": 1.368, |
|
"eval_wer": 0.18530073278676631, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 172.83, |
|
"learning_rate": 1.1140444444444445e-05, |
|
"loss": 0.5322, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 172.83, |
|
"eval_cer": 0.09320955798519016, |
|
"eval_loss": 0.20699192583560944, |
|
"eval_runtime": 206.6944, |
|
"eval_samples_per_second": 21.626, |
|
"eval_steps_per_second": 1.355, |
|
"eval_wer": 0.1852825945004716, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 176.09, |
|
"learning_rate": 9.807111111111112e-06, |
|
"loss": 0.5274, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 176.09, |
|
"eval_cer": 0.0925194724145794, |
|
"eval_loss": 0.2075292468070984, |
|
"eval_runtime": 205.8591, |
|
"eval_samples_per_second": 21.714, |
|
"eval_steps_per_second": 1.36, |
|
"eval_wer": 0.18423057389537836, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 179.35, |
|
"learning_rate": 8.474666666666667e-06, |
|
"loss": 0.5229, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 179.35, |
|
"eval_cer": 0.09307154087106802, |
|
"eval_loss": 0.2078840136528015, |
|
"eval_runtime": 207.0411, |
|
"eval_samples_per_second": 21.59, |
|
"eval_steps_per_second": 1.352, |
|
"eval_wer": 0.18519190306899805, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 182.61, |
|
"learning_rate": 7.141333333333333e-06, |
|
"loss": 0.5167, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 182.61, |
|
"eval_cer": 0.09254347539094848, |
|
"eval_loss": 0.2052079439163208, |
|
"eval_runtime": 205.153, |
|
"eval_samples_per_second": 21.789, |
|
"eval_steps_per_second": 1.365, |
|
"eval_wer": 0.1834869041572952, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 185.87, |
|
"learning_rate": 5.80888888888889e-06, |
|
"loss": 0.515, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 185.87, |
|
"eval_cer": 0.0924474634854722, |
|
"eval_loss": 0.20651493966579437, |
|
"eval_runtime": 206.3346, |
|
"eval_samples_per_second": 21.664, |
|
"eval_steps_per_second": 1.357, |
|
"eval_wer": 0.1834869041572952, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 189.13, |
|
"learning_rate": 4.476444444444445e-06, |
|
"loss": 0.5116, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 189.13, |
|
"eval_cer": 0.09247746720593354, |
|
"eval_loss": 0.20912431180477142, |
|
"eval_runtime": 206.6532, |
|
"eval_samples_per_second": 21.63, |
|
"eval_steps_per_second": 1.355, |
|
"eval_wer": 0.18323296814916926, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 192.39, |
|
"learning_rate": 3.143111111111111e-06, |
|
"loss": 0.5097, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 192.39, |
|
"eval_cer": 0.0925194724145794, |
|
"eval_loss": 0.2088840752840042, |
|
"eval_runtime": 206.9892, |
|
"eval_samples_per_second": 21.595, |
|
"eval_steps_per_second": 1.353, |
|
"eval_wer": 0.18355945730247406, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 195.65, |
|
"learning_rate": 1.8106666666666667e-06, |
|
"loss": 0.5056, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 195.65, |
|
"eval_cer": 0.09236345306818046, |
|
"eval_loss": 0.2069932371377945, |
|
"eval_runtime": 205.214, |
|
"eval_samples_per_second": 21.782, |
|
"eval_steps_per_second": 1.364, |
|
"eval_wer": 0.1829608938547486, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 198.91, |
|
"learning_rate": 4.782222222222222e-07, |
|
"loss": 0.5068, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 198.91, |
|
"eval_cer": 0.09240545827682632, |
|
"eval_loss": 0.20784138143062592, |
|
"eval_runtime": 205.9346, |
|
"eval_samples_per_second": 21.706, |
|
"eval_steps_per_second": 1.36, |
|
"eval_wer": 0.183124138431401, |
|
"step": 91500 |
|
} |
|
], |
|
"max_steps": 92000, |
|
"num_train_epochs": 200, |
|
"total_flos": 1.0091178676342205e+21, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|