|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 13100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0002969690721649484, |
|
"loss": 1.0671, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 0.3079470694065094, |
|
"eval_runtime": 237.7627, |
|
"eval_samples_per_second": 35.073, |
|
"eval_steps_per_second": 4.387, |
|
"eval_wer": 0.2752062328139322, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.00029078350515463917, |
|
"loss": 0.6433, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_loss": 0.27281925082206726, |
|
"eval_runtime": 238.3892, |
|
"eval_samples_per_second": 34.981, |
|
"eval_steps_per_second": 4.375, |
|
"eval_wer": 0.2847616865261228, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.0002845979381443299, |
|
"loss": 0.5687, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"eval_loss": 0.28817421197891235, |
|
"eval_runtime": 238.0657, |
|
"eval_samples_per_second": 35.028, |
|
"eval_steps_per_second": 4.381, |
|
"eval_wer": 0.3036205316223648, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 0.00027841237113402056, |
|
"loss": 0.5355, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"eval_loss": 0.27776163816452026, |
|
"eval_runtime": 239.8353, |
|
"eval_samples_per_second": 34.77, |
|
"eval_steps_per_second": 4.349, |
|
"eval_wer": 0.29200274977085244, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 0.00027222680412371134, |
|
"loss": 0.5116, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"eval_loss": 0.2905969023704529, |
|
"eval_runtime": 240.1935, |
|
"eval_samples_per_second": 34.718, |
|
"eval_steps_per_second": 4.342, |
|
"eval_wer": 0.3013978001833181, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 0.00027468461538461536, |
|
"loss": 0.5313, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"eval_loss": 0.2984345555305481, |
|
"eval_runtime": 234.3486, |
|
"eval_samples_per_second": 35.584, |
|
"eval_steps_per_second": 4.451, |
|
"eval_wer": 0.327314390467461, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 0.00027006923076923077, |
|
"loss": 0.4996, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"eval_loss": 0.3169882297515869, |
|
"eval_runtime": 237.8663, |
|
"eval_samples_per_second": 35.058, |
|
"eval_steps_per_second": 4.385, |
|
"eval_wer": 0.3344179651695692, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 0.0002654538461538461, |
|
"loss": 0.4845, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"eval_loss": 0.32016345858573914, |
|
"eval_runtime": 236.9291, |
|
"eval_samples_per_second": 35.196, |
|
"eval_steps_per_second": 4.402, |
|
"eval_wer": 0.36338221814848765, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"learning_rate": 0.00026086153846153847, |
|
"loss": 0.5092, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"eval_loss": 0.3166552186012268, |
|
"eval_runtime": 236.2482, |
|
"eval_samples_per_second": 35.298, |
|
"eval_steps_per_second": 4.415, |
|
"eval_wer": 0.3373052245646196, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 15.27, |
|
"learning_rate": 0.0002562461538461538, |
|
"loss": 0.4777, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.27, |
|
"eval_loss": 0.32921522855758667, |
|
"eval_runtime": 235.517, |
|
"eval_samples_per_second": 35.407, |
|
"eval_steps_per_second": 4.429, |
|
"eval_wer": 0.3385655362053162, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"learning_rate": 0.0002516307692307692, |
|
"loss": 0.4651, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"eval_loss": 0.3070097863674164, |
|
"eval_runtime": 238.1366, |
|
"eval_samples_per_second": 35.018, |
|
"eval_steps_per_second": 4.38, |
|
"eval_wer": 0.34271310724106324, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 18.32, |
|
"learning_rate": 0.0002470153846153846, |
|
"loss": 0.461, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 18.32, |
|
"eval_loss": 0.3148922026157379, |
|
"eval_runtime": 237.1783, |
|
"eval_samples_per_second": 35.159, |
|
"eval_steps_per_second": 4.398, |
|
"eval_wer": 0.35609532538955085, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 19.85, |
|
"learning_rate": 0.00024239999999999998, |
|
"loss": 0.4481, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 19.85, |
|
"eval_loss": 0.32919421792030334, |
|
"eval_runtime": 236.0044, |
|
"eval_samples_per_second": 35.334, |
|
"eval_steps_per_second": 4.419, |
|
"eval_wer": 0.34411090742438133, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 21.37, |
|
"learning_rate": 0.00023778461538461536, |
|
"loss": 0.4479, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 21.37, |
|
"eval_loss": 0.3142247200012207, |
|
"eval_runtime": 234.6363, |
|
"eval_samples_per_second": 35.54, |
|
"eval_steps_per_second": 4.445, |
|
"eval_wer": 0.32089825847846015, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 22.9, |
|
"learning_rate": 0.00023316923076923077, |
|
"loss": 0.4305, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 22.9, |
|
"eval_loss": 0.3525453507900238, |
|
"eval_runtime": 236.821, |
|
"eval_samples_per_second": 35.212, |
|
"eval_steps_per_second": 4.404, |
|
"eval_wer": 0.35467461044912924, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 24.43, |
|
"learning_rate": 0.00022855384615384612, |
|
"loss": 0.4254, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 24.43, |
|
"eval_loss": 0.34136688709259033, |
|
"eval_runtime": 235.4909, |
|
"eval_samples_per_second": 35.411, |
|
"eval_steps_per_second": 4.429, |
|
"eval_wer": 0.3400091659028414, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 25.95, |
|
"learning_rate": 0.00022393846153846153, |
|
"loss": 0.4066, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 25.95, |
|
"eval_loss": 0.3118491470813751, |
|
"eval_runtime": 236.1268, |
|
"eval_samples_per_second": 35.316, |
|
"eval_steps_per_second": 4.417, |
|
"eval_wer": 0.3207378551787351, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 27.48, |
|
"learning_rate": 0.0002193230769230769, |
|
"loss": 0.4043, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 27.48, |
|
"eval_loss": 0.34181562066078186, |
|
"eval_runtime": 235.2934, |
|
"eval_samples_per_second": 35.441, |
|
"eval_steps_per_second": 4.433, |
|
"eval_wer": 0.3482584784601283, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 0.0002147076923076923, |
|
"loss": 0.3985, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"eval_loss": 0.32544735074043274, |
|
"eval_runtime": 236.4374, |
|
"eval_samples_per_second": 35.269, |
|
"eval_steps_per_second": 4.411, |
|
"eval_wer": 0.31663611365719524, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 30.53, |
|
"learning_rate": 0.00021009230769230766, |
|
"loss": 0.3982, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 30.53, |
|
"eval_loss": 0.33056947588920593, |
|
"eval_runtime": 239.8848, |
|
"eval_samples_per_second": 34.763, |
|
"eval_steps_per_second": 4.348, |
|
"eval_wer": 0.3452795600366636, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 32.06, |
|
"learning_rate": 0.00020547692307692307, |
|
"loss": 0.3929, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 32.06, |
|
"eval_loss": 0.3262433409690857, |
|
"eval_runtime": 238.1544, |
|
"eval_samples_per_second": 35.015, |
|
"eval_steps_per_second": 4.38, |
|
"eval_wer": 0.3228689275893675, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 33.59, |
|
"learning_rate": 0.00020086153846153845, |
|
"loss": 0.378, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 33.59, |
|
"eval_loss": 0.3545904755592346, |
|
"eval_runtime": 234.4177, |
|
"eval_samples_per_second": 35.573, |
|
"eval_steps_per_second": 4.449, |
|
"eval_wer": 0.33357011915673696, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 35.11, |
|
"learning_rate": 0.00019624615384615385, |
|
"loss": 0.4062, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 35.11, |
|
"eval_loss": 0.3174082636833191, |
|
"eval_runtime": 235.3658, |
|
"eval_samples_per_second": 35.43, |
|
"eval_steps_per_second": 4.431, |
|
"eval_wer": 0.34566911090742436, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 36.64, |
|
"learning_rate": 0.0001916307692307692, |
|
"loss": 0.3648, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 36.64, |
|
"eval_loss": 0.3376729190349579, |
|
"eval_runtime": 239.2202, |
|
"eval_samples_per_second": 34.859, |
|
"eval_steps_per_second": 4.36, |
|
"eval_wer": 0.33572410632447297, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 38.17, |
|
"learning_rate": 0.0001870153846153846, |
|
"loss": 0.3609, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 38.17, |
|
"eval_loss": 0.33455467224121094, |
|
"eval_runtime": 236.9279, |
|
"eval_samples_per_second": 35.196, |
|
"eval_steps_per_second": 4.402, |
|
"eval_wer": 0.351993583868011, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 39.69, |
|
"learning_rate": 0.0001824, |
|
"loss": 0.3483, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 39.69, |
|
"eval_loss": 0.3349843919277191, |
|
"eval_runtime": 235.432, |
|
"eval_samples_per_second": 35.42, |
|
"eval_steps_per_second": 4.43, |
|
"eval_wer": 0.3525893675527039, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 41.22, |
|
"learning_rate": 0.0001777846153846154, |
|
"loss": 0.3548, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 41.22, |
|
"eval_loss": 0.33302220702171326, |
|
"eval_runtime": 237.3922, |
|
"eval_samples_per_second": 35.128, |
|
"eval_steps_per_second": 4.394, |
|
"eval_wer": 0.34062786434463793, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 42.75, |
|
"learning_rate": 0.00017316923076923075, |
|
"loss": 0.3446, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 42.75, |
|
"eval_loss": 0.3398281931877136, |
|
"eval_runtime": 236.2326, |
|
"eval_samples_per_second": 35.3, |
|
"eval_steps_per_second": 4.415, |
|
"eval_wer": 0.3372135655362053, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 44.27, |
|
"learning_rate": 0.00016855384615384615, |
|
"loss": 0.3346, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 44.27, |
|
"eval_loss": 0.34486979246139526, |
|
"eval_runtime": 236.4721, |
|
"eval_samples_per_second": 35.264, |
|
"eval_steps_per_second": 4.411, |
|
"eval_wer": 0.3287809349220898, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 45.8, |
|
"learning_rate": 0.00016393846153846153, |
|
"loss": 0.3309, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 45.8, |
|
"eval_loss": 0.3319507837295532, |
|
"eval_runtime": 236.6263, |
|
"eval_samples_per_second": 35.241, |
|
"eval_steps_per_second": 4.408, |
|
"eval_wer": 0.31439046746104493, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 47.33, |
|
"learning_rate": 0.0001593230769230769, |
|
"loss": 0.326, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 47.33, |
|
"eval_loss": 0.3399747312068939, |
|
"eval_runtime": 236.0765, |
|
"eval_samples_per_second": 35.323, |
|
"eval_steps_per_second": 4.418, |
|
"eval_wer": 0.32786434463794684, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 48.85, |
|
"learning_rate": 0.0001547076923076923, |
|
"loss": 0.3189, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 48.85, |
|
"eval_loss": 0.3399554491043091, |
|
"eval_runtime": 236.2028, |
|
"eval_samples_per_second": 35.304, |
|
"eval_steps_per_second": 4.416, |
|
"eval_wer": 0.31500916590284145, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 50.38, |
|
"learning_rate": 0.0001500923076923077, |
|
"loss": 0.3165, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 50.38, |
|
"eval_loss": 0.3359447121620178, |
|
"eval_runtime": 235.0525, |
|
"eval_samples_per_second": 35.477, |
|
"eval_steps_per_second": 4.437, |
|
"eval_wer": 0.29945004582951423, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 51.91, |
|
"learning_rate": 0.00014547692307692305, |
|
"loss": 0.3132, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 51.91, |
|
"eval_loss": 0.3342697024345398, |
|
"eval_runtime": 235.4774, |
|
"eval_samples_per_second": 35.413, |
|
"eval_steps_per_second": 4.429, |
|
"eval_wer": 0.3095554537121907, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 53.44, |
|
"learning_rate": 0.00014086153846153845, |
|
"loss": 0.3092, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 53.44, |
|
"eval_loss": 0.3224042057991028, |
|
"eval_runtime": 237.4291, |
|
"eval_samples_per_second": 35.122, |
|
"eval_steps_per_second": 4.393, |
|
"eval_wer": 0.302910174152154, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 54.96, |
|
"learning_rate": 0.00013624615384615383, |
|
"loss": 0.2995, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 54.96, |
|
"eval_loss": 0.3204595744609833, |
|
"eval_runtime": 236.2646, |
|
"eval_samples_per_second": 35.295, |
|
"eval_steps_per_second": 4.415, |
|
"eval_wer": 0.29851054078826766, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 56.49, |
|
"learning_rate": 0.0001316307692307692, |
|
"loss": 0.304, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 56.49, |
|
"eval_loss": 0.35227909684181213, |
|
"eval_runtime": 236.3256, |
|
"eval_samples_per_second": 35.286, |
|
"eval_steps_per_second": 4.413, |
|
"eval_wer": 0.30339138405132904, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 58.02, |
|
"learning_rate": 0.0001270153846153846, |
|
"loss": 0.2952, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 58.02, |
|
"eval_loss": 0.3288583755493164, |
|
"eval_runtime": 238.857, |
|
"eval_samples_per_second": 34.912, |
|
"eval_steps_per_second": 4.367, |
|
"eval_wer": 0.2934005499541705, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 59.54, |
|
"learning_rate": 0.0001224, |
|
"loss": 0.2875, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 59.54, |
|
"eval_loss": 0.335005521774292, |
|
"eval_runtime": 236.8029, |
|
"eval_samples_per_second": 35.215, |
|
"eval_steps_per_second": 4.405, |
|
"eval_wer": 0.3008020164986251, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 61.07, |
|
"learning_rate": 0.00011778461538461537, |
|
"loss": 0.2868, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 61.07, |
|
"eval_loss": 0.3537150025367737, |
|
"eval_runtime": 236.5754, |
|
"eval_samples_per_second": 35.249, |
|
"eval_steps_per_second": 4.409, |
|
"eval_wer": 0.3227314390467461, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 62.6, |
|
"learning_rate": 0.00011316923076923076, |
|
"loss": 0.2875, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 62.6, |
|
"eval_loss": 0.3389277756214142, |
|
"eval_runtime": 234.9228, |
|
"eval_samples_per_second": 35.497, |
|
"eval_steps_per_second": 4.44, |
|
"eval_wer": 0.29704399633363887, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 64.12, |
|
"learning_rate": 0.00010855384615384616, |
|
"loss": 0.2778, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 64.12, |
|
"eval_loss": 0.33703726530075073, |
|
"eval_runtime": 234.6165, |
|
"eval_samples_per_second": 35.543, |
|
"eval_steps_per_second": 4.446, |
|
"eval_wer": 0.29596700274977084, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 65.65, |
|
"learning_rate": 0.00010393846153846154, |
|
"loss": 0.2706, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 65.65, |
|
"eval_loss": 0.32503727078437805, |
|
"eval_runtime": 234.7831, |
|
"eval_samples_per_second": 35.518, |
|
"eval_steps_per_second": 4.442, |
|
"eval_wer": 0.28015582034830433, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 67.18, |
|
"learning_rate": 9.934615384615383e-05, |
|
"loss": 0.2669, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 67.18, |
|
"eval_loss": 0.335059255361557, |
|
"eval_runtime": 234.788, |
|
"eval_samples_per_second": 35.517, |
|
"eval_steps_per_second": 4.442, |
|
"eval_wer": 0.2902841429880843, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 68.7, |
|
"learning_rate": 9.473076923076922e-05, |
|
"loss": 0.2615, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 68.7, |
|
"eval_loss": 0.3381515145301819, |
|
"eval_runtime": 236.5391, |
|
"eval_samples_per_second": 35.254, |
|
"eval_steps_per_second": 4.409, |
|
"eval_wer": 0.29885426214482125, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 70.23, |
|
"learning_rate": 9.01153846153846e-05, |
|
"loss": 0.2563, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 70.23, |
|
"eval_loss": 0.3312215507030487, |
|
"eval_runtime": 236.7043, |
|
"eval_samples_per_second": 35.23, |
|
"eval_steps_per_second": 4.406, |
|
"eval_wer": 0.2974793767186068, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 71.76, |
|
"learning_rate": 8.549999999999999e-05, |
|
"loss": 0.2546, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 71.76, |
|
"eval_loss": 0.3212486505508423, |
|
"eval_runtime": 238.4241, |
|
"eval_samples_per_second": 34.975, |
|
"eval_steps_per_second": 4.375, |
|
"eval_wer": 0.30034372135655363, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 73.28, |
|
"learning_rate": 8.088461538461537e-05, |
|
"loss": 0.2482, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 73.28, |
|
"eval_loss": 0.3337170481681824, |
|
"eval_runtime": 237.0835, |
|
"eval_samples_per_second": 35.173, |
|
"eval_steps_per_second": 4.399, |
|
"eval_wer": 0.30907424381301557, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 74.81, |
|
"learning_rate": 7.626923076923075e-05, |
|
"loss": 0.2504, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 74.81, |
|
"eval_loss": 0.33080631494522095, |
|
"eval_runtime": 236.549, |
|
"eval_samples_per_second": 35.253, |
|
"eval_steps_per_second": 4.409, |
|
"eval_wer": 0.3109761686526123, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 76.34, |
|
"learning_rate": 7.165384615384615e-05, |
|
"loss": 0.2456, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 76.34, |
|
"eval_loss": 0.31574100255966187, |
|
"eval_runtime": 235.6572, |
|
"eval_samples_per_second": 35.386, |
|
"eval_steps_per_second": 4.426, |
|
"eval_wer": 0.3117781851512374, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 77.86, |
|
"learning_rate": 6.703846153846153e-05, |
|
"loss": 0.2363, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 77.86, |
|
"eval_loss": 0.3251018524169922, |
|
"eval_runtime": 236.3835, |
|
"eval_samples_per_second": 35.277, |
|
"eval_steps_per_second": 4.412, |
|
"eval_wer": 0.31439046746104493, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 79.39, |
|
"learning_rate": 6.242307692307691e-05, |
|
"loss": 0.2319, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 79.39, |
|
"eval_loss": 0.32527828216552734, |
|
"eval_runtime": 236.1364, |
|
"eval_samples_per_second": 35.314, |
|
"eval_steps_per_second": 4.417, |
|
"eval_wer": 0.30382676443629697, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 80.92, |
|
"learning_rate": 5.7807692307692304e-05, |
|
"loss": 0.2266, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 80.92, |
|
"eval_loss": 0.3374153673648834, |
|
"eval_runtime": 236.6995, |
|
"eval_samples_per_second": 35.23, |
|
"eval_steps_per_second": 4.406, |
|
"eval_wer": 0.30382676443629697, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 82.44, |
|
"learning_rate": 5.321538461538461e-05, |
|
"loss": 0.2279, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 82.44, |
|
"eval_loss": 0.32676786184310913, |
|
"eval_runtime": 235.2934, |
|
"eval_samples_per_second": 35.441, |
|
"eval_steps_per_second": 4.433, |
|
"eval_wer": 0.29640238313473877, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 83.97, |
|
"learning_rate": 4.8599999999999995e-05, |
|
"loss": 0.2231, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 83.97, |
|
"eval_loss": 0.32775917649269104, |
|
"eval_runtime": 237.2828, |
|
"eval_samples_per_second": 35.144, |
|
"eval_steps_per_second": 4.396, |
|
"eval_wer": 0.29502749770852427, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 85.5, |
|
"learning_rate": 4.398461538461538e-05, |
|
"loss": 0.2185, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 85.5, |
|
"eval_loss": 0.3461829721927643, |
|
"eval_runtime": 234.8207, |
|
"eval_samples_per_second": 35.512, |
|
"eval_steps_per_second": 4.442, |
|
"eval_wer": 0.29814390467461044, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 87.02, |
|
"learning_rate": 3.9369230769230767e-05, |
|
"loss": 0.2245, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 87.02, |
|
"eval_loss": 0.3311368525028229, |
|
"eval_runtime": 234.6868, |
|
"eval_samples_per_second": 35.532, |
|
"eval_steps_per_second": 4.444, |
|
"eval_wer": 0.2894821264894592, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 88.55, |
|
"learning_rate": 3.475384615384615e-05, |
|
"loss": 0.223, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 88.55, |
|
"eval_loss": 0.3325417935848236, |
|
"eval_runtime": 234.9788, |
|
"eval_samples_per_second": 35.488, |
|
"eval_steps_per_second": 4.439, |
|
"eval_wer": 0.2876947754353804, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 90.08, |
|
"learning_rate": 3.0138461538461538e-05, |
|
"loss": 0.2121, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 90.08, |
|
"eval_loss": 0.333699494600296, |
|
"eval_runtime": 237.3406, |
|
"eval_samples_per_second": 35.135, |
|
"eval_steps_per_second": 4.395, |
|
"eval_wer": 0.282813932172319, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 91.6, |
|
"learning_rate": 2.552307692307692e-05, |
|
"loss": 0.2126, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 91.6, |
|
"eval_loss": 0.3324645757675171, |
|
"eval_runtime": 234.7938, |
|
"eval_samples_per_second": 35.516, |
|
"eval_steps_per_second": 4.442, |
|
"eval_wer": 0.28077451879010085, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 93.13, |
|
"learning_rate": 2.0907692307692305e-05, |
|
"loss": 0.2027, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 93.13, |
|
"eval_loss": 0.3276507258415222, |
|
"eval_runtime": 235.1021, |
|
"eval_samples_per_second": 35.47, |
|
"eval_steps_per_second": 4.436, |
|
"eval_wer": 0.2819660861594867, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 94.66, |
|
"learning_rate": 1.629230769230769e-05, |
|
"loss": 0.2058, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 94.66, |
|
"eval_loss": 0.33078595995903015, |
|
"eval_runtime": 236.8707, |
|
"eval_samples_per_second": 35.205, |
|
"eval_steps_per_second": 4.403, |
|
"eval_wer": 0.2827451879010083, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 96.18, |
|
"learning_rate": 1.1676923076923075e-05, |
|
"loss": 0.1991, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 96.18, |
|
"eval_loss": 0.3278521001338959, |
|
"eval_runtime": 235.6753, |
|
"eval_samples_per_second": 35.383, |
|
"eval_steps_per_second": 4.426, |
|
"eval_wer": 0.2820119156736939, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 97.71, |
|
"learning_rate": 7.061538461538461e-06, |
|
"loss": 0.1991, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 97.71, |
|
"eval_loss": 0.3299693167209625, |
|
"eval_runtime": 236.8407, |
|
"eval_samples_per_second": 35.209, |
|
"eval_steps_per_second": 4.404, |
|
"eval_wer": 0.28221814848762605, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 99.24, |
|
"learning_rate": 2.446153846153846e-06, |
|
"loss": 0.1986, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 99.24, |
|
"eval_loss": 0.3284846246242523, |
|
"eval_runtime": 236.4429, |
|
"eval_samples_per_second": 35.269, |
|
"eval_steps_per_second": 4.411, |
|
"eval_wer": 0.2834555453712191, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 13100, |
|
"total_flos": 8.36308166572502e+19, |
|
"train_loss": 0.29656382378731067, |
|
"train_runtime": 73649.3567, |
|
"train_samples_per_second": 34.023, |
|
"train_steps_per_second": 0.178 |
|
} |
|
], |
|
"max_steps": 13100, |
|
"num_train_epochs": 100, |
|
"total_flos": 8.36308166572502e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|