{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 13100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.04, "learning_rate": 0.0002969690721649484, "loss": 1.0671, "step": 200 }, { "epoch": 2.04, "eval_loss": 0.3079470694065094, "eval_runtime": 237.7627, "eval_samples_per_second": 35.073, "eval_steps_per_second": 4.387, "eval_wer": 0.2752062328139322, "step": 200 }, { "epoch": 4.08, "learning_rate": 0.00029078350515463917, "loss": 0.6433, "step": 400 }, { "epoch": 4.08, "eval_loss": 0.27281925082206726, "eval_runtime": 238.3892, "eval_samples_per_second": 34.981, "eval_steps_per_second": 4.375, "eval_wer": 0.2847616865261228, "step": 400 }, { "epoch": 6.12, "learning_rate": 0.0002845979381443299, "loss": 0.5687, "step": 600 }, { "epoch": 6.12, "eval_loss": 0.28817421197891235, "eval_runtime": 238.0657, "eval_samples_per_second": 35.028, "eval_steps_per_second": 4.381, "eval_wer": 0.3036205316223648, "step": 600 }, { "epoch": 8.16, "learning_rate": 0.00027841237113402056, "loss": 0.5355, "step": 800 }, { "epoch": 8.16, "eval_loss": 0.27776163816452026, "eval_runtime": 239.8353, "eval_samples_per_second": 34.77, "eval_steps_per_second": 4.349, "eval_wer": 0.29200274977085244, "step": 800 }, { "epoch": 10.2, "learning_rate": 0.00027222680412371134, "loss": 0.5116, "step": 1000 }, { "epoch": 10.2, "eval_loss": 0.2905969023704529, "eval_runtime": 240.1935, "eval_samples_per_second": 34.718, "eval_steps_per_second": 4.342, "eval_wer": 0.3013978001833181, "step": 1000 }, { "epoch": 9.16, "learning_rate": 0.00027468461538461536, "loss": 0.5313, "step": 1200 }, { "epoch": 9.16, "eval_loss": 0.2984345555305481, "eval_runtime": 234.3486, "eval_samples_per_second": 35.584, "eval_steps_per_second": 4.451, "eval_wer": 0.327314390467461, "step": 1200 }, { "epoch": 10.69, "learning_rate": 0.00027006923076923077, "loss": 0.4996, "step": 1400 }, { "epoch": 10.69, "eval_loss": 0.3169882297515869, "eval_runtime": 237.8663, "eval_samples_per_second": 35.058, "eval_steps_per_second": 4.385, "eval_wer": 0.3344179651695692, "step": 1400 }, { "epoch": 12.21, "learning_rate": 0.0002654538461538461, "loss": 0.4845, "step": 1600 }, { "epoch": 12.21, "eval_loss": 0.32016345858573914, "eval_runtime": 236.9291, "eval_samples_per_second": 35.196, "eval_steps_per_second": 4.402, "eval_wer": 0.36338221814848765, "step": 1600 }, { "epoch": 13.74, "learning_rate": 0.00026086153846153847, "loss": 0.5092, "step": 1800 }, { "epoch": 13.74, "eval_loss": 0.3166552186012268, "eval_runtime": 236.2482, "eval_samples_per_second": 35.298, "eval_steps_per_second": 4.415, "eval_wer": 0.3373052245646196, "step": 1800 }, { "epoch": 15.27, "learning_rate": 0.0002562461538461538, "loss": 0.4777, "step": 2000 }, { "epoch": 15.27, "eval_loss": 0.32921522855758667, "eval_runtime": 235.517, "eval_samples_per_second": 35.407, "eval_steps_per_second": 4.429, "eval_wer": 0.3385655362053162, "step": 2000 }, { "epoch": 16.79, "learning_rate": 0.0002516307692307692, "loss": 0.4651, "step": 2200 }, { "epoch": 16.79, "eval_loss": 0.3070097863674164, "eval_runtime": 238.1366, "eval_samples_per_second": 35.018, "eval_steps_per_second": 4.38, "eval_wer": 0.34271310724106324, "step": 2200 }, { "epoch": 18.32, "learning_rate": 0.0002470153846153846, "loss": 0.461, "step": 2400 }, { "epoch": 18.32, "eval_loss": 0.3148922026157379, "eval_runtime": 237.1783, "eval_samples_per_second": 35.159, "eval_steps_per_second": 4.398, "eval_wer": 0.35609532538955085, "step": 2400 }, { "epoch": 19.85, "learning_rate": 0.00024239999999999998, "loss": 0.4481, "step": 2600 }, { "epoch": 19.85, "eval_loss": 0.32919421792030334, "eval_runtime": 236.0044, "eval_samples_per_second": 35.334, "eval_steps_per_second": 4.419, "eval_wer": 0.34411090742438133, "step": 2600 }, { "epoch": 21.37, "learning_rate": 0.00023778461538461536, "loss": 0.4479, "step": 2800 }, { "epoch": 21.37, "eval_loss": 0.3142247200012207, "eval_runtime": 234.6363, "eval_samples_per_second": 35.54, "eval_steps_per_second": 4.445, "eval_wer": 0.32089825847846015, "step": 2800 }, { "epoch": 22.9, "learning_rate": 0.00023316923076923077, "loss": 0.4305, "step": 3000 }, { "epoch": 22.9, "eval_loss": 0.3525453507900238, "eval_runtime": 236.821, "eval_samples_per_second": 35.212, "eval_steps_per_second": 4.404, "eval_wer": 0.35467461044912924, "step": 3000 }, { "epoch": 24.43, "learning_rate": 0.00022855384615384612, "loss": 0.4254, "step": 3200 }, { "epoch": 24.43, "eval_loss": 0.34136688709259033, "eval_runtime": 235.4909, "eval_samples_per_second": 35.411, "eval_steps_per_second": 4.429, "eval_wer": 0.3400091659028414, "step": 3200 }, { "epoch": 25.95, "learning_rate": 0.00022393846153846153, "loss": 0.4066, "step": 3400 }, { "epoch": 25.95, "eval_loss": 0.3118491470813751, "eval_runtime": 236.1268, "eval_samples_per_second": 35.316, "eval_steps_per_second": 4.417, "eval_wer": 0.3207378551787351, "step": 3400 }, { "epoch": 27.48, "learning_rate": 0.0002193230769230769, "loss": 0.4043, "step": 3600 }, { "epoch": 27.48, "eval_loss": 0.34181562066078186, "eval_runtime": 235.2934, "eval_samples_per_second": 35.441, "eval_steps_per_second": 4.433, "eval_wer": 0.3482584784601283, "step": 3600 }, { "epoch": 29.01, "learning_rate": 0.0002147076923076923, "loss": 0.3985, "step": 3800 }, { "epoch": 29.01, "eval_loss": 0.32544735074043274, "eval_runtime": 236.4374, "eval_samples_per_second": 35.269, "eval_steps_per_second": 4.411, "eval_wer": 0.31663611365719524, "step": 3800 }, { "epoch": 30.53, "learning_rate": 0.00021009230769230766, "loss": 0.3982, "step": 4000 }, { "epoch": 30.53, "eval_loss": 0.33056947588920593, "eval_runtime": 239.8848, "eval_samples_per_second": 34.763, "eval_steps_per_second": 4.348, "eval_wer": 0.3452795600366636, "step": 4000 }, { "epoch": 32.06, "learning_rate": 0.00020547692307692307, "loss": 0.3929, "step": 4200 }, { "epoch": 32.06, "eval_loss": 0.3262433409690857, "eval_runtime": 238.1544, "eval_samples_per_second": 35.015, "eval_steps_per_second": 4.38, "eval_wer": 0.3228689275893675, "step": 4200 }, { "epoch": 33.59, "learning_rate": 0.00020086153846153845, "loss": 0.378, "step": 4400 }, { "epoch": 33.59, "eval_loss": 0.3545904755592346, "eval_runtime": 234.4177, "eval_samples_per_second": 35.573, "eval_steps_per_second": 4.449, "eval_wer": 0.33357011915673696, "step": 4400 }, { "epoch": 35.11, "learning_rate": 0.00019624615384615385, "loss": 0.4062, "step": 4600 }, { "epoch": 35.11, "eval_loss": 0.3174082636833191, "eval_runtime": 235.3658, "eval_samples_per_second": 35.43, "eval_steps_per_second": 4.431, "eval_wer": 0.34566911090742436, "step": 4600 }, { "epoch": 36.64, "learning_rate": 0.0001916307692307692, "loss": 0.3648, "step": 4800 }, { "epoch": 36.64, "eval_loss": 0.3376729190349579, "eval_runtime": 239.2202, "eval_samples_per_second": 34.859, "eval_steps_per_second": 4.36, "eval_wer": 0.33572410632447297, "step": 4800 }, { "epoch": 38.17, "learning_rate": 0.0001870153846153846, "loss": 0.3609, "step": 5000 }, { "epoch": 38.17, "eval_loss": 0.33455467224121094, "eval_runtime": 236.9279, "eval_samples_per_second": 35.196, "eval_steps_per_second": 4.402, "eval_wer": 0.351993583868011, "step": 5000 }, { "epoch": 39.69, "learning_rate": 0.0001824, "loss": 0.3483, "step": 5200 }, { "epoch": 39.69, "eval_loss": 0.3349843919277191, "eval_runtime": 235.432, "eval_samples_per_second": 35.42, "eval_steps_per_second": 4.43, "eval_wer": 0.3525893675527039, "step": 5200 }, { "epoch": 41.22, "learning_rate": 0.0001777846153846154, "loss": 0.3548, "step": 5400 }, { "epoch": 41.22, "eval_loss": 0.33302220702171326, "eval_runtime": 237.3922, "eval_samples_per_second": 35.128, "eval_steps_per_second": 4.394, "eval_wer": 0.34062786434463793, "step": 5400 }, { "epoch": 42.75, "learning_rate": 0.00017316923076923075, "loss": 0.3446, "step": 5600 }, { "epoch": 42.75, "eval_loss": 0.3398281931877136, "eval_runtime": 236.2326, "eval_samples_per_second": 35.3, "eval_steps_per_second": 4.415, "eval_wer": 0.3372135655362053, "step": 5600 }, { "epoch": 44.27, "learning_rate": 0.00016855384615384615, "loss": 0.3346, "step": 5800 }, { "epoch": 44.27, "eval_loss": 0.34486979246139526, "eval_runtime": 236.4721, "eval_samples_per_second": 35.264, "eval_steps_per_second": 4.411, "eval_wer": 0.3287809349220898, "step": 5800 }, { "epoch": 45.8, "learning_rate": 0.00016393846153846153, "loss": 0.3309, "step": 6000 }, { "epoch": 45.8, "eval_loss": 0.3319507837295532, "eval_runtime": 236.6263, "eval_samples_per_second": 35.241, "eval_steps_per_second": 4.408, "eval_wer": 0.31439046746104493, "step": 6000 }, { "epoch": 47.33, "learning_rate": 0.0001593230769230769, "loss": 0.326, "step": 6200 }, { "epoch": 47.33, "eval_loss": 0.3399747312068939, "eval_runtime": 236.0765, "eval_samples_per_second": 35.323, "eval_steps_per_second": 4.418, "eval_wer": 0.32786434463794684, "step": 6200 }, { "epoch": 48.85, "learning_rate": 0.0001547076923076923, "loss": 0.3189, "step": 6400 }, { "epoch": 48.85, "eval_loss": 0.3399554491043091, "eval_runtime": 236.2028, "eval_samples_per_second": 35.304, "eval_steps_per_second": 4.416, "eval_wer": 0.31500916590284145, "step": 6400 }, { "epoch": 50.38, "learning_rate": 0.0001500923076923077, "loss": 0.3165, "step": 6600 }, { "epoch": 50.38, "eval_loss": 0.3359447121620178, "eval_runtime": 235.0525, "eval_samples_per_second": 35.477, "eval_steps_per_second": 4.437, "eval_wer": 0.29945004582951423, "step": 6600 }, { "epoch": 51.91, "learning_rate": 0.00014547692307692305, "loss": 0.3132, "step": 6800 }, { "epoch": 51.91, "eval_loss": 0.3342697024345398, "eval_runtime": 235.4774, "eval_samples_per_second": 35.413, "eval_steps_per_second": 4.429, "eval_wer": 0.3095554537121907, "step": 6800 }, { "epoch": 53.44, "learning_rate": 0.00014086153846153845, "loss": 0.3092, "step": 7000 }, { "epoch": 53.44, "eval_loss": 0.3224042057991028, "eval_runtime": 237.4291, "eval_samples_per_second": 35.122, "eval_steps_per_second": 4.393, "eval_wer": 0.302910174152154, "step": 7000 }, { "epoch": 54.96, "learning_rate": 0.00013624615384615383, "loss": 0.2995, "step": 7200 }, { "epoch": 54.96, "eval_loss": 0.3204595744609833, "eval_runtime": 236.2646, "eval_samples_per_second": 35.295, "eval_steps_per_second": 4.415, "eval_wer": 0.29851054078826766, "step": 7200 }, { "epoch": 56.49, "learning_rate": 0.0001316307692307692, "loss": 0.304, "step": 7400 }, { "epoch": 56.49, "eval_loss": 0.35227909684181213, "eval_runtime": 236.3256, "eval_samples_per_second": 35.286, "eval_steps_per_second": 4.413, "eval_wer": 0.30339138405132904, "step": 7400 }, { "epoch": 58.02, "learning_rate": 0.0001270153846153846, "loss": 0.2952, "step": 7600 }, { "epoch": 58.02, "eval_loss": 0.3288583755493164, "eval_runtime": 238.857, "eval_samples_per_second": 34.912, "eval_steps_per_second": 4.367, "eval_wer": 0.2934005499541705, "step": 7600 }, { "epoch": 59.54, "learning_rate": 0.0001224, "loss": 0.2875, "step": 7800 }, { "epoch": 59.54, "eval_loss": 0.335005521774292, "eval_runtime": 236.8029, "eval_samples_per_second": 35.215, "eval_steps_per_second": 4.405, "eval_wer": 0.3008020164986251, "step": 7800 }, { "epoch": 61.07, "learning_rate": 0.00011778461538461537, "loss": 0.2868, "step": 8000 }, { "epoch": 61.07, "eval_loss": 0.3537150025367737, "eval_runtime": 236.5754, "eval_samples_per_second": 35.249, "eval_steps_per_second": 4.409, "eval_wer": 0.3227314390467461, "step": 8000 }, { "epoch": 62.6, "learning_rate": 0.00011316923076923076, "loss": 0.2875, "step": 8200 }, { "epoch": 62.6, "eval_loss": 0.3389277756214142, "eval_runtime": 234.9228, "eval_samples_per_second": 35.497, "eval_steps_per_second": 4.44, "eval_wer": 0.29704399633363887, "step": 8200 }, { "epoch": 64.12, "learning_rate": 0.00010855384615384616, "loss": 0.2778, "step": 8400 }, { "epoch": 64.12, "eval_loss": 0.33703726530075073, "eval_runtime": 234.6165, "eval_samples_per_second": 35.543, "eval_steps_per_second": 4.446, "eval_wer": 0.29596700274977084, "step": 8400 }, { "epoch": 65.65, "learning_rate": 0.00010393846153846154, "loss": 0.2706, "step": 8600 }, { "epoch": 65.65, "eval_loss": 0.32503727078437805, "eval_runtime": 234.7831, "eval_samples_per_second": 35.518, "eval_steps_per_second": 4.442, "eval_wer": 0.28015582034830433, "step": 8600 }, { "epoch": 67.18, "learning_rate": 9.934615384615383e-05, "loss": 0.2669, "step": 8800 }, { "epoch": 67.18, "eval_loss": 0.335059255361557, "eval_runtime": 234.788, "eval_samples_per_second": 35.517, "eval_steps_per_second": 4.442, "eval_wer": 0.2902841429880843, "step": 8800 }, { "epoch": 68.7, "learning_rate": 9.473076923076922e-05, "loss": 0.2615, "step": 9000 }, { "epoch": 68.7, "eval_loss": 0.3381515145301819, "eval_runtime": 236.5391, "eval_samples_per_second": 35.254, "eval_steps_per_second": 4.409, "eval_wer": 0.29885426214482125, "step": 9000 }, { "epoch": 70.23, "learning_rate": 9.01153846153846e-05, "loss": 0.2563, "step": 9200 }, { "epoch": 70.23, "eval_loss": 0.3312215507030487, "eval_runtime": 236.7043, "eval_samples_per_second": 35.23, "eval_steps_per_second": 4.406, "eval_wer": 0.2974793767186068, "step": 9200 }, { "epoch": 71.76, "learning_rate": 8.549999999999999e-05, "loss": 0.2546, "step": 9400 }, { "epoch": 71.76, "eval_loss": 0.3212486505508423, "eval_runtime": 238.4241, "eval_samples_per_second": 34.975, "eval_steps_per_second": 4.375, "eval_wer": 0.30034372135655363, "step": 9400 }, { "epoch": 73.28, "learning_rate": 8.088461538461537e-05, "loss": 0.2482, "step": 9600 }, { "epoch": 73.28, "eval_loss": 0.3337170481681824, "eval_runtime": 237.0835, "eval_samples_per_second": 35.173, "eval_steps_per_second": 4.399, "eval_wer": 0.30907424381301557, "step": 9600 }, { "epoch": 74.81, "learning_rate": 7.626923076923075e-05, "loss": 0.2504, "step": 9800 }, { "epoch": 74.81, "eval_loss": 0.33080631494522095, "eval_runtime": 236.549, "eval_samples_per_second": 35.253, "eval_steps_per_second": 4.409, "eval_wer": 0.3109761686526123, "step": 9800 }, { "epoch": 76.34, "learning_rate": 7.165384615384615e-05, "loss": 0.2456, "step": 10000 }, { "epoch": 76.34, "eval_loss": 0.31574100255966187, "eval_runtime": 235.6572, "eval_samples_per_second": 35.386, "eval_steps_per_second": 4.426, "eval_wer": 0.3117781851512374, "step": 10000 }, { "epoch": 77.86, "learning_rate": 6.703846153846153e-05, "loss": 0.2363, "step": 10200 }, { "epoch": 77.86, "eval_loss": 0.3251018524169922, "eval_runtime": 236.3835, "eval_samples_per_second": 35.277, "eval_steps_per_second": 4.412, "eval_wer": 0.31439046746104493, "step": 10200 }, { "epoch": 79.39, "learning_rate": 6.242307692307691e-05, "loss": 0.2319, "step": 10400 }, { "epoch": 79.39, "eval_loss": 0.32527828216552734, "eval_runtime": 236.1364, "eval_samples_per_second": 35.314, "eval_steps_per_second": 4.417, "eval_wer": 0.30382676443629697, "step": 10400 }, { "epoch": 80.92, "learning_rate": 5.7807692307692304e-05, "loss": 0.2266, "step": 10600 }, { "epoch": 80.92, "eval_loss": 0.3374153673648834, "eval_runtime": 236.6995, "eval_samples_per_second": 35.23, "eval_steps_per_second": 4.406, "eval_wer": 0.30382676443629697, "step": 10600 }, { "epoch": 82.44, "learning_rate": 5.321538461538461e-05, "loss": 0.2279, "step": 10800 }, { "epoch": 82.44, "eval_loss": 0.32676786184310913, "eval_runtime": 235.2934, "eval_samples_per_second": 35.441, "eval_steps_per_second": 4.433, "eval_wer": 0.29640238313473877, "step": 10800 }, { "epoch": 83.97, "learning_rate": 4.8599999999999995e-05, "loss": 0.2231, "step": 11000 }, { "epoch": 83.97, "eval_loss": 0.32775917649269104, "eval_runtime": 237.2828, "eval_samples_per_second": 35.144, "eval_steps_per_second": 4.396, "eval_wer": 0.29502749770852427, "step": 11000 }, { "epoch": 85.5, "learning_rate": 4.398461538461538e-05, "loss": 0.2185, "step": 11200 }, { "epoch": 85.5, "eval_loss": 0.3461829721927643, "eval_runtime": 234.8207, "eval_samples_per_second": 35.512, "eval_steps_per_second": 4.442, "eval_wer": 0.29814390467461044, "step": 11200 }, { "epoch": 87.02, "learning_rate": 3.9369230769230767e-05, "loss": 0.2245, "step": 11400 }, { "epoch": 87.02, "eval_loss": 0.3311368525028229, "eval_runtime": 234.6868, "eval_samples_per_second": 35.532, "eval_steps_per_second": 4.444, "eval_wer": 0.2894821264894592, "step": 11400 }, { "epoch": 88.55, "learning_rate": 3.475384615384615e-05, "loss": 0.223, "step": 11600 }, { "epoch": 88.55, "eval_loss": 0.3325417935848236, "eval_runtime": 234.9788, "eval_samples_per_second": 35.488, "eval_steps_per_second": 4.439, "eval_wer": 0.2876947754353804, "step": 11600 }, { "epoch": 90.08, "learning_rate": 3.0138461538461538e-05, "loss": 0.2121, "step": 11800 }, { "epoch": 90.08, "eval_loss": 0.333699494600296, "eval_runtime": 237.3406, "eval_samples_per_second": 35.135, "eval_steps_per_second": 4.395, "eval_wer": 0.282813932172319, "step": 11800 }, { "epoch": 91.6, "learning_rate": 2.552307692307692e-05, "loss": 0.2126, "step": 12000 }, { "epoch": 91.6, "eval_loss": 0.3324645757675171, "eval_runtime": 234.7938, "eval_samples_per_second": 35.516, "eval_steps_per_second": 4.442, "eval_wer": 0.28077451879010085, "step": 12000 }, { "epoch": 93.13, "learning_rate": 2.0907692307692305e-05, "loss": 0.2027, "step": 12200 }, { "epoch": 93.13, "eval_loss": 0.3276507258415222, "eval_runtime": 235.1021, "eval_samples_per_second": 35.47, "eval_steps_per_second": 4.436, "eval_wer": 0.2819660861594867, "step": 12200 }, { "epoch": 94.66, "learning_rate": 1.629230769230769e-05, "loss": 0.2058, "step": 12400 }, { "epoch": 94.66, "eval_loss": 0.33078595995903015, "eval_runtime": 236.8707, "eval_samples_per_second": 35.205, "eval_steps_per_second": 4.403, "eval_wer": 0.2827451879010083, "step": 12400 }, { "epoch": 96.18, "learning_rate": 1.1676923076923075e-05, "loss": 0.1991, "step": 12600 }, { "epoch": 96.18, "eval_loss": 0.3278521001338959, "eval_runtime": 235.6753, "eval_samples_per_second": 35.383, "eval_steps_per_second": 4.426, "eval_wer": 0.2820119156736939, "step": 12600 }, { "epoch": 97.71, "learning_rate": 7.061538461538461e-06, "loss": 0.1991, "step": 12800 }, { "epoch": 97.71, "eval_loss": 0.3299693167209625, "eval_runtime": 236.8407, "eval_samples_per_second": 35.209, "eval_steps_per_second": 4.404, "eval_wer": 0.28221814848762605, "step": 12800 }, { "epoch": 99.24, "learning_rate": 2.446153846153846e-06, "loss": 0.1986, "step": 13000 }, { "epoch": 99.24, "eval_loss": 0.3284846246242523, "eval_runtime": 236.4429, "eval_samples_per_second": 35.269, "eval_steps_per_second": 4.411, "eval_wer": 0.2834555453712191, "step": 13000 }, { "epoch": 100.0, "step": 13100, "total_flos": 8.36308166572502e+19, "train_loss": 0.29656382378731067, "train_runtime": 73649.3567, "train_samples_per_second": 34.023, "train_steps_per_second": 0.178 } ], "max_steps": 13100, "num_train_epochs": 100, "total_flos": 8.36308166572502e+19, "trial_name": null, "trial_params": null }