{ "best_metric": null, "best_model_checkpoint": null, "epoch": 493.02296211251434, "global_step": 21200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.3, "learning_rate": 0.00023999999999999998, "loss": 4.3982, "step": 400 }, { "epoch": 9.3, "eval_cer": 0.14366206687135194, "eval_loss": 0.5217852592468262, "eval_runtime": 117.8175, "eval_samples_per_second": 26.414, "eval_steps_per_second": 3.302, "eval_wer": 0.6506676906011348, "step": 400 }, { "epoch": 18.6, "learning_rate": 0.00029461077844311373, "loss": 0.229, "step": 800 }, { "epoch": 18.6, "eval_cer": 0.08482742332737273, "eval_loss": 0.36793914437294006, "eval_runtime": 118.0263, "eval_samples_per_second": 26.367, "eval_steps_per_second": 3.296, "eval_wer": 0.40479542642604205, "step": 800 }, { "epoch": 27.9, "learning_rate": 0.00028742514970059877, "loss": 0.1054, "step": 1200 }, { "epoch": 27.9, "eval_cer": 0.07780964270049597, "eval_loss": 0.38127270340919495, "eval_runtime": 116.0512, "eval_samples_per_second": 26.816, "eval_steps_per_second": 3.352, "eval_wer": 0.367037842911387, "step": 1200 }, { "epoch": 37.21, "learning_rate": 0.0002802395209580838, "loss": 0.0784, "step": 1600 }, { "epoch": 37.21, "eval_cer": 0.07465164141840143, "eval_loss": 0.3839242458343506, "eval_runtime": 116.2982, "eval_samples_per_second": 26.759, "eval_steps_per_second": 3.345, "eval_wer": 0.35496394897393235, "step": 1600 }, { "epoch": 46.51, "learning_rate": 0.00027305389221556883, "loss": 0.066, "step": 2000 }, { "epoch": 46.51, "eval_cer": 0.07363946152029421, "eval_loss": 0.3969549238681793, "eval_runtime": 118.9877, "eval_samples_per_second": 26.154, "eval_steps_per_second": 3.269, "eval_wer": 0.3443406288664192, "step": 2000 }, { "epoch": 55.8, "learning_rate": 0.00026586826347305386, "loss": 0.0603, "step": 2400 }, { "epoch": 55.8, "eval_cer": 0.07218192246701981, "eval_loss": 0.3702129125595093, "eval_runtime": 118.9492, "eval_samples_per_second": 26.162, "eval_steps_per_second": 3.27, "eval_wer": 0.3393489483339733, "step": 2400 }, { "epoch": 65.11, "learning_rate": 0.0002586826347305389, "loss": 0.0539, "step": 2800 }, { "epoch": 65.11, "eval_cer": 0.07241809777657816, "eval_loss": 0.3762107491493225, "eval_runtime": 117.3723, "eval_samples_per_second": 26.514, "eval_steps_per_second": 3.314, "eval_wer": 0.33875165322752676, "step": 2800 }, { "epoch": 74.41, "learning_rate": 0.00025149700598802393, "loss": 0.0497, "step": 3200 }, { "epoch": 74.41, "eval_cer": 0.07128445629069807, "eval_loss": 0.36228740215301514, "eval_runtime": 116.3914, "eval_samples_per_second": 26.737, "eval_steps_per_second": 3.342, "eval_wer": 0.3413541533341866, "step": 3200 }, { "epoch": 83.71, "learning_rate": 0.00024431137724550896, "loss": 0.0432, "step": 3600 }, { "epoch": 83.71, "eval_cer": 0.07248557643645197, "eval_loss": 0.3847475051879883, "eval_runtime": 116.3314, "eval_samples_per_second": 26.751, "eval_steps_per_second": 3.344, "eval_wer": 0.3346132514185759, "step": 3600 }, { "epoch": 93.02, "learning_rate": 0.000237125748502994, "loss": 0.0438, "step": 4000 }, { "epoch": 93.02, "eval_cer": 0.07503626977968217, "eval_loss": 0.4057835340499878, "eval_runtime": 116.616, "eval_samples_per_second": 26.686, "eval_steps_per_second": 3.336, "eval_wer": 0.3393489483339733, "step": 4000 }, { "epoch": 102.32, "learning_rate": 0.00022994011976047902, "loss": 0.0413, "step": 4400 }, { "epoch": 102.32, "eval_cer": 0.07270825601403556, "eval_loss": 0.3957100510597229, "eval_runtime": 116.9156, "eval_samples_per_second": 26.617, "eval_steps_per_second": 3.327, "eval_wer": 0.3362771449293912, "step": 4400 }, { "epoch": 111.62, "learning_rate": 0.00022275449101796406, "loss": 0.039, "step": 4800 }, { "epoch": 111.62, "eval_cer": 0.07179729410573905, "eval_loss": 0.386459618806839, "eval_runtime": 119.0004, "eval_samples_per_second": 26.151, "eval_steps_per_second": 3.269, "eval_wer": 0.3330346857801101, "step": 4800 }, { "epoch": 120.92, "learning_rate": 0.0002155688622754491, "loss": 0.0356, "step": 5200 }, { "epoch": 120.92, "eval_cer": 0.07110226390903876, "eval_loss": 0.38599926233291626, "eval_runtime": 118.9882, "eval_samples_per_second": 26.154, "eval_steps_per_second": 3.269, "eval_wer": 0.33192542343956655, "step": 5200 }, { "epoch": 130.23, "learning_rate": 0.00020838323353293412, "loss": 0.0336, "step": 5600 }, { "epoch": 130.23, "eval_cer": 0.07001585748507035, "eval_loss": 0.3902195692062378, "eval_runtime": 116.3404, "eval_samples_per_second": 26.749, "eval_steps_per_second": 3.344, "eval_wer": 0.3241605870557618, "step": 5600 }, { "epoch": 139.53, "learning_rate": 0.00020119760479041913, "loss": 0.034, "step": 6000 }, { "epoch": 139.53, "eval_cer": 0.07322109382907656, "eval_loss": 0.39304569363594055, "eval_runtime": 116.7151, "eval_samples_per_second": 26.663, "eval_steps_per_second": 3.333, "eval_wer": 0.3337173087589061, "step": 6000 }, { "epoch": 148.83, "learning_rate": 0.00019402994011976046, "loss": 0.0273, "step": 6400 }, { "epoch": 148.83, "eval_cer": 0.07479334660413645, "eval_loss": 0.39119070768356323, "eval_runtime": 116.4756, "eval_samples_per_second": 26.718, "eval_steps_per_second": 3.34, "eval_wer": 0.33747173514228423, "step": 6400 }, { "epoch": 158.14, "learning_rate": 0.0001868443113772455, "loss": 0.027, "step": 6800 }, { "epoch": 158.14, "eval_cer": 0.07523870575930362, "eval_loss": 0.42656052112579346, "eval_runtime": 116.5029, "eval_samples_per_second": 26.712, "eval_steps_per_second": 3.339, "eval_wer": 0.34344468620674945, "step": 6800 }, { "epoch": 167.44, "learning_rate": 0.0001796586826347305, "loss": 0.028, "step": 7200 }, { "epoch": 167.44, "eval_cer": 0.0707648706096697, "eval_loss": 0.38949263095855713, "eval_runtime": 116.3646, "eval_samples_per_second": 26.744, "eval_steps_per_second": 3.343, "eval_wer": 0.32266734928964547, "step": 7200 }, { "epoch": 176.73, "learning_rate": 0.00017247305389221556, "loss": 0.0241, "step": 7600 }, { "epoch": 176.73, "eval_cer": 0.07274874320995985, "eval_loss": 0.3967472016811371, "eval_runtime": 116.4729, "eval_samples_per_second": 26.719, "eval_steps_per_second": 3.34, "eval_wer": 0.32936558726908144, "step": 7600 }, { "epoch": 186.05, "learning_rate": 0.00016530538922155687, "loss": 0.0241, "step": 8000 }, { "epoch": 186.05, "eval_cer": 0.07122372549681164, "eval_loss": 0.4058191776275635, "eval_runtime": 116.3498, "eval_samples_per_second": 26.747, "eval_steps_per_second": 3.343, "eval_wer": 0.32548316907717906, "step": 8000 }, { "epoch": 195.34, "learning_rate": 0.0001581197604790419, "loss": 0.0209, "step": 8400 }, { "epoch": 195.34, "eval_cer": 0.07019804986672964, "eval_loss": 0.4101807177066803, "eval_runtime": 116.6051, "eval_samples_per_second": 26.688, "eval_steps_per_second": 3.336, "eval_wer": 0.3233499722684415, "step": 8400 }, { "epoch": 204.64, "learning_rate": 0.00015093413173652694, "loss": 0.0206, "step": 8800 }, { "epoch": 204.64, "eval_cer": 0.06987415229933533, "eval_loss": 0.40751102566719055, "eval_runtime": 117.508, "eval_samples_per_second": 26.483, "eval_steps_per_second": 3.31, "eval_wer": 0.3193822262041896, "step": 8800 }, { "epoch": 213.94, "learning_rate": 0.00014376646706586825, "loss": 0.0172, "step": 9200 }, { "epoch": 213.94, "eval_cer": 0.06948952393805459, "eval_loss": 0.42218008637428284, "eval_runtime": 116.7394, "eval_samples_per_second": 26.658, "eval_steps_per_second": 3.332, "eval_wer": 0.31912624258714106, "step": 9200 }, { "epoch": 223.25, "learning_rate": 0.00013658083832335328, "loss": 0.0166, "step": 9600 }, { "epoch": 223.25, "eval_cer": 0.06777556597725969, "eval_loss": 0.38604938983917236, "eval_runtime": 116.6232, "eval_samples_per_second": 26.684, "eval_steps_per_second": 3.336, "eval_wer": 0.31345193907589913, "step": 9600 }, { "epoch": 232.55, "learning_rate": 0.0001293952095808383, "loss": 0.0156, "step": 10000 }, { "epoch": 232.55, "eval_cer": 0.0677013394513985, "eval_loss": 0.40345117449760437, "eval_runtime": 117.639, "eval_samples_per_second": 26.454, "eval_steps_per_second": 3.307, "eval_wer": 0.31170271769273433, "step": 10000 }, { "epoch": 241.85, "learning_rate": 0.00012220958083832334, "loss": 0.0149, "step": 10400 }, { "epoch": 241.85, "eval_cer": 0.0677013394513985, "eval_loss": 0.39512303471565247, "eval_runtime": 120.4059, "eval_samples_per_second": 25.846, "eval_steps_per_second": 3.231, "eval_wer": 0.30867357822432695, "step": 10400 }, { "epoch": 251.16, "learning_rate": 0.00011502395209580837, "loss": 0.0142, "step": 10800 }, { "epoch": 251.16, "eval_cer": 0.06735045042005466, "eval_loss": 0.3971852958202362, "eval_runtime": 118.3102, "eval_samples_per_second": 26.304, "eval_steps_per_second": 3.288, "eval_wer": 0.309697512692521, "step": 10800 }, { "epoch": 260.46, "learning_rate": 0.0001078383233532934, "loss": 0.0134, "step": 11200 }, { "epoch": 260.46, "eval_cer": 0.06749215560578967, "eval_loss": 0.40693503618240356, "eval_runtime": 120.6637, "eval_samples_per_second": 25.791, "eval_steps_per_second": 3.224, "eval_wer": 0.31114808652246256, "step": 11200 }, { "epoch": 269.76, "learning_rate": 0.00010065269461077844, "loss": 0.0116, "step": 11600 }, { "epoch": 269.76, "eval_cer": 0.06968521205168865, "eval_loss": 0.41885173320770264, "eval_runtime": 118.3717, "eval_samples_per_second": 26.29, "eval_steps_per_second": 3.286, "eval_wer": 0.31609710311873374, "step": 11600 }, { "epoch": 279.07, "learning_rate": 9.346706586826346e-05, "loss": 0.0119, "step": 12000 }, { "epoch": 279.07, "eval_cer": 0.0648200006747866, "eval_loss": 0.3901657462120056, "eval_runtime": 119.8759, "eval_samples_per_second": 25.96, "eval_steps_per_second": 3.245, "eval_wer": 0.3008234139681727, "step": 12000 }, { "epoch": 288.37, "learning_rate": 8.62814371257485e-05, "loss": 0.0098, "step": 12400 }, { "epoch": 288.37, "eval_cer": 0.06515064610816829, "eval_loss": 0.40946489572525024, "eval_runtime": 120.8583, "eval_samples_per_second": 25.749, "eval_steps_per_second": 3.219, "eval_wer": 0.30018345492555143, "step": 12400 }, { "epoch": 297.67, "learning_rate": 7.909580838323352e-05, "loss": 0.0091, "step": 12800 }, { "epoch": 297.67, "eval_cer": 0.06441512871554371, "eval_loss": 0.3892023265361786, "eval_runtime": 118.1231, "eval_samples_per_second": 26.345, "eval_steps_per_second": 3.293, "eval_wer": 0.2989888647126584, "step": 12800 }, { "epoch": 306.96, "learning_rate": 7.191017964071855e-05, "loss": 0.0094, "step": 13200 }, { "epoch": 306.96, "eval_cer": 0.06469853908701373, "eval_loss": 0.40261197090148926, "eval_runtime": 118.2919, "eval_samples_per_second": 26.308, "eval_steps_per_second": 3.288, "eval_wer": 0.29834890567003713, "step": 13200 }, { "epoch": 316.28, "learning_rate": 6.474251497005988e-05, "loss": 0.0081, "step": 13600 }, { "epoch": 316.28, "eval_cer": 0.06462431256115253, "eval_loss": 0.4302999675273895, "eval_runtime": 118.0293, "eval_samples_per_second": 26.366, "eval_steps_per_second": 3.296, "eval_wer": 0.29779427449976537, "step": 13600 }, { "epoch": 325.57, "learning_rate": 5.7556886227544904e-05, "loss": 0.0079, "step": 14000 }, { "epoch": 325.57, "eval_cer": 0.06431391072573299, "eval_loss": 0.40440893173217773, "eval_runtime": 118.2184, "eval_samples_per_second": 26.324, "eval_steps_per_second": 3.291, "eval_wer": 0.29796493024446435, "step": 14000 }, { "epoch": 334.87, "learning_rate": 5.038922155688622e-05, "loss": 0.0072, "step": 14400 }, { "epoch": 334.87, "eval_cer": 0.06546779580957522, "eval_loss": 0.382755845785141, "eval_runtime": 118.6873, "eval_samples_per_second": 26.22, "eval_steps_per_second": 3.278, "eval_wer": 0.29992747130850295, "step": 14400 }, { "epoch": 344.18, "learning_rate": 9.578571428571428e-05, "loss": 0.0081, "step": 14800 }, { "epoch": 344.18, "eval_cer": 0.06676338607915247, "eval_loss": 0.4108315706253052, "eval_runtime": 114.661, "eval_samples_per_second": 27.141, "eval_steps_per_second": 3.393, "eval_wer": 0.30457784035155083, "step": 14800 }, { "epoch": 353.48, "learning_rate": 9.007142857142856e-05, "loss": 0.0088, "step": 15200 }, { "epoch": 353.48, "eval_cer": 0.06539356928371402, "eval_loss": 0.40191251039505005, "eval_runtime": 117.0774, "eval_samples_per_second": 26.581, "eval_steps_per_second": 3.323, "eval_wer": 0.2993301762020564, "step": 15200 }, { "epoch": 362.78, "learning_rate": 8.435714285714286e-05, "loss": 0.0088, "step": 15600 }, { "epoch": 362.78, "eval_cer": 0.06814669860656568, "eval_loss": 0.4072999954223633, "eval_runtime": 114.628, "eval_samples_per_second": 27.149, "eval_steps_per_second": 3.394, "eval_wer": 0.3091002175860745, "step": 15600 }, { "epoch": 372.09, "learning_rate": 7.864285714285714e-05, "loss": 0.0079, "step": 16000 }, { "epoch": 372.09, "eval_cer": 0.0667161510172408, "eval_loss": 0.42044562101364136, "eval_runtime": 115.1974, "eval_samples_per_second": 27.014, "eval_steps_per_second": 3.377, "eval_wer": 0.30547378301122063, "step": 16000 }, { "epoch": 381.39, "learning_rate": 7.292857142857142e-05, "loss": 0.0072, "step": 16400 }, { "epoch": 381.39, "eval_cer": 0.06564998819123452, "eval_loss": 0.40300747752189636, "eval_runtime": 114.5196, "eval_samples_per_second": 27.174, "eval_steps_per_second": 3.397, "eval_wer": 0.3027859550322113, "step": 16400 }, { "epoch": 390.69, "learning_rate": 6.721428571428571e-05, "loss": 0.0073, "step": 16800 }, { "epoch": 390.69, "eval_cer": 0.0677350787813354, "eval_loss": 0.4031626284122467, "eval_runtime": 114.7831, "eval_samples_per_second": 27.112, "eval_steps_per_second": 3.389, "eval_wer": 0.30807628311788043, "step": 16800 }, { "epoch": 399.99, "learning_rate": 6.151428571428571e-05, "loss": 0.0069, "step": 17200 }, { "epoch": 399.99, "eval_cer": 0.06693208272883701, "eval_loss": 0.41302183270454407, "eval_runtime": 114.8457, "eval_samples_per_second": 27.097, "eval_steps_per_second": 3.387, "eval_wer": 0.30214599598959, "step": 17200 }, { "epoch": 409.3, "learning_rate": 5.5799999999999994e-05, "loss": 0.0063, "step": 17600 }, { "epoch": 409.3, "eval_cer": 0.06513715037619353, "eval_loss": 0.4071926772594452, "eval_runtime": 114.6356, "eval_samples_per_second": 27.147, "eval_steps_per_second": 3.393, "eval_wer": 0.2979222663082896, "step": 17600 }, { "epoch": 418.6, "learning_rate": 5.008571428571428e-05, "loss": 0.0059, "step": 18000 }, { "epoch": 418.6, "eval_cer": 0.06403724822025035, "eval_loss": 0.41102761030197144, "eval_runtime": 116.5576, "eval_samples_per_second": 26.699, "eval_steps_per_second": 3.337, "eval_wer": 0.2969409957762703, "step": 18000 }, { "epoch": 427.9, "learning_rate": 4.437142857142857e-05, "loss": 0.0056, "step": 18400 }, { "epoch": 427.9, "eval_cer": 0.06465805189108945, "eval_loss": 0.4228787422180176, "eval_runtime": 114.7096, "eval_samples_per_second": 27.129, "eval_steps_per_second": 3.391, "eval_wer": 0.29945816801058067, "step": 18400 }, { "epoch": 437.21, "learning_rate": 3.8657142857142856e-05, "loss": 0.005, "step": 18800 }, { "epoch": 437.21, "eval_cer": 0.062370525321367117, "eval_loss": 0.41175001859664917, "eval_runtime": 115.5821, "eval_samples_per_second": 26.925, "eval_steps_per_second": 3.366, "eval_wer": 0.2884508724774948, "step": 18800 }, { "epoch": 446.51, "learning_rate": 3.294285714285714e-05, "loss": 0.0046, "step": 19200 }, { "epoch": 446.51, "eval_cer": 0.06147305914504538, "eval_loss": 0.41112595796585083, "eval_runtime": 115.5282, "eval_samples_per_second": 26.937, "eval_steps_per_second": 3.367, "eval_wer": 0.28409915098767013, "step": 19200 }, { "epoch": 455.8, "learning_rate": 2.7228571428571427e-05, "loss": 0.0043, "step": 19600 }, { "epoch": 455.8, "eval_cer": 0.06160126859880563, "eval_loss": 0.40707847476005554, "eval_runtime": 117.681, "eval_samples_per_second": 26.444, "eval_steps_per_second": 3.306, "eval_wer": 0.28495242971116513, "step": 19600 }, { "epoch": 465.11, "learning_rate": 2.1514285714285714e-05, "loss": 0.0038, "step": 20000 }, { "epoch": 465.11, "eval_cer": 0.062363777455379736, "eval_loss": 0.4267757534980774, "eval_runtime": 115.119, "eval_samples_per_second": 27.033, "eval_steps_per_second": 3.379, "eval_wer": 0.28670165109433, "step": 20000 }, { "epoch": 474.41, "learning_rate": 1.5799999999999998e-05, "loss": 0.0035, "step": 20400 }, { "epoch": 474.41, "eval_cer": 0.06053510577279935, "eval_loss": 0.4116959869861603, "eval_runtime": 115.3416, "eval_samples_per_second": 26.981, "eval_steps_per_second": 3.373, "eval_wer": 0.2820086181151073, "step": 20400 }, { "epoch": 483.71, "learning_rate": 1.0085714285714285e-05, "loss": 0.0035, "step": 20800 }, { "epoch": 483.71, "eval_cer": 0.060238199669354564, "eval_loss": 0.4154604375362396, "eval_runtime": 115.2471, "eval_samples_per_second": 27.003, "eval_steps_per_second": 3.375, "eval_wer": 0.2819232902427578, "step": 20800 }, { "epoch": 493.02, "learning_rate": 4.371428571428571e-06, "loss": 0.0034, "step": 21200 }, { "epoch": 493.02, "eval_cer": 0.06007625088565741, "eval_loss": 0.41654759645462036, "eval_runtime": 115.3904, "eval_samples_per_second": 26.969, "eval_steps_per_second": 3.371, "eval_wer": 0.27991808524254447, "step": 21200 } ], "max_steps": 21500, "num_train_epochs": 500, "total_flos": 5.302246573116527e+20, "trial_name": null, "trial_params": null }