|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 493.02296211251434, |
|
"global_step": 21200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 4.3982, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"eval_cer": 0.14366206687135194, |
|
"eval_loss": 0.5217852592468262, |
|
"eval_runtime": 117.8175, |
|
"eval_samples_per_second": 26.414, |
|
"eval_steps_per_second": 3.302, |
|
"eval_wer": 0.6506676906011348, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"learning_rate": 0.00029461077844311373, |
|
"loss": 0.229, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"eval_cer": 0.08482742332737273, |
|
"eval_loss": 0.36793914437294006, |
|
"eval_runtime": 118.0263, |
|
"eval_samples_per_second": 26.367, |
|
"eval_steps_per_second": 3.296, |
|
"eval_wer": 0.40479542642604205, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 27.9, |
|
"learning_rate": 0.00028742514970059877, |
|
"loss": 0.1054, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 27.9, |
|
"eval_cer": 0.07780964270049597, |
|
"eval_loss": 0.38127270340919495, |
|
"eval_runtime": 116.0512, |
|
"eval_samples_per_second": 26.816, |
|
"eval_steps_per_second": 3.352, |
|
"eval_wer": 0.367037842911387, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 37.21, |
|
"learning_rate": 0.0002802395209580838, |
|
"loss": 0.0784, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 37.21, |
|
"eval_cer": 0.07465164141840143, |
|
"eval_loss": 0.3839242458343506, |
|
"eval_runtime": 116.2982, |
|
"eval_samples_per_second": 26.759, |
|
"eval_steps_per_second": 3.345, |
|
"eval_wer": 0.35496394897393235, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 46.51, |
|
"learning_rate": 0.00027305389221556883, |
|
"loss": 0.066, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 46.51, |
|
"eval_cer": 0.07363946152029421, |
|
"eval_loss": 0.3969549238681793, |
|
"eval_runtime": 118.9877, |
|
"eval_samples_per_second": 26.154, |
|
"eval_steps_per_second": 3.269, |
|
"eval_wer": 0.3443406288664192, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 55.8, |
|
"learning_rate": 0.00026586826347305386, |
|
"loss": 0.0603, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 55.8, |
|
"eval_cer": 0.07218192246701981, |
|
"eval_loss": 0.3702129125595093, |
|
"eval_runtime": 118.9492, |
|
"eval_samples_per_second": 26.162, |
|
"eval_steps_per_second": 3.27, |
|
"eval_wer": 0.3393489483339733, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 65.11, |
|
"learning_rate": 0.0002586826347305389, |
|
"loss": 0.0539, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 65.11, |
|
"eval_cer": 0.07241809777657816, |
|
"eval_loss": 0.3762107491493225, |
|
"eval_runtime": 117.3723, |
|
"eval_samples_per_second": 26.514, |
|
"eval_steps_per_second": 3.314, |
|
"eval_wer": 0.33875165322752676, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 74.41, |
|
"learning_rate": 0.00025149700598802393, |
|
"loss": 0.0497, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 74.41, |
|
"eval_cer": 0.07128445629069807, |
|
"eval_loss": 0.36228740215301514, |
|
"eval_runtime": 116.3914, |
|
"eval_samples_per_second": 26.737, |
|
"eval_steps_per_second": 3.342, |
|
"eval_wer": 0.3413541533341866, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 83.71, |
|
"learning_rate": 0.00024431137724550896, |
|
"loss": 0.0432, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 83.71, |
|
"eval_cer": 0.07248557643645197, |
|
"eval_loss": 0.3847475051879883, |
|
"eval_runtime": 116.3314, |
|
"eval_samples_per_second": 26.751, |
|
"eval_steps_per_second": 3.344, |
|
"eval_wer": 0.3346132514185759, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 93.02, |
|
"learning_rate": 0.000237125748502994, |
|
"loss": 0.0438, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 93.02, |
|
"eval_cer": 0.07503626977968217, |
|
"eval_loss": 0.4057835340499878, |
|
"eval_runtime": 116.616, |
|
"eval_samples_per_second": 26.686, |
|
"eval_steps_per_second": 3.336, |
|
"eval_wer": 0.3393489483339733, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 102.32, |
|
"learning_rate": 0.00022994011976047902, |
|
"loss": 0.0413, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 102.32, |
|
"eval_cer": 0.07270825601403556, |
|
"eval_loss": 0.3957100510597229, |
|
"eval_runtime": 116.9156, |
|
"eval_samples_per_second": 26.617, |
|
"eval_steps_per_second": 3.327, |
|
"eval_wer": 0.3362771449293912, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 111.62, |
|
"learning_rate": 0.00022275449101796406, |
|
"loss": 0.039, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 111.62, |
|
"eval_cer": 0.07179729410573905, |
|
"eval_loss": 0.386459618806839, |
|
"eval_runtime": 119.0004, |
|
"eval_samples_per_second": 26.151, |
|
"eval_steps_per_second": 3.269, |
|
"eval_wer": 0.3330346857801101, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 120.92, |
|
"learning_rate": 0.0002155688622754491, |
|
"loss": 0.0356, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 120.92, |
|
"eval_cer": 0.07110226390903876, |
|
"eval_loss": 0.38599926233291626, |
|
"eval_runtime": 118.9882, |
|
"eval_samples_per_second": 26.154, |
|
"eval_steps_per_second": 3.269, |
|
"eval_wer": 0.33192542343956655, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 130.23, |
|
"learning_rate": 0.00020838323353293412, |
|
"loss": 0.0336, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 130.23, |
|
"eval_cer": 0.07001585748507035, |
|
"eval_loss": 0.3902195692062378, |
|
"eval_runtime": 116.3404, |
|
"eval_samples_per_second": 26.749, |
|
"eval_steps_per_second": 3.344, |
|
"eval_wer": 0.3241605870557618, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 139.53, |
|
"learning_rate": 0.00020119760479041913, |
|
"loss": 0.034, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 139.53, |
|
"eval_cer": 0.07322109382907656, |
|
"eval_loss": 0.39304569363594055, |
|
"eval_runtime": 116.7151, |
|
"eval_samples_per_second": 26.663, |
|
"eval_steps_per_second": 3.333, |
|
"eval_wer": 0.3337173087589061, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 148.83, |
|
"learning_rate": 0.00019402994011976046, |
|
"loss": 0.0273, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 148.83, |
|
"eval_cer": 0.07479334660413645, |
|
"eval_loss": 0.39119070768356323, |
|
"eval_runtime": 116.4756, |
|
"eval_samples_per_second": 26.718, |
|
"eval_steps_per_second": 3.34, |
|
"eval_wer": 0.33747173514228423, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 158.14, |
|
"learning_rate": 0.0001868443113772455, |
|
"loss": 0.027, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 158.14, |
|
"eval_cer": 0.07523870575930362, |
|
"eval_loss": 0.42656052112579346, |
|
"eval_runtime": 116.5029, |
|
"eval_samples_per_second": 26.712, |
|
"eval_steps_per_second": 3.339, |
|
"eval_wer": 0.34344468620674945, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 167.44, |
|
"learning_rate": 0.0001796586826347305, |
|
"loss": 0.028, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 167.44, |
|
"eval_cer": 0.0707648706096697, |
|
"eval_loss": 0.38949263095855713, |
|
"eval_runtime": 116.3646, |
|
"eval_samples_per_second": 26.744, |
|
"eval_steps_per_second": 3.343, |
|
"eval_wer": 0.32266734928964547, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 176.73, |
|
"learning_rate": 0.00017247305389221556, |
|
"loss": 0.0241, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 176.73, |
|
"eval_cer": 0.07274874320995985, |
|
"eval_loss": 0.3967472016811371, |
|
"eval_runtime": 116.4729, |
|
"eval_samples_per_second": 26.719, |
|
"eval_steps_per_second": 3.34, |
|
"eval_wer": 0.32936558726908144, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 186.05, |
|
"learning_rate": 0.00016530538922155687, |
|
"loss": 0.0241, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 186.05, |
|
"eval_cer": 0.07122372549681164, |
|
"eval_loss": 0.4058191776275635, |
|
"eval_runtime": 116.3498, |
|
"eval_samples_per_second": 26.747, |
|
"eval_steps_per_second": 3.343, |
|
"eval_wer": 0.32548316907717906, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 195.34, |
|
"learning_rate": 0.0001581197604790419, |
|
"loss": 0.0209, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 195.34, |
|
"eval_cer": 0.07019804986672964, |
|
"eval_loss": 0.4101807177066803, |
|
"eval_runtime": 116.6051, |
|
"eval_samples_per_second": 26.688, |
|
"eval_steps_per_second": 3.336, |
|
"eval_wer": 0.3233499722684415, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 204.64, |
|
"learning_rate": 0.00015093413173652694, |
|
"loss": 0.0206, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 204.64, |
|
"eval_cer": 0.06987415229933533, |
|
"eval_loss": 0.40751102566719055, |
|
"eval_runtime": 117.508, |
|
"eval_samples_per_second": 26.483, |
|
"eval_steps_per_second": 3.31, |
|
"eval_wer": 0.3193822262041896, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 213.94, |
|
"learning_rate": 0.00014376646706586825, |
|
"loss": 0.0172, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 213.94, |
|
"eval_cer": 0.06948952393805459, |
|
"eval_loss": 0.42218008637428284, |
|
"eval_runtime": 116.7394, |
|
"eval_samples_per_second": 26.658, |
|
"eval_steps_per_second": 3.332, |
|
"eval_wer": 0.31912624258714106, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 223.25, |
|
"learning_rate": 0.00013658083832335328, |
|
"loss": 0.0166, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 223.25, |
|
"eval_cer": 0.06777556597725969, |
|
"eval_loss": 0.38604938983917236, |
|
"eval_runtime": 116.6232, |
|
"eval_samples_per_second": 26.684, |
|
"eval_steps_per_second": 3.336, |
|
"eval_wer": 0.31345193907589913, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 232.55, |
|
"learning_rate": 0.0001293952095808383, |
|
"loss": 0.0156, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 232.55, |
|
"eval_cer": 0.0677013394513985, |
|
"eval_loss": 0.40345117449760437, |
|
"eval_runtime": 117.639, |
|
"eval_samples_per_second": 26.454, |
|
"eval_steps_per_second": 3.307, |
|
"eval_wer": 0.31170271769273433, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 241.85, |
|
"learning_rate": 0.00012220958083832334, |
|
"loss": 0.0149, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 241.85, |
|
"eval_cer": 0.0677013394513985, |
|
"eval_loss": 0.39512303471565247, |
|
"eval_runtime": 120.4059, |
|
"eval_samples_per_second": 25.846, |
|
"eval_steps_per_second": 3.231, |
|
"eval_wer": 0.30867357822432695, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 251.16, |
|
"learning_rate": 0.00011502395209580837, |
|
"loss": 0.0142, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 251.16, |
|
"eval_cer": 0.06735045042005466, |
|
"eval_loss": 0.3971852958202362, |
|
"eval_runtime": 118.3102, |
|
"eval_samples_per_second": 26.304, |
|
"eval_steps_per_second": 3.288, |
|
"eval_wer": 0.309697512692521, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 260.46, |
|
"learning_rate": 0.0001078383233532934, |
|
"loss": 0.0134, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 260.46, |
|
"eval_cer": 0.06749215560578967, |
|
"eval_loss": 0.40693503618240356, |
|
"eval_runtime": 120.6637, |
|
"eval_samples_per_second": 25.791, |
|
"eval_steps_per_second": 3.224, |
|
"eval_wer": 0.31114808652246256, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 269.76, |
|
"learning_rate": 0.00010065269461077844, |
|
"loss": 0.0116, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 269.76, |
|
"eval_cer": 0.06968521205168865, |
|
"eval_loss": 0.41885173320770264, |
|
"eval_runtime": 118.3717, |
|
"eval_samples_per_second": 26.29, |
|
"eval_steps_per_second": 3.286, |
|
"eval_wer": 0.31609710311873374, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 279.07, |
|
"learning_rate": 9.346706586826346e-05, |
|
"loss": 0.0119, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 279.07, |
|
"eval_cer": 0.0648200006747866, |
|
"eval_loss": 0.3901657462120056, |
|
"eval_runtime": 119.8759, |
|
"eval_samples_per_second": 25.96, |
|
"eval_steps_per_second": 3.245, |
|
"eval_wer": 0.3008234139681727, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 288.37, |
|
"learning_rate": 8.62814371257485e-05, |
|
"loss": 0.0098, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 288.37, |
|
"eval_cer": 0.06515064610816829, |
|
"eval_loss": 0.40946489572525024, |
|
"eval_runtime": 120.8583, |
|
"eval_samples_per_second": 25.749, |
|
"eval_steps_per_second": 3.219, |
|
"eval_wer": 0.30018345492555143, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 297.67, |
|
"learning_rate": 7.909580838323352e-05, |
|
"loss": 0.0091, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 297.67, |
|
"eval_cer": 0.06441512871554371, |
|
"eval_loss": 0.3892023265361786, |
|
"eval_runtime": 118.1231, |
|
"eval_samples_per_second": 26.345, |
|
"eval_steps_per_second": 3.293, |
|
"eval_wer": 0.2989888647126584, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 306.96, |
|
"learning_rate": 7.191017964071855e-05, |
|
"loss": 0.0094, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 306.96, |
|
"eval_cer": 0.06469853908701373, |
|
"eval_loss": 0.40261197090148926, |
|
"eval_runtime": 118.2919, |
|
"eval_samples_per_second": 26.308, |
|
"eval_steps_per_second": 3.288, |
|
"eval_wer": 0.29834890567003713, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 316.28, |
|
"learning_rate": 6.474251497005988e-05, |
|
"loss": 0.0081, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 316.28, |
|
"eval_cer": 0.06462431256115253, |
|
"eval_loss": 0.4302999675273895, |
|
"eval_runtime": 118.0293, |
|
"eval_samples_per_second": 26.366, |
|
"eval_steps_per_second": 3.296, |
|
"eval_wer": 0.29779427449976537, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 325.57, |
|
"learning_rate": 5.7556886227544904e-05, |
|
"loss": 0.0079, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 325.57, |
|
"eval_cer": 0.06431391072573299, |
|
"eval_loss": 0.40440893173217773, |
|
"eval_runtime": 118.2184, |
|
"eval_samples_per_second": 26.324, |
|
"eval_steps_per_second": 3.291, |
|
"eval_wer": 0.29796493024446435, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 334.87, |
|
"learning_rate": 5.038922155688622e-05, |
|
"loss": 0.0072, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 334.87, |
|
"eval_cer": 0.06546779580957522, |
|
"eval_loss": 0.382755845785141, |
|
"eval_runtime": 118.6873, |
|
"eval_samples_per_second": 26.22, |
|
"eval_steps_per_second": 3.278, |
|
"eval_wer": 0.29992747130850295, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 344.18, |
|
"learning_rate": 9.578571428571428e-05, |
|
"loss": 0.0081, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 344.18, |
|
"eval_cer": 0.06676338607915247, |
|
"eval_loss": 0.4108315706253052, |
|
"eval_runtime": 114.661, |
|
"eval_samples_per_second": 27.141, |
|
"eval_steps_per_second": 3.393, |
|
"eval_wer": 0.30457784035155083, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 353.48, |
|
"learning_rate": 9.007142857142856e-05, |
|
"loss": 0.0088, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 353.48, |
|
"eval_cer": 0.06539356928371402, |
|
"eval_loss": 0.40191251039505005, |
|
"eval_runtime": 117.0774, |
|
"eval_samples_per_second": 26.581, |
|
"eval_steps_per_second": 3.323, |
|
"eval_wer": 0.2993301762020564, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 362.78, |
|
"learning_rate": 8.435714285714286e-05, |
|
"loss": 0.0088, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 362.78, |
|
"eval_cer": 0.06814669860656568, |
|
"eval_loss": 0.4072999954223633, |
|
"eval_runtime": 114.628, |
|
"eval_samples_per_second": 27.149, |
|
"eval_steps_per_second": 3.394, |
|
"eval_wer": 0.3091002175860745, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 372.09, |
|
"learning_rate": 7.864285714285714e-05, |
|
"loss": 0.0079, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 372.09, |
|
"eval_cer": 0.0667161510172408, |
|
"eval_loss": 0.42044562101364136, |
|
"eval_runtime": 115.1974, |
|
"eval_samples_per_second": 27.014, |
|
"eval_steps_per_second": 3.377, |
|
"eval_wer": 0.30547378301122063, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 381.39, |
|
"learning_rate": 7.292857142857142e-05, |
|
"loss": 0.0072, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 381.39, |
|
"eval_cer": 0.06564998819123452, |
|
"eval_loss": 0.40300747752189636, |
|
"eval_runtime": 114.5196, |
|
"eval_samples_per_second": 27.174, |
|
"eval_steps_per_second": 3.397, |
|
"eval_wer": 0.3027859550322113, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 390.69, |
|
"learning_rate": 6.721428571428571e-05, |
|
"loss": 0.0073, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 390.69, |
|
"eval_cer": 0.0677350787813354, |
|
"eval_loss": 0.4031626284122467, |
|
"eval_runtime": 114.7831, |
|
"eval_samples_per_second": 27.112, |
|
"eval_steps_per_second": 3.389, |
|
"eval_wer": 0.30807628311788043, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 399.99, |
|
"learning_rate": 6.151428571428571e-05, |
|
"loss": 0.0069, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 399.99, |
|
"eval_cer": 0.06693208272883701, |
|
"eval_loss": 0.41302183270454407, |
|
"eval_runtime": 114.8457, |
|
"eval_samples_per_second": 27.097, |
|
"eval_steps_per_second": 3.387, |
|
"eval_wer": 0.30214599598959, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 409.3, |
|
"learning_rate": 5.5799999999999994e-05, |
|
"loss": 0.0063, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 409.3, |
|
"eval_cer": 0.06513715037619353, |
|
"eval_loss": 0.4071926772594452, |
|
"eval_runtime": 114.6356, |
|
"eval_samples_per_second": 27.147, |
|
"eval_steps_per_second": 3.393, |
|
"eval_wer": 0.2979222663082896, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 418.6, |
|
"learning_rate": 5.008571428571428e-05, |
|
"loss": 0.0059, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 418.6, |
|
"eval_cer": 0.06403724822025035, |
|
"eval_loss": 0.41102761030197144, |
|
"eval_runtime": 116.5576, |
|
"eval_samples_per_second": 26.699, |
|
"eval_steps_per_second": 3.337, |
|
"eval_wer": 0.2969409957762703, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 427.9, |
|
"learning_rate": 4.437142857142857e-05, |
|
"loss": 0.0056, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 427.9, |
|
"eval_cer": 0.06465805189108945, |
|
"eval_loss": 0.4228787422180176, |
|
"eval_runtime": 114.7096, |
|
"eval_samples_per_second": 27.129, |
|
"eval_steps_per_second": 3.391, |
|
"eval_wer": 0.29945816801058067, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 437.21, |
|
"learning_rate": 3.8657142857142856e-05, |
|
"loss": 0.005, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 437.21, |
|
"eval_cer": 0.062370525321367117, |
|
"eval_loss": 0.41175001859664917, |
|
"eval_runtime": 115.5821, |
|
"eval_samples_per_second": 26.925, |
|
"eval_steps_per_second": 3.366, |
|
"eval_wer": 0.2884508724774948, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 446.51, |
|
"learning_rate": 3.294285714285714e-05, |
|
"loss": 0.0046, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 446.51, |
|
"eval_cer": 0.06147305914504538, |
|
"eval_loss": 0.41112595796585083, |
|
"eval_runtime": 115.5282, |
|
"eval_samples_per_second": 26.937, |
|
"eval_steps_per_second": 3.367, |
|
"eval_wer": 0.28409915098767013, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 455.8, |
|
"learning_rate": 2.7228571428571427e-05, |
|
"loss": 0.0043, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 455.8, |
|
"eval_cer": 0.06160126859880563, |
|
"eval_loss": 0.40707847476005554, |
|
"eval_runtime": 117.681, |
|
"eval_samples_per_second": 26.444, |
|
"eval_steps_per_second": 3.306, |
|
"eval_wer": 0.28495242971116513, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 465.11, |
|
"learning_rate": 2.1514285714285714e-05, |
|
"loss": 0.0038, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 465.11, |
|
"eval_cer": 0.062363777455379736, |
|
"eval_loss": 0.4267757534980774, |
|
"eval_runtime": 115.119, |
|
"eval_samples_per_second": 27.033, |
|
"eval_steps_per_second": 3.379, |
|
"eval_wer": 0.28670165109433, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 474.41, |
|
"learning_rate": 1.5799999999999998e-05, |
|
"loss": 0.0035, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 474.41, |
|
"eval_cer": 0.06053510577279935, |
|
"eval_loss": 0.4116959869861603, |
|
"eval_runtime": 115.3416, |
|
"eval_samples_per_second": 26.981, |
|
"eval_steps_per_second": 3.373, |
|
"eval_wer": 0.2820086181151073, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 483.71, |
|
"learning_rate": 1.0085714285714285e-05, |
|
"loss": 0.0035, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 483.71, |
|
"eval_cer": 0.060238199669354564, |
|
"eval_loss": 0.4154604375362396, |
|
"eval_runtime": 115.2471, |
|
"eval_samples_per_second": 27.003, |
|
"eval_steps_per_second": 3.375, |
|
"eval_wer": 0.2819232902427578, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 493.02, |
|
"learning_rate": 4.371428571428571e-06, |
|
"loss": 0.0034, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 493.02, |
|
"eval_cer": 0.06007625088565741, |
|
"eval_loss": 0.41654759645462036, |
|
"eval_runtime": 115.3904, |
|
"eval_samples_per_second": 26.969, |
|
"eval_steps_per_second": 3.371, |
|
"eval_wer": 0.27991808524254447, |
|
"step": 21200 |
|
} |
|
], |
|
"max_steps": 21500, |
|
"num_train_epochs": 500, |
|
"total_flos": 5.302246573116527e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|