wav2vec2-xls-r-300m-uk / trainer_state.json
Yurii Paniv
Update model to WER 27.99%
2162892
raw
history blame
21.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 493.02296211251434,
"global_step": 21200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 9.3,
"learning_rate": 0.00023999999999999998,
"loss": 4.3982,
"step": 400
},
{
"epoch": 9.3,
"eval_cer": 0.14366206687135194,
"eval_loss": 0.5217852592468262,
"eval_runtime": 117.8175,
"eval_samples_per_second": 26.414,
"eval_steps_per_second": 3.302,
"eval_wer": 0.6506676906011348,
"step": 400
},
{
"epoch": 18.6,
"learning_rate": 0.00029461077844311373,
"loss": 0.229,
"step": 800
},
{
"epoch": 18.6,
"eval_cer": 0.08482742332737273,
"eval_loss": 0.36793914437294006,
"eval_runtime": 118.0263,
"eval_samples_per_second": 26.367,
"eval_steps_per_second": 3.296,
"eval_wer": 0.40479542642604205,
"step": 800
},
{
"epoch": 27.9,
"learning_rate": 0.00028742514970059877,
"loss": 0.1054,
"step": 1200
},
{
"epoch": 27.9,
"eval_cer": 0.07780964270049597,
"eval_loss": 0.38127270340919495,
"eval_runtime": 116.0512,
"eval_samples_per_second": 26.816,
"eval_steps_per_second": 3.352,
"eval_wer": 0.367037842911387,
"step": 1200
},
{
"epoch": 37.21,
"learning_rate": 0.0002802395209580838,
"loss": 0.0784,
"step": 1600
},
{
"epoch": 37.21,
"eval_cer": 0.07465164141840143,
"eval_loss": 0.3839242458343506,
"eval_runtime": 116.2982,
"eval_samples_per_second": 26.759,
"eval_steps_per_second": 3.345,
"eval_wer": 0.35496394897393235,
"step": 1600
},
{
"epoch": 46.51,
"learning_rate": 0.00027305389221556883,
"loss": 0.066,
"step": 2000
},
{
"epoch": 46.51,
"eval_cer": 0.07363946152029421,
"eval_loss": 0.3969549238681793,
"eval_runtime": 118.9877,
"eval_samples_per_second": 26.154,
"eval_steps_per_second": 3.269,
"eval_wer": 0.3443406288664192,
"step": 2000
},
{
"epoch": 55.8,
"learning_rate": 0.00026586826347305386,
"loss": 0.0603,
"step": 2400
},
{
"epoch": 55.8,
"eval_cer": 0.07218192246701981,
"eval_loss": 0.3702129125595093,
"eval_runtime": 118.9492,
"eval_samples_per_second": 26.162,
"eval_steps_per_second": 3.27,
"eval_wer": 0.3393489483339733,
"step": 2400
},
{
"epoch": 65.11,
"learning_rate": 0.0002586826347305389,
"loss": 0.0539,
"step": 2800
},
{
"epoch": 65.11,
"eval_cer": 0.07241809777657816,
"eval_loss": 0.3762107491493225,
"eval_runtime": 117.3723,
"eval_samples_per_second": 26.514,
"eval_steps_per_second": 3.314,
"eval_wer": 0.33875165322752676,
"step": 2800
},
{
"epoch": 74.41,
"learning_rate": 0.00025149700598802393,
"loss": 0.0497,
"step": 3200
},
{
"epoch": 74.41,
"eval_cer": 0.07128445629069807,
"eval_loss": 0.36228740215301514,
"eval_runtime": 116.3914,
"eval_samples_per_second": 26.737,
"eval_steps_per_second": 3.342,
"eval_wer": 0.3413541533341866,
"step": 3200
},
{
"epoch": 83.71,
"learning_rate": 0.00024431137724550896,
"loss": 0.0432,
"step": 3600
},
{
"epoch": 83.71,
"eval_cer": 0.07248557643645197,
"eval_loss": 0.3847475051879883,
"eval_runtime": 116.3314,
"eval_samples_per_second": 26.751,
"eval_steps_per_second": 3.344,
"eval_wer": 0.3346132514185759,
"step": 3600
},
{
"epoch": 93.02,
"learning_rate": 0.000237125748502994,
"loss": 0.0438,
"step": 4000
},
{
"epoch": 93.02,
"eval_cer": 0.07503626977968217,
"eval_loss": 0.4057835340499878,
"eval_runtime": 116.616,
"eval_samples_per_second": 26.686,
"eval_steps_per_second": 3.336,
"eval_wer": 0.3393489483339733,
"step": 4000
},
{
"epoch": 102.32,
"learning_rate": 0.00022994011976047902,
"loss": 0.0413,
"step": 4400
},
{
"epoch": 102.32,
"eval_cer": 0.07270825601403556,
"eval_loss": 0.3957100510597229,
"eval_runtime": 116.9156,
"eval_samples_per_second": 26.617,
"eval_steps_per_second": 3.327,
"eval_wer": 0.3362771449293912,
"step": 4400
},
{
"epoch": 111.62,
"learning_rate": 0.00022275449101796406,
"loss": 0.039,
"step": 4800
},
{
"epoch": 111.62,
"eval_cer": 0.07179729410573905,
"eval_loss": 0.386459618806839,
"eval_runtime": 119.0004,
"eval_samples_per_second": 26.151,
"eval_steps_per_second": 3.269,
"eval_wer": 0.3330346857801101,
"step": 4800
},
{
"epoch": 120.92,
"learning_rate": 0.0002155688622754491,
"loss": 0.0356,
"step": 5200
},
{
"epoch": 120.92,
"eval_cer": 0.07110226390903876,
"eval_loss": 0.38599926233291626,
"eval_runtime": 118.9882,
"eval_samples_per_second": 26.154,
"eval_steps_per_second": 3.269,
"eval_wer": 0.33192542343956655,
"step": 5200
},
{
"epoch": 130.23,
"learning_rate": 0.00020838323353293412,
"loss": 0.0336,
"step": 5600
},
{
"epoch": 130.23,
"eval_cer": 0.07001585748507035,
"eval_loss": 0.3902195692062378,
"eval_runtime": 116.3404,
"eval_samples_per_second": 26.749,
"eval_steps_per_second": 3.344,
"eval_wer": 0.3241605870557618,
"step": 5600
},
{
"epoch": 139.53,
"learning_rate": 0.00020119760479041913,
"loss": 0.034,
"step": 6000
},
{
"epoch": 139.53,
"eval_cer": 0.07322109382907656,
"eval_loss": 0.39304569363594055,
"eval_runtime": 116.7151,
"eval_samples_per_second": 26.663,
"eval_steps_per_second": 3.333,
"eval_wer": 0.3337173087589061,
"step": 6000
},
{
"epoch": 148.83,
"learning_rate": 0.00019402994011976046,
"loss": 0.0273,
"step": 6400
},
{
"epoch": 148.83,
"eval_cer": 0.07479334660413645,
"eval_loss": 0.39119070768356323,
"eval_runtime": 116.4756,
"eval_samples_per_second": 26.718,
"eval_steps_per_second": 3.34,
"eval_wer": 0.33747173514228423,
"step": 6400
},
{
"epoch": 158.14,
"learning_rate": 0.0001868443113772455,
"loss": 0.027,
"step": 6800
},
{
"epoch": 158.14,
"eval_cer": 0.07523870575930362,
"eval_loss": 0.42656052112579346,
"eval_runtime": 116.5029,
"eval_samples_per_second": 26.712,
"eval_steps_per_second": 3.339,
"eval_wer": 0.34344468620674945,
"step": 6800
},
{
"epoch": 167.44,
"learning_rate": 0.0001796586826347305,
"loss": 0.028,
"step": 7200
},
{
"epoch": 167.44,
"eval_cer": 0.0707648706096697,
"eval_loss": 0.38949263095855713,
"eval_runtime": 116.3646,
"eval_samples_per_second": 26.744,
"eval_steps_per_second": 3.343,
"eval_wer": 0.32266734928964547,
"step": 7200
},
{
"epoch": 176.73,
"learning_rate": 0.00017247305389221556,
"loss": 0.0241,
"step": 7600
},
{
"epoch": 176.73,
"eval_cer": 0.07274874320995985,
"eval_loss": 0.3967472016811371,
"eval_runtime": 116.4729,
"eval_samples_per_second": 26.719,
"eval_steps_per_second": 3.34,
"eval_wer": 0.32936558726908144,
"step": 7600
},
{
"epoch": 186.05,
"learning_rate": 0.00016530538922155687,
"loss": 0.0241,
"step": 8000
},
{
"epoch": 186.05,
"eval_cer": 0.07122372549681164,
"eval_loss": 0.4058191776275635,
"eval_runtime": 116.3498,
"eval_samples_per_second": 26.747,
"eval_steps_per_second": 3.343,
"eval_wer": 0.32548316907717906,
"step": 8000
},
{
"epoch": 195.34,
"learning_rate": 0.0001581197604790419,
"loss": 0.0209,
"step": 8400
},
{
"epoch": 195.34,
"eval_cer": 0.07019804986672964,
"eval_loss": 0.4101807177066803,
"eval_runtime": 116.6051,
"eval_samples_per_second": 26.688,
"eval_steps_per_second": 3.336,
"eval_wer": 0.3233499722684415,
"step": 8400
},
{
"epoch": 204.64,
"learning_rate": 0.00015093413173652694,
"loss": 0.0206,
"step": 8800
},
{
"epoch": 204.64,
"eval_cer": 0.06987415229933533,
"eval_loss": 0.40751102566719055,
"eval_runtime": 117.508,
"eval_samples_per_second": 26.483,
"eval_steps_per_second": 3.31,
"eval_wer": 0.3193822262041896,
"step": 8800
},
{
"epoch": 213.94,
"learning_rate": 0.00014376646706586825,
"loss": 0.0172,
"step": 9200
},
{
"epoch": 213.94,
"eval_cer": 0.06948952393805459,
"eval_loss": 0.42218008637428284,
"eval_runtime": 116.7394,
"eval_samples_per_second": 26.658,
"eval_steps_per_second": 3.332,
"eval_wer": 0.31912624258714106,
"step": 9200
},
{
"epoch": 223.25,
"learning_rate": 0.00013658083832335328,
"loss": 0.0166,
"step": 9600
},
{
"epoch": 223.25,
"eval_cer": 0.06777556597725969,
"eval_loss": 0.38604938983917236,
"eval_runtime": 116.6232,
"eval_samples_per_second": 26.684,
"eval_steps_per_second": 3.336,
"eval_wer": 0.31345193907589913,
"step": 9600
},
{
"epoch": 232.55,
"learning_rate": 0.0001293952095808383,
"loss": 0.0156,
"step": 10000
},
{
"epoch": 232.55,
"eval_cer": 0.0677013394513985,
"eval_loss": 0.40345117449760437,
"eval_runtime": 117.639,
"eval_samples_per_second": 26.454,
"eval_steps_per_second": 3.307,
"eval_wer": 0.31170271769273433,
"step": 10000
},
{
"epoch": 241.85,
"learning_rate": 0.00012220958083832334,
"loss": 0.0149,
"step": 10400
},
{
"epoch": 241.85,
"eval_cer": 0.0677013394513985,
"eval_loss": 0.39512303471565247,
"eval_runtime": 120.4059,
"eval_samples_per_second": 25.846,
"eval_steps_per_second": 3.231,
"eval_wer": 0.30867357822432695,
"step": 10400
},
{
"epoch": 251.16,
"learning_rate": 0.00011502395209580837,
"loss": 0.0142,
"step": 10800
},
{
"epoch": 251.16,
"eval_cer": 0.06735045042005466,
"eval_loss": 0.3971852958202362,
"eval_runtime": 118.3102,
"eval_samples_per_second": 26.304,
"eval_steps_per_second": 3.288,
"eval_wer": 0.309697512692521,
"step": 10800
},
{
"epoch": 260.46,
"learning_rate": 0.0001078383233532934,
"loss": 0.0134,
"step": 11200
},
{
"epoch": 260.46,
"eval_cer": 0.06749215560578967,
"eval_loss": 0.40693503618240356,
"eval_runtime": 120.6637,
"eval_samples_per_second": 25.791,
"eval_steps_per_second": 3.224,
"eval_wer": 0.31114808652246256,
"step": 11200
},
{
"epoch": 269.76,
"learning_rate": 0.00010065269461077844,
"loss": 0.0116,
"step": 11600
},
{
"epoch": 269.76,
"eval_cer": 0.06968521205168865,
"eval_loss": 0.41885173320770264,
"eval_runtime": 118.3717,
"eval_samples_per_second": 26.29,
"eval_steps_per_second": 3.286,
"eval_wer": 0.31609710311873374,
"step": 11600
},
{
"epoch": 279.07,
"learning_rate": 9.346706586826346e-05,
"loss": 0.0119,
"step": 12000
},
{
"epoch": 279.07,
"eval_cer": 0.0648200006747866,
"eval_loss": 0.3901657462120056,
"eval_runtime": 119.8759,
"eval_samples_per_second": 25.96,
"eval_steps_per_second": 3.245,
"eval_wer": 0.3008234139681727,
"step": 12000
},
{
"epoch": 288.37,
"learning_rate": 8.62814371257485e-05,
"loss": 0.0098,
"step": 12400
},
{
"epoch": 288.37,
"eval_cer": 0.06515064610816829,
"eval_loss": 0.40946489572525024,
"eval_runtime": 120.8583,
"eval_samples_per_second": 25.749,
"eval_steps_per_second": 3.219,
"eval_wer": 0.30018345492555143,
"step": 12400
},
{
"epoch": 297.67,
"learning_rate": 7.909580838323352e-05,
"loss": 0.0091,
"step": 12800
},
{
"epoch": 297.67,
"eval_cer": 0.06441512871554371,
"eval_loss": 0.3892023265361786,
"eval_runtime": 118.1231,
"eval_samples_per_second": 26.345,
"eval_steps_per_second": 3.293,
"eval_wer": 0.2989888647126584,
"step": 12800
},
{
"epoch": 306.96,
"learning_rate": 7.191017964071855e-05,
"loss": 0.0094,
"step": 13200
},
{
"epoch": 306.96,
"eval_cer": 0.06469853908701373,
"eval_loss": 0.40261197090148926,
"eval_runtime": 118.2919,
"eval_samples_per_second": 26.308,
"eval_steps_per_second": 3.288,
"eval_wer": 0.29834890567003713,
"step": 13200
},
{
"epoch": 316.28,
"learning_rate": 6.474251497005988e-05,
"loss": 0.0081,
"step": 13600
},
{
"epoch": 316.28,
"eval_cer": 0.06462431256115253,
"eval_loss": 0.4302999675273895,
"eval_runtime": 118.0293,
"eval_samples_per_second": 26.366,
"eval_steps_per_second": 3.296,
"eval_wer": 0.29779427449976537,
"step": 13600
},
{
"epoch": 325.57,
"learning_rate": 5.7556886227544904e-05,
"loss": 0.0079,
"step": 14000
},
{
"epoch": 325.57,
"eval_cer": 0.06431391072573299,
"eval_loss": 0.40440893173217773,
"eval_runtime": 118.2184,
"eval_samples_per_second": 26.324,
"eval_steps_per_second": 3.291,
"eval_wer": 0.29796493024446435,
"step": 14000
},
{
"epoch": 334.87,
"learning_rate": 5.038922155688622e-05,
"loss": 0.0072,
"step": 14400
},
{
"epoch": 334.87,
"eval_cer": 0.06546779580957522,
"eval_loss": 0.382755845785141,
"eval_runtime": 118.6873,
"eval_samples_per_second": 26.22,
"eval_steps_per_second": 3.278,
"eval_wer": 0.29992747130850295,
"step": 14400
},
{
"epoch": 344.18,
"learning_rate": 9.578571428571428e-05,
"loss": 0.0081,
"step": 14800
},
{
"epoch": 344.18,
"eval_cer": 0.06676338607915247,
"eval_loss": 0.4108315706253052,
"eval_runtime": 114.661,
"eval_samples_per_second": 27.141,
"eval_steps_per_second": 3.393,
"eval_wer": 0.30457784035155083,
"step": 14800
},
{
"epoch": 353.48,
"learning_rate": 9.007142857142856e-05,
"loss": 0.0088,
"step": 15200
},
{
"epoch": 353.48,
"eval_cer": 0.06539356928371402,
"eval_loss": 0.40191251039505005,
"eval_runtime": 117.0774,
"eval_samples_per_second": 26.581,
"eval_steps_per_second": 3.323,
"eval_wer": 0.2993301762020564,
"step": 15200
},
{
"epoch": 362.78,
"learning_rate": 8.435714285714286e-05,
"loss": 0.0088,
"step": 15600
},
{
"epoch": 362.78,
"eval_cer": 0.06814669860656568,
"eval_loss": 0.4072999954223633,
"eval_runtime": 114.628,
"eval_samples_per_second": 27.149,
"eval_steps_per_second": 3.394,
"eval_wer": 0.3091002175860745,
"step": 15600
},
{
"epoch": 372.09,
"learning_rate": 7.864285714285714e-05,
"loss": 0.0079,
"step": 16000
},
{
"epoch": 372.09,
"eval_cer": 0.0667161510172408,
"eval_loss": 0.42044562101364136,
"eval_runtime": 115.1974,
"eval_samples_per_second": 27.014,
"eval_steps_per_second": 3.377,
"eval_wer": 0.30547378301122063,
"step": 16000
},
{
"epoch": 381.39,
"learning_rate": 7.292857142857142e-05,
"loss": 0.0072,
"step": 16400
},
{
"epoch": 381.39,
"eval_cer": 0.06564998819123452,
"eval_loss": 0.40300747752189636,
"eval_runtime": 114.5196,
"eval_samples_per_second": 27.174,
"eval_steps_per_second": 3.397,
"eval_wer": 0.3027859550322113,
"step": 16400
},
{
"epoch": 390.69,
"learning_rate": 6.721428571428571e-05,
"loss": 0.0073,
"step": 16800
},
{
"epoch": 390.69,
"eval_cer": 0.0677350787813354,
"eval_loss": 0.4031626284122467,
"eval_runtime": 114.7831,
"eval_samples_per_second": 27.112,
"eval_steps_per_second": 3.389,
"eval_wer": 0.30807628311788043,
"step": 16800
},
{
"epoch": 399.99,
"learning_rate": 6.151428571428571e-05,
"loss": 0.0069,
"step": 17200
},
{
"epoch": 399.99,
"eval_cer": 0.06693208272883701,
"eval_loss": 0.41302183270454407,
"eval_runtime": 114.8457,
"eval_samples_per_second": 27.097,
"eval_steps_per_second": 3.387,
"eval_wer": 0.30214599598959,
"step": 17200
},
{
"epoch": 409.3,
"learning_rate": 5.5799999999999994e-05,
"loss": 0.0063,
"step": 17600
},
{
"epoch": 409.3,
"eval_cer": 0.06513715037619353,
"eval_loss": 0.4071926772594452,
"eval_runtime": 114.6356,
"eval_samples_per_second": 27.147,
"eval_steps_per_second": 3.393,
"eval_wer": 0.2979222663082896,
"step": 17600
},
{
"epoch": 418.6,
"learning_rate": 5.008571428571428e-05,
"loss": 0.0059,
"step": 18000
},
{
"epoch": 418.6,
"eval_cer": 0.06403724822025035,
"eval_loss": 0.41102761030197144,
"eval_runtime": 116.5576,
"eval_samples_per_second": 26.699,
"eval_steps_per_second": 3.337,
"eval_wer": 0.2969409957762703,
"step": 18000
},
{
"epoch": 427.9,
"learning_rate": 4.437142857142857e-05,
"loss": 0.0056,
"step": 18400
},
{
"epoch": 427.9,
"eval_cer": 0.06465805189108945,
"eval_loss": 0.4228787422180176,
"eval_runtime": 114.7096,
"eval_samples_per_second": 27.129,
"eval_steps_per_second": 3.391,
"eval_wer": 0.29945816801058067,
"step": 18400
},
{
"epoch": 437.21,
"learning_rate": 3.8657142857142856e-05,
"loss": 0.005,
"step": 18800
},
{
"epoch": 437.21,
"eval_cer": 0.062370525321367117,
"eval_loss": 0.41175001859664917,
"eval_runtime": 115.5821,
"eval_samples_per_second": 26.925,
"eval_steps_per_second": 3.366,
"eval_wer": 0.2884508724774948,
"step": 18800
},
{
"epoch": 446.51,
"learning_rate": 3.294285714285714e-05,
"loss": 0.0046,
"step": 19200
},
{
"epoch": 446.51,
"eval_cer": 0.06147305914504538,
"eval_loss": 0.41112595796585083,
"eval_runtime": 115.5282,
"eval_samples_per_second": 26.937,
"eval_steps_per_second": 3.367,
"eval_wer": 0.28409915098767013,
"step": 19200
},
{
"epoch": 455.8,
"learning_rate": 2.7228571428571427e-05,
"loss": 0.0043,
"step": 19600
},
{
"epoch": 455.8,
"eval_cer": 0.06160126859880563,
"eval_loss": 0.40707847476005554,
"eval_runtime": 117.681,
"eval_samples_per_second": 26.444,
"eval_steps_per_second": 3.306,
"eval_wer": 0.28495242971116513,
"step": 19600
},
{
"epoch": 465.11,
"learning_rate": 2.1514285714285714e-05,
"loss": 0.0038,
"step": 20000
},
{
"epoch": 465.11,
"eval_cer": 0.062363777455379736,
"eval_loss": 0.4267757534980774,
"eval_runtime": 115.119,
"eval_samples_per_second": 27.033,
"eval_steps_per_second": 3.379,
"eval_wer": 0.28670165109433,
"step": 20000
},
{
"epoch": 474.41,
"learning_rate": 1.5799999999999998e-05,
"loss": 0.0035,
"step": 20400
},
{
"epoch": 474.41,
"eval_cer": 0.06053510577279935,
"eval_loss": 0.4116959869861603,
"eval_runtime": 115.3416,
"eval_samples_per_second": 26.981,
"eval_steps_per_second": 3.373,
"eval_wer": 0.2820086181151073,
"step": 20400
},
{
"epoch": 483.71,
"learning_rate": 1.0085714285714285e-05,
"loss": 0.0035,
"step": 20800
},
{
"epoch": 483.71,
"eval_cer": 0.060238199669354564,
"eval_loss": 0.4154604375362396,
"eval_runtime": 115.2471,
"eval_samples_per_second": 27.003,
"eval_steps_per_second": 3.375,
"eval_wer": 0.2819232902427578,
"step": 20800
},
{
"epoch": 493.02,
"learning_rate": 4.371428571428571e-06,
"loss": 0.0034,
"step": 21200
},
{
"epoch": 493.02,
"eval_cer": 0.06007625088565741,
"eval_loss": 0.41654759645462036,
"eval_runtime": 115.3904,
"eval_samples_per_second": 26.969,
"eval_steps_per_second": 3.371,
"eval_wer": 0.27991808524254447,
"step": 21200
}
],
"max_steps": 21500,
"num_train_epochs": 500,
"total_flos": 5.302246573116527e+20,
"trial_name": null,
"trial_params": null
}