wav2vec2-xls-r-300m-korean / trainer_state.json
w11wo's picture
End of training
b8d7f07
raw
history blame
62.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.99892202659001,
"global_step": 34750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"learning_rate": 3.675e-06,
"loss": 47.2908,
"step": 100
},
{
"epoch": 0.29,
"learning_rate": 7.425e-06,
"loss": 33.9125,
"step": 200
},
{
"epoch": 0.43,
"learning_rate": 1.1174999999999999e-05,
"loss": 26.6068,
"step": 300
},
{
"epoch": 0.57,
"learning_rate": 1.4925e-05,
"loss": 23.2775,
"step": 400
},
{
"epoch": 0.72,
"learning_rate": 1.8675e-05,
"loss": 19.7138,
"step": 500
},
{
"epoch": 0.72,
"eval_cer": 1.0,
"eval_loss": 19.642736434936523,
"eval_runtime": 41.3907,
"eval_samples_per_second": 11.017,
"eval_steps_per_second": 1.377,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 0.86,
"learning_rate": 2.2424999999999996e-05,
"loss": 15.7715,
"step": 600
},
{
"epoch": 1.01,
"learning_rate": 2.6174999999999996e-05,
"loss": 11.4061,
"step": 700
},
{
"epoch": 1.15,
"learning_rate": 2.9925e-05,
"loss": 7.4329,
"step": 800
},
{
"epoch": 1.29,
"learning_rate": 3.3675e-05,
"loss": 5.3081,
"step": 900
},
{
"epoch": 1.44,
"learning_rate": 3.7424999999999995e-05,
"loss": 4.8039,
"step": 1000
},
{
"epoch": 1.44,
"eval_cer": 1.0,
"eval_loss": 4.784187316894531,
"eval_runtime": 42.2256,
"eval_samples_per_second": 10.799,
"eval_steps_per_second": 1.35,
"eval_wer": 1.0,
"step": 1000
},
{
"epoch": 1.58,
"learning_rate": 4.1175e-05,
"loss": 4.762,
"step": 1100
},
{
"epoch": 1.73,
"learning_rate": 4.4924999999999994e-05,
"loss": 4.6928,
"step": 1200
},
{
"epoch": 1.87,
"learning_rate": 4.8675e-05,
"loss": 4.6292,
"step": 1300
},
{
"epoch": 2.01,
"learning_rate": 5.2424999999999994e-05,
"loss": 4.6321,
"step": 1400
},
{
"epoch": 2.16,
"learning_rate": 5.6175e-05,
"loss": 4.5619,
"step": 1500
},
{
"epoch": 2.16,
"eval_cer": 0.9598094788222327,
"eval_loss": 4.560794830322266,
"eval_runtime": 41.0352,
"eval_samples_per_second": 11.112,
"eval_steps_per_second": 1.389,
"eval_wer": 0.9992449411054062,
"step": 1500
},
{
"epoch": 2.3,
"learning_rate": 5.9925e-05,
"loss": 4.4704,
"step": 1600
},
{
"epoch": 2.45,
"learning_rate": 6.367499999999999e-05,
"loss": 4.3806,
"step": 1700
},
{
"epoch": 2.59,
"learning_rate": 6.7425e-05,
"loss": 4.3092,
"step": 1800
},
{
"epoch": 2.73,
"learning_rate": 7.1175e-05,
"loss": 4.2794,
"step": 1900
},
{
"epoch": 2.88,
"learning_rate": 7.492499999999999e-05,
"loss": 4.254,
"step": 2000
},
{
"epoch": 2.88,
"eval_cer": 0.906274602424815,
"eval_loss": 4.272861003875732,
"eval_runtime": 40.8387,
"eval_samples_per_second": 11.166,
"eval_steps_per_second": 1.396,
"eval_wer": 0.9954696466324373,
"step": 2000
},
{
"epoch": 3.02,
"learning_rate": 7.477557251908395e-05,
"loss": 4.2616,
"step": 2100
},
{
"epoch": 3.17,
"learning_rate": 7.454656488549618e-05,
"loss": 4.2184,
"step": 2200
},
{
"epoch": 3.31,
"learning_rate": 7.43175572519084e-05,
"loss": 4.227,
"step": 2300
},
{
"epoch": 3.45,
"learning_rate": 7.408854961832061e-05,
"loss": 4.1985,
"step": 2400
},
{
"epoch": 3.6,
"learning_rate": 7.385954198473281e-05,
"loss": 4.1905,
"step": 2500
},
{
"epoch": 3.6,
"eval_cer": 0.8758463234136357,
"eval_loss": 4.225706100463867,
"eval_runtime": 40.6017,
"eval_samples_per_second": 11.231,
"eval_steps_per_second": 1.404,
"eval_wer": 0.9903352461491997,
"step": 2500
},
{
"epoch": 3.74,
"learning_rate": 7.36328244274809e-05,
"loss": 4.1873,
"step": 2600
},
{
"epoch": 3.88,
"learning_rate": 7.340381679389312e-05,
"loss": 4.1615,
"step": 2700
},
{
"epoch": 4.03,
"learning_rate": 7.317480916030534e-05,
"loss": 4.157,
"step": 2800
},
{
"epoch": 4.17,
"learning_rate": 7.294580152671756e-05,
"loss": 4.1124,
"step": 2900
},
{
"epoch": 4.32,
"learning_rate": 7.271679389312976e-05,
"loss": 4.0683,
"step": 3000
},
{
"epoch": 4.32,
"eval_cer": 0.7911352542906629,
"eval_loss": 3.929443120956421,
"eval_runtime": 41.2715,
"eval_samples_per_second": 11.049,
"eval_steps_per_second": 1.381,
"eval_wer": 0.9936575052854123,
"step": 3000
},
{
"epoch": 4.46,
"learning_rate": 7.248778625954197e-05,
"loss": 4.0704,
"step": 3100
},
{
"epoch": 4.6,
"learning_rate": 7.22587786259542e-05,
"loss": 3.9616,
"step": 3200
},
{
"epoch": 4.75,
"learning_rate": 7.202977099236641e-05,
"loss": 3.7798,
"step": 3300
},
{
"epoch": 4.89,
"learning_rate": 7.180076335877862e-05,
"loss": 3.6123,
"step": 3400
},
{
"epoch": 5.04,
"learning_rate": 7.15740458015267e-05,
"loss": 3.486,
"step": 3500
},
{
"epoch": 5.04,
"eval_cer": 0.5933711226578492,
"eval_loss": 2.704545497894287,
"eval_runtime": 40.8408,
"eval_samples_per_second": 11.165,
"eval_steps_per_second": 1.396,
"eval_wer": 1.0012080942313502,
"step": 3500
},
{
"epoch": 5.18,
"learning_rate": 7.134503816793892e-05,
"loss": 3.3283,
"step": 3600
},
{
"epoch": 5.32,
"learning_rate": 7.111603053435114e-05,
"loss": 3.2091,
"step": 3700
},
{
"epoch": 5.47,
"learning_rate": 7.088931297709923e-05,
"loss": 3.1158,
"step": 3800
},
{
"epoch": 5.61,
"learning_rate": 7.066030534351145e-05,
"loss": 2.9983,
"step": 3900
},
{
"epoch": 5.75,
"learning_rate": 7.043129770992365e-05,
"loss": 2.946,
"step": 4000
},
{
"epoch": 5.75,
"eval_cer": 0.4634309557549992,
"eval_loss": 1.9690674543380737,
"eval_runtime": 40.879,
"eval_samples_per_second": 11.155,
"eval_steps_per_second": 1.394,
"eval_wer": 0.942464512231954,
"step": 4000
},
{
"epoch": 5.9,
"learning_rate": 7.020229007633587e-05,
"loss": 2.8545,
"step": 4100
},
{
"epoch": 6.04,
"learning_rate": 6.997328244274808e-05,
"loss": 2.8092,
"step": 4200
},
{
"epoch": 6.19,
"learning_rate": 6.97442748091603e-05,
"loss": 2.7229,
"step": 4300
},
{
"epoch": 6.33,
"learning_rate": 6.951526717557252e-05,
"loss": 2.7053,
"step": 4400
},
{
"epoch": 6.47,
"learning_rate": 6.928625954198472e-05,
"loss": 2.634,
"step": 4500
},
{
"epoch": 6.47,
"eval_cer": 0.38501810738466385,
"eval_loss": 1.521231770515442,
"eval_runtime": 41.5435,
"eval_samples_per_second": 10.976,
"eval_steps_per_second": 1.372,
"eval_wer": 0.880700694654183,
"step": 4500
},
{
"epoch": 6.62,
"learning_rate": 6.905725190839693e-05,
"loss": 2.5996,
"step": 4600
},
{
"epoch": 6.76,
"learning_rate": 6.882824427480916e-05,
"loss": 2.5472,
"step": 4700
},
{
"epoch": 6.91,
"learning_rate": 6.859923664122137e-05,
"loss": 2.4959,
"step": 4800
},
{
"epoch": 7.05,
"learning_rate": 6.837022900763359e-05,
"loss": 2.4554,
"step": 4900
},
{
"epoch": 7.19,
"learning_rate": 6.814122137404579e-05,
"loss": 2.4066,
"step": 5000
},
{
"epoch": 7.19,
"eval_cer": 0.36014013541174617,
"eval_loss": 1.2550952434539795,
"eval_runtime": 41.0408,
"eval_samples_per_second": 11.111,
"eval_steps_per_second": 1.389,
"eval_wer": 0.8177287828450619,
"step": 5000
},
{
"epoch": 7.34,
"learning_rate": 6.791221374045801e-05,
"loss": 2.3768,
"step": 5100
},
{
"epoch": 7.48,
"learning_rate": 6.768320610687023e-05,
"loss": 2.3557,
"step": 5200
},
{
"epoch": 7.63,
"learning_rate": 6.745419847328244e-05,
"loss": 2.3109,
"step": 5300
},
{
"epoch": 7.77,
"learning_rate": 6.722519083969465e-05,
"loss": 2.2953,
"step": 5400
},
{
"epoch": 7.91,
"learning_rate": 6.699618320610687e-05,
"loss": 2.2651,
"step": 5500
},
{
"epoch": 7.91,
"eval_cer": 0.30392851519445757,
"eval_loss": 1.0423332452774048,
"eval_runtime": 40.9098,
"eval_samples_per_second": 11.146,
"eval_steps_per_second": 1.393,
"eval_wer": 0.7650256720024162,
"step": 5500
},
{
"epoch": 8.06,
"learning_rate": 6.676717557251908e-05,
"loss": 2.2589,
"step": 5600
},
{
"epoch": 8.2,
"learning_rate": 6.654045801526718e-05,
"loss": 2.2122,
"step": 5700
},
{
"epoch": 8.34,
"learning_rate": 6.631145038167939e-05,
"loss": 2.2017,
"step": 5800
},
{
"epoch": 8.49,
"learning_rate": 6.60824427480916e-05,
"loss": 2.1814,
"step": 5900
},
{
"epoch": 8.63,
"learning_rate": 6.58534351145038e-05,
"loss": 2.1828,
"step": 6000
},
{
"epoch": 8.63,
"eval_cer": 0.3106203747441348,
"eval_loss": 0.9598844051361084,
"eval_runtime": 41.4743,
"eval_samples_per_second": 10.995,
"eval_steps_per_second": 1.374,
"eval_wer": 0.7272727272727273,
"step": 6000
},
{
"epoch": 8.78,
"learning_rate": 6.562442748091603e-05,
"loss": 2.1714,
"step": 6100
},
{
"epoch": 8.92,
"learning_rate": 6.539541984732824e-05,
"loss": 2.1422,
"step": 6200
},
{
"epoch": 9.06,
"learning_rate": 6.516641221374046e-05,
"loss": 2.1546,
"step": 6300
},
{
"epoch": 9.21,
"learning_rate": 6.493740458015267e-05,
"loss": 2.12,
"step": 6400
},
{
"epoch": 9.35,
"learning_rate": 6.470839694656488e-05,
"loss": 2.1023,
"step": 6500
},
{
"epoch": 9.35,
"eval_cer": 0.30632971185640057,
"eval_loss": 0.9481843113899231,
"eval_runtime": 41.1867,
"eval_samples_per_second": 11.072,
"eval_steps_per_second": 1.384,
"eval_wer": 0.7160978556327393,
"step": 6500
},
{
"epoch": 9.5,
"learning_rate": 6.44793893129771e-05,
"loss": 2.1104,
"step": 6600
},
{
"epoch": 9.64,
"learning_rate": 6.425038167938931e-05,
"loss": 2.0879,
"step": 6700
},
{
"epoch": 9.78,
"learning_rate": 6.402137404580152e-05,
"loss": 2.0724,
"step": 6800
},
{
"epoch": 9.93,
"learning_rate": 6.379236641221374e-05,
"loss": 2.0622,
"step": 6900
},
{
"epoch": 10.07,
"learning_rate": 6.356335877862595e-05,
"loss": 2.0536,
"step": 7000
},
{
"epoch": 10.07,
"eval_cer": 0.28597858604944104,
"eval_loss": 0.8241907954216003,
"eval_runtime": 41.2837,
"eval_samples_per_second": 11.046,
"eval_steps_per_second": 1.381,
"eval_wer": 0.6766837813349441,
"step": 7000
},
{
"epoch": 10.22,
"learning_rate": 6.333435114503816e-05,
"loss": 2.0258,
"step": 7100
},
{
"epoch": 10.36,
"learning_rate": 6.310534351145038e-05,
"loss": 2.038,
"step": 7200
},
{
"epoch": 10.5,
"learning_rate": 6.287633587786259e-05,
"loss": 2.0093,
"step": 7300
},
{
"epoch": 10.65,
"learning_rate": 6.26473282442748e-05,
"loss": 1.9839,
"step": 7400
},
{
"epoch": 10.79,
"learning_rate": 6.241832061068702e-05,
"loss": 1.9803,
"step": 7500
},
{
"epoch": 10.79,
"eval_cer": 0.2636592662572823,
"eval_loss": 0.7643126845359802,
"eval_runtime": 41.3574,
"eval_samples_per_second": 11.026,
"eval_steps_per_second": 1.378,
"eval_wer": 0.6562971911809121,
"step": 7500
},
{
"epoch": 10.93,
"learning_rate": 6.218931297709923e-05,
"loss": 1.9704,
"step": 7600
},
{
"epoch": 11.08,
"learning_rate": 6.196030534351144e-05,
"loss": 1.9923,
"step": 7700
},
{
"epoch": 11.22,
"learning_rate": 6.173129770992366e-05,
"loss": 1.9549,
"step": 7800
},
{
"epoch": 11.37,
"learning_rate": 6.150229007633587e-05,
"loss": 1.9339,
"step": 7900
},
{
"epoch": 11.51,
"learning_rate": 6.127328244274808e-05,
"loss": 1.9468,
"step": 8000
},
{
"epoch": 11.51,
"eval_cer": 0.25051173043615177,
"eval_loss": 0.7318933606147766,
"eval_runtime": 40.8808,
"eval_samples_per_second": 11.154,
"eval_steps_per_second": 1.394,
"eval_wer": 0.644065237088493,
"step": 8000
},
{
"epoch": 11.65,
"learning_rate": 6.10442748091603e-05,
"loss": 1.9691,
"step": 8100
},
{
"epoch": 11.8,
"learning_rate": 6.081526717557252e-05,
"loss": 1.9845,
"step": 8200
},
{
"epoch": 11.94,
"learning_rate": 6.0586259541984725e-05,
"loss": 1.9561,
"step": 8300
},
{
"epoch": 12.09,
"learning_rate": 6.035725190839694e-05,
"loss": 1.9486,
"step": 8400
},
{
"epoch": 12.23,
"learning_rate": 6.012824427480916e-05,
"loss": 1.9178,
"step": 8500
},
{
"epoch": 12.23,
"eval_cer": 0.24893717524799244,
"eval_loss": 0.6936821937561035,
"eval_runtime": 41.1829,
"eval_samples_per_second": 11.073,
"eval_steps_per_second": 1.384,
"eval_wer": 0.6319842947749924,
"step": 8500
},
{
"epoch": 12.37,
"learning_rate": 5.989923664122137e-05,
"loss": 1.9133,
"step": 8600
},
{
"epoch": 12.52,
"learning_rate": 5.9670229007633586e-05,
"loss": 1.9327,
"step": 8700
},
{
"epoch": 12.66,
"learning_rate": 5.944122137404579e-05,
"loss": 1.8749,
"step": 8800
},
{
"epoch": 12.8,
"learning_rate": 5.9212213740458006e-05,
"loss": 1.8775,
"step": 8900
},
{
"epoch": 12.95,
"learning_rate": 5.8983206106870226e-05,
"loss": 1.8515,
"step": 9000
},
{
"epoch": 12.95,
"eval_cer": 0.21961108486852465,
"eval_loss": 0.6443303823471069,
"eval_runtime": 40.5279,
"eval_samples_per_second": 11.252,
"eval_steps_per_second": 1.406,
"eval_wer": 0.6052552099063727,
"step": 9000
},
{
"epoch": 13.09,
"learning_rate": 5.875419847328244e-05,
"loss": 1.8554,
"step": 9100
},
{
"epoch": 13.24,
"learning_rate": 5.852519083969465e-05,
"loss": 1.8568,
"step": 9200
},
{
"epoch": 13.38,
"learning_rate": 5.829618320610686e-05,
"loss": 1.8477,
"step": 9300
},
{
"epoch": 13.52,
"learning_rate": 5.806717557251908e-05,
"loss": 1.8328,
"step": 9400
},
{
"epoch": 13.67,
"learning_rate": 5.783816793893129e-05,
"loss": 1.8083,
"step": 9500
},
{
"epoch": 13.67,
"eval_cer": 0.21484805542434263,
"eval_loss": 0.6285760402679443,
"eval_runtime": 41.6653,
"eval_samples_per_second": 10.944,
"eval_steps_per_second": 1.368,
"eval_wer": 0.6122017517366355,
"step": 9500
},
{
"epoch": 13.81,
"learning_rate": 5.760916030534351e-05,
"loss": 1.8236,
"step": 9600
},
{
"epoch": 13.96,
"learning_rate": 5.738015267175571e-05,
"loss": 1.8199,
"step": 9700
},
{
"epoch": 14.1,
"learning_rate": 5.7151145038167934e-05,
"loss": 1.8285,
"step": 9800
},
{
"epoch": 14.24,
"learning_rate": 5.692213740458015e-05,
"loss": 1.817,
"step": 9900
},
{
"epoch": 14.39,
"learning_rate": 5.669312977099236e-05,
"loss": 1.819,
"step": 10000
},
{
"epoch": 14.39,
"eval_cer": 0.2074476460399937,
"eval_loss": 0.6015097498893738,
"eval_runtime": 41.6458,
"eval_samples_per_second": 10.949,
"eval_steps_per_second": 1.369,
"eval_wer": 0.5986106916339474,
"step": 10000
},
{
"epoch": 14.53,
"learning_rate": 5.6466412213740455e-05,
"loss": 1.7952,
"step": 10100
},
{
"epoch": 14.68,
"learning_rate": 5.623740458015266e-05,
"loss": 1.7955,
"step": 10200
},
{
"epoch": 14.82,
"learning_rate": 5.600839694656488e-05,
"loss": 1.7878,
"step": 10300
},
{
"epoch": 14.96,
"learning_rate": 5.5779389312977095e-05,
"loss": 1.769,
"step": 10400
},
{
"epoch": 15.11,
"learning_rate": 5.555267175572519e-05,
"loss": 1.7684,
"step": 10500
},
{
"epoch": 15.11,
"eval_cer": 0.19815777042985355,
"eval_loss": 0.5682193636894226,
"eval_runtime": 41.2484,
"eval_samples_per_second": 11.055,
"eval_steps_per_second": 1.382,
"eval_wer": 0.5741467834491091,
"step": 10500
},
{
"epoch": 15.25,
"learning_rate": 5.53236641221374e-05,
"loss": 1.7626,
"step": 10600
},
{
"epoch": 15.4,
"learning_rate": 5.5094656488549616e-05,
"loss": 1.7582,
"step": 10700
},
{
"epoch": 15.54,
"learning_rate": 5.486564885496182e-05,
"loss": 1.75,
"step": 10800
},
{
"epoch": 15.68,
"learning_rate": 5.463664122137404e-05,
"loss": 1.735,
"step": 10900
},
{
"epoch": 15.83,
"learning_rate": 5.4407633587786256e-05,
"loss": 1.7195,
"step": 11000
},
{
"epoch": 15.83,
"eval_cer": 0.20067705873090852,
"eval_loss": 0.5385124683380127,
"eval_runtime": 41.6481,
"eval_samples_per_second": 10.949,
"eval_steps_per_second": 1.369,
"eval_wer": 0.5591966173361522,
"step": 11000
},
{
"epoch": 15.97,
"learning_rate": 5.417862595419847e-05,
"loss": 1.7274,
"step": 11100
},
{
"epoch": 16.11,
"learning_rate": 5.3949618320610677e-05,
"loss": 1.7183,
"step": 11200
},
{
"epoch": 16.26,
"learning_rate": 5.37206106870229e-05,
"loss": 1.7117,
"step": 11300
},
{
"epoch": 16.4,
"learning_rate": 5.349160305343511e-05,
"loss": 1.6918,
"step": 11400
},
{
"epoch": 16.55,
"learning_rate": 5.3262595419847324e-05,
"loss": 1.7044,
"step": 11500
},
{
"epoch": 16.55,
"eval_cer": 0.20965202330341678,
"eval_loss": 0.5361923575401306,
"eval_runtime": 41.5242,
"eval_samples_per_second": 10.982,
"eval_steps_per_second": 1.373,
"eval_wer": 0.5524010872848082,
"step": 11500
},
{
"epoch": 16.69,
"learning_rate": 5.303358778625954e-05,
"loss": 1.7134,
"step": 11600
},
{
"epoch": 16.83,
"learning_rate": 5.280458015267176e-05,
"loss": 1.7016,
"step": 11700
},
{
"epoch": 16.98,
"learning_rate": 5.2575572519083964e-05,
"loss": 1.7069,
"step": 11800
},
{
"epoch": 17.12,
"learning_rate": 5.234656488549618e-05,
"loss": 1.7046,
"step": 11900
},
{
"epoch": 17.27,
"learning_rate": 5.211755725190839e-05,
"loss": 1.6879,
"step": 12000
},
{
"epoch": 17.27,
"eval_cer": 0.20831365139348135,
"eval_loss": 0.5119141936302185,
"eval_runtime": 40.4618,
"eval_samples_per_second": 11.27,
"eval_steps_per_second": 1.409,
"eval_wer": 0.5489278163696768,
"step": 12000
},
{
"epoch": 17.41,
"learning_rate": 5.188854961832061e-05,
"loss": 1.681,
"step": 12100
},
{
"epoch": 17.55,
"learning_rate": 5.1659541984732825e-05,
"loss": 1.6683,
"step": 12200
},
{
"epoch": 17.7,
"learning_rate": 5.143053435114503e-05,
"loss": 1.655,
"step": 12300
},
{
"epoch": 17.84,
"learning_rate": 5.1201526717557245e-05,
"loss": 1.6604,
"step": 12400
},
{
"epoch": 17.98,
"learning_rate": 5.0972519083969465e-05,
"loss": 1.656,
"step": 12500
},
{
"epoch": 17.98,
"eval_cer": 0.19678003464021415,
"eval_loss": 0.4990406930446625,
"eval_runtime": 40.5826,
"eval_samples_per_second": 11.236,
"eval_steps_per_second": 1.405,
"eval_wer": 0.5362428269405014,
"step": 12500
},
{
"epoch": 18.13,
"learning_rate": 5.074351145038168e-05,
"loss": 1.6645,
"step": 12600
},
{
"epoch": 18.27,
"learning_rate": 5.051450381679389e-05,
"loss": 1.6269,
"step": 12700
},
{
"epoch": 18.42,
"learning_rate": 5.02854961832061e-05,
"loss": 1.6306,
"step": 12800
},
{
"epoch": 18.56,
"learning_rate": 5.005877862595419e-05,
"loss": 1.6191,
"step": 12900
},
{
"epoch": 18.7,
"learning_rate": 4.9829770992366406e-05,
"loss": 1.6122,
"step": 13000
},
{
"epoch": 18.7,
"eval_cer": 0.18997008345142496,
"eval_loss": 0.45614466071128845,
"eval_runtime": 41.2927,
"eval_samples_per_second": 11.043,
"eval_steps_per_second": 1.38,
"eval_wer": 0.5092117185140441,
"step": 13000
},
{
"epoch": 18.85,
"learning_rate": 4.9600763358778626e-05,
"loss": 1.622,
"step": 13100
},
{
"epoch": 18.99,
"learning_rate": 4.937175572519084e-05,
"loss": 1.6305,
"step": 13200
},
{
"epoch": 19.14,
"learning_rate": 4.9142748091603046e-05,
"loss": 1.6134,
"step": 13300
},
{
"epoch": 19.28,
"learning_rate": 4.891374045801526e-05,
"loss": 1.6044,
"step": 13400
},
{
"epoch": 19.42,
"learning_rate": 4.868473282442748e-05,
"loss": 1.5919,
"step": 13500
},
{
"epoch": 19.42,
"eval_cer": 0.19752794835458984,
"eval_loss": 0.47778981924057007,
"eval_runtime": 41.5758,
"eval_samples_per_second": 10.968,
"eval_steps_per_second": 1.371,
"eval_wer": 0.5225007550588946,
"step": 13500
},
{
"epoch": 19.57,
"learning_rate": 4.8455725190839694e-05,
"loss": 1.595,
"step": 13600
},
{
"epoch": 19.71,
"learning_rate": 4.822671755725191e-05,
"loss": 1.5959,
"step": 13700
},
{
"epoch": 19.86,
"learning_rate": 4.7997709923664114e-05,
"loss": 1.6006,
"step": 13800
},
{
"epoch": 20.0,
"learning_rate": 4.7768702290076334e-05,
"loss": 1.5913,
"step": 13900
},
{
"epoch": 20.14,
"learning_rate": 4.753969465648855e-05,
"loss": 1.5896,
"step": 14000
},
{
"epoch": 20.14,
"eval_cer": 0.18591560384191466,
"eval_loss": 0.4563109278678894,
"eval_runtime": 40.8794,
"eval_samples_per_second": 11.155,
"eval_steps_per_second": 1.394,
"eval_wer": 0.5098157656297191,
"step": 14000
},
{
"epoch": 20.29,
"learning_rate": 4.731068702290076e-05,
"loss": 1.5823,
"step": 14100
},
{
"epoch": 20.43,
"learning_rate": 4.708167938931297e-05,
"loss": 1.5634,
"step": 14200
},
{
"epoch": 20.57,
"learning_rate": 4.685267175572519e-05,
"loss": 1.5573,
"step": 14300
},
{
"epoch": 20.72,
"learning_rate": 4.66236641221374e-05,
"loss": 1.5689,
"step": 14400
},
{
"epoch": 20.86,
"learning_rate": 4.6394656488549615e-05,
"loss": 1.5589,
"step": 14500
},
{
"epoch": 20.86,
"eval_cer": 0.17249252086285624,
"eval_loss": 0.43622052669525146,
"eval_runtime": 41.7277,
"eval_samples_per_second": 10.928,
"eval_steps_per_second": 1.366,
"eval_wer": 0.4939595288432498,
"step": 14500
},
{
"epoch": 21.01,
"learning_rate": 4.616564885496183e-05,
"loss": 1.5697,
"step": 14600
},
{
"epoch": 21.15,
"learning_rate": 4.593664122137405e-05,
"loss": 1.5336,
"step": 14700
},
{
"epoch": 21.29,
"learning_rate": 4.5707633587786255e-05,
"loss": 1.5425,
"step": 14800
},
{
"epoch": 21.44,
"learning_rate": 4.547862595419847e-05,
"loss": 1.5461,
"step": 14900
},
{
"epoch": 21.58,
"learning_rate": 4.524961832061068e-05,
"loss": 1.5353,
"step": 15000
},
{
"epoch": 21.58,
"eval_cer": 0.15804597701149425,
"eval_loss": 0.41395294666290283,
"eval_runtime": 40.5257,
"eval_samples_per_second": 11.252,
"eval_steps_per_second": 1.407,
"eval_wer": 0.4826336454243431,
"step": 15000
},
{
"epoch": 21.73,
"learning_rate": 4.5020610687022895e-05,
"loss": 1.5348,
"step": 15100
},
{
"epoch": 21.87,
"learning_rate": 4.4791603053435116e-05,
"loss": 1.5279,
"step": 15200
},
{
"epoch": 22.01,
"learning_rate": 4.456259541984732e-05,
"loss": 1.5492,
"step": 15300
},
{
"epoch": 22.16,
"learning_rate": 4.4333587786259536e-05,
"loss": 1.5291,
"step": 15400
},
{
"epoch": 22.3,
"learning_rate": 4.410458015267175e-05,
"loss": 1.5441,
"step": 15500
},
{
"epoch": 22.3,
"eval_cer": 0.15501495827428752,
"eval_loss": 0.40313535928726196,
"eval_runtime": 41.0848,
"eval_samples_per_second": 11.099,
"eval_steps_per_second": 1.387,
"eval_wer": 0.47417698580489276,
"step": 15500
},
{
"epoch": 22.45,
"learning_rate": 4.387557251908397e-05,
"loss": 1.518,
"step": 15600
},
{
"epoch": 22.59,
"learning_rate": 4.364656488549618e-05,
"loss": 1.5081,
"step": 15700
},
{
"epoch": 22.73,
"learning_rate": 4.341755725190839e-05,
"loss": 1.4959,
"step": 15800
},
{
"epoch": 22.88,
"learning_rate": 4.31885496183206e-05,
"loss": 1.5097,
"step": 15900
},
{
"epoch": 23.02,
"learning_rate": 4.295954198473282e-05,
"loss": 1.5116,
"step": 16000
},
{
"epoch": 23.02,
"eval_cer": 0.15450322783813572,
"eval_loss": 0.39162585139274597,
"eval_runtime": 40.7373,
"eval_samples_per_second": 11.194,
"eval_steps_per_second": 1.399,
"eval_wer": 0.4747810329205678,
"step": 16000
},
{
"epoch": 23.17,
"learning_rate": 4.273053435114504e-05,
"loss": 1.4951,
"step": 16100
},
{
"epoch": 23.31,
"learning_rate": 4.250152671755724e-05,
"loss": 1.4974,
"step": 16200
},
{
"epoch": 23.45,
"learning_rate": 4.227480916030534e-05,
"loss": 1.5045,
"step": 16300
},
{
"epoch": 23.6,
"learning_rate": 4.204580152671755e-05,
"loss": 1.4944,
"step": 16400
},
{
"epoch": 23.74,
"learning_rate": 4.181679389312977e-05,
"loss": 1.4731,
"step": 16500
},
{
"epoch": 23.74,
"eval_cer": 0.15422768068020784,
"eval_loss": 0.3840835392475128,
"eval_runtime": 40.8763,
"eval_samples_per_second": 11.156,
"eval_steps_per_second": 1.394,
"eval_wer": 0.4809725158562368,
"step": 16500
},
{
"epoch": 23.88,
"learning_rate": 4.1587786259541985e-05,
"loss": 1.472,
"step": 16600
},
{
"epoch": 24.03,
"learning_rate": 4.13587786259542e-05,
"loss": 1.4847,
"step": 16700
},
{
"epoch": 24.17,
"learning_rate": 4.1129770992366405e-05,
"loss": 1.4603,
"step": 16800
},
{
"epoch": 24.32,
"learning_rate": 4.090076335877862e-05,
"loss": 1.4563,
"step": 16900
},
{
"epoch": 24.46,
"learning_rate": 4.067175572519084e-05,
"loss": 1.4647,
"step": 17000
},
{
"epoch": 24.46,
"eval_cer": 0.14753582113053063,
"eval_loss": 0.37518319487571716,
"eval_runtime": 41.0205,
"eval_samples_per_second": 11.116,
"eval_steps_per_second": 1.39,
"eval_wer": 0.452431289640592,
"step": 17000
},
{
"epoch": 24.6,
"learning_rate": 4.044274809160305e-05,
"loss": 1.4585,
"step": 17100
},
{
"epoch": 24.75,
"learning_rate": 4.021374045801526e-05,
"loss": 1.4692,
"step": 17200
},
{
"epoch": 24.89,
"learning_rate": 3.998473282442747e-05,
"loss": 1.444,
"step": 17300
},
{
"epoch": 25.04,
"learning_rate": 3.975572519083969e-05,
"loss": 1.4717,
"step": 17400
},
{
"epoch": 25.18,
"learning_rate": 3.9526717557251906e-05,
"loss": 1.4328,
"step": 17500
},
{
"epoch": 25.18,
"eval_cer": 0.1461187214611872,
"eval_loss": 0.35870596766471863,
"eval_runtime": 40.6723,
"eval_samples_per_second": 11.212,
"eval_steps_per_second": 1.401,
"eval_wer": 0.4475989127151918,
"step": 17500
},
{
"epoch": 25.32,
"learning_rate": 3.929770992366412e-05,
"loss": 1.4329,
"step": 17600
},
{
"epoch": 25.47,
"learning_rate": 3.9068702290076326e-05,
"loss": 1.4209,
"step": 17700
},
{
"epoch": 25.61,
"learning_rate": 3.884198473282442e-05,
"loss": 1.4188,
"step": 17800
},
{
"epoch": 25.75,
"learning_rate": 3.861297709923664e-05,
"loss": 1.4301,
"step": 17900
},
{
"epoch": 25.9,
"learning_rate": 3.8383969465648854e-05,
"loss": 1.4129,
"step": 18000
},
{
"epoch": 25.9,
"eval_cer": 0.13663202645252717,
"eval_loss": 0.3428773581981659,
"eval_runtime": 42.0192,
"eval_samples_per_second": 10.852,
"eval_steps_per_second": 1.357,
"eval_wer": 0.42419208698278466,
"step": 18000
},
{
"epoch": 26.04,
"learning_rate": 3.815496183206107e-05,
"loss": 1.4266,
"step": 18100
},
{
"epoch": 26.19,
"learning_rate": 3.7925954198473274e-05,
"loss": 1.4166,
"step": 18200
},
{
"epoch": 26.33,
"learning_rate": 3.7696946564885494e-05,
"loss": 1.4157,
"step": 18300
},
{
"epoch": 26.47,
"learning_rate": 3.746793893129771e-05,
"loss": 1.4285,
"step": 18400
},
{
"epoch": 26.62,
"learning_rate": 3.723893129770992e-05,
"loss": 1.4062,
"step": 18500
},
{
"epoch": 26.62,
"eval_cer": 0.13549047394111163,
"eval_loss": 0.34499478340148926,
"eval_runtime": 41.0336,
"eval_samples_per_second": 11.113,
"eval_steps_per_second": 1.389,
"eval_wer": 0.4250981576562972,
"step": 18500
},
{
"epoch": 26.76,
"learning_rate": 3.7009923664122134e-05,
"loss": 1.4163,
"step": 18600
},
{
"epoch": 26.91,
"learning_rate": 3.678091603053435e-05,
"loss": 1.404,
"step": 18700
},
{
"epoch": 27.05,
"learning_rate": 3.655190839694656e-05,
"loss": 1.4134,
"step": 18800
},
{
"epoch": 27.19,
"learning_rate": 3.6322900763358775e-05,
"loss": 1.4001,
"step": 18900
},
{
"epoch": 27.34,
"learning_rate": 3.609389312977099e-05,
"loss": 1.3928,
"step": 19000
},
{
"epoch": 27.34,
"eval_cer": 0.13218390804597702,
"eval_loss": 0.32969579100608826,
"eval_runtime": 41.0801,
"eval_samples_per_second": 11.1,
"eval_steps_per_second": 1.388,
"eval_wer": 0.4145273331319843,
"step": 19000
},
{
"epoch": 27.48,
"learning_rate": 3.58648854961832e-05,
"loss": 1.3979,
"step": 19100
},
{
"epoch": 27.63,
"learning_rate": 3.5635877862595415e-05,
"loss": 1.3971,
"step": 19200
},
{
"epoch": 27.77,
"learning_rate": 3.540687022900763e-05,
"loss": 1.3934,
"step": 19300
},
{
"epoch": 27.91,
"learning_rate": 3.517786259541984e-05,
"loss": 1.3866,
"step": 19400
},
{
"epoch": 28.06,
"learning_rate": 3.4948854961832055e-05,
"loss": 1.3906,
"step": 19500
},
{
"epoch": 28.06,
"eval_cer": 0.1336403715950244,
"eval_loss": 0.32101842761039734,
"eval_runtime": 41.0367,
"eval_samples_per_second": 11.112,
"eval_steps_per_second": 1.389,
"eval_wer": 0.4184536393838719,
"step": 19500
},
{
"epoch": 28.2,
"learning_rate": 3.471984732824427e-05,
"loss": 1.3689,
"step": 19600
},
{
"epoch": 28.34,
"learning_rate": 3.449083969465649e-05,
"loss": 1.3715,
"step": 19700
},
{
"epoch": 28.49,
"learning_rate": 3.4261832061068696e-05,
"loss": 1.3527,
"step": 19800
},
{
"epoch": 28.63,
"learning_rate": 3.4032824427480916e-05,
"loss": 1.3532,
"step": 19900
},
{
"epoch": 28.78,
"learning_rate": 3.380381679389312e-05,
"loss": 1.358,
"step": 20000
},
{
"epoch": 28.78,
"eval_cer": 0.12753897024090693,
"eval_loss": 0.31306591629981995,
"eval_runtime": 41.2359,
"eval_samples_per_second": 11.058,
"eval_steps_per_second": 1.382,
"eval_wer": 0.39700996677740863,
"step": 20000
},
{
"epoch": 28.92,
"learning_rate": 3.357480916030534e-05,
"loss": 1.3582,
"step": 20100
},
{
"epoch": 29.06,
"learning_rate": 3.334580152671755e-05,
"loss": 1.3587,
"step": 20200
},
{
"epoch": 29.21,
"learning_rate": 3.311679389312977e-05,
"loss": 1.3392,
"step": 20300
},
{
"epoch": 29.35,
"learning_rate": 3.288778625954198e-05,
"loss": 1.3486,
"step": 20400
},
{
"epoch": 29.5,
"learning_rate": 3.26587786259542e-05,
"loss": 1.3445,
"step": 20500
},
{
"epoch": 29.5,
"eval_cer": 0.12761769800031492,
"eval_loss": 0.3069218099117279,
"eval_runtime": 41.0687,
"eval_samples_per_second": 11.103,
"eval_steps_per_second": 1.388,
"eval_wer": 0.3920265780730897,
"step": 20500
},
{
"epoch": 29.64,
"learning_rate": 3.242977099236641e-05,
"loss": 1.3354,
"step": 20600
},
{
"epoch": 29.78,
"learning_rate": 3.2200763358778624e-05,
"loss": 1.3334,
"step": 20700
},
{
"epoch": 29.93,
"learning_rate": 3.197175572519084e-05,
"loss": 1.3305,
"step": 20800
},
{
"epoch": 30.07,
"learning_rate": 3.174274809160305e-05,
"loss": 1.3354,
"step": 20900
},
{
"epoch": 30.22,
"learning_rate": 3.1513740458015264e-05,
"loss": 1.3159,
"step": 21000
},
{
"epoch": 30.22,
"eval_cer": 0.1254920484962998,
"eval_loss": 0.30346596240997314,
"eval_runtime": 41.0784,
"eval_samples_per_second": 11.101,
"eval_steps_per_second": 1.388,
"eval_wer": 0.3961038961038961,
"step": 21000
},
{
"epoch": 30.36,
"learning_rate": 3.128473282442748e-05,
"loss": 1.3376,
"step": 21100
},
{
"epoch": 30.5,
"learning_rate": 3.105572519083969e-05,
"loss": 1.324,
"step": 21200
},
{
"epoch": 30.65,
"learning_rate": 3.0826717557251904e-05,
"loss": 1.3091,
"step": 21300
},
{
"epoch": 30.79,
"learning_rate": 3.059770992366412e-05,
"loss": 1.3213,
"step": 21400
},
{
"epoch": 30.93,
"learning_rate": 3.0368702290076335e-05,
"loss": 1.3044,
"step": 21500
},
{
"epoch": 30.93,
"eval_cer": 0.12423240434577232,
"eval_loss": 0.29519879817962646,
"eval_runtime": 41.1753,
"eval_samples_per_second": 11.075,
"eval_steps_per_second": 1.384,
"eval_wer": 0.3853820598006645,
"step": 21500
},
{
"epoch": 31.08,
"learning_rate": 3.0139694656488545e-05,
"loss": 1.3033,
"step": 21600
},
{
"epoch": 31.22,
"learning_rate": 2.991068702290076e-05,
"loss": 1.2995,
"step": 21700
},
{
"epoch": 31.37,
"learning_rate": 2.9681679389312975e-05,
"loss": 1.3101,
"step": 21800
},
{
"epoch": 31.51,
"learning_rate": 2.945267175572519e-05,
"loss": 1.304,
"step": 21900
},
{
"epoch": 31.65,
"learning_rate": 2.9223664122137402e-05,
"loss": 1.3034,
"step": 22000
},
{
"epoch": 31.65,
"eval_cer": 0.12273657691702095,
"eval_loss": 0.29660850763320923,
"eval_runtime": 41.8912,
"eval_samples_per_second": 10.885,
"eval_steps_per_second": 1.361,
"eval_wer": 0.37722742373905166,
"step": 22000
},
{
"epoch": 31.8,
"learning_rate": 2.8994656488549615e-05,
"loss": 1.2912,
"step": 22100
},
{
"epoch": 31.94,
"learning_rate": 2.876564885496183e-05,
"loss": 1.299,
"step": 22200
},
{
"epoch": 32.09,
"learning_rate": 2.8536641221374046e-05,
"loss": 1.3042,
"step": 22300
},
{
"epoch": 32.23,
"learning_rate": 2.8307633587786256e-05,
"loss": 1.294,
"step": 22400
},
{
"epoch": 32.37,
"learning_rate": 2.8078625954198472e-05,
"loss": 1.2963,
"step": 22500
},
{
"epoch": 32.37,
"eval_cer": 0.12080774681152574,
"eval_loss": 0.2843906879425049,
"eval_runtime": 41.7644,
"eval_samples_per_second": 10.918,
"eval_steps_per_second": 1.365,
"eval_wer": 0.3705829054666264,
"step": 22500
},
{
"epoch": 32.52,
"learning_rate": 2.7849618320610682e-05,
"loss": 1.2769,
"step": 22600
},
{
"epoch": 32.66,
"learning_rate": 2.76206106870229e-05,
"loss": 1.2812,
"step": 22700
},
{
"epoch": 32.8,
"learning_rate": 2.7391603053435113e-05,
"loss": 1.2827,
"step": 22800
},
{
"epoch": 32.95,
"learning_rate": 2.7162595419847326e-05,
"loss": 1.2747,
"step": 22900
},
{
"epoch": 33.09,
"learning_rate": 2.6935877862595417e-05,
"loss": 1.2765,
"step": 23000
},
{
"epoch": 33.09,
"eval_cer": 0.11726499763816722,
"eval_loss": 0.28407707810401917,
"eval_runtime": 40.9894,
"eval_samples_per_second": 11.125,
"eval_steps_per_second": 1.391,
"eval_wer": 0.35668982180610087,
"step": 23000
},
{
"epoch": 33.24,
"learning_rate": 2.670687022900763e-05,
"loss": 1.2785,
"step": 23100
},
{
"epoch": 33.38,
"learning_rate": 2.6477862595419844e-05,
"loss": 1.2644,
"step": 23200
},
{
"epoch": 33.52,
"learning_rate": 2.624885496183206e-05,
"loss": 1.2724,
"step": 23300
},
{
"epoch": 33.67,
"learning_rate": 2.601984732824427e-05,
"loss": 1.2551,
"step": 23400
},
{
"epoch": 33.81,
"learning_rate": 2.5790839694656488e-05,
"loss": 1.2438,
"step": 23500
},
{
"epoch": 33.81,
"eval_cer": 0.11372224846480869,
"eval_loss": 0.2734295129776001,
"eval_runtime": 41.7199,
"eval_samples_per_second": 10.93,
"eval_steps_per_second": 1.366,
"eval_wer": 0.35517970401691334,
"step": 23500
},
{
"epoch": 33.96,
"learning_rate": 2.5561832061068698e-05,
"loss": 1.2491,
"step": 23600
},
{
"epoch": 34.1,
"learning_rate": 2.5332824427480915e-05,
"loss": 1.252,
"step": 23700
},
{
"epoch": 34.24,
"learning_rate": 2.5103816793893128e-05,
"loss": 1.2467,
"step": 23800
},
{
"epoch": 34.39,
"learning_rate": 2.487480916030534e-05,
"loss": 1.2406,
"step": 23900
},
{
"epoch": 34.53,
"learning_rate": 2.4645801526717555e-05,
"loss": 1.2487,
"step": 24000
},
{
"epoch": 34.53,
"eval_cer": 0.11179341835931349,
"eval_loss": 0.2702818512916565,
"eval_runtime": 41.8515,
"eval_samples_per_second": 10.896,
"eval_steps_per_second": 1.362,
"eval_wer": 0.3501963153125944,
"step": 24000
},
{
"epoch": 34.68,
"learning_rate": 2.441679389312977e-05,
"loss": 1.2504,
"step": 24100
},
{
"epoch": 34.82,
"learning_rate": 2.4187786259541982e-05,
"loss": 1.2341,
"step": 24200
},
{
"epoch": 34.96,
"learning_rate": 2.39587786259542e-05,
"loss": 1.2477,
"step": 24300
},
{
"epoch": 35.11,
"learning_rate": 2.372977099236641e-05,
"loss": 1.2427,
"step": 24400
},
{
"epoch": 35.25,
"learning_rate": 2.3500763358778626e-05,
"loss": 1.2249,
"step": 24500
},
{
"epoch": 35.25,
"eval_cer": 0.11423397890096047,
"eval_loss": 0.2650163471698761,
"eval_runtime": 41.2103,
"eval_samples_per_second": 11.065,
"eval_steps_per_second": 1.383,
"eval_wer": 0.3483841739655693,
"step": 24500
},
{
"epoch": 35.4,
"learning_rate": 2.3271755725190836e-05,
"loss": 1.2265,
"step": 24600
},
{
"epoch": 35.54,
"learning_rate": 2.3042748091603052e-05,
"loss": 1.2276,
"step": 24700
},
{
"epoch": 35.68,
"learning_rate": 2.2816030534351143e-05,
"loss": 1.2332,
"step": 24800
},
{
"epoch": 35.83,
"learning_rate": 2.2587022900763357e-05,
"loss": 1.2249,
"step": 24900
},
{
"epoch": 35.97,
"learning_rate": 2.235801526717557e-05,
"loss": 1.2229,
"step": 25000
},
{
"epoch": 35.97,
"eval_cer": 0.10970713273500236,
"eval_loss": 0.25843024253845215,
"eval_runtime": 42.815,
"eval_samples_per_second": 10.65,
"eval_steps_per_second": 1.331,
"eval_wer": 0.3373603141045001,
"step": 25000
},
{
"epoch": 36.11,
"learning_rate": 2.2129007633587784e-05,
"loss": 1.2412,
"step": 25100
},
{
"epoch": 36.26,
"learning_rate": 2.1899999999999997e-05,
"loss": 1.212,
"step": 25200
},
{
"epoch": 36.4,
"learning_rate": 2.1670992366412214e-05,
"loss": 1.2151,
"step": 25300
},
{
"epoch": 36.55,
"learning_rate": 2.1441984732824424e-05,
"loss": 1.2303,
"step": 25400
},
{
"epoch": 36.69,
"learning_rate": 2.121297709923664e-05,
"loss": 1.2374,
"step": 25500
},
{
"epoch": 36.69,
"eval_cer": 0.10951031333648244,
"eval_loss": 0.2568279504776001,
"eval_runtime": 41.6839,
"eval_samples_per_second": 10.939,
"eval_steps_per_second": 1.367,
"eval_wer": 0.33373603141045,
"step": 25500
},
{
"epoch": 36.83,
"learning_rate": 2.098396946564885e-05,
"loss": 1.2152,
"step": 25600
},
{
"epoch": 36.98,
"learning_rate": 2.0754961832061068e-05,
"loss": 1.2089,
"step": 25700
},
{
"epoch": 37.12,
"learning_rate": 2.052595419847328e-05,
"loss": 1.2201,
"step": 25800
},
{
"epoch": 37.27,
"learning_rate": 2.0296946564885495e-05,
"loss": 1.2006,
"step": 25900
},
{
"epoch": 37.41,
"learning_rate": 2.0067938931297708e-05,
"loss": 1.2153,
"step": 26000
},
{
"epoch": 37.41,
"eval_cer": 0.10710911667453944,
"eval_loss": 0.24941784143447876,
"eval_runtime": 41.3494,
"eval_samples_per_second": 11.028,
"eval_steps_per_second": 1.378,
"eval_wer": 0.33267894895801875,
"step": 26000
},
{
"epoch": 37.55,
"learning_rate": 1.983893129770992e-05,
"loss": 1.2071,
"step": 26100
},
{
"epoch": 37.7,
"learning_rate": 1.9609923664122135e-05,
"loss": 1.2042,
"step": 26200
},
{
"epoch": 37.84,
"learning_rate": 1.9380916030534352e-05,
"loss": 1.2037,
"step": 26300
},
{
"epoch": 37.98,
"learning_rate": 1.9151908396946562e-05,
"loss": 1.1962,
"step": 26400
},
{
"epoch": 38.13,
"learning_rate": 1.892290076335878e-05,
"loss": 1.1925,
"step": 26500
},
{
"epoch": 38.13,
"eval_cer": 0.1076995748700992,
"eval_loss": 0.2518324553966522,
"eval_runtime": 40.748,
"eval_samples_per_second": 11.191,
"eval_steps_per_second": 1.399,
"eval_wer": 0.33660525520990636,
"step": 26500
},
{
"epoch": 38.27,
"learning_rate": 1.869389312977099e-05,
"loss": 1.1969,
"step": 26600
},
{
"epoch": 38.42,
"learning_rate": 1.8464885496183202e-05,
"loss": 1.1947,
"step": 26700
},
{
"epoch": 38.56,
"learning_rate": 1.823587786259542e-05,
"loss": 1.2005,
"step": 26800
},
{
"epoch": 38.7,
"learning_rate": 1.8006870229007632e-05,
"loss": 1.1961,
"step": 26900
},
{
"epoch": 38.85,
"learning_rate": 1.7777862595419846e-05,
"loss": 1.1908,
"step": 27000
},
{
"epoch": 38.85,
"eval_cer": 0.10565265312549205,
"eval_loss": 0.24367305636405945,
"eval_runtime": 41.2308,
"eval_samples_per_second": 11.06,
"eval_steps_per_second": 1.382,
"eval_wer": 0.3272425249169435,
"step": 27000
},
{
"epoch": 38.99,
"learning_rate": 1.754885496183206e-05,
"loss": 1.1762,
"step": 27100
},
{
"epoch": 39.14,
"learning_rate": 1.7319847328244273e-05,
"loss": 1.2018,
"step": 27200
},
{
"epoch": 39.28,
"learning_rate": 1.7090839694656486e-05,
"loss": 1.1822,
"step": 27300
},
{
"epoch": 39.42,
"learning_rate": 1.68618320610687e-05,
"loss": 1.1745,
"step": 27400
},
{
"epoch": 39.57,
"learning_rate": 1.6632824427480913e-05,
"loss": 1.1858,
"step": 27500
},
{
"epoch": 39.57,
"eval_cer": 0.10443237285466855,
"eval_loss": 0.23960824310779572,
"eval_runtime": 42.517,
"eval_samples_per_second": 10.725,
"eval_steps_per_second": 1.341,
"eval_wer": 0.32648746602234974,
"step": 27500
},
{
"epoch": 39.71,
"learning_rate": 1.6403816793893127e-05,
"loss": 1.1866,
"step": 27600
},
{
"epoch": 39.86,
"learning_rate": 1.617480916030534e-05,
"loss": 1.1878,
"step": 27700
},
{
"epoch": 40.0,
"learning_rate": 1.5945801526717557e-05,
"loss": 1.1817,
"step": 27800
},
{
"epoch": 40.14,
"learning_rate": 1.571679389312977e-05,
"loss": 1.1851,
"step": 27900
},
{
"epoch": 40.29,
"learning_rate": 1.5487786259541984e-05,
"loss": 1.1808,
"step": 28000
},
{
"epoch": 40.29,
"eval_cer": 0.10277908990710125,
"eval_loss": 0.2373325228691101,
"eval_runtime": 41.3513,
"eval_samples_per_second": 11.027,
"eval_steps_per_second": 1.378,
"eval_wer": 0.31561461794019935,
"step": 28000
},
{
"epoch": 40.43,
"learning_rate": 1.5258778625954197e-05,
"loss": 1.1558,
"step": 28100
},
{
"epoch": 40.57,
"learning_rate": 1.502977099236641e-05,
"loss": 1.1804,
"step": 28200
},
{
"epoch": 40.72,
"learning_rate": 1.4800763358778624e-05,
"loss": 1.1736,
"step": 28300
},
{
"epoch": 40.86,
"learning_rate": 1.4571755725190838e-05,
"loss": 1.1782,
"step": 28400
},
{
"epoch": 41.01,
"learning_rate": 1.4342748091603053e-05,
"loss": 1.1842,
"step": 28500
},
{
"epoch": 41.01,
"eval_cer": 0.10258227050858132,
"eval_loss": 0.23562349379062653,
"eval_runtime": 40.492,
"eval_samples_per_second": 11.261,
"eval_steps_per_second": 1.408,
"eval_wer": 0.31516158260344307,
"step": 28500
},
{
"epoch": 41.15,
"learning_rate": 1.4113740458015266e-05,
"loss": 1.1595,
"step": 28600
},
{
"epoch": 41.29,
"learning_rate": 1.388473282442748e-05,
"loss": 1.1527,
"step": 28700
},
{
"epoch": 41.44,
"learning_rate": 1.3655725190839693e-05,
"loss": 1.1517,
"step": 28800
},
{
"epoch": 41.58,
"learning_rate": 1.3426717557251907e-05,
"loss": 1.1609,
"step": 28900
},
{
"epoch": 41.73,
"learning_rate": 1.3197709923664122e-05,
"loss": 1.1668,
"step": 29000
},
{
"epoch": 41.73,
"eval_cer": 0.10246417886946937,
"eval_loss": 0.23187227547168732,
"eval_runtime": 40.5813,
"eval_samples_per_second": 11.237,
"eval_steps_per_second": 1.405,
"eval_wer": 0.3187858652974932,
"step": 29000
},
{
"epoch": 41.87,
"learning_rate": 1.2968702290076335e-05,
"loss": 1.1536,
"step": 29100
},
{
"epoch": 42.01,
"learning_rate": 1.2739694656488549e-05,
"loss": 1.1649,
"step": 29200
},
{
"epoch": 42.16,
"learning_rate": 1.2510687022900762e-05,
"loss": 1.1459,
"step": 29300
},
{
"epoch": 42.3,
"learning_rate": 1.2281679389312975e-05,
"loss": 1.1495,
"step": 29400
},
{
"epoch": 42.45,
"learning_rate": 1.205267175572519e-05,
"loss": 1.1448,
"step": 29500
},
{
"epoch": 42.45,
"eval_cer": 0.09947252401196661,
"eval_loss": 0.2292834371328354,
"eval_runtime": 41.732,
"eval_samples_per_second": 10.927,
"eval_steps_per_second": 1.366,
"eval_wer": 0.3098761703412866,
"step": 29500
},
{
"epoch": 42.59,
"learning_rate": 1.1823664122137404e-05,
"loss": 1.1408,
"step": 29600
},
{
"epoch": 42.73,
"learning_rate": 1.1594656488549618e-05,
"loss": 1.1458,
"step": 29700
},
{
"epoch": 42.88,
"learning_rate": 1.1365648854961831e-05,
"loss": 1.1358,
"step": 29800
},
{
"epoch": 43.02,
"learning_rate": 1.1136641221374044e-05,
"loss": 1.1519,
"step": 29900
},
{
"epoch": 43.17,
"learning_rate": 1.0909923664122137e-05,
"loss": 1.1327,
"step": 30000
},
{
"epoch": 43.17,
"eval_cer": 0.09793733270351125,
"eval_loss": 0.2265164852142334,
"eval_runtime": 40.9338,
"eval_samples_per_second": 11.14,
"eval_steps_per_second": 1.392,
"eval_wer": 0.3047417698580489,
"step": 30000
},
{
"epoch": 43.31,
"learning_rate": 1.068091603053435e-05,
"loss": 1.1322,
"step": 30100
},
{
"epoch": 43.45,
"learning_rate": 1.0451908396946564e-05,
"loss": 1.1392,
"step": 30200
},
{
"epoch": 43.6,
"learning_rate": 1.0222900763358777e-05,
"loss": 1.1318,
"step": 30300
},
{
"epoch": 43.74,
"learning_rate": 9.99389312977099e-06,
"loss": 1.1321,
"step": 30400
},
{
"epoch": 43.88,
"learning_rate": 9.764885496183206e-06,
"loss": 1.1307,
"step": 30500
},
{
"epoch": 43.88,
"eval_cer": 0.09888206581640686,
"eval_loss": 0.22221311926841736,
"eval_runtime": 40.9097,
"eval_samples_per_second": 11.147,
"eval_steps_per_second": 1.393,
"eval_wer": 0.30776200543642407,
"step": 30500
},
{
"epoch": 44.03,
"learning_rate": 9.53587786259542e-06,
"loss": 1.1358,
"step": 30600
},
{
"epoch": 44.17,
"learning_rate": 9.306870229007633e-06,
"loss": 1.1342,
"step": 30700
},
{
"epoch": 44.32,
"learning_rate": 9.077862595419846e-06,
"loss": 1.1348,
"step": 30800
},
{
"epoch": 44.46,
"learning_rate": 8.84885496183206e-06,
"loss": 1.1294,
"step": 30900
},
{
"epoch": 44.6,
"learning_rate": 8.619847328244275e-06,
"loss": 1.1419,
"step": 31000
},
{
"epoch": 44.6,
"eval_cer": 0.09813415210203118,
"eval_loss": 0.22149430215358734,
"eval_runtime": 40.8027,
"eval_samples_per_second": 11.176,
"eval_steps_per_second": 1.397,
"eval_wer": 0.3038356991845364,
"step": 31000
},
{
"epoch": 44.75,
"learning_rate": 8.390839694656488e-06,
"loss": 1.1191,
"step": 31100
},
{
"epoch": 44.89,
"learning_rate": 8.161832061068702e-06,
"loss": 1.1223,
"step": 31200
},
{
"epoch": 45.04,
"learning_rate": 7.932824427480915e-06,
"loss": 1.1393,
"step": 31300
},
{
"epoch": 45.18,
"learning_rate": 7.703816793893129e-06,
"loss": 1.1172,
"step": 31400
},
{
"epoch": 45.32,
"learning_rate": 7.474809160305343e-06,
"loss": 1.1231,
"step": 31500
},
{
"epoch": 45.32,
"eval_cer": 0.09722878286883956,
"eval_loss": 0.2193477302789688,
"eval_runtime": 40.6396,
"eval_samples_per_second": 11.221,
"eval_steps_per_second": 1.403,
"eval_wer": 0.3012684989429176,
"step": 31500
},
{
"epoch": 45.47,
"learning_rate": 7.245801526717557e-06,
"loss": 1.1289,
"step": 31600
},
{
"epoch": 45.61,
"learning_rate": 7.016793893129771e-06,
"loss": 1.1083,
"step": 31700
},
{
"epoch": 45.75,
"learning_rate": 6.787786259541984e-06,
"loss": 1.109,
"step": 31800
},
{
"epoch": 45.9,
"learning_rate": 6.558778625954198e-06,
"loss": 1.1218,
"step": 31900
},
{
"epoch": 46.04,
"learning_rate": 6.329770992366412e-06,
"loss": 1.139,
"step": 32000
},
{
"epoch": 46.04,
"eval_cer": 0.09683514407179972,
"eval_loss": 0.2162453532218933,
"eval_runtime": 41.1951,
"eval_samples_per_second": 11.069,
"eval_steps_per_second": 1.384,
"eval_wer": 0.30066445182724255,
"step": 32000
},
{
"epoch": 46.19,
"learning_rate": 6.100763358778626e-06,
"loss": 1.1155,
"step": 32100
},
{
"epoch": 46.33,
"learning_rate": 5.8717557251908395e-06,
"loss": 1.1212,
"step": 32200
},
{
"epoch": 46.47,
"learning_rate": 5.642748091603053e-06,
"loss": 1.1149,
"step": 32300
},
{
"epoch": 46.62,
"learning_rate": 5.413740458015267e-06,
"loss": 1.1183,
"step": 32400
},
{
"epoch": 46.76,
"learning_rate": 5.184732824427481e-06,
"loss": 1.1114,
"step": 32500
},
{
"epoch": 46.76,
"eval_cer": 0.09596913871831207,
"eval_loss": 0.2121613770723343,
"eval_runtime": 40.6982,
"eval_samples_per_second": 11.204,
"eval_steps_per_second": 1.401,
"eval_wer": 0.2982482633645424,
"step": 32500
},
{
"epoch": 46.91,
"learning_rate": 4.955725190839695e-06,
"loss": 1.1091,
"step": 32600
},
{
"epoch": 47.05,
"learning_rate": 4.726717557251908e-06,
"loss": 1.1148,
"step": 32700
},
{
"epoch": 47.19,
"learning_rate": 4.497709923664122e-06,
"loss": 1.0962,
"step": 32800
},
{
"epoch": 47.34,
"learning_rate": 4.268702290076335e-06,
"loss": 1.0984,
"step": 32900
},
{
"epoch": 47.48,
"learning_rate": 4.03969465648855e-06,
"loss": 1.111,
"step": 33000
},
{
"epoch": 47.48,
"eval_cer": 0.09482758620689655,
"eval_loss": 0.21248506009578705,
"eval_runtime": 40.6368,
"eval_samples_per_second": 11.221,
"eval_steps_per_second": 1.403,
"eval_wer": 0.2946239806704923,
"step": 33000
},
{
"epoch": 47.63,
"learning_rate": 3.810687022900763e-06,
"loss": 1.1031,
"step": 33100
},
{
"epoch": 47.77,
"learning_rate": 3.581679389312977e-06,
"loss": 1.1159,
"step": 33200
},
{
"epoch": 47.91,
"learning_rate": 3.352671755725191e-06,
"loss": 1.0905,
"step": 33300
},
{
"epoch": 48.06,
"learning_rate": 3.1236641221374048e-06,
"loss": 1.1087,
"step": 33400
},
{
"epoch": 48.2,
"learning_rate": 2.894656488549618e-06,
"loss": 1.0982,
"step": 33500
},
{
"epoch": 48.2,
"eval_cer": 0.09533931664304834,
"eval_loss": 0.2098563313484192,
"eval_runtime": 40.5946,
"eval_samples_per_second": 11.233,
"eval_steps_per_second": 1.404,
"eval_wer": 0.2956810631229236,
"step": 33500
},
{
"epoch": 48.34,
"learning_rate": 2.66793893129771e-06,
"loss": 1.0947,
"step": 33600
},
{
"epoch": 48.49,
"learning_rate": 2.4389312977099237e-06,
"loss": 1.1102,
"step": 33700
},
{
"epoch": 48.63,
"learning_rate": 2.209923664122137e-06,
"loss": 1.0891,
"step": 33800
},
{
"epoch": 48.78,
"learning_rate": 1.980916030534351e-06,
"loss": 1.0937,
"step": 33900
},
{
"epoch": 48.92,
"learning_rate": 1.7519083969465647e-06,
"loss": 1.109,
"step": 34000
},
{
"epoch": 48.92,
"eval_cer": 0.09545740828216029,
"eval_loss": 0.20918497443199158,
"eval_runtime": 40.877,
"eval_samples_per_second": 11.155,
"eval_steps_per_second": 1.394,
"eval_wer": 0.29553005134400484,
"step": 34000
},
{
"epoch": 49.06,
"learning_rate": 1.5229007633587786e-06,
"loss": 1.097,
"step": 34100
},
{
"epoch": 49.21,
"learning_rate": 1.2938931297709922e-06,
"loss": 1.0909,
"step": 34200
},
{
"epoch": 49.35,
"learning_rate": 1.0648854961832059e-06,
"loss": 1.1008,
"step": 34300
},
{
"epoch": 49.5,
"learning_rate": 8.358778625954198e-07,
"loss": 1.0904,
"step": 34400
},
{
"epoch": 49.64,
"learning_rate": 6.068702290076335e-07,
"loss": 1.0905,
"step": 34500
},
{
"epoch": 49.64,
"eval_cer": 0.09526058888364038,
"eval_loss": 0.20883652567863464,
"eval_runtime": 40.602,
"eval_samples_per_second": 11.231,
"eval_steps_per_second": 1.404,
"eval_wer": 0.2953790395650861,
"step": 34500
},
{
"epoch": 49.78,
"learning_rate": 3.778625954198473e-07,
"loss": 1.0961,
"step": 34600
},
{
"epoch": 49.93,
"learning_rate": 1.4885496183206107e-07,
"loss": 1.095,
"step": 34700
},
{
"epoch": 50.0,
"step": 34750,
"total_flos": 2.8392187465644065e+20,
"train_loss": 2.2316733406121783,
"train_runtime": 114311.9751,
"train_samples_per_second": 9.737,
"train_steps_per_second": 0.304
}
],
"max_steps": 34750,
"num_train_epochs": 50,
"total_flos": 2.8392187465644065e+20,
"trial_name": null,
"trial_params": null
}