{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.99892202659001, "global_step": 34750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 3.675e-06, "loss": 47.2908, "step": 100 }, { "epoch": 0.29, "learning_rate": 7.425e-06, "loss": 33.9125, "step": 200 }, { "epoch": 0.43, "learning_rate": 1.1174999999999999e-05, "loss": 26.6068, "step": 300 }, { "epoch": 0.57, "learning_rate": 1.4925e-05, "loss": 23.2775, "step": 400 }, { "epoch": 0.72, "learning_rate": 1.8675e-05, "loss": 19.7138, "step": 500 }, { "epoch": 0.72, "eval_cer": 1.0, "eval_loss": 19.642736434936523, "eval_runtime": 41.3907, "eval_samples_per_second": 11.017, "eval_steps_per_second": 1.377, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.86, "learning_rate": 2.2424999999999996e-05, "loss": 15.7715, "step": 600 }, { "epoch": 1.01, "learning_rate": 2.6174999999999996e-05, "loss": 11.4061, "step": 700 }, { "epoch": 1.15, "learning_rate": 2.9925e-05, "loss": 7.4329, "step": 800 }, { "epoch": 1.29, "learning_rate": 3.3675e-05, "loss": 5.3081, "step": 900 }, { "epoch": 1.44, "learning_rate": 3.7424999999999995e-05, "loss": 4.8039, "step": 1000 }, { "epoch": 1.44, "eval_cer": 1.0, "eval_loss": 4.784187316894531, "eval_runtime": 42.2256, "eval_samples_per_second": 10.799, "eval_steps_per_second": 1.35, "eval_wer": 1.0, "step": 1000 }, { "epoch": 1.58, "learning_rate": 4.1175e-05, "loss": 4.762, "step": 1100 }, { "epoch": 1.73, "learning_rate": 4.4924999999999994e-05, "loss": 4.6928, "step": 1200 }, { "epoch": 1.87, "learning_rate": 4.8675e-05, "loss": 4.6292, "step": 1300 }, { "epoch": 2.01, "learning_rate": 5.2424999999999994e-05, "loss": 4.6321, "step": 1400 }, { "epoch": 2.16, "learning_rate": 5.6175e-05, "loss": 4.5619, "step": 1500 }, { "epoch": 2.16, "eval_cer": 0.9598094788222327, "eval_loss": 4.560794830322266, "eval_runtime": 41.0352, "eval_samples_per_second": 11.112, "eval_steps_per_second": 1.389, "eval_wer": 0.9992449411054062, "step": 1500 }, { "epoch": 2.3, "learning_rate": 5.9925e-05, "loss": 4.4704, "step": 1600 }, { "epoch": 2.45, "learning_rate": 6.367499999999999e-05, "loss": 4.3806, "step": 1700 }, { "epoch": 2.59, "learning_rate": 6.7425e-05, "loss": 4.3092, "step": 1800 }, { "epoch": 2.73, "learning_rate": 7.1175e-05, "loss": 4.2794, "step": 1900 }, { "epoch": 2.88, "learning_rate": 7.492499999999999e-05, "loss": 4.254, "step": 2000 }, { "epoch": 2.88, "eval_cer": 0.906274602424815, "eval_loss": 4.272861003875732, "eval_runtime": 40.8387, "eval_samples_per_second": 11.166, "eval_steps_per_second": 1.396, "eval_wer": 0.9954696466324373, "step": 2000 }, { "epoch": 3.02, "learning_rate": 7.477557251908395e-05, "loss": 4.2616, "step": 2100 }, { "epoch": 3.17, "learning_rate": 7.454656488549618e-05, "loss": 4.2184, "step": 2200 }, { "epoch": 3.31, "learning_rate": 7.43175572519084e-05, "loss": 4.227, "step": 2300 }, { "epoch": 3.45, "learning_rate": 7.408854961832061e-05, "loss": 4.1985, "step": 2400 }, { "epoch": 3.6, "learning_rate": 7.385954198473281e-05, "loss": 4.1905, "step": 2500 }, { "epoch": 3.6, "eval_cer": 0.8758463234136357, "eval_loss": 4.225706100463867, "eval_runtime": 40.6017, "eval_samples_per_second": 11.231, "eval_steps_per_second": 1.404, "eval_wer": 0.9903352461491997, "step": 2500 }, { "epoch": 3.74, "learning_rate": 7.36328244274809e-05, "loss": 4.1873, "step": 2600 }, { "epoch": 3.88, "learning_rate": 7.340381679389312e-05, "loss": 4.1615, "step": 2700 }, { "epoch": 4.03, "learning_rate": 7.317480916030534e-05, "loss": 4.157, "step": 2800 }, { "epoch": 4.17, "learning_rate": 7.294580152671756e-05, "loss": 4.1124, "step": 2900 }, { "epoch": 4.32, "learning_rate": 7.271679389312976e-05, "loss": 4.0683, "step": 3000 }, { "epoch": 4.32, "eval_cer": 0.7911352542906629, "eval_loss": 3.929443120956421, "eval_runtime": 41.2715, "eval_samples_per_second": 11.049, "eval_steps_per_second": 1.381, "eval_wer": 0.9936575052854123, "step": 3000 }, { "epoch": 4.46, "learning_rate": 7.248778625954197e-05, "loss": 4.0704, "step": 3100 }, { "epoch": 4.6, "learning_rate": 7.22587786259542e-05, "loss": 3.9616, "step": 3200 }, { "epoch": 4.75, "learning_rate": 7.202977099236641e-05, "loss": 3.7798, "step": 3300 }, { "epoch": 4.89, "learning_rate": 7.180076335877862e-05, "loss": 3.6123, "step": 3400 }, { "epoch": 5.04, "learning_rate": 7.15740458015267e-05, "loss": 3.486, "step": 3500 }, { "epoch": 5.04, "eval_cer": 0.5933711226578492, "eval_loss": 2.704545497894287, "eval_runtime": 40.8408, "eval_samples_per_second": 11.165, "eval_steps_per_second": 1.396, "eval_wer": 1.0012080942313502, "step": 3500 }, { "epoch": 5.18, "learning_rate": 7.134503816793892e-05, "loss": 3.3283, "step": 3600 }, { "epoch": 5.32, "learning_rate": 7.111603053435114e-05, "loss": 3.2091, "step": 3700 }, { "epoch": 5.47, "learning_rate": 7.088931297709923e-05, "loss": 3.1158, "step": 3800 }, { "epoch": 5.61, "learning_rate": 7.066030534351145e-05, "loss": 2.9983, "step": 3900 }, { "epoch": 5.75, "learning_rate": 7.043129770992365e-05, "loss": 2.946, "step": 4000 }, { "epoch": 5.75, "eval_cer": 0.4634309557549992, "eval_loss": 1.9690674543380737, "eval_runtime": 40.879, "eval_samples_per_second": 11.155, "eval_steps_per_second": 1.394, "eval_wer": 0.942464512231954, "step": 4000 }, { "epoch": 5.9, "learning_rate": 7.020229007633587e-05, "loss": 2.8545, "step": 4100 }, { "epoch": 6.04, "learning_rate": 6.997328244274808e-05, "loss": 2.8092, "step": 4200 }, { "epoch": 6.19, "learning_rate": 6.97442748091603e-05, "loss": 2.7229, "step": 4300 }, { "epoch": 6.33, "learning_rate": 6.951526717557252e-05, "loss": 2.7053, "step": 4400 }, { "epoch": 6.47, "learning_rate": 6.928625954198472e-05, "loss": 2.634, "step": 4500 }, { "epoch": 6.47, "eval_cer": 0.38501810738466385, "eval_loss": 1.521231770515442, "eval_runtime": 41.5435, "eval_samples_per_second": 10.976, "eval_steps_per_second": 1.372, "eval_wer": 0.880700694654183, "step": 4500 }, { "epoch": 6.62, "learning_rate": 6.905725190839693e-05, "loss": 2.5996, "step": 4600 }, { "epoch": 6.76, "learning_rate": 6.882824427480916e-05, "loss": 2.5472, "step": 4700 }, { "epoch": 6.91, "learning_rate": 6.859923664122137e-05, "loss": 2.4959, "step": 4800 }, { "epoch": 7.05, "learning_rate": 6.837022900763359e-05, "loss": 2.4554, "step": 4900 }, { "epoch": 7.19, "learning_rate": 6.814122137404579e-05, "loss": 2.4066, "step": 5000 }, { "epoch": 7.19, "eval_cer": 0.36014013541174617, "eval_loss": 1.2550952434539795, "eval_runtime": 41.0408, "eval_samples_per_second": 11.111, "eval_steps_per_second": 1.389, "eval_wer": 0.8177287828450619, "step": 5000 }, { "epoch": 7.34, "learning_rate": 6.791221374045801e-05, "loss": 2.3768, "step": 5100 }, { "epoch": 7.48, "learning_rate": 6.768320610687023e-05, "loss": 2.3557, "step": 5200 }, { "epoch": 7.63, "learning_rate": 6.745419847328244e-05, "loss": 2.3109, "step": 5300 }, { "epoch": 7.77, "learning_rate": 6.722519083969465e-05, "loss": 2.2953, "step": 5400 }, { "epoch": 7.91, "learning_rate": 6.699618320610687e-05, "loss": 2.2651, "step": 5500 }, { "epoch": 7.91, "eval_cer": 0.30392851519445757, "eval_loss": 1.0423332452774048, "eval_runtime": 40.9098, "eval_samples_per_second": 11.146, "eval_steps_per_second": 1.393, "eval_wer": 0.7650256720024162, "step": 5500 }, { "epoch": 8.06, "learning_rate": 6.676717557251908e-05, "loss": 2.2589, "step": 5600 }, { "epoch": 8.2, "learning_rate": 6.654045801526718e-05, "loss": 2.2122, "step": 5700 }, { "epoch": 8.34, "learning_rate": 6.631145038167939e-05, "loss": 2.2017, "step": 5800 }, { "epoch": 8.49, "learning_rate": 6.60824427480916e-05, "loss": 2.1814, "step": 5900 }, { "epoch": 8.63, "learning_rate": 6.58534351145038e-05, "loss": 2.1828, "step": 6000 }, { "epoch": 8.63, "eval_cer": 0.3106203747441348, "eval_loss": 0.9598844051361084, "eval_runtime": 41.4743, "eval_samples_per_second": 10.995, "eval_steps_per_second": 1.374, "eval_wer": 0.7272727272727273, "step": 6000 }, { "epoch": 8.78, "learning_rate": 6.562442748091603e-05, "loss": 2.1714, "step": 6100 }, { "epoch": 8.92, "learning_rate": 6.539541984732824e-05, "loss": 2.1422, "step": 6200 }, { "epoch": 9.06, "learning_rate": 6.516641221374046e-05, "loss": 2.1546, "step": 6300 }, { "epoch": 9.21, "learning_rate": 6.493740458015267e-05, "loss": 2.12, "step": 6400 }, { "epoch": 9.35, "learning_rate": 6.470839694656488e-05, "loss": 2.1023, "step": 6500 }, { "epoch": 9.35, "eval_cer": 0.30632971185640057, "eval_loss": 0.9481843113899231, "eval_runtime": 41.1867, "eval_samples_per_second": 11.072, "eval_steps_per_second": 1.384, "eval_wer": 0.7160978556327393, "step": 6500 }, { "epoch": 9.5, "learning_rate": 6.44793893129771e-05, "loss": 2.1104, "step": 6600 }, { "epoch": 9.64, "learning_rate": 6.425038167938931e-05, "loss": 2.0879, "step": 6700 }, { "epoch": 9.78, "learning_rate": 6.402137404580152e-05, "loss": 2.0724, "step": 6800 }, { "epoch": 9.93, "learning_rate": 6.379236641221374e-05, "loss": 2.0622, "step": 6900 }, { "epoch": 10.07, "learning_rate": 6.356335877862595e-05, "loss": 2.0536, "step": 7000 }, { "epoch": 10.07, "eval_cer": 0.28597858604944104, "eval_loss": 0.8241907954216003, "eval_runtime": 41.2837, "eval_samples_per_second": 11.046, "eval_steps_per_second": 1.381, "eval_wer": 0.6766837813349441, "step": 7000 }, { "epoch": 10.22, "learning_rate": 6.333435114503816e-05, "loss": 2.0258, "step": 7100 }, { "epoch": 10.36, "learning_rate": 6.310534351145038e-05, "loss": 2.038, "step": 7200 }, { "epoch": 10.5, "learning_rate": 6.287633587786259e-05, "loss": 2.0093, "step": 7300 }, { "epoch": 10.65, "learning_rate": 6.26473282442748e-05, "loss": 1.9839, "step": 7400 }, { "epoch": 10.79, "learning_rate": 6.241832061068702e-05, "loss": 1.9803, "step": 7500 }, { "epoch": 10.79, "eval_cer": 0.2636592662572823, "eval_loss": 0.7643126845359802, "eval_runtime": 41.3574, "eval_samples_per_second": 11.026, "eval_steps_per_second": 1.378, "eval_wer": 0.6562971911809121, "step": 7500 }, { "epoch": 10.93, "learning_rate": 6.218931297709923e-05, "loss": 1.9704, "step": 7600 }, { "epoch": 11.08, "learning_rate": 6.196030534351144e-05, "loss": 1.9923, "step": 7700 }, { "epoch": 11.22, "learning_rate": 6.173129770992366e-05, "loss": 1.9549, "step": 7800 }, { "epoch": 11.37, "learning_rate": 6.150229007633587e-05, "loss": 1.9339, "step": 7900 }, { "epoch": 11.51, "learning_rate": 6.127328244274808e-05, "loss": 1.9468, "step": 8000 }, { "epoch": 11.51, "eval_cer": 0.25051173043615177, "eval_loss": 0.7318933606147766, "eval_runtime": 40.8808, "eval_samples_per_second": 11.154, "eval_steps_per_second": 1.394, "eval_wer": 0.644065237088493, "step": 8000 }, { "epoch": 11.65, "learning_rate": 6.10442748091603e-05, "loss": 1.9691, "step": 8100 }, { "epoch": 11.8, "learning_rate": 6.081526717557252e-05, "loss": 1.9845, "step": 8200 }, { "epoch": 11.94, "learning_rate": 6.0586259541984725e-05, "loss": 1.9561, "step": 8300 }, { "epoch": 12.09, "learning_rate": 6.035725190839694e-05, "loss": 1.9486, "step": 8400 }, { "epoch": 12.23, "learning_rate": 6.012824427480916e-05, "loss": 1.9178, "step": 8500 }, { "epoch": 12.23, "eval_cer": 0.24893717524799244, "eval_loss": 0.6936821937561035, "eval_runtime": 41.1829, "eval_samples_per_second": 11.073, "eval_steps_per_second": 1.384, "eval_wer": 0.6319842947749924, "step": 8500 }, { "epoch": 12.37, "learning_rate": 5.989923664122137e-05, "loss": 1.9133, "step": 8600 }, { "epoch": 12.52, "learning_rate": 5.9670229007633586e-05, "loss": 1.9327, "step": 8700 }, { "epoch": 12.66, "learning_rate": 5.944122137404579e-05, "loss": 1.8749, "step": 8800 }, { "epoch": 12.8, "learning_rate": 5.9212213740458006e-05, "loss": 1.8775, "step": 8900 }, { "epoch": 12.95, "learning_rate": 5.8983206106870226e-05, "loss": 1.8515, "step": 9000 }, { "epoch": 12.95, "eval_cer": 0.21961108486852465, "eval_loss": 0.6443303823471069, "eval_runtime": 40.5279, "eval_samples_per_second": 11.252, "eval_steps_per_second": 1.406, "eval_wer": 0.6052552099063727, "step": 9000 }, { "epoch": 13.09, "learning_rate": 5.875419847328244e-05, "loss": 1.8554, "step": 9100 }, { "epoch": 13.24, "learning_rate": 5.852519083969465e-05, "loss": 1.8568, "step": 9200 }, { "epoch": 13.38, "learning_rate": 5.829618320610686e-05, "loss": 1.8477, "step": 9300 }, { "epoch": 13.52, "learning_rate": 5.806717557251908e-05, "loss": 1.8328, "step": 9400 }, { "epoch": 13.67, "learning_rate": 5.783816793893129e-05, "loss": 1.8083, "step": 9500 }, { "epoch": 13.67, "eval_cer": 0.21484805542434263, "eval_loss": 0.6285760402679443, "eval_runtime": 41.6653, "eval_samples_per_second": 10.944, "eval_steps_per_second": 1.368, "eval_wer": 0.6122017517366355, "step": 9500 }, { "epoch": 13.81, "learning_rate": 5.760916030534351e-05, "loss": 1.8236, "step": 9600 }, { "epoch": 13.96, "learning_rate": 5.738015267175571e-05, "loss": 1.8199, "step": 9700 }, { "epoch": 14.1, "learning_rate": 5.7151145038167934e-05, "loss": 1.8285, "step": 9800 }, { "epoch": 14.24, "learning_rate": 5.692213740458015e-05, "loss": 1.817, "step": 9900 }, { "epoch": 14.39, "learning_rate": 5.669312977099236e-05, "loss": 1.819, "step": 10000 }, { "epoch": 14.39, "eval_cer": 0.2074476460399937, "eval_loss": 0.6015097498893738, "eval_runtime": 41.6458, "eval_samples_per_second": 10.949, "eval_steps_per_second": 1.369, "eval_wer": 0.5986106916339474, "step": 10000 }, { "epoch": 14.53, "learning_rate": 5.6466412213740455e-05, "loss": 1.7952, "step": 10100 }, { "epoch": 14.68, "learning_rate": 5.623740458015266e-05, "loss": 1.7955, "step": 10200 }, { "epoch": 14.82, "learning_rate": 5.600839694656488e-05, "loss": 1.7878, "step": 10300 }, { "epoch": 14.96, "learning_rate": 5.5779389312977095e-05, "loss": 1.769, "step": 10400 }, { "epoch": 15.11, "learning_rate": 5.555267175572519e-05, "loss": 1.7684, "step": 10500 }, { "epoch": 15.11, "eval_cer": 0.19815777042985355, "eval_loss": 0.5682193636894226, "eval_runtime": 41.2484, "eval_samples_per_second": 11.055, "eval_steps_per_second": 1.382, "eval_wer": 0.5741467834491091, "step": 10500 }, { "epoch": 15.25, "learning_rate": 5.53236641221374e-05, "loss": 1.7626, "step": 10600 }, { "epoch": 15.4, "learning_rate": 5.5094656488549616e-05, "loss": 1.7582, "step": 10700 }, { "epoch": 15.54, "learning_rate": 5.486564885496182e-05, "loss": 1.75, "step": 10800 }, { "epoch": 15.68, "learning_rate": 5.463664122137404e-05, "loss": 1.735, "step": 10900 }, { "epoch": 15.83, "learning_rate": 5.4407633587786256e-05, "loss": 1.7195, "step": 11000 }, { "epoch": 15.83, "eval_cer": 0.20067705873090852, "eval_loss": 0.5385124683380127, "eval_runtime": 41.6481, "eval_samples_per_second": 10.949, "eval_steps_per_second": 1.369, "eval_wer": 0.5591966173361522, "step": 11000 }, { "epoch": 15.97, "learning_rate": 5.417862595419847e-05, "loss": 1.7274, "step": 11100 }, { "epoch": 16.11, "learning_rate": 5.3949618320610677e-05, "loss": 1.7183, "step": 11200 }, { "epoch": 16.26, "learning_rate": 5.37206106870229e-05, "loss": 1.7117, "step": 11300 }, { "epoch": 16.4, "learning_rate": 5.349160305343511e-05, "loss": 1.6918, "step": 11400 }, { "epoch": 16.55, "learning_rate": 5.3262595419847324e-05, "loss": 1.7044, "step": 11500 }, { "epoch": 16.55, "eval_cer": 0.20965202330341678, "eval_loss": 0.5361923575401306, "eval_runtime": 41.5242, "eval_samples_per_second": 10.982, "eval_steps_per_second": 1.373, "eval_wer": 0.5524010872848082, "step": 11500 }, { "epoch": 16.69, "learning_rate": 5.303358778625954e-05, "loss": 1.7134, "step": 11600 }, { "epoch": 16.83, "learning_rate": 5.280458015267176e-05, "loss": 1.7016, "step": 11700 }, { "epoch": 16.98, "learning_rate": 5.2575572519083964e-05, "loss": 1.7069, "step": 11800 }, { "epoch": 17.12, "learning_rate": 5.234656488549618e-05, "loss": 1.7046, "step": 11900 }, { "epoch": 17.27, "learning_rate": 5.211755725190839e-05, "loss": 1.6879, "step": 12000 }, { "epoch": 17.27, "eval_cer": 0.20831365139348135, "eval_loss": 0.5119141936302185, "eval_runtime": 40.4618, "eval_samples_per_second": 11.27, "eval_steps_per_second": 1.409, "eval_wer": 0.5489278163696768, "step": 12000 }, { "epoch": 17.41, "learning_rate": 5.188854961832061e-05, "loss": 1.681, "step": 12100 }, { "epoch": 17.55, "learning_rate": 5.1659541984732825e-05, "loss": 1.6683, "step": 12200 }, { "epoch": 17.7, "learning_rate": 5.143053435114503e-05, "loss": 1.655, "step": 12300 }, { "epoch": 17.84, "learning_rate": 5.1201526717557245e-05, "loss": 1.6604, "step": 12400 }, { "epoch": 17.98, "learning_rate": 5.0972519083969465e-05, "loss": 1.656, "step": 12500 }, { "epoch": 17.98, "eval_cer": 0.19678003464021415, "eval_loss": 0.4990406930446625, "eval_runtime": 40.5826, "eval_samples_per_second": 11.236, "eval_steps_per_second": 1.405, "eval_wer": 0.5362428269405014, "step": 12500 }, { "epoch": 18.13, "learning_rate": 5.074351145038168e-05, "loss": 1.6645, "step": 12600 }, { "epoch": 18.27, "learning_rate": 5.051450381679389e-05, "loss": 1.6269, "step": 12700 }, { "epoch": 18.42, "learning_rate": 5.02854961832061e-05, "loss": 1.6306, "step": 12800 }, { "epoch": 18.56, "learning_rate": 5.005877862595419e-05, "loss": 1.6191, "step": 12900 }, { "epoch": 18.7, "learning_rate": 4.9829770992366406e-05, "loss": 1.6122, "step": 13000 }, { "epoch": 18.7, "eval_cer": 0.18997008345142496, "eval_loss": 0.45614466071128845, "eval_runtime": 41.2927, "eval_samples_per_second": 11.043, "eval_steps_per_second": 1.38, "eval_wer": 0.5092117185140441, "step": 13000 }, { "epoch": 18.85, "learning_rate": 4.9600763358778626e-05, "loss": 1.622, "step": 13100 }, { "epoch": 18.99, "learning_rate": 4.937175572519084e-05, "loss": 1.6305, "step": 13200 }, { "epoch": 19.14, "learning_rate": 4.9142748091603046e-05, "loss": 1.6134, "step": 13300 }, { "epoch": 19.28, "learning_rate": 4.891374045801526e-05, "loss": 1.6044, "step": 13400 }, { "epoch": 19.42, "learning_rate": 4.868473282442748e-05, "loss": 1.5919, "step": 13500 }, { "epoch": 19.42, "eval_cer": 0.19752794835458984, "eval_loss": 0.47778981924057007, "eval_runtime": 41.5758, "eval_samples_per_second": 10.968, "eval_steps_per_second": 1.371, "eval_wer": 0.5225007550588946, "step": 13500 }, { "epoch": 19.57, "learning_rate": 4.8455725190839694e-05, "loss": 1.595, "step": 13600 }, { "epoch": 19.71, "learning_rate": 4.822671755725191e-05, "loss": 1.5959, "step": 13700 }, { "epoch": 19.86, "learning_rate": 4.7997709923664114e-05, "loss": 1.6006, "step": 13800 }, { "epoch": 20.0, "learning_rate": 4.7768702290076334e-05, "loss": 1.5913, "step": 13900 }, { "epoch": 20.14, "learning_rate": 4.753969465648855e-05, "loss": 1.5896, "step": 14000 }, { "epoch": 20.14, "eval_cer": 0.18591560384191466, "eval_loss": 0.4563109278678894, "eval_runtime": 40.8794, "eval_samples_per_second": 11.155, "eval_steps_per_second": 1.394, "eval_wer": 0.5098157656297191, "step": 14000 }, { "epoch": 20.29, "learning_rate": 4.731068702290076e-05, "loss": 1.5823, "step": 14100 }, { "epoch": 20.43, "learning_rate": 4.708167938931297e-05, "loss": 1.5634, "step": 14200 }, { "epoch": 20.57, "learning_rate": 4.685267175572519e-05, "loss": 1.5573, "step": 14300 }, { "epoch": 20.72, "learning_rate": 4.66236641221374e-05, "loss": 1.5689, "step": 14400 }, { "epoch": 20.86, "learning_rate": 4.6394656488549615e-05, "loss": 1.5589, "step": 14500 }, { "epoch": 20.86, "eval_cer": 0.17249252086285624, "eval_loss": 0.43622052669525146, "eval_runtime": 41.7277, "eval_samples_per_second": 10.928, "eval_steps_per_second": 1.366, "eval_wer": 0.4939595288432498, "step": 14500 }, { "epoch": 21.01, "learning_rate": 4.616564885496183e-05, "loss": 1.5697, "step": 14600 }, { "epoch": 21.15, "learning_rate": 4.593664122137405e-05, "loss": 1.5336, "step": 14700 }, { "epoch": 21.29, "learning_rate": 4.5707633587786255e-05, "loss": 1.5425, "step": 14800 }, { "epoch": 21.44, "learning_rate": 4.547862595419847e-05, "loss": 1.5461, "step": 14900 }, { "epoch": 21.58, "learning_rate": 4.524961832061068e-05, "loss": 1.5353, "step": 15000 }, { "epoch": 21.58, "eval_cer": 0.15804597701149425, "eval_loss": 0.41395294666290283, "eval_runtime": 40.5257, "eval_samples_per_second": 11.252, "eval_steps_per_second": 1.407, "eval_wer": 0.4826336454243431, "step": 15000 }, { "epoch": 21.73, "learning_rate": 4.5020610687022895e-05, "loss": 1.5348, "step": 15100 }, { "epoch": 21.87, "learning_rate": 4.4791603053435116e-05, "loss": 1.5279, "step": 15200 }, { "epoch": 22.01, "learning_rate": 4.456259541984732e-05, "loss": 1.5492, "step": 15300 }, { "epoch": 22.16, "learning_rate": 4.4333587786259536e-05, "loss": 1.5291, "step": 15400 }, { "epoch": 22.3, "learning_rate": 4.410458015267175e-05, "loss": 1.5441, "step": 15500 }, { "epoch": 22.3, "eval_cer": 0.15501495827428752, "eval_loss": 0.40313535928726196, "eval_runtime": 41.0848, "eval_samples_per_second": 11.099, "eval_steps_per_second": 1.387, "eval_wer": 0.47417698580489276, "step": 15500 }, { "epoch": 22.45, "learning_rate": 4.387557251908397e-05, "loss": 1.518, "step": 15600 }, { "epoch": 22.59, "learning_rate": 4.364656488549618e-05, "loss": 1.5081, "step": 15700 }, { "epoch": 22.73, "learning_rate": 4.341755725190839e-05, "loss": 1.4959, "step": 15800 }, { "epoch": 22.88, "learning_rate": 4.31885496183206e-05, "loss": 1.5097, "step": 15900 }, { "epoch": 23.02, "learning_rate": 4.295954198473282e-05, "loss": 1.5116, "step": 16000 }, { "epoch": 23.02, "eval_cer": 0.15450322783813572, "eval_loss": 0.39162585139274597, "eval_runtime": 40.7373, "eval_samples_per_second": 11.194, "eval_steps_per_second": 1.399, "eval_wer": 0.4747810329205678, "step": 16000 }, { "epoch": 23.17, "learning_rate": 4.273053435114504e-05, "loss": 1.4951, "step": 16100 }, { "epoch": 23.31, "learning_rate": 4.250152671755724e-05, "loss": 1.4974, "step": 16200 }, { "epoch": 23.45, "learning_rate": 4.227480916030534e-05, "loss": 1.5045, "step": 16300 }, { "epoch": 23.6, "learning_rate": 4.204580152671755e-05, "loss": 1.4944, "step": 16400 }, { "epoch": 23.74, "learning_rate": 4.181679389312977e-05, "loss": 1.4731, "step": 16500 }, { "epoch": 23.74, "eval_cer": 0.15422768068020784, "eval_loss": 0.3840835392475128, "eval_runtime": 40.8763, "eval_samples_per_second": 11.156, "eval_steps_per_second": 1.394, "eval_wer": 0.4809725158562368, "step": 16500 }, { "epoch": 23.88, "learning_rate": 4.1587786259541985e-05, "loss": 1.472, "step": 16600 }, { "epoch": 24.03, "learning_rate": 4.13587786259542e-05, "loss": 1.4847, "step": 16700 }, { "epoch": 24.17, "learning_rate": 4.1129770992366405e-05, "loss": 1.4603, "step": 16800 }, { "epoch": 24.32, "learning_rate": 4.090076335877862e-05, "loss": 1.4563, "step": 16900 }, { "epoch": 24.46, "learning_rate": 4.067175572519084e-05, "loss": 1.4647, "step": 17000 }, { "epoch": 24.46, "eval_cer": 0.14753582113053063, "eval_loss": 0.37518319487571716, "eval_runtime": 41.0205, "eval_samples_per_second": 11.116, "eval_steps_per_second": 1.39, "eval_wer": 0.452431289640592, "step": 17000 }, { "epoch": 24.6, "learning_rate": 4.044274809160305e-05, "loss": 1.4585, "step": 17100 }, { "epoch": 24.75, "learning_rate": 4.021374045801526e-05, "loss": 1.4692, "step": 17200 }, { "epoch": 24.89, "learning_rate": 3.998473282442747e-05, "loss": 1.444, "step": 17300 }, { "epoch": 25.04, "learning_rate": 3.975572519083969e-05, "loss": 1.4717, "step": 17400 }, { "epoch": 25.18, "learning_rate": 3.9526717557251906e-05, "loss": 1.4328, "step": 17500 }, { "epoch": 25.18, "eval_cer": 0.1461187214611872, "eval_loss": 0.35870596766471863, "eval_runtime": 40.6723, "eval_samples_per_second": 11.212, "eval_steps_per_second": 1.401, "eval_wer": 0.4475989127151918, "step": 17500 }, { "epoch": 25.32, "learning_rate": 3.929770992366412e-05, "loss": 1.4329, "step": 17600 }, { "epoch": 25.47, "learning_rate": 3.9068702290076326e-05, "loss": 1.4209, "step": 17700 }, { "epoch": 25.61, "learning_rate": 3.884198473282442e-05, "loss": 1.4188, "step": 17800 }, { "epoch": 25.75, "learning_rate": 3.861297709923664e-05, "loss": 1.4301, "step": 17900 }, { "epoch": 25.9, "learning_rate": 3.8383969465648854e-05, "loss": 1.4129, "step": 18000 }, { "epoch": 25.9, "eval_cer": 0.13663202645252717, "eval_loss": 0.3428773581981659, "eval_runtime": 42.0192, "eval_samples_per_second": 10.852, "eval_steps_per_second": 1.357, "eval_wer": 0.42419208698278466, "step": 18000 }, { "epoch": 26.04, "learning_rate": 3.815496183206107e-05, "loss": 1.4266, "step": 18100 }, { "epoch": 26.19, "learning_rate": 3.7925954198473274e-05, "loss": 1.4166, "step": 18200 }, { "epoch": 26.33, "learning_rate": 3.7696946564885494e-05, "loss": 1.4157, "step": 18300 }, { "epoch": 26.47, "learning_rate": 3.746793893129771e-05, "loss": 1.4285, "step": 18400 }, { "epoch": 26.62, "learning_rate": 3.723893129770992e-05, "loss": 1.4062, "step": 18500 }, { "epoch": 26.62, "eval_cer": 0.13549047394111163, "eval_loss": 0.34499478340148926, "eval_runtime": 41.0336, "eval_samples_per_second": 11.113, "eval_steps_per_second": 1.389, "eval_wer": 0.4250981576562972, "step": 18500 }, { "epoch": 26.76, "learning_rate": 3.7009923664122134e-05, "loss": 1.4163, "step": 18600 }, { "epoch": 26.91, "learning_rate": 3.678091603053435e-05, "loss": 1.404, "step": 18700 }, { "epoch": 27.05, "learning_rate": 3.655190839694656e-05, "loss": 1.4134, "step": 18800 }, { "epoch": 27.19, "learning_rate": 3.6322900763358775e-05, "loss": 1.4001, "step": 18900 }, { "epoch": 27.34, "learning_rate": 3.609389312977099e-05, "loss": 1.3928, "step": 19000 }, { "epoch": 27.34, "eval_cer": 0.13218390804597702, "eval_loss": 0.32969579100608826, "eval_runtime": 41.0801, "eval_samples_per_second": 11.1, "eval_steps_per_second": 1.388, "eval_wer": 0.4145273331319843, "step": 19000 }, { "epoch": 27.48, "learning_rate": 3.58648854961832e-05, "loss": 1.3979, "step": 19100 }, { "epoch": 27.63, "learning_rate": 3.5635877862595415e-05, "loss": 1.3971, "step": 19200 }, { "epoch": 27.77, "learning_rate": 3.540687022900763e-05, "loss": 1.3934, "step": 19300 }, { "epoch": 27.91, "learning_rate": 3.517786259541984e-05, "loss": 1.3866, "step": 19400 }, { "epoch": 28.06, "learning_rate": 3.4948854961832055e-05, "loss": 1.3906, "step": 19500 }, { "epoch": 28.06, "eval_cer": 0.1336403715950244, "eval_loss": 0.32101842761039734, "eval_runtime": 41.0367, "eval_samples_per_second": 11.112, "eval_steps_per_second": 1.389, "eval_wer": 0.4184536393838719, "step": 19500 }, { "epoch": 28.2, "learning_rate": 3.471984732824427e-05, "loss": 1.3689, "step": 19600 }, { "epoch": 28.34, "learning_rate": 3.449083969465649e-05, "loss": 1.3715, "step": 19700 }, { "epoch": 28.49, "learning_rate": 3.4261832061068696e-05, "loss": 1.3527, "step": 19800 }, { "epoch": 28.63, "learning_rate": 3.4032824427480916e-05, "loss": 1.3532, "step": 19900 }, { "epoch": 28.78, "learning_rate": 3.380381679389312e-05, "loss": 1.358, "step": 20000 }, { "epoch": 28.78, "eval_cer": 0.12753897024090693, "eval_loss": 0.31306591629981995, "eval_runtime": 41.2359, "eval_samples_per_second": 11.058, "eval_steps_per_second": 1.382, "eval_wer": 0.39700996677740863, "step": 20000 }, { "epoch": 28.92, "learning_rate": 3.357480916030534e-05, "loss": 1.3582, "step": 20100 }, { "epoch": 29.06, "learning_rate": 3.334580152671755e-05, "loss": 1.3587, "step": 20200 }, { "epoch": 29.21, "learning_rate": 3.311679389312977e-05, "loss": 1.3392, "step": 20300 }, { "epoch": 29.35, "learning_rate": 3.288778625954198e-05, "loss": 1.3486, "step": 20400 }, { "epoch": 29.5, "learning_rate": 3.26587786259542e-05, "loss": 1.3445, "step": 20500 }, { "epoch": 29.5, "eval_cer": 0.12761769800031492, "eval_loss": 0.3069218099117279, "eval_runtime": 41.0687, "eval_samples_per_second": 11.103, "eval_steps_per_second": 1.388, "eval_wer": 0.3920265780730897, "step": 20500 }, { "epoch": 29.64, "learning_rate": 3.242977099236641e-05, "loss": 1.3354, "step": 20600 }, { "epoch": 29.78, "learning_rate": 3.2200763358778624e-05, "loss": 1.3334, "step": 20700 }, { "epoch": 29.93, "learning_rate": 3.197175572519084e-05, "loss": 1.3305, "step": 20800 }, { "epoch": 30.07, "learning_rate": 3.174274809160305e-05, "loss": 1.3354, "step": 20900 }, { "epoch": 30.22, "learning_rate": 3.1513740458015264e-05, "loss": 1.3159, "step": 21000 }, { "epoch": 30.22, "eval_cer": 0.1254920484962998, "eval_loss": 0.30346596240997314, "eval_runtime": 41.0784, "eval_samples_per_second": 11.101, "eval_steps_per_second": 1.388, "eval_wer": 0.3961038961038961, "step": 21000 }, { "epoch": 30.36, "learning_rate": 3.128473282442748e-05, "loss": 1.3376, "step": 21100 }, { "epoch": 30.5, "learning_rate": 3.105572519083969e-05, "loss": 1.324, "step": 21200 }, { "epoch": 30.65, "learning_rate": 3.0826717557251904e-05, "loss": 1.3091, "step": 21300 }, { "epoch": 30.79, "learning_rate": 3.059770992366412e-05, "loss": 1.3213, "step": 21400 }, { "epoch": 30.93, "learning_rate": 3.0368702290076335e-05, "loss": 1.3044, "step": 21500 }, { "epoch": 30.93, "eval_cer": 0.12423240434577232, "eval_loss": 0.29519879817962646, "eval_runtime": 41.1753, "eval_samples_per_second": 11.075, "eval_steps_per_second": 1.384, "eval_wer": 0.3853820598006645, "step": 21500 }, { "epoch": 31.08, "learning_rate": 3.0139694656488545e-05, "loss": 1.3033, "step": 21600 }, { "epoch": 31.22, "learning_rate": 2.991068702290076e-05, "loss": 1.2995, "step": 21700 }, { "epoch": 31.37, "learning_rate": 2.9681679389312975e-05, "loss": 1.3101, "step": 21800 }, { "epoch": 31.51, "learning_rate": 2.945267175572519e-05, "loss": 1.304, "step": 21900 }, { "epoch": 31.65, "learning_rate": 2.9223664122137402e-05, "loss": 1.3034, "step": 22000 }, { "epoch": 31.65, "eval_cer": 0.12273657691702095, "eval_loss": 0.29660850763320923, "eval_runtime": 41.8912, "eval_samples_per_second": 10.885, "eval_steps_per_second": 1.361, "eval_wer": 0.37722742373905166, "step": 22000 }, { "epoch": 31.8, "learning_rate": 2.8994656488549615e-05, "loss": 1.2912, "step": 22100 }, { "epoch": 31.94, "learning_rate": 2.876564885496183e-05, "loss": 1.299, "step": 22200 }, { "epoch": 32.09, "learning_rate": 2.8536641221374046e-05, "loss": 1.3042, "step": 22300 }, { "epoch": 32.23, "learning_rate": 2.8307633587786256e-05, "loss": 1.294, "step": 22400 }, { "epoch": 32.37, "learning_rate": 2.8078625954198472e-05, "loss": 1.2963, "step": 22500 }, { "epoch": 32.37, "eval_cer": 0.12080774681152574, "eval_loss": 0.2843906879425049, "eval_runtime": 41.7644, "eval_samples_per_second": 10.918, "eval_steps_per_second": 1.365, "eval_wer": 0.3705829054666264, "step": 22500 }, { "epoch": 32.52, "learning_rate": 2.7849618320610682e-05, "loss": 1.2769, "step": 22600 }, { "epoch": 32.66, "learning_rate": 2.76206106870229e-05, "loss": 1.2812, "step": 22700 }, { "epoch": 32.8, "learning_rate": 2.7391603053435113e-05, "loss": 1.2827, "step": 22800 }, { "epoch": 32.95, "learning_rate": 2.7162595419847326e-05, "loss": 1.2747, "step": 22900 }, { "epoch": 33.09, "learning_rate": 2.6935877862595417e-05, "loss": 1.2765, "step": 23000 }, { "epoch": 33.09, "eval_cer": 0.11726499763816722, "eval_loss": 0.28407707810401917, "eval_runtime": 40.9894, "eval_samples_per_second": 11.125, "eval_steps_per_second": 1.391, "eval_wer": 0.35668982180610087, "step": 23000 }, { "epoch": 33.24, "learning_rate": 2.670687022900763e-05, "loss": 1.2785, "step": 23100 }, { "epoch": 33.38, "learning_rate": 2.6477862595419844e-05, "loss": 1.2644, "step": 23200 }, { "epoch": 33.52, "learning_rate": 2.624885496183206e-05, "loss": 1.2724, "step": 23300 }, { "epoch": 33.67, "learning_rate": 2.601984732824427e-05, "loss": 1.2551, "step": 23400 }, { "epoch": 33.81, "learning_rate": 2.5790839694656488e-05, "loss": 1.2438, "step": 23500 }, { "epoch": 33.81, "eval_cer": 0.11372224846480869, "eval_loss": 0.2734295129776001, "eval_runtime": 41.7199, "eval_samples_per_second": 10.93, "eval_steps_per_second": 1.366, "eval_wer": 0.35517970401691334, "step": 23500 }, { "epoch": 33.96, "learning_rate": 2.5561832061068698e-05, "loss": 1.2491, "step": 23600 }, { "epoch": 34.1, "learning_rate": 2.5332824427480915e-05, "loss": 1.252, "step": 23700 }, { "epoch": 34.24, "learning_rate": 2.5103816793893128e-05, "loss": 1.2467, "step": 23800 }, { "epoch": 34.39, "learning_rate": 2.487480916030534e-05, "loss": 1.2406, "step": 23900 }, { "epoch": 34.53, "learning_rate": 2.4645801526717555e-05, "loss": 1.2487, "step": 24000 }, { "epoch": 34.53, "eval_cer": 0.11179341835931349, "eval_loss": 0.2702818512916565, "eval_runtime": 41.8515, "eval_samples_per_second": 10.896, "eval_steps_per_second": 1.362, "eval_wer": 0.3501963153125944, "step": 24000 }, { "epoch": 34.68, "learning_rate": 2.441679389312977e-05, "loss": 1.2504, "step": 24100 }, { "epoch": 34.82, "learning_rate": 2.4187786259541982e-05, "loss": 1.2341, "step": 24200 }, { "epoch": 34.96, "learning_rate": 2.39587786259542e-05, "loss": 1.2477, "step": 24300 }, { "epoch": 35.11, "learning_rate": 2.372977099236641e-05, "loss": 1.2427, "step": 24400 }, { "epoch": 35.25, "learning_rate": 2.3500763358778626e-05, "loss": 1.2249, "step": 24500 }, { "epoch": 35.25, "eval_cer": 0.11423397890096047, "eval_loss": 0.2650163471698761, "eval_runtime": 41.2103, "eval_samples_per_second": 11.065, "eval_steps_per_second": 1.383, "eval_wer": 0.3483841739655693, "step": 24500 }, { "epoch": 35.4, "learning_rate": 2.3271755725190836e-05, "loss": 1.2265, "step": 24600 }, { "epoch": 35.54, "learning_rate": 2.3042748091603052e-05, "loss": 1.2276, "step": 24700 }, { "epoch": 35.68, "learning_rate": 2.2816030534351143e-05, "loss": 1.2332, "step": 24800 }, { "epoch": 35.83, "learning_rate": 2.2587022900763357e-05, "loss": 1.2249, "step": 24900 }, { "epoch": 35.97, "learning_rate": 2.235801526717557e-05, "loss": 1.2229, "step": 25000 }, { "epoch": 35.97, "eval_cer": 0.10970713273500236, "eval_loss": 0.25843024253845215, "eval_runtime": 42.815, "eval_samples_per_second": 10.65, "eval_steps_per_second": 1.331, "eval_wer": 0.3373603141045001, "step": 25000 }, { "epoch": 36.11, "learning_rate": 2.2129007633587784e-05, "loss": 1.2412, "step": 25100 }, { "epoch": 36.26, "learning_rate": 2.1899999999999997e-05, "loss": 1.212, "step": 25200 }, { "epoch": 36.4, "learning_rate": 2.1670992366412214e-05, "loss": 1.2151, "step": 25300 }, { "epoch": 36.55, "learning_rate": 2.1441984732824424e-05, "loss": 1.2303, "step": 25400 }, { "epoch": 36.69, "learning_rate": 2.121297709923664e-05, "loss": 1.2374, "step": 25500 }, { "epoch": 36.69, "eval_cer": 0.10951031333648244, "eval_loss": 0.2568279504776001, "eval_runtime": 41.6839, "eval_samples_per_second": 10.939, "eval_steps_per_second": 1.367, "eval_wer": 0.33373603141045, "step": 25500 }, { "epoch": 36.83, "learning_rate": 2.098396946564885e-05, "loss": 1.2152, "step": 25600 }, { "epoch": 36.98, "learning_rate": 2.0754961832061068e-05, "loss": 1.2089, "step": 25700 }, { "epoch": 37.12, "learning_rate": 2.052595419847328e-05, "loss": 1.2201, "step": 25800 }, { "epoch": 37.27, "learning_rate": 2.0296946564885495e-05, "loss": 1.2006, "step": 25900 }, { "epoch": 37.41, "learning_rate": 2.0067938931297708e-05, "loss": 1.2153, "step": 26000 }, { "epoch": 37.41, "eval_cer": 0.10710911667453944, "eval_loss": 0.24941784143447876, "eval_runtime": 41.3494, "eval_samples_per_second": 11.028, "eval_steps_per_second": 1.378, "eval_wer": 0.33267894895801875, "step": 26000 }, { "epoch": 37.55, "learning_rate": 1.983893129770992e-05, "loss": 1.2071, "step": 26100 }, { "epoch": 37.7, "learning_rate": 1.9609923664122135e-05, "loss": 1.2042, "step": 26200 }, { "epoch": 37.84, "learning_rate": 1.9380916030534352e-05, "loss": 1.2037, "step": 26300 }, { "epoch": 37.98, "learning_rate": 1.9151908396946562e-05, "loss": 1.1962, "step": 26400 }, { "epoch": 38.13, "learning_rate": 1.892290076335878e-05, "loss": 1.1925, "step": 26500 }, { "epoch": 38.13, "eval_cer": 0.1076995748700992, "eval_loss": 0.2518324553966522, "eval_runtime": 40.748, "eval_samples_per_second": 11.191, "eval_steps_per_second": 1.399, "eval_wer": 0.33660525520990636, "step": 26500 }, { "epoch": 38.27, "learning_rate": 1.869389312977099e-05, "loss": 1.1969, "step": 26600 }, { "epoch": 38.42, "learning_rate": 1.8464885496183202e-05, "loss": 1.1947, "step": 26700 }, { "epoch": 38.56, "learning_rate": 1.823587786259542e-05, "loss": 1.2005, "step": 26800 }, { "epoch": 38.7, "learning_rate": 1.8006870229007632e-05, "loss": 1.1961, "step": 26900 }, { "epoch": 38.85, "learning_rate": 1.7777862595419846e-05, "loss": 1.1908, "step": 27000 }, { "epoch": 38.85, "eval_cer": 0.10565265312549205, "eval_loss": 0.24367305636405945, "eval_runtime": 41.2308, "eval_samples_per_second": 11.06, "eval_steps_per_second": 1.382, "eval_wer": 0.3272425249169435, "step": 27000 }, { "epoch": 38.99, "learning_rate": 1.754885496183206e-05, "loss": 1.1762, "step": 27100 }, { "epoch": 39.14, "learning_rate": 1.7319847328244273e-05, "loss": 1.2018, "step": 27200 }, { "epoch": 39.28, "learning_rate": 1.7090839694656486e-05, "loss": 1.1822, "step": 27300 }, { "epoch": 39.42, "learning_rate": 1.68618320610687e-05, "loss": 1.1745, "step": 27400 }, { "epoch": 39.57, "learning_rate": 1.6632824427480913e-05, "loss": 1.1858, "step": 27500 }, { "epoch": 39.57, "eval_cer": 0.10443237285466855, "eval_loss": 0.23960824310779572, "eval_runtime": 42.517, "eval_samples_per_second": 10.725, "eval_steps_per_second": 1.341, "eval_wer": 0.32648746602234974, "step": 27500 }, { "epoch": 39.71, "learning_rate": 1.6403816793893127e-05, "loss": 1.1866, "step": 27600 }, { "epoch": 39.86, "learning_rate": 1.617480916030534e-05, "loss": 1.1878, "step": 27700 }, { "epoch": 40.0, "learning_rate": 1.5945801526717557e-05, "loss": 1.1817, "step": 27800 }, { "epoch": 40.14, "learning_rate": 1.571679389312977e-05, "loss": 1.1851, "step": 27900 }, { "epoch": 40.29, "learning_rate": 1.5487786259541984e-05, "loss": 1.1808, "step": 28000 }, { "epoch": 40.29, "eval_cer": 0.10277908990710125, "eval_loss": 0.2373325228691101, "eval_runtime": 41.3513, "eval_samples_per_second": 11.027, "eval_steps_per_second": 1.378, "eval_wer": 0.31561461794019935, "step": 28000 }, { "epoch": 40.43, "learning_rate": 1.5258778625954197e-05, "loss": 1.1558, "step": 28100 }, { "epoch": 40.57, "learning_rate": 1.502977099236641e-05, "loss": 1.1804, "step": 28200 }, { "epoch": 40.72, "learning_rate": 1.4800763358778624e-05, "loss": 1.1736, "step": 28300 }, { "epoch": 40.86, "learning_rate": 1.4571755725190838e-05, "loss": 1.1782, "step": 28400 }, { "epoch": 41.01, "learning_rate": 1.4342748091603053e-05, "loss": 1.1842, "step": 28500 }, { "epoch": 41.01, "eval_cer": 0.10258227050858132, "eval_loss": 0.23562349379062653, "eval_runtime": 40.492, "eval_samples_per_second": 11.261, "eval_steps_per_second": 1.408, "eval_wer": 0.31516158260344307, "step": 28500 }, { "epoch": 41.15, "learning_rate": 1.4113740458015266e-05, "loss": 1.1595, "step": 28600 }, { "epoch": 41.29, "learning_rate": 1.388473282442748e-05, "loss": 1.1527, "step": 28700 }, { "epoch": 41.44, "learning_rate": 1.3655725190839693e-05, "loss": 1.1517, "step": 28800 }, { "epoch": 41.58, "learning_rate": 1.3426717557251907e-05, "loss": 1.1609, "step": 28900 }, { "epoch": 41.73, "learning_rate": 1.3197709923664122e-05, "loss": 1.1668, "step": 29000 }, { "epoch": 41.73, "eval_cer": 0.10246417886946937, "eval_loss": 0.23187227547168732, "eval_runtime": 40.5813, "eval_samples_per_second": 11.237, "eval_steps_per_second": 1.405, "eval_wer": 0.3187858652974932, "step": 29000 }, { "epoch": 41.87, "learning_rate": 1.2968702290076335e-05, "loss": 1.1536, "step": 29100 }, { "epoch": 42.01, "learning_rate": 1.2739694656488549e-05, "loss": 1.1649, "step": 29200 }, { "epoch": 42.16, "learning_rate": 1.2510687022900762e-05, "loss": 1.1459, "step": 29300 }, { "epoch": 42.3, "learning_rate": 1.2281679389312975e-05, "loss": 1.1495, "step": 29400 }, { "epoch": 42.45, "learning_rate": 1.205267175572519e-05, "loss": 1.1448, "step": 29500 }, { "epoch": 42.45, "eval_cer": 0.09947252401196661, "eval_loss": 0.2292834371328354, "eval_runtime": 41.732, "eval_samples_per_second": 10.927, "eval_steps_per_second": 1.366, "eval_wer": 0.3098761703412866, "step": 29500 }, { "epoch": 42.59, "learning_rate": 1.1823664122137404e-05, "loss": 1.1408, "step": 29600 }, { "epoch": 42.73, "learning_rate": 1.1594656488549618e-05, "loss": 1.1458, "step": 29700 }, { "epoch": 42.88, "learning_rate": 1.1365648854961831e-05, "loss": 1.1358, "step": 29800 }, { "epoch": 43.02, "learning_rate": 1.1136641221374044e-05, "loss": 1.1519, "step": 29900 }, { "epoch": 43.17, "learning_rate": 1.0909923664122137e-05, "loss": 1.1327, "step": 30000 }, { "epoch": 43.17, "eval_cer": 0.09793733270351125, "eval_loss": 0.2265164852142334, "eval_runtime": 40.9338, "eval_samples_per_second": 11.14, "eval_steps_per_second": 1.392, "eval_wer": 0.3047417698580489, "step": 30000 }, { "epoch": 43.31, "learning_rate": 1.068091603053435e-05, "loss": 1.1322, "step": 30100 }, { "epoch": 43.45, "learning_rate": 1.0451908396946564e-05, "loss": 1.1392, "step": 30200 }, { "epoch": 43.6, "learning_rate": 1.0222900763358777e-05, "loss": 1.1318, "step": 30300 }, { "epoch": 43.74, "learning_rate": 9.99389312977099e-06, "loss": 1.1321, "step": 30400 }, { "epoch": 43.88, "learning_rate": 9.764885496183206e-06, "loss": 1.1307, "step": 30500 }, { "epoch": 43.88, "eval_cer": 0.09888206581640686, "eval_loss": 0.22221311926841736, "eval_runtime": 40.9097, "eval_samples_per_second": 11.147, "eval_steps_per_second": 1.393, "eval_wer": 0.30776200543642407, "step": 30500 }, { "epoch": 44.03, "learning_rate": 9.53587786259542e-06, "loss": 1.1358, "step": 30600 }, { "epoch": 44.17, "learning_rate": 9.306870229007633e-06, "loss": 1.1342, "step": 30700 }, { "epoch": 44.32, "learning_rate": 9.077862595419846e-06, "loss": 1.1348, "step": 30800 }, { "epoch": 44.46, "learning_rate": 8.84885496183206e-06, "loss": 1.1294, "step": 30900 }, { "epoch": 44.6, "learning_rate": 8.619847328244275e-06, "loss": 1.1419, "step": 31000 }, { "epoch": 44.6, "eval_cer": 0.09813415210203118, "eval_loss": 0.22149430215358734, "eval_runtime": 40.8027, "eval_samples_per_second": 11.176, "eval_steps_per_second": 1.397, "eval_wer": 0.3038356991845364, "step": 31000 }, { "epoch": 44.75, "learning_rate": 8.390839694656488e-06, "loss": 1.1191, "step": 31100 }, { "epoch": 44.89, "learning_rate": 8.161832061068702e-06, "loss": 1.1223, "step": 31200 }, { "epoch": 45.04, "learning_rate": 7.932824427480915e-06, "loss": 1.1393, "step": 31300 }, { "epoch": 45.18, "learning_rate": 7.703816793893129e-06, "loss": 1.1172, "step": 31400 }, { "epoch": 45.32, "learning_rate": 7.474809160305343e-06, "loss": 1.1231, "step": 31500 }, { "epoch": 45.32, "eval_cer": 0.09722878286883956, "eval_loss": 0.2193477302789688, "eval_runtime": 40.6396, "eval_samples_per_second": 11.221, "eval_steps_per_second": 1.403, "eval_wer": 0.3012684989429176, "step": 31500 }, { "epoch": 45.47, "learning_rate": 7.245801526717557e-06, "loss": 1.1289, "step": 31600 }, { "epoch": 45.61, "learning_rate": 7.016793893129771e-06, "loss": 1.1083, "step": 31700 }, { "epoch": 45.75, "learning_rate": 6.787786259541984e-06, "loss": 1.109, "step": 31800 }, { "epoch": 45.9, "learning_rate": 6.558778625954198e-06, "loss": 1.1218, "step": 31900 }, { "epoch": 46.04, "learning_rate": 6.329770992366412e-06, "loss": 1.139, "step": 32000 }, { "epoch": 46.04, "eval_cer": 0.09683514407179972, "eval_loss": 0.2162453532218933, "eval_runtime": 41.1951, "eval_samples_per_second": 11.069, "eval_steps_per_second": 1.384, "eval_wer": 0.30066445182724255, "step": 32000 }, { "epoch": 46.19, "learning_rate": 6.100763358778626e-06, "loss": 1.1155, "step": 32100 }, { "epoch": 46.33, "learning_rate": 5.8717557251908395e-06, "loss": 1.1212, "step": 32200 }, { "epoch": 46.47, "learning_rate": 5.642748091603053e-06, "loss": 1.1149, "step": 32300 }, { "epoch": 46.62, "learning_rate": 5.413740458015267e-06, "loss": 1.1183, "step": 32400 }, { "epoch": 46.76, "learning_rate": 5.184732824427481e-06, "loss": 1.1114, "step": 32500 }, { "epoch": 46.76, "eval_cer": 0.09596913871831207, "eval_loss": 0.2121613770723343, "eval_runtime": 40.6982, "eval_samples_per_second": 11.204, "eval_steps_per_second": 1.401, "eval_wer": 0.2982482633645424, "step": 32500 }, { "epoch": 46.91, "learning_rate": 4.955725190839695e-06, "loss": 1.1091, "step": 32600 }, { "epoch": 47.05, "learning_rate": 4.726717557251908e-06, "loss": 1.1148, "step": 32700 }, { "epoch": 47.19, "learning_rate": 4.497709923664122e-06, "loss": 1.0962, "step": 32800 }, { "epoch": 47.34, "learning_rate": 4.268702290076335e-06, "loss": 1.0984, "step": 32900 }, { "epoch": 47.48, "learning_rate": 4.03969465648855e-06, "loss": 1.111, "step": 33000 }, { "epoch": 47.48, "eval_cer": 0.09482758620689655, "eval_loss": 0.21248506009578705, "eval_runtime": 40.6368, "eval_samples_per_second": 11.221, "eval_steps_per_second": 1.403, "eval_wer": 0.2946239806704923, "step": 33000 }, { "epoch": 47.63, "learning_rate": 3.810687022900763e-06, "loss": 1.1031, "step": 33100 }, { "epoch": 47.77, "learning_rate": 3.581679389312977e-06, "loss": 1.1159, "step": 33200 }, { "epoch": 47.91, "learning_rate": 3.352671755725191e-06, "loss": 1.0905, "step": 33300 }, { "epoch": 48.06, "learning_rate": 3.1236641221374048e-06, "loss": 1.1087, "step": 33400 }, { "epoch": 48.2, "learning_rate": 2.894656488549618e-06, "loss": 1.0982, "step": 33500 }, { "epoch": 48.2, "eval_cer": 0.09533931664304834, "eval_loss": 0.2098563313484192, "eval_runtime": 40.5946, "eval_samples_per_second": 11.233, "eval_steps_per_second": 1.404, "eval_wer": 0.2956810631229236, "step": 33500 }, { "epoch": 48.34, "learning_rate": 2.66793893129771e-06, "loss": 1.0947, "step": 33600 }, { "epoch": 48.49, "learning_rate": 2.4389312977099237e-06, "loss": 1.1102, "step": 33700 }, { "epoch": 48.63, "learning_rate": 2.209923664122137e-06, "loss": 1.0891, "step": 33800 }, { "epoch": 48.78, "learning_rate": 1.980916030534351e-06, "loss": 1.0937, "step": 33900 }, { "epoch": 48.92, "learning_rate": 1.7519083969465647e-06, "loss": 1.109, "step": 34000 }, { "epoch": 48.92, "eval_cer": 0.09545740828216029, "eval_loss": 0.20918497443199158, "eval_runtime": 40.877, "eval_samples_per_second": 11.155, "eval_steps_per_second": 1.394, "eval_wer": 0.29553005134400484, "step": 34000 }, { "epoch": 49.06, "learning_rate": 1.5229007633587786e-06, "loss": 1.097, "step": 34100 }, { "epoch": 49.21, "learning_rate": 1.2938931297709922e-06, "loss": 1.0909, "step": 34200 }, { "epoch": 49.35, "learning_rate": 1.0648854961832059e-06, "loss": 1.1008, "step": 34300 }, { "epoch": 49.5, "learning_rate": 8.358778625954198e-07, "loss": 1.0904, "step": 34400 }, { "epoch": 49.64, "learning_rate": 6.068702290076335e-07, "loss": 1.0905, "step": 34500 }, { "epoch": 49.64, "eval_cer": 0.09526058888364038, "eval_loss": 0.20883652567863464, "eval_runtime": 40.602, "eval_samples_per_second": 11.231, "eval_steps_per_second": 1.404, "eval_wer": 0.2953790395650861, "step": 34500 }, { "epoch": 49.78, "learning_rate": 3.778625954198473e-07, "loss": 1.0961, "step": 34600 }, { "epoch": 49.93, "learning_rate": 1.4885496183206107e-07, "loss": 1.095, "step": 34700 }, { "epoch": 50.0, "step": 34750, "total_flos": 2.8392187465644065e+20, "train_loss": 2.2316733406121783, "train_runtime": 114311.9751, "train_samples_per_second": 9.737, "train_steps_per_second": 0.304 } ], "max_steps": 34750, "num_train_epochs": 50, "total_flos": 2.8392187465644065e+20, "trial_name": null, "trial_params": null }