{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.999556933983165, "global_step": 28200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 3.7125e-06, "loss": 9.6392, "step": 100 }, { "epoch": 0.35, "learning_rate": 7.4625e-06, "loss": 5.3472, "step": 200 }, { "epoch": 0.53, "learning_rate": 1.1212499999999998e-05, "loss": 3.7052, "step": 300 }, { "epoch": 0.71, "learning_rate": 1.49625e-05, "loss": 3.35, "step": 400 }, { "epoch": 0.89, "learning_rate": 1.8712499999999997e-05, "loss": 3.1484, "step": 500 }, { "epoch": 0.89, "eval_loss": 3.084352731704712, "eval_runtime": 31.0524, "eval_samples_per_second": 5.732, "eval_steps_per_second": 1.932, "eval_wer": 1.0, "step": 500 }, { "epoch": 1.06, "learning_rate": 2.2462499999999997e-05, "loss": 3.0531, "step": 600 }, { "epoch": 1.24, "learning_rate": 2.6212499999999997e-05, "loss": 2.9911, "step": 700 }, { "epoch": 1.42, "learning_rate": 2.99625e-05, "loss": 2.9576, "step": 800 }, { "epoch": 1.6, "learning_rate": 3.37125e-05, "loss": 2.9406, "step": 900 }, { "epoch": 1.77, "learning_rate": 3.7462499999999996e-05, "loss": 2.6539, "step": 1000 }, { "epoch": 1.77, "eval_loss": 1.7271525859832764, "eval_runtime": 30.9027, "eval_samples_per_second": 5.76, "eval_steps_per_second": 1.942, "eval_wer": 0.9358414511252939, "step": 1000 }, { "epoch": 1.95, "learning_rate": 4.12125e-05, "loss": 1.5429, "step": 1100 }, { "epoch": 2.13, "learning_rate": 4.4962499999999995e-05, "loss": 1.0875, "step": 1200 }, { "epoch": 2.3, "learning_rate": 4.871249999999999e-05, "loss": 0.9728, "step": 1300 }, { "epoch": 2.48, "learning_rate": 5.2424999999999994e-05, "loss": 0.9117, "step": 1400 }, { "epoch": 2.66, "learning_rate": 5.6175e-05, "loss": 0.8732, "step": 1500 }, { "epoch": 2.66, "eval_loss": 0.19752363860607147, "eval_runtime": 31.0609, "eval_samples_per_second": 5.731, "eval_steps_per_second": 1.932, "eval_wer": 0.16090023513604298, "step": 1500 }, { "epoch": 2.84, "learning_rate": 5.9925e-05, "loss": 0.8518, "step": 1600 }, { "epoch": 3.01, "learning_rate": 6.367499999999999e-05, "loss": 0.8373, "step": 1700 }, { "epoch": 3.19, "learning_rate": 6.7425e-05, "loss": 0.7958, "step": 1800 }, { "epoch": 3.37, "learning_rate": 7.1175e-05, "loss": 0.8058, "step": 1900 }, { "epoch": 3.55, "learning_rate": 7.492499999999999e-05, "loss": 0.8075, "step": 2000 }, { "epoch": 3.55, "eval_loss": 0.1483038365840912, "eval_runtime": 31.0609, "eval_samples_per_second": 5.731, "eval_steps_per_second": 1.932, "eval_wer": 0.1467920725562647, "step": 2000 }, { "epoch": 3.72, "learning_rate": 7.471946564885497e-05, "loss": 0.785, "step": 2100 }, { "epoch": 3.9, "learning_rate": 7.443320610687022e-05, "loss": 0.7656, "step": 2200 }, { "epoch": 4.08, "learning_rate": 7.414694656488549e-05, "loss": 0.7681, "step": 2300 }, { "epoch": 4.26, "learning_rate": 7.386068702290076e-05, "loss": 0.7535, "step": 2400 }, { "epoch": 4.43, "learning_rate": 7.357442748091603e-05, "loss": 0.7358, "step": 2500 }, { "epoch": 4.43, "eval_loss": 0.1331482231616974, "eval_runtime": 30.9431, "eval_samples_per_second": 5.752, "eval_steps_per_second": 1.939, "eval_wer": 0.14007389989922742, "step": 2500 }, { "epoch": 4.61, "learning_rate": 7.328816793893128e-05, "loss": 0.7478, "step": 2600 }, { "epoch": 4.79, "learning_rate": 7.300190839694656e-05, "loss": 0.746, "step": 2700 }, { "epoch": 4.96, "learning_rate": 7.271564885496182e-05, "loss": 0.7545, "step": 2800 }, { "epoch": 5.14, "learning_rate": 7.242938931297709e-05, "loss": 0.7268, "step": 2900 }, { "epoch": 5.32, "learning_rate": 7.214312977099236e-05, "loss": 0.7079, "step": 3000 }, { "epoch": 5.32, "eval_loss": 0.12731623649597168, "eval_runtime": 30.9985, "eval_samples_per_second": 5.742, "eval_steps_per_second": 1.936, "eval_wer": 0.1363789049378569, "step": 3000 }, { "epoch": 5.5, "learning_rate": 7.185687022900763e-05, "loss": 0.7344, "step": 3100 }, { "epoch": 5.67, "learning_rate": 7.15706106870229e-05, "loss": 0.7334, "step": 3200 }, { "epoch": 5.85, "learning_rate": 7.128435114503815e-05, "loss": 0.7274, "step": 3300 }, { "epoch": 6.03, "learning_rate": 7.099809160305343e-05, "loss": 0.7271, "step": 3400 }, { "epoch": 6.21, "learning_rate": 7.071469465648854e-05, "loss": 0.7032, "step": 3500 }, { "epoch": 6.21, "eval_loss": 0.11333052814006805, "eval_runtime": 31.1935, "eval_samples_per_second": 5.706, "eval_steps_per_second": 1.923, "eval_wer": 0.12395028552233793, "step": 3500 }, { "epoch": 6.38, "learning_rate": 7.04284351145038e-05, "loss": 0.6852, "step": 3600 }, { "epoch": 6.56, "learning_rate": 7.014217557251907e-05, "loss": 0.699, "step": 3700 }, { "epoch": 6.74, "learning_rate": 6.985591603053435e-05, "loss": 0.6894, "step": 3800 }, { "epoch": 6.91, "learning_rate": 6.956965648854962e-05, "loss": 0.7014, "step": 3900 }, { "epoch": 7.09, "learning_rate": 6.928339694656487e-05, "loss": 0.7129, "step": 4000 }, { "epoch": 7.09, "eval_loss": 0.11242285370826721, "eval_runtime": 31.0672, "eval_samples_per_second": 5.73, "eval_steps_per_second": 1.931, "eval_wer": 0.1289889150151159, "step": 4000 }, { "epoch": 7.27, "learning_rate": 6.899713740458015e-05, "loss": 0.6902, "step": 4100 }, { "epoch": 7.45, "learning_rate": 6.871087786259541e-05, "loss": 0.6898, "step": 4200 }, { "epoch": 7.62, "learning_rate": 6.842461832061069e-05, "loss": 0.6785, "step": 4300 }, { "epoch": 7.8, "learning_rate": 6.813835877862594e-05, "loss": 0.7024, "step": 4400 }, { "epoch": 7.98, "learning_rate": 6.785209923664122e-05, "loss": 0.6771, "step": 4500 }, { "epoch": 7.98, "eval_loss": 0.11212227493524551, "eval_runtime": 31.0691, "eval_samples_per_second": 5.729, "eval_steps_per_second": 1.931, "eval_wer": 0.12999664091367147, "step": 4500 }, { "epoch": 8.16, "learning_rate": 6.756583969465648e-05, "loss": 0.6555, "step": 4600 }, { "epoch": 8.33, "learning_rate": 6.727958015267174e-05, "loss": 0.6897, "step": 4700 }, { "epoch": 8.51, "learning_rate": 6.699618320610687e-05, "loss": 0.6809, "step": 4800 }, { "epoch": 8.69, "learning_rate": 6.670992366412213e-05, "loss": 0.6821, "step": 4900 }, { "epoch": 8.86, "learning_rate": 6.642366412213741e-05, "loss": 0.6859, "step": 5000 }, { "epoch": 8.86, "eval_loss": 0.109534852206707, "eval_runtime": 31.3176, "eval_samples_per_second": 5.684, "eval_steps_per_second": 1.916, "eval_wer": 0.13134027544507895, "step": 5000 }, { "epoch": 9.04, "learning_rate": 6.613740458015266e-05, "loss": 0.6849, "step": 5100 }, { "epoch": 9.22, "learning_rate": 6.585114503816793e-05, "loss": 0.6615, "step": 5200 }, { "epoch": 9.4, "learning_rate": 6.55648854961832e-05, "loss": 0.6587, "step": 5300 }, { "epoch": 9.57, "learning_rate": 6.527862595419846e-05, "loss": 0.6741, "step": 5400 }, { "epoch": 9.75, "learning_rate": 6.499236641221373e-05, "loss": 0.6496, "step": 5500 }, { "epoch": 9.75, "eval_loss": 0.1090548112988472, "eval_runtime": 31.0434, "eval_samples_per_second": 5.734, "eval_steps_per_second": 1.933, "eval_wer": 0.12495801142089352, "step": 5500 }, { "epoch": 9.93, "learning_rate": 6.4706106870229e-05, "loss": 0.6672, "step": 5600 }, { "epoch": 10.11, "learning_rate": 6.441984732824428e-05, "loss": 0.6511, "step": 5700 }, { "epoch": 10.28, "learning_rate": 6.413645038167938e-05, "loss": 0.6393, "step": 5800 }, { "epoch": 10.46, "learning_rate": 6.385019083969465e-05, "loss": 0.6531, "step": 5900 }, { "epoch": 10.64, "learning_rate": 6.356393129770992e-05, "loss": 0.6431, "step": 6000 }, { "epoch": 10.64, "eval_loss": 0.11021342128515244, "eval_runtime": 30.9854, "eval_samples_per_second": 5.745, "eval_steps_per_second": 1.936, "eval_wer": 0.12932482364796774, "step": 6000 }, { "epoch": 10.82, "learning_rate": 6.32776717557252e-05, "loss": 0.6476, "step": 6100 }, { "epoch": 10.99, "learning_rate": 6.299141221374044e-05, "loss": 0.6504, "step": 6200 }, { "epoch": 11.17, "learning_rate": 6.270515267175572e-05, "loss": 0.6366, "step": 6300 }, { "epoch": 11.35, "learning_rate": 6.241889312977098e-05, "loss": 0.6435, "step": 6400 }, { "epoch": 11.52, "learning_rate": 6.213263358778625e-05, "loss": 0.6422, "step": 6500 }, { "epoch": 11.52, "eval_loss": 0.11072035878896713, "eval_runtime": 31.126, "eval_samples_per_second": 5.719, "eval_steps_per_second": 1.928, "eval_wer": 0.11790393013100436, "step": 6500 }, { "epoch": 11.7, "learning_rate": 6.184637404580152e-05, "loss": 0.636, "step": 6600 }, { "epoch": 11.88, "learning_rate": 6.156011450381679e-05, "loss": 0.647, "step": 6700 }, { "epoch": 12.06, "learning_rate": 6.127385496183207e-05, "loss": 0.644, "step": 6800 }, { "epoch": 12.23, "learning_rate": 6.098759541984732e-05, "loss": 0.611, "step": 6900 }, { "epoch": 12.41, "learning_rate": 6.070133587786259e-05, "loss": 0.6334, "step": 7000 }, { "epoch": 12.41, "eval_loss": 0.10494451969861984, "eval_runtime": 31.1005, "eval_samples_per_second": 5.723, "eval_steps_per_second": 1.929, "eval_wer": 0.12361437688948607, "step": 7000 }, { "epoch": 12.59, "learning_rate": 6.0415076335877855e-05, "loss": 0.6305, "step": 7100 }, { "epoch": 12.77, "learning_rate": 6.0128816793893126e-05, "loss": 0.6292, "step": 7200 }, { "epoch": 12.94, "learning_rate": 5.984255725190839e-05, "loss": 0.6341, "step": 7300 }, { "epoch": 13.12, "learning_rate": 5.955629770992366e-05, "loss": 0.625, "step": 7400 }, { "epoch": 13.3, "learning_rate": 5.927003816793893e-05, "loss": 0.599, "step": 7500 }, { "epoch": 13.3, "eval_loss": 0.10921534150838852, "eval_runtime": 31.0896, "eval_samples_per_second": 5.725, "eval_steps_per_second": 1.93, "eval_wer": 0.11521666106818945, "step": 7500 }, { "epoch": 13.47, "learning_rate": 5.898377862595419e-05, "loss": 0.6144, "step": 7600 }, { "epoch": 13.65, "learning_rate": 5.869751908396946e-05, "loss": 0.6314, "step": 7700 }, { "epoch": 13.83, "learning_rate": 5.8411259541984726e-05, "loss": 0.6185, "step": 7800 }, { "epoch": 14.01, "learning_rate": 5.8124999999999997e-05, "loss": 0.6122, "step": 7900 }, { "epoch": 14.18, "learning_rate": 5.783874045801526e-05, "loss": 0.6205, "step": 8000 }, { "epoch": 14.18, "eval_loss": 0.1046827957034111, "eval_runtime": 31.2229, "eval_samples_per_second": 5.701, "eval_steps_per_second": 1.922, "eval_wer": 0.12193483372522673, "step": 8000 }, { "epoch": 14.36, "learning_rate": 5.755248091603053e-05, "loss": 0.612, "step": 8100 }, { "epoch": 14.54, "learning_rate": 5.7266221374045794e-05, "loss": 0.6196, "step": 8200 }, { "epoch": 14.72, "learning_rate": 5.6979961832061064e-05, "loss": 0.6114, "step": 8300 }, { "epoch": 14.89, "learning_rate": 5.6693702290076334e-05, "loss": 0.6005, "step": 8400 }, { "epoch": 15.07, "learning_rate": 5.64074427480916e-05, "loss": 0.5944, "step": 8500 }, { "epoch": 15.07, "eval_loss": 0.10684490948915482, "eval_runtime": 31.2028, "eval_samples_per_second": 5.705, "eval_steps_per_second": 1.923, "eval_wer": 0.12025529056096741, "step": 8500 }, { "epoch": 15.25, "learning_rate": 5.612118320610687e-05, "loss": 0.6108, "step": 8600 }, { "epoch": 15.43, "learning_rate": 5.583492366412213e-05, "loss": 0.5995, "step": 8700 }, { "epoch": 15.6, "learning_rate": 5.55486641221374e-05, "loss": 0.6117, "step": 8800 }, { "epoch": 15.78, "learning_rate": 5.5262404580152665e-05, "loss": 0.6006, "step": 8900 }, { "epoch": 15.96, "learning_rate": 5.4976145038167935e-05, "loss": 0.6102, "step": 9000 }, { "epoch": 15.96, "eval_loss": 0.10560546070337296, "eval_runtime": 31.2837, "eval_samples_per_second": 5.69, "eval_steps_per_second": 1.918, "eval_wer": 0.11588847833389318, "step": 9000 }, { "epoch": 16.13, "learning_rate": 5.46898854961832e-05, "loss": 0.5968, "step": 9100 }, { "epoch": 16.31, "learning_rate": 5.440362595419847e-05, "loss": 0.5912, "step": 9200 }, { "epoch": 16.49, "learning_rate": 5.411736641221374e-05, "loss": 0.5889, "step": 9300 }, { "epoch": 16.67, "learning_rate": 5.3831106870229e-05, "loss": 0.6014, "step": 9400 }, { "epoch": 16.84, "learning_rate": 5.354484732824427e-05, "loss": 0.5983, "step": 9500 }, { "epoch": 16.84, "eval_loss": 0.1061149537563324, "eval_runtime": 31.4987, "eval_samples_per_second": 5.651, "eval_steps_per_second": 1.905, "eval_wer": 0.11521666106818945, "step": 9500 }, { "epoch": 17.02, "learning_rate": 5.3258587786259536e-05, "loss": 0.5992, "step": 9600 }, { "epoch": 17.2, "learning_rate": 5.2972328244274806e-05, "loss": 0.5887, "step": 9700 }, { "epoch": 17.38, "learning_rate": 5.268606870229007e-05, "loss": 0.6012, "step": 9800 }, { "epoch": 17.55, "learning_rate": 5.239980916030534e-05, "loss": 0.5985, "step": 9900 }, { "epoch": 17.73, "learning_rate": 5.211354961832061e-05, "loss": 0.5882, "step": 10000 }, { "epoch": 17.73, "eval_loss": 0.10430345684289932, "eval_runtime": 30.988, "eval_samples_per_second": 5.744, "eval_steps_per_second": 1.936, "eval_wer": 0.11353711790393013, "step": 10000 }, { "epoch": 17.91, "learning_rate": 5.182729007633587e-05, "loss": 0.5906, "step": 10100 }, { "epoch": 18.09, "learning_rate": 5.1541030534351143e-05, "loss": 0.5843, "step": 10200 }, { "epoch": 18.26, "learning_rate": 5.1257633587786254e-05, "loss": 0.5976, "step": 10300 }, { "epoch": 18.44, "learning_rate": 5.0971374045801525e-05, "loss": 0.59, "step": 10400 }, { "epoch": 18.62, "learning_rate": 5.068511450381679e-05, "loss": 0.5876, "step": 10500 }, { "epoch": 18.62, "eval_loss": 0.10231117904186249, "eval_runtime": 31.0041, "eval_samples_per_second": 5.741, "eval_steps_per_second": 1.935, "eval_wer": 0.11588847833389318, "step": 10500 }, { "epoch": 18.79, "learning_rate": 5.039885496183206e-05, "loss": 0.5845, "step": 10600 }, { "epoch": 18.97, "learning_rate": 5.011259541984732e-05, "loss": 0.5812, "step": 10700 }, { "epoch": 19.15, "learning_rate": 4.982633587786259e-05, "loss": 0.5695, "step": 10800 }, { "epoch": 19.33, "learning_rate": 4.954007633587786e-05, "loss": 0.5665, "step": 10900 }, { "epoch": 19.5, "learning_rate": 4.9253816793893125e-05, "loss": 0.5717, "step": 11000 }, { "epoch": 19.5, "eval_loss": 0.10367337614297867, "eval_runtime": 31.2366, "eval_samples_per_second": 5.698, "eval_steps_per_second": 1.921, "eval_wer": 0.1232784682566342, "step": 11000 }, { "epoch": 19.68, "learning_rate": 4.8967557251908396e-05, "loss": 0.5657, "step": 11100 }, { "epoch": 19.86, "learning_rate": 4.868129770992366e-05, "loss": 0.5733, "step": 11200 }, { "epoch": 20.04, "learning_rate": 4.839503816793893e-05, "loss": 0.5816, "step": 11300 }, { "epoch": 20.21, "learning_rate": 4.810877862595419e-05, "loss": 0.5694, "step": 11400 }, { "epoch": 20.39, "learning_rate": 4.782251908396946e-05, "loss": 0.5537, "step": 11500 }, { "epoch": 20.39, "eval_loss": 0.10704291611909866, "eval_runtime": 31.0837, "eval_samples_per_second": 5.726, "eval_steps_per_second": 1.93, "eval_wer": 0.11924756466241182, "step": 11500 }, { "epoch": 20.57, "learning_rate": 4.7536259541984726e-05, "loss": 0.5726, "step": 11600 }, { "epoch": 20.74, "learning_rate": 4.7249999999999997e-05, "loss": 0.5742, "step": 11700 }, { "epoch": 20.92, "learning_rate": 4.696374045801527e-05, "loss": 0.563, "step": 11800 }, { "epoch": 21.1, "learning_rate": 4.667748091603053e-05, "loss": 0.5655, "step": 11900 }, { "epoch": 21.28, "learning_rate": 4.63912213740458e-05, "loss": 0.5636, "step": 12000 }, { "epoch": 21.28, "eval_loss": 0.10363561660051346, "eval_runtime": 31.1987, "eval_samples_per_second": 5.705, "eval_steps_per_second": 1.923, "eval_wer": 0.11689620423244877, "step": 12000 }, { "epoch": 21.45, "learning_rate": 4.6104961832061064e-05, "loss": 0.5612, "step": 12100 }, { "epoch": 21.63, "learning_rate": 4.5818702290076334e-05, "loss": 0.5732, "step": 12200 }, { "epoch": 21.81, "learning_rate": 4.55324427480916e-05, "loss": 0.5726, "step": 12300 }, { "epoch": 21.99, "learning_rate": 4.524618320610687e-05, "loss": 0.5547, "step": 12400 }, { "epoch": 22.16, "learning_rate": 4.495992366412213e-05, "loss": 0.5536, "step": 12500 }, { "epoch": 22.16, "eval_loss": 0.10082551091909409, "eval_runtime": 31.121, "eval_samples_per_second": 5.72, "eval_steps_per_second": 1.928, "eval_wer": 0.11823983876385623, "step": 12500 }, { "epoch": 22.34, "learning_rate": 4.46736641221374e-05, "loss": 0.5509, "step": 12600 }, { "epoch": 22.52, "learning_rate": 4.439026717557252e-05, "loss": 0.5645, "step": 12700 }, { "epoch": 22.69, "learning_rate": 4.410400763358778e-05, "loss": 0.5527, "step": 12800 }, { "epoch": 22.87, "learning_rate": 4.381774809160305e-05, "loss": 0.547, "step": 12900 }, { "epoch": 23.05, "learning_rate": 4.3534351145038163e-05, "loss": 0.5656, "step": 13000 }, { "epoch": 23.05, "eval_loss": 0.10101909935474396, "eval_runtime": 30.9319, "eval_samples_per_second": 5.755, "eval_steps_per_second": 1.94, "eval_wer": 0.11723211286530064, "step": 13000 }, { "epoch": 23.23, "learning_rate": 4.3248091603053434e-05, "loss": 0.5483, "step": 13100 }, { "epoch": 23.4, "learning_rate": 4.29618320610687e-05, "loss": 0.5501, "step": 13200 }, { "epoch": 23.58, "learning_rate": 4.267557251908397e-05, "loss": 0.5429, "step": 13300 }, { "epoch": 23.76, "learning_rate": 4.238931297709923e-05, "loss": 0.5455, "step": 13400 }, { "epoch": 23.94, "learning_rate": 4.21030534351145e-05, "loss": 0.5504, "step": 13500 }, { "epoch": 23.94, "eval_loss": 0.10192937403917313, "eval_runtime": 31.0113, "eval_samples_per_second": 5.74, "eval_steps_per_second": 1.935, "eval_wer": 0.11051394020826336, "step": 13500 }, { "epoch": 24.11, "learning_rate": 4.181679389312977e-05, "loss": 0.5601, "step": 13600 }, { "epoch": 24.29, "learning_rate": 4.1530534351145035e-05, "loss": 0.5419, "step": 13700 }, { "epoch": 24.47, "learning_rate": 4.1244274809160305e-05, "loss": 0.5389, "step": 13800 }, { "epoch": 24.65, "learning_rate": 4.095801526717557e-05, "loss": 0.5572, "step": 13900 }, { "epoch": 24.82, "learning_rate": 4.067175572519084e-05, "loss": 0.5476, "step": 14000 }, { "epoch": 24.82, "eval_loss": 0.10260963439941406, "eval_runtime": 31.1565, "eval_samples_per_second": 5.713, "eval_steps_per_second": 1.926, "eval_wer": 0.11656029559959691, "step": 14000 }, { "epoch": 25.0, "learning_rate": 4.03854961832061e-05, "loss": 0.5554, "step": 14100 }, { "epoch": 25.18, "learning_rate": 4.009923664122137e-05, "loss": 0.537, "step": 14200 }, { "epoch": 25.35, "learning_rate": 3.9812977099236635e-05, "loss": 0.5346, "step": 14300 }, { "epoch": 25.53, "learning_rate": 3.9526717557251906e-05, "loss": 0.5245, "step": 14400 }, { "epoch": 25.71, "learning_rate": 3.9240458015267176e-05, "loss": 0.5375, "step": 14500 }, { "epoch": 25.71, "eval_loss": 0.1107296496629715, "eval_runtime": 30.9057, "eval_samples_per_second": 5.759, "eval_steps_per_second": 1.941, "eval_wer": 0.11891165602955996, "step": 14500 }, { "epoch": 25.89, "learning_rate": 3.895419847328244e-05, "loss": 0.5437, "step": 14600 }, { "epoch": 26.06, "learning_rate": 3.866793893129771e-05, "loss": 0.5284, "step": 14700 }, { "epoch": 26.24, "learning_rate": 3.838167938931297e-05, "loss": 0.5276, "step": 14800 }, { "epoch": 26.42, "learning_rate": 3.809541984732824e-05, "loss": 0.5441, "step": 14900 }, { "epoch": 26.6, "learning_rate": 3.7809160305343507e-05, "loss": 0.5318, "step": 15000 }, { "epoch": 26.6, "eval_loss": 0.10514429956674576, "eval_runtime": 31.3716, "eval_samples_per_second": 5.674, "eval_steps_per_second": 1.913, "eval_wer": 0.11420893516963386, "step": 15000 }, { "epoch": 26.77, "learning_rate": 3.752290076335878e-05, "loss": 0.5343, "step": 15100 }, { "epoch": 26.95, "learning_rate": 3.723664122137404e-05, "loss": 0.5394, "step": 15200 }, { "epoch": 27.13, "learning_rate": 3.695038167938931e-05, "loss": 0.5352, "step": 15300 }, { "epoch": 27.3, "learning_rate": 3.6664122137404574e-05, "loss": 0.5398, "step": 15400 }, { "epoch": 27.48, "learning_rate": 3.6377862595419844e-05, "loss": 0.5278, "step": 15500 }, { "epoch": 27.48, "eval_loss": 0.10493182390928268, "eval_runtime": 31.3849, "eval_samples_per_second": 5.672, "eval_steps_per_second": 1.912, "eval_wer": 0.11656029559959691, "step": 15500 }, { "epoch": 27.66, "learning_rate": 3.609160305343511e-05, "loss": 0.538, "step": 15600 }, { "epoch": 27.84, "learning_rate": 3.580534351145038e-05, "loss": 0.5143, "step": 15700 }, { "epoch": 28.01, "learning_rate": 3.551908396946565e-05, "loss": 0.5229, "step": 15800 }, { "epoch": 28.19, "learning_rate": 3.523282442748091e-05, "loss": 0.522, "step": 15900 }, { "epoch": 28.37, "learning_rate": 3.494656488549618e-05, "loss": 0.5204, "step": 16000 }, { "epoch": 28.37, "eval_loss": 0.10806475579738617, "eval_runtime": 31.4114, "eval_samples_per_second": 5.667, "eval_steps_per_second": 1.91, "eval_wer": 0.11823983876385623, "step": 16000 }, { "epoch": 28.55, "learning_rate": 3.4660305343511445e-05, "loss": 0.5322, "step": 16100 }, { "epoch": 28.72, "learning_rate": 3.4374045801526715e-05, "loss": 0.5151, "step": 16200 }, { "epoch": 28.9, "learning_rate": 3.408778625954198e-05, "loss": 0.5393, "step": 16300 }, { "epoch": 29.08, "learning_rate": 3.380152671755725e-05, "loss": 0.513, "step": 16400 }, { "epoch": 29.26, "learning_rate": 3.351526717557251e-05, "loss": 0.512, "step": 16500 }, { "epoch": 29.26, "eval_loss": 0.10623880475759506, "eval_runtime": 31.0174, "eval_samples_per_second": 5.739, "eval_steps_per_second": 1.934, "eval_wer": 0.11555256970104132, "step": 16500 }, { "epoch": 29.43, "learning_rate": 3.322900763358778e-05, "loss": 0.5258, "step": 16600 }, { "epoch": 29.61, "learning_rate": 3.294274809160305e-05, "loss": 0.5366, "step": 16700 }, { "epoch": 29.79, "learning_rate": 3.2656488549618316e-05, "loss": 0.515, "step": 16800 }, { "epoch": 29.96, "learning_rate": 3.2370229007633586e-05, "loss": 0.5246, "step": 16900 }, { "epoch": 30.14, "learning_rate": 3.208396946564885e-05, "loss": 0.5082, "step": 17000 }, { "epoch": 30.14, "eval_loss": 0.10452543944120407, "eval_runtime": 30.9393, "eval_samples_per_second": 5.753, "eval_steps_per_second": 1.939, "eval_wer": 0.11353711790393013, "step": 17000 }, { "epoch": 30.32, "learning_rate": 3.179770992366412e-05, "loss": 0.5029, "step": 17100 }, { "epoch": 30.5, "learning_rate": 3.151431297709923e-05, "loss": 0.5219, "step": 17200 }, { "epoch": 30.67, "learning_rate": 3.12280534351145e-05, "loss": 0.5235, "step": 17300 }, { "epoch": 30.85, "learning_rate": 3.0941793893129764e-05, "loss": 0.5141, "step": 17400 }, { "epoch": 31.03, "learning_rate": 3.0655534351145035e-05, "loss": 0.5193, "step": 17500 }, { "epoch": 31.03, "eval_loss": 0.10911107808351517, "eval_runtime": 30.9608, "eval_samples_per_second": 5.749, "eval_steps_per_second": 1.938, "eval_wer": 0.11454484380248572, "step": 17500 }, { "epoch": 31.21, "learning_rate": 3.0369274809160305e-05, "loss": 0.5038, "step": 17600 }, { "epoch": 31.38, "learning_rate": 3.008301526717557e-05, "loss": 0.5086, "step": 17700 }, { "epoch": 31.56, "learning_rate": 2.979675572519084e-05, "loss": 0.5001, "step": 17800 }, { "epoch": 31.74, "learning_rate": 2.9510496183206105e-05, "loss": 0.5206, "step": 17900 }, { "epoch": 31.91, "learning_rate": 2.9224236641221372e-05, "loss": 0.5129, "step": 18000 }, { "epoch": 31.91, "eval_loss": 0.10398419201374054, "eval_runtime": 30.9924, "eval_samples_per_second": 5.743, "eval_steps_per_second": 1.936, "eval_wer": 0.10883439704400404, "step": 18000 }, { "epoch": 32.09, "learning_rate": 2.893797709923664e-05, "loss": 0.5105, "step": 18100 }, { "epoch": 32.27, "learning_rate": 2.8651717557251906e-05, "loss": 0.5062, "step": 18200 }, { "epoch": 32.45, "learning_rate": 2.8365458015267172e-05, "loss": 0.5021, "step": 18300 }, { "epoch": 32.62, "learning_rate": 2.807919847328244e-05, "loss": 0.5122, "step": 18400 }, { "epoch": 32.8, "learning_rate": 2.779293893129771e-05, "loss": 0.5126, "step": 18500 }, { "epoch": 32.8, "eval_loss": 0.10847991704940796, "eval_runtime": 30.8721, "eval_samples_per_second": 5.766, "eval_steps_per_second": 1.944, "eval_wer": 0.11689620423244877, "step": 18500 }, { "epoch": 32.98, "learning_rate": 2.7506679389312976e-05, "loss": 0.5179, "step": 18600 }, { "epoch": 33.16, "learning_rate": 2.7220419847328243e-05, "loss": 0.5049, "step": 18700 }, { "epoch": 33.33, "learning_rate": 2.693416030534351e-05, "loss": 0.5173, "step": 18800 }, { "epoch": 33.51, "learning_rate": 2.6647900763358777e-05, "loss": 0.5115, "step": 18900 }, { "epoch": 33.69, "learning_rate": 2.6361641221374043e-05, "loss": 0.496, "step": 19000 }, { "epoch": 33.69, "eval_loss": 0.10698471963405609, "eval_runtime": 31.005, "eval_samples_per_second": 5.741, "eval_steps_per_second": 1.935, "eval_wer": 0.11656029559959691, "step": 19000 }, { "epoch": 33.86, "learning_rate": 2.607538167938931e-05, "loss": 0.4938, "step": 19100 }, { "epoch": 34.04, "learning_rate": 2.5789122137404577e-05, "loss": 0.5128, "step": 19200 }, { "epoch": 34.22, "learning_rate": 2.550572519083969e-05, "loss": 0.497, "step": 19300 }, { "epoch": 34.4, "learning_rate": 2.521946564885496e-05, "loss": 0.4879, "step": 19400 }, { "epoch": 34.57, "learning_rate": 2.493320610687023e-05, "loss": 0.5017, "step": 19500 }, { "epoch": 34.57, "eval_loss": 0.11190272867679596, "eval_runtime": 30.8238, "eval_samples_per_second": 5.775, "eval_steps_per_second": 1.947, "eval_wer": 0.11622438696674504, "step": 19500 }, { "epoch": 34.75, "learning_rate": 2.4646946564885495e-05, "loss": 0.4994, "step": 19600 }, { "epoch": 34.93, "learning_rate": 2.436354961832061e-05, "loss": 0.4973, "step": 19700 }, { "epoch": 35.11, "learning_rate": 2.4077290076335876e-05, "loss": 0.5012, "step": 19800 }, { "epoch": 35.28, "learning_rate": 2.3791030534351143e-05, "loss": 0.4889, "step": 19900 }, { "epoch": 35.46, "learning_rate": 2.350477099236641e-05, "loss": 0.4808, "step": 20000 }, { "epoch": 35.46, "eval_loss": 0.11005562543869019, "eval_runtime": 31.0411, "eval_samples_per_second": 5.734, "eval_steps_per_second": 1.933, "eval_wer": 0.113873026536782, "step": 20000 }, { "epoch": 35.64, "learning_rate": 2.3218511450381677e-05, "loss": 0.4984, "step": 20100 }, { "epoch": 35.82, "learning_rate": 2.2932251908396944e-05, "loss": 0.4929, "step": 20200 }, { "epoch": 35.99, "learning_rate": 2.2645992366412214e-05, "loss": 0.4861, "step": 20300 }, { "epoch": 36.17, "learning_rate": 2.235973282442748e-05, "loss": 0.506, "step": 20400 }, { "epoch": 36.35, "learning_rate": 2.2073473282442747e-05, "loss": 0.4939, "step": 20500 }, { "epoch": 36.35, "eval_loss": 0.10813739150762558, "eval_runtime": 31.0724, "eval_samples_per_second": 5.729, "eval_steps_per_second": 1.931, "eval_wer": 0.11252939200537454, "step": 20500 }, { "epoch": 36.52, "learning_rate": 2.1787213740458014e-05, "loss": 0.4789, "step": 20600 }, { "epoch": 36.7, "learning_rate": 2.150095419847328e-05, "loss": 0.4904, "step": 20700 }, { "epoch": 36.88, "learning_rate": 2.1214694656488548e-05, "loss": 0.4916, "step": 20800 }, { "epoch": 37.06, "learning_rate": 2.0928435114503815e-05, "loss": 0.487, "step": 20900 }, { "epoch": 37.23, "learning_rate": 2.064217557251908e-05, "loss": 0.4738, "step": 21000 }, { "epoch": 37.23, "eval_loss": 0.10911141335964203, "eval_runtime": 31.0688, "eval_samples_per_second": 5.729, "eval_steps_per_second": 1.931, "eval_wer": 0.10984212294255963, "step": 21000 }, { "epoch": 37.41, "learning_rate": 2.0355916030534352e-05, "loss": 0.4757, "step": 21100 }, { "epoch": 37.59, "learning_rate": 2.006965648854962e-05, "loss": 0.4752, "step": 21200 }, { "epoch": 37.77, "learning_rate": 1.9783396946564885e-05, "loss": 0.4939, "step": 21300 }, { "epoch": 37.94, "learning_rate": 1.9497137404580152e-05, "loss": 0.4673, "step": 21400 }, { "epoch": 38.12, "learning_rate": 1.921087786259542e-05, "loss": 0.4978, "step": 21500 }, { "epoch": 38.12, "eval_loss": 0.10570317506790161, "eval_runtime": 31.0697, "eval_samples_per_second": 5.729, "eval_steps_per_second": 1.931, "eval_wer": 0.1091703056768559, "step": 21500 }, { "epoch": 38.3, "learning_rate": 1.8924618320610686e-05, "loss": 0.4829, "step": 21600 }, { "epoch": 38.47, "learning_rate": 1.8638358778625953e-05, "loss": 0.4857, "step": 21700 }, { "epoch": 38.65, "learning_rate": 1.835209923664122e-05, "loss": 0.4618, "step": 21800 }, { "epoch": 38.83, "learning_rate": 1.8065839694656486e-05, "loss": 0.4831, "step": 21900 }, { "epoch": 39.01, "learning_rate": 1.77824427480916e-05, "loss": 0.4972, "step": 22000 }, { "epoch": 39.01, "eval_loss": 0.10742757469415665, "eval_runtime": 31.1659, "eval_samples_per_second": 5.711, "eval_steps_per_second": 1.925, "eval_wer": 0.11051394020826336, "step": 22000 }, { "epoch": 39.18, "learning_rate": 1.7496183206106867e-05, "loss": 0.4974, "step": 22100 }, { "epoch": 39.36, "learning_rate": 1.7209923664122138e-05, "loss": 0.4778, "step": 22200 }, { "epoch": 39.54, "learning_rate": 1.6923664122137404e-05, "loss": 0.4863, "step": 22300 }, { "epoch": 39.72, "learning_rate": 1.663740458015267e-05, "loss": 0.4815, "step": 22400 }, { "epoch": 39.89, "learning_rate": 1.6351145038167938e-05, "loss": 0.4773, "step": 22500 }, { "epoch": 39.89, "eval_loss": 0.10622948408126831, "eval_runtime": 31.5999, "eval_samples_per_second": 5.633, "eval_steps_per_second": 1.899, "eval_wer": 0.11084984884111522, "step": 22500 }, { "epoch": 40.07, "learning_rate": 1.6064885496183205e-05, "loss": 0.4808, "step": 22600 }, { "epoch": 40.25, "learning_rate": 1.577862595419847e-05, "loss": 0.4804, "step": 22700 }, { "epoch": 40.43, "learning_rate": 1.549236641221374e-05, "loss": 0.4796, "step": 22800 }, { "epoch": 40.6, "learning_rate": 1.5206106870229005e-05, "loss": 0.4866, "step": 22900 }, { "epoch": 40.78, "learning_rate": 1.4919847328244272e-05, "loss": 0.4741, "step": 23000 }, { "epoch": 40.78, "eval_loss": 0.10567349940538406, "eval_runtime": 31.2551, "eval_samples_per_second": 5.695, "eval_steps_per_second": 1.92, "eval_wer": 0.10849848841115217, "step": 23000 }, { "epoch": 40.96, "learning_rate": 1.4633587786259542e-05, "loss": 0.4976, "step": 23100 }, { "epoch": 41.13, "learning_rate": 1.4347328244274809e-05, "loss": 0.4811, "step": 23200 }, { "epoch": 41.31, "learning_rate": 1.4061068702290076e-05, "loss": 0.4904, "step": 23300 }, { "epoch": 41.49, "learning_rate": 1.3774809160305341e-05, "loss": 0.4658, "step": 23400 }, { "epoch": 41.67, "learning_rate": 1.3488549618320608e-05, "loss": 0.4776, "step": 23500 }, { "epoch": 41.67, "eval_loss": 0.10771991312503815, "eval_runtime": 31.4029, "eval_samples_per_second": 5.668, "eval_steps_per_second": 1.911, "eval_wer": 0.10849848841115217, "step": 23500 }, { "epoch": 41.84, "learning_rate": 1.3202290076335878e-05, "loss": 0.4745, "step": 23600 }, { "epoch": 42.02, "learning_rate": 1.2916030534351145e-05, "loss": 0.4746, "step": 23700 }, { "epoch": 42.2, "learning_rate": 1.262977099236641e-05, "loss": 0.4858, "step": 23800 }, { "epoch": 42.38, "learning_rate": 1.2343511450381677e-05, "loss": 0.4687, "step": 23900 }, { "epoch": 42.55, "learning_rate": 1.2057251908396947e-05, "loss": 0.4637, "step": 24000 }, { "epoch": 42.55, "eval_loss": 0.10607700049877167, "eval_runtime": 31.045, "eval_samples_per_second": 5.734, "eval_steps_per_second": 1.933, "eval_wer": 0.10950621430970776, "step": 24000 }, { "epoch": 42.73, "learning_rate": 1.1770992366412214e-05, "loss": 0.4462, "step": 24100 }, { "epoch": 42.91, "learning_rate": 1.1484732824427479e-05, "loss": 0.4815, "step": 24200 }, { "epoch": 43.09, "learning_rate": 1.1198473282442746e-05, "loss": 0.461, "step": 24300 }, { "epoch": 43.26, "learning_rate": 1.0912213740458016e-05, "loss": 0.4594, "step": 24400 }, { "epoch": 43.44, "learning_rate": 1.0625954198473283e-05, "loss": 0.4853, "step": 24500 }, { "epoch": 43.44, "eval_loss": 0.10806521028280258, "eval_runtime": 31.4451, "eval_samples_per_second": 5.661, "eval_steps_per_second": 1.908, "eval_wer": 0.10749076251259658, "step": 24500 }, { "epoch": 43.62, "learning_rate": 1.033969465648855e-05, "loss": 0.4747, "step": 24600 }, { "epoch": 43.79, "learning_rate": 1.0053435114503815e-05, "loss": 0.4649, "step": 24700 }, { "epoch": 43.97, "learning_rate": 9.767175572519081e-06, "loss": 0.471, "step": 24800 }, { "epoch": 44.15, "learning_rate": 9.480916030534352e-06, "loss": 0.453, "step": 24900 }, { "epoch": 44.33, "learning_rate": 9.194656488549618e-06, "loss": 0.4602, "step": 25000 }, { "epoch": 44.33, "eval_loss": 0.1075613871216774, "eval_runtime": 31.217, "eval_samples_per_second": 5.702, "eval_steps_per_second": 1.922, "eval_wer": 0.10849848841115217, "step": 25000 }, { "epoch": 44.5, "learning_rate": 8.908396946564884e-06, "loss": 0.4579, "step": 25100 }, { "epoch": 44.68, "learning_rate": 8.622137404580152e-06, "loss": 0.4752, "step": 25200 }, { "epoch": 44.86, "learning_rate": 8.335877862595419e-06, "loss": 0.4708, "step": 25300 }, { "epoch": 45.04, "learning_rate": 8.049618320610687e-06, "loss": 0.4613, "step": 25400 }, { "epoch": 45.21, "learning_rate": 7.763358778625954e-06, "loss": 0.4667, "step": 25500 }, { "epoch": 45.21, "eval_loss": 0.1077902689576149, "eval_runtime": 31.0572, "eval_samples_per_second": 5.731, "eval_steps_per_second": 1.932, "eval_wer": 0.10782667114544844, "step": 25500 }, { "epoch": 45.39, "learning_rate": 7.477099236641221e-06, "loss": 0.4709, "step": 25600 }, { "epoch": 45.57, "learning_rate": 7.190839694656488e-06, "loss": 0.4523, "step": 25700 }, { "epoch": 45.74, "learning_rate": 6.9045801526717555e-06, "loss": 0.4537, "step": 25800 }, { "epoch": 45.92, "learning_rate": 6.62118320610687e-06, "loss": 0.4642, "step": 25900 }, { "epoch": 46.1, "learning_rate": 6.334923664122137e-06, "loss": 0.4484, "step": 26000 }, { "epoch": 46.1, "eval_loss": 0.10564640909433365, "eval_runtime": 31.5659, "eval_samples_per_second": 5.639, "eval_steps_per_second": 1.901, "eval_wer": 0.10816257977830031, "step": 26000 }, { "epoch": 46.28, "learning_rate": 6.048664122137404e-06, "loss": 0.4505, "step": 26100 }, { "epoch": 46.45, "learning_rate": 5.762404580152671e-06, "loss": 0.4758, "step": 26200 }, { "epoch": 46.63, "learning_rate": 5.476145038167939e-06, "loss": 0.4528, "step": 26300 }, { "epoch": 46.81, "learning_rate": 5.1898854961832056e-06, "loss": 0.4579, "step": 26400 }, { "epoch": 46.99, "learning_rate": 4.903625954198473e-06, "loss": 0.4601, "step": 26500 }, { "epoch": 46.99, "eval_loss": 0.10663167387247086, "eval_runtime": 31.3245, "eval_samples_per_second": 5.682, "eval_steps_per_second": 1.915, "eval_wer": 0.10782667114544844, "step": 26500 }, { "epoch": 47.16, "learning_rate": 4.61736641221374e-06, "loss": 0.4634, "step": 26600 }, { "epoch": 47.34, "learning_rate": 4.331106870229007e-06, "loss": 0.4603, "step": 26700 }, { "epoch": 47.52, "learning_rate": 4.0448473282442745e-06, "loss": 0.4653, "step": 26800 }, { "epoch": 47.69, "learning_rate": 3.7585877862595418e-06, "loss": 0.4619, "step": 26900 }, { "epoch": 47.87, "learning_rate": 3.472328244274809e-06, "loss": 0.4691, "step": 27000 }, { "epoch": 47.87, "eval_loss": 0.10679604858160019, "eval_runtime": 31.1673, "eval_samples_per_second": 5.711, "eval_steps_per_second": 1.925, "eval_wer": 0.10849848841115217, "step": 27000 }, { "epoch": 48.05, "learning_rate": 3.1860687022900762e-06, "loss": 0.4582, "step": 27100 }, { "epoch": 48.23, "learning_rate": 2.8998091603053435e-06, "loss": 0.4439, "step": 27200 }, { "epoch": 48.4, "learning_rate": 2.6135496183206107e-06, "loss": 0.4617, "step": 27300 }, { "epoch": 48.58, "learning_rate": 2.3272900763358775e-06, "loss": 0.4538, "step": 27400 }, { "epoch": 48.76, "learning_rate": 2.0410305343511447e-06, "loss": 0.4457, "step": 27500 }, { "epoch": 48.76, "eval_loss": 0.10663049668073654, "eval_runtime": 31.2143, "eval_samples_per_second": 5.703, "eval_steps_per_second": 1.922, "eval_wer": 0.10782667114544844, "step": 27500 }, { "epoch": 48.94, "learning_rate": 1.7547709923664122e-06, "loss": 0.4766, "step": 27600 }, { "epoch": 49.11, "learning_rate": 1.4685114503816792e-06, "loss": 0.4576, "step": 27700 }, { "epoch": 49.29, "learning_rate": 1.185114503816794e-06, "loss": 0.4616, "step": 27800 }, { "epoch": 49.47, "learning_rate": 8.98854961832061e-07, "loss": 0.4588, "step": 27900 }, { "epoch": 49.65, "learning_rate": 6.125954198473282e-07, "loss": 0.475, "step": 28000 }, { "epoch": 49.65, "eval_loss": 0.1059938296675682, "eval_runtime": 32.0006, "eval_samples_per_second": 5.562, "eval_steps_per_second": 1.875, "eval_wer": 0.10816257977830031, "step": 28000 }, { "epoch": 49.82, "learning_rate": 3.263358778625954e-07, "loss": 0.4622, "step": 28100 }, { "epoch": 50.0, "learning_rate": 4.0076335877862596e-08, "loss": 0.4567, "step": 28200 }, { "epoch": 50.0, "step": 28200, "total_flos": 6.992186583697227e+19, "train_loss": 0.6889870901987062, "train_runtime": 132713.9902, "train_samples_per_second": 2.551, "train_steps_per_second": 0.212 } ], "max_steps": 28200, "num_train_epochs": 50, "total_flos": 6.992186583697227e+19, "trial_name": null, "trial_params": null }