{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 23250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22, "learning_rate": 2.8500000000000002e-06, "loss": 7.8132, "step": 100 }, { "epoch": 0.43, "learning_rate": 5.82e-06, "loss": 3.053, "step": 200 }, { "epoch": 0.65, "learning_rate": 8.82e-06, "loss": 2.9223, "step": 300 }, { "epoch": 0.86, "learning_rate": 1.182e-05, "loss": 2.6894, "step": 400 }, { "epoch": 1.08, "learning_rate": 1.482e-05, "loss": 1.6894, "step": 500 }, { "epoch": 1.08, "eval_loss": 1.2422521114349365, "eval_runtime": 81.2578, "eval_samples_per_second": 10.177, "eval_steps_per_second": 1.28, "eval_wer": 0.8619092627599244, "step": 500 }, { "epoch": 1.29, "learning_rate": 1.782e-05, "loss": 1.3281, "step": 600 }, { "epoch": 1.51, "learning_rate": 2.082e-05, "loss": 1.1146, "step": 700 }, { "epoch": 1.72, "learning_rate": 2.3820000000000002e-05, "loss": 0.9295, "step": 800 }, { "epoch": 1.94, "learning_rate": 2.682e-05, "loss": 0.8392, "step": 900 }, { "epoch": 2.15, "learning_rate": 2.982e-05, "loss": 0.7543, "step": 1000 }, { "epoch": 2.15, "eval_loss": 0.5956054329872131, "eval_runtime": 81.5082, "eval_samples_per_second": 10.146, "eval_steps_per_second": 1.276, "eval_wer": 0.38166351606805293, "step": 1000 }, { "epoch": 2.37, "learning_rate": 3.282e-05, "loss": 0.7128, "step": 1100 }, { "epoch": 2.58, "learning_rate": 3.582e-05, "loss": 0.6859, "step": 1200 }, { "epoch": 2.8, "learning_rate": 3.8820000000000004e-05, "loss": 0.6024, "step": 1300 }, { "epoch": 3.01, "learning_rate": 4.1819999999999996e-05, "loss": 0.6365, "step": 1400 }, { "epoch": 3.23, "learning_rate": 4.482e-05, "loss": 0.5481, "step": 1500 }, { "epoch": 3.23, "eval_loss": 0.5042839646339417, "eval_runtime": 83.6557, "eval_samples_per_second": 9.886, "eval_steps_per_second": 1.243, "eval_wer": 0.32457466918714556, "step": 1500 }, { "epoch": 3.44, "learning_rate": 4.7820000000000006e-05, "loss": 0.5648, "step": 1600 }, { "epoch": 3.66, "learning_rate": 5.082e-05, "loss": 0.555, "step": 1700 }, { "epoch": 3.87, "learning_rate": 5.382e-05, "loss": 0.5723, "step": 1800 }, { "epoch": 4.09, "learning_rate": 5.682e-05, "loss": 0.4716, "step": 1900 }, { "epoch": 4.3, "learning_rate": 5.982e-05, "loss": 0.4661, "step": 2000 }, { "epoch": 4.3, "eval_loss": 0.48127105832099915, "eval_runtime": 81.5918, "eval_samples_per_second": 10.136, "eval_steps_per_second": 1.275, "eval_wer": 0.2793005671077505, "step": 2000 }, { "epoch": 4.52, "learning_rate": 5.973458823529412e-05, "loss": 0.4756, "step": 2100 }, { "epoch": 4.73, "learning_rate": 5.945223529411765e-05, "loss": 0.4022, "step": 2200 }, { "epoch": 4.95, "learning_rate": 5.9169882352941175e-05, "loss": 0.4314, "step": 2300 }, { "epoch": 5.16, "learning_rate": 5.8887529411764706e-05, "loss": 0.4154, "step": 2400 }, { "epoch": 5.38, "learning_rate": 5.8605176470588236e-05, "loss": 0.3901, "step": 2500 }, { "epoch": 5.38, "eval_loss": 0.4370500147342682, "eval_runtime": 81.2387, "eval_samples_per_second": 10.18, "eval_steps_per_second": 1.28, "eval_wer": 0.25916824196597354, "step": 2500 }, { "epoch": 5.59, "learning_rate": 5.8322823529411767e-05, "loss": 0.4014, "step": 2600 }, { "epoch": 5.81, "learning_rate": 5.80404705882353e-05, "loss": 0.3762, "step": 2700 }, { "epoch": 6.02, "learning_rate": 5.775811764705882e-05, "loss": 0.3777, "step": 2800 }, { "epoch": 6.24, "learning_rate": 5.747576470588236e-05, "loss": 0.3342, "step": 2900 }, { "epoch": 6.45, "learning_rate": 5.719341176470588e-05, "loss": 0.3512, "step": 3000 }, { "epoch": 6.45, "eval_loss": 0.4216073155403137, "eval_runtime": 81.8015, "eval_samples_per_second": 10.11, "eval_steps_per_second": 1.271, "eval_wer": 0.24584120982986768, "step": 3000 }, { "epoch": 6.67, "learning_rate": 5.691105882352942e-05, "loss": 0.3204, "step": 3100 }, { "epoch": 6.88, "learning_rate": 5.662870588235294e-05, "loss": 0.3232, "step": 3200 }, { "epoch": 7.1, "learning_rate": 5.634635294117647e-05, "loss": 0.2976, "step": 3300 }, { "epoch": 7.31, "learning_rate": 5.6064000000000004e-05, "loss": 0.296, "step": 3400 }, { "epoch": 7.53, "learning_rate": 5.578164705882353e-05, "loss": 0.3016, "step": 3500 }, { "epoch": 7.53, "eval_loss": 0.3813554048538208, "eval_runtime": 83.1204, "eval_samples_per_second": 9.949, "eval_steps_per_second": 1.251, "eval_wer": 0.22570888468809075, "step": 3500 }, { "epoch": 7.74, "learning_rate": 5.549929411764706e-05, "loss": 0.2726, "step": 3600 }, { "epoch": 7.96, "learning_rate": 5.521694117647059e-05, "loss": 0.2953, "step": 3700 }, { "epoch": 8.17, "learning_rate": 5.493458823529412e-05, "loss": 0.2647, "step": 3800 }, { "epoch": 8.39, "learning_rate": 5.465223529411765e-05, "loss": 0.2524, "step": 3900 }, { "epoch": 8.6, "learning_rate": 5.436988235294118e-05, "loss": 0.278, "step": 4000 }, { "epoch": 8.6, "eval_loss": 0.41506877541542053, "eval_runtime": 81.9008, "eval_samples_per_second": 10.098, "eval_steps_per_second": 1.27, "eval_wer": 0.21446124763705104, "step": 4000 }, { "epoch": 8.82, "learning_rate": 5.4087529411764704e-05, "loss": 0.2559, "step": 4100 }, { "epoch": 9.03, "learning_rate": 5.380517647058824e-05, "loss": 0.2669, "step": 4200 }, { "epoch": 9.25, "learning_rate": 5.3522823529411765e-05, "loss": 0.2189, "step": 4300 }, { "epoch": 9.46, "learning_rate": 5.3240470588235296e-05, "loss": 0.2321, "step": 4400 }, { "epoch": 9.68, "learning_rate": 5.2958117647058826e-05, "loss": 0.2435, "step": 4500 }, { "epoch": 9.68, "eval_loss": 0.48163077235221863, "eval_runtime": 80.9004, "eval_samples_per_second": 10.222, "eval_steps_per_second": 1.286, "eval_wer": 0.21304347826086956, "step": 4500 }, { "epoch": 9.89, "learning_rate": 5.267576470588235e-05, "loss": 0.2348, "step": 4600 }, { "epoch": 10.11, "learning_rate": 5.239341176470589e-05, "loss": 0.2266, "step": 4700 }, { "epoch": 10.32, "learning_rate": 5.211105882352941e-05, "loss": 0.2345, "step": 4800 }, { "epoch": 10.54, "learning_rate": 5.182870588235294e-05, "loss": 0.2266, "step": 4900 }, { "epoch": 10.75, "learning_rate": 5.154635294117647e-05, "loss": 0.2122, "step": 5000 }, { "epoch": 10.75, "eval_loss": 0.4489321708679199, "eval_runtime": 81.0474, "eval_samples_per_second": 10.204, "eval_steps_per_second": 1.283, "eval_wer": 0.21370510396975426, "step": 5000 }, { "epoch": 10.97, "learning_rate": 5.1264e-05, "loss": 0.2038, "step": 5100 }, { "epoch": 11.18, "learning_rate": 5.098164705882353e-05, "loss": 0.1916, "step": 5200 }, { "epoch": 11.4, "learning_rate": 5.069929411764706e-05, "loss": 0.1896, "step": 5300 }, { "epoch": 11.61, "learning_rate": 5.041694117647059e-05, "loss": 0.207, "step": 5400 }, { "epoch": 11.83, "learning_rate": 5.013458823529412e-05, "loss": 0.1949, "step": 5500 }, { "epoch": 11.83, "eval_loss": 0.3977554738521576, "eval_runtime": 80.708, "eval_samples_per_second": 10.247, "eval_steps_per_second": 1.289, "eval_wer": 0.20633270321361058, "step": 5500 }, { "epoch": 12.04, "learning_rate": 4.985223529411765e-05, "loss": 0.2177, "step": 5600 }, { "epoch": 12.26, "learning_rate": 4.956988235294118e-05, "loss": 0.182, "step": 5700 }, { "epoch": 12.47, "learning_rate": 4.928752941176471e-05, "loss": 0.1809, "step": 5800 }, { "epoch": 12.69, "learning_rate": 4.900517647058823e-05, "loss": 0.1876, "step": 5900 }, { "epoch": 12.9, "learning_rate": 4.872282352941177e-05, "loss": 0.1929, "step": 6000 }, { "epoch": 12.9, "eval_loss": 0.38229823112487793, "eval_runtime": 82.0352, "eval_samples_per_second": 10.081, "eval_steps_per_second": 1.268, "eval_wer": 0.20264650283553876, "step": 6000 }, { "epoch": 13.12, "learning_rate": 4.8440470588235294e-05, "loss": 0.1952, "step": 6100 }, { "epoch": 13.33, "learning_rate": 4.8158117647058825e-05, "loss": 0.1739, "step": 6200 }, { "epoch": 13.55, "learning_rate": 4.7875764705882355e-05, "loss": 0.1748, "step": 6300 }, { "epoch": 13.76, "learning_rate": 4.759341176470588e-05, "loss": 0.1625, "step": 6400 }, { "epoch": 13.98, "learning_rate": 4.7311058823529416e-05, "loss": 0.1757, "step": 6500 }, { "epoch": 13.98, "eval_loss": 0.3409084379673004, "eval_runtime": 80.5311, "eval_samples_per_second": 10.269, "eval_steps_per_second": 1.291, "eval_wer": 0.19650283553875236, "step": 6500 }, { "epoch": 14.19, "learning_rate": 4.702870588235294e-05, "loss": 0.1422, "step": 6600 }, { "epoch": 14.41, "learning_rate": 4.674635294117647e-05, "loss": 0.1571, "step": 6700 }, { "epoch": 14.62, "learning_rate": 4.6464e-05, "loss": 0.1565, "step": 6800 }, { "epoch": 14.84, "learning_rate": 4.618164705882353e-05, "loss": 0.1717, "step": 6900 }, { "epoch": 15.05, "learning_rate": 4.589929411764706e-05, "loss": 0.1771, "step": 7000 }, { "epoch": 15.05, "eval_loss": 0.3844490051269531, "eval_runtime": 81.332, "eval_samples_per_second": 10.168, "eval_steps_per_second": 1.279, "eval_wer": 0.19357277882797733, "step": 7000 }, { "epoch": 15.27, "learning_rate": 4.561694117647059e-05, "loss": 0.1408, "step": 7100 }, { "epoch": 15.48, "learning_rate": 4.5334588235294116e-05, "loss": 0.1512, "step": 7200 }, { "epoch": 15.7, "learning_rate": 4.505223529411765e-05, "loss": 0.1505, "step": 7300 }, { "epoch": 15.91, "learning_rate": 4.476988235294118e-05, "loss": 0.1436, "step": 7400 }, { "epoch": 16.13, "learning_rate": 4.448752941176471e-05, "loss": 0.1452, "step": 7500 }, { "epoch": 16.13, "eval_loss": 0.37490135431289673, "eval_runtime": 81.4843, "eval_samples_per_second": 10.149, "eval_steps_per_second": 1.276, "eval_wer": 0.1899810964083176, "step": 7500 }, { "epoch": 16.34, "learning_rate": 4.420517647058824e-05, "loss": 0.1462, "step": 7600 }, { "epoch": 16.56, "learning_rate": 4.392282352941176e-05, "loss": 0.1425, "step": 7700 }, { "epoch": 16.77, "learning_rate": 4.36404705882353e-05, "loss": 0.144, "step": 7800 }, { "epoch": 16.99, "learning_rate": 4.335811764705882e-05, "loss": 0.1622, "step": 7900 }, { "epoch": 17.2, "learning_rate": 4.3075764705882354e-05, "loss": 0.1341, "step": 8000 }, { "epoch": 17.2, "eval_loss": 0.44071856141090393, "eval_runtime": 82.1207, "eval_samples_per_second": 10.071, "eval_steps_per_second": 1.266, "eval_wer": 0.20264650283553876, "step": 8000 }, { "epoch": 17.42, "learning_rate": 4.2793411764705884e-05, "loss": 0.1151, "step": 8100 }, { "epoch": 17.63, "learning_rate": 4.251105882352941e-05, "loss": 0.1477, "step": 8200 }, { "epoch": 17.85, "learning_rate": 4.2228705882352945e-05, "loss": 0.1377, "step": 8300 }, { "epoch": 18.06, "learning_rate": 4.194635294117647e-05, "loss": 0.129, "step": 8400 }, { "epoch": 18.28, "learning_rate": 4.1664e-05, "loss": 0.13, "step": 8500 }, { "epoch": 18.28, "eval_loss": 0.42531654238700867, "eval_runtime": 82.3744, "eval_samples_per_second": 10.04, "eval_steps_per_second": 1.263, "eval_wer": 0.1882797731568998, "step": 8500 }, { "epoch": 18.49, "learning_rate": 4.138164705882353e-05, "loss": 0.1353, "step": 8600 }, { "epoch": 18.71, "learning_rate": 4.109929411764706e-05, "loss": 0.1216, "step": 8700 }, { "epoch": 18.92, "learning_rate": 4.081694117647059e-05, "loss": 0.1301, "step": 8800 }, { "epoch": 19.14, "learning_rate": 4.053458823529412e-05, "loss": 0.1195, "step": 8900 }, { "epoch": 19.35, "learning_rate": 4.0252235294117645e-05, "loss": 0.1183, "step": 9000 }, { "epoch": 19.35, "eval_loss": 0.43106988072395325, "eval_runtime": 80.8241, "eval_samples_per_second": 10.232, "eval_steps_per_second": 1.287, "eval_wer": 0.18799621928166352, "step": 9000 }, { "epoch": 19.57, "learning_rate": 3.996988235294118e-05, "loss": 0.1147, "step": 9100 }, { "epoch": 19.78, "learning_rate": 3.9687529411764706e-05, "loss": 0.1178, "step": 9200 }, { "epoch": 20.0, "learning_rate": 3.940517647058823e-05, "loss": 0.1177, "step": 9300 }, { "epoch": 20.22, "learning_rate": 3.912282352941177e-05, "loss": 0.1142, "step": 9400 }, { "epoch": 20.43, "learning_rate": 3.884047058823529e-05, "loss": 0.118, "step": 9500 }, { "epoch": 20.43, "eval_loss": 0.4430650472640991, "eval_runtime": 81.2185, "eval_samples_per_second": 10.182, "eval_steps_per_second": 1.28, "eval_wer": 0.18818525519848772, "step": 9500 }, { "epoch": 20.65, "learning_rate": 3.855811764705883e-05, "loss": 0.1125, "step": 9600 }, { "epoch": 20.86, "learning_rate": 3.827576470588235e-05, "loss": 0.1231, "step": 9700 }, { "epoch": 21.08, "learning_rate": 3.799341176470588e-05, "loss": 0.1133, "step": 9800 }, { "epoch": 21.29, "learning_rate": 3.771105882352941e-05, "loss": 0.1152, "step": 9900 }, { "epoch": 21.51, "learning_rate": 3.7428705882352944e-05, "loss": 0.1123, "step": 10000 }, { "epoch": 21.51, "eval_loss": 0.4753414988517761, "eval_runtime": 81.7131, "eval_samples_per_second": 10.121, "eval_steps_per_second": 1.273, "eval_wer": 0.18204158790170133, "step": 10000 }, { "epoch": 21.72, "learning_rate": 3.7146352941176474e-05, "loss": 0.1186, "step": 10100 }, { "epoch": 21.94, "learning_rate": 3.6864e-05, "loss": 0.1113, "step": 10200 }, { "epoch": 22.15, "learning_rate": 3.658164705882353e-05, "loss": 0.1243, "step": 10300 }, { "epoch": 22.37, "learning_rate": 3.629929411764706e-05, "loss": 0.1056, "step": 10400 }, { "epoch": 22.58, "learning_rate": 3.601694117647059e-05, "loss": 0.1037, "step": 10500 }, { "epoch": 22.58, "eval_loss": 0.40868785977363586, "eval_runtime": 80.8904, "eval_samples_per_second": 10.224, "eval_steps_per_second": 1.286, "eval_wer": 0.1833648393194707, "step": 10500 }, { "epoch": 22.8, "learning_rate": 3.573458823529411e-05, "loss": 0.1055, "step": 10600 }, { "epoch": 23.01, "learning_rate": 3.545223529411765e-05, "loss": 0.0977, "step": 10700 }, { "epoch": 23.23, "learning_rate": 3.5169882352941174e-05, "loss": 0.1076, "step": 10800 }, { "epoch": 23.44, "learning_rate": 3.488752941176471e-05, "loss": 0.1028, "step": 10900 }, { "epoch": 23.66, "learning_rate": 3.4605176470588235e-05, "loss": 0.1066, "step": 11000 }, { "epoch": 23.66, "eval_loss": 0.415127694606781, "eval_runtime": 81.0404, "eval_samples_per_second": 10.205, "eval_steps_per_second": 1.283, "eval_wer": 0.18449905482041587, "step": 11000 }, { "epoch": 23.87, "learning_rate": 3.4322823529411766e-05, "loss": 0.1091, "step": 11100 }, { "epoch": 24.09, "learning_rate": 3.4040470588235296e-05, "loss": 0.089, "step": 11200 }, { "epoch": 24.3, "learning_rate": 3.375811764705882e-05, "loss": 0.1014, "step": 11300 }, { "epoch": 24.52, "learning_rate": 3.347576470588236e-05, "loss": 0.0864, "step": 11400 }, { "epoch": 24.73, "learning_rate": 3.319341176470588e-05, "loss": 0.0977, "step": 11500 }, { "epoch": 24.73, "eval_loss": 0.43674391508102417, "eval_runtime": 81.0756, "eval_samples_per_second": 10.2, "eval_steps_per_second": 1.283, "eval_wer": 0.1782608695652174, "step": 11500 }, { "epoch": 24.95, "learning_rate": 3.291105882352941e-05, "loss": 0.0989, "step": 11600 }, { "epoch": 25.16, "learning_rate": 3.262870588235294e-05, "loss": 0.0977, "step": 11700 }, { "epoch": 25.38, "learning_rate": 3.234635294117647e-05, "loss": 0.0869, "step": 11800 }, { "epoch": 25.59, "learning_rate": 3.2064e-05, "loss": 0.0851, "step": 11900 }, { "epoch": 25.81, "learning_rate": 3.1781647058823534e-05, "loss": 0.0968, "step": 12000 }, { "epoch": 25.81, "eval_loss": 0.4236660897731781, "eval_runtime": 80.9504, "eval_samples_per_second": 10.216, "eval_steps_per_second": 1.285, "eval_wer": 0.17561436672967864, "step": 12000 }, { "epoch": 26.02, "learning_rate": 3.149929411764706e-05, "loss": 0.0866, "step": 12100 }, { "epoch": 26.24, "learning_rate": 3.121694117647059e-05, "loss": 0.0774, "step": 12200 }, { "epoch": 26.45, "learning_rate": 3.093458823529412e-05, "loss": 0.0974, "step": 12300 }, { "epoch": 26.67, "learning_rate": 3.065223529411764e-05, "loss": 0.0807, "step": 12400 }, { "epoch": 26.88, "learning_rate": 3.0372705882352942e-05, "loss": 0.0835, "step": 12500 }, { "epoch": 26.88, "eval_loss": 0.4728855490684509, "eval_runtime": 81.9776, "eval_samples_per_second": 10.088, "eval_steps_per_second": 1.269, "eval_wer": 0.17807183364839319, "step": 12500 }, { "epoch": 27.1, "learning_rate": 3.009035294117647e-05, "loss": 0.0888, "step": 12600 }, { "epoch": 27.31, "learning_rate": 2.9808000000000003e-05, "loss": 0.0845, "step": 12700 }, { "epoch": 27.53, "learning_rate": 2.952564705882353e-05, "loss": 0.0888, "step": 12800 }, { "epoch": 27.74, "learning_rate": 2.9243294117647058e-05, "loss": 0.0816, "step": 12900 }, { "epoch": 27.96, "learning_rate": 2.8960941176470588e-05, "loss": 0.0919, "step": 13000 }, { "epoch": 27.96, "eval_loss": 0.4152912497520447, "eval_runtime": 81.9103, "eval_samples_per_second": 10.096, "eval_steps_per_second": 1.27, "eval_wer": 0.17013232514177692, "step": 13000 }, { "epoch": 28.17, "learning_rate": 2.867858823529412e-05, "loss": 0.0886, "step": 13100 }, { "epoch": 28.39, "learning_rate": 2.8396235294117646e-05, "loss": 0.0809, "step": 13200 }, { "epoch": 28.6, "learning_rate": 2.8113882352941176e-05, "loss": 0.0786, "step": 13300 }, { "epoch": 28.82, "learning_rate": 2.7831529411764707e-05, "loss": 0.0766, "step": 13400 }, { "epoch": 29.03, "learning_rate": 2.7552e-05, "loss": 0.0677, "step": 13500 }, { "epoch": 29.03, "eval_loss": 0.431657612323761, "eval_runtime": 81.4905, "eval_samples_per_second": 10.148, "eval_steps_per_second": 1.276, "eval_wer": 0.16928166351606805, "step": 13500 }, { "epoch": 29.25, "learning_rate": 2.726964705882353e-05, "loss": 0.0757, "step": 13600 }, { "epoch": 29.46, "learning_rate": 2.698729411764706e-05, "loss": 0.0719, "step": 13700 }, { "epoch": 29.68, "learning_rate": 2.670494117647059e-05, "loss": 0.0772, "step": 13800 }, { "epoch": 29.89, "learning_rate": 2.642258823529412e-05, "loss": 0.0762, "step": 13900 }, { "epoch": 30.11, "learning_rate": 2.614023529411765e-05, "loss": 0.0726, "step": 14000 }, { "epoch": 30.11, "eval_loss": 0.43801796436309814, "eval_runtime": 83.6487, "eval_samples_per_second": 9.887, "eval_steps_per_second": 1.243, "eval_wer": 0.17362948960302457, "step": 14000 }, { "epoch": 30.32, "learning_rate": 2.5857882352941176e-05, "loss": 0.0654, "step": 14100 }, { "epoch": 30.54, "learning_rate": 2.5575529411764707e-05, "loss": 0.0691, "step": 14200 }, { "epoch": 30.75, "learning_rate": 2.5293176470588234e-05, "loss": 0.0677, "step": 14300 }, { "epoch": 30.97, "learning_rate": 2.5010823529411764e-05, "loss": 0.0668, "step": 14400 }, { "epoch": 31.18, "learning_rate": 2.4728470588235295e-05, "loss": 0.066, "step": 14500 }, { "epoch": 31.18, "eval_loss": 0.4384245276451111, "eval_runtime": 81.1759, "eval_samples_per_second": 10.188, "eval_steps_per_second": 1.281, "eval_wer": 0.16814744801512288, "step": 14500 }, { "epoch": 31.4, "learning_rate": 2.4446117647058825e-05, "loss": 0.0663, "step": 14600 }, { "epoch": 31.61, "learning_rate": 2.4163764705882352e-05, "loss": 0.0688, "step": 14700 }, { "epoch": 31.83, "learning_rate": 2.3881411764705883e-05, "loss": 0.0677, "step": 14800 }, { "epoch": 32.04, "learning_rate": 2.3599058823529414e-05, "loss": 0.0718, "step": 14900 }, { "epoch": 32.26, "learning_rate": 2.3316705882352944e-05, "loss": 0.0713, "step": 15000 }, { "epoch": 32.26, "eval_loss": 0.4215048551559448, "eval_runtime": 80.5465, "eval_samples_per_second": 10.267, "eval_steps_per_second": 1.291, "eval_wer": 0.16294896030245748, "step": 15000 }, { "epoch": 32.47, "learning_rate": 2.303435294117647e-05, "loss": 0.065, "step": 15100 }, { "epoch": 32.69, "learning_rate": 2.2752e-05, "loss": 0.0631, "step": 15200 }, { "epoch": 32.9, "learning_rate": 2.246964705882353e-05, "loss": 0.0616, "step": 15300 }, { "epoch": 33.12, "learning_rate": 2.218729411764706e-05, "loss": 0.0812, "step": 15400 }, { "epoch": 33.33, "learning_rate": 2.190494117647059e-05, "loss": 0.0605, "step": 15500 }, { "epoch": 33.33, "eval_loss": 0.457427054643631, "eval_runtime": 80.6078, "eval_samples_per_second": 10.26, "eval_steps_per_second": 1.29, "eval_wer": 0.1713610586011342, "step": 15500 }, { "epoch": 33.55, "learning_rate": 2.1622588235294117e-05, "loss": 0.0566, "step": 15600 }, { "epoch": 33.76, "learning_rate": 2.1340235294117648e-05, "loss": 0.064, "step": 15700 }, { "epoch": 33.98, "learning_rate": 2.1057882352941178e-05, "loss": 0.0591, "step": 15800 }, { "epoch": 34.19, "learning_rate": 2.077552941176471e-05, "loss": 0.0581, "step": 15900 }, { "epoch": 34.41, "learning_rate": 2.0493176470588236e-05, "loss": 0.0632, "step": 16000 }, { "epoch": 34.41, "eval_loss": 0.43431583046913147, "eval_runtime": 81.6162, "eval_samples_per_second": 10.133, "eval_steps_per_second": 1.274, "eval_wer": 0.16417769376181474, "step": 16000 }, { "epoch": 34.62, "learning_rate": 2.0210823529411763e-05, "loss": 0.0578, "step": 16100 }, { "epoch": 34.84, "learning_rate": 1.9928470588235293e-05, "loss": 0.0616, "step": 16200 }, { "epoch": 35.05, "learning_rate": 1.9648941176470586e-05, "loss": 0.0516, "step": 16300 }, { "epoch": 35.27, "learning_rate": 1.9366588235294117e-05, "loss": 0.0533, "step": 16400 }, { "epoch": 35.48, "learning_rate": 1.9084235294117647e-05, "loss": 0.0567, "step": 16500 }, { "epoch": 35.48, "eval_loss": 0.42308300733566284, "eval_runtime": 82.0264, "eval_samples_per_second": 10.082, "eval_steps_per_second": 1.268, "eval_wer": 0.16011342155009453, "step": 16500 }, { "epoch": 35.7, "learning_rate": 1.8801882352941178e-05, "loss": 0.0562, "step": 16600 }, { "epoch": 35.91, "learning_rate": 1.8519529411764705e-05, "loss": 0.0554, "step": 16700 }, { "epoch": 36.13, "learning_rate": 1.8237176470588236e-05, "loss": 0.0564, "step": 16800 }, { "epoch": 36.34, "learning_rate": 1.7954823529411766e-05, "loss": 0.0617, "step": 16900 }, { "epoch": 36.56, "learning_rate": 1.7672470588235297e-05, "loss": 0.0556, "step": 17000 }, { "epoch": 36.56, "eval_loss": 0.4404306411743164, "eval_runtime": 81.4874, "eval_samples_per_second": 10.149, "eval_steps_per_second": 1.276, "eval_wer": 0.1667296786389414, "step": 17000 }, { "epoch": 36.77, "learning_rate": 1.7390117647058824e-05, "loss": 0.0524, "step": 17100 }, { "epoch": 36.99, "learning_rate": 1.7107764705882354e-05, "loss": 0.0602, "step": 17200 }, { "epoch": 37.2, "learning_rate": 1.682541176470588e-05, "loss": 0.0472, "step": 17300 }, { "epoch": 37.42, "learning_rate": 1.6543058823529412e-05, "loss": 0.047, "step": 17400 }, { "epoch": 37.63, "learning_rate": 1.6260705882352943e-05, "loss": 0.0426, "step": 17500 }, { "epoch": 37.63, "eval_loss": 0.4458593428134918, "eval_runtime": 80.8169, "eval_samples_per_second": 10.233, "eval_steps_per_second": 1.287, "eval_wer": 0.16247637051039698, "step": 17500 }, { "epoch": 37.85, "learning_rate": 1.597835294117647e-05, "loss": 0.0513, "step": 17600 }, { "epoch": 38.06, "learning_rate": 1.5696e-05, "loss": 0.056, "step": 17700 }, { "epoch": 38.28, "learning_rate": 1.541364705882353e-05, "loss": 0.0498, "step": 17800 }, { "epoch": 38.49, "learning_rate": 1.513129411764706e-05, "loss": 0.0473, "step": 17900 }, { "epoch": 38.71, "learning_rate": 1.4848941176470588e-05, "loss": 0.0445, "step": 18000 }, { "epoch": 38.71, "eval_loss": 0.4483908712863922, "eval_runtime": 82.388, "eval_samples_per_second": 10.038, "eval_steps_per_second": 1.262, "eval_wer": 0.16285444234404536, "step": 18000 }, { "epoch": 38.92, "learning_rate": 1.4566588235294117e-05, "loss": 0.0487, "step": 18100 }, { "epoch": 39.14, "learning_rate": 1.4284235294117648e-05, "loss": 0.0491, "step": 18200 }, { "epoch": 39.35, "learning_rate": 1.4001882352941177e-05, "loss": 0.044, "step": 18300 }, { "epoch": 39.57, "learning_rate": 1.3722352941176471e-05, "loss": 0.0467, "step": 18400 }, { "epoch": 39.78, "learning_rate": 1.344e-05, "loss": 0.0463, "step": 18500 }, { "epoch": 39.78, "eval_loss": 0.45076683163642883, "eval_runtime": 81.2197, "eval_samples_per_second": 10.182, "eval_steps_per_second": 1.28, "eval_wer": 0.15964083175803404, "step": 18500 }, { "epoch": 40.0, "learning_rate": 1.315764705882353e-05, "loss": 0.0464, "step": 18600 }, { "epoch": 40.22, "learning_rate": 1.2875294117647058e-05, "loss": 0.0395, "step": 18700 }, { "epoch": 40.43, "learning_rate": 1.2592941176470588e-05, "loss": 0.0421, "step": 18800 }, { "epoch": 40.65, "learning_rate": 1.2310588235294119e-05, "loss": 0.0463, "step": 18900 }, { "epoch": 40.86, "learning_rate": 1.2028235294117648e-05, "loss": 0.0448, "step": 19000 }, { "epoch": 40.86, "eval_loss": 0.4395386278629303, "eval_runtime": 82.035, "eval_samples_per_second": 10.081, "eval_steps_per_second": 1.268, "eval_wer": 0.1604914933837429, "step": 19000 }, { "epoch": 41.08, "learning_rate": 1.1745882352941178e-05, "loss": 0.0483, "step": 19100 }, { "epoch": 41.29, "learning_rate": 1.1463529411764705e-05, "loss": 0.0457, "step": 19200 }, { "epoch": 41.51, "learning_rate": 1.1181176470588236e-05, "loss": 0.0363, "step": 19300 }, { "epoch": 41.72, "learning_rate": 1.0898823529411765e-05, "loss": 0.0428, "step": 19400 }, { "epoch": 41.94, "learning_rate": 1.0616470588235295e-05, "loss": 0.0434, "step": 19500 }, { "epoch": 41.94, "eval_loss": 0.4489993453025818, "eval_runtime": 82.599, "eval_samples_per_second": 10.012, "eval_steps_per_second": 1.259, "eval_wer": 0.16068052930056712, "step": 19500 }, { "epoch": 42.15, "learning_rate": 1.0334117647058824e-05, "loss": 0.0389, "step": 19600 }, { "epoch": 42.37, "learning_rate": 1.0051764705882353e-05, "loss": 0.0482, "step": 19700 }, { "epoch": 42.58, "learning_rate": 9.769411764705882e-06, "loss": 0.0467, "step": 19800 }, { "epoch": 42.8, "learning_rate": 9.487058823529412e-06, "loss": 0.0379, "step": 19900 }, { "epoch": 43.01, "learning_rate": 9.204705882352941e-06, "loss": 0.0347, "step": 20000 }, { "epoch": 43.01, "eval_loss": 0.47717225551605225, "eval_runtime": 81.2937, "eval_samples_per_second": 10.173, "eval_steps_per_second": 1.279, "eval_wer": 0.15822306238185255, "step": 20000 }, { "epoch": 43.23, "learning_rate": 8.922352941176471e-06, "loss": 0.0327, "step": 20100 }, { "epoch": 43.44, "learning_rate": 8.64e-06, "loss": 0.0407, "step": 20200 }, { "epoch": 43.66, "learning_rate": 8.357647058823529e-06, "loss": 0.0368, "step": 20300 }, { "epoch": 43.87, "learning_rate": 8.07529411764706e-06, "loss": 0.0365, "step": 20400 }, { "epoch": 44.09, "learning_rate": 7.792941176470588e-06, "loss": 0.0332, "step": 20500 }, { "epoch": 44.09, "eval_loss": 0.4728728234767914, "eval_runtime": 81.4631, "eval_samples_per_second": 10.152, "eval_steps_per_second": 1.277, "eval_wer": 0.15822306238185255, "step": 20500 }, { "epoch": 44.3, "learning_rate": 7.510588235294118e-06, "loss": 0.0353, "step": 20600 }, { "epoch": 44.52, "learning_rate": 7.228235294117648e-06, "loss": 0.039, "step": 20700 }, { "epoch": 44.73, "learning_rate": 6.945882352941177e-06, "loss": 0.0395, "step": 20800 }, { "epoch": 44.95, "learning_rate": 6.663529411764706e-06, "loss": 0.0368, "step": 20900 }, { "epoch": 45.16, "learning_rate": 6.381176470588236e-06, "loss": 0.037, "step": 21000 }, { "epoch": 45.16, "eval_loss": 0.45591220259666443, "eval_runtime": 83.1729, "eval_samples_per_second": 9.943, "eval_steps_per_second": 1.25, "eval_wer": 0.15727788279773156, "step": 21000 }, { "epoch": 45.38, "learning_rate": 6.098823529411765e-06, "loss": 0.0328, "step": 21100 }, { "epoch": 45.59, "learning_rate": 5.8164705882352945e-06, "loss": 0.0541, "step": 21200 }, { "epoch": 45.81, "learning_rate": 5.534117647058823e-06, "loss": 0.0413, "step": 21300 }, { "epoch": 46.02, "learning_rate": 5.251764705882353e-06, "loss": 0.0403, "step": 21400 }, { "epoch": 46.24, "learning_rate": 4.969411764705883e-06, "loss": 0.0328, "step": 21500 }, { "epoch": 46.24, "eval_loss": 0.46641021966934204, "eval_runtime": 82.6563, "eval_samples_per_second": 10.005, "eval_steps_per_second": 1.258, "eval_wer": 0.1560491493383743, "step": 21500 }, { "epoch": 46.45, "learning_rate": 4.6870588235294115e-06, "loss": 0.0334, "step": 21600 }, { "epoch": 46.67, "learning_rate": 4.404705882352941e-06, "loss": 0.0334, "step": 21700 }, { "epoch": 46.88, "learning_rate": 4.122352941176471e-06, "loss": 0.0324, "step": 21800 }, { "epoch": 47.1, "learning_rate": 3.8400000000000005e-06, "loss": 0.0337, "step": 21900 }, { "epoch": 47.31, "learning_rate": 3.5576470588235293e-06, "loss": 0.0366, "step": 22000 }, { "epoch": 47.31, "eval_loss": 0.454330176115036, "eval_runtime": 81.2727, "eval_samples_per_second": 10.176, "eval_steps_per_second": 1.28, "eval_wer": 0.15434782608695652, "step": 22000 }, { "epoch": 47.53, "learning_rate": 3.275294117647059e-06, "loss": 0.0332, "step": 22100 }, { "epoch": 47.74, "learning_rate": 2.9929411764705883e-06, "loss": 0.0295, "step": 22200 }, { "epoch": 47.96, "learning_rate": 2.710588235294118e-06, "loss": 0.037, "step": 22300 }, { "epoch": 48.17, "learning_rate": 2.428235294117647e-06, "loss": 0.0314, "step": 22400 }, { "epoch": 48.39, "learning_rate": 2.1458823529411764e-06, "loss": 0.0377, "step": 22500 }, { "epoch": 48.39, "eval_loss": 0.45068359375, "eval_runtime": 80.8954, "eval_samples_per_second": 10.223, "eval_steps_per_second": 1.286, "eval_wer": 0.15595463137996218, "step": 22500 }, { "epoch": 48.6, "learning_rate": 1.8635294117647059e-06, "loss": 0.0284, "step": 22600 }, { "epoch": 48.82, "learning_rate": 1.5811764705882354e-06, "loss": 0.0326, "step": 22700 }, { "epoch": 49.03, "learning_rate": 1.2988235294117648e-06, "loss": 0.034, "step": 22800 }, { "epoch": 49.25, "learning_rate": 1.016470588235294e-06, "loss": 0.0319, "step": 22900 }, { "epoch": 49.46, "learning_rate": 7.341176470588236e-07, "loss": 0.0331, "step": 23000 }, { "epoch": 49.46, "eval_loss": 0.4567229151725769, "eval_runtime": 80.8744, "eval_samples_per_second": 10.226, "eval_steps_per_second": 1.286, "eval_wer": 0.15330812854442344, "step": 23000 }, { "epoch": 49.68, "learning_rate": 4.51764705882353e-07, "loss": 0.0334, "step": 23100 }, { "epoch": 49.89, "learning_rate": 1.6941176470588237e-07, "loss": 0.027, "step": 23200 }, { "epoch": 50.0, "step": 23250, "total_flos": 2.4501256105279144e+20, "train_loss": 0.23119104407936014, "train_runtime": 64754.7357, "train_samples_per_second": 5.744, "train_steps_per_second": 0.359 } ], "max_steps": 23250, "num_train_epochs": 50, "total_flos": 2.4501256105279144e+20, "trial_name": null, "trial_params": null }