{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.99866131191432, "global_step": 37300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27, "learning_rate": 4.85e-06, "loss": 153.5094, "step": 100 }, { "epoch": 0.54, "learning_rate": 9.85e-06, "loss": 108.8648, "step": 200 }, { "epoch": 0.8, "learning_rate": 1.48e-05, "loss": 92.5714, "step": 300 }, { "epoch": 1.07, "learning_rate": 1.9800000000000004e-05, "loss": 79.9356, "step": 400 }, { "epoch": 1.34, "learning_rate": 2.48e-05, "loss": 69.8341, "step": 500 }, { "epoch": 1.34, "eval_cer": 1.0, "eval_loss": 80.07215118408203, "eval_runtime": 130.8213, "eval_samples_per_second": 17.597, "eval_steps_per_second": 2.201, "eval_wer": 1.0, "step": 500 }, { "epoch": 1.61, "learning_rate": 2.98e-05, "loss": 54.2478, "step": 600 }, { "epoch": 1.88, "learning_rate": 3.48e-05, "loss": 35.5793, "step": 700 }, { "epoch": 2.14, "learning_rate": 3.9800000000000005e-05, "loss": 17.7978, "step": 800 }, { "epoch": 2.41, "learning_rate": 4.4800000000000005e-05, "loss": 8.1204, "step": 900 }, { "epoch": 2.68, "learning_rate": 4.9800000000000004e-05, "loss": 6.6418, "step": 1000 }, { "epoch": 2.68, "eval_cer": 1.0, "eval_loss": 6.634645938873291, "eval_runtime": 117.0489, "eval_samples_per_second": 19.667, "eval_steps_per_second": 2.461, "eval_wer": 1.0, "step": 1000 }, { "epoch": 2.95, "learning_rate": 5.4800000000000004e-05, "loss": 6.3633, "step": 1100 }, { "epoch": 3.22, "learning_rate": 5.9800000000000003e-05, "loss": 6.364, "step": 1200 }, { "epoch": 3.48, "learning_rate": 6.48e-05, "loss": 6.2461, "step": 1300 }, { "epoch": 3.75, "learning_rate": 6.98e-05, "loss": 6.2242, "step": 1400 }, { "epoch": 4.02, "learning_rate": 7.48e-05, "loss": 6.2419, "step": 1500 }, { "epoch": 4.02, "eval_cer": 1.0, "eval_loss": 6.290937423706055, "eval_runtime": 117.0781, "eval_samples_per_second": 19.662, "eval_steps_per_second": 2.46, "eval_wer": 1.0, "step": 1500 }, { "epoch": 4.29, "learning_rate": 7.98e-05, "loss": 6.1691, "step": 1600 }, { "epoch": 4.56, "learning_rate": 8.48e-05, "loss": 6.1668, "step": 1700 }, { "epoch": 4.82, "learning_rate": 8.98e-05, "loss": 6.1623, "step": 1800 }, { "epoch": 5.09, "learning_rate": 9.48e-05, "loss": 6.1884, "step": 1900 }, { "epoch": 5.36, "learning_rate": 9.98e-05, "loss": 6.0813, "step": 2000 }, { "epoch": 5.36, "eval_cer": 1.0, "eval_loss": 6.115033149719238, "eval_runtime": 117.9948, "eval_samples_per_second": 19.509, "eval_steps_per_second": 2.441, "eval_wer": 1.0, "step": 2000 }, { "epoch": 5.63, "learning_rate": 9.972804532577904e-05, "loss": 6.1027, "step": 2100 }, { "epoch": 5.9, "learning_rate": 9.944475920679887e-05, "loss": 6.0586, "step": 2200 }, { "epoch": 6.17, "learning_rate": 9.91614730878187e-05, "loss": 6.0399, "step": 2300 }, { "epoch": 6.43, "learning_rate": 9.887818696883852e-05, "loss": 6.0035, "step": 2400 }, { "epoch": 6.7, "learning_rate": 9.859490084985836e-05, "loss": 5.9677, "step": 2500 }, { "epoch": 6.7, "eval_cer": 1.002818566168114, "eval_loss": 6.030123233795166, "eval_runtime": 116.8078, "eval_samples_per_second": 19.708, "eval_steps_per_second": 2.466, "eval_wer": 1.1385881333910783, "step": 2500 }, { "epoch": 6.97, "learning_rate": 9.831161473087818e-05, "loss": 5.9617, "step": 2600 }, { "epoch": 7.24, "learning_rate": 9.802832861189802e-05, "loss": 5.9736, "step": 2700 }, { "epoch": 7.51, "learning_rate": 9.774504249291784e-05, "loss": 5.9098, "step": 2800 }, { "epoch": 7.77, "learning_rate": 9.746175637393768e-05, "loss": 5.9069, "step": 2900 }, { "epoch": 8.04, "learning_rate": 9.717847025495752e-05, "loss": 5.9296, "step": 3000 }, { "epoch": 8.04, "eval_cer": 1.0057735145701687, "eval_loss": 5.897457599639893, "eval_runtime": 116.427, "eval_samples_per_second": 19.772, "eval_steps_per_second": 2.474, "eval_wer": 1.2113469034213946, "step": 3000 }, { "epoch": 8.31, "learning_rate": 9.689518413597734e-05, "loss": 5.8213, "step": 3100 }, { "epoch": 8.58, "learning_rate": 9.661189801699718e-05, "loss": 5.8241, "step": 3200 }, { "epoch": 8.85, "learning_rate": 9.632861189801701e-05, "loss": 5.7787, "step": 3300 }, { "epoch": 9.12, "learning_rate": 9.604532577903684e-05, "loss": 5.7529, "step": 3400 }, { "epoch": 9.38, "learning_rate": 9.576203966005666e-05, "loss": 5.6434, "step": 3500 }, { "epoch": 9.38, "eval_cer": 1.017093239987271, "eval_loss": 5.540365219116211, "eval_runtime": 116.7079, "eval_samples_per_second": 19.724, "eval_steps_per_second": 2.468, "eval_wer": 2.16240796881767, "step": 3500 }, { "epoch": 9.65, "learning_rate": 9.54787535410765e-05, "loss": 5.6259, "step": 3600 }, { "epoch": 9.92, "learning_rate": 9.519546742209632e-05, "loss": 5.5488, "step": 3700 }, { "epoch": 10.19, "learning_rate": 9.491218130311616e-05, "loss": 5.5068, "step": 3800 }, { "epoch": 10.46, "learning_rate": 9.462889518413598e-05, "loss": 5.3439, "step": 3900 }, { "epoch": 10.72, "learning_rate": 9.434560906515582e-05, "loss": 5.1974, "step": 4000 }, { "epoch": 10.72, "eval_cer": 0.9365822612174387, "eval_loss": 4.543997287750244, "eval_runtime": 115.8257, "eval_samples_per_second": 19.875, "eval_steps_per_second": 2.486, "eval_wer": 2.170203551320918, "step": 4000 }, { "epoch": 10.99, "learning_rate": 9.406232294617564e-05, "loss": 4.8692, "step": 4100 }, { "epoch": 11.26, "learning_rate": 9.377903682719548e-05, "loss": 4.7155, "step": 4200 }, { "epoch": 11.53, "learning_rate": 9.34957507082153e-05, "loss": 4.4978, "step": 4300 }, { "epoch": 11.8, "learning_rate": 9.321246458923513e-05, "loss": 4.4105, "step": 4400 }, { "epoch": 12.06, "learning_rate": 9.292917847025496e-05, "loss": 4.3601, "step": 4500 }, { "epoch": 12.06, "eval_cer": 0.8998045187980179, "eval_loss": 3.383898973464966, "eval_runtime": 117.0791, "eval_samples_per_second": 19.662, "eval_steps_per_second": 2.46, "eval_wer": 2.246427024686011, "step": 4500 }, { "epoch": 12.33, "learning_rate": 9.264589235127479e-05, "loss": 4.1745, "step": 4600 }, { "epoch": 12.6, "learning_rate": 9.236260623229462e-05, "loss": 4.1194, "step": 4700 }, { "epoch": 12.87, "learning_rate": 9.207932011331445e-05, "loss": 4.0809, "step": 4800 }, { "epoch": 13.14, "learning_rate": 9.179603399433428e-05, "loss": 3.9556, "step": 4900 }, { "epoch": 13.4, "learning_rate": 9.151558073654392e-05, "loss": 3.9321, "step": 5000 }, { "epoch": 13.4, "eval_cer": 0.8400236395872165, "eval_loss": 2.8784573078155518, "eval_runtime": 117.9053, "eval_samples_per_second": 19.524, "eval_steps_per_second": 2.443, "eval_wer": 2.3096578605456908, "step": 5000 }, { "epoch": 13.67, "learning_rate": 9.123229461756374e-05, "loss": 3.8826, "step": 5100 }, { "epoch": 13.94, "learning_rate": 9.094900849858358e-05, "loss": 3.7975, "step": 5200 }, { "epoch": 14.21, "learning_rate": 9.06657223796034e-05, "loss": 3.7704, "step": 5300 }, { "epoch": 14.48, "learning_rate": 9.038243626062324e-05, "loss": 3.6848, "step": 5400 }, { "epoch": 14.74, "learning_rate": 9.009915014164306e-05, "loss": 3.6462, "step": 5500 }, { "epoch": 14.74, "eval_cer": 0.6663181342910397, "eval_loss": 2.510770797729492, "eval_runtime": 116.5338, "eval_samples_per_second": 19.754, "eval_steps_per_second": 2.471, "eval_wer": 1.9623213512343005, "step": 5500 }, { "epoch": 15.01, "learning_rate": 8.98158640226629e-05, "loss": 3.584, "step": 5600 }, { "epoch": 15.28, "learning_rate": 8.953257790368272e-05, "loss": 3.531, "step": 5700 }, { "epoch": 15.55, "learning_rate": 8.925212464589235e-05, "loss": 3.5509, "step": 5800 }, { "epoch": 15.82, "learning_rate": 8.896883852691219e-05, "loss": 3.524, "step": 5900 }, { "epoch": 16.09, "learning_rate": 8.868555240793201e-05, "loss": 3.5156, "step": 6000 }, { "epoch": 16.09, "eval_cer": 0.5705778060644633, "eval_loss": 2.2789571285247803, "eval_runtime": 115.731, "eval_samples_per_second": 19.891, "eval_steps_per_second": 2.489, "eval_wer": 1.6478995236032914, "step": 6000 }, { "epoch": 16.35, "learning_rate": 8.840226628895184e-05, "loss": 3.4294, "step": 6100 }, { "epoch": 16.62, "learning_rate": 8.811898016997168e-05, "loss": 3.4156, "step": 6200 }, { "epoch": 16.89, "learning_rate": 8.78356940509915e-05, "loss": 3.3933, "step": 6300 }, { "epoch": 17.16, "learning_rate": 8.755240793201134e-05, "loss": 3.3293, "step": 6400 }, { "epoch": 17.43, "learning_rate": 8.726912181303116e-05, "loss": 3.32, "step": 6500 }, { "epoch": 17.43, "eval_cer": 0.6244033277265082, "eval_loss": 2.1449646949768066, "eval_runtime": 119.2852, "eval_samples_per_second": 19.298, "eval_steps_per_second": 2.414, "eval_wer": 1.833694239930706, "step": 6500 }, { "epoch": 17.69, "learning_rate": 8.6985835694051e-05, "loss": 3.3019, "step": 6600 }, { "epoch": 17.96, "learning_rate": 8.670538243626063e-05, "loss": 3.3058, "step": 6700 }, { "epoch": 18.23, "learning_rate": 8.642209631728045e-05, "loss": 3.289, "step": 6800 }, { "epoch": 18.5, "learning_rate": 8.613881019830029e-05, "loss": 3.208, "step": 6900 }, { "epoch": 18.77, "learning_rate": 8.585552407932011e-05, "loss": 3.1918, "step": 7000 }, { "epoch": 18.77, "eval_cer": 0.6017184161476565, "eval_loss": 1.8536365032196045, "eval_runtime": 116.4854, "eval_samples_per_second": 19.762, "eval_steps_per_second": 2.472, "eval_wer": 1.939367691641403, "step": 7000 }, { "epoch": 19.03, "learning_rate": 8.557223796033995e-05, "loss": 3.1877, "step": 7100 }, { "epoch": 19.3, "learning_rate": 8.528895184135977e-05, "loss": 3.0893, "step": 7200 }, { "epoch": 19.57, "learning_rate": 8.500566572237961e-05, "loss": 3.102, "step": 7300 }, { "epoch": 19.84, "learning_rate": 8.472521246458924e-05, "loss": 3.0536, "step": 7400 }, { "epoch": 20.11, "learning_rate": 8.444192634560907e-05, "loss": 3.1139, "step": 7500 }, { "epoch": 20.11, "eval_cer": 0.5638496158567078, "eval_loss": 1.7204933166503906, "eval_runtime": 116.1457, "eval_samples_per_second": 19.82, "eval_steps_per_second": 2.48, "eval_wer": 1.9112169770463403, "step": 7500 }, { "epoch": 20.37, "learning_rate": 8.41586402266289e-05, "loss": 2.9958, "step": 7600 }, { "epoch": 20.64, "learning_rate": 8.387535410764873e-05, "loss": 3.0055, "step": 7700 }, { "epoch": 20.91, "learning_rate": 8.359206798866855e-05, "loss": 2.9673, "step": 7800 }, { "epoch": 21.18, "learning_rate": 8.330878186968839e-05, "loss": 2.9276, "step": 7900 }, { "epoch": 21.45, "learning_rate": 8.302549575070821e-05, "loss": 2.8995, "step": 8000 }, { "epoch": 21.45, "eval_cer": 0.3250443242260308, "eval_loss": 1.5478395223617554, "eval_runtime": 120.8897, "eval_samples_per_second": 19.042, "eval_steps_per_second": 2.382, "eval_wer": 1.0623646600259853, "step": 8000 }, { "epoch": 21.71, "learning_rate": 8.274220963172805e-05, "loss": 2.8602, "step": 8100 }, { "epoch": 21.98, "learning_rate": 8.245892351274787e-05, "loss": 2.877, "step": 8200 }, { "epoch": 22.25, "learning_rate": 8.217563739376771e-05, "loss": 2.8283, "step": 8300 }, { "epoch": 22.52, "learning_rate": 8.189235127478753e-05, "loss": 2.7887, "step": 8400 }, { "epoch": 22.79, "learning_rate": 8.160906515580737e-05, "loss": 2.7572, "step": 8500 }, { "epoch": 22.79, "eval_cer": 0.33668227485566216, "eval_loss": 1.406813144683838, "eval_runtime": 117.2331, "eval_samples_per_second": 19.636, "eval_steps_per_second": 2.457, "eval_wer": 1.141186660892161, "step": 8500 }, { "epoch": 23.06, "learning_rate": 8.132577903682719e-05, "loss": 2.7576, "step": 8600 }, { "epoch": 23.32, "learning_rate": 8.104249291784703e-05, "loss": 2.7336, "step": 8700 }, { "epoch": 23.59, "learning_rate": 8.075920679886687e-05, "loss": 2.6792, "step": 8800 }, { "epoch": 23.86, "learning_rate": 8.047592067988669e-05, "loss": 2.6983, "step": 8900 }, { "epoch": 24.13, "learning_rate": 8.019263456090653e-05, "loss": 2.6881, "step": 9000 }, { "epoch": 24.13, "eval_cer": 0.5683047688321134, "eval_loss": 1.3311798572540283, "eval_runtime": 116.7773, "eval_samples_per_second": 19.713, "eval_steps_per_second": 2.466, "eval_wer": 2.009961022087484, "step": 9000 }, { "epoch": 24.4, "learning_rate": 7.990934844192635e-05, "loss": 2.6439, "step": 9100 }, { "epoch": 24.66, "learning_rate": 7.962606232294619e-05, "loss": 2.6563, "step": 9200 }, { "epoch": 24.93, "learning_rate": 7.934277620396601e-05, "loss": 2.6792, "step": 9300 }, { "epoch": 25.2, "learning_rate": 7.905949008498585e-05, "loss": 2.6905, "step": 9400 }, { "epoch": 25.47, "learning_rate": 7.877620396600567e-05, "loss": 2.5993, "step": 9500 }, { "epoch": 25.47, "eval_cer": 0.6450425057962449, "eval_loss": 1.2552706003189087, "eval_runtime": 117.844, "eval_samples_per_second": 19.534, "eval_steps_per_second": 2.444, "eval_wer": 2.003897791251624, "step": 9500 }, { "epoch": 25.74, "learning_rate": 7.849291784702551e-05, "loss": 2.6243, "step": 9600 }, { "epoch": 26.01, "learning_rate": 7.820963172804533e-05, "loss": 2.5753, "step": 9700 }, { "epoch": 26.27, "learning_rate": 7.792634560906516e-05, "loss": 2.521, "step": 9800 }, { "epoch": 26.54, "learning_rate": 7.76458923512748e-05, "loss": 2.5546, "step": 9900 }, { "epoch": 26.81, "learning_rate": 7.736260623229463e-05, "loss": 2.5304, "step": 10000 }, { "epoch": 26.81, "eval_cer": 0.5788971223348638, "eval_loss": 1.242166519165039, "eval_runtime": 116.3994, "eval_samples_per_second": 19.777, "eval_steps_per_second": 2.474, "eval_wer": 2.039411000433088, "step": 10000 }, { "epoch": 27.08, "learning_rate": 7.707932011331445e-05, "loss": 2.5599, "step": 10100 }, { "epoch": 27.35, "learning_rate": 7.679603399433429e-05, "loss": 2.4878, "step": 10200 }, { "epoch": 27.61, "learning_rate": 7.651274787535411e-05, "loss": 2.4684, "step": 10300 }, { "epoch": 27.88, "learning_rate": 7.622946175637395e-05, "loss": 2.4647, "step": 10400 }, { "epoch": 28.15, "learning_rate": 7.594617563739377e-05, "loss": 2.4352, "step": 10500 }, { "epoch": 28.15, "eval_cer": 0.5506659999090785, "eval_loss": 1.1581844091415405, "eval_runtime": 116.2358, "eval_samples_per_second": 19.805, "eval_steps_per_second": 2.478, "eval_wer": 1.9969683845820703, "step": 10500 }, { "epoch": 28.42, "learning_rate": 7.56628895184136e-05, "loss": 2.4437, "step": 10600 }, { "epoch": 28.69, "learning_rate": 7.537960339943343e-05, "loss": 2.44, "step": 10700 }, { "epoch": 28.95, "learning_rate": 7.509631728045327e-05, "loss": 2.447, "step": 10800 }, { "epoch": 29.22, "learning_rate": 7.481303116147309e-05, "loss": 2.4203, "step": 10900 }, { "epoch": 29.49, "learning_rate": 7.452974504249293e-05, "loss": 2.3795, "step": 11000 }, { "epoch": 29.49, "eval_cer": 0.4843842342137564, "eval_loss": 1.1159536838531494, "eval_runtime": 117.3261, "eval_samples_per_second": 19.621, "eval_steps_per_second": 2.455, "eval_wer": 1.8254655695106106, "step": 11000 }, { "epoch": 29.76, "learning_rate": 7.424645892351275e-05, "loss": 2.3967, "step": 11100 }, { "epoch": 30.03, "learning_rate": 7.396317280453257e-05, "loss": 2.3546, "step": 11200 }, { "epoch": 30.29, "learning_rate": 7.367988668555241e-05, "loss": 2.343, "step": 11300 }, { "epoch": 30.56, "learning_rate": 7.339660056657224e-05, "loss": 2.3377, "step": 11400 }, { "epoch": 30.83, "learning_rate": 7.311331444759207e-05, "loss": 2.3287, "step": 11500 }, { "epoch": 30.83, "eval_cer": 0.3780060917397827, "eval_loss": 1.0775071382522583, "eval_runtime": 118.2979, "eval_samples_per_second": 19.459, "eval_steps_per_second": 2.435, "eval_wer": 1.4122996968384582, "step": 11500 }, { "epoch": 31.1, "learning_rate": 7.28300283286119e-05, "loss": 2.341, "step": 11600 }, { "epoch": 31.37, "learning_rate": 7.254674220963173e-05, "loss": 2.3039, "step": 11700 }, { "epoch": 31.63, "learning_rate": 7.226345609065156e-05, "loss": 2.2769, "step": 11800 }, { "epoch": 31.9, "learning_rate": 7.198300283286119e-05, "loss": 2.323, "step": 11900 }, { "epoch": 32.17, "learning_rate": 7.169971671388103e-05, "loss": 2.2622, "step": 12000 }, { "epoch": 32.17, "eval_cer": 0.48938491612492613, "eval_loss": 1.0703905820846558, "eval_runtime": 116.0515, "eval_samples_per_second": 19.836, "eval_steps_per_second": 2.482, "eval_wer": 1.7444781290601992, "step": 12000 }, { "epoch": 32.44, "learning_rate": 7.141643059490085e-05, "loss": 2.2663, "step": 12100 }, { "epoch": 32.71, "learning_rate": 7.113314447592069e-05, "loss": 2.2797, "step": 12200 }, { "epoch": 32.97, "learning_rate": 7.084985835694051e-05, "loss": 2.264, "step": 12300 }, { "epoch": 33.24, "learning_rate": 7.056657223796033e-05, "loss": 2.2497, "step": 12400 }, { "epoch": 33.51, "learning_rate": 7.028328611898017e-05, "loss": 2.2225, "step": 12500 }, { "epoch": 33.51, "eval_cer": 0.5057962449424922, "eval_loss": 1.0272445678710938, "eval_runtime": 118.2938, "eval_samples_per_second": 19.46, "eval_steps_per_second": 2.435, "eval_wer": 1.7236899090515374, "step": 12500 }, { "epoch": 33.78, "learning_rate": 7e-05, "loss": 2.2025, "step": 12600 }, { "epoch": 34.05, "learning_rate": 6.971671388101983e-05, "loss": 2.1892, "step": 12700 }, { "epoch": 34.32, "learning_rate": 6.943342776203965e-05, "loss": 2.1498, "step": 12800 }, { "epoch": 34.58, "learning_rate": 6.915014164305949e-05, "loss": 2.1819, "step": 12900 }, { "epoch": 34.85, "learning_rate": 6.886685552407931e-05, "loss": 2.1843, "step": 13000 }, { "epoch": 34.85, "eval_cer": 0.5028412965404373, "eval_loss": 0.9756352305412292, "eval_runtime": 117.2229, "eval_samples_per_second": 19.638, "eval_steps_per_second": 2.457, "eval_wer": 1.8042442615851018, "step": 13000 }, { "epoch": 35.12, "learning_rate": 6.858356940509915e-05, "loss": 2.1578, "step": 13100 }, { "epoch": 35.39, "learning_rate": 6.830028328611899e-05, "loss": 2.1083, "step": 13200 }, { "epoch": 35.66, "learning_rate": 6.801699716713881e-05, "loss": 2.1531, "step": 13300 }, { "epoch": 35.92, "learning_rate": 6.773371104815865e-05, "loss": 2.11, "step": 13400 }, { "epoch": 36.19, "learning_rate": 6.745042492917847e-05, "loss": 2.1, "step": 13500 }, { "epoch": 36.19, "eval_cer": 0.6055371186980043, "eval_loss": 0.9526697993278503, "eval_runtime": 118.3448, "eval_samples_per_second": 19.452, "eval_steps_per_second": 2.434, "eval_wer": 1.8908618449545258, "step": 13500 }, { "epoch": 36.46, "learning_rate": 6.716713881019831e-05, "loss": 2.0948, "step": 13600 }, { "epoch": 36.73, "learning_rate": 6.688385269121813e-05, "loss": 2.071, "step": 13700 }, { "epoch": 37.0, "learning_rate": 6.660056657223797e-05, "loss": 2.1179, "step": 13800 }, { "epoch": 37.27, "learning_rate": 6.63172804532578e-05, "loss": 2.0444, "step": 13900 }, { "epoch": 37.53, "learning_rate": 6.603399433427763e-05, "loss": 2.0741, "step": 14000 }, { "epoch": 37.53, "eval_cer": 0.5880347320089103, "eval_loss": 0.941799521446228, "eval_runtime": 117.0385, "eval_samples_per_second": 19.669, "eval_steps_per_second": 2.461, "eval_wer": 1.902555218709398, "step": 14000 }, { "epoch": 37.8, "learning_rate": 6.575070821529745e-05, "loss": 2.0937, "step": 14100 }, { "epoch": 38.07, "learning_rate": 6.546742209631729e-05, "loss": 2.0848, "step": 14200 }, { "epoch": 38.34, "learning_rate": 6.518413597733712e-05, "loss": 2.0235, "step": 14300 }, { "epoch": 38.61, "learning_rate": 6.490084985835695e-05, "loss": 2.0165, "step": 14400 }, { "epoch": 38.87, "learning_rate": 6.461756373937678e-05, "loss": 2.0179, "step": 14500 }, { "epoch": 38.87, "eval_cer": 0.5245715324817021, "eval_loss": 0.93625807762146, "eval_runtime": 117.33, "eval_samples_per_second": 19.62, "eval_steps_per_second": 2.455, "eval_wer": 1.797747942832395, "step": 14500 }, { "epoch": 39.14, "learning_rate": 6.43342776203966e-05, "loss": 1.9771, "step": 14600 }, { "epoch": 39.41, "learning_rate": 6.405099150141644e-05, "loss": 1.9721, "step": 14700 }, { "epoch": 39.68, "learning_rate": 6.376770538243626e-05, "loss": 2.0099, "step": 14800 }, { "epoch": 39.95, "learning_rate": 6.34844192634561e-05, "loss": 2.0237, "step": 14900 }, { "epoch": 40.21, "learning_rate": 6.320113314447592e-05, "loss": 2.0615, "step": 15000 }, { "epoch": 40.21, "eval_cer": 0.5598945310724189, "eval_loss": 0.9634870886802673, "eval_runtime": 118.3611, "eval_samples_per_second": 19.449, "eval_steps_per_second": 2.433, "eval_wer": 1.8111736682546558, "step": 15000 }, { "epoch": 40.48, "learning_rate": 6.291784702549576e-05, "loss": 1.9647, "step": 15100 }, { "epoch": 40.75, "learning_rate": 6.263456090651558e-05, "loss": 1.9683, "step": 15200 }, { "epoch": 41.02, "learning_rate": 6.235127478753542e-05, "loss": 1.9311, "step": 15300 }, { "epoch": 41.29, "learning_rate": 6.206798866855524e-05, "loss": 1.9126, "step": 15400 }, { "epoch": 41.55, "learning_rate": 6.178470254957506e-05, "loss": 1.9448, "step": 15500 }, { "epoch": 41.55, "eval_cer": 0.491430649634041, "eval_loss": 0.9248816967010498, "eval_runtime": 116.7415, "eval_samples_per_second": 19.719, "eval_steps_per_second": 2.467, "eval_wer": 1.7249891728020788, "step": 15500 }, { "epoch": 41.82, "learning_rate": 6.15014164305949e-05, "loss": 1.934, "step": 15600 }, { "epoch": 42.09, "learning_rate": 6.121813031161473e-05, "loss": 2.0163, "step": 15700 }, { "epoch": 42.36, "learning_rate": 6.093484419263457e-05, "loss": 1.8725, "step": 15800 }, { "epoch": 42.63, "learning_rate": 6.065155807365439e-05, "loss": 1.9246, "step": 15900 }, { "epoch": 42.89, "learning_rate": 6.036827195467423e-05, "loss": 1.8966, "step": 16000 }, { "epoch": 42.89, "eval_cer": 0.4318770741464745, "eval_loss": 0.9022775888442993, "eval_runtime": 116.8996, "eval_samples_per_second": 19.692, "eval_steps_per_second": 2.464, "eval_wer": 1.5829363360762234, "step": 16000 }, { "epoch": 43.16, "learning_rate": 6.008498583569405e-05, "loss": 1.8316, "step": 16100 }, { "epoch": 43.43, "learning_rate": 5.9804532577903686e-05, "loss": 1.8786, "step": 16200 }, { "epoch": 43.7, "learning_rate": 5.9521246458923516e-05, "loss": 1.84, "step": 16300 }, { "epoch": 43.97, "learning_rate": 5.9237960339943346e-05, "loss": 1.875, "step": 16400 }, { "epoch": 44.24, "learning_rate": 5.895750708215297e-05, "loss": 1.8662, "step": 16500 }, { "epoch": 44.24, "eval_cer": 0.42301222894031004, "eval_loss": 0.9001737236976624, "eval_runtime": 118.0444, "eval_samples_per_second": 19.501, "eval_steps_per_second": 2.44, "eval_wer": 1.483326115201386, "step": 16500 }, { "epoch": 44.5, "learning_rate": 5.867422096317281e-05, "loss": 1.8645, "step": 16600 }, { "epoch": 44.77, "learning_rate": 5.839093484419263e-05, "loss": 1.8243, "step": 16700 }, { "epoch": 45.04, "learning_rate": 5.810764872521247e-05, "loss": 1.7991, "step": 16800 }, { "epoch": 45.31, "learning_rate": 5.78243626062323e-05, "loss": 1.7956, "step": 16900 }, { "epoch": 45.58, "learning_rate": 5.754107648725213e-05, "loss": 1.8136, "step": 17000 }, { "epoch": 45.58, "eval_cer": 0.2986770923307724, "eval_loss": 0.9075531959533691, "eval_runtime": 118.7643, "eval_samples_per_second": 19.383, "eval_steps_per_second": 2.425, "eval_wer": 1.1827631009094846, "step": 17000 }, { "epoch": 45.84, "learning_rate": 5.725779036827196e-05, "loss": 1.8, "step": 17100 }, { "epoch": 46.11, "learning_rate": 5.6974504249291784e-05, "loss": 1.8339, "step": 17200 }, { "epoch": 46.38, "learning_rate": 5.669121813031162e-05, "loss": 1.7869, "step": 17300 }, { "epoch": 46.65, "learning_rate": 5.6407932011331444e-05, "loss": 1.8145, "step": 17400 }, { "epoch": 46.92, "learning_rate": 5.612464589235128e-05, "loss": 1.7908, "step": 17500 }, { "epoch": 46.92, "eval_cer": 0.42578533436377686, "eval_loss": 0.8774313926696777, "eval_runtime": 118.9119, "eval_samples_per_second": 19.359, "eval_steps_per_second": 2.422, "eval_wer": 1.577306193157211, "step": 17500 }, { "epoch": 47.18, "learning_rate": 5.5841359773371105e-05, "loss": 1.7488, "step": 17600 }, { "epoch": 47.45, "learning_rate": 5.555807365439094e-05, "loss": 1.7289, "step": 17700 }, { "epoch": 47.72, "learning_rate": 5.5274787535410765e-05, "loss": 1.7722, "step": 17800 }, { "epoch": 47.99, "learning_rate": 5.49915014164306e-05, "loss": 1.7659, "step": 17900 }, { "epoch": 48.26, "learning_rate": 5.4708215297450426e-05, "loss": 1.7354, "step": 18000 }, { "epoch": 48.26, "eval_cer": 0.40241851161522024, "eval_loss": 0.8727295398712158, "eval_runtime": 117.7378, "eval_samples_per_second": 19.552, "eval_steps_per_second": 2.446, "eval_wer": 1.5036812472932006, "step": 18000 }, { "epoch": 48.52, "learning_rate": 5.442492917847026e-05, "loss": 1.7538, "step": 18100 }, { "epoch": 48.79, "learning_rate": 5.4141643059490086e-05, "loss": 1.7304, "step": 18200 }, { "epoch": 49.06, "learning_rate": 5.385835694050991e-05, "loss": 1.7194, "step": 18300 }, { "epoch": 49.33, "learning_rate": 5.357507082152975e-05, "loss": 1.6824, "step": 18400 }, { "epoch": 49.6, "learning_rate": 5.329178470254958e-05, "loss": 1.6739, "step": 18500 }, { "epoch": 49.6, "eval_cer": 0.27890166840932856, "eval_loss": 0.8635693788528442, "eval_runtime": 118.5578, "eval_samples_per_second": 19.417, "eval_steps_per_second": 2.429, "eval_wer": 1.1238631442182763, "step": 18500 }, { "epoch": 49.86, "learning_rate": 5.300849858356941e-05, "loss": 1.6807, "step": 18600 }, { "epoch": 50.13, "learning_rate": 5.272521246458924e-05, "loss": 1.6651, "step": 18700 }, { "epoch": 50.4, "learning_rate": 5.2441926345609075e-05, "loss": 1.7008, "step": 18800 }, { "epoch": 50.67, "learning_rate": 5.21586402266289e-05, "loss": 1.6183, "step": 18900 }, { "epoch": 50.94, "learning_rate": 5.1875354107648735e-05, "loss": 1.6457, "step": 19000 }, { "epoch": 50.94, "eval_cer": 0.3103605037050507, "eval_loss": 0.8516315221786499, "eval_runtime": 117.1957, "eval_samples_per_second": 19.642, "eval_steps_per_second": 2.457, "eval_wer": 1.2269380684278908, "step": 19000 }, { "epoch": 51.21, "learning_rate": 5.159206798866856e-05, "loss": 1.6506, "step": 19100 }, { "epoch": 51.47, "learning_rate": 5.130878186968838e-05, "loss": 1.6612, "step": 19200 }, { "epoch": 51.74, "learning_rate": 5.102549575070822e-05, "loss": 1.6339, "step": 19300 }, { "epoch": 52.01, "learning_rate": 5.074220963172804e-05, "loss": 1.6134, "step": 19400 }, { "epoch": 52.28, "learning_rate": 5.045892351274788e-05, "loss": 1.5847, "step": 19500 }, { "epoch": 52.28, "eval_cer": 0.33600036368595715, "eval_loss": 0.8398524522781372, "eval_runtime": 116.7512, "eval_samples_per_second": 19.717, "eval_steps_per_second": 2.467, "eval_wer": 1.3308791684711996, "step": 19500 }, { "epoch": 52.55, "learning_rate": 5.01756373937677e-05, "loss": 1.5839, "step": 19600 }, { "epoch": 52.81, "learning_rate": 4.9892351274787533e-05, "loss": 1.5887, "step": 19700 }, { "epoch": 53.08, "learning_rate": 4.9609065155807364e-05, "loss": 1.6578, "step": 19800 }, { "epoch": 53.35, "learning_rate": 4.9325779036827194e-05, "loss": 1.5896, "step": 19900 }, { "epoch": 53.62, "learning_rate": 4.9042492917847024e-05, "loss": 1.5971, "step": 20000 }, { "epoch": 53.62, "eval_cer": 0.3334545619857253, "eval_loss": 0.844145655632019, "eval_runtime": 117.5266, "eval_samples_per_second": 19.587, "eval_steps_per_second": 2.451, "eval_wer": 1.3152880034647034, "step": 20000 }, { "epoch": 53.89, "learning_rate": 4.8759206798866854e-05, "loss": 1.5645, "step": 20100 }, { "epoch": 54.16, "learning_rate": 4.847592067988669e-05, "loss": 1.481, "step": 20200 }, { "epoch": 54.42, "learning_rate": 4.819263456090652e-05, "loss": 1.5474, "step": 20300 }, { "epoch": 54.69, "learning_rate": 4.790934844192635e-05, "loss": 1.576, "step": 20400 }, { "epoch": 54.96, "learning_rate": 4.762606232294618e-05, "loss": 1.602, "step": 20500 }, { "epoch": 54.96, "eval_cer": 0.34331954357412375, "eval_loss": 0.8589980602264404, "eval_runtime": 117.5211, "eval_samples_per_second": 19.588, "eval_steps_per_second": 2.451, "eval_wer": 1.2932005197055003, "step": 20500 }, { "epoch": 55.23, "learning_rate": 4.734277620396601e-05, "loss": 1.6106, "step": 20600 }, { "epoch": 55.5, "learning_rate": 4.7059490084985836e-05, "loss": 1.551, "step": 20700 }, { "epoch": 55.76, "learning_rate": 4.6776203966005666e-05, "loss": 1.5118, "step": 20800 }, { "epoch": 56.03, "learning_rate": 4.64957507082153e-05, "loss": 1.5028, "step": 20900 }, { "epoch": 56.3, "learning_rate": 4.621246458923513e-05, "loss": 1.5063, "step": 21000 }, { "epoch": 56.3, "eval_cer": 0.28749374914761106, "eval_loss": 0.8333584070205688, "eval_runtime": 116.6165, "eval_samples_per_second": 19.74, "eval_steps_per_second": 2.47, "eval_wer": 1.1312256388046773, "step": 21000 }, { "epoch": 56.57, "learning_rate": 4.592917847025496e-05, "loss": 1.5195, "step": 21100 }, { "epoch": 56.84, "learning_rate": 4.564589235127479e-05, "loss": 1.549, "step": 21200 }, { "epoch": 57.1, "learning_rate": 4.536260623229462e-05, "loss": 1.5636, "step": 21300 }, { "epoch": 57.37, "learning_rate": 4.507932011331445e-05, "loss": 1.482, "step": 21400 }, { "epoch": 57.64, "learning_rate": 4.479603399433428e-05, "loss": 1.4631, "step": 21500 }, { "epoch": 57.64, "eval_cer": 0.2999045324362413, "eval_loss": 0.8474038243293762, "eval_runtime": 118.5932, "eval_samples_per_second": 19.411, "eval_steps_per_second": 2.428, "eval_wer": 1.169770463404071, "step": 21500 }, { "epoch": 57.91, "learning_rate": 4.451274787535411e-05, "loss": 1.4869, "step": 21600 }, { "epoch": 58.18, "learning_rate": 4.422946175637394e-05, "loss": 1.4692, "step": 21700 }, { "epoch": 58.44, "learning_rate": 4.394617563739377e-05, "loss": 1.4673, "step": 21800 }, { "epoch": 58.71, "learning_rate": 4.36628895184136e-05, "loss": 1.501, "step": 21900 }, { "epoch": 58.98, "learning_rate": 4.3379603399433425e-05, "loss": 1.4997, "step": 22000 }, { "epoch": 58.98, "eval_cer": 0.38541619311724323, "eval_loss": 0.8637779355049133, "eval_runtime": 116.453, "eval_samples_per_second": 19.768, "eval_steps_per_second": 2.473, "eval_wer": 1.4278908618449546, "step": 22000 }, { "epoch": 59.25, "learning_rate": 4.3096317280453255e-05, "loss": 1.4404, "step": 22100 }, { "epoch": 59.52, "learning_rate": 4.2815864022662894e-05, "loss": 1.4639, "step": 22200 }, { "epoch": 59.78, "learning_rate": 4.2532577903682725e-05, "loss": 1.4724, "step": 22300 }, { "epoch": 60.05, "learning_rate": 4.224929178470255e-05, "loss": 1.4146, "step": 22400 }, { "epoch": 60.32, "learning_rate": 4.196600566572238e-05, "loss": 1.4301, "step": 22500 }, { "epoch": 60.32, "eval_cer": 0.32995408464790654, "eval_loss": 0.8549993634223938, "eval_runtime": 118.194, "eval_samples_per_second": 19.476, "eval_steps_per_second": 2.437, "eval_wer": 1.27371156344738, "step": 22500 }, { "epoch": 60.59, "learning_rate": 4.168271954674221e-05, "loss": 1.4288, "step": 22600 }, { "epoch": 60.86, "learning_rate": 4.139943342776204e-05, "loss": 1.4183, "step": 22700 }, { "epoch": 61.13, "learning_rate": 4.111614730878187e-05, "loss": 1.3995, "step": 22800 }, { "epoch": 61.39, "learning_rate": 4.08328611898017e-05, "loss": 1.3967, "step": 22900 }, { "epoch": 61.66, "learning_rate": 4.054957507082153e-05, "loss": 1.3798, "step": 23000 }, { "epoch": 61.66, "eval_cer": 0.2934491066963677, "eval_loss": 0.8265963792800903, "eval_runtime": 118.5302, "eval_samples_per_second": 19.421, "eval_steps_per_second": 2.43, "eval_wer": 1.1801645734084019, "step": 23000 }, { "epoch": 61.93, "learning_rate": 4.026628895184136e-05, "loss": 1.3781, "step": 23100 }, { "epoch": 62.2, "learning_rate": 3.99830028328612e-05, "loss": 1.412, "step": 23200 }, { "epoch": 62.47, "learning_rate": 3.969971671388103e-05, "loss": 1.3643, "step": 23300 }, { "epoch": 62.73, "learning_rate": 3.941643059490085e-05, "loss": 1.3848, "step": 23400 }, { "epoch": 63.0, "learning_rate": 3.913314447592068e-05, "loss": 1.3454, "step": 23500 }, { "epoch": 63.0, "eval_cer": 0.3711415192980861, "eval_loss": 0.8234531879425049, "eval_runtime": 118.9878, "eval_samples_per_second": 19.347, "eval_steps_per_second": 2.42, "eval_wer": 1.3815504547423128, "step": 23500 }, { "epoch": 63.27, "learning_rate": 3.884985835694051e-05, "loss": 1.3549, "step": 23600 }, { "epoch": 63.54, "learning_rate": 3.856657223796034e-05, "loss": 1.3746, "step": 23700 }, { "epoch": 63.81, "learning_rate": 3.828328611898017e-05, "loss": 1.3619, "step": 23800 }, { "epoch": 64.07, "learning_rate": 3.8e-05, "loss": 1.4683, "step": 23900 }, { "epoch": 64.34, "learning_rate": 3.771671388101983e-05, "loss": 1.3678, "step": 24000 }, { "epoch": 64.34, "eval_cer": 0.5034777469654953, "eval_loss": 0.8549569249153137, "eval_runtime": 117.2623, "eval_samples_per_second": 19.631, "eval_steps_per_second": 2.456, "eval_wer": 1.642702468601126, "step": 24000 }, { "epoch": 64.61, "learning_rate": 3.743342776203966e-05, "loss": 1.3534, "step": 24100 }, { "epoch": 64.88, "learning_rate": 3.715014164305949e-05, "loss": 1.341, "step": 24200 }, { "epoch": 65.15, "learning_rate": 3.686685552407932e-05, "loss": 1.2738, "step": 24300 }, { "epoch": 65.41, "learning_rate": 3.658356940509915e-05, "loss": 1.3237, "step": 24400 }, { "epoch": 65.68, "learning_rate": 3.630028328611898e-05, "loss": 1.3761, "step": 24500 }, { "epoch": 65.68, "eval_cer": 0.490703277719689, "eval_loss": 0.8510046601295471, "eval_runtime": 120.5712, "eval_samples_per_second": 19.092, "eval_steps_per_second": 2.389, "eval_wer": 1.6708531831961888, "step": 24500 }, { "epoch": 65.95, "learning_rate": 3.6016997167138814e-05, "loss": 1.3209, "step": 24600 }, { "epoch": 66.22, "learning_rate": 3.5733711048158644e-05, "loss": 1.4141, "step": 24700 }, { "epoch": 66.49, "learning_rate": 3.5450424929178474e-05, "loss": 1.3229, "step": 24800 }, { "epoch": 66.76, "learning_rate": 3.5167138810198305e-05, "loss": 1.3413, "step": 24900 }, { "epoch": 67.02, "learning_rate": 3.4883852691218135e-05, "loss": 1.2668, "step": 25000 }, { "epoch": 67.02, "eval_cer": 0.45051597945174343, "eval_loss": 0.8514528274536133, "eval_runtime": 118.6832, "eval_samples_per_second": 19.396, "eval_steps_per_second": 2.427, "eval_wer": 1.5842355998267648, "step": 25000 }, { "epoch": 67.29, "learning_rate": 3.4600566572237965e-05, "loss": 1.3151, "step": 25100 }, { "epoch": 67.56, "learning_rate": 3.4317280453257796e-05, "loss": 1.3491, "step": 25200 }, { "epoch": 67.83, "learning_rate": 3.4033994334277626e-05, "loss": 1.3392, "step": 25300 }, { "epoch": 68.1, "learning_rate": 3.375070821529745e-05, "loss": 1.3551, "step": 25400 }, { "epoch": 68.36, "learning_rate": 3.346742209631728e-05, "loss": 1.2835, "step": 25500 }, { "epoch": 68.36, "eval_cer": 0.4221030140473701, "eval_loss": 0.8283268213272095, "eval_runtime": 118.4861, "eval_samples_per_second": 19.428, "eval_steps_per_second": 2.431, "eval_wer": 1.5352966652230402, "step": 25500 }, { "epoch": 68.63, "learning_rate": 3.318413597733711e-05, "loss": 1.2847, "step": 25600 }, { "epoch": 68.9, "learning_rate": 3.290084985835694e-05, "loss": 1.3164, "step": 25700 }, { "epoch": 69.17, "learning_rate": 3.261756373937677e-05, "loss": 1.2624, "step": 25800 }, { "epoch": 69.44, "learning_rate": 3.23342776203966e-05, "loss": 1.3301, "step": 25900 }, { "epoch": 69.7, "learning_rate": 3.205099150141643e-05, "loss": 1.2961, "step": 26000 }, { "epoch": 69.7, "eval_cer": 0.43692321680229124, "eval_loss": 0.8339292407035828, "eval_runtime": 119.6696, "eval_samples_per_second": 19.236, "eval_steps_per_second": 2.407, "eval_wer": 1.574274577739281, "step": 26000 }, { "epoch": 69.97, "learning_rate": 3.176770538243626e-05, "loss": 1.2716, "step": 26100 }, { "epoch": 70.24, "learning_rate": 3.148441926345609e-05, "loss": 1.2832, "step": 26200 }, { "epoch": 70.51, "learning_rate": 3.120113314447592e-05, "loss": 1.2607, "step": 26300 }, { "epoch": 70.78, "learning_rate": 3.091784702549575e-05, "loss": 1.2774, "step": 26400 }, { "epoch": 71.05, "learning_rate": 3.0637393767705384e-05, "loss": 1.2656, "step": 26500 }, { "epoch": 71.05, "eval_cer": 0.42169386734554715, "eval_loss": 0.8330555558204651, "eval_runtime": 120.4651, "eval_samples_per_second": 19.109, "eval_steps_per_second": 2.391, "eval_wer": 1.5331312256388048, "step": 26500 }, { "epoch": 71.31, "learning_rate": 3.0354107648725215e-05, "loss": 1.2885, "step": 26600 }, { "epoch": 71.58, "learning_rate": 3.007082152974504e-05, "loss": 1.2552, "step": 26700 }, { "epoch": 71.85, "learning_rate": 2.9787535410764872e-05, "loss": 1.2682, "step": 26800 }, { "epoch": 72.12, "learning_rate": 2.9504249291784702e-05, "loss": 1.2665, "step": 26900 }, { "epoch": 72.39, "learning_rate": 2.9220963172804532e-05, "loss": 1.2556, "step": 27000 }, { "epoch": 72.39, "eval_cer": 0.4109196708642088, "eval_loss": 0.8242233991622925, "eval_runtime": 118.9857, "eval_samples_per_second": 19.347, "eval_steps_per_second": 2.42, "eval_wer": 1.4707665656128195, "step": 27000 }, { "epoch": 72.65, "learning_rate": 2.8937677053824363e-05, "loss": 1.2125, "step": 27100 }, { "epoch": 72.92, "learning_rate": 2.8654390934844193e-05, "loss": 1.2157, "step": 27200 }, { "epoch": 73.19, "learning_rate": 2.8371104815864023e-05, "loss": 1.2664, "step": 27300 }, { "epoch": 73.46, "learning_rate": 2.8087818696883857e-05, "loss": 1.2075, "step": 27400 }, { "epoch": 73.73, "learning_rate": 2.7804532577903687e-05, "loss": 1.2043, "step": 27500 }, { "epoch": 73.73, "eval_cer": 0.40305496204027824, "eval_loss": 0.8244912624359131, "eval_runtime": 118.8221, "eval_samples_per_second": 19.373, "eval_steps_per_second": 2.424, "eval_wer": 1.4469467301862278, "step": 27500 }, { "epoch": 73.99, "learning_rate": 2.7521246458923517e-05, "loss": 1.2218, "step": 27600 }, { "epoch": 74.26, "learning_rate": 2.723796033994334e-05, "loss": 1.2257, "step": 27700 }, { "epoch": 74.53, "learning_rate": 2.695467422096317e-05, "loss": 1.1943, "step": 27800 }, { "epoch": 74.8, "learning_rate": 2.6671388101983e-05, "loss": 1.2292, "step": 27900 }, { "epoch": 75.07, "learning_rate": 2.638810198300283e-05, "loss": 1.2722, "step": 28000 }, { "epoch": 75.07, "eval_cer": 0.4095558485247988, "eval_loss": 0.8202398419380188, "eval_runtime": 118.3165, "eval_samples_per_second": 19.456, "eval_steps_per_second": 2.434, "eval_wer": 1.4924209614551753, "step": 28000 }, { "epoch": 75.33, "learning_rate": 2.6104815864022665e-05, "loss": 1.2, "step": 28100 }, { "epoch": 75.6, "learning_rate": 2.5821529745042495e-05, "loss": 1.1984, "step": 28200 }, { "epoch": 75.87, "learning_rate": 2.5538243626062326e-05, "loss": 1.204, "step": 28300 }, { "epoch": 76.14, "learning_rate": 2.5254957507082156e-05, "loss": 1.103, "step": 28400 }, { "epoch": 76.41, "learning_rate": 2.4971671388101983e-05, "loss": 1.202, "step": 28500 }, { "epoch": 76.41, "eval_cer": 0.37186889121243805, "eval_loss": 0.8290452361106873, "eval_runtime": 117.9552, "eval_samples_per_second": 19.516, "eval_steps_per_second": 2.442, "eval_wer": 1.3806842789086184, "step": 28500 }, { "epoch": 76.67, "learning_rate": 2.4691218130311615e-05, "loss": 1.1915, "step": 28600 }, { "epoch": 76.94, "learning_rate": 2.4407932011331446e-05, "loss": 1.1898, "step": 28700 }, { "epoch": 77.21, "learning_rate": 2.4124645892351276e-05, "loss": 1.2433, "step": 28800 }, { "epoch": 77.48, "learning_rate": 2.3841359773371106e-05, "loss": 1.1887, "step": 28900 }, { "epoch": 77.75, "learning_rate": 2.3558073654390936e-05, "loss": 1.1679, "step": 29000 }, { "epoch": 77.75, "eval_cer": 0.3748693003591399, "eval_loss": 0.8194963335990906, "eval_runtime": 117.7002, "eval_samples_per_second": 19.558, "eval_steps_per_second": 2.447, "eval_wer": 1.4097011693373755, "step": 29000 }, { "epoch": 78.02, "learning_rate": 2.3274787535410767e-05, "loss": 1.1151, "step": 29100 }, { "epoch": 78.28, "learning_rate": 2.2991501416430597e-05, "loss": 1.1638, "step": 29200 }, { "epoch": 78.55, "learning_rate": 2.2708215297450424e-05, "loss": 1.1516, "step": 29300 }, { "epoch": 78.82, "learning_rate": 2.2424929178470254e-05, "loss": 1.1652, "step": 29400 }, { "epoch": 79.09, "learning_rate": 2.2141643059490084e-05, "loss": 1.1967, "step": 29500 }, { "epoch": 79.09, "eval_cer": 0.30767831977087784, "eval_loss": 0.8058642148971558, "eval_runtime": 118.6259, "eval_samples_per_second": 19.406, "eval_steps_per_second": 2.428, "eval_wer": 1.2074491121697704, "step": 29500 }, { "epoch": 79.36, "learning_rate": 2.1858356940509918e-05, "loss": 1.1425, "step": 29600 }, { "epoch": 79.62, "learning_rate": 2.1575070821529748e-05, "loss": 1.1489, "step": 29700 }, { "epoch": 79.89, "learning_rate": 2.1291784702549575e-05, "loss": 1.136, "step": 29800 }, { "epoch": 80.16, "learning_rate": 2.1008498583569405e-05, "loss": 1.0913, "step": 29900 }, { "epoch": 80.43, "learning_rate": 2.0725212464589236e-05, "loss": 1.1241, "step": 30000 }, { "epoch": 80.43, "eval_cer": 0.3270445969904987, "eval_loss": 0.8137025833129883, "eval_runtime": 117.4298, "eval_samples_per_second": 19.603, "eval_steps_per_second": 2.453, "eval_wer": 1.2451277609354698, "step": 30000 }, { "epoch": 80.7, "learning_rate": 2.0441926345609066e-05, "loss": 1.1314, "step": 30100 }, { "epoch": 80.96, "learning_rate": 2.0158640226628896e-05, "loss": 1.1315, "step": 30200 }, { "epoch": 81.23, "learning_rate": 1.9875354107648726e-05, "loss": 1.1535, "step": 30300 }, { "epoch": 81.5, "learning_rate": 1.9592067988668557e-05, "loss": 1.1567, "step": 30400 }, { "epoch": 81.77, "learning_rate": 1.931161473087819e-05, "loss": 1.1414, "step": 30500 }, { "epoch": 81.77, "eval_cer": 0.3120880120016366, "eval_loss": 0.8117419481277466, "eval_runtime": 116.2752, "eval_samples_per_second": 19.798, "eval_steps_per_second": 2.477, "eval_wer": 1.2031182330012993, "step": 30500 }, { "epoch": 82.04, "learning_rate": 1.902832861189802e-05, "loss": 1.0878, "step": 30600 }, { "epoch": 82.31, "learning_rate": 1.8745042492917846e-05, "loss": 1.0806, "step": 30700 }, { "epoch": 82.57, "learning_rate": 1.8461756373937677e-05, "loss": 1.0914, "step": 30800 }, { "epoch": 82.84, "learning_rate": 1.8178470254957507e-05, "loss": 1.1274, "step": 30900 }, { "epoch": 83.11, "learning_rate": 1.7895184135977337e-05, "loss": 1.132, "step": 31000 }, { "epoch": 83.11, "eval_cer": 0.390053189071237, "eval_loss": 0.823433518409729, "eval_runtime": 118.2321, "eval_samples_per_second": 19.47, "eval_steps_per_second": 2.436, "eval_wer": 1.4265915980944133, "step": 31000 }, { "epoch": 83.38, "learning_rate": 1.761189801699717e-05, "loss": 1.0891, "step": 31100 }, { "epoch": 83.65, "learning_rate": 1.7328611898016998e-05, "loss": 1.1041, "step": 31200 }, { "epoch": 83.91, "learning_rate": 1.7045325779036828e-05, "loss": 1.0895, "step": 31300 }, { "epoch": 84.18, "learning_rate": 1.6762039660056658e-05, "loss": 1.0966, "step": 31400 }, { "epoch": 84.45, "learning_rate": 1.647875354107649e-05, "loss": 1.0982, "step": 31500 }, { "epoch": 84.45, "eval_cer": 0.3606855480292767, "eval_loss": 0.8063952326774597, "eval_runtime": 119.0446, "eval_samples_per_second": 19.337, "eval_steps_per_second": 2.419, "eval_wer": 1.3711563447379818, "step": 31500 }, { "epoch": 84.72, "learning_rate": 1.619546742209632e-05, "loss": 1.0969, "step": 31600 }, { "epoch": 84.99, "learning_rate": 1.5912181303116146e-05, "loss": 1.0552, "step": 31700 }, { "epoch": 85.25, "learning_rate": 1.562889518413598e-05, "loss": 1.079, "step": 31800 }, { "epoch": 85.52, "learning_rate": 1.534560906515581e-05, "loss": 1.0959, "step": 31900 }, { "epoch": 85.79, "learning_rate": 1.5062322946175638e-05, "loss": 1.0797, "step": 32000 }, { "epoch": 85.79, "eval_cer": 0.356184934309224, "eval_loss": 0.8166823983192444, "eval_runtime": 117.1158, "eval_samples_per_second": 19.656, "eval_steps_per_second": 2.459, "eval_wer": 1.335643135556518, "step": 32000 }, { "epoch": 86.06, "learning_rate": 1.477903682719547e-05, "loss": 1.1358, "step": 32100 }, { "epoch": 86.33, "learning_rate": 1.4495750708215297e-05, "loss": 1.0718, "step": 32200 }, { "epoch": 86.59, "learning_rate": 1.4212464589235127e-05, "loss": 1.0725, "step": 32300 }, { "epoch": 86.86, "learning_rate": 1.3929178470254959e-05, "loss": 1.0705, "step": 32400 }, { "epoch": 87.13, "learning_rate": 1.364589235127479e-05, "loss": 1.0119, "step": 32500 }, { "epoch": 87.13, "eval_cer": 0.3267718325226167, "eval_loss": 0.8214733600616455, "eval_runtime": 118.8447, "eval_samples_per_second": 19.37, "eval_steps_per_second": 2.423, "eval_wer": 1.2754439151147683, "step": 32500 }, { "epoch": 87.4, "learning_rate": 1.336260623229462e-05, "loss": 1.0571, "step": 32600 }, { "epoch": 87.67, "learning_rate": 1.3079320113314446e-05, "loss": 1.0636, "step": 32700 }, { "epoch": 87.93, "learning_rate": 1.2796033994334278e-05, "loss": 1.0892, "step": 32800 }, { "epoch": 88.2, "learning_rate": 1.2512747875354109e-05, "loss": 1.1356, "step": 32900 }, { "epoch": 88.47, "learning_rate": 1.2229461756373939e-05, "loss": 1.0216, "step": 33000 }, { "epoch": 88.47, "eval_cer": 0.3183615947629222, "eval_loss": 0.816307008266449, "eval_runtime": 118.8553, "eval_samples_per_second": 19.368, "eval_steps_per_second": 2.423, "eval_wer": 1.2511909917713295, "step": 33000 }, { "epoch": 88.74, "learning_rate": 1.1946175637393768e-05, "loss": 1.0226, "step": 33100 }, { "epoch": 89.01, "learning_rate": 1.1662889518413598e-05, "loss": 1.0002, "step": 33200 }, { "epoch": 89.28, "learning_rate": 1.1379603399433428e-05, "loss": 1.0296, "step": 33300 }, { "epoch": 89.54, "learning_rate": 1.1096317280453258e-05, "loss": 1.0635, "step": 33400 }, { "epoch": 89.81, "learning_rate": 1.0813031161473089e-05, "loss": 1.0375, "step": 33500 }, { "epoch": 89.81, "eval_cer": 0.3290448697549666, "eval_loss": 0.8136931657791138, "eval_runtime": 119.3202, "eval_samples_per_second": 19.293, "eval_steps_per_second": 2.414, "eval_wer": 1.2685145084452143, "step": 33500 }, { "epoch": 90.08, "learning_rate": 1.0529745042492919e-05, "loss": 1.096, "step": 33600 }, { "epoch": 90.35, "learning_rate": 1.0246458923512749e-05, "loss": 1.0465, "step": 33700 }, { "epoch": 90.62, "learning_rate": 9.963172804532578e-06, "loss": 1.0637, "step": 33800 }, { "epoch": 90.88, "learning_rate": 9.679886685552408e-06, "loss": 1.0514, "step": 33900 }, { "epoch": 91.15, "learning_rate": 9.396600566572238e-06, "loss": 0.9794, "step": 34000 }, { "epoch": 91.15, "eval_cer": 0.32549893167250077, "eval_loss": 0.8219542503356934, "eval_runtime": 117.5416, "eval_samples_per_second": 19.585, "eval_steps_per_second": 2.45, "eval_wer": 1.2724122996968386, "step": 34000 }, { "epoch": 91.42, "learning_rate": 9.113314447592068e-06, "loss": 1.0268, "step": 34100 }, { "epoch": 91.69, "learning_rate": 8.830028328611899e-06, "loss": 1.0211, "step": 34200 }, { "epoch": 91.96, "learning_rate": 8.546742209631727e-06, "loss": 1.0557, "step": 34300 }, { "epoch": 92.22, "learning_rate": 8.26345609065156e-06, "loss": 1.0814, "step": 34400 }, { "epoch": 92.49, "learning_rate": 7.98016997167139e-06, "loss": 1.0207, "step": 34500 }, { "epoch": 92.49, "eval_cer": 0.33609128517525116, "eval_loss": 0.8165063261985779, "eval_runtime": 117.4538, "eval_samples_per_second": 19.599, "eval_steps_per_second": 2.452, "eval_wer": 1.2906019922044174, "step": 34500 }, { "epoch": 92.76, "learning_rate": 7.696883852691218e-06, "loss": 1.0183, "step": 34600 }, { "epoch": 93.03, "learning_rate": 7.413597733711048e-06, "loss": 0.9889, "step": 34700 }, { "epoch": 93.3, "learning_rate": 7.130311614730878e-06, "loss": 1.0047, "step": 34800 }, { "epoch": 93.56, "learning_rate": 6.847025495750709e-06, "loss": 1.0318, "step": 34900 }, { "epoch": 93.83, "learning_rate": 6.563739376770539e-06, "loss": 1.0169, "step": 35000 }, { "epoch": 93.83, "eval_cer": 0.3305450743283175, "eval_loss": 0.8153378367424011, "eval_runtime": 118.1425, "eval_samples_per_second": 19.485, "eval_steps_per_second": 2.438, "eval_wer": 1.281940233867475, "step": 35000 }, { "epoch": 94.1, "learning_rate": 6.2804532577903686e-06, "loss": 1.0584, "step": 35100 }, { "epoch": 94.37, "learning_rate": 5.997167138810199e-06, "loss": 1.0074, "step": 35200 }, { "epoch": 94.64, "learning_rate": 5.71671388101983e-06, "loss": 1.0278, "step": 35300 }, { "epoch": 94.9, "learning_rate": 5.433427762039661e-06, "loss": 1.0251, "step": 35400 }, { "epoch": 95.17, "learning_rate": 5.15014164305949e-06, "loss": 1.0127, "step": 35500 }, { "epoch": 95.17, "eval_cer": 0.3251807064599718, "eval_loss": 0.8187472224235535, "eval_runtime": 117.413, "eval_samples_per_second": 19.606, "eval_steps_per_second": 2.453, "eval_wer": 1.2832394976180164, "step": 35500 }, { "epoch": 95.44, "learning_rate": 4.86685552407932e-06, "loss": 1.0153, "step": 35600 }, { "epoch": 95.71, "learning_rate": 4.58356940509915e-06, "loss": 1.0098, "step": 35700 }, { "epoch": 95.98, "learning_rate": 4.300283286118981e-06, "loss": 1.0034, "step": 35800 }, { "epoch": 96.25, "learning_rate": 4.01699716713881e-06, "loss": 1.017, "step": 35900 }, { "epoch": 96.51, "learning_rate": 3.7337110481586406e-06, "loss": 0.9978, "step": 36000 }, { "epoch": 96.51, "eval_cer": 0.3209528572078011, "eval_loss": 0.811066746711731, "eval_runtime": 117.24, "eval_samples_per_second": 19.635, "eval_steps_per_second": 2.456, "eval_wer": 1.2611520138588133, "step": 36000 }, { "epoch": 96.78, "learning_rate": 3.4504249291784704e-06, "loss": 1.0111, "step": 36100 }, { "epoch": 97.05, "learning_rate": 3.1671388101983003e-06, "loss": 1.0185, "step": 36200 }, { "epoch": 97.32, "learning_rate": 2.8838526912181305e-06, "loss": 0.9678, "step": 36300 }, { "epoch": 97.59, "learning_rate": 2.6005665722379608e-06, "loss": 1.0271, "step": 36400 }, { "epoch": 97.85, "learning_rate": 2.3172804532577906e-06, "loss": 0.9923, "step": 36500 }, { "epoch": 97.85, "eval_cer": 0.3122243942355776, "eval_loss": 0.8076378703117371, "eval_runtime": 116.5522, "eval_samples_per_second": 19.751, "eval_steps_per_second": 2.471, "eval_wer": 1.227804244261585, "step": 36500 }, { "epoch": 98.12, "learning_rate": 2.0339943342776205e-06, "loss": 0.9955, "step": 36600 }, { "epoch": 98.39, "learning_rate": 1.7507082152974505e-06, "loss": 1.0009, "step": 36700 }, { "epoch": 98.66, "learning_rate": 1.4674220963172806e-06, "loss": 0.9855, "step": 36800 }, { "epoch": 98.93, "learning_rate": 1.1869688385269122e-06, "loss": 1.0056, "step": 36900 }, { "epoch": 99.2, "learning_rate": 9.036827195467423e-07, "loss": 1.0451, "step": 37000 }, { "epoch": 99.2, "eval_cer": 0.3155884893394554, "eval_loss": 0.8086187243461609, "eval_runtime": 119.7199, "eval_samples_per_second": 19.228, "eval_steps_per_second": 2.406, "eval_wer": 1.2451277609354698, "step": 37000 }, { "epoch": 99.46, "learning_rate": 6.203966005665723e-07, "loss": 1.0189, "step": 37100 }, { "epoch": 99.73, "learning_rate": 3.371104815864023e-07, "loss": 0.9878, "step": 37200 }, { "epoch": 100.0, "learning_rate": 5.3824362606232296e-08, "loss": 0.96, "step": 37300 }, { "epoch": 100.0, "step": 37300, "total_flos": 1.39626030021533e+20, "train_loss": 3.7852419735087786, "train_runtime": 72640.1075, "train_samples_per_second": 16.45, "train_steps_per_second": 0.513 } ], "max_steps": 37300, "num_train_epochs": 100, "total_flos": 1.39626030021533e+20, "trial_name": null, "trial_params": null }