diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5437 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 99.99963086009598, + "global_step": 67700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.15, + "learning_rate": 3.6375e-06, + "loss": 135.2123, + "step": 100 + }, + { + "epoch": 0.3, + "learning_rate": 7.3875e-06, + "loss": 102.0052, + "step": 200 + }, + { + "epoch": 0.44, + "learning_rate": 1.1099999999999999e-05, + "loss": 85.3955, + "step": 300 + }, + { + "epoch": 0.59, + "learning_rate": 1.485e-05, + "loss": 78.0905, + "step": 400 + }, + { + "epoch": 0.74, + "learning_rate": 1.8599999999999998e-05, + "loss": 69.215, + "step": 500 + }, + { + "epoch": 0.74, + "eval_cer": 1.0, + "eval_loss": 74.97510528564453, + "eval_runtime": 128.2289, + "eval_samples_per_second": 15.761, + "eval_steps_per_second": 1.973, + "eval_wer": 1.0, + "step": 500 + }, + { + "epoch": 0.89, + "learning_rate": 2.2349999999999998e-05, + "loss": 58.6672, + "step": 600 + }, + { + "epoch": 1.03, + "learning_rate": 2.6099999999999997e-05, + "loss": 44.0383, + "step": 700 + }, + { + "epoch": 1.18, + "learning_rate": 2.985e-05, + "loss": 29.3981, + "step": 800 + }, + { + "epoch": 1.33, + "learning_rate": 3.36e-05, + "loss": 15.4855, + "step": 900 + }, + { + "epoch": 1.48, + "learning_rate": 3.735e-05, + "loss": 8.2109, + "step": 1000 + }, + { + "epoch": 1.48, + "eval_cer": 1.0, + "eval_loss": 7.061740875244141, + "eval_runtime": 114.3157, + "eval_samples_per_second": 17.679, + "eval_steps_per_second": 2.213, + "eval_wer": 1.0, + "step": 1000 + }, + { + "epoch": 1.62, + "learning_rate": 4.11e-05, + "loss": 6.7787, + "step": 1100 + }, + { + "epoch": 1.77, + "learning_rate": 4.484999999999999e-05, + "loss": 6.5334, + "step": 1200 + }, + { + "epoch": 1.92, + "learning_rate": 4.8599999999999995e-05, + "loss": 6.4625, + "step": 1300 + }, + { + "epoch": 2.07, + "learning_rate": 5.234999999999999e-05, + "loss": 6.4425, + "step": 1400 + }, + { + "epoch": 2.22, + "learning_rate": 5.6099999999999995e-05, + "loss": 6.4277, + "step": 1500 + }, + { + "epoch": 2.22, + "eval_cer": 1.0, + "eval_loss": 6.3811211585998535, + "eval_runtime": 113.5954, + "eval_samples_per_second": 17.791, + "eval_steps_per_second": 2.227, + "eval_wer": 1.0, + "step": 1500 + }, + { + "epoch": 2.36, + "learning_rate": 5.985e-05, + "loss": 6.403, + "step": 1600 + }, + { + "epoch": 2.51, + "learning_rate": 6.359999999999999e-05, + "loss": 6.3874, + "step": 1700 + }, + { + "epoch": 2.66, + "learning_rate": 6.735e-05, + "loss": 6.3883, + "step": 1800 + }, + { + "epoch": 2.81, + "learning_rate": 7.11e-05, + "loss": 6.3725, + "step": 1900 + }, + { + "epoch": 2.95, + "learning_rate": 7.484999999999999e-05, + "loss": 6.3513, + "step": 2000 + }, + { + "epoch": 2.95, + "eval_cer": 1.0, + "eval_loss": 6.306080341339111, + "eval_runtime": 112.6769, + "eval_samples_per_second": 17.936, + "eval_steps_per_second": 2.245, + "eval_wer": 1.0, + "step": 2000 + }, + { + "epoch": 3.1, + "learning_rate": 7.489041095890411e-05, + "loss": 6.3448, + "step": 2100 + }, + { + "epoch": 3.25, + "learning_rate": 7.477625570776254e-05, + "loss": 6.3001, + "step": 2200 + }, + { + "epoch": 3.4, + "learning_rate": 7.466210045662099e-05, + "loss": 6.299, + "step": 2300 + }, + { + "epoch": 3.54, + "learning_rate": 7.454794520547944e-05, + "loss": 6.2702, + "step": 2400 + }, + { + "epoch": 3.69, + "learning_rate": 7.443378995433789e-05, + "loss": 6.2522, + "step": 2500 + }, + { + "epoch": 3.69, + "eval_cer": 1.0, + "eval_loss": 6.214696884155273, + "eval_runtime": 112.5168, + "eval_samples_per_second": 17.962, + "eval_steps_per_second": 2.249, + "eval_wer": 1.0, + "step": 2500 + }, + { + "epoch": 3.84, + "learning_rate": 7.431963470319634e-05, + "loss": 6.2413, + "step": 2600 + }, + { + "epoch": 3.99, + "learning_rate": 7.420547945205478e-05, + "loss": 6.2068, + "step": 2700 + }, + { + "epoch": 4.14, + "learning_rate": 7.409132420091323e-05, + "loss": 6.1637, + "step": 2800 + }, + { + "epoch": 4.28, + "learning_rate": 7.397716894977168e-05, + "loss": 6.0668, + "step": 2900 + }, + { + "epoch": 4.43, + "learning_rate": 7.386301369863013e-05, + "loss": 5.9757, + "step": 3000 + }, + { + "epoch": 4.43, + "eval_cer": 0.992371009837382, + "eval_loss": 5.7906413078308105, + "eval_runtime": 112.7465, + "eval_samples_per_second": 17.925, + "eval_steps_per_second": 2.244, + "eval_wer": 1.1004453240969816, + "step": 3000 + }, + { + "epoch": 4.58, + "learning_rate": 7.374885844748858e-05, + "loss": 5.8919, + "step": 3100 + }, + { + "epoch": 4.73, + "learning_rate": 7.363470319634702e-05, + "loss": 5.7631, + "step": 3200 + }, + { + "epoch": 4.87, + "learning_rate": 7.352054794520547e-05, + "loss": 5.4749, + "step": 3300 + }, + { + "epoch": 5.02, + "learning_rate": 7.340639269406392e-05, + "loss": 5.2423, + "step": 3400 + }, + { + "epoch": 5.17, + "learning_rate": 7.329223744292237e-05, + "loss": 5.0642, + "step": 3500 + }, + { + "epoch": 5.17, + "eval_cer": 0.8214414776149368, + "eval_loss": 4.2983784675598145, + "eval_runtime": 113.4295, + "eval_samples_per_second": 17.817, + "eval_steps_per_second": 2.23, + "eval_wer": 1.772884710539337, + "step": 3500 + }, + { + "epoch": 5.32, + "learning_rate": 7.317808219178082e-05, + "loss": 4.9084, + "step": 3600 + }, + { + "epoch": 5.47, + "learning_rate": 7.306392694063926e-05, + "loss": 4.8479, + "step": 3700 + }, + { + "epoch": 5.61, + "learning_rate": 7.294977168949771e-05, + "loss": 4.7123, + "step": 3800 + }, + { + "epoch": 5.76, + "learning_rate": 7.283561643835616e-05, + "loss": 4.6807, + "step": 3900 + }, + { + "epoch": 5.91, + "learning_rate": 7.272146118721461e-05, + "loss": 4.6346, + "step": 4000 + }, + { + "epoch": 5.91, + "eval_cer": 0.7727765508933949, + "eval_loss": 3.7128512859344482, + "eval_runtime": 112.7717, + "eval_samples_per_second": 17.921, + "eval_steps_per_second": 2.243, + "eval_wer": 1.8946066303809994, + "step": 4000 + }, + { + "epoch": 6.06, + "learning_rate": 7.260730593607306e-05, + "loss": 4.5142, + "step": 4100 + }, + { + "epoch": 6.2, + "learning_rate": 7.249315068493149e-05, + "loss": 4.4345, + "step": 4200 + }, + { + "epoch": 6.35, + "learning_rate": 7.237899543378995e-05, + "loss": 4.3973, + "step": 4300 + }, + { + "epoch": 6.5, + "learning_rate": 7.226484018264839e-05, + "loss": 4.3012, + "step": 4400 + }, + { + "epoch": 6.65, + "learning_rate": 7.215068493150685e-05, + "loss": 4.267, + "step": 4500 + }, + { + "epoch": 6.65, + "eval_cer": 0.6921501706484642, + "eval_loss": 3.217658042907715, + "eval_runtime": 113.1901, + "eval_samples_per_second": 17.855, + "eval_steps_per_second": 2.235, + "eval_wer": 1.7525977238990598, + "step": 4500 + }, + { + "epoch": 6.79, + "learning_rate": 7.203767123287671e-05, + "loss": 4.2237, + "step": 4600 + }, + { + "epoch": 6.94, + "learning_rate": 7.192351598173514e-05, + "loss": 4.1326, + "step": 4700 + }, + { + "epoch": 7.09, + "learning_rate": 7.18093607305936e-05, + "loss": 4.0728, + "step": 4800 + }, + { + "epoch": 7.24, + "learning_rate": 7.169520547945204e-05, + "loss": 4.0127, + "step": 4900 + }, + { + "epoch": 7.39, + "learning_rate": 7.15810502283105e-05, + "loss": 3.9964, + "step": 5000 + }, + { + "epoch": 7.39, + "eval_cer": 0.6546476611122265, + "eval_loss": 2.8337087631225586, + "eval_runtime": 112.9864, + "eval_samples_per_second": 17.887, + "eval_steps_per_second": 2.239, + "eval_wer": 1.805541810984661, + "step": 5000 + }, + { + "epoch": 7.53, + "learning_rate": 7.146689497716894e-05, + "loss": 3.9397, + "step": 5100 + }, + { + "epoch": 7.68, + "learning_rate": 7.13527397260274e-05, + "loss": 3.8843, + "step": 5200 + }, + { + "epoch": 7.83, + "learning_rate": 7.123972602739726e-05, + "loss": 3.8829, + "step": 5300 + }, + { + "epoch": 7.98, + "learning_rate": 7.112557077625571e-05, + "loss": 3.8111, + "step": 5400 + }, + { + "epoch": 8.12, + "learning_rate": 7.101141552511414e-05, + "loss": 3.8035, + "step": 5500 + }, + { + "epoch": 8.12, + "eval_cer": 0.6992170246938366, + "eval_loss": 2.572591543197632, + "eval_runtime": 113.1543, + "eval_samples_per_second": 17.861, + "eval_steps_per_second": 2.236, + "eval_wer": 2.1850569025235034, + "step": 5500 + }, + { + "epoch": 8.27, + "learning_rate": 7.08972602739726e-05, + "loss": 3.7678, + "step": 5600 + }, + { + "epoch": 8.42, + "learning_rate": 7.078310502283104e-05, + "loss": 3.7781, + "step": 5700 + }, + { + "epoch": 8.57, + "learning_rate": 7.06689497716895e-05, + "loss": 3.7481, + "step": 5800 + }, + { + "epoch": 8.71, + "learning_rate": 7.055479452054793e-05, + "loss": 3.6356, + "step": 5900 + }, + { + "epoch": 8.86, + "learning_rate": 7.04406392694064e-05, + "loss": 3.6273, + "step": 6000 + }, + { + "epoch": 8.86, + "eval_cer": 0.651074081509737, + "eval_loss": 2.3390893936157227, + "eval_runtime": 111.201, + "eval_samples_per_second": 18.174, + "eval_steps_per_second": 2.275, + "eval_wer": 2.102919346857991, + "step": 6000 + }, + { + "epoch": 9.01, + "learning_rate": 7.032648401826483e-05, + "loss": 3.5851, + "step": 6100 + }, + { + "epoch": 9.16, + "learning_rate": 7.021232876712329e-05, + "loss": 3.5359, + "step": 6200 + }, + { + "epoch": 9.31, + "learning_rate": 7.009817351598173e-05, + "loss": 3.5385, + "step": 6300 + }, + { + "epoch": 9.45, + "learning_rate": 6.998401826484017e-05, + "loss": 3.5492, + "step": 6400 + }, + { + "epoch": 9.6, + "learning_rate": 6.986986301369862e-05, + "loss": 3.5248, + "step": 6500 + }, + { + "epoch": 9.6, + "eval_cer": 0.6858863681991568, + "eval_loss": 2.1943578720092773, + "eval_runtime": 112.8084, + "eval_samples_per_second": 17.915, + "eval_steps_per_second": 2.243, + "eval_wer": 2.3617021276595747, + "step": 6500 + }, + { + "epoch": 9.75, + "learning_rate": 6.975570776255707e-05, + "loss": 3.4791, + "step": 6600 + }, + { + "epoch": 9.9, + "learning_rate": 6.964269406392693e-05, + "loss": 3.4611, + "step": 6700 + }, + { + "epoch": 10.04, + "learning_rate": 6.952853881278538e-05, + "loss": 3.4418, + "step": 6800 + }, + { + "epoch": 10.19, + "learning_rate": 6.941438356164383e-05, + "loss": 3.4109, + "step": 6900 + }, + { + "epoch": 10.34, + "learning_rate": 6.930022831050228e-05, + "loss": 3.3683, + "step": 7000 + }, + { + "epoch": 10.34, + "eval_cer": 0.6063039550291106, + "eval_loss": 1.982745885848999, + "eval_runtime": 112.9729, + "eval_samples_per_second": 17.889, + "eval_steps_per_second": 2.239, + "eval_wer": 2.1014349332013853, + "step": 7000 + }, + { + "epoch": 10.49, + "learning_rate": 6.918721461187214e-05, + "loss": 3.3105, + "step": 7100 + }, + { + "epoch": 10.63, + "learning_rate": 6.907305936073059e-05, + "loss": 3.3242, + "step": 7200 + }, + { + "epoch": 10.78, + "learning_rate": 6.895890410958903e-05, + "loss": 3.3008, + "step": 7300 + }, + { + "epoch": 10.93, + "learning_rate": 6.884474885844748e-05, + "loss": 3.236, + "step": 7400 + }, + { + "epoch": 11.08, + "learning_rate": 6.873059360730593e-05, + "loss": 3.2411, + "step": 7500 + }, + { + "epoch": 11.08, + "eval_cer": 0.5135113431037944, + "eval_loss": 1.8609611988067627, + "eval_runtime": 112.6, + "eval_samples_per_second": 17.948, + "eval_steps_per_second": 2.247, + "eval_wer": 1.6160316674913409, + "step": 7500 + }, + { + "epoch": 11.23, + "learning_rate": 6.861643835616438e-05, + "loss": 3.2234, + "step": 7600 + }, + { + "epoch": 11.37, + "learning_rate": 6.850228310502283e-05, + "loss": 3.2441, + "step": 7700 + }, + { + "epoch": 11.52, + "learning_rate": 6.838812785388127e-05, + "loss": 3.2143, + "step": 7800 + }, + { + "epoch": 11.67, + "learning_rate": 6.827511415525114e-05, + "loss": 3.1661, + "step": 7900 + }, + { + "epoch": 11.82, + "learning_rate": 6.816095890410958e-05, + "loss": 3.1299, + "step": 8000 + }, + { + "epoch": 11.82, + "eval_cer": 0.4946396305962658, + "eval_loss": 1.744581937789917, + "eval_runtime": 113.3696, + "eval_samples_per_second": 17.827, + "eval_steps_per_second": 2.232, + "eval_wer": 1.59475507174666, + "step": 8000 + }, + { + "epoch": 11.96, + "learning_rate": 6.804680365296803e-05, + "loss": 3.1423, + "step": 8100 + }, + { + "epoch": 12.11, + "learning_rate": 6.793264840182648e-05, + "loss": 3.0895, + "step": 8200 + }, + { + "epoch": 12.26, + "learning_rate": 6.781849315068493e-05, + "loss": 3.0671, + "step": 8300 + }, + { + "epoch": 12.41, + "learning_rate": 6.770433789954338e-05, + "loss": 3.0724, + "step": 8400 + }, + { + "epoch": 12.56, + "learning_rate": 6.759018264840182e-05, + "loss": 3.0574, + "step": 8500 + }, + { + "epoch": 12.56, + "eval_cer": 0.4051395302148163, + "eval_loss": 1.6454455852508545, + "eval_runtime": 113.3175, + "eval_samples_per_second": 17.835, + "eval_steps_per_second": 2.233, + "eval_wer": 1.1291439881246907, + "step": 8500 + }, + { + "epoch": 12.7, + "learning_rate": 6.747602739726027e-05, + "loss": 3.047, + "step": 8600 + }, + { + "epoch": 12.85, + "learning_rate": 6.736187214611872e-05, + "loss": 3.0417, + "step": 8700 + }, + { + "epoch": 13.0, + "learning_rate": 6.724771689497717e-05, + "loss": 3.0791, + "step": 8800 + }, + { + "epoch": 13.15, + "learning_rate": 6.713356164383562e-05, + "loss": 3.0183, + "step": 8900 + }, + { + "epoch": 13.29, + "learning_rate": 6.701940639269405e-05, + "loss": 2.985, + "step": 9000 + }, + { + "epoch": 13.29, + "eval_cer": 0.38931941377233487, + "eval_loss": 1.5918797254562378, + "eval_runtime": 113.1024, + "eval_samples_per_second": 17.869, + "eval_steps_per_second": 2.237, + "eval_wer": 1.0672934190994556, + "step": 9000 + }, + { + "epoch": 13.44, + "learning_rate": 6.690525114155251e-05, + "loss": 3.001, + "step": 9100 + }, + { + "epoch": 13.59, + "learning_rate": 6.679109589041095e-05, + "loss": 2.9494, + "step": 9200 + }, + { + "epoch": 13.74, + "learning_rate": 6.667694063926941e-05, + "loss": 2.966, + "step": 9300 + }, + { + "epoch": 13.88, + "learning_rate": 6.656278538812784e-05, + "loss": 2.931, + "step": 9400 + }, + { + "epoch": 14.03, + "learning_rate": 6.64486301369863e-05, + "loss": 2.9573, + "step": 9500 + }, + { + "epoch": 14.03, + "eval_cer": 0.3765910459747039, + "eval_loss": 1.4903326034545898, + "eval_runtime": 113.3108, + "eval_samples_per_second": 17.836, + "eval_steps_per_second": 2.233, + "eval_wer": 1.0603661553686294, + "step": 9500 + }, + { + "epoch": 14.18, + "learning_rate": 6.633447488584474e-05, + "loss": 2.9104, + "step": 9600 + }, + { + "epoch": 14.33, + "learning_rate": 6.622031963470319e-05, + "loss": 2.9196, + "step": 9700 + }, + { + "epoch": 14.48, + "learning_rate": 6.610616438356163e-05, + "loss": 2.9326, + "step": 9800 + }, + { + "epoch": 14.62, + "learning_rate": 6.599200913242008e-05, + "loss": 2.9378, + "step": 9900 + }, + { + "epoch": 14.77, + "learning_rate": 6.587785388127853e-05, + "loss": 2.8897, + "step": 10000 + }, + { + "epoch": 14.77, + "eval_cer": 0.3652680184701867, + "eval_loss": 1.4614206552505493, + "eval_runtime": 113.6791, + "eval_samples_per_second": 17.778, + "eval_steps_per_second": 2.226, + "eval_wer": 1.0059376546264225, + "step": 10000 + }, + { + "epoch": 14.92, + "learning_rate": 6.576369863013698e-05, + "loss": 2.8819, + "step": 10100 + }, + { + "epoch": 15.07, + "learning_rate": 6.564954337899543e-05, + "loss": 2.8727, + "step": 10200 + }, + { + "epoch": 15.21, + "learning_rate": 6.553538812785387e-05, + "loss": 2.8518, + "step": 10300 + }, + { + "epoch": 15.36, + "learning_rate": 6.542123287671232e-05, + "loss": 2.8674, + "step": 10400 + }, + { + "epoch": 15.51, + "learning_rate": 6.530707762557077e-05, + "loss": 2.8169, + "step": 10500 + }, + { + "epoch": 15.51, + "eval_cer": 0.3549889580405541, + "eval_loss": 1.3997114896774292, + "eval_runtime": 113.6295, + "eval_samples_per_second": 17.786, + "eval_steps_per_second": 2.227, + "eval_wer": 1.0029688273132114, + "step": 10500 + }, + { + "epoch": 15.66, + "learning_rate": 6.519292237442922e-05, + "loss": 2.8574, + "step": 10600 + }, + { + "epoch": 15.8, + "learning_rate": 6.507876712328767e-05, + "loss": 2.813, + "step": 10700 + }, + { + "epoch": 15.95, + "learning_rate": 6.496461187214611e-05, + "loss": 2.8157, + "step": 10800 + }, + { + "epoch": 16.1, + "learning_rate": 6.485045662100456e-05, + "loss": 2.7573, + "step": 10900 + }, + { + "epoch": 16.25, + "learning_rate": 6.473630136986301e-05, + "loss": 2.8155, + "step": 11000 + }, + { + "epoch": 16.25, + "eval_cer": 0.344147761493676, + "eval_loss": 1.344403862953186, + "eval_runtime": 114.1268, + "eval_samples_per_second": 17.708, + "eval_steps_per_second": 2.217, + "eval_wer": 0.9980207817911925, + "step": 11000 + }, + { + "epoch": 16.4, + "learning_rate": 6.462214611872146e-05, + "loss": 2.7492, + "step": 11100 + }, + { + "epoch": 16.54, + "learning_rate": 6.45079908675799e-05, + "loss": 2.7507, + "step": 11200 + }, + { + "epoch": 16.69, + "learning_rate": 6.439383561643835e-05, + "loss": 2.7498, + "step": 11300 + }, + { + "epoch": 16.84, + "learning_rate": 6.42796803652968e-05, + "loss": 2.7684, + "step": 11400 + }, + { + "epoch": 16.99, + "learning_rate": 6.416552511415525e-05, + "loss": 2.7595, + "step": 11500 + }, + { + "epoch": 16.99, + "eval_cer": 0.332543665930536, + "eval_loss": 1.291101098060608, + "eval_runtime": 113.7438, + "eval_samples_per_second": 17.768, + "eval_steps_per_second": 2.224, + "eval_wer": 0.9703117268678871, + "step": 11500 + }, + { + "epoch": 17.13, + "learning_rate": 6.405136986301368e-05, + "loss": 2.6838, + "step": 11600 + }, + { + "epoch": 17.28, + "learning_rate": 6.393721461187215e-05, + "loss": 2.7349, + "step": 11700 + }, + { + "epoch": 17.43, + "learning_rate": 6.382305936073058e-05, + "loss": 2.6699, + "step": 11800 + }, + { + "epoch": 17.58, + "learning_rate": 6.370890410958904e-05, + "loss": 2.6713, + "step": 11900 + }, + { + "epoch": 17.72, + "learning_rate": 6.359474885844748e-05, + "loss": 2.7107, + "step": 12000 + }, + { + "epoch": 17.72, + "eval_cer": 0.32274643645854245, + "eval_loss": 1.246199369430542, + "eval_runtime": 113.0259, + "eval_samples_per_second": 17.881, + "eval_steps_per_second": 2.238, + "eval_wer": 0.9564571994062345, + "step": 12000 + }, + { + "epoch": 17.87, + "learning_rate": 6.348059360730594e-05, + "loss": 2.6684, + "step": 12100 + }, + { + "epoch": 18.02, + "learning_rate": 6.336643835616437e-05, + "loss": 2.6952, + "step": 12200 + }, + { + "epoch": 18.17, + "learning_rate": 6.325228310502283e-05, + "loss": 2.674, + "step": 12300 + }, + { + "epoch": 18.32, + "learning_rate": 6.313812785388127e-05, + "loss": 2.6453, + "step": 12400 + }, + { + "epoch": 18.46, + "learning_rate": 6.302397260273972e-05, + "loss": 2.6358, + "step": 12500 + }, + { + "epoch": 18.46, + "eval_cer": 0.3333467175266011, + "eval_loss": 1.2466477155685425, + "eval_runtime": 113.2849, + "eval_samples_per_second": 17.84, + "eval_steps_per_second": 2.233, + "eval_wer": 0.9955467590301831, + "step": 12500 + }, + { + "epoch": 18.61, + "learning_rate": 6.290981735159816e-05, + "loss": 2.6803, + "step": 12600 + }, + { + "epoch": 18.76, + "learning_rate": 6.279566210045661e-05, + "loss": 2.65, + "step": 12700 + }, + { + "epoch": 18.91, + "learning_rate": 6.268150684931506e-05, + "loss": 2.6329, + "step": 12800 + }, + { + "epoch": 19.05, + "learning_rate": 6.256735159817351e-05, + "loss": 2.6163, + "step": 12900 + }, + { + "epoch": 19.2, + "learning_rate": 6.245319634703196e-05, + "loss": 2.5801, + "step": 13000 + }, + { + "epoch": 19.2, + "eval_cer": 0.3226259787191327, + "eval_loss": 1.2059358358383179, + "eval_runtime": 112.9979, + "eval_samples_per_second": 17.885, + "eval_steps_per_second": 2.239, + "eval_wer": 1.0009896091044037, + "step": 13000 + }, + { + "epoch": 19.35, + "learning_rate": 6.23390410958904e-05, + "loss": 2.5665, + "step": 13100 + }, + { + "epoch": 19.5, + "learning_rate": 6.222602739726027e-05, + "loss": 2.6008, + "step": 13200 + }, + { + "epoch": 19.65, + "learning_rate": 6.211187214611871e-05, + "loss": 2.6289, + "step": 13300 + }, + { + "epoch": 19.79, + "learning_rate": 6.199771689497716e-05, + "loss": 2.5758, + "step": 13400 + }, + { + "epoch": 19.94, + "learning_rate": 6.188356164383561e-05, + "loss": 2.5554, + "step": 13500 + }, + { + "epoch": 19.94, + "eval_cer": 0.32234491066050996, + "eval_loss": 1.1918950080871582, + "eval_runtime": 113.3722, + "eval_samples_per_second": 17.826, + "eval_steps_per_second": 2.232, + "eval_wer": 1.0094012864918358, + "step": 13500 + }, + { + "epoch": 20.09, + "learning_rate": 6.176940639269406e-05, + "loss": 2.5404, + "step": 13600 + }, + { + "epoch": 20.24, + "learning_rate": 6.16552511415525e-05, + "loss": 2.4976, + "step": 13700 + }, + { + "epoch": 20.38, + "learning_rate": 6.154109589041095e-05, + "loss": 2.5373, + "step": 13800 + }, + { + "epoch": 20.53, + "learning_rate": 6.14269406392694e-05, + "loss": 2.5567, + "step": 13900 + }, + { + "epoch": 20.68, + "learning_rate": 6.131278538812785e-05, + "loss": 2.5314, + "step": 14000 + }, + { + "epoch": 20.68, + "eval_cer": 0.3155591246737603, + "eval_loss": 1.1703130006790161, + "eval_runtime": 113.4999, + "eval_samples_per_second": 17.806, + "eval_steps_per_second": 2.229, + "eval_wer": 0.9846610588817417, + "step": 14000 + }, + { + "epoch": 20.83, + "learning_rate": 6.11986301369863e-05, + "loss": 2.5179, + "step": 14100 + }, + { + "epoch": 20.97, + "learning_rate": 6.108447488584475e-05, + "loss": 2.5583, + "step": 14200 + }, + { + "epoch": 21.12, + "learning_rate": 6.097031963470319e-05, + "loss": 2.5269, + "step": 14300 + }, + { + "epoch": 21.27, + "learning_rate": 6.085616438356164e-05, + "loss": 2.5432, + "step": 14400 + }, + { + "epoch": 21.42, + "learning_rate": 6.074200913242008e-05, + "loss": 2.509, + "step": 14500 + }, + { + "epoch": 21.42, + "eval_cer": 0.3177273639831359, + "eval_loss": 1.173261284828186, + "eval_runtime": 113.3988, + "eval_samples_per_second": 17.822, + "eval_steps_per_second": 2.231, + "eval_wer": 0.9896091044037605, + "step": 14500 + }, + { + "epoch": 21.57, + "learning_rate": 6.062785388127854e-05, + "loss": 2.5077, + "step": 14600 + }, + { + "epoch": 21.71, + "learning_rate": 6.051369863013698e-05, + "loss": 2.4927, + "step": 14700 + }, + { + "epoch": 21.86, + "learning_rate": 6.039954337899543e-05, + "loss": 2.5003, + "step": 14800 + }, + { + "epoch": 22.01, + "learning_rate": 6.0285388127853875e-05, + "loss": 2.4673, + "step": 14900 + }, + { + "epoch": 22.16, + "learning_rate": 6.017123287671232e-05, + "loss": 2.4391, + "step": 15000 + }, + { + "epoch": 22.16, + "eval_cer": 0.3164023288496286, + "eval_loss": 1.181095838546753, + "eval_runtime": 113.7552, + "eval_samples_per_second": 17.766, + "eval_steps_per_second": 2.224, + "eval_wer": 0.9722909450766947, + "step": 15000 + }, + { + "epoch": 22.3, + "learning_rate": 6.005707762557077e-05, + "loss": 2.5065, + "step": 15100 + }, + { + "epoch": 22.45, + "learning_rate": 5.994292237442922e-05, + "loss": 2.4362, + "step": 15200 + }, + { + "epoch": 22.6, + "learning_rate": 5.982876712328766e-05, + "loss": 2.4788, + "step": 15300 + }, + { + "epoch": 22.75, + "learning_rate": 5.9714611872146115e-05, + "loss": 2.4702, + "step": 15400 + }, + { + "epoch": 22.89, + "learning_rate": 5.9600456621004556e-05, + "loss": 2.4631, + "step": 15500 + }, + { + "epoch": 22.89, + "eval_cer": 0.3059225055209797, + "eval_loss": 1.1381694078445435, + "eval_runtime": 114.6577, + "eval_samples_per_second": 17.626, + "eval_steps_per_second": 2.207, + "eval_wer": 0.9698169223156853, + "step": 15500 + }, + { + "epoch": 23.04, + "learning_rate": 5.948630136986301e-05, + "loss": 2.4725, + "step": 15600 + }, + { + "epoch": 23.19, + "learning_rate": 5.937328767123287e-05, + "loss": 2.4331, + "step": 15700 + }, + { + "epoch": 23.34, + "learning_rate": 5.9259132420091314e-05, + "loss": 2.4275, + "step": 15800 + }, + { + "epoch": 23.49, + "learning_rate": 5.914497716894977e-05, + "loss": 2.444, + "step": 15900 + }, + { + "epoch": 23.63, + "learning_rate": 5.903082191780821e-05, + "loss": 2.4414, + "step": 16000 + }, + { + "epoch": 23.63, + "eval_cer": 0.2972495482834772, + "eval_loss": 1.0892748832702637, + "eval_runtime": 114.5627, + "eval_samples_per_second": 17.641, + "eval_steps_per_second": 2.208, + "eval_wer": 0.9643740722414647, + "step": 16000 + }, + { + "epoch": 23.78, + "learning_rate": 5.8916666666666664e-05, + "loss": 2.3934, + "step": 16100 + }, + { + "epoch": 23.93, + "learning_rate": 5.8802511415525106e-05, + "loss": 2.4254, + "step": 16200 + }, + { + "epoch": 24.08, + "learning_rate": 5.8688356164383554e-05, + "loss": 2.4022, + "step": 16300 + }, + { + "epoch": 24.22, + "learning_rate": 5.8574200913242e-05, + "loss": 2.3613, + "step": 16400 + }, + { + "epoch": 24.37, + "learning_rate": 5.846004566210045e-05, + "loss": 2.3771, + "step": 16500 + }, + { + "epoch": 24.37, + "eval_cer": 0.2954025296125276, + "eval_loss": 1.0930148363113403, + "eval_runtime": 114.2056, + "eval_samples_per_second": 17.696, + "eval_steps_per_second": 2.215, + "eval_wer": 0.9505195447798119, + "step": 16500 + }, + { + "epoch": 24.52, + "learning_rate": 5.83458904109589e-05, + "loss": 2.3801, + "step": 16600 + }, + { + "epoch": 24.67, + "learning_rate": 5.8231735159817346e-05, + "loss": 2.3638, + "step": 16700 + }, + { + "epoch": 24.82, + "learning_rate": 5.8117579908675794e-05, + "loss": 2.3813, + "step": 16800 + }, + { + "epoch": 24.96, + "learning_rate": 5.800342465753424e-05, + "loss": 2.388, + "step": 16900 + }, + { + "epoch": 25.11, + "learning_rate": 5.788926940639268e-05, + "loss": 2.3658, + "step": 17000 + }, + { + "epoch": 25.11, + "eval_cer": 0.2925918490262999, + "eval_loss": 1.0755608081817627, + "eval_runtime": 112.4178, + "eval_samples_per_second": 17.978, + "eval_steps_per_second": 2.251, + "eval_wer": 0.9609104403760514, + "step": 17000 + }, + { + "epoch": 25.26, + "learning_rate": 5.777511415525114e-05, + "loss": 2.3624, + "step": 17100 + }, + { + "epoch": 25.41, + "learning_rate": 5.766095890410958e-05, + "loss": 2.376, + "step": 17200 + }, + { + "epoch": 25.55, + "learning_rate": 5.7546803652968034e-05, + "loss": 2.3337, + "step": 17300 + }, + { + "epoch": 25.7, + "learning_rate": 5.7432648401826475e-05, + "loss": 2.332, + "step": 17400 + }, + { + "epoch": 25.85, + "learning_rate": 5.731849315068493e-05, + "loss": 2.3215, + "step": 17500 + }, + { + "epoch": 25.85, + "eval_cer": 0.2889781168440072, + "eval_loss": 1.0512421131134033, + "eval_runtime": 113.4988, + "eval_samples_per_second": 17.806, + "eval_steps_per_second": 2.229, + "eval_wer": 0.9614052449282533, + "step": 17500 + }, + { + "epoch": 26.0, + "learning_rate": 5.720433789954337e-05, + "loss": 2.3507, + "step": 17600 + }, + { + "epoch": 26.14, + "learning_rate": 5.709246575342465e-05, + "loss": 2.3051, + "step": 17700 + }, + { + "epoch": 26.29, + "learning_rate": 5.69783105022831e-05, + "loss": 2.2839, + "step": 17800 + }, + { + "epoch": 26.44, + "learning_rate": 5.686415525114155e-05, + "loss": 2.3098, + "step": 17900 + }, + { + "epoch": 26.59, + "learning_rate": 5.675e-05, + "loss": 2.3327, + "step": 18000 + }, + { + "epoch": 26.59, + "eval_cer": 0.32816703473198156, + "eval_loss": 1.0626572370529175, + "eval_runtime": 113.1488, + "eval_samples_per_second": 17.861, + "eval_steps_per_second": 2.236, + "eval_wer": 1.198416625432954, + "step": 18000 + }, + { + "epoch": 26.74, + "learning_rate": 5.663584474885844e-05, + "loss": 2.2986, + "step": 18100 + }, + { + "epoch": 26.88, + "learning_rate": 5.652168949771689e-05, + "loss": 2.3408, + "step": 18200 + }, + { + "epoch": 27.03, + "learning_rate": 5.6407534246575334e-05, + "loss": 2.311, + "step": 18300 + }, + { + "epoch": 27.18, + "learning_rate": 5.629337899543379e-05, + "loss": 2.2481, + "step": 18400 + }, + { + "epoch": 27.33, + "learning_rate": 5.617922374429223e-05, + "loss": 2.3055, + "step": 18500 + }, + { + "epoch": 27.33, + "eval_cer": 0.2841196546878137, + "eval_loss": 1.0581693649291992, + "eval_runtime": 113.906, + "eval_samples_per_second": 17.743, + "eval_steps_per_second": 2.221, + "eval_wer": 0.9520039584364176, + "step": 18500 + }, + { + "epoch": 27.47, + "learning_rate": 5.6065068493150685e-05, + "loss": 2.3342, + "step": 18600 + }, + { + "epoch": 27.62, + "learning_rate": 5.5950913242009126e-05, + "loss": 2.3318, + "step": 18700 + }, + { + "epoch": 27.77, + "learning_rate": 5.583675799086758e-05, + "loss": 2.2844, + "step": 18800 + }, + { + "epoch": 27.92, + "learning_rate": 5.572260273972602e-05, + "loss": 2.3207, + "step": 18900 + }, + { + "epoch": 28.06, + "learning_rate": 5.560844748858447e-05, + "loss": 2.299, + "step": 19000 + }, + { + "epoch": 28.06, + "eval_cer": 0.2816703473198153, + "eval_loss": 1.0356189012527466, + "eval_runtime": 114.3143, + "eval_samples_per_second": 17.679, + "eval_steps_per_second": 2.213, + "eval_wer": 0.9480455220188025, + "step": 19000 + }, + { + "epoch": 28.21, + "learning_rate": 5.549429223744292e-05, + "loss": 2.269, + "step": 19100 + }, + { + "epoch": 28.36, + "learning_rate": 5.5380136986301366e-05, + "loss": 2.2875, + "step": 19200 + }, + { + "epoch": 28.51, + "learning_rate": 5.5265981735159814e-05, + "loss": 2.241, + "step": 19300 + }, + { + "epoch": 28.66, + "learning_rate": 5.515182648401826e-05, + "loss": 2.2714, + "step": 19400 + }, + { + "epoch": 28.8, + "learning_rate": 5.5037671232876703e-05, + "loss": 2.2673, + "step": 19500 + }, + { + "epoch": 28.8, + "eval_cer": 0.27709295322224453, + "eval_loss": 1.030503749847412, + "eval_runtime": 113.8786, + "eval_samples_per_second": 17.747, + "eval_steps_per_second": 2.222, + "eval_wer": 0.9366650173181593, + "step": 19500 + }, + { + "epoch": 28.95, + "learning_rate": 5.492351598173516e-05, + "loss": 2.2386, + "step": 19600 + }, + { + "epoch": 29.1, + "learning_rate": 5.48093607305936e-05, + "loss": 2.2634, + "step": 19700 + }, + { + "epoch": 29.25, + "learning_rate": 5.4695205479452054e-05, + "loss": 2.2135, + "step": 19800 + }, + { + "epoch": 29.39, + "learning_rate": 5.4581050228310495e-05, + "loss": 2.2101, + "step": 19900 + }, + { + "epoch": 29.54, + "learning_rate": 5.446689497716895e-05, + "loss": 2.2166, + "step": 20000 + }, + { + "epoch": 29.54, + "eval_cer": 0.27018670949608514, + "eval_loss": 1.0138665437698364, + "eval_runtime": 113.1869, + "eval_samples_per_second": 17.855, + "eval_steps_per_second": 2.235, + "eval_wer": 0.9223156853043047, + "step": 20000 + }, + { + "epoch": 29.69, + "learning_rate": 5.435273972602739e-05, + "loss": 2.2537, + "step": 20100 + }, + { + "epoch": 29.84, + "learning_rate": 5.4238584474885846e-05, + "loss": 2.2282, + "step": 20200 + }, + { + "epoch": 29.98, + "learning_rate": 5.412442922374429e-05, + "loss": 2.2253, + "step": 20300 + }, + { + "epoch": 30.13, + "learning_rate": 5.4010273972602735e-05, + "loss": 2.1831, + "step": 20400 + }, + { + "epoch": 30.28, + "learning_rate": 5.389611872146118e-05, + "loss": 2.2378, + "step": 20500 + }, + { + "epoch": 30.28, + "eval_cer": 0.27215418590644447, + "eval_loss": 1.009473204612732, + "eval_runtime": 113.8468, + "eval_samples_per_second": 17.752, + "eval_steps_per_second": 2.222, + "eval_wer": 0.9267689262741218, + "step": 20500 + }, + { + "epoch": 30.43, + "learning_rate": 5.378196347031963e-05, + "loss": 2.2226, + "step": 20600 + }, + { + "epoch": 30.58, + "learning_rate": 5.366780821917808e-05, + "loss": 2.212, + "step": 20700 + }, + { + "epoch": 30.72, + "learning_rate": 5.355365296803653e-05, + "loss": 2.2008, + "step": 20800 + }, + { + "epoch": 30.87, + "learning_rate": 5.343949771689497e-05, + "loss": 2.1976, + "step": 20900 + }, + { + "epoch": 31.02, + "learning_rate": 5.332534246575342e-05, + "loss": 2.2168, + "step": 21000 + }, + { + "epoch": 31.02, + "eval_cer": 0.26906243726159407, + "eval_loss": 1.000132441520691, + "eval_runtime": 113.357, + "eval_samples_per_second": 17.829, + "eval_steps_per_second": 2.232, + "eval_wer": 0.9084611578426521, + "step": 21000 + }, + { + "epoch": 31.17, + "learning_rate": 5.3211187214611865e-05, + "loss": 2.1397, + "step": 21100 + }, + { + "epoch": 31.31, + "learning_rate": 5.309703196347032e-05, + "loss": 2.1295, + "step": 21200 + }, + { + "epoch": 31.46, + "learning_rate": 5.298287671232876e-05, + "loss": 2.1799, + "step": 21300 + }, + { + "epoch": 31.61, + "learning_rate": 5.2868721461187215e-05, + "loss": 2.1911, + "step": 21400 + }, + { + "epoch": 31.76, + "learning_rate": 5.2754566210045657e-05, + "loss": 2.1766, + "step": 21500 + }, + { + "epoch": 31.76, + "eval_cer": 0.2640032122063843, + "eval_loss": 0.9883873462677002, + "eval_runtime": 112.449, + "eval_samples_per_second": 17.973, + "eval_steps_per_second": 2.25, + "eval_wer": 0.904997525977239, + "step": 21500 + }, + { + "epoch": 31.91, + "learning_rate": 5.264041095890411e-05, + "loss": 2.1739, + "step": 21600 + }, + { + "epoch": 32.05, + "learning_rate": 5.252625570776255e-05, + "loss": 2.1613, + "step": 21700 + }, + { + "epoch": 32.2, + "learning_rate": 5.2413242009132414e-05, + "loss": 2.1642, + "step": 21800 + }, + { + "epoch": 32.35, + "learning_rate": 5.229908675799087e-05, + "loss": 2.1227, + "step": 21900 + }, + { + "epoch": 32.5, + "learning_rate": 5.218493150684931e-05, + "loss": 2.1715, + "step": 22000 + }, + { + "epoch": 32.5, + "eval_cer": 0.271913270427625, + "eval_loss": 0.9729828834533691, + "eval_runtime": 113.2869, + "eval_samples_per_second": 17.84, + "eval_steps_per_second": 2.233, + "eval_wer": 0.9505195447798119, + "step": 22000 + }, + { + "epoch": 32.64, + "learning_rate": 5.207077625570776e-05, + "loss": 2.1648, + "step": 22100 + }, + { + "epoch": 32.79, + "learning_rate": 5.1956621004566206e-05, + "loss": 2.1391, + "step": 22200 + }, + { + "epoch": 32.94, + "learning_rate": 5.1842465753424654e-05, + "loss": 2.1289, + "step": 22300 + }, + { + "epoch": 33.09, + "learning_rate": 5.17283105022831e-05, + "loss": 2.1069, + "step": 22400 + }, + { + "epoch": 33.23, + "learning_rate": 5.161415525114155e-05, + "loss": 2.1104, + "step": 22500 + }, + { + "epoch": 33.23, + "eval_cer": 0.2655691628187111, + "eval_loss": 0.975226104259491, + "eval_runtime": 113.7072, + "eval_samples_per_second": 17.774, + "eval_steps_per_second": 2.225, + "eval_wer": 0.9361702127659575, + "step": 22500 + }, + { + "epoch": 33.38, + "learning_rate": 5.149999999999999e-05, + "loss": 2.104, + "step": 22600 + }, + { + "epoch": 33.53, + "learning_rate": 5.1385844748858446e-05, + "loss": 2.1359, + "step": 22700 + }, + { + "epoch": 33.68, + "learning_rate": 5.127168949771689e-05, + "loss": 2.1363, + "step": 22800 + }, + { + "epoch": 33.83, + "learning_rate": 5.115753424657534e-05, + "loss": 2.1217, + "step": 22900 + }, + { + "epoch": 33.97, + "learning_rate": 5.1043378995433783e-05, + "loss": 2.1158, + "step": 23000 + }, + { + "epoch": 33.97, + "eval_cer": 0.26239710901425417, + "eval_loss": 0.971994161605835, + "eval_runtime": 113.8291, + "eval_samples_per_second": 17.755, + "eval_steps_per_second": 2.223, + "eval_wer": 0.9262741217219198, + "step": 23000 + }, + { + "epoch": 34.12, + "learning_rate": 5.092922374429224e-05, + "loss": 2.1035, + "step": 23100 + }, + { + "epoch": 34.27, + "learning_rate": 5.081506849315068e-05, + "loss": 2.0811, + "step": 23200 + }, + { + "epoch": 34.42, + "learning_rate": 5.070091324200913e-05, + "loss": 2.0994, + "step": 23300 + }, + { + "epoch": 34.56, + "learning_rate": 5.0586757990867575e-05, + "loss": 2.1054, + "step": 23400 + }, + { + "epoch": 34.71, + "learning_rate": 5.047260273972602e-05, + "loss": 2.0718, + "step": 23500 + }, + { + "epoch": 34.71, + "eval_cer": 0.27588837582814696, + "eval_loss": 0.9573031663894653, + "eval_runtime": 113.0887, + "eval_samples_per_second": 17.871, + "eval_steps_per_second": 2.237, + "eval_wer": 1.000494804552202, + "step": 23500 + }, + { + "epoch": 34.86, + "learning_rate": 5.035844748858447e-05, + "loss": 2.1031, + "step": 23600 + }, + { + "epoch": 35.01, + "learning_rate": 5.024429223744292e-05, + "loss": 2.0799, + "step": 23700 + }, + { + "epoch": 35.16, + "learning_rate": 5.013127853881278e-05, + "loss": 2.0666, + "step": 23800 + }, + { + "epoch": 35.3, + "learning_rate": 5.001712328767123e-05, + "loss": 2.079, + "step": 23900 + }, + { + "epoch": 35.45, + "learning_rate": 4.990296803652968e-05, + "loss": 2.0824, + "step": 24000 + }, + { + "epoch": 35.45, + "eval_cer": 0.26428428026500705, + "eval_loss": 0.960873007774353, + "eval_runtime": 113.6706, + "eval_samples_per_second": 17.779, + "eval_steps_per_second": 2.226, + "eval_wer": 0.9524987629886195, + "step": 24000 + }, + { + "epoch": 35.6, + "learning_rate": 4.978881278538812e-05, + "loss": 2.0896, + "step": 24100 + }, + { + "epoch": 35.75, + "learning_rate": 4.967465753424657e-05, + "loss": 2.0978, + "step": 24200 + }, + { + "epoch": 35.89, + "learning_rate": 4.9560502283105014e-05, + "loss": 2.0796, + "step": 24300 + }, + { + "epoch": 36.04, + "learning_rate": 4.944634703196347e-05, + "loss": 2.047, + "step": 24400 + }, + { + "epoch": 36.19, + "learning_rate": 4.933219178082191e-05, + "loss": 2.0591, + "step": 24500 + }, + { + "epoch": 36.19, + "eval_cer": 0.26669343505320214, + "eval_loss": 0.9662116169929504, + "eval_runtime": 112.5392, + "eval_samples_per_second": 17.958, + "eval_steps_per_second": 2.248, + "eval_wer": 0.9569520039584364, + "step": 24500 + }, + { + "epoch": 36.34, + "learning_rate": 4.9218036529680365e-05, + "loss": 2.0458, + "step": 24600 + }, + { + "epoch": 36.48, + "learning_rate": 4.910502283105023e-05, + "loss": 2.0697, + "step": 24700 + }, + { + "epoch": 36.63, + "learning_rate": 4.899086757990867e-05, + "loss": 2.0633, + "step": 24800 + }, + { + "epoch": 36.78, + "learning_rate": 4.887671232876712e-05, + "loss": 2.0296, + "step": 24900 + }, + { + "epoch": 36.93, + "learning_rate": 4.8762557077625564e-05, + "loss": 2.0768, + "step": 25000 + }, + { + "epoch": 36.93, + "eval_cer": 0.26456534832362977, + "eval_loss": 0.9527919292449951, + "eval_runtime": 113.2105, + "eval_samples_per_second": 17.852, + "eval_steps_per_second": 2.235, + "eval_wer": 0.9574468085106383, + "step": 25000 + }, + { + "epoch": 37.08, + "learning_rate": 4.864840182648401e-05, + "loss": 2.0588, + "step": 25100 + }, + { + "epoch": 37.22, + "learning_rate": 4.853424657534246e-05, + "loss": 2.0251, + "step": 25200 + }, + { + "epoch": 37.37, + "learning_rate": 4.842009132420091e-05, + "loss": 2.0823, + "step": 25300 + }, + { + "epoch": 37.52, + "learning_rate": 4.8305936073059356e-05, + "loss": 2.0337, + "step": 25400 + }, + { + "epoch": 37.67, + "learning_rate": 4.8191780821917804e-05, + "loss": 2.0893, + "step": 25500 + }, + { + "epoch": 37.67, + "eval_cer": 0.2612326841999599, + "eval_loss": 0.9809953570365906, + "eval_runtime": 112.6858, + "eval_samples_per_second": 17.935, + "eval_steps_per_second": 2.245, + "eval_wer": 0.9168728352300841, + "step": 25500 + }, + { + "epoch": 37.81, + "learning_rate": 4.807762557077625e-05, + "loss": 2.0755, + "step": 25600 + }, + { + "epoch": 37.96, + "learning_rate": 4.79634703196347e-05, + "loss": 2.0494, + "step": 25700 + }, + { + "epoch": 38.11, + "learning_rate": 4.784931506849314e-05, + "loss": 2.0441, + "step": 25800 + }, + { + "epoch": 38.26, + "learning_rate": 4.7735159817351596e-05, + "loss": 2.022, + "step": 25900 + }, + { + "epoch": 38.4, + "learning_rate": 4.762100456621004e-05, + "loss": 2.0282, + "step": 26000 + }, + { + "epoch": 38.4, + "eval_cer": 0.2527604898614736, + "eval_loss": 0.9555507302284241, + "eval_runtime": 115.3715, + "eval_samples_per_second": 17.517, + "eval_steps_per_second": 2.193, + "eval_wer": 0.8876793666501732, + "step": 26000 + }, + { + "epoch": 38.55, + "learning_rate": 4.750684931506849e-05, + "loss": 2.0534, + "step": 26100 + }, + { + "epoch": 38.7, + "learning_rate": 4.739269406392693e-05, + "loss": 2.0246, + "step": 26200 + }, + { + "epoch": 38.85, + "learning_rate": 4.727853881278539e-05, + "loss": 2.027, + "step": 26300 + }, + { + "epoch": 39.0, + "learning_rate": 4.716438356164383e-05, + "loss": 2.0049, + "step": 26400 + }, + { + "epoch": 39.14, + "learning_rate": 4.705022831050228e-05, + "loss": 1.997, + "step": 26500 + }, + { + "epoch": 39.14, + "eval_cer": 0.2500702670146557, + "eval_loss": 0.9522635340690613, + "eval_runtime": 113.4036, + "eval_samples_per_second": 17.821, + "eval_steps_per_second": 2.231, + "eval_wer": 0.8723404255319149, + "step": 26500 + }, + { + "epoch": 39.29, + "learning_rate": 4.6936073059360725e-05, + "loss": 2.0177, + "step": 26600 + }, + { + "epoch": 39.44, + "learning_rate": 4.682191780821917e-05, + "loss": 2.0133, + "step": 26700 + }, + { + "epoch": 39.59, + "learning_rate": 4.670776255707762e-05, + "loss": 1.9869, + "step": 26800 + }, + { + "epoch": 39.73, + "learning_rate": 4.659360730593607e-05, + "loss": 2.0126, + "step": 26900 + }, + { + "epoch": 39.88, + "learning_rate": 4.647945205479452e-05, + "loss": 2.0209, + "step": 27000 + }, + { + "epoch": 39.88, + "eval_cer": 0.2503111824934752, + "eval_loss": 0.9542492032051086, + "eval_runtime": 112.4842, + "eval_samples_per_second": 17.967, + "eval_steps_per_second": 2.249, + "eval_wer": 0.8772884710539337, + "step": 27000 + }, + { + "epoch": 40.03, + "learning_rate": 4.6365296803652965e-05, + "loss": 2.0127, + "step": 27100 + }, + { + "epoch": 40.18, + "learning_rate": 4.6251141552511406e-05, + "loss": 1.9615, + "step": 27200 + }, + { + "epoch": 40.32, + "learning_rate": 4.613698630136986e-05, + "loss": 2.0066, + "step": 27300 + }, + { + "epoch": 40.47, + "learning_rate": 4.60228310502283e-05, + "loss": 1.9535, + "step": 27400 + }, + { + "epoch": 40.62, + "learning_rate": 4.590867579908676e-05, + "loss": 1.987, + "step": 27500 + }, + { + "epoch": 40.62, + "eval_cer": 0.25003011443485246, + "eval_loss": 0.9427167177200317, + "eval_runtime": 113.6661, + "eval_samples_per_second": 17.78, + "eval_steps_per_second": 2.226, + "eval_wer": 0.8866897575457694, + "step": 27500 + }, + { + "epoch": 40.77, + "learning_rate": 4.57945205479452e-05, + "loss": 2.0127, + "step": 27600 + }, + { + "epoch": 40.92, + "learning_rate": 4.568036529680365e-05, + "loss": 2.0376, + "step": 27700 + }, + { + "epoch": 41.06, + "learning_rate": 4.5566210045662094e-05, + "loss": 1.9884, + "step": 27800 + }, + { + "epoch": 41.21, + "learning_rate": 4.545205479452054e-05, + "loss": 1.9957, + "step": 27900 + }, + { + "epoch": 41.36, + "learning_rate": 4.533789954337899e-05, + "loss": 1.9663, + "step": 28000 + }, + { + "epoch": 41.36, + "eval_cer": 0.2546476611122265, + "eval_loss": 0.9546382427215576, + "eval_runtime": 113.2668, + "eval_samples_per_second": 17.843, + "eval_steps_per_second": 2.234, + "eval_wer": 0.9064819396338446, + "step": 28000 + }, + { + "epoch": 41.51, + "learning_rate": 4.522374429223744e-05, + "loss": 1.9957, + "step": 28100 + }, + { + "epoch": 41.65, + "learning_rate": 4.5109589041095886e-05, + "loss": 1.9788, + "step": 28200 + }, + { + "epoch": 41.8, + "learning_rate": 4.4995433789954334e-05, + "loss": 1.992, + "step": 28300 + }, + { + "epoch": 41.95, + "learning_rate": 4.4881278538812775e-05, + "loss": 1.9798, + "step": 28400 + }, + { + "epoch": 42.1, + "learning_rate": 4.476712328767123e-05, + "loss": 1.9945, + "step": 28500 + }, + { + "epoch": 42.1, + "eval_cer": 0.2536036940373419, + "eval_loss": 0.9430962204933167, + "eval_runtime": 113.3718, + "eval_samples_per_second": 17.826, + "eval_steps_per_second": 2.232, + "eval_wer": 0.9119247897080653, + "step": 28500 + }, + { + "epoch": 42.25, + "learning_rate": 4.465296803652967e-05, + "loss": 1.9457, + "step": 28600 + }, + { + "epoch": 42.39, + "learning_rate": 4.4538812785388126e-05, + "loss": 1.9808, + "step": 28700 + }, + { + "epoch": 42.54, + "learning_rate": 4.442465753424657e-05, + "loss": 1.9565, + "step": 28800 + }, + { + "epoch": 42.69, + "learning_rate": 4.431050228310502e-05, + "loss": 1.9395, + "step": 28900 + }, + { + "epoch": 42.84, + "learning_rate": 4.4196347031963463e-05, + "loss": 1.9604, + "step": 29000 + }, + { + "epoch": 42.84, + "eval_cer": 0.24898614735996788, + "eval_loss": 0.936680018901825, + "eval_runtime": 113.5751, + "eval_samples_per_second": 17.794, + "eval_steps_per_second": 2.228, + "eval_wer": 0.9030183077684315, + "step": 29000 + }, + { + "epoch": 42.98, + "learning_rate": 4.4083333333333325e-05, + "loss": 1.9569, + "step": 29100 + }, + { + "epoch": 43.13, + "learning_rate": 4.396917808219178e-05, + "loss": 1.9521, + "step": 29200 + }, + { + "epoch": 43.28, + "learning_rate": 4.385502283105022e-05, + "loss": 1.9383, + "step": 29300 + }, + { + "epoch": 43.43, + "learning_rate": 4.3740867579908676e-05, + "loss": 1.9199, + "step": 29400 + }, + { + "epoch": 43.57, + "learning_rate": 4.362671232876712e-05, + "loss": 1.933, + "step": 29500 + }, + { + "epoch": 43.57, + "eval_cer": 0.2431640232884963, + "eval_loss": 0.9071494936943054, + "eval_runtime": 113.901, + "eval_samples_per_second": 17.743, + "eval_steps_per_second": 2.221, + "eval_wer": 0.8916378030677882, + "step": 29500 + }, + { + "epoch": 43.72, + "learning_rate": 4.3512557077625565e-05, + "loss": 1.9212, + "step": 29600 + }, + { + "epoch": 43.87, + "learning_rate": 4.339840182648401e-05, + "loss": 1.9337, + "step": 29700 + }, + { + "epoch": 44.02, + "learning_rate": 4.328424657534246e-05, + "loss": 1.9216, + "step": 29800 + }, + { + "epoch": 44.17, + "learning_rate": 4.317009132420091e-05, + "loss": 1.8678, + "step": 29900 + }, + { + "epoch": 44.31, + "learning_rate": 4.305593607305936e-05, + "loss": 1.9227, + "step": 30000 + }, + { + "epoch": 44.31, + "eval_cer": 0.24284280265007027, + "eval_loss": 0.9047707319259644, + "eval_runtime": 114.4717, + "eval_samples_per_second": 17.655, + "eval_steps_per_second": 2.21, + "eval_wer": 0.8881741712023751, + "step": 30000 + }, + { + "epoch": 44.46, + "learning_rate": 4.29417808219178e-05, + "loss": 1.9218, + "step": 30100 + }, + { + "epoch": 44.61, + "learning_rate": 4.282762557077625e-05, + "loss": 1.8979, + "step": 30200 + }, + { + "epoch": 44.76, + "learning_rate": 4.2713470319634694e-05, + "loss": 1.8942, + "step": 30300 + }, + { + "epoch": 44.9, + "learning_rate": 4.259931506849315e-05, + "loss": 1.9312, + "step": 30400 + }, + { + "epoch": 45.05, + "learning_rate": 4.248515981735159e-05, + "loss": 1.8784, + "step": 30500 + }, + { + "epoch": 45.05, + "eval_cer": 0.24372615940574183, + "eval_loss": 0.9106321930885315, + "eval_runtime": 113.4695, + "eval_samples_per_second": 17.811, + "eval_steps_per_second": 2.23, + "eval_wer": 0.8990598713508164, + "step": 30500 + }, + { + "epoch": 45.2, + "learning_rate": 4.2371004566210045e-05, + "loss": 1.8804, + "step": 30600 + }, + { + "epoch": 45.35, + "learning_rate": 4.2256849315068486e-05, + "loss": 1.8806, + "step": 30700 + }, + { + "epoch": 45.49, + "learning_rate": 4.214269406392694e-05, + "loss": 1.8847, + "step": 30800 + }, + { + "epoch": 45.64, + "learning_rate": 4.20296803652968e-05, + "loss": 1.8782, + "step": 30900 + }, + { + "epoch": 45.79, + "learning_rate": 4.1915525114155244e-05, + "loss": 1.8844, + "step": 31000 + }, + { + "epoch": 45.79, + "eval_cer": 0.237863882754467, + "eval_loss": 0.8996090292930603, + "eval_runtime": 113.2035, + "eval_samples_per_second": 17.853, + "eval_steps_per_second": 2.235, + "eval_wer": 0.875804057397328, + "step": 31000 + }, + { + "epoch": 45.94, + "learning_rate": 4.180136986301369e-05, + "loss": 1.8547, + "step": 31100 + }, + { + "epoch": 46.09, + "learning_rate": 4.168721461187214e-05, + "loss": 1.8476, + "step": 31200 + }, + { + "epoch": 46.23, + "learning_rate": 4.157305936073059e-05, + "loss": 1.8317, + "step": 31300 + }, + { + "epoch": 46.38, + "learning_rate": 4.1458904109589036e-05, + "loss": 1.8754, + "step": 31400 + }, + { + "epoch": 46.53, + "learning_rate": 4.1344748858447484e-05, + "loss": 1.8776, + "step": 31500 + }, + { + "epoch": 46.53, + "eval_cer": 0.23946998594659707, + "eval_loss": 0.9027520418167114, + "eval_runtime": 112.8193, + "eval_samples_per_second": 17.914, + "eval_steps_per_second": 2.243, + "eval_wer": 0.8797624938149431, + "step": 31500 + }, + { + "epoch": 46.68, + "learning_rate": 4.1230593607305925e-05, + "loss": 1.8392, + "step": 31600 + }, + { + "epoch": 46.82, + "learning_rate": 4.111643835616438e-05, + "loss": 1.8412, + "step": 31700 + }, + { + "epoch": 46.97, + "learning_rate": 4.100228310502282e-05, + "loss": 1.8716, + "step": 31800 + }, + { + "epoch": 47.12, + "learning_rate": 4.0888127853881276e-05, + "loss": 1.83, + "step": 31900 + }, + { + "epoch": 47.27, + "learning_rate": 4.077397260273972e-05, + "loss": 1.8372, + "step": 32000 + }, + { + "epoch": 47.27, + "eval_cer": 0.23790403533427024, + "eval_loss": 0.9047237634658813, + "eval_runtime": 113.7608, + "eval_samples_per_second": 17.765, + "eval_steps_per_second": 2.224, + "eval_wer": 0.8777832756061356, + "step": 32000 + }, + { + "epoch": 47.41, + "learning_rate": 4.065981735159817e-05, + "loss": 1.8219, + "step": 32100 + }, + { + "epoch": 47.56, + "learning_rate": 4.054566210045661e-05, + "loss": 1.8591, + "step": 32200 + }, + { + "epoch": 47.71, + "learning_rate": 4.043150684931507e-05, + "loss": 1.8271, + "step": 32300 + }, + { + "epoch": 47.86, + "learning_rate": 4.031735159817351e-05, + "loss": 1.8388, + "step": 32400 + }, + { + "epoch": 48.01, + "learning_rate": 4.020319634703196e-05, + "loss": 1.832, + "step": 32500 + }, + { + "epoch": 48.01, + "eval_cer": 0.2392692230475808, + "eval_loss": 0.9015888571739197, + "eval_runtime": 113.7725, + "eval_samples_per_second": 17.764, + "eval_steps_per_second": 2.224, + "eval_wer": 0.8941118258287977, + "step": 32500 + }, + { + "epoch": 48.15, + "learning_rate": 4.0089041095890405e-05, + "loss": 1.8286, + "step": 32600 + }, + { + "epoch": 48.3, + "learning_rate": 3.997488584474885e-05, + "loss": 1.8156, + "step": 32700 + }, + { + "epoch": 48.45, + "learning_rate": 3.98607305936073e-05, + "loss": 1.8137, + "step": 32800 + }, + { + "epoch": 48.6, + "learning_rate": 3.974657534246575e-05, + "loss": 1.816, + "step": 32900 + }, + { + "epoch": 48.74, + "learning_rate": 3.963242009132419e-05, + "loss": 1.8154, + "step": 33000 + }, + { + "epoch": 48.74, + "eval_cer": 0.23722144147761492, + "eval_loss": 0.8914681077003479, + "eval_runtime": 114.1832, + "eval_samples_per_second": 17.7, + "eval_steps_per_second": 2.216, + "eval_wer": 0.8916378030677882, + "step": 33000 + }, + { + "epoch": 48.89, + "learning_rate": 3.9518264840182645e-05, + "loss": 1.8166, + "step": 33100 + }, + { + "epoch": 49.04, + "learning_rate": 3.9404109589041086e-05, + "loss": 1.7795, + "step": 33200 + }, + { + "epoch": 49.19, + "learning_rate": 3.928995433789954e-05, + "loss": 1.7661, + "step": 33300 + }, + { + "epoch": 49.34, + "learning_rate": 3.917579908675798e-05, + "loss": 1.8, + "step": 33400 + }, + { + "epoch": 49.48, + "learning_rate": 3.906164383561644e-05, + "loss": 1.8072, + "step": 33500 + }, + { + "epoch": 49.48, + "eval_cer": 0.23653884762095964, + "eval_loss": 0.8781279921531677, + "eval_runtime": 113.0385, + "eval_samples_per_second": 17.879, + "eval_steps_per_second": 2.238, + "eval_wer": 0.8871845620979713, + "step": 33500 + }, + { + "epoch": 49.63, + "learning_rate": 3.89486301369863e-05, + "loss": 1.7845, + "step": 33600 + }, + { + "epoch": 49.78, + "learning_rate": 3.883447488584475e-05, + "loss": 1.785, + "step": 33700 + }, + { + "epoch": 49.93, + "learning_rate": 3.8720319634703195e-05, + "loss": 1.784, + "step": 33800 + }, + { + "epoch": 50.07, + "learning_rate": 3.860616438356164e-05, + "loss": 1.7633, + "step": 33900 + }, + { + "epoch": 50.22, + "learning_rate": 3.8492009132420084e-05, + "loss": 1.7489, + "step": 34000 + }, + { + "epoch": 50.22, + "eval_cer": 0.234049387673158, + "eval_loss": 0.8738340139389038, + "eval_runtime": 114.2979, + "eval_samples_per_second": 17.682, + "eval_steps_per_second": 2.214, + "eval_wer": 0.8955962394854032, + "step": 34000 + }, + { + "epoch": 50.37, + "learning_rate": 3.837785388127854e-05, + "loss": 1.7925, + "step": 34100 + }, + { + "epoch": 50.52, + "learning_rate": 3.826369863013698e-05, + "loss": 1.7551, + "step": 34200 + }, + { + "epoch": 50.66, + "learning_rate": 3.8149543378995435e-05, + "loss": 1.7946, + "step": 34300 + }, + { + "epoch": 50.81, + "learning_rate": 3.8035388127853876e-05, + "loss": 1.7948, + "step": 34400 + }, + { + "epoch": 50.96, + "learning_rate": 3.792123287671233e-05, + "loss": 1.7928, + "step": 34500 + }, + { + "epoch": 50.96, + "eval_cer": 0.2322826741618149, + "eval_loss": 0.8683820962905884, + "eval_runtime": 113.3816, + "eval_samples_per_second": 17.825, + "eval_steps_per_second": 2.231, + "eval_wer": 0.8871845620979713, + "step": 34500 + }, + { + "epoch": 51.11, + "learning_rate": 3.780707762557077e-05, + "loss": 1.8077, + "step": 34600 + }, + { + "epoch": 51.26, + "learning_rate": 3.769292237442923e-05, + "loss": 1.7541, + "step": 34700 + }, + { + "epoch": 51.4, + "learning_rate": 3.757876712328767e-05, + "loss": 1.7685, + "step": 34800 + }, + { + "epoch": 51.55, + "learning_rate": 3.7464611872146116e-05, + "loss": 1.7638, + "step": 34900 + }, + { + "epoch": 51.7, + "learning_rate": 3.7350456621004564e-05, + "loss": 1.7748, + "step": 35000 + }, + { + "epoch": 51.7, + "eval_cer": 0.23208191126279865, + "eval_loss": 0.8722881078720093, + "eval_runtime": 114.2085, + "eval_samples_per_second": 17.696, + "eval_steps_per_second": 2.215, + "eval_wer": 0.871845620979713, + "step": 35000 + }, + { + "epoch": 51.85, + "learning_rate": 3.723630136986301e-05, + "loss": 1.7681, + "step": 35100 + }, + { + "epoch": 51.99, + "learning_rate": 3.712214611872146e-05, + "loss": 1.7296, + "step": 35200 + }, + { + "epoch": 52.14, + "learning_rate": 3.700799086757991e-05, + "loss": 1.7553, + "step": 35300 + }, + { + "epoch": 52.29, + "learning_rate": 3.689383561643835e-05, + "loss": 1.7645, + "step": 35400 + }, + { + "epoch": 52.44, + "learning_rate": 3.67796803652968e-05, + "loss": 1.7355, + "step": 35500 + }, + { + "epoch": 52.44, + "eval_cer": 0.2331258783376832, + "eval_loss": 0.8760409951210022, + "eval_runtime": 113.1141, + "eval_samples_per_second": 17.867, + "eval_steps_per_second": 2.237, + "eval_wer": 0.88421573478476, + "step": 35500 + }, + { + "epoch": 52.58, + "learning_rate": 3.6665525114155245e-05, + "loss": 1.7223, + "step": 35600 + }, + { + "epoch": 52.73, + "learning_rate": 3.655136986301369e-05, + "loss": 1.7647, + "step": 35700 + }, + { + "epoch": 52.88, + "learning_rate": 3.643721461187214e-05, + "loss": 1.7258, + "step": 35800 + }, + { + "epoch": 53.03, + "learning_rate": 3.632305936073059e-05, + "loss": 1.7425, + "step": 35900 + }, + { + "epoch": 53.18, + "learning_rate": 3.620890410958904e-05, + "loss": 1.7167, + "step": 36000 + }, + { + "epoch": 53.18, + "eval_cer": 0.23236297932142141, + "eval_loss": 0.8746222853660583, + "eval_runtime": 112.5705, + "eval_samples_per_second": 17.953, + "eval_steps_per_second": 2.247, + "eval_wer": 0.8817417120237506, + "step": 36000 + }, + { + "epoch": 53.32, + "learning_rate": 3.6094748858447485e-05, + "loss": 1.7699, + "step": 36100 + }, + { + "epoch": 53.47, + "learning_rate": 3.598059360730593e-05, + "loss": 1.7263, + "step": 36200 + }, + { + "epoch": 53.62, + "learning_rate": 3.586643835616438e-05, + "loss": 1.74, + "step": 36300 + }, + { + "epoch": 53.77, + "learning_rate": 3.575228310502283e-05, + "loss": 1.7331, + "step": 36400 + }, + { + "epoch": 53.91, + "learning_rate": 3.563812785388128e-05, + "loss": 1.7479, + "step": 36500 + }, + { + "epoch": 53.91, + "eval_cer": 0.22810680586227666, + "eval_loss": 0.8762161731719971, + "eval_runtime": 112.6782, + "eval_samples_per_second": 17.936, + "eval_steps_per_second": 2.245, + "eval_wer": 0.8753092528451262, + "step": 36500 + }, + { + "epoch": 54.06, + "learning_rate": 3.5523972602739725e-05, + "loss": 1.745, + "step": 36600 + }, + { + "epoch": 54.21, + "learning_rate": 3.540981735159817e-05, + "loss": 1.7267, + "step": 36700 + }, + { + "epoch": 54.36, + "learning_rate": 3.5295662100456614e-05, + "loss": 1.7239, + "step": 36800 + }, + { + "epoch": 54.5, + "learning_rate": 3.518150684931506e-05, + "loss": 1.7326, + "step": 36900 + }, + { + "epoch": 54.65, + "learning_rate": 3.506735159817351e-05, + "loss": 1.7428, + "step": 37000 + }, + { + "epoch": 54.65, + "eval_cer": 0.22766512748444087, + "eval_loss": 0.8732947707176208, + "eval_runtime": 114.6142, + "eval_samples_per_second": 17.633, + "eval_steps_per_second": 2.207, + "eval_wer": 0.8698664027709055, + "step": 37000 + }, + { + "epoch": 54.8, + "learning_rate": 3.495319634703196e-05, + "loss": 1.7414, + "step": 37100 + }, + { + "epoch": 54.95, + "learning_rate": 3.4839041095890406e-05, + "loss": 1.7484, + "step": 37200 + }, + { + "epoch": 55.1, + "learning_rate": 3.4724885844748854e-05, + "loss": 1.6943, + "step": 37300 + }, + { + "epoch": 55.24, + "learning_rate": 3.46107305936073e-05, + "loss": 1.7224, + "step": 37400 + }, + { + "epoch": 55.39, + "learning_rate": 3.449657534246575e-05, + "loss": 1.7058, + "step": 37500 + }, + { + "epoch": 55.39, + "eval_cer": 0.22625978719132706, + "eval_loss": 0.8816365003585815, + "eval_runtime": 114.0721, + "eval_samples_per_second": 17.717, + "eval_steps_per_second": 2.218, + "eval_wer": 0.8649183572488867, + "step": 37500 + }, + { + "epoch": 55.54, + "learning_rate": 3.43824200913242e-05, + "loss": 1.7052, + "step": 37600 + }, + { + "epoch": 55.69, + "learning_rate": 3.4268264840182646e-05, + "loss": 1.711, + "step": 37700 + }, + { + "epoch": 55.83, + "learning_rate": 3.415525114155251e-05, + "loss": 1.7245, + "step": 37800 + }, + { + "epoch": 55.98, + "learning_rate": 3.4041095890410956e-05, + "loss": 1.7193, + "step": 37900 + }, + { + "epoch": 56.13, + "learning_rate": 3.3926940639269404e-05, + "loss": 1.7045, + "step": 38000 + }, + { + "epoch": 56.13, + "eval_cer": 0.2296727564746035, + "eval_loss": 0.8732736110687256, + "eval_runtime": 115.1044, + "eval_samples_per_second": 17.558, + "eval_steps_per_second": 2.198, + "eval_wer": 0.8688767936665017, + "step": 38000 + }, + { + "epoch": 56.28, + "learning_rate": 3.381278538812785e-05, + "loss": 1.7185, + "step": 38100 + }, + { + "epoch": 56.43, + "learning_rate": 3.36986301369863e-05, + "loss": 1.6965, + "step": 38200 + }, + { + "epoch": 56.57, + "learning_rate": 3.358447488584475e-05, + "loss": 1.6843, + "step": 38300 + }, + { + "epoch": 56.72, + "learning_rate": 3.347031963470319e-05, + "loss": 1.7138, + "step": 38400 + }, + { + "epoch": 56.87, + "learning_rate": 3.335616438356164e-05, + "loss": 1.709, + "step": 38500 + }, + { + "epoch": 56.87, + "eval_cer": 0.22320819112627988, + "eval_loss": 0.8648238182067871, + "eval_runtime": 113.8724, + "eval_samples_per_second": 17.748, + "eval_steps_per_second": 2.222, + "eval_wer": 0.8654131618010885, + "step": 38500 + }, + { + "epoch": 57.02, + "learning_rate": 3.3242009132420085e-05, + "loss": 1.6913, + "step": 38600 + }, + { + "epoch": 57.16, + "learning_rate": 3.312785388127853e-05, + "loss": 1.6798, + "step": 38700 + }, + { + "epoch": 57.31, + "learning_rate": 3.301369863013698e-05, + "loss": 1.6548, + "step": 38800 + }, + { + "epoch": 57.46, + "learning_rate": 3.289954337899543e-05, + "loss": 1.6862, + "step": 38900 + }, + { + "epoch": 57.61, + "learning_rate": 3.278538812785388e-05, + "loss": 1.6799, + "step": 39000 + }, + { + "epoch": 57.61, + "eval_cer": 0.22441276852037745, + "eval_loss": 0.8717127442359924, + "eval_runtime": 113.7564, + "eval_samples_per_second": 17.766, + "eval_steps_per_second": 2.224, + "eval_wer": 0.8579910935180604, + "step": 39000 + }, + { + "epoch": 57.75, + "learning_rate": 3.2671232876712325e-05, + "loss": 1.6937, + "step": 39100 + }, + { + "epoch": 57.9, + "learning_rate": 3.255707762557077e-05, + "loss": 1.6659, + "step": 39200 + }, + { + "epoch": 58.05, + "learning_rate": 3.244292237442922e-05, + "loss": 1.7202, + "step": 39300 + }, + { + "epoch": 58.2, + "learning_rate": 3.232876712328767e-05, + "loss": 1.6737, + "step": 39400 + }, + { + "epoch": 58.35, + "learning_rate": 3.221461187214612e-05, + "loss": 1.664, + "step": 39500 + }, + { + "epoch": 58.35, + "eval_cer": 0.225938566552901, + "eval_loss": 0.8653290867805481, + "eval_runtime": 114.0905, + "eval_samples_per_second": 17.714, + "eval_steps_per_second": 2.218, + "eval_wer": 0.8723404255319149, + "step": 39500 + }, + { + "epoch": 58.49, + "learning_rate": 3.2100456621004565e-05, + "loss": 1.6909, + "step": 39600 + }, + { + "epoch": 58.64, + "learning_rate": 3.198630136986301e-05, + "loss": 1.6565, + "step": 39700 + }, + { + "epoch": 58.79, + "learning_rate": 3.1872146118721454e-05, + "loss": 1.6563, + "step": 39800 + }, + { + "epoch": 58.94, + "learning_rate": 3.17579908675799e-05, + "loss": 1.658, + "step": 39900 + }, + { + "epoch": 59.08, + "learning_rate": 3.164383561643835e-05, + "loss": 1.6488, + "step": 40000 + }, + { + "epoch": 59.08, + "eval_cer": 0.22710299136719533, + "eval_loss": 0.8637419939041138, + "eval_runtime": 114.7268, + "eval_samples_per_second": 17.616, + "eval_steps_per_second": 2.205, + "eval_wer": 0.880257298367145, + "step": 40000 + }, + { + "epoch": 59.23, + "learning_rate": 3.15296803652968e-05, + "loss": 1.6174, + "step": 40100 + }, + { + "epoch": 59.38, + "learning_rate": 3.1415525114155246e-05, + "loss": 1.6634, + "step": 40200 + }, + { + "epoch": 59.53, + "learning_rate": 3.1301369863013694e-05, + "loss": 1.649, + "step": 40300 + }, + { + "epoch": 59.67, + "learning_rate": 3.118721461187214e-05, + "loss": 1.645, + "step": 40400 + }, + { + "epoch": 59.82, + "learning_rate": 3.107305936073059e-05, + "loss": 1.6298, + "step": 40500 + }, + { + "epoch": 59.82, + "eval_cer": 0.22533627785585225, + "eval_loss": 0.8553184866905212, + "eval_runtime": 113.9415, + "eval_samples_per_second": 17.737, + "eval_steps_per_second": 2.22, + "eval_wer": 0.8767936665017318, + "step": 40500 + }, + { + "epoch": 59.97, + "learning_rate": 3.096004566210045e-05, + "loss": 1.6439, + "step": 40600 + }, + { + "epoch": 60.12, + "learning_rate": 3.08458904109589e-05, + "loss": 1.645, + "step": 40700 + }, + { + "epoch": 60.27, + "learning_rate": 3.073173515981735e-05, + "loss": 1.6105, + "step": 40800 + }, + { + "epoch": 60.41, + "learning_rate": 3.0617579908675796e-05, + "loss": 1.6258, + "step": 40900 + }, + { + "epoch": 60.56, + "learning_rate": 3.050342465753424e-05, + "loss": 1.6185, + "step": 41000 + }, + { + "epoch": 60.56, + "eval_cer": 0.2240112427223449, + "eval_loss": 0.8511665463447571, + "eval_runtime": 113.6325, + "eval_samples_per_second": 17.785, + "eval_steps_per_second": 2.226, + "eval_wer": 0.871845620979713, + "step": 41000 + }, + { + "epoch": 60.71, + "learning_rate": 3.038926940639269e-05, + "loss": 1.6054, + "step": 41100 + }, + { + "epoch": 60.86, + "learning_rate": 3.0275114155251137e-05, + "loss": 1.6495, + "step": 41200 + }, + { + "epoch": 61.0, + "learning_rate": 3.0160958904109585e-05, + "loss": 1.6341, + "step": 41300 + }, + { + "epoch": 61.15, + "learning_rate": 3.0046803652968033e-05, + "loss": 1.6092, + "step": 41400 + }, + { + "epoch": 61.3, + "learning_rate": 2.993264840182648e-05, + "loss": 1.574, + "step": 41500 + }, + { + "epoch": 61.3, + "eval_cer": 0.22509536237703273, + "eval_loss": 0.8579218983650208, + "eval_runtime": 114.0127, + "eval_samples_per_second": 17.726, + "eval_steps_per_second": 2.219, + "eval_wer": 0.8772884710539337, + "step": 41500 + }, + { + "epoch": 61.45, + "learning_rate": 2.981849315068493e-05, + "loss": 1.604, + "step": 41600 + }, + { + "epoch": 61.6, + "learning_rate": 2.9704337899543373e-05, + "loss": 1.6208, + "step": 41700 + }, + { + "epoch": 61.74, + "learning_rate": 2.959018264840182e-05, + "loss": 1.5976, + "step": 41800 + }, + { + "epoch": 61.89, + "learning_rate": 2.947602739726027e-05, + "loss": 1.6118, + "step": 41900 + }, + { + "epoch": 62.04, + "learning_rate": 2.9361872146118717e-05, + "loss": 1.6192, + "step": 42000 + }, + { + "epoch": 62.04, + "eval_cer": 0.22421200562136118, + "eval_loss": 0.8499256372451782, + "eval_runtime": 115.1721, + "eval_samples_per_second": 17.548, + "eval_steps_per_second": 2.197, + "eval_wer": 0.8743196437407225, + "step": 42000 + }, + { + "epoch": 62.19, + "learning_rate": 2.9247716894977165e-05, + "loss": 1.5818, + "step": 42100 + }, + { + "epoch": 62.33, + "learning_rate": 2.9133561643835613e-05, + "loss": 1.5852, + "step": 42200 + }, + { + "epoch": 62.48, + "learning_rate": 2.901940639269406e-05, + "loss": 1.6057, + "step": 42300 + }, + { + "epoch": 62.63, + "learning_rate": 2.8905251141552506e-05, + "loss": 1.5922, + "step": 42400 + }, + { + "epoch": 62.78, + "learning_rate": 2.8791095890410954e-05, + "loss": 1.6275, + "step": 42500 + }, + { + "epoch": 62.78, + "eval_cer": 0.22160208793414976, + "eval_loss": 0.8418570756912231, + "eval_runtime": 114.0321, + "eval_samples_per_second": 17.723, + "eval_steps_per_second": 2.219, + "eval_wer": 0.875804057397328, + "step": 42500 + }, + { + "epoch": 62.92, + "learning_rate": 2.8676940639269402e-05, + "loss": 1.5781, + "step": 42600 + }, + { + "epoch": 63.07, + "learning_rate": 2.856278538812785e-05, + "loss": 1.5754, + "step": 42700 + }, + { + "epoch": 63.22, + "learning_rate": 2.8449771689497715e-05, + "loss": 1.5775, + "step": 42800 + }, + { + "epoch": 63.37, + "learning_rate": 2.8335616438356163e-05, + "loss": 1.568, + "step": 42900 + }, + { + "epoch": 63.52, + "learning_rate": 2.822146118721461e-05, + "loss": 1.5697, + "step": 43000 + }, + { + "epoch": 63.52, + "eval_cer": 0.2221642240513953, + "eval_loss": 0.8445881009101868, + "eval_runtime": 114.2892, + "eval_samples_per_second": 17.683, + "eval_steps_per_second": 2.214, + "eval_wer": 0.8698664027709055, + "step": 43000 + }, + { + "epoch": 63.66, + "learning_rate": 2.810730593607306e-05, + "loss": 1.6005, + "step": 43100 + }, + { + "epoch": 63.81, + "learning_rate": 2.7993150684931507e-05, + "loss": 1.5897, + "step": 43200 + }, + { + "epoch": 63.96, + "learning_rate": 2.7878995433789955e-05, + "loss": 1.5433, + "step": 43300 + }, + { + "epoch": 64.11, + "learning_rate": 2.7764840182648403e-05, + "loss": 1.5925, + "step": 43400 + }, + { + "epoch": 64.25, + "learning_rate": 2.7650684931506847e-05, + "loss": 1.5384, + "step": 43500 + }, + { + "epoch": 64.25, + "eval_cer": 0.21995583216221642, + "eval_loss": 0.8461813926696777, + "eval_runtime": 113.6821, + "eval_samples_per_second": 17.778, + "eval_steps_per_second": 2.226, + "eval_wer": 0.8579910935180604, + "step": 43500 + }, + { + "epoch": 64.4, + "learning_rate": 2.7536529680365295e-05, + "loss": 1.5826, + "step": 43600 + }, + { + "epoch": 64.55, + "learning_rate": 2.7422374429223743e-05, + "loss": 1.5888, + "step": 43700 + }, + { + "epoch": 64.7, + "learning_rate": 2.730821917808219e-05, + "loss": 1.5746, + "step": 43800 + }, + { + "epoch": 64.84, + "learning_rate": 2.719406392694064e-05, + "loss": 1.5332, + "step": 43900 + }, + { + "epoch": 64.99, + "learning_rate": 2.7079908675799087e-05, + "loss": 1.5115, + "step": 44000 + }, + { + "epoch": 64.99, + "eval_cer": 0.22140132503513352, + "eval_loss": 0.8466500043869019, + "eval_runtime": 113.7712, + "eval_samples_per_second": 17.764, + "eval_steps_per_second": 2.224, + "eval_wer": 0.867392380009896, + "step": 44000 + }, + { + "epoch": 65.14, + "learning_rate": 2.6965753424657535e-05, + "loss": 1.5704, + "step": 44100 + }, + { + "epoch": 65.29, + "learning_rate": 2.685159817351598e-05, + "loss": 1.5518, + "step": 44200 + }, + { + "epoch": 65.44, + "learning_rate": 2.6737442922374428e-05, + "loss": 1.5502, + "step": 44300 + }, + { + "epoch": 65.58, + "learning_rate": 2.6623287671232876e-05, + "loss": 1.5401, + "step": 44400 + }, + { + "epoch": 65.73, + "learning_rate": 2.6509132420091324e-05, + "loss": 1.5547, + "step": 44500 + }, + { + "epoch": 65.73, + "eval_cer": 0.22043766311985544, + "eval_loss": 0.8505166172981262, + "eval_runtime": 114.0059, + "eval_samples_per_second": 17.727, + "eval_steps_per_second": 2.219, + "eval_wer": 0.8668975754576942, + "step": 44500 + }, + { + "epoch": 65.88, + "learning_rate": 2.6394977168949772e-05, + "loss": 1.5391, + "step": 44600 + }, + { + "epoch": 66.03, + "learning_rate": 2.628082191780822e-05, + "loss": 1.5263, + "step": 44700 + }, + { + "epoch": 66.17, + "learning_rate": 2.6166666666666668e-05, + "loss": 1.5159, + "step": 44800 + }, + { + "epoch": 66.32, + "learning_rate": 2.6052511415525113e-05, + "loss": 1.5416, + "step": 44900 + }, + { + "epoch": 66.47, + "learning_rate": 2.593835616438356e-05, + "loss": 1.5597, + "step": 45000 + }, + { + "epoch": 66.47, + "eval_cer": 0.2192330857257579, + "eval_loss": 0.8420690298080444, + "eval_runtime": 114.4813, + "eval_samples_per_second": 17.654, + "eval_steps_per_second": 2.21, + "eval_wer": 0.8683819891142999, + "step": 45000 + }, + { + "epoch": 66.62, + "learning_rate": 2.582420091324201e-05, + "loss": 1.5107, + "step": 45100 + }, + { + "epoch": 66.76, + "learning_rate": 2.5710045662100457e-05, + "loss": 1.5373, + "step": 45200 + }, + { + "epoch": 66.91, + "learning_rate": 2.5595890410958905e-05, + "loss": 1.503, + "step": 45300 + }, + { + "epoch": 67.06, + "learning_rate": 2.5482876712328766e-05, + "loss": 1.5408, + "step": 45400 + }, + { + "epoch": 67.21, + "learning_rate": 2.5368721461187214e-05, + "loss": 1.505, + "step": 45500 + }, + { + "epoch": 67.21, + "eval_cer": 0.21867094960851235, + "eval_loss": 0.8485424518585205, + "eval_runtime": 113.1044, + "eval_samples_per_second": 17.868, + "eval_steps_per_second": 2.237, + "eval_wer": 0.8619495299356754, + "step": 45500 + }, + { + "epoch": 67.36, + "learning_rate": 2.5254566210045662e-05, + "loss": 1.5071, + "step": 45600 + }, + { + "epoch": 67.5, + "learning_rate": 2.514041095890411e-05, + "loss": 1.5392, + "step": 45700 + }, + { + "epoch": 67.65, + "learning_rate": 2.5026255707762555e-05, + "loss": 1.5284, + "step": 45800 + }, + { + "epoch": 67.8, + "learning_rate": 2.4912100456621003e-05, + "loss": 1.5282, + "step": 45900 + }, + { + "epoch": 67.95, + "learning_rate": 2.479794520547945e-05, + "loss": 1.5101, + "step": 46000 + }, + { + "epoch": 67.95, + "eval_cer": 0.22035735796024894, + "eval_loss": 0.8489089012145996, + "eval_runtime": 114.6545, + "eval_samples_per_second": 17.627, + "eval_steps_per_second": 2.207, + "eval_wer": 0.8649183572488867, + "step": 46000 + }, + { + "epoch": 68.09, + "learning_rate": 2.46837899543379e-05, + "loss": 1.503, + "step": 46100 + }, + { + "epoch": 68.24, + "learning_rate": 2.4569634703196347e-05, + "loss": 1.5194, + "step": 46200 + }, + { + "epoch": 68.39, + "learning_rate": 2.4455479452054795e-05, + "loss": 1.493, + "step": 46300 + }, + { + "epoch": 68.54, + "learning_rate": 2.434132420091324e-05, + "loss": 1.5327, + "step": 46400 + }, + { + "epoch": 68.69, + "learning_rate": 2.4227168949771687e-05, + "loss": 1.5199, + "step": 46500 + }, + { + "epoch": 68.69, + "eval_cer": 0.21802850833166032, + "eval_loss": 0.8407037258148193, + "eval_runtime": 114.1903, + "eval_samples_per_second": 17.699, + "eval_steps_per_second": 2.216, + "eval_wer": 0.8619495299356754, + "step": 46500 + }, + { + "epoch": 68.83, + "learning_rate": 2.4113013698630135e-05, + "loss": 1.489, + "step": 46600 + }, + { + "epoch": 68.98, + "learning_rate": 2.3998858447488583e-05, + "loss": 1.4968, + "step": 46700 + }, + { + "epoch": 69.13, + "learning_rate": 2.388470319634703e-05, + "loss": 1.4959, + "step": 46800 + }, + { + "epoch": 69.28, + "learning_rate": 2.377054794520548e-05, + "loss": 1.4913, + "step": 46900 + }, + { + "epoch": 69.42, + "learning_rate": 2.3656392694063927e-05, + "loss": 1.5207, + "step": 47000 + }, + { + "epoch": 69.42, + "eval_cer": 0.2162617948203172, + "eval_loss": 0.8378761410713196, + "eval_runtime": 113.9918, + "eval_samples_per_second": 17.729, + "eval_steps_per_second": 2.219, + "eval_wer": 0.8495794161306284, + "step": 47000 + }, + { + "epoch": 69.57, + "learning_rate": 2.3542237442922372e-05, + "loss": 1.4881, + "step": 47100 + }, + { + "epoch": 69.72, + "learning_rate": 2.342808219178082e-05, + "loss": 1.4951, + "step": 47200 + }, + { + "epoch": 69.87, + "learning_rate": 2.3313926940639268e-05, + "loss": 1.5063, + "step": 47300 + }, + { + "epoch": 70.01, + "learning_rate": 2.320091324200913e-05, + "loss": 1.4925, + "step": 47400 + }, + { + "epoch": 70.16, + "learning_rate": 2.3086757990867578e-05, + "loss": 1.478, + "step": 47500 + }, + { + "epoch": 70.16, + "eval_cer": 0.2163420999799237, + "eval_loss": 0.835663914680481, + "eval_runtime": 115.4983, + "eval_samples_per_second": 17.498, + "eval_steps_per_second": 2.191, + "eval_wer": 0.859475507174666, + "step": 47500 + }, + { + "epoch": 70.31, + "learning_rate": 2.2972602739726026e-05, + "loss": 1.4823, + "step": 47600 + }, + { + "epoch": 70.46, + "learning_rate": 2.2858447488584474e-05, + "loss": 1.5068, + "step": 47700 + }, + { + "epoch": 70.61, + "learning_rate": 2.2744292237442922e-05, + "loss": 1.5096, + "step": 47800 + }, + { + "epoch": 70.75, + "learning_rate": 2.263013698630137e-05, + "loss": 1.4897, + "step": 47900 + }, + { + "epoch": 70.9, + "learning_rate": 2.2515981735159814e-05, + "loss": 1.4817, + "step": 48000 + }, + { + "epoch": 70.9, + "eval_cer": 0.21513752258582614, + "eval_loss": 0.8346221446990967, + "eval_runtime": 113.9435, + "eval_samples_per_second": 17.737, + "eval_steps_per_second": 2.22, + "eval_wer": 0.8495794161306284, + "step": 48000 + }, + { + "epoch": 71.05, + "learning_rate": 2.2401826484018262e-05, + "loss": 1.5342, + "step": 48100 + }, + { + "epoch": 71.2, + "learning_rate": 2.228767123287671e-05, + "loss": 1.4723, + "step": 48200 + }, + { + "epoch": 71.34, + "learning_rate": 2.217351598173516e-05, + "loss": 1.4929, + "step": 48300 + }, + { + "epoch": 71.49, + "learning_rate": 2.2059360730593606e-05, + "loss": 1.4785, + "step": 48400 + }, + { + "epoch": 71.64, + "learning_rate": 2.1945205479452054e-05, + "loss": 1.4827, + "step": 48500 + }, + { + "epoch": 71.64, + "eval_cer": 0.216864083517366, + "eval_loss": 0.8362293243408203, + "eval_runtime": 114.6857, + "eval_samples_per_second": 17.622, + "eval_steps_per_second": 2.206, + "eval_wer": 0.8624443344878773, + "step": 48500 + }, + { + "epoch": 71.79, + "learning_rate": 2.1831050228310502e-05, + "loss": 1.4732, + "step": 48600 + }, + { + "epoch": 71.93, + "learning_rate": 2.1716894977168947e-05, + "loss": 1.4611, + "step": 48700 + }, + { + "epoch": 72.08, + "learning_rate": 2.1602739726027395e-05, + "loss": 1.4621, + "step": 48800 + }, + { + "epoch": 72.23, + "learning_rate": 2.1488584474885843e-05, + "loss": 1.4636, + "step": 48900 + }, + { + "epoch": 72.38, + "learning_rate": 2.137442922374429e-05, + "loss": 1.4513, + "step": 49000 + }, + { + "epoch": 72.38, + "eval_cer": 0.21369202971290904, + "eval_loss": 0.835486888885498, + "eval_runtime": 113.6809, + "eval_samples_per_second": 17.778, + "eval_steps_per_second": 2.226, + "eval_wer": 0.8451261751608115, + "step": 49000 + }, + { + "epoch": 72.53, + "learning_rate": 2.126027397260274e-05, + "loss": 1.4469, + "step": 49100 + }, + { + "epoch": 72.67, + "learning_rate": 2.1146118721461187e-05, + "loss": 1.4521, + "step": 49200 + }, + { + "epoch": 72.82, + "learning_rate": 2.1031963470319635e-05, + "loss": 1.4285, + "step": 49300 + }, + { + "epoch": 72.97, + "learning_rate": 2.091780821917808e-05, + "loss": 1.4663, + "step": 49400 + }, + { + "epoch": 73.12, + "learning_rate": 2.0803652968036527e-05, + "loss": 1.4988, + "step": 49500 + }, + { + "epoch": 73.12, + "eval_cer": 0.21606103192130094, + "eval_loss": 0.8324964642524719, + "eval_runtime": 114.8092, + "eval_samples_per_second": 17.603, + "eval_steps_per_second": 2.204, + "eval_wer": 0.8624443344878773, + "step": 49500 + }, + { + "epoch": 73.26, + "learning_rate": 2.0689497716894975e-05, + "loss": 1.4531, + "step": 49600 + }, + { + "epoch": 73.41, + "learning_rate": 2.0576484018264837e-05, + "loss": 1.4415, + "step": 49700 + }, + { + "epoch": 73.56, + "learning_rate": 2.0462328767123285e-05, + "loss": 1.4415, + "step": 49800 + }, + { + "epoch": 73.71, + "learning_rate": 2.0348173515981733e-05, + "loss": 1.4495, + "step": 49900 + }, + { + "epoch": 73.85, + "learning_rate": 2.023401826484018e-05, + "loss": 1.4267, + "step": 50000 + }, + { + "epoch": 73.85, + "eval_cer": 0.21565950612326842, + "eval_loss": 0.8395733833312988, + "eval_runtime": 113.3592, + "eval_samples_per_second": 17.828, + "eval_steps_per_second": 2.232, + "eval_wer": 0.8480950024740228, + "step": 50000 + }, + { + "epoch": 74.0, + "learning_rate": 2.011986301369863e-05, + "loss": 1.4187, + "step": 50100 + }, + { + "epoch": 74.15, + "learning_rate": 2.0005707762557077e-05, + "loss": 1.4261, + "step": 50200 + }, + { + "epoch": 74.3, + "learning_rate": 1.9891552511415522e-05, + "loss": 1.3826, + "step": 50300 + }, + { + "epoch": 74.45, + "learning_rate": 1.977739726027397e-05, + "loss": 1.4491, + "step": 50400 + }, + { + "epoch": 74.59, + "learning_rate": 1.9663242009132418e-05, + "loss": 1.4421, + "step": 50500 + }, + { + "epoch": 74.59, + "eval_cer": 0.21216623168038545, + "eval_loss": 0.8354990482330322, + "eval_runtime": 114.0851, + "eval_samples_per_second": 17.715, + "eval_steps_per_second": 2.218, + "eval_wer": 0.8490846115784265, + "step": 50500 + }, + { + "epoch": 74.74, + "learning_rate": 1.9549086757990866e-05, + "loss": 1.4689, + "step": 50600 + }, + { + "epoch": 74.89, + "learning_rate": 1.9434931506849314e-05, + "loss": 1.4471, + "step": 50700 + }, + { + "epoch": 75.04, + "learning_rate": 1.9320776255707762e-05, + "loss": 1.4333, + "step": 50800 + }, + { + "epoch": 75.18, + "learning_rate": 1.920662100456621e-05, + "loss": 1.4211, + "step": 50900 + }, + { + "epoch": 75.33, + "learning_rate": 1.9092465753424654e-05, + "loss": 1.4311, + "step": 51000 + }, + { + "epoch": 75.33, + "eval_cer": 0.2118048584621562, + "eval_loss": 0.8357799649238586, + "eval_runtime": 112.9085, + "eval_samples_per_second": 17.899, + "eval_steps_per_second": 2.241, + "eval_wer": 0.8476001979218208, + "step": 51000 + }, + { + "epoch": 75.48, + "learning_rate": 1.8978310502283102e-05, + "loss": 1.4136, + "step": 51100 + }, + { + "epoch": 75.63, + "learning_rate": 1.886415525114155e-05, + "loss": 1.4144, + "step": 51200 + }, + { + "epoch": 75.78, + "learning_rate": 1.875e-05, + "loss": 1.4479, + "step": 51300 + }, + { + "epoch": 75.92, + "learning_rate": 1.8635844748858446e-05, + "loss": 1.4218, + "step": 51400 + }, + { + "epoch": 76.07, + "learning_rate": 1.8521689497716894e-05, + "loss": 1.4174, + "step": 51500 + }, + { + "epoch": 76.07, + "eval_cer": 0.21007829753061635, + "eval_loss": 0.8288608193397522, + "eval_runtime": 114.429, + "eval_samples_per_second": 17.662, + "eval_steps_per_second": 2.211, + "eval_wer": 0.8451261751608115, + "step": 51500 + }, + { + "epoch": 76.22, + "learning_rate": 1.8407534246575342e-05, + "loss": 1.4225, + "step": 51600 + }, + { + "epoch": 76.37, + "learning_rate": 1.8294520547945204e-05, + "loss": 1.4181, + "step": 51700 + }, + { + "epoch": 76.51, + "learning_rate": 1.8180365296803652e-05, + "loss": 1.3998, + "step": 51800 + }, + { + "epoch": 76.66, + "learning_rate": 1.8066210045662097e-05, + "loss": 1.4383, + "step": 51900 + }, + { + "epoch": 76.81, + "learning_rate": 1.7952054794520545e-05, + "loss": 1.4349, + "step": 52000 + }, + { + "epoch": 76.81, + "eval_cer": 0.21401325035133506, + "eval_loss": 0.8371652960777283, + "eval_runtime": 113.9028, + "eval_samples_per_second": 17.743, + "eval_steps_per_second": 2.221, + "eval_wer": 0.8579910935180604, + "step": 52000 + }, + { + "epoch": 76.96, + "learning_rate": 1.7837899543378993e-05, + "loss": 1.4007, + "step": 52100 + }, + { + "epoch": 77.1, + "learning_rate": 1.772374429223744e-05, + "loss": 1.4512, + "step": 52200 + }, + { + "epoch": 77.25, + "learning_rate": 1.760958904109589e-05, + "loss": 1.3999, + "step": 52300 + }, + { + "epoch": 77.4, + "learning_rate": 1.7495433789954337e-05, + "loss": 1.4224, + "step": 52400 + }, + { + "epoch": 77.55, + "learning_rate": 1.7381278538812785e-05, + "loss": 1.3959, + "step": 52500 + }, + { + "epoch": 77.55, + "eval_cer": 0.21156394298333667, + "eval_loss": 0.8324997425079346, + "eval_runtime": 112.9164, + "eval_samples_per_second": 17.898, + "eval_steps_per_second": 2.241, + "eval_wer": 0.8436417615042059, + "step": 52500 + }, + { + "epoch": 77.7, + "learning_rate": 1.726712328767123e-05, + "loss": 1.4086, + "step": 52600 + }, + { + "epoch": 77.84, + "learning_rate": 1.7152968036529677e-05, + "loss": 1.4345, + "step": 52700 + }, + { + "epoch": 77.99, + "learning_rate": 1.7038812785388125e-05, + "loss": 1.3828, + "step": 52800 + }, + { + "epoch": 78.14, + "learning_rate": 1.6924657534246573e-05, + "loss": 1.4159, + "step": 52900 + }, + { + "epoch": 78.29, + "learning_rate": 1.681050228310502e-05, + "loss": 1.4087, + "step": 53000 + }, + { + "epoch": 78.29, + "eval_cer": 0.21051997590845212, + "eval_loss": 0.8351333141326904, + "eval_runtime": 114.5275, + "eval_samples_per_second": 17.646, + "eval_steps_per_second": 2.209, + "eval_wer": 0.8446313706086096, + "step": 53000 + }, + { + "epoch": 78.43, + "learning_rate": 1.669634703196347e-05, + "loss": 1.4303, + "step": 53100 + }, + { + "epoch": 78.58, + "learning_rate": 1.6582191780821917e-05, + "loss": 1.4233, + "step": 53200 + }, + { + "epoch": 78.73, + "learning_rate": 1.6468036529680362e-05, + "loss": 1.3758, + "step": 53300 + }, + { + "epoch": 78.88, + "learning_rate": 1.635388127853881e-05, + "loss": 1.3703, + "step": 53400 + }, + { + "epoch": 79.03, + "learning_rate": 1.6239726027397258e-05, + "loss": 1.415, + "step": 53500 + }, + { + "epoch": 79.03, + "eval_cer": 0.21228668941979523, + "eval_loss": 0.8363248109817505, + "eval_runtime": 115.4621, + "eval_samples_per_second": 17.504, + "eval_steps_per_second": 2.191, + "eval_wer": 0.8476001979218208, + "step": 53500 + }, + { + "epoch": 79.17, + "learning_rate": 1.6125570776255706e-05, + "loss": 1.3768, + "step": 53600 + }, + { + "epoch": 79.32, + "learning_rate": 1.6011415525114154e-05, + "loss": 1.399, + "step": 53700 + }, + { + "epoch": 79.47, + "learning_rate": 1.5897260273972602e-05, + "loss": 1.4076, + "step": 53800 + }, + { + "epoch": 79.62, + "learning_rate": 1.5784246575342464e-05, + "loss": 1.3912, + "step": 53900 + }, + { + "epoch": 79.76, + "learning_rate": 1.567009132420091e-05, + "loss": 1.4122, + "step": 54000 + }, + { + "epoch": 79.76, + "eval_cer": 0.21116241718530415, + "eval_loss": 0.8309991359710693, + "eval_runtime": 113.1017, + "eval_samples_per_second": 17.869, + "eval_steps_per_second": 2.237, + "eval_wer": 0.8480950024740228, + "step": 54000 + }, + { + "epoch": 79.91, + "learning_rate": 1.5557077625570773e-05, + "loss": 1.3916, + "step": 54100 + }, + { + "epoch": 80.06, + "learning_rate": 1.544292237442922e-05, + "loss": 1.4167, + "step": 54200 + }, + { + "epoch": 80.21, + "learning_rate": 1.532876712328767e-05, + "loss": 1.4019, + "step": 54300 + }, + { + "epoch": 80.35, + "learning_rate": 1.5214611872146117e-05, + "loss": 1.3909, + "step": 54400 + }, + { + "epoch": 80.5, + "learning_rate": 1.5100456621004565e-05, + "loss": 1.3969, + "step": 54500 + }, + { + "epoch": 80.5, + "eval_cer": 0.20951616141337082, + "eval_loss": 0.8239215612411499, + "eval_runtime": 113.6855, + "eval_samples_per_second": 17.777, + "eval_steps_per_second": 2.225, + "eval_wer": 0.8446313706086096, + "step": 54500 + }, + { + "epoch": 80.65, + "learning_rate": 1.4986301369863012e-05, + "loss": 1.3952, + "step": 54600 + }, + { + "epoch": 80.8, + "learning_rate": 1.487214611872146e-05, + "loss": 1.3998, + "step": 54700 + }, + { + "epoch": 80.94, + "learning_rate": 1.4757990867579908e-05, + "loss": 1.3902, + "step": 54800 + }, + { + "epoch": 81.09, + "learning_rate": 1.4643835616438356e-05, + "loss": 1.3526, + "step": 54900 + }, + { + "epoch": 81.24, + "learning_rate": 1.4529680365296802e-05, + "loss": 1.361, + "step": 55000 + }, + { + "epoch": 81.24, + "eval_cer": 0.20907448303553502, + "eval_loss": 0.8281779289245605, + "eval_runtime": 112.7357, + "eval_samples_per_second": 17.927, + "eval_steps_per_second": 2.244, + "eval_wer": 0.8426521523998021, + "step": 55000 + }, + { + "epoch": 81.39, + "learning_rate": 1.441552511415525e-05, + "loss": 1.4054, + "step": 55100 + }, + { + "epoch": 81.54, + "learning_rate": 1.4302511415525112e-05, + "loss": 1.4048, + "step": 55200 + }, + { + "epoch": 81.68, + "learning_rate": 1.418835616438356e-05, + "loss": 1.401, + "step": 55300 + }, + { + "epoch": 81.83, + "learning_rate": 1.4074200913242008e-05, + "loss": 1.3824, + "step": 55400 + }, + { + "epoch": 81.98, + "learning_rate": 1.3960045662100454e-05, + "loss": 1.3611, + "step": 55500 + }, + { + "epoch": 81.98, + "eval_cer": 0.20923509335474805, + "eval_loss": 0.8281756043434143, + "eval_runtime": 114.1698, + "eval_samples_per_second": 17.702, + "eval_steps_per_second": 2.216, + "eval_wer": 0.8406729341909945, + "step": 55500 + }, + { + "epoch": 82.13, + "learning_rate": 1.3845890410958902e-05, + "loss": 1.4021, + "step": 55600 + }, + { + "epoch": 82.27, + "learning_rate": 1.373173515981735e-05, + "loss": 1.3655, + "step": 55700 + }, + { + "epoch": 82.42, + "learning_rate": 1.3617579908675798e-05, + "loss": 1.368, + "step": 55800 + }, + { + "epoch": 82.57, + "learning_rate": 1.3503424657534244e-05, + "loss": 1.355, + "step": 55900 + }, + { + "epoch": 82.72, + "learning_rate": 1.3389269406392692e-05, + "loss": 1.3677, + "step": 56000 + }, + { + "epoch": 82.72, + "eval_cer": 0.2083517365990765, + "eval_loss": 0.8234991431236267, + "eval_runtime": 113.0903, + "eval_samples_per_second": 17.871, + "eval_steps_per_second": 2.237, + "eval_wer": 0.8436417615042059, + "step": 56000 + }, + { + "epoch": 82.87, + "learning_rate": 1.327511415525114e-05, + "loss": 1.3752, + "step": 56100 + }, + { + "epoch": 83.01, + "learning_rate": 1.3160958904109586e-05, + "loss": 1.3599, + "step": 56200 + }, + { + "epoch": 83.16, + "learning_rate": 1.3046803652968034e-05, + "loss": 1.331, + "step": 56300 + }, + { + "epoch": 83.31, + "learning_rate": 1.2932648401826482e-05, + "loss": 1.3881, + "step": 56400 + }, + { + "epoch": 83.46, + "learning_rate": 1.281849315068493e-05, + "loss": 1.3361, + "step": 56500 + }, + { + "epoch": 83.46, + "eval_cer": 0.20686609114635615, + "eval_loss": 0.8230801224708557, + "eval_runtime": 113.4808, + "eval_samples_per_second": 17.809, + "eval_steps_per_second": 2.229, + "eval_wer": 0.8377041068777833, + "step": 56500 + }, + { + "epoch": 83.6, + "learning_rate": 1.2704337899543377e-05, + "loss": 1.3507, + "step": 56600 + }, + { + "epoch": 83.75, + "learning_rate": 1.2590182648401825e-05, + "loss": 1.3529, + "step": 56700 + }, + { + "epoch": 83.9, + "learning_rate": 1.2476027397260273e-05, + "loss": 1.3656, + "step": 56800 + }, + { + "epoch": 84.05, + "learning_rate": 1.2361872146118719e-05, + "loss": 1.3621, + "step": 56900 + }, + { + "epoch": 84.19, + "learning_rate": 1.2247716894977167e-05, + "loss": 1.3779, + "step": 57000 + }, + { + "epoch": 84.19, + "eval_cer": 0.2069865488857659, + "eval_loss": 0.8206142783164978, + "eval_runtime": 112.7557, + "eval_samples_per_second": 17.924, + "eval_steps_per_second": 2.244, + "eval_wer": 0.8436417615042059, + "step": 57000 + }, + { + "epoch": 84.34, + "learning_rate": 1.2133561643835615e-05, + "loss": 1.3326, + "step": 57100 + }, + { + "epoch": 84.49, + "learning_rate": 1.2019406392694063e-05, + "loss": 1.3411, + "step": 57200 + }, + { + "epoch": 84.64, + "learning_rate": 1.190525114155251e-05, + "loss": 1.3424, + "step": 57300 + }, + { + "epoch": 84.79, + "learning_rate": 1.1791095890410957e-05, + "loss": 1.342, + "step": 57400 + }, + { + "epoch": 84.93, + "learning_rate": 1.1676940639269405e-05, + "loss": 1.3727, + "step": 57500 + }, + { + "epoch": 84.93, + "eval_cer": 0.20646456534832364, + "eval_loss": 0.8204257488250732, + "eval_runtime": 112.5887, + "eval_samples_per_second": 17.95, + "eval_steps_per_second": 2.247, + "eval_wer": 0.839188520534389, + "step": 57500 + }, + { + "epoch": 85.08, + "learning_rate": 1.1562785388127852e-05, + "loss": 1.3352, + "step": 57600 + }, + { + "epoch": 85.23, + "learning_rate": 1.14486301369863e-05, + "loss": 1.3232, + "step": 57700 + }, + { + "epoch": 85.38, + "learning_rate": 1.1334474885844748e-05, + "loss": 1.3534, + "step": 57800 + }, + { + "epoch": 85.52, + "learning_rate": 1.1220319634703194e-05, + "loss": 1.3834, + "step": 57900 + }, + { + "epoch": 85.67, + "learning_rate": 1.1106164383561642e-05, + "loss": 1.3317, + "step": 58000 + }, + { + "epoch": 85.67, + "eval_cer": 0.20654487050793013, + "eval_loss": 0.8206654787063599, + "eval_runtime": 113.345, + "eval_samples_per_second": 17.831, + "eval_steps_per_second": 2.232, + "eval_wer": 0.8436417615042059, + "step": 58000 + }, + { + "epoch": 85.82, + "learning_rate": 1.099200913242009e-05, + "loss": 1.3349, + "step": 58100 + }, + { + "epoch": 85.97, + "learning_rate": 1.0877853881278538e-05, + "loss": 1.326, + "step": 58200 + }, + { + "epoch": 86.12, + "learning_rate": 1.0763698630136984e-05, + "loss": 1.3541, + "step": 58300 + }, + { + "epoch": 86.26, + "learning_rate": 1.0649543378995432e-05, + "loss": 1.3306, + "step": 58400 + }, + { + "epoch": 86.41, + "learning_rate": 1.053538812785388e-05, + "loss": 1.3332, + "step": 58500 + }, + { + "epoch": 86.41, + "eval_cer": 0.20554105601284883, + "eval_loss": 0.818649172782898, + "eval_runtime": 113.2293, + "eval_samples_per_second": 17.849, + "eval_steps_per_second": 2.234, + "eval_wer": 0.8357248886689758, + "step": 58500 + }, + { + "epoch": 86.56, + "learning_rate": 1.0421232876712326e-05, + "loss": 1.3258, + "step": 58600 + }, + { + "epoch": 86.71, + "learning_rate": 1.0307077625570774e-05, + "loss": 1.3269, + "step": 58700 + }, + { + "epoch": 86.85, + "learning_rate": 1.0192922374429222e-05, + "loss": 1.3411, + "step": 58800 + }, + { + "epoch": 87.0, + "learning_rate": 1.007876712328767e-05, + "loss": 1.3136, + "step": 58900 + }, + { + "epoch": 87.15, + "learning_rate": 9.964611872146117e-06, + "loss": 1.3299, + "step": 59000 + }, + { + "epoch": 87.15, + "eval_cer": 0.20754868500301143, + "eval_loss": 0.8192667961120605, + "eval_runtime": 113.1322, + "eval_samples_per_second": 17.864, + "eval_steps_per_second": 2.236, + "eval_wer": 0.8416625432953984, + "step": 59000 + }, + { + "epoch": 87.3, + "learning_rate": 9.850456621004565e-06, + "loss": 1.3134, + "step": 59100 + }, + { + "epoch": 87.44, + "learning_rate": 9.736301369863013e-06, + "loss": 1.3108, + "step": 59200 + }, + { + "epoch": 87.59, + "learning_rate": 9.622146118721459e-06, + "loss": 1.3347, + "step": 59300 + }, + { + "epoch": 87.74, + "learning_rate": 9.507990867579907e-06, + "loss": 1.314, + "step": 59400 + }, + { + "epoch": 87.89, + "learning_rate": 9.393835616438355e-06, + "loss": 1.3129, + "step": 59500 + }, + { + "epoch": 87.89, + "eval_cer": 0.20650471792812689, + "eval_loss": 0.8182885050773621, + "eval_runtime": 112.6479, + "eval_samples_per_second": 17.941, + "eval_steps_per_second": 2.246, + "eval_wer": 0.8431469569520039, + "step": 59500 + }, + { + "epoch": 88.04, + "learning_rate": 9.279680365296803e-06, + "loss": 1.3839, + "step": 59600 + }, + { + "epoch": 88.18, + "learning_rate": 9.166666666666665e-06, + "loss": 1.3179, + "step": 59700 + }, + { + "epoch": 88.33, + "learning_rate": 9.052511415525113e-06, + "loss": 1.3065, + "step": 59800 + }, + { + "epoch": 88.48, + "learning_rate": 8.93835616438356e-06, + "loss": 1.3093, + "step": 59900 + }, + { + "epoch": 88.63, + "learning_rate": 8.824200913242009e-06, + "loss": 1.3352, + "step": 60000 + }, + { + "epoch": 88.63, + "eval_cer": 0.20622364986950412, + "eval_loss": 0.8150643110275269, + "eval_runtime": 112.8416, + "eval_samples_per_second": 17.91, + "eval_steps_per_second": 2.242, + "eval_wer": 0.847105393369619, + "step": 60000 + }, + { + "epoch": 88.77, + "learning_rate": 8.710045662100455e-06, + "loss": 1.3146, + "step": 60100 + }, + { + "epoch": 88.92, + "learning_rate": 8.595890410958903e-06, + "loss": 1.3185, + "step": 60200 + }, + { + "epoch": 89.07, + "learning_rate": 8.481735159817351e-06, + "loss": 1.291, + "step": 60300 + }, + { + "epoch": 89.22, + "learning_rate": 8.367579908675797e-06, + "loss": 1.2985, + "step": 60400 + }, + { + "epoch": 89.36, + "learning_rate": 8.253424657534245e-06, + "loss": 1.3026, + "step": 60500 + }, + { + "epoch": 89.36, + "eval_cer": 0.20666532824733988, + "eval_loss": 0.8124769330024719, + "eval_runtime": 113.2992, + "eval_samples_per_second": 17.838, + "eval_steps_per_second": 2.233, + "eval_wer": 0.8485898070262247, + "step": 60500 + }, + { + "epoch": 89.51, + "learning_rate": 8.139269406392693e-06, + "loss": 1.3266, + "step": 60600 + }, + { + "epoch": 89.66, + "learning_rate": 8.025114155251141e-06, + "loss": 1.2908, + "step": 60700 + }, + { + "epoch": 89.81, + "learning_rate": 7.910958904109588e-06, + "loss": 1.3219, + "step": 60800 + }, + { + "epoch": 89.96, + "learning_rate": 7.796803652968036e-06, + "loss": 1.3354, + "step": 60900 + }, + { + "epoch": 90.1, + "learning_rate": 7.682648401826484e-06, + "loss": 1.3468, + "step": 61000 + }, + { + "epoch": 90.1, + "eval_cer": 0.2058221240714716, + "eval_loss": 0.8124193549156189, + "eval_runtime": 113.1466, + "eval_samples_per_second": 17.862, + "eval_steps_per_second": 2.236, + "eval_wer": 0.8406729341909945, + "step": 61000 + }, + { + "epoch": 90.25, + "learning_rate": 7.568493150684931e-06, + "loss": 1.2959, + "step": 61100 + }, + { + "epoch": 90.4, + "learning_rate": 7.454337899543378e-06, + "loss": 1.3095, + "step": 61200 + }, + { + "epoch": 90.55, + "learning_rate": 7.340182648401826e-06, + "loss": 1.3018, + "step": 61300 + }, + { + "epoch": 90.69, + "learning_rate": 7.226027397260273e-06, + "loss": 1.3182, + "step": 61400 + }, + { + "epoch": 90.84, + "learning_rate": 7.111872146118721e-06, + "loss": 1.3028, + "step": 61500 + }, + { + "epoch": 90.84, + "eval_cer": 0.20509937763501304, + "eval_loss": 0.8122348189353943, + "eval_runtime": 113.8373, + "eval_samples_per_second": 17.753, + "eval_steps_per_second": 2.222, + "eval_wer": 0.8461157842652153, + "step": 61500 + }, + { + "epoch": 90.99, + "learning_rate": 6.997716894977168e-06, + "loss": 1.2779, + "step": 61600 + }, + { + "epoch": 91.14, + "learning_rate": 6.883561643835615e-06, + "loss": 1.3199, + "step": 61700 + }, + { + "epoch": 91.28, + "learning_rate": 6.769406392694063e-06, + "loss": 1.282, + "step": 61800 + }, + { + "epoch": 91.43, + "learning_rate": 6.6552511415525104e-06, + "loss": 1.3319, + "step": 61900 + }, + { + "epoch": 91.58, + "learning_rate": 6.5410958904109584e-06, + "loss": 1.2884, + "step": 62000 + }, + { + "epoch": 91.58, + "eval_cer": 0.20477815699658702, + "eval_loss": 0.8086355328559875, + "eval_runtime": 114.5869, + "eval_samples_per_second": 17.637, + "eval_steps_per_second": 2.208, + "eval_wer": 0.8426521523998021, + "step": 62000 + }, + { + "epoch": 91.73, + "learning_rate": 6.4269406392694056e-06, + "loss": 1.3206, + "step": 62100 + }, + { + "epoch": 91.88, + "learning_rate": 6.3127853881278536e-06, + "loss": 1.3219, + "step": 62200 + }, + { + "epoch": 92.02, + "learning_rate": 6.198630136986301e-06, + "loss": 1.2887, + "step": 62300 + }, + { + "epoch": 92.17, + "learning_rate": 6.084474885844748e-06, + "loss": 1.3125, + "step": 62400 + }, + { + "epoch": 92.32, + "learning_rate": 5.970319634703196e-06, + "loss": 1.3005, + "step": 62500 + }, + { + "epoch": 92.32, + "eval_cer": 0.2054607508532423, + "eval_loss": 0.8109722137451172, + "eval_runtime": 114.6224, + "eval_samples_per_second": 17.632, + "eval_steps_per_second": 2.207, + "eval_wer": 0.838693715982187, + "step": 62500 + }, + { + "epoch": 92.47, + "learning_rate": 5.856164383561643e-06, + "loss": 1.3115, + "step": 62600 + }, + { + "epoch": 92.61, + "learning_rate": 5.7431506849315064e-06, + "loss": 1.3024, + "step": 62700 + }, + { + "epoch": 92.76, + "learning_rate": 5.628995433789954e-06, + "loss": 1.2729, + "step": 62800 + }, + { + "epoch": 92.91, + "learning_rate": 5.514840182648402e-06, + "loss": 1.2953, + "step": 62900 + }, + { + "epoch": 93.06, + "learning_rate": 5.400684931506849e-06, + "loss": 1.2996, + "step": 63000 + }, + { + "epoch": 93.06, + "eval_cer": 0.20570166633206183, + "eval_loss": 0.8125805854797363, + "eval_runtime": 113.0806, + "eval_samples_per_second": 17.872, + "eval_steps_per_second": 2.237, + "eval_wer": 0.8327560613557645, + "step": 63000 + }, + { + "epoch": 93.21, + "learning_rate": 5.286529680365297e-06, + "loss": 1.3043, + "step": 63100 + }, + { + "epoch": 93.35, + "learning_rate": 5.172374429223744e-06, + "loss": 1.2749, + "step": 63200 + }, + { + "epoch": 93.5, + "learning_rate": 5.058219178082192e-06, + "loss": 1.2701, + "step": 63300 + }, + { + "epoch": 93.65, + "learning_rate": 4.944063926940639e-06, + "loss": 1.3151, + "step": 63400 + }, + { + "epoch": 93.8, + "learning_rate": 4.829908675799086e-06, + "loss": 1.2707, + "step": 63500 + }, + { + "epoch": 93.8, + "eval_cer": 0.20465769925717728, + "eval_loss": 0.8097530603408813, + "eval_runtime": 113.7446, + "eval_samples_per_second": 17.768, + "eval_steps_per_second": 2.224, + "eval_wer": 0.8401781296387927, + "step": 63500 + }, + { + "epoch": 93.94, + "learning_rate": 4.715753424657534e-06, + "loss": 1.2936, + "step": 63600 + }, + { + "epoch": 94.09, + "learning_rate": 4.601598173515981e-06, + "loss": 1.2972, + "step": 63700 + }, + { + "epoch": 94.24, + "learning_rate": 4.487442922374429e-06, + "loss": 1.2465, + "step": 63800 + }, + { + "epoch": 94.39, + "learning_rate": 4.3732876712328764e-06, + "loss": 1.3015, + "step": 63900 + }, + { + "epoch": 94.53, + "learning_rate": 4.2591324200913244e-06, + "loss": 1.3026, + "step": 64000 + }, + { + "epoch": 94.53, + "eval_cer": 0.20501907247540654, + "eval_loss": 0.8096847534179688, + "eval_runtime": 113.8078, + "eval_samples_per_second": 17.758, + "eval_steps_per_second": 2.223, + "eval_wer": 0.8401781296387927, + "step": 64000 + }, + { + "epoch": 94.68, + "learning_rate": 4.144977168949772e-06, + "loss": 1.3007, + "step": 64100 + }, + { + "epoch": 94.83, + "learning_rate": 4.030821917808219e-06, + "loss": 1.2671, + "step": 64200 + }, + { + "epoch": 94.98, + "learning_rate": 3.916666666666667e-06, + "loss": 1.2834, + "step": 64300 + }, + { + "epoch": 95.13, + "learning_rate": 3.802511415525114e-06, + "loss": 1.3017, + "step": 64400 + }, + { + "epoch": 95.27, + "learning_rate": 3.689497716894977e-06, + "loss": 1.2546, + "step": 64500 + }, + { + "epoch": 95.27, + "eval_cer": 0.2054607508532423, + "eval_loss": 0.8111132383346558, + "eval_runtime": 114.1208, + "eval_samples_per_second": 17.709, + "eval_steps_per_second": 2.217, + "eval_wer": 0.8401781296387927, + "step": 64500 + }, + { + "epoch": 95.42, + "learning_rate": 3.5753424657534245e-06, + "loss": 1.3115, + "step": 64600 + }, + { + "epoch": 95.57, + "learning_rate": 3.461187214611872e-06, + "loss": 1.2918, + "step": 64700 + }, + { + "epoch": 95.72, + "learning_rate": 3.347031963470319e-06, + "loss": 1.2863, + "step": 64800 + }, + { + "epoch": 95.86, + "learning_rate": 3.2328767123287667e-06, + "loss": 1.3075, + "step": 64900 + }, + { + "epoch": 96.01, + "learning_rate": 3.1187214611872143e-06, + "loss": 1.2426, + "step": 65000 + }, + { + "epoch": 96.01, + "eval_cer": 0.20594258181088135, + "eval_loss": 0.8087843656539917, + "eval_runtime": 113.7192, + "eval_samples_per_second": 17.772, + "eval_steps_per_second": 2.225, + "eval_wer": 0.8372093023255814, + "step": 65000 + }, + { + "epoch": 96.16, + "learning_rate": 3.004566210045662e-06, + "loss": 1.2996, + "step": 65100 + }, + { + "epoch": 96.31, + "learning_rate": 2.8904109589041095e-06, + "loss": 1.3113, + "step": 65200 + }, + { + "epoch": 96.45, + "learning_rate": 2.776255707762557e-06, + "loss": 1.287, + "step": 65300 + }, + { + "epoch": 96.6, + "learning_rate": 2.662100456621004e-06, + "loss": 1.2838, + "step": 65400 + }, + { + "epoch": 96.75, + "learning_rate": 2.5479452054794517e-06, + "loss": 1.2869, + "step": 65500 + }, + { + "epoch": 96.75, + "eval_cer": 0.20481830957639027, + "eval_loss": 0.8092992305755615, + "eval_runtime": 113.4179, + "eval_samples_per_second": 17.819, + "eval_steps_per_second": 2.231, + "eval_wer": 0.8396833250865908, + "step": 65500 + }, + { + "epoch": 96.9, + "learning_rate": 2.4337899543378993e-06, + "loss": 1.2994, + "step": 65600 + }, + { + "epoch": 97.05, + "learning_rate": 2.319634703196347e-06, + "loss": 1.2983, + "step": 65700 + }, + { + "epoch": 97.19, + "learning_rate": 2.2054794520547945e-06, + "loss": 1.2737, + "step": 65800 + }, + { + "epoch": 97.34, + "learning_rate": 2.091324200913242e-06, + "loss": 1.2965, + "step": 65900 + }, + { + "epoch": 97.49, + "learning_rate": 1.9771689497716896e-06, + "loss": 1.2782, + "step": 66000 + }, + { + "epoch": 97.49, + "eval_cer": 0.2048986147359968, + "eval_loss": 0.8099350929260254, + "eval_runtime": 113.4967, + "eval_samples_per_second": 17.807, + "eval_steps_per_second": 2.229, + "eval_wer": 0.8411677387431964, + "step": 66000 + }, + { + "epoch": 97.64, + "learning_rate": 1.863013698630137e-06, + "loss": 1.2485, + "step": 66100 + }, + { + "epoch": 97.78, + "learning_rate": 1.7488584474885843e-06, + "loss": 1.281, + "step": 66200 + }, + { + "epoch": 97.93, + "learning_rate": 1.6347031963470319e-06, + "loss": 1.2679, + "step": 66300 + }, + { + "epoch": 98.08, + "learning_rate": 1.5205479452054794e-06, + "loss": 1.2255, + "step": 66400 + }, + { + "epoch": 98.23, + "learning_rate": 1.406392694063927e-06, + "loss": 1.2457, + "step": 66500 + }, + { + "epoch": 98.23, + "eval_cer": 0.20618349728970087, + "eval_loss": 0.8133619427680969, + "eval_runtime": 113.8096, + "eval_samples_per_second": 17.758, + "eval_steps_per_second": 2.223, + "eval_wer": 0.8411677387431964, + "step": 66500 + }, + { + "epoch": 98.38, + "learning_rate": 1.2922374429223744e-06, + "loss": 1.2885, + "step": 66600 + }, + { + "epoch": 98.52, + "learning_rate": 1.178082191780822e-06, + "loss": 1.2787, + "step": 66700 + }, + { + "epoch": 98.67, + "learning_rate": 1.0639269406392693e-06, + "loss": 1.271, + "step": 66800 + }, + { + "epoch": 98.82, + "learning_rate": 9.497716894977168e-07, + "loss": 1.2858, + "step": 66900 + }, + { + "epoch": 98.97, + "learning_rate": 8.356164383561643e-07, + "loss": 1.2967, + "step": 67000 + }, + { + "epoch": 98.97, + "eval_cer": 0.20554105601284883, + "eval_loss": 0.8114518523216248, + "eval_runtime": 113.394, + "eval_samples_per_second": 17.823, + "eval_steps_per_second": 2.231, + "eval_wer": 0.8381989114299851, + "step": 67000 + }, + { + "epoch": 99.11, + "learning_rate": 7.214611872146118e-07, + "loss": 1.3312, + "step": 67100 + }, + { + "epoch": 99.26, + "learning_rate": 6.073059360730594e-07, + "loss": 1.2486, + "step": 67200 + }, + { + "epoch": 99.41, + "learning_rate": 4.931506849315068e-07, + "loss": 1.2672, + "step": 67300 + }, + { + "epoch": 99.56, + "learning_rate": 3.7899543378995425e-07, + "loss": 1.2776, + "step": 67400 + }, + { + "epoch": 99.7, + "learning_rate": 2.648401826484018e-07, + "loss": 1.2817, + "step": 67500 + }, + { + "epoch": 99.7, + "eval_cer": 0.20626380244930737, + "eval_loss": 0.8127681612968445, + "eval_runtime": 113.1151, + "eval_samples_per_second": 17.867, + "eval_steps_per_second": 2.237, + "eval_wer": 0.839188520534389, + "step": 67500 + }, + { + "epoch": 99.85, + "learning_rate": 1.5068493150684929e-07, + "loss": 1.2743, + "step": 67600 + }, + { + "epoch": 100.0, + "learning_rate": 3.6529680365296796e-08, + "loss": 1.2394, + "step": 67700 + }, + { + "epoch": 100.0, + "step": 67700, + "total_flos": 3.080356462743465e+20, + "train_loss": 3.0068138397532342, + "train_runtime": 143778.5903, + "train_samples_per_second": 15.073, + "train_steps_per_second": 0.471 + } + ], + "max_steps": 67700, + "num_train_epochs": 100, + "total_flos": 3.080356462743465e+20, + "trial_name": null, + "trial_params": null +}