{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 3280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3, "learning_rate": 1.0000000000000002e-06, "loss": 9.6805, "step": 50 }, { "epoch": 0.61, "learning_rate": 2.0000000000000003e-06, "loss": 9.3474, "step": 100 }, { "epoch": 0.91, "learning_rate": 3e-06, "loss": 8.6058, "step": 150 }, { "epoch": 1.0, "eval_cer": 0.986131055853677, "eval_loss": 6.288315296173096, "eval_runtime": 21.6292, "eval_samples_per_second": 121.318, "eval_steps_per_second": 7.582, "eval_wer": 0.9742561721882254, "step": 164 }, { "epoch": 1.22, "learning_rate": 4.000000000000001e-06, "loss": 6.5168, "step": 200 }, { "epoch": 1.52, "learning_rate": 5e-06, "loss": 5.178, "step": 250 }, { "epoch": 1.83, "learning_rate": 6e-06, "loss": 4.6616, "step": 300 }, { "epoch": 2.0, "eval_cer": 0.986131055853677, "eval_loss": 4.045103549957275, "eval_runtime": 21.856, "eval_samples_per_second": 120.059, "eval_steps_per_second": 7.504, "eval_wer": 0.9742561721882254, "step": 328 }, { "epoch": 2.13, "learning_rate": 7e-06, "loss": 4.2963, "step": 350 }, { "epoch": 2.44, "learning_rate": 8.000000000000001e-06, "loss": 4.0823, "step": 400 }, { "epoch": 2.74, "learning_rate": 9e-06, "loss": 3.8526, "step": 450 }, { "epoch": 3.0, "eval_cer": 0.986131055853677, "eval_loss": 3.5416758060455322, "eval_runtime": 21.8041, "eval_samples_per_second": 120.344, "eval_steps_per_second": 7.522, "eval_wer": 0.9742561721882254, "step": 492 }, { "epoch": 3.05, "learning_rate": 1e-05, "loss": 3.7408, "step": 500 }, { "epoch": 3.35, "learning_rate": 1.1000000000000001e-05, "loss": 3.5235, "step": 550 }, { "epoch": 3.66, "learning_rate": 1.2e-05, "loss": 3.3617, "step": 600 }, { "epoch": 3.96, "learning_rate": 1.3000000000000001e-05, "loss": 3.2384, "step": 650 }, { "epoch": 4.0, "eval_cer": 0.986131055853677, "eval_loss": 3.0504775047302246, "eval_runtime": 22.0029, "eval_samples_per_second": 119.257, "eval_steps_per_second": 7.454, "eval_wer": 0.9742561721882254, "step": 656 }, { "epoch": 4.27, "learning_rate": 1.4e-05, "loss": 3.0665, "step": 700 }, { "epoch": 4.57, "learning_rate": 1.5000000000000002e-05, "loss": 2.9046, "step": 750 }, { "epoch": 4.88, "learning_rate": 1.6000000000000003e-05, "loss": 2.7948, "step": 800 }, { "epoch": 5.0, "eval_cer": 0.986131055853677, "eval_loss": 2.670631170272827, "eval_runtime": 21.7032, "eval_samples_per_second": 120.904, "eval_steps_per_second": 7.556, "eval_wer": 0.9742561721882254, "step": 820 }, { "epoch": 5.18, "learning_rate": 1.7e-05, "loss": 2.6959, "step": 850 }, { "epoch": 5.49, "learning_rate": 1.8e-05, "loss": 2.6047, "step": 900 }, { "epoch": 5.79, "learning_rate": 1.9e-05, "loss": 2.549, "step": 950 }, { "epoch": 6.0, "eval_cer": 0.986131055853677, "eval_loss": 2.426814317703247, "eval_runtime": 21.9773, "eval_samples_per_second": 119.396, "eval_steps_per_second": 7.462, "eval_wer": 0.9742561721882254, "step": 984 }, { "epoch": 6.1, "learning_rate": 2e-05, "loss": 2.4746, "step": 1000 }, { "epoch": 6.4, "learning_rate": 1.9561403508771933e-05, "loss": 2.353, "step": 1050 }, { "epoch": 6.71, "learning_rate": 1.912280701754386e-05, "loss": 2.1808, "step": 1100 }, { "epoch": 7.0, "eval_cer": 0.986131055853677, "eval_loss": 1.8554401397705078, "eval_runtime": 21.7818, "eval_samples_per_second": 120.467, "eval_steps_per_second": 7.529, "eval_wer": 0.9742561721882254, "step": 1148 }, { "epoch": 7.01, "learning_rate": 1.868421052631579e-05, "loss": 2.028, "step": 1150 }, { "epoch": 7.32, "learning_rate": 1.824561403508772e-05, "loss": 1.8932, "step": 1200 }, { "epoch": 7.62, "learning_rate": 1.780701754385965e-05, "loss": 1.7557, "step": 1250 }, { "epoch": 7.93, "learning_rate": 1.736842105263158e-05, "loss": 1.6069, "step": 1300 }, { "epoch": 8.0, "eval_cer": 0.6230821555437986, "eval_loss": 1.2551288604736328, "eval_runtime": 21.5405, "eval_samples_per_second": 121.817, "eval_steps_per_second": 7.614, "eval_wer": 0.6822114370120278, "step": 1312 }, { "epoch": 8.23, "learning_rate": 1.692982456140351e-05, "loss": 1.4826, "step": 1350 }, { "epoch": 8.54, "learning_rate": 1.649122807017544e-05, "loss": 1.3003, "step": 1400 }, { "epoch": 8.84, "learning_rate": 1.605263157894737e-05, "loss": 1.1916, "step": 1450 }, { "epoch": 9.0, "eval_cer": 0.22420829869246467, "eval_loss": 0.7985360026359558, "eval_runtime": 22.1233, "eval_samples_per_second": 118.608, "eval_steps_per_second": 7.413, "eval_wer": 0.3679397903917845, "step": 1476 }, { "epoch": 9.15, "learning_rate": 1.56140350877193e-05, "loss": 1.1239, "step": 1500 }, { "epoch": 9.45, "learning_rate": 1.517543859649123e-05, "loss": 1.0425, "step": 1550 }, { "epoch": 9.76, "learning_rate": 1.4736842105263159e-05, "loss": 0.9977, "step": 1600 }, { "epoch": 10.0, "eval_cer": 0.18267704633058726, "eval_loss": 0.6234104633331299, "eval_runtime": 21.4356, "eval_samples_per_second": 122.413, "eval_steps_per_second": 7.651, "eval_wer": 0.31180980516283324, "step": 1640 }, { "epoch": 10.06, "learning_rate": 1.429824561403509e-05, "loss": 0.9625, "step": 1650 }, { "epoch": 10.37, "learning_rate": 1.385964912280702e-05, "loss": 0.9316, "step": 1700 }, { "epoch": 10.67, "learning_rate": 1.3421052631578948e-05, "loss": 0.8949, "step": 1750 }, { "epoch": 10.98, "learning_rate": 1.2982456140350879e-05, "loss": 0.836, "step": 1800 }, { "epoch": 11.0, "eval_cer": 0.16431108759730934, "eval_loss": 0.510288417339325, "eval_runtime": 21.4424, "eval_samples_per_second": 122.374, "eval_steps_per_second": 7.648, "eval_wer": 0.2800872195259197, "step": 1804 }, { "epoch": 11.28, "learning_rate": 1.2543859649122808e-05, "loss": 0.821, "step": 1850 }, { "epoch": 11.59, "learning_rate": 1.2105263157894737e-05, "loss": 0.7918, "step": 1900 }, { "epoch": 11.89, "learning_rate": 1.1666666666666668e-05, "loss": 0.7515, "step": 1950 }, { "epoch": 12.0, "eval_cer": 0.1549391580379412, "eval_loss": 0.43052738904953003, "eval_runtime": 21.9755, "eval_samples_per_second": 119.406, "eval_steps_per_second": 7.463, "eval_wer": 0.26630090736442286, "step": 1968 }, { "epoch": 12.2, "learning_rate": 1.1228070175438597e-05, "loss": 0.7517, "step": 2000 }, { "epoch": 12.5, "learning_rate": 1.0789473684210528e-05, "loss": 0.7086, "step": 2050 }, { "epoch": 12.8, "learning_rate": 1.0350877192982459e-05, "loss": 0.7045, "step": 2100 }, { "epoch": 13.0, "eval_cer": 0.14133474416143904, "eval_loss": 0.3688310384750366, "eval_runtime": 21.9834, "eval_samples_per_second": 119.363, "eval_steps_per_second": 7.46, "eval_wer": 0.24892734050784274, "step": 2132 }, { "epoch": 13.11, "learning_rate": 9.912280701754386e-06, "loss": 0.6799, "step": 2150 }, { "epoch": 13.41, "learning_rate": 9.473684210526315e-06, "loss": 0.6578, "step": 2200 }, { "epoch": 13.72, "learning_rate": 9.035087719298246e-06, "loss": 0.6533, "step": 2250 }, { "epoch": 14.0, "eval_cer": 0.1339656866450004, "eval_loss": 0.3257971704006195, "eval_runtime": 21.7706, "eval_samples_per_second": 120.529, "eval_steps_per_second": 7.533, "eval_wer": 0.2398536962791025, "step": 2296 }, { "epoch": 14.02, "learning_rate": 8.596491228070176e-06, "loss": 0.6267, "step": 2300 }, { "epoch": 14.33, "learning_rate": 8.157894736842106e-06, "loss": 0.6625, "step": 2350 }, { "epoch": 14.63, "learning_rate": 7.719298245614036e-06, "loss": 0.6162, "step": 2400 }, { "epoch": 14.94, "learning_rate": 7.280701754385966e-06, "loss": 0.5906, "step": 2450 }, { "epoch": 15.0, "eval_cer": 0.1287884513642204, "eval_loss": 0.29405683279037476, "eval_runtime": 21.5389, "eval_samples_per_second": 121.826, "eval_steps_per_second": 7.614, "eval_wer": 0.23183512696068087, "step": 2460 }, { "epoch": 15.24, "learning_rate": 6.842105263157896e-06, "loss": 0.6021, "step": 2500 }, { "epoch": 15.55, "learning_rate": 6.403508771929825e-06, "loss": 0.5722, "step": 2550 }, { "epoch": 15.85, "learning_rate": 5.964912280701755e-06, "loss": 0.5746, "step": 2600 }, { "epoch": 16.0, "eval_cer": 0.12780591036202857, "eval_loss": 0.2748357355594635, "eval_runtime": 21.719, "eval_samples_per_second": 120.816, "eval_steps_per_second": 7.551, "eval_wer": 0.23000633044946192, "step": 2624 }, { "epoch": 16.16, "learning_rate": 5.526315789473685e-06, "loss": 0.559, "step": 2650 }, { "epoch": 16.46, "learning_rate": 5.087719298245615e-06, "loss": 0.5676, "step": 2700 }, { "epoch": 16.77, "learning_rate": 4.649122807017544e-06, "loss": 0.5169, "step": 2750 }, { "epoch": 17.0, "eval_cer": 0.12417806666162799, "eval_loss": 0.25731268525123596, "eval_runtime": 21.7044, "eval_samples_per_second": 120.897, "eval_steps_per_second": 7.556, "eval_wer": 0.224027572624323, "step": 2788 }, { "epoch": 17.07, "learning_rate": 4.210526315789474e-06, "loss": 0.5603, "step": 2800 }, { "epoch": 17.38, "learning_rate": 3.7719298245614037e-06, "loss": 0.5362, "step": 2850 }, { "epoch": 17.68, "learning_rate": 3.3333333333333333e-06, "loss": 0.5187, "step": 2900 }, { "epoch": 17.99, "learning_rate": 2.8947368421052634e-06, "loss": 0.5511, "step": 2950 }, { "epoch": 18.0, "eval_cer": 0.12277983523543194, "eval_loss": 0.24789051711559296, "eval_runtime": 21.566, "eval_samples_per_second": 121.673, "eval_steps_per_second": 7.605, "eval_wer": 0.22107336287543083, "step": 2952 }, { "epoch": 18.29, "learning_rate": 2.456140350877193e-06, "loss": 0.541, "step": 3000 }, { "epoch": 18.6, "learning_rate": 2.017543859649123e-06, "loss": 0.5095, "step": 3050 }, { "epoch": 18.9, "learning_rate": 1.5789473684210526e-06, "loss": 0.5318, "step": 3100 }, { "epoch": 19.0, "eval_cer": 0.12100370342377749, "eval_loss": 0.2409585565328598, "eval_runtime": 21.8589, "eval_samples_per_second": 120.043, "eval_steps_per_second": 7.503, "eval_wer": 0.21861152141802068, "step": 3116 }, { "epoch": 19.21, "learning_rate": 1.1403508771929824e-06, "loss": 0.5156, "step": 3150 }, { "epoch": 19.51, "learning_rate": 7.017543859649123e-07, "loss": 0.4971, "step": 3200 }, { "epoch": 19.82, "learning_rate": 2.6315789473684213e-07, "loss": 0.5174, "step": 3250 }, { "epoch": 20.0, "eval_cer": 0.12100370342377749, "eval_loss": 0.23930250108242035, "eval_runtime": 21.6599, "eval_samples_per_second": 121.146, "eval_steps_per_second": 7.572, "eval_wer": 0.21868185974537527, "step": 3280 }, { "epoch": 20.0, "step": 3280, "total_flos": 1.8383341927882616e+18, "train_loss": 1.9778692466456715, "train_runtime": 7257.7038, "train_samples_per_second": 28.924, "train_steps_per_second": 0.452 } ], "logging_steps": 50, "max_steps": 3280, "num_train_epochs": 20, "save_steps": 500, "total_flos": 1.8383341927882616e+18, "trial_name": null, "trial_params": null }