{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.99970457902511, "global_step": 16920, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.47, "learning_rate": 0.00029775, "loss": 3.3587, "step": 400 }, { "epoch": 0.47, "eval_loss": 1.1883399486541748, "eval_runtime": 298.8414, "eval_samples_per_second": 16.986, "eval_steps_per_second": 2.831, "eval_wer": 0.8392195865162833, "step": 400 }, { "epoch": 0.95, "learning_rate": 0.00029279055690072635, "loss": 1.8377, "step": 800 }, { "epoch": 0.95, "eval_loss": 0.8830727338790894, "eval_runtime": 295.1554, "eval_samples_per_second": 17.198, "eval_steps_per_second": 2.866, "eval_wer": 0.6852318175085172, "step": 800 }, { "epoch": 1.42, "learning_rate": 0.00028552663438256653, "loss": 1.7118, "step": 1200 }, { "epoch": 1.42, "eval_loss": 0.8031275868415833, "eval_runtime": 296.1538, "eval_samples_per_second": 17.14, "eval_steps_per_second": 2.857, "eval_wer": 0.6565588166832427, "step": 1200 }, { "epoch": 1.89, "learning_rate": 0.00027826271186440676, "loss": 1.6741, "step": 1600 }, { "epoch": 1.89, "eval_loss": 0.7517648339271545, "eval_runtime": 296.1537, "eval_samples_per_second": 17.14, "eval_steps_per_second": 2.857, "eval_wer": 0.6104492456144063, "step": 1600 }, { "epoch": 2.36, "learning_rate": 0.00027099878934624694, "loss": 1.6163, "step": 2000 }, { "epoch": 2.36, "eval_loss": 0.6887586712837219, "eval_runtime": 298.4301, "eval_samples_per_second": 17.009, "eval_steps_per_second": 2.835, "eval_wer": 0.5590706138773092, "step": 2000 }, { "epoch": 2.84, "learning_rate": 0.0002637348668280871, "loss": 1.5782, "step": 2400 }, { "epoch": 2.84, "eval_loss": 0.6580386161804199, "eval_runtime": 296.628, "eval_samples_per_second": 17.112, "eval_steps_per_second": 2.852, "eval_wer": 0.5164525890344288, "step": 2400 }, { "epoch": 3.31, "learning_rate": 0.0002564709443099273, "loss": 1.5548, "step": 2800 }, { "epoch": 3.31, "eval_loss": 0.6505530476570129, "eval_runtime": 300.5251, "eval_samples_per_second": 16.89, "eval_steps_per_second": 2.815, "eval_wer": 0.5184417123389128, "step": 2800 }, { "epoch": 3.78, "learning_rate": 0.00024920702179176754, "loss": 1.5249, "step": 3200 }, { "epoch": 3.78, "eval_loss": 0.6197642683982849, "eval_runtime": 298.2224, "eval_samples_per_second": 17.021, "eval_steps_per_second": 2.837, "eval_wer": 0.5028461391963095, "step": 3200 }, { "epoch": 4.26, "learning_rate": 0.00024194309927360772, "loss": 1.5078, "step": 3600 }, { "epoch": 4.26, "eval_loss": 0.5992246866226196, "eval_runtime": 298.0744, "eval_samples_per_second": 17.029, "eval_steps_per_second": 2.838, "eval_wer": 0.4932179359671583, "step": 3600 }, { "epoch": 4.73, "learning_rate": 0.0002346791767554479, "loss": 1.4836, "step": 4000 }, { "epoch": 4.73, "eval_loss": 0.5705241560935974, "eval_runtime": 292.5306, "eval_samples_per_second": 17.352, "eval_steps_per_second": 2.892, "eval_wer": 0.46513743995598533, "step": 4000 }, { "epoch": 5.2, "learning_rate": 0.0002274152542372881, "loss": 1.4505, "step": 4400 }, { "epoch": 5.2, "eval_loss": 0.5488837957382202, "eval_runtime": 300.7764, "eval_samples_per_second": 16.876, "eval_steps_per_second": 2.813, "eval_wer": 0.4507691982140212, "step": 4400 }, { "epoch": 5.67, "learning_rate": 0.00022015133171912832, "loss": 1.4481, "step": 4800 }, { "epoch": 5.67, "eval_loss": 0.5577110052108765, "eval_runtime": 299.1357, "eval_samples_per_second": 16.969, "eval_steps_per_second": 2.828, "eval_wer": 0.45624986774446114, "step": 4800 }, { "epoch": 6.15, "learning_rate": 0.0002128874092009685, "loss": 1.4136, "step": 5200 }, { "epoch": 6.15, "eval_loss": 0.5452219247817993, "eval_runtime": 298.6905, "eval_samples_per_second": 16.994, "eval_steps_per_second": 2.832, "eval_wer": 0.4370992657172482, "step": 5200 }, { "epoch": 6.62, "learning_rate": 0.00020564164648910412, "loss": 1.3861, "step": 5600 }, { "epoch": 6.62, "eval_loss": 0.510087788105011, "eval_runtime": 298.7447, "eval_samples_per_second": 16.991, "eval_steps_per_second": 2.832, "eval_wer": 0.40865903464037073, "step": 5600 }, { "epoch": 7.09, "learning_rate": 0.0001983777239709443, "loss": 1.3772, "step": 6000 }, { "epoch": 7.09, "eval_loss": 0.49333110451698303, "eval_runtime": 298.2699, "eval_samples_per_second": 17.018, "eval_steps_per_second": 2.836, "eval_wer": 0.39513722834712317, "step": 6000 }, { "epoch": 7.56, "learning_rate": 0.00019111380145278448, "loss": 1.3478, "step": 6400 }, { "epoch": 7.56, "eval_loss": 0.4849308133125305, "eval_runtime": 299.279, "eval_samples_per_second": 16.961, "eval_steps_per_second": 2.827, "eval_wer": 0.3921747042766151, "step": 6400 }, { "epoch": 8.04, "learning_rate": 0.0001838498789346247, "loss": 1.3394, "step": 6800 }, { "epoch": 8.04, "eval_loss": 0.4805210828781128, "eval_runtime": 297.1556, "eval_samples_per_second": 17.082, "eval_steps_per_second": 2.847, "eval_wer": 0.3891698584336712, "step": 6800 }, { "epoch": 8.51, "learning_rate": 0.00017658595641646487, "loss": 1.3095, "step": 7200 }, { "epoch": 8.51, "eval_loss": 0.48388615250587463, "eval_runtime": 298.6068, "eval_samples_per_second": 16.999, "eval_steps_per_second": 2.833, "eval_wer": 0.38335061472374465, "step": 7200 }, { "epoch": 8.98, "learning_rate": 0.00016932203389830508, "loss": 1.306, "step": 7600 }, { "epoch": 8.98, "eval_loss": 0.461063027381897, "eval_runtime": 296.7161, "eval_samples_per_second": 17.107, "eval_steps_per_second": 2.851, "eval_wer": 0.35865586050743803, "step": 7600 }, { "epoch": 9.46, "learning_rate": 0.00016205811138014526, "loss": 1.2707, "step": 8000 }, { "epoch": 9.46, "eval_loss": 0.4544948935508728, "eval_runtime": 298.2639, "eval_samples_per_second": 17.018, "eval_steps_per_second": 2.836, "eval_wer": 0.3730241022494022, "step": 8000 }, { "epoch": 9.93, "learning_rate": 0.00015479418886198547, "loss": 1.2626, "step": 8400 }, { "epoch": 9.93, "eval_loss": 0.4515869617462158, "eval_runtime": 299.4234, "eval_samples_per_second": 16.953, "eval_steps_per_second": 2.825, "eval_wer": 0.35241339907315317, "step": 8400 }, { "epoch": 10.4, "learning_rate": 0.00014753026634382565, "loss": 1.2412, "step": 8800 }, { "epoch": 10.4, "eval_loss": 0.4314204454421997, "eval_runtime": 294.8808, "eval_samples_per_second": 17.214, "eval_steps_per_second": 2.869, "eval_wer": 0.3310197431068413, "step": 8800 }, { "epoch": 10.87, "learning_rate": 0.00014026634382566583, "loss": 1.2456, "step": 9200 }, { "epoch": 10.87, "eval_loss": 0.4400792419910431, "eval_runtime": 296.6883, "eval_samples_per_second": 17.109, "eval_steps_per_second": 2.851, "eval_wer": 0.3458958461180354, "step": 9200 }, { "epoch": 11.35, "learning_rate": 0.00013302058111380144, "loss": 1.2081, "step": 9600 }, { "epoch": 11.35, "eval_loss": 0.4399240016937256, "eval_runtime": 295.1566, "eval_samples_per_second": 17.198, "eval_steps_per_second": 2.866, "eval_wer": 0.33563281630234676, "step": 9600 }, { "epoch": 11.82, "learning_rate": 0.00012575665859564165, "loss": 1.1998, "step": 10000 }, { "epoch": 11.82, "eval_loss": 0.41947221755981445, "eval_runtime": 296.8576, "eval_samples_per_second": 17.099, "eval_steps_per_second": 2.85, "eval_wer": 0.3215396660812155, "step": 10000 }, { "epoch": 12.29, "learning_rate": 0.00011851089588377723, "loss": 1.1826, "step": 10400 }, { "epoch": 12.29, "eval_loss": 0.4220864474773407, "eval_runtime": 296.3734, "eval_samples_per_second": 17.127, "eval_steps_per_second": 2.855, "eval_wer": 0.3177518674482087, "step": 10400 }, { "epoch": 12.77, "learning_rate": 0.00011124697336561742, "loss": 1.1573, "step": 10800 }, { "epoch": 12.77, "eval_loss": 0.4097737967967987, "eval_runtime": 297.6541, "eval_samples_per_second": 17.053, "eval_steps_per_second": 2.842, "eval_wer": 0.3084199166261083, "step": 10800 }, { "epoch": 13.24, "learning_rate": 0.00010398305084745762, "loss": 1.1416, "step": 11200 }, { "epoch": 13.24, "eval_loss": 0.4085627496242523, "eval_runtime": 296.7019, "eval_samples_per_second": 17.108, "eval_steps_per_second": 2.851, "eval_wer": 0.3119114628520642, "step": 11200 }, { "epoch": 13.71, "learning_rate": 9.671912832929781e-05, "loss": 1.1174, "step": 11600 }, { "epoch": 13.71, "eval_loss": 0.3854358196258545, "eval_runtime": 297.2728, "eval_samples_per_second": 17.075, "eval_steps_per_second": 2.846, "eval_wer": 0.2910468290412003, "step": 11600 }, { "epoch": 14.18, "learning_rate": 8.947336561743341e-05, "loss": 1.1048, "step": 12000 }, { "epoch": 14.18, "eval_loss": 0.38590070605278015, "eval_runtime": 297.414, "eval_samples_per_second": 17.067, "eval_steps_per_second": 2.845, "eval_wer": 0.282413187464291, "step": 12000 }, { "epoch": 14.66, "learning_rate": 8.22094430992736e-05, "loss": 1.0748, "step": 12400 }, { "epoch": 14.66, "eval_loss": 0.3854171633720398, "eval_runtime": 295.6597, "eval_samples_per_second": 17.168, "eval_steps_per_second": 2.861, "eval_wer": 0.275705186533212, "step": 12400 }, { "epoch": 15.13, "learning_rate": 7.494552058111379e-05, "loss": 1.0697, "step": 12800 }, { "epoch": 15.13, "eval_loss": 0.37401217222213745, "eval_runtime": 297.4379, "eval_samples_per_second": 17.066, "eval_steps_per_second": 2.844, "eval_wer": 0.2723829273969994, "step": 12800 }, { "epoch": 15.6, "learning_rate": 6.7681598062954e-05, "loss": 1.0477, "step": 13200 }, { "epoch": 15.6, "eval_loss": 0.3693487048149109, "eval_runtime": 298.4761, "eval_samples_per_second": 17.006, "eval_steps_per_second": 2.834, "eval_wer": 0.2642994688617559, "step": 13200 }, { "epoch": 16.08, "learning_rate": 6.0417675544794184e-05, "loss": 1.0356, "step": 13600 }, { "epoch": 16.08, "eval_loss": 0.3726678788661957, "eval_runtime": 298.583, "eval_samples_per_second": 17.0, "eval_steps_per_second": 2.833, "eval_wer": 0.2561102058954229, "step": 13600 }, { "epoch": 16.55, "learning_rate": 5.315375302663438e-05, "loss": 1.0083, "step": 14000 }, { "epoch": 16.55, "eval_loss": 0.36522331833839417, "eval_runtime": 301.7885, "eval_samples_per_second": 16.82, "eval_steps_per_second": 2.803, "eval_wer": 0.25007935332331716, "step": 14000 }, { "epoch": 17.02, "learning_rate": 4.5889830508474574e-05, "loss": 1.0, "step": 14400 }, { "epoch": 17.02, "eval_loss": 0.36411818861961365, "eval_runtime": 297.5091, "eval_samples_per_second": 17.062, "eval_steps_per_second": 2.844, "eval_wer": 0.24572021076242673, "step": 14400 }, { "epoch": 17.49, "learning_rate": 3.862590799031477e-05, "loss": 0.9779, "step": 14800 }, { "epoch": 17.49, "eval_loss": 0.35678642988204956, "eval_runtime": 295.5669, "eval_samples_per_second": 17.174, "eval_steps_per_second": 2.862, "eval_wer": 0.24089552870474215, "step": 14800 }, { "epoch": 17.97, "learning_rate": 3.1361985472154963e-05, "loss": 0.9596, "step": 15200 }, { "epoch": 17.97, "eval_loss": 0.3557918071746826, "eval_runtime": 296.6965, "eval_samples_per_second": 17.108, "eval_steps_per_second": 2.851, "eval_wer": 0.23759443045474746, "step": 15200 }, { "epoch": 18.44, "learning_rate": 2.4098062953995155e-05, "loss": 0.946, "step": 15600 }, { "epoch": 18.44, "eval_loss": 0.35907429456710815, "eval_runtime": 295.7242, "eval_samples_per_second": 17.165, "eval_steps_per_second": 2.861, "eval_wer": 0.23114036015828343, "step": 15600 }, { "epoch": 18.91, "learning_rate": 1.685230024213075e-05, "loss": 0.9389, "step": 16000 }, { "epoch": 18.91, "eval_loss": 0.3540380001068115, "eval_runtime": 296.3149, "eval_samples_per_second": 17.13, "eval_steps_per_second": 2.855, "eval_wer": 0.22828364051886493, "step": 16000 }, { "epoch": 19.39, "learning_rate": 9.588377723970942e-06, "loss": 0.9173, "step": 16400 }, { "epoch": 19.39, "eval_loss": 0.35515815019607544, "eval_runtime": 296.2267, "eval_samples_per_second": 17.136, "eval_steps_per_second": 2.856, "eval_wer": 0.2265061260765601, "step": 16400 }, { "epoch": 19.86, "learning_rate": 2.3244552058111378e-06, "loss": 0.9122, "step": 16800 }, { "epoch": 19.86, "eval_loss": 0.353456050157547, "eval_runtime": 298.3571, "eval_samples_per_second": 17.013, "eval_steps_per_second": 2.836, "eval_wer": 0.22502486404130606, "step": 16800 }, { "epoch": 20.0, "step": 16920, "total_flos": 1.9284410553255697e+20, "train_loss": 1.3125287554224614, "train_runtime": 67690.7896, "train_samples_per_second": 6.0, "train_steps_per_second": 0.25 } ], "max_steps": 16920, "num_train_epochs": 20, "total_flos": 1.9284410553255697e+20, "trial_name": null, "trial_params": null }