{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "global_step": 960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.56, "learning_rate": 7.5e-05, "loss": 13.0978, "step": 50 }, { "epoch": 1.56, "eval_loss": 13.780136108398438, "eval_runtime": 82.8799, "eval_samples_per_second": 7.891, "eval_wer": 1.0, "step": 50 }, { "epoch": 3.12, "learning_rate": 0.00015, "loss": 7.3093, "step": 100 }, { "epoch": 3.12, "eval_loss": 3.198237419128418, "eval_runtime": 81.5893, "eval_samples_per_second": 8.016, "eval_wer": 1.0, "step": 100 }, { "epoch": 4.69, "learning_rate": 0.000225, "loss": 3.0745, "step": 150 }, { "epoch": 4.69, "eval_loss": 3.1082892417907715, "eval_runtime": 82.4037, "eval_samples_per_second": 7.937, "eval_wer": 1.0, "step": 150 }, { "epoch": 6.25, "learning_rate": 0.0003, "loss": 3.0551, "step": 200 }, { "epoch": 6.25, "eval_loss": 3.0994772911071777, "eval_runtime": 82.7226, "eval_samples_per_second": 7.906, "eval_wer": 1.0, "step": 200 }, { "epoch": 7.81, "learning_rate": 0.00028026315789473683, "loss": 3.0632, "step": 250 }, { "epoch": 7.81, "eval_loss": 3.0916755199432373, "eval_runtime": 83.5323, "eval_samples_per_second": 7.829, "eval_wer": 1.0, "step": 250 }, { "epoch": 9.38, "learning_rate": 0.0002605263157894737, "loss": 3.0391, "step": 300 }, { "epoch": 9.38, "eval_loss": 3.0707435607910156, "eval_runtime": 82.7328, "eval_samples_per_second": 7.905, "eval_wer": 1.0, "step": 300 }, { "epoch": 10.94, "learning_rate": 0.00024078947368421052, "loss": 3.0321, "step": 350 }, { "epoch": 10.94, "eval_loss": 3.0443670749664307, "eval_runtime": 84.1437, "eval_samples_per_second": 7.772, "eval_wer": 1.0, "step": 350 }, { "epoch": 12.5, "learning_rate": 0.00022105263157894733, "loss": 3.0069, "step": 400 }, { "epoch": 12.5, "eval_loss": 2.998474359512329, "eval_runtime": 83.9178, "eval_samples_per_second": 7.793, "eval_wer": 1.0, "step": 400 }, { "epoch": 14.06, "learning_rate": 0.0002013157894736842, "loss": 2.9623, "step": 450 }, { "epoch": 14.06, "eval_loss": 2.866849184036255, "eval_runtime": 82.5906, "eval_samples_per_second": 7.919, "eval_wer": 1.0, "step": 450 }, { "epoch": 15.62, "learning_rate": 0.00018157894736842105, "loss": 2.4771, "step": 500 }, { "epoch": 15.62, "eval_loss": 1.5367902517318726, "eval_runtime": 85.6456, "eval_samples_per_second": 7.636, "eval_wer": 0.9838912133891213, "step": 500 }, { "epoch": 17.19, "learning_rate": 0.00016184210526315788, "loss": 1.0561, "step": 550 }, { "epoch": 17.19, "eval_loss": 0.6924143433570862, "eval_runtime": 85.1658, "eval_samples_per_second": 7.679, "eval_wer": 0.7548117154811715, "step": 550 }, { "epoch": 18.75, "learning_rate": 0.0001421052631578947, "loss": 0.5288, "step": 600 }, { "epoch": 18.75, "eval_loss": 0.5334728956222534, "eval_runtime": 83.737, "eval_samples_per_second": 7.81, "eval_wer": 0.6569037656903766, "step": 600 }, { "epoch": 20.31, "learning_rate": 0.00012236842105263157, "loss": 0.3581, "step": 650 }, { "epoch": 20.31, "eval_loss": 0.48591092228889465, "eval_runtime": 86.2479, "eval_samples_per_second": 7.583, "eval_wer": 0.605857740585774, "step": 650 }, { "epoch": 21.88, "learning_rate": 0.00010263157894736841, "loss": 0.2638, "step": 700 }, { "epoch": 21.88, "eval_loss": 0.4631027579307556, "eval_runtime": 84.0825, "eval_samples_per_second": 7.778, "eval_wer": 0.5648535564853556, "step": 700 }, { "epoch": 23.44, "learning_rate": 8.289473684210526e-05, "loss": 0.2284, "step": 750 }, { "epoch": 23.44, "eval_loss": 0.4597685933113098, "eval_runtime": 86.122, "eval_samples_per_second": 7.594, "eval_wer": 0.5594142259414226, "step": 750 }, { "epoch": 25.0, "learning_rate": 6.315789473684209e-05, "loss": 0.1965, "step": 800 }, { "epoch": 25.0, "eval_loss": 0.4614764153957367, "eval_runtime": 86.0272, "eval_samples_per_second": 7.602, "eval_wer": 0.5535564853556485, "step": 800 }, { "epoch": 26.56, "learning_rate": 4.342105263157895e-05, "loss": 0.1837, "step": 850 }, { "epoch": 26.56, "eval_loss": 0.4499300718307495, "eval_runtime": 89.3292, "eval_samples_per_second": 7.321, "eval_wer": 0.5349372384937239, "step": 850 }, { "epoch": 28.12, "learning_rate": 2.3684210526315787e-05, "loss": 0.187, "step": 900 }, { "epoch": 28.12, "eval_loss": 0.45425695180892944, "eval_runtime": 85.6275, "eval_samples_per_second": 7.638, "eval_wer": 0.5345188284518828, "step": 900 }, { "epoch": 29.69, "learning_rate": 3.947368421052631e-06, "loss": 0.1568, "step": 950 }, { "epoch": 29.69, "eval_loss": 0.4458238184452057, "eval_runtime": 84.9753, "eval_samples_per_second": 7.696, "eval_wer": 0.5290794979079498, "step": 950 }, { "epoch": 30.0, "step": 960, "total_flos": 8.556740517881789e+18, "train_runtime": 8781.3793, "train_samples_per_second": 0.109 }, { "epoch": 30.0, "eval_loss": 0.4455166161060333, "eval_runtime": 81.4513, "eval_samples_per_second": 8.029, "eval_wer": 0.5288702928870292, "step": 960 } ], "max_steps": 960, "num_train_epochs": 30, "total_flos": 8.556740517881789e+18, "trial_name": null, "trial_params": null }