{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.98461538461538, "global_step": 3200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.12, "learning_rate": 1.9200000000000003e-06, "loss": 10.7305, "step": 100 }, { "epoch": 6.25, "learning_rate": 3.920000000000001e-06, "loss": 3.0098, "step": 200 }, { "epoch": 9.37, "learning_rate": 5.92e-06, "loss": 2.9327, "step": 300 }, { "epoch": 12.49, "learning_rate": 7.92e-06, "loss": 2.8216, "step": 400 }, { "epoch": 15.62, "learning_rate": 9.920000000000002e-06, "loss": 2.3731, "step": 500 }, { "epoch": 15.62, "eval_loss": 1.5517226457595825, "eval_runtime": 24.8327, "eval_samples_per_second": 20.497, "eval_steps_per_second": 1.289, "eval_wer": 0.9499121265377856, "step": 500 }, { "epoch": 18.74, "learning_rate": 1.1920000000000001e-05, "loss": 1.9105, "step": 600 }, { "epoch": 21.86, "learning_rate": 1.392e-05, "loss": 1.714, "step": 700 }, { "epoch": 24.98, "learning_rate": 1.5920000000000003e-05, "loss": 1.5476, "step": 800 }, { "epoch": 28.12, "learning_rate": 1.792e-05, "loss": 1.4238, "step": 900 }, { "epoch": 31.25, "learning_rate": 1.9920000000000002e-05, "loss": 1.3312, "step": 1000 }, { "epoch": 31.25, "eval_loss": 0.8717297911643982, "eval_runtime": 24.7966, "eval_samples_per_second": 20.527, "eval_steps_per_second": 1.29, "eval_wer": 0.6189220855301699, "step": 1000 }, { "epoch": 34.37, "learning_rate": 1.912727272727273e-05, "loss": 1.2049, "step": 1100 }, { "epoch": 37.49, "learning_rate": 1.821818181818182e-05, "loss": 1.1346, "step": 1200 }, { "epoch": 40.62, "learning_rate": 1.730909090909091e-05, "loss": 1.0533, "step": 1300 }, { "epoch": 43.74, "learning_rate": 1.64e-05, "loss": 0.9638, "step": 1400 }, { "epoch": 46.86, "learning_rate": 1.549090909090909e-05, "loss": 0.9135, "step": 1500 }, { "epoch": 46.86, "eval_loss": 0.8298946619033813, "eval_runtime": 24.721, "eval_samples_per_second": 20.59, "eval_steps_per_second": 1.294, "eval_wer": 0.5310486233157586, "step": 1500 }, { "epoch": 49.98, "learning_rate": 1.4581818181818184e-05, "loss": 0.8568, "step": 1600 }, { "epoch": 53.12, "learning_rate": 1.3672727272727273e-05, "loss": 0.8141, "step": 1700 }, { "epoch": 56.25, "learning_rate": 1.2763636363636365e-05, "loss": 0.7526, "step": 1800 }, { "epoch": 59.37, "learning_rate": 1.1854545454545457e-05, "loss": 0.7177, "step": 1900 }, { "epoch": 62.49, "learning_rate": 1.0945454545454545e-05, "loss": 0.6719, "step": 2000 }, { "epoch": 62.49, "eval_loss": 0.8842366933822632, "eval_runtime": 25.0435, "eval_samples_per_second": 20.325, "eval_steps_per_second": 1.278, "eval_wer": 0.5043936731107206, "step": 2000 }, { "epoch": 65.62, "learning_rate": 1.0036363636363637e-05, "loss": 0.6552, "step": 2100 }, { "epoch": 68.74, "learning_rate": 9.127272727272727e-06, "loss": 0.6145, "step": 2200 }, { "epoch": 71.86, "learning_rate": 8.21818181818182e-06, "loss": 0.596, "step": 2300 }, { "epoch": 74.98, "learning_rate": 7.30909090909091e-06, "loss": 0.5719, "step": 2400 }, { "epoch": 78.12, "learning_rate": 6.4000000000000006e-06, "loss": 0.5583, "step": 2500 }, { "epoch": 78.12, "eval_loss": 0.9093144536018372, "eval_runtime": 24.6074, "eval_samples_per_second": 20.685, "eval_steps_per_second": 1.3, "eval_wer": 0.4800820152314001, "step": 2500 }, { "epoch": 81.25, "learning_rate": 5.490909090909091e-06, "loss": 0.5417, "step": 2600 }, { "epoch": 84.37, "learning_rate": 4.581818181818183e-06, "loss": 0.5241, "step": 2700 }, { "epoch": 87.49, "learning_rate": 3.672727272727273e-06, "loss": 0.4901, "step": 2800 }, { "epoch": 90.62, "learning_rate": 2.763636363636364e-06, "loss": 0.4882, "step": 2900 }, { "epoch": 93.74, "learning_rate": 1.8545454545454546e-06, "loss": 0.4728, "step": 3000 }, { "epoch": 93.74, "eval_loss": 0.9488239884376526, "eval_runtime": 24.6884, "eval_samples_per_second": 20.617, "eval_steps_per_second": 1.296, "eval_wer": 0.48125366139425896, "step": 3000 }, { "epoch": 96.86, "learning_rate": 9.454545454545455e-07, "loss": 0.4682, "step": 3100 }, { "epoch": 99.98, "learning_rate": 3.636363636363637e-08, "loss": 0.4634, "step": 3200 }, { "epoch": 99.98, "step": 3200, "total_flos": 3.840629068156852e+19, "train_loss": 1.4163260304927825, "train_runtime": 8387.3816, "train_samples_per_second": 12.34, "train_steps_per_second": 0.382 } ], "max_steps": 3200, "num_train_epochs": 100, "total_flos": 3.840629068156852e+19, "trial_name": null, "trial_params": null }