{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 3200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.12, "learning_rate": 1.3579999999999999e-05, "loss": 14.122, "step": 100 }, { "epoch": 6.25, "learning_rate": 2.758e-05, "loss": 5.0536, "step": 200 }, { "epoch": 9.38, "learning_rate": 4.157999999999999e-05, "loss": 3.5787, "step": 300 }, { "epoch": 12.5, "learning_rate": 5.558e-05, "loss": 3.0454, "step": 400 }, { "epoch": 15.62, "learning_rate": 6.958e-05, "loss": 2.9556, "step": 500 }, { "epoch": 15.62, "eval_loss": 2.929969549179077, "eval_runtime": 26.3053, "eval_samples_per_second": 16.004, "eval_steps_per_second": 16.004, "eval_wer": 1.0, "step": 500 }, { "epoch": 18.75, "learning_rate": 6.748518518518517e-05, "loss": 2.8989, "step": 600 }, { "epoch": 21.88, "learning_rate": 6.489259259259258e-05, "loss": 2.8712, "step": 700 }, { "epoch": 25.0, "learning_rate": 6.23e-05, "loss": 2.8548, "step": 800 }, { "epoch": 28.12, "learning_rate": 5.97074074074074e-05, "loss": 2.6412, "step": 900 }, { "epoch": 31.25, "learning_rate": 5.711481481481481e-05, "loss": 1.7874, "step": 1000 }, { "epoch": 31.25, "eval_loss": 0.7566312551498413, "eval_runtime": 26.7283, "eval_samples_per_second": 15.751, "eval_steps_per_second": 15.751, "eval_wer": 0.65087890625, "step": 1000 }, { "epoch": 34.38, "learning_rate": 5.4522222222222216e-05, "loss": 1.3943, "step": 1100 }, { "epoch": 37.5, "learning_rate": 5.1929629629629626e-05, "loss": 1.2438, "step": 1200 }, { "epoch": 40.62, "learning_rate": 4.933703703703703e-05, "loss": 1.1248, "step": 1300 }, { "epoch": 43.75, "learning_rate": 4.674444444444444e-05, "loss": 1.0708, "step": 1400 }, { "epoch": 46.88, "learning_rate": 4.4151851851851846e-05, "loss": 1.0131, "step": 1500 }, { "epoch": 46.88, "eval_loss": 0.36714494228363037, "eval_runtime": 25.9364, "eval_samples_per_second": 16.232, "eval_steps_per_second": 16.232, "eval_wer": 0.3828125, "step": 1500 }, { "epoch": 50.0, "learning_rate": 4.1559259259259257e-05, "loss": 0.9624, "step": 1600 }, { "epoch": 53.12, "learning_rate": 3.896666666666666e-05, "loss": 0.9292, "step": 1700 }, { "epoch": 56.25, "learning_rate": 3.637407407407407e-05, "loss": 0.899, "step": 1800 }, { "epoch": 59.38, "learning_rate": 3.3781481481481476e-05, "loss": 0.8697, "step": 1900 }, { "epoch": 62.5, "learning_rate": 3.1188888888888887e-05, "loss": 0.8439, "step": 2000 }, { "epoch": 62.5, "eval_loss": 0.33496010303497314, "eval_runtime": 26.254, "eval_samples_per_second": 16.036, "eval_steps_per_second": 16.036, "eval_wer": 0.341552734375, "step": 2000 }, { "epoch": 65.62, "learning_rate": 2.8596296296296294e-05, "loss": 0.8319, "step": 2100 }, { "epoch": 68.75, "learning_rate": 2.60037037037037e-05, "loss": 0.808, "step": 2200 }, { "epoch": 71.88, "learning_rate": 2.341111111111111e-05, "loss": 0.7916, "step": 2300 }, { "epoch": 75.0, "learning_rate": 2.0818518518518517e-05, "loss": 0.7731, "step": 2400 }, { "epoch": 78.12, "learning_rate": 1.8225925925925924e-05, "loss": 0.7502, "step": 2500 }, { "epoch": 78.12, "eval_loss": 0.3154859244823456, "eval_runtime": 26.713, "eval_samples_per_second": 15.76, "eval_steps_per_second": 15.76, "eval_wer": 0.32958984375, "step": 2500 }, { "epoch": 81.25, "learning_rate": 1.563333333333333e-05, "loss": 0.7307, "step": 2600 }, { "epoch": 84.38, "learning_rate": 1.3040740740740739e-05, "loss": 0.7262, "step": 2700 }, { "epoch": 87.5, "learning_rate": 1.0448148148148147e-05, "loss": 0.7154, "step": 2800 }, { "epoch": 90.62, "learning_rate": 7.855555555555554e-06, "loss": 0.7092, "step": 2900 }, { "epoch": 93.75, "learning_rate": 5.2629629629629625e-06, "loss": 0.7093, "step": 3000 }, { "epoch": 93.75, "eval_loss": 0.31820571422576904, "eval_runtime": 27.0308, "eval_samples_per_second": 15.575, "eval_steps_per_second": 15.575, "eval_wer": 0.318603515625, "step": 3000 }, { "epoch": 96.88, "learning_rate": 2.67037037037037e-06, "loss": 0.6896, "step": 3100 }, { "epoch": 100.0, "learning_rate": 7.777777777777777e-08, "loss": 0.688, "step": 3200 }, { "epoch": 100.0, "step": 3200, "total_flos": 2.067522226545355e+19, "train_loss": 1.9088435792922973, "train_runtime": 7870.625, "train_samples_per_second": 12.833, "train_steps_per_second": 0.407 } ], "max_steps": 3200, "num_train_epochs": 100, "total_flos": 2.067522226545355e+19, "trial_name": null, "trial_params": null }