{ "best_metric": null, "best_model_checkpoint": null, "epoch": 137.93103448275863, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.45, "learning_rate": 7.2e-06, "loss": 16.2031, "step": 100 }, { "epoch": 6.9, "learning_rate": 1.47e-05, "loss": 6.6774, "step": 200 }, { "epoch": 10.34, "learning_rate": 2.2199999999999998e-05, "loss": 4.7603, "step": 300 }, { "epoch": 13.79, "learning_rate": 2.97e-05, "loss": 3.9198, "step": 400 }, { "epoch": 17.24, "learning_rate": 3.7199999999999996e-05, "loss": 3.5558, "step": 500 }, { "epoch": 17.24, "eval_loss": 3.5216922760009766, "eval_runtime": 2.4797, "eval_samples_per_second": 108.882, "eval_steps_per_second": 3.629, "eval_wer": 0.9812865497076023, "step": 500 }, { "epoch": 20.69, "learning_rate": 4.4699999999999996e-05, "loss": 3.4799, "step": 600 }, { "epoch": 24.14, "learning_rate": 5.2199999999999995e-05, "loss": 3.4271, "step": 700 }, { "epoch": 27.59, "learning_rate": 5.97e-05, "loss": 3.2779, "step": 800 }, { "epoch": 31.03, "learning_rate": 6.72e-05, "loss": 2.9317, "step": 900 }, { "epoch": 34.48, "learning_rate": 7.47e-05, "loss": 2.2202, "step": 1000 }, { "epoch": 34.48, "eval_loss": 0.9991578459739685, "eval_runtime": 1.9665, "eval_samples_per_second": 137.299, "eval_steps_per_second": 4.577, "eval_wer": 0.2637958532695375, "step": 1000 }, { "epoch": 37.93, "learning_rate": 7.285074626865672e-05, "loss": 1.835, "step": 1100 }, { "epoch": 41.38, "learning_rate": 7.061194029850745e-05, "loss": 1.653, "step": 1200 }, { "epoch": 44.83, "learning_rate": 6.83731343283582e-05, "loss": 1.5243, "step": 1300 }, { "epoch": 48.28, "learning_rate": 6.613432835820895e-05, "loss": 1.4343, "step": 1400 }, { "epoch": 51.72, "learning_rate": 6.38955223880597e-05, "loss": 1.376, "step": 1500 }, { "epoch": 51.72, "eval_loss": 0.5396047830581665, "eval_runtime": 1.8956, "eval_samples_per_second": 142.435, "eval_steps_per_second": 4.748, "eval_wer": 0.1946836788942052, "step": 1500 }, { "epoch": 55.17, "learning_rate": 6.165671641791044e-05, "loss": 1.3168, "step": 1600 }, { "epoch": 58.62, "learning_rate": 5.941791044776119e-05, "loss": 1.2773, "step": 1700 }, { "epoch": 62.07, "learning_rate": 5.717910447761193e-05, "loss": 1.2455, "step": 1800 }, { "epoch": 65.52, "learning_rate": 5.494029850746268e-05, "loss": 1.1881, "step": 1900 }, { "epoch": 68.97, "learning_rate": 5.272388059701492e-05, "loss": 1.1565, "step": 2000 }, { "epoch": 68.97, "eval_loss": 0.4707144498825073, "eval_runtime": 1.8963, "eval_samples_per_second": 142.381, "eval_steps_per_second": 4.746, "eval_wer": 0.1773524720893142, "step": 2000 }, { "epoch": 72.41, "learning_rate": 5.048507462686567e-05, "loss": 1.1295, "step": 2100 }, { "epoch": 75.86, "learning_rate": 4.8268656716417906e-05, "loss": 1.0983, "step": 2200 }, { "epoch": 79.31, "learning_rate": 4.6029850746268655e-05, "loss": 1.0716, "step": 2300 }, { "epoch": 82.76, "learning_rate": 4.37910447761194e-05, "loss": 1.0582, "step": 2400 }, { "epoch": 86.21, "learning_rate": 4.155223880597015e-05, "loss": 1.0112, "step": 2500 }, { "epoch": 86.21, "eval_loss": 0.46029242873191833, "eval_runtime": 1.9007, "eval_samples_per_second": 142.05, "eval_steps_per_second": 4.735, "eval_wer": 0.17533227006911217, "step": 2500 }, { "epoch": 89.66, "learning_rate": 3.931343283582089e-05, "loss": 0.9962, "step": 2600 }, { "epoch": 93.1, "learning_rate": 3.707462686567164e-05, "loss": 0.9746, "step": 2700 }, { "epoch": 96.55, "learning_rate": 3.483582089552238e-05, "loss": 0.9592, "step": 2800 }, { "epoch": 100.0, "learning_rate": 3.259701492537313e-05, "loss": 0.9364, "step": 2900 }, { "epoch": 103.45, "learning_rate": 3.0358208955223878e-05, "loss": 0.9387, "step": 3000 }, { "epoch": 103.45, "eval_loss": 0.4466392397880554, "eval_runtime": 1.9017, "eval_samples_per_second": 141.977, "eval_steps_per_second": 4.733, "eval_wer": 0.1721424774056353, "step": 3000 }, { "epoch": 106.9, "learning_rate": 2.811940298507462e-05, "loss": 0.9223, "step": 3100 }, { "epoch": 110.34, "learning_rate": 2.588059701492537e-05, "loss": 0.8839, "step": 3200 }, { "epoch": 113.79, "learning_rate": 2.364179104477612e-05, "loss": 0.8845, "step": 3300 }, { "epoch": 117.24, "learning_rate": 2.1402985074626863e-05, "loss": 0.8604, "step": 3400 }, { "epoch": 120.69, "learning_rate": 1.9164179104477612e-05, "loss": 0.8526, "step": 3500 }, { "epoch": 120.69, "eval_loss": 0.43604278564453125, "eval_runtime": 1.8975, "eval_samples_per_second": 142.29, "eval_steps_per_second": 4.743, "eval_wer": 0.17044125465178098, "step": 3500 }, { "epoch": 124.14, "learning_rate": 1.6925373134328355e-05, "loss": 0.8311, "step": 3600 }, { "epoch": 127.59, "learning_rate": 1.4686567164179104e-05, "loss": 0.832, "step": 3700 }, { "epoch": 131.03, "learning_rate": 1.244776119402985e-05, "loss": 0.812, "step": 3800 }, { "epoch": 134.48, "learning_rate": 1.0208955223880597e-05, "loss": 0.8016, "step": 3900 }, { "epoch": 137.93, "learning_rate": 7.970149253731343e-06, "loss": 0.8018, "step": 4000 }, { "epoch": 137.93, "eval_loss": 0.437362939119339, "eval_runtime": 1.9635, "eval_samples_per_second": 137.506, "eval_steps_per_second": 4.584, "eval_wer": 0.1720361509835194, "step": 4000 } ], "max_steps": 4350, "num_train_epochs": 150, "total_flos": 1.7217444822080553e+19, "trial_name": null, "trial_params": null }