{ "best_metric": null, "best_model_checkpoint": null, "epoch": 28.846153846153847, "global_step": 10500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.37, "learning_rate": 4.99e-05, "loss": 3.5823, "step": 500 }, { "epoch": 1.37, "eval_loss": 3.32504940032959, "eval_runtime": 45.0187, "eval_samples_per_second": 14.394, "eval_steps_per_second": 0.466, "eval_wer": 1.0, "step": 500 }, { "epoch": 2.75, "learning_rate": 9.970000000000001e-05, "loss": 1.6148, "step": 1000 }, { "epoch": 2.75, "eval_loss": 0.9707893133163452, "eval_runtime": 41.6379, "eval_samples_per_second": 15.563, "eval_steps_per_second": 0.504, "eval_wer": 0.5453111956219274, "step": 1000 }, { "epoch": 4.12, "learning_rate": 9.50100806451613e-05, "loss": 0.7901, "step": 1500 }, { "epoch": 4.12, "eval_loss": 0.7987369298934937, "eval_runtime": 42.0128, "eval_samples_per_second": 15.424, "eval_steps_per_second": 0.5, "eval_wer": 0.4292737222892125, "step": 1500 }, { "epoch": 5.49, "learning_rate": 8.996975806451613e-05, "loss": 0.5529, "step": 2000 }, { "epoch": 5.49, "eval_loss": 0.803723156452179, "eval_runtime": 41.9975, "eval_samples_per_second": 15.43, "eval_steps_per_second": 0.5, "eval_wer": 0.4209257026249884, "step": 2000 }, { "epoch": 6.87, "learning_rate": 8.492943548387097e-05, "loss": 0.4404, "step": 2500 }, { "epoch": 6.87, "eval_loss": 0.7943486571311951, "eval_runtime": 42.0152, "eval_samples_per_second": 15.423, "eval_steps_per_second": 0.5, "eval_wer": 0.45524533902235415, "step": 2500 }, { "epoch": 8.24, "learning_rate": 7.988911290322581e-05, "loss": 0.3528, "step": 3000 }, { "epoch": 8.24, "eval_loss": 0.8733369708061218, "eval_runtime": 42.4523, "eval_samples_per_second": 15.264, "eval_steps_per_second": 0.495, "eval_wer": 0.37658844263055374, "step": 3000 }, { "epoch": 9.62, "learning_rate": 7.484879032258065e-05, "loss": 0.3008, "step": 3500 }, { "epoch": 9.62, "eval_loss": 0.914967954158783, "eval_runtime": 42.0647, "eval_samples_per_second": 15.405, "eval_steps_per_second": 0.499, "eval_wer": 0.3725999443465356, "step": 3500 }, { "epoch": 10.99, "learning_rate": 6.98084677419355e-05, "loss": 0.2625, "step": 4000 }, { "epoch": 10.99, "eval_loss": 0.9753163456916809, "eval_runtime": 41.804, "eval_samples_per_second": 15.501, "eval_steps_per_second": 0.502, "eval_wer": 0.3690752249327521, "step": 4000 }, { "epoch": 12.36, "learning_rate": 6.476814516129032e-05, "loss": 0.2153, "step": 4500 }, { "epoch": 12.36, "eval_loss": 0.9292902946472168, "eval_runtime": 53.6443, "eval_samples_per_second": 12.08, "eval_steps_per_second": 0.391, "eval_wer": 0.35284296447453856, "step": 4500 }, { "epoch": 13.74, "learning_rate": 5.9727822580645166e-05, "loss": 0.1926, "step": 5000 }, { "epoch": 13.74, "eval_loss": 0.9629846215248108, "eval_runtime": 43.3856, "eval_samples_per_second": 14.936, "eval_steps_per_second": 0.484, "eval_wer": 0.3697245153510806, "step": 5000 }, { "epoch": 15.11, "learning_rate": 5.46875e-05, "loss": 0.1634, "step": 5500 }, { "epoch": 15.11, "eval_loss": 0.9719225764274597, "eval_runtime": 43.1527, "eval_samples_per_second": 15.016, "eval_steps_per_second": 0.487, "eval_wer": 0.37473332714961505, "step": 5500 }, { "epoch": 16.48, "learning_rate": 4.964717741935484e-05, "loss": 0.1422, "step": 6000 }, { "epoch": 16.48, "eval_loss": 1.0191898345947266, "eval_runtime": 43.4487, "eval_samples_per_second": 14.914, "eval_steps_per_second": 0.483, "eval_wer": 0.38085520823671276, "step": 6000 }, { "epoch": 17.86, "learning_rate": 4.460685483870968e-05, "loss": 0.1253, "step": 6500 }, { "epoch": 17.86, "eval_loss": 1.0450010299682617, "eval_runtime": 42.8089, "eval_samples_per_second": 15.137, "eval_steps_per_second": 0.491, "eval_wer": 0.37686670995269456, "step": 6500 }, { "epoch": 19.23, "learning_rate": 3.956653225806452e-05, "loss": 0.1068, "step": 7000 }, { "epoch": 19.23, "eval_loss": 1.0906848907470703, "eval_runtime": 43.3307, "eval_samples_per_second": 14.955, "eval_steps_per_second": 0.485, "eval_wer": 0.359243112883777, "step": 7000 }, { "epoch": 20.6, "learning_rate": 3.4526209677419356e-05, "loss": 0.095, "step": 7500 }, { "epoch": 20.6, "eval_loss": 1.0443437099456787, "eval_runtime": 53.9592, "eval_samples_per_second": 12.009, "eval_steps_per_second": 0.389, "eval_wer": 0.35794453204711996, "step": 7500 }, { "epoch": 21.98, "learning_rate": 2.9495967741935488e-05, "loss": 0.0814, "step": 8000 }, { "epoch": 21.98, "eval_loss": 1.140635371208191, "eval_runtime": 43.1083, "eval_samples_per_second": 15.032, "eval_steps_per_second": 0.487, "eval_wer": 0.36258232074946667, "step": 8000 }, { "epoch": 23.35, "learning_rate": 2.4455645161290326e-05, "loss": 0.069, "step": 8500 }, { "epoch": 23.35, "eval_loss": 1.1894210577011108, "eval_runtime": 42.7802, "eval_samples_per_second": 15.147, "eval_steps_per_second": 0.491, "eval_wer": 0.3690752249327521, "step": 8500 }, { "epoch": 24.73, "learning_rate": 1.942540322580645e-05, "loss": 0.0623, "step": 9000 }, { "epoch": 24.73, "eval_loss": 1.2040750980377197, "eval_runtime": 43.2859, "eval_samples_per_second": 14.97, "eval_steps_per_second": 0.485, "eval_wer": 0.3687042018365643, "step": 9000 }, { "epoch": 26.1, "learning_rate": 1.4385080645161292e-05, "loss": 0.0554, "step": 9500 }, { "epoch": 26.1, "eval_loss": 1.2136509418487549, "eval_runtime": 42.5198, "eval_samples_per_second": 15.24, "eval_steps_per_second": 0.494, "eval_wer": 0.35720248585474446, "step": 9500 }, { "epoch": 27.47, "learning_rate": 9.344758064516129e-06, "loss": 0.0475, "step": 10000 }, { "epoch": 27.47, "eval_loss": 1.2355363368988037, "eval_runtime": 43.577, "eval_samples_per_second": 14.87, "eval_steps_per_second": 0.482, "eval_wer": 0.3565531954364159, "step": 10000 }, { "epoch": 28.85, "learning_rate": 4.304435483870968e-06, "loss": 0.042, "step": 10500 }, { "epoch": 28.85, "eval_loss": 1.2368338108062744, "eval_runtime": 43.2519, "eval_samples_per_second": 14.982, "eval_steps_per_second": 0.486, "eval_wer": 0.35553288192189964, "step": 10500 } ], "max_steps": 10920, "num_train_epochs": 30, "total_flos": 1.5691152295536976e+19, "trial_name": null, "trial_params": null }