{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 11.297071129707113,
  "global_step": 10800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.42,
      "learning_rate": 0.00023999999999999998,
      "loss": 6.6124,
      "step": 400
    },
    {
      "epoch": 0.42,
      "eval_loss": 3.268874168395996,
      "eval_runtime": 47.7642,
      "eval_samples_per_second": 10.594,
      "eval_wer": 0.9499561018437226,
      "step": 400
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0002924547283702213,
      "loss": null,
      "step": 800
    },
    {
      "epoch": 0.84,
      "eval_loss": 1.5890493392944336,
      "eval_runtime": 46.9733,
      "eval_samples_per_second": 10.772,
      "eval_wer": 0.9242025168276266,
      "step": 800
    },
    {
      "epoch": 1.26,
      "learning_rate": 0.0002823943661971831,
      "loss": null,
      "step": 1200
    },
    {
      "epoch": 1.26,
      "eval_loss": 1.399864673614502,
      "eval_runtime": 49.6014,
      "eval_samples_per_second": 10.201,
      "eval_wer": 0.8653789874158618,
      "step": 1200
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.00027233400402414484,
      "loss": null,
      "step": 1600
    },
    {
      "epoch": 1.67,
      "eval_loss": 1.1861594915390015,
      "eval_runtime": 45.83,
      "eval_samples_per_second": 11.041,
      "eval_wer": 0.8627450980392157,
      "step": 1600
    },
    {
      "epoch": 2.09,
      "learning_rate": 0.0002622736418511066,
      "loss": null,
      "step": 2000
    },
    {
      "epoch": 2.09,
      "eval_loss": 1.1273273229599,
      "eval_runtime": 47.5676,
      "eval_samples_per_second": 10.638,
      "eval_wer": 0.8124085455077553,
      "step": 2000
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.0002522132796780684,
      "loss": null,
      "step": 2400
    },
    {
      "epoch": 2.51,
      "eval_loss": 1.0673868656158447,
      "eval_runtime": 46.2685,
      "eval_samples_per_second": 10.936,
      "eval_wer": 0.8141644717588528,
      "step": 2400
    },
    {
      "epoch": 2.93,
      "learning_rate": 0.00024215291750503016,
      "loss": null,
      "step": 2800
    },
    {
      "epoch": 2.93,
      "eval_loss": 1.012987732887268,
      "eval_runtime": 46.0398,
      "eval_samples_per_second": 10.991,
      "eval_wer": 0.8112379280070237,
      "step": 2800
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.00023209255533199194,
      "loss": null,
      "step": 3200
    },
    {
      "epoch": 3.35,
      "eval_loss": 1.0372620820999146,
      "eval_runtime": 47.1969,
      "eval_samples_per_second": 10.721,
      "eval_wer": 0.7860696517412935,
      "step": 3200
    },
    {
      "epoch": 3.77,
      "learning_rate": 0.00022203219315895372,
      "loss": null,
      "step": 3600
    },
    {
      "epoch": 3.77,
      "eval_loss": 0.9511893391609192,
      "eval_runtime": 46.2728,
      "eval_samples_per_second": 10.935,
      "eval_wer": 0.7960199004975125,
      "step": 3600
    },
    {
      "epoch": 4.18,
      "learning_rate": 0.00021197183098591548,
      "loss": null,
      "step": 4000
    },
    {
      "epoch": 4.18,
      "eval_loss": 0.9626357555389404,
      "eval_runtime": 46.2615,
      "eval_samples_per_second": 10.938,
      "eval_wer": 0.7573895229733685,
      "step": 4000
    },
    {
      "epoch": 4.6,
      "learning_rate": 0.00020191146881287726,
      "loss": null,
      "step": 4400
    },
    {
      "epoch": 4.6,
      "eval_loss": 0.9149179458618164,
      "eval_runtime": 47.2032,
      "eval_samples_per_second": 10.72,
      "eval_wer": 0.755340942347088,
      "step": 4400
    },
    {
      "epoch": 5.02,
      "learning_rate": 0.000191851106639839,
      "loss": null,
      "step": 4800
    },
    {
      "epoch": 5.02,
      "eval_loss": 0.945462703704834,
      "eval_runtime": 47.6471,
      "eval_samples_per_second": 10.62,
      "eval_wer": 0.7515364354697103,
      "step": 4800
    },
    {
      "epoch": 5.44,
      "learning_rate": 0.0001817907444668008,
      "loss": null,
      "step": 5200
    },
    {
      "epoch": 5.44,
      "eval_loss": 0.9152739644050598,
      "eval_runtime": 48.1688,
      "eval_samples_per_second": 10.505,
      "eval_wer": 0.746268656716418,
      "step": 5200
    },
    {
      "epoch": 5.86,
      "learning_rate": 0.00017173038229376258,
      "loss": null,
      "step": 5600
    },
    {
      "epoch": 5.86,
      "eval_loss": 0.9364785552024841,
      "eval_runtime": 47.6477,
      "eval_samples_per_second": 10.62,
      "eval_wer": 0.7260755048287972,
      "step": 5600
    },
    {
      "epoch": 6.28,
      "learning_rate": 0.00016167002012072433,
      "loss": null,
      "step": 6000
    },
    {
      "epoch": 6.28,
      "eval_loss": 0.9098660349845886,
      "eval_runtime": 46.4104,
      "eval_samples_per_second": 10.903,
      "eval_wer": 0.7380743342112964,
      "step": 6000
    },
    {
      "epoch": 6.69,
      "learning_rate": 0.0001516096579476861,
      "loss": null,
      "step": 6400
    },
    {
      "epoch": 6.69,
      "eval_loss": 0.9048272371292114,
      "eval_runtime": 46.2425,
      "eval_samples_per_second": 10.942,
      "eval_wer": 0.7205150717003219,
      "step": 6400
    },
    {
      "epoch": 7.11,
      "learning_rate": 0.00014154929577464787,
      "loss": null,
      "step": 6800
    },
    {
      "epoch": 7.11,
      "eval_loss": 0.8889923691749573,
      "eval_runtime": 46.6074,
      "eval_samples_per_second": 10.857,
      "eval_wer": 0.7172958735733099,
      "step": 6800
    },
    {
      "epoch": 7.53,
      "learning_rate": 0.00013148893360160965,
      "loss": null,
      "step": 7200
    },
    {
      "epoch": 7.53,
      "eval_loss": 0.870766818523407,
      "eval_runtime": 47.1858,
      "eval_samples_per_second": 10.724,
      "eval_wer": 0.7091015510681885,
      "step": 7200
    },
    {
      "epoch": 7.95,
      "learning_rate": 0.00012142857142857142,
      "loss": null,
      "step": 7600
    },
    {
      "epoch": 7.95,
      "eval_loss": 0.8733641505241394,
      "eval_runtime": 47.6751,
      "eval_samples_per_second": 10.614,
      "eval_wer": 0.6971027216856892,
      "step": 7600
    },
    {
      "epoch": 8.37,
      "learning_rate": 0.00011136820925553318,
      "loss": null,
      "step": 8000
    },
    {
      "epoch": 8.37,
      "eval_loss": 0.88031005859375,
      "eval_runtime": 47.4892,
      "eval_samples_per_second": 10.655,
      "eval_wer": 0.6947614866842259,
      "step": 8000
    },
    {
      "epoch": 8.79,
      "learning_rate": 0.00010130784708249495,
      "loss": null,
      "step": 8400
    },
    {
      "epoch": 8.79,
      "eval_loss": 0.8849018216133118,
      "eval_runtime": 47.5503,
      "eval_samples_per_second": 10.641,
      "eval_wer": 0.6930055604331284,
      "step": 8400
    },
    {
      "epoch": 9.21,
      "learning_rate": 9.124748490945673e-05,
      "loss": null,
      "step": 8800
    },
    {
      "epoch": 9.21,
      "eval_loss": 0.8564967513084412,
      "eval_runtime": 46.9348,
      "eval_samples_per_second": 10.781,
      "eval_wer": 0.6903716710564823,
      "step": 8800
    },
    {
      "epoch": 9.62,
      "learning_rate": 8.11871227364185e-05,
      "loss": null,
      "step": 9200
    },
    {
      "epoch": 9.62,
      "eval_loss": 0.875277042388916,
      "eval_runtime": 46.126,
      "eval_samples_per_second": 10.97,
      "eval_wer": 0.6871524729294703,
      "step": 9200
    },
    {
      "epoch": 10.04,
      "learning_rate": 7.112676056338028e-05,
      "loss": null,
      "step": 9600
    },
    {
      "epoch": 10.04,
      "eval_loss": 0.836927056312561,
      "eval_runtime": 47.5639,
      "eval_samples_per_second": 10.638,
      "eval_wer": 0.6877377816798361,
      "step": 9600
    },
    {
      "epoch": 10.46,
      "learning_rate": 6.106639839034204e-05,
      "loss": null,
      "step": 10000
    },
    {
      "epoch": 10.46,
      "eval_loss": 0.8403338193893433,
      "eval_runtime": 47.7824,
      "eval_samples_per_second": 10.59,
      "eval_wer": 0.6798361135498976,
      "step": 10000
    },
    {
      "epoch": 10.88,
      "learning_rate": 5.100603621730382e-05,
      "loss": null,
      "step": 10400
    },
    {
      "epoch": 10.88,
      "eval_loss": 0.8528462648391724,
      "eval_runtime": 47.7751,
      "eval_samples_per_second": 10.591,
      "eval_wer": 0.6865671641791045,
      "step": 10400
    },
    {
      "epoch": 11.3,
      "learning_rate": 4.094567404426559e-05,
      "loss": null,
      "step": 10800
    },
    {
      "epoch": 11.3,
      "eval_loss": 0.8435601592063904,
      "eval_runtime": 47.6913,
      "eval_samples_per_second": 10.61,
      "eval_wer": 0.6777875329236172,
      "step": 10800
    }
  ],
  "max_steps": 12428,
  "num_train_epochs": 13,
  "total_flos": 2.5666394845611536e+19,
  "trial_name": null,
  "trial_params": null
}