{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "global_step": 10440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.44, "learning_rate": 0.0003, "loss": NaN, "step": 500 }, { "epoch": 1.44, "eval_loss": 2.3965017795562744, "eval_runtime": 464.2782, "eval_samples_per_second": 9.996, "eval_wer": 0.9993478137582941, "step": 500 }, { "epoch": 2.87, "learning_rate": 0.00028490945674044263, "loss": NaN, "step": 1000 }, { "epoch": 2.87, "eval_loss": 0.40979766845703125, "eval_runtime": 469.7691, "eval_samples_per_second": 9.879, "eval_wer": 0.4245448874269835, "step": 1000 }, { "epoch": 4.31, "learning_rate": 0.0002698189134808853, "loss": NaN, "step": 1500 }, { "epoch": 4.31, "eval_loss": 0.32825562357902527, "eval_runtime": 475.331, "eval_samples_per_second": 9.764, "eval_wer": 0.37052685305960414, "step": 1500 }, { "epoch": 5.75, "learning_rate": 0.00025472837022132795, "loss": NaN, "step": 2000 }, { "epoch": 5.75, "eval_loss": 0.30649319291114807, "eval_runtime": 474.8166, "eval_samples_per_second": 9.774, "eval_wer": 0.35002552033119716, "step": 2000 }, { "epoch": 7.18, "learning_rate": 0.0002396378269617706, "loss": NaN, "step": 2500 }, { "epoch": 7.18, "eval_loss": 0.3162720799446106, "eval_runtime": 475.8373, "eval_samples_per_second": 9.753, "eval_wer": 0.3463392502693813, "step": 2500 }, { "epoch": 8.62, "learning_rate": 0.00022454728370221327, "loss": NaN, "step": 3000 }, { "epoch": 8.62, "eval_loss": 0.30734783411026, "eval_runtime": 475.1293, "eval_samples_per_second": 9.768, "eval_wer": 0.3295808994498951, "step": 3000 }, { "epoch": 10.06, "learning_rate": 0.00020945674044265593, "loss": NaN, "step": 3500 }, { "epoch": 10.06, "eval_loss": 0.32298463582992554, "eval_runtime": 478.2831, "eval_samples_per_second": 9.703, "eval_wer": 0.32586627346452673, "step": 3500 }, { "epoch": 11.49, "learning_rate": 0.0001943661971830986, "loss": NaN, "step": 4000 }, { "epoch": 11.49, "eval_loss": 0.3256703317165375, "eval_runtime": 476.0024, "eval_samples_per_second": 9.75, "eval_wer": 0.3196279702829921, "step": 4000 }, { "epoch": 12.93, "learning_rate": 0.00017927565392354125, "loss": NaN, "step": 4500 }, { "epoch": 12.93, "eval_loss": 0.32864469289779663, "eval_runtime": 473.6083, "eval_samples_per_second": 9.799, "eval_wer": 0.32019508875404074, "step": 4500 }, { "epoch": 14.37, "learning_rate": 0.0001641851106639839, "loss": NaN, "step": 5000 }, { "epoch": 14.37, "eval_loss": 0.32181525230407715, "eval_runtime": 474.4211, "eval_samples_per_second": 9.782, "eval_wer": 0.31662224238643455, "step": 5000 }, { "epoch": 15.8, "learning_rate": 0.00014909456740442654, "loss": NaN, "step": 5500 }, { "epoch": 15.8, "eval_loss": 0.33631598949432373, "eval_runtime": 477.6284, "eval_samples_per_second": 9.717, "eval_wer": 0.3121136505415981, "step": 5500 }, { "epoch": 17.24, "learning_rate": 0.0001340040241448692, "loss": NaN, "step": 6000 }, { "epoch": 17.24, "eval_loss": 0.3525121510028839, "eval_runtime": 482.4872, "eval_samples_per_second": 9.619, "eval_wer": 0.30533658481256737, "step": 6000 }, { "epoch": 18.68, "learning_rate": 0.00011891348088531185, "loss": NaN, "step": 6500 }, { "epoch": 18.68, "eval_loss": 0.34829291701316833, "eval_runtime": 477.1506, "eval_samples_per_second": 9.726, "eval_wer": 0.3055634322009868, "step": 6500 }, { "epoch": 20.11, "learning_rate": 0.00010382293762575451, "loss": NaN, "step": 7000 }, { "epoch": 20.11, "eval_loss": 0.35680249333381653, "eval_runtime": 474.1059, "eval_samples_per_second": 9.789, "eval_wer": 0.30780355016162875, "step": 7000 }, { "epoch": 21.55, "learning_rate": 8.873239436619717e-05, "loss": NaN, "step": 7500 }, { "epoch": 21.55, "eval_loss": 0.36808809638023376, "eval_runtime": 477.9995, "eval_samples_per_second": 9.709, "eval_wer": 0.3064424658311121, "step": 7500 }, { "epoch": 22.99, "learning_rate": 7.364185110663983e-05, "loss": NaN, "step": 8000 }, { "epoch": 22.99, "eval_loss": 0.36075320839881897, "eval_runtime": 479.4594, "eval_samples_per_second": 9.68, "eval_wer": 0.29960868825497644, "step": 8000 }, { "epoch": 24.43, "learning_rate": 5.855130784708249e-05, "loss": NaN, "step": 8500 }, { "epoch": 24.43, "eval_loss": 0.36022624373435974, "eval_runtime": 482.5145, "eval_samples_per_second": 9.618, "eval_wer": 0.29745363806499175, "step": 8500 }, { "epoch": 25.86, "learning_rate": 4.346076458752515e-05, "loss": NaN, "step": 9000 }, { "epoch": 25.86, "eval_loss": 0.35646718740463257, "eval_runtime": 476.5808, "eval_samples_per_second": 9.738, "eval_wer": 0.2930301139908127, "step": 9000 }, { "epoch": 27.3, "learning_rate": 2.8370221327967802e-05, "loss": NaN, "step": 9500 }, { "epoch": 27.3, "eval_loss": 0.3554363548755646, "eval_runtime": 478.8286, "eval_samples_per_second": 9.692, "eval_wer": 0.29130040265411444, "step": 9500 }, { "epoch": 28.74, "learning_rate": 1.3279678068410461e-05, "loss": NaN, "step": 10000 }, { "epoch": 28.74, "eval_loss": 0.3562660217285156, "eval_runtime": 478.4327, "eval_samples_per_second": 9.7, "eval_wer": 0.29016616571201725, "step": 10000 }, { "epoch": 30.0, "step": 10440, "total_flos": 4.532712593948488e+19, "train_runtime": 43862.7197, "train_samples_per_second": 0.238 } ], "max_steps": 10440, "num_train_epochs": 30, "total_flos": 4.532712593948488e+19, "trial_name": null, "trial_params": null }