{ "best_metric": null, "best_model_checkpoint": null, "epoch": 27.11864406779661, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.36, "eval_loss": 1.4595407247543335, "eval_runtime": 81.6295, "eval_samples_per_second": 25.665, "eval_steps_per_second": 3.21, "eval_wer": 1.0039260592180599, "step": 400 }, { "epoch": 1.69, "learning_rate": 0.0002982, "loss": 4.7778, "step": 500 }, { "epoch": 2.71, "eval_loss": 0.8082281947135925, "eval_runtime": 80.8384, "eval_samples_per_second": 25.916, "eval_steps_per_second": 3.241, "eval_wer": 1.0115055346529254, "step": 800 }, { "epoch": 3.39, "learning_rate": 0.00028011999999999997, "loss": 0.6408, "step": 1000 }, { "epoch": 4.07, "eval_loss": 0.7031667828559875, "eval_runtime": 84.117, "eval_samples_per_second": 24.906, "eval_steps_per_second": 3.115, "eval_wer": 1.0078521184361198, "step": 1200 }, { "epoch": 5.08, "learning_rate": 0.00026011999999999997, "loss": 0.3937, "step": 1500 }, { "epoch": 5.42, "eval_loss": 0.6889204382896423, "eval_runtime": 80.1903, "eval_samples_per_second": 26.125, "eval_steps_per_second": 3.267, "eval_wer": 1.0432957085991603, "step": 1600 }, { "epoch": 6.78, "learning_rate": 0.00024011999999999997, "loss": 0.3, "step": 2000 }, { "epoch": 6.78, "eval_loss": 0.6820310354232788, "eval_runtime": 80.2232, "eval_samples_per_second": 26.115, "eval_steps_per_second": 3.266, "eval_wer": 1.0068706036316049, "step": 2000 }, { "epoch": 8.14, "eval_loss": 0.6669920086860657, "eval_runtime": 79.89, "eval_samples_per_second": 26.224, "eval_steps_per_second": 3.28, "eval_wer": 1.0196302960902994, "step": 2400 }, { "epoch": 8.47, "learning_rate": 0.00022011999999999997, "loss": 0.226, "step": 2500 }, { "epoch": 9.49, "eval_loss": 0.7215595841407776, "eval_runtime": 80.0002, "eval_samples_per_second": 26.187, "eval_steps_per_second": 3.275, "eval_wer": 1.0422051365941436, "step": 2800 }, { "epoch": 10.17, "learning_rate": 0.00020012, "loss": 0.197, "step": 3000 }, { "epoch": 10.85, "eval_loss": 0.7669464945793152, "eval_runtime": 80.1105, "eval_samples_per_second": 26.151, "eval_steps_per_second": 3.27, "eval_wer": 1.053383499645564, "step": 3200 }, { "epoch": 11.86, "learning_rate": 0.00018012, "loss": 0.165, "step": 3500 }, { "epoch": 12.2, "eval_loss": 0.7517344951629639, "eval_runtime": 79.716, "eval_samples_per_second": 26.281, "eval_steps_per_second": 3.287, "eval_wer": 1.0199574676918044, "step": 3600 }, { "epoch": 13.56, "learning_rate": 0.00016011999999999998, "loss": 0.1486, "step": 4000 }, { "epoch": 13.56, "eval_loss": 0.7124771475791931, "eval_runtime": 79.8981, "eval_samples_per_second": 26.221, "eval_steps_per_second": 3.279, "eval_wer": 1.0357162331642946, "step": 4000 }, { "epoch": 14.92, "eval_loss": 0.7447456121444702, "eval_runtime": 82.2103, "eval_samples_per_second": 25.483, "eval_steps_per_second": 3.187, "eval_wer": 1.0347347183597797, "step": 4400 }, { "epoch": 15.25, "learning_rate": 0.00014012, "loss": 0.122, "step": 4500 }, { "epoch": 16.27, "eval_loss": 0.6899322271347046, "eval_runtime": 79.6624, "eval_samples_per_second": 26.298, "eval_steps_per_second": 3.289, "eval_wer": 1.0440045804024212, "step": 4800 }, { "epoch": 16.95, "learning_rate": 0.00012011999999999998, "loss": 0.1069, "step": 5000 }, { "epoch": 17.63, "eval_loss": 0.7212241291999817, "eval_runtime": 80.3141, "eval_samples_per_second": 26.085, "eval_steps_per_second": 3.262, "eval_wer": 1.0350073613610338, "step": 5200 }, { "epoch": 18.64, "learning_rate": 0.00010011999999999998, "loss": 0.0961, "step": 5500 }, { "epoch": 18.98, "eval_loss": 0.7417359352111816, "eval_runtime": 80.0211, "eval_samples_per_second": 26.181, "eval_steps_per_second": 3.274, "eval_wer": 1.0408419215878728, "step": 5600 }, { "epoch": 20.34, "learning_rate": 8.012e-05, "loss": 0.086, "step": 6000 }, { "epoch": 20.34, "eval_loss": 0.7402010560035706, "eval_runtime": 80.0522, "eval_samples_per_second": 26.17, "eval_steps_per_second": 3.273, "eval_wer": 1.0355526473635421, "step": 6000 }, { "epoch": 21.69, "eval_loss": 0.7760854959487915, "eval_runtime": 80.138, "eval_samples_per_second": 26.142, "eval_steps_per_second": 3.269, "eval_wer": 1.0419870221931402, "step": 6400 }, { "epoch": 22.03, "learning_rate": 6.0119999999999994e-05, "loss": 0.0756, "step": 6500 }, { "epoch": 23.05, "eval_loss": 0.7345677614212036, "eval_runtime": 80.4841, "eval_samples_per_second": 26.03, "eval_steps_per_second": 3.255, "eval_wer": 1.036915862369813, "step": 6800 }, { "epoch": 23.73, "learning_rate": 4.012e-05, "loss": 0.0666, "step": 7000 }, { "epoch": 24.41, "eval_loss": 0.7506045699119568, "eval_runtime": 82.6434, "eval_samples_per_second": 25.35, "eval_steps_per_second": 3.17, "eval_wer": 1.0449315666066852, "step": 7200 }, { "epoch": 25.42, "learning_rate": 2.0119999999999997e-05, "loss": 0.0595, "step": 7500 }, { "epoch": 25.76, "eval_loss": 0.7319227457046509, "eval_runtime": 79.8082, "eval_samples_per_second": 26.25, "eval_steps_per_second": 3.283, "eval_wer": 1.0476034680189759, "step": 7600 }, { "epoch": 27.12, "learning_rate": 1.6e-07, "loss": 0.054, "step": 8000 }, { "epoch": 27.12, "eval_loss": 0.7346429228782654, "eval_runtime": 79.9162, "eval_samples_per_second": 26.215, "eval_steps_per_second": 3.278, "eval_wer": 1.0478761110202301, "step": 8000 }, { "epoch": 27.12, "step": 8000, "total_flos": 1.592873144248711e+19, "train_loss": 0.46973063707351687, "train_runtime": 10600.4383, "train_samples_per_second": 12.075, "train_steps_per_second": 0.755 } ], "max_steps": 8000, "num_train_epochs": 28, "total_flos": 1.592873144248711e+19, "trial_name": null, "trial_params": null }