{ "best_metric": 1.019495964050293, "best_model_checkpoint": "./checkpoint-100", "epoch": 42.857142857142854, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.43, "learning_rate": 1.75e-06, "loss": 3.252, "step": 10 }, { "epoch": 2.86, "learning_rate": 4.25e-06, "loss": 2.8169, "step": 20 }, { "epoch": 4.29, "learning_rate": 6.750000000000001e-06, "loss": 2.2237, "step": 30 }, { "epoch": 5.71, "learning_rate": 9.250000000000001e-06, "loss": 1.6944, "step": 40 }, { "epoch": 7.14, "learning_rate": 9.730769230769231e-06, "loss": 1.2764, "step": 50 }, { "epoch": 8.57, "learning_rate": 9.346153846153847e-06, "loss": 1.0164, "step": 60 }, { "epoch": 10.0, "learning_rate": 8.961538461538462e-06, "loss": 0.854, "step": 70 }, { "epoch": 11.43, "learning_rate": 8.576923076923077e-06, "loss": 0.7109, "step": 80 }, { "epoch": 12.86, "learning_rate": 8.192307692307692e-06, "loss": 0.613, "step": 90 }, { "epoch": 14.29, "learning_rate": 7.807692307692309e-06, "loss": 0.5307, "step": 100 }, { "epoch": 14.29, "eval_loss": 1.019495964050293, "eval_runtime": 229.2012, "eval_samples_per_second": 2.234, "eval_steps_per_second": 0.14, "eval_wer": 66.14709443099274, "step": 100 }, { "epoch": 15.71, "learning_rate": 7.423076923076924e-06, "loss": 0.457, "step": 110 }, { "epoch": 17.14, "learning_rate": 7.038461538461539e-06, "loss": 0.3915, "step": 120 }, { "epoch": 18.57, "learning_rate": 6.653846153846154e-06, "loss": 0.3438, "step": 130 }, { "epoch": 20.0, "learning_rate": 6.26923076923077e-06, "loss": 0.2985, "step": 140 }, { "epoch": 21.43, "learning_rate": 5.884615384615385e-06, "loss": 0.2591, "step": 150 }, { "epoch": 22.86, "learning_rate": 5.500000000000001e-06, "loss": 0.2199, "step": 160 }, { "epoch": 24.29, "learning_rate": 5.115384615384616e-06, "loss": 0.1928, "step": 170 }, { "epoch": 25.71, "learning_rate": 4.730769230769231e-06, "loss": 0.1676, "step": 180 }, { "epoch": 27.14, "learning_rate": 4.346153846153846e-06, "loss": 0.1435, "step": 190 }, { "epoch": 28.57, "learning_rate": 3.961538461538462e-06, "loss": 0.1225, "step": 200 }, { "epoch": 28.57, "eval_loss": 1.146507740020752, "eval_runtime": 276.2984, "eval_samples_per_second": 1.853, "eval_steps_per_second": 0.116, "eval_wer": 66.01846246973365, "step": 200 }, { "epoch": 30.0, "learning_rate": 3.5769230769230773e-06, "loss": 0.108, "step": 210 }, { "epoch": 31.43, "learning_rate": 3.192307692307692e-06, "loss": 0.0941, "step": 220 }, { "epoch": 32.86, "learning_rate": 2.807692307692308e-06, "loss": 0.0811, "step": 230 }, { "epoch": 34.29, "learning_rate": 2.4230769230769233e-06, "loss": 0.0729, "step": 240 }, { "epoch": 35.71, "learning_rate": 2.0384615384615386e-06, "loss": 0.0655, "step": 250 }, { "epoch": 37.14, "learning_rate": 1.653846153846154e-06, "loss": 0.0604, "step": 260 }, { "epoch": 38.57, "learning_rate": 1.2692307692307692e-06, "loss": 0.055, "step": 270 }, { "epoch": 40.0, "learning_rate": 8.846153846153848e-07, "loss": 0.0524, "step": 280 }, { "epoch": 41.43, "learning_rate": 5.000000000000001e-07, "loss": 0.0505, "step": 290 }, { "epoch": 42.86, "learning_rate": 1.153846153846154e-07, "loss": 0.0498, "step": 300 }, { "epoch": 42.86, "eval_loss": 1.229990839958191, "eval_runtime": 273.4631, "eval_samples_per_second": 1.872, "eval_steps_per_second": 0.117, "eval_wer": 66.03359564164649, "step": 300 }, { "epoch": 42.86, "step": 300, "total_flos": 1.19900188901376e+18, "train_loss": 0.6091492390632629, "train_runtime": 1414.6972, "train_samples_per_second": 13.572, "train_steps_per_second": 0.212 } ], "max_steps": 300, "num_train_epochs": 43, "total_flos": 1.19900188901376e+18, "trial_name": null, "trial_params": null }