{ "best_metric": null, "best_model_checkpoint": null, "epoch": 60.0, "global_step": 4920, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.22, "learning_rate": 7.35e-06, "loss": 12.2749, "step": 100 }, { "epoch": 2.44, "learning_rate": 1.485e-05, "loss": 4.8071, "step": 200 }, { "epoch": 3.66, "learning_rate": 2.2349999999999998e-05, "loss": 3.5841, "step": 300 }, { "epoch": 4.88, "learning_rate": 2.985e-05, "loss": 3.1829, "step": 400 }, { "epoch": 4.88, "eval_loss": 3.1227903366088867, "eval_runtime": 33.8472, "eval_samples_per_second": 35.247, "eval_steps_per_second": 2.216, "eval_wer": 1.0, "step": 400 }, { "epoch": 6.1, "learning_rate": 3.735e-05, "loss": 3.0323, "step": 500 }, { "epoch": 7.32, "learning_rate": 4.484999999999999e-05, "loss": 2.9603, "step": 600 }, { "epoch": 8.54, "learning_rate": 5.234999999999999e-05, "loss": 2.8982, "step": 700 }, { "epoch": 9.76, "learning_rate": 5.985e-05, "loss": 2.8675, "step": 800 }, { "epoch": 9.76, "eval_loss": 2.8616490364074707, "eval_runtime": 33.8201, "eval_samples_per_second": 35.275, "eval_steps_per_second": 2.218, "eval_wer": 0.9992680427463037, "step": 800 }, { "epoch": 10.98, "learning_rate": 6.735e-05, "loss": 2.7893, "step": 900 }, { "epoch": 12.2, "learning_rate": 7.484999999999999e-05, "loss": 2.2812, "step": 1000 }, { "epoch": 13.41, "learning_rate": 7.312499999999999e-05, "loss": 1.7369, "step": 1100 }, { "epoch": 14.63, "learning_rate": 7.121173469387755e-05, "loss": 1.583, "step": 1200 }, { "epoch": 14.63, "eval_loss": 0.6392253637313843, "eval_runtime": 33.9553, "eval_samples_per_second": 35.134, "eval_steps_per_second": 2.209, "eval_wer": 0.6239203630507978, "step": 1200 }, { "epoch": 15.85, "learning_rate": 6.92984693877551e-05, "loss": 1.4572, "step": 1300 }, { "epoch": 17.07, "learning_rate": 6.738520408163265e-05, "loss": 1.3366, "step": 1400 }, { "epoch": 18.29, "learning_rate": 6.547193877551019e-05, "loss": 1.2668, "step": 1500 }, { "epoch": 19.51, "learning_rate": 6.355867346938775e-05, "loss": 1.1959, "step": 1600 }, { "epoch": 19.51, "eval_loss": 0.36018967628479004, "eval_runtime": 33.3624, "eval_samples_per_second": 35.759, "eval_steps_per_second": 2.248, "eval_wer": 0.3651002781437564, "step": 1600 }, { "epoch": 20.73, "learning_rate": 6.164540816326531e-05, "loss": 1.1694, "step": 1700 }, { "epoch": 21.95, "learning_rate": 5.9732142857142846e-05, "loss": 1.1001, "step": 1800 }, { "epoch": 23.17, "learning_rate": 5.7818877551020404e-05, "loss": 1.0737, "step": 1900 }, { "epoch": 24.39, "learning_rate": 5.5905612244897955e-05, "loss": 1.0276, "step": 2000 }, { "epoch": 24.39, "eval_loss": 0.3020930588245392, "eval_runtime": 33.6823, "eval_samples_per_second": 35.419, "eval_steps_per_second": 2.227, "eval_wer": 0.29805299370516763, "step": 2000 }, { "epoch": 25.61, "learning_rate": 5.3992346938775506e-05, "loss": 1.0293, "step": 2100 }, { "epoch": 26.83, "learning_rate": 5.207908163265306e-05, "loss": 0.9966, "step": 2200 }, { "epoch": 28.05, "learning_rate": 5.016581632653061e-05, "loss": 0.9721, "step": 2300 }, { "epoch": 29.27, "learning_rate": 4.8271683673469385e-05, "loss": 0.9671, "step": 2400 }, { "epoch": 29.27, "eval_loss": 0.28717485070228577, "eval_runtime": 33.2455, "eval_samples_per_second": 35.885, "eval_steps_per_second": 2.256, "eval_wer": 0.27389840433318696, "step": 2400 }, { "epoch": 30.49, "learning_rate": 4.635841836734693e-05, "loss": 0.9536, "step": 2500 }, { "epoch": 31.71, "learning_rate": 4.444515306122448e-05, "loss": 0.934, "step": 2600 }, { "epoch": 32.93, "learning_rate": 4.253188775510204e-05, "loss": 0.9136, "step": 2700 }, { "epoch": 34.15, "learning_rate": 4.061862244897959e-05, "loss": 0.873, "step": 2800 }, { "epoch": 34.15, "eval_loss": 0.259307324886322, "eval_runtime": 33.4272, "eval_samples_per_second": 35.689, "eval_steps_per_second": 2.244, "eval_wer": 0.24593763724198506, "step": 2800 }, { "epoch": 35.37, "learning_rate": 3.870535714285714e-05, "loss": 0.8649, "step": 2900 }, { "epoch": 36.59, "learning_rate": 3.679209183673469e-05, "loss": 0.8464, "step": 3000 }, { "epoch": 37.8, "learning_rate": 3.487882653061224e-05, "loss": 0.8545, "step": 3100 }, { "epoch": 39.02, "learning_rate": 3.2965561224489794e-05, "loss": 0.8513, "step": 3200 }, { "epoch": 39.02, "eval_loss": 0.2617344856262207, "eval_runtime": 33.3367, "eval_samples_per_second": 35.786, "eval_steps_per_second": 2.25, "eval_wer": 0.24725516029863856, "step": 3200 }, { "epoch": 40.24, "learning_rate": 3.1052295918367345e-05, "loss": 0.8424, "step": 3300 }, { "epoch": 41.46, "learning_rate": 2.9139030612244896e-05, "loss": 0.8244, "step": 3400 }, { "epoch": 42.68, "learning_rate": 2.7225765306122448e-05, "loss": 0.8157, "step": 3500 }, { "epoch": 43.9, "learning_rate": 2.53125e-05, "loss": 0.8132, "step": 3600 }, { "epoch": 43.9, "eval_loss": 0.25475597381591797, "eval_runtime": 33.5269, "eval_samples_per_second": 35.583, "eval_steps_per_second": 2.237, "eval_wer": 0.2425706338749817, "step": 3600 }, { "epoch": 45.12, "learning_rate": 2.339923469387755e-05, "loss": 0.8008, "step": 3700 }, { "epoch": 46.34, "learning_rate": 2.14859693877551e-05, "loss": 0.7863, "step": 3800 }, { "epoch": 47.56, "learning_rate": 1.9572704081632652e-05, "loss": 0.7965, "step": 3900 }, { "epoch": 48.78, "learning_rate": 1.7659438775510203e-05, "loss": 0.7935, "step": 4000 }, { "epoch": 48.78, "eval_loss": 0.2637428939342499, "eval_runtime": 33.4727, "eval_samples_per_second": 35.641, "eval_steps_per_second": 2.241, "eval_wer": 0.23525106133801785, "step": 4000 }, { "epoch": 50.0, "learning_rate": 1.5746173469387754e-05, "loss": 0.7857, "step": 4100 }, { "epoch": 51.22, "learning_rate": 1.3832908163265304e-05, "loss": 0.7834, "step": 4200 }, { "epoch": 52.44, "learning_rate": 1.1919642857142855e-05, "loss": 0.7732, "step": 4300 }, { "epoch": 53.66, "learning_rate": 1.0006377551020406e-05, "loss": 0.7565, "step": 4400 }, { "epoch": 53.66, "eval_loss": 0.2629481256008148, "eval_runtime": 33.9009, "eval_samples_per_second": 35.191, "eval_steps_per_second": 2.212, "eval_wer": 0.23217684087249305, "step": 4400 }, { "epoch": 54.88, "learning_rate": 8.093112244897959e-06, "loss": 0.7525, "step": 4500 }, { "epoch": 56.1, "learning_rate": 6.198979591836734e-06, "loss": 0.7716, "step": 4600 }, { "epoch": 57.32, "learning_rate": 4.2857142857142855e-06, "loss": 0.7521, "step": 4700 }, { "epoch": 58.54, "learning_rate": 2.372448979591836e-06, "loss": 0.7359, "step": 4800 }, { "epoch": 58.54, "eval_loss": 0.2578625977039337, "eval_runtime": 33.5888, "eval_samples_per_second": 35.518, "eval_steps_per_second": 2.233, "eval_wer": 0.22529644268774704, "step": 4800 }, { "epoch": 59.76, "learning_rate": 4.591836734693877e-07, "loss": 0.7476, "step": 4900 }, { "epoch": 60.0, "step": 4920, "total_flos": 1.5656877251307375e+19, "train_loss": 1.5886853283982936, "train_runtime": 6665.2028, "train_samples_per_second": 23.459, "train_steps_per_second": 0.738 } ], "max_steps": 4920, "num_train_epochs": 60, "total_flos": 1.5656877251307375e+19, "trial_name": null, "trial_params": null }