{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.9937106918239, "global_step": 3950, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.26, "learning_rate": 4.849999999999999e-06, "loss": 18.5969, "step": 100 }, { "epoch": 2.53, "learning_rate": 9.849999999999999e-06, "loss": 4.2811, "step": 200 }, { "epoch": 3.79, "learning_rate": 1.485e-05, "loss": 3.9192, "step": 300 }, { "epoch": 5.06, "learning_rate": 1.9849999999999998e-05, "loss": 2.7895, "step": 400 }, { "epoch": 6.33, "learning_rate": 2.4849999999999998e-05, "loss": 2.142, "step": 500 }, { "epoch": 7.59, "learning_rate": 2.985e-05, "loss": 1.9161, "step": 600 }, { "epoch": 8.86, "learning_rate": 3.485e-05, "loss": 1.7888, "step": 700 }, { "epoch": 10.13, "learning_rate": 3.984999999999999e-05, "loss": 1.7494, "step": 800 }, { "epoch": 11.39, "learning_rate": 4.484999999999999e-05, "loss": 1.7129, "step": 900 }, { "epoch": 12.65, "learning_rate": 4.984999999999999e-05, "loss": 1.7019, "step": 1000 }, { "epoch": 12.65, "eval_cer": 0.2589387701435627, "eval_loss": 1.051009178161621, "eval_runtime": 302.233, "eval_samples_per_second": 14.777, "eval_steps_per_second": 1.85, "eval_wer": 0.9832177220854778, "step": 1000 }, { "epoch": 13.92, "learning_rate": 5.484999999999999e-05, "loss": 1.6797, "step": 1100 }, { "epoch": 15.19, "learning_rate": 5.985e-05, "loss": 1.6992, "step": 1200 }, { "epoch": 16.45, "learning_rate": 6.484999999999999e-05, "loss": 1.703, "step": 1300 }, { "epoch": 17.72, "learning_rate": 6.984999999999999e-05, "loss": 1.7304, "step": 1400 }, { "epoch": 18.98, "learning_rate": 7.484999999999999e-05, "loss": 1.7436, "step": 1500 }, { "epoch": 20.25, "learning_rate": 7.203061224489795e-05, "loss": 1.7564, "step": 1600 }, { "epoch": 21.52, "learning_rate": 6.896938775510203e-05, "loss": 1.6982, "step": 1700 }, { "epoch": 22.78, "learning_rate": 6.590816326530612e-05, "loss": 1.7184, "step": 1800 }, { "epoch": 24.05, "learning_rate": 6.28469387755102e-05, "loss": 1.692, "step": 1900 }, { "epoch": 25.31, "learning_rate": 5.978571428571428e-05, "loss": 1.6385, "step": 2000 }, { "epoch": 25.31, "eval_cer": 0.18507013781031975, "eval_loss": 0.6670215129852295, "eval_runtime": 300.7027, "eval_samples_per_second": 14.852, "eval_steps_per_second": 1.859, "eval_wer": 0.9914969791899754, "step": 2000 }, { "epoch": 26.58, "learning_rate": 5.6724489795918356e-05, "loss": 1.6092, "step": 2100 }, { "epoch": 27.84, "learning_rate": 5.3663265306122446e-05, "loss": 1.6209, "step": 2200 }, { "epoch": 29.11, "learning_rate": 5.060204081632652e-05, "loss": 1.5933, "step": 2300 }, { "epoch": 30.38, "learning_rate": 4.754081632653061e-05, "loss": 1.5673, "step": 2400 }, { "epoch": 31.64, "learning_rate": 4.447959183673469e-05, "loss": 1.5518, "step": 2500 }, { "epoch": 32.91, "learning_rate": 4.141836734693877e-05, "loss": 1.5398, "step": 2600 }, { "epoch": 34.18, "learning_rate": 3.8357142857142855e-05, "loss": 1.5095, "step": 2700 }, { "epoch": 35.44, "learning_rate": 3.529591836734693e-05, "loss": 1.508, "step": 2800 }, { "epoch": 36.7, "learning_rate": 3.2234693877551015e-05, "loss": 1.4769, "step": 2900 }, { "epoch": 37.97, "learning_rate": 2.9173469387755098e-05, "loss": 1.4344, "step": 3000 }, { "epoch": 37.97, "eval_cer": 0.17974508788797672, "eval_loss": 0.6182843446731567, "eval_runtime": 301.8085, "eval_samples_per_second": 14.797, "eval_steps_per_second": 1.852, "eval_wer": 1.0212575520250615, "step": 3000 }, { "epoch": 39.24, "learning_rate": 2.611224489795918e-05, "loss": 1.4331, "step": 3100 }, { "epoch": 40.5, "learning_rate": 2.3051020408163264e-05, "loss": 1.4042, "step": 3200 }, { "epoch": 41.77, "learning_rate": 2.0020408163265303e-05, "loss": 1.3822, "step": 3300 }, { "epoch": 43.04, "learning_rate": 1.6959183673469386e-05, "loss": 1.3768, "step": 3400 }, { "epoch": 44.3, "learning_rate": 1.3897959183673468e-05, "loss": 1.3447, "step": 3500 }, { "epoch": 45.57, "learning_rate": 1.0836734693877551e-05, "loss": 1.3359, "step": 3600 }, { "epoch": 46.83, "learning_rate": 7.77551020408163e-06, "loss": 1.3113, "step": 3700 }, { "epoch": 48.1, "learning_rate": 4.714285714285714e-06, "loss": 1.3053, "step": 3800 }, { "epoch": 49.36, "learning_rate": 1.6530612244897958e-06, "loss": 1.2802, "step": 3900 }, { "epoch": 49.99, "step": 3950, "total_flos": 2.372869475210465e+20, "train_loss": 2.1743111564539657, "train_runtime": 44274.9554, "train_samples_per_second": 11.465, "train_steps_per_second": 0.089 } ], "max_steps": 3950, "num_train_epochs": 50, "total_flos": 2.372869475210465e+20, "trial_name": null, "trial_params": null }