{ "best_metric": null, "best_model_checkpoint": null, "epoch": 11.564625850340136, "global_step": 1700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.34, "learning_rate": 2.8199999999999998e-05, "loss": 14.6495, "step": 50 }, { "epoch": 0.68, "learning_rate": 5.82e-05, "loss": 8.9314, "step": 100 }, { "epoch": 0.68, "eval_cer": 0.9940322843632806, "eval_loss": 4.78579044342041, "eval_runtime": 24.1442, "eval_samples_per_second": 24.354, "eval_steps_per_second": 3.065, "step": 100 }, { "epoch": 1.02, "learning_rate": 8.819999999999999e-05, "loss": 4.2225, "step": 150 }, { "epoch": 1.36, "learning_rate": 0.0001182, "loss": 3.2937, "step": 200 }, { "epoch": 1.36, "eval_cer": 0.9940322843632806, "eval_loss": 3.2489850521087646, "eval_runtime": 24.3318, "eval_samples_per_second": 24.166, "eval_steps_per_second": 3.041, "step": 200 }, { "epoch": 1.7, "learning_rate": 0.0001482, "loss": 3.2508, "step": 250 }, { "epoch": 2.04, "learning_rate": 0.00017819999999999997, "loss": 3.2412, "step": 300 }, { "epoch": 2.04, "eval_cer": 0.9940322843632806, "eval_loss": 3.222844123840332, "eval_runtime": 24.3579, "eval_samples_per_second": 24.14, "eval_steps_per_second": 3.038, "step": 300 }, { "epoch": 2.38, "learning_rate": 0.00020819999999999996, "loss": 3.1855, "step": 350 }, { "epoch": 2.72, "learning_rate": 0.0002382, "loss": 3.0985, "step": 400 }, { "epoch": 2.72, "eval_cer": 0.9940322843632806, "eval_loss": 3.1458029747009277, "eval_runtime": 24.7673, "eval_samples_per_second": 23.741, "eval_steps_per_second": 2.988, "step": 400 }, { "epoch": 3.06, "learning_rate": 0.00026819999999999996, "loss": 3.0739, "step": 450 }, { "epoch": 3.4, "learning_rate": 0.0002982, "loss": 3.0209, "step": 500 }, { "epoch": 3.4, "eval_cer": 0.9938692320234795, "eval_loss": 3.004169225692749, "eval_runtime": 24.3542, "eval_samples_per_second": 24.144, "eval_steps_per_second": 3.038, "step": 500 }, { "epoch": 3.74, "learning_rate": 0.0002955590551181102, "loss": 2.9536, "step": 550 }, { "epoch": 4.08, "learning_rate": 0.0002908346456692913, "loss": 2.518, "step": 600 }, { "epoch": 4.08, "eval_cer": 0.5400945703570846, "eval_loss": 1.770609736442566, "eval_runtime": 24.8378, "eval_samples_per_second": 23.674, "eval_steps_per_second": 2.979, "step": 600 }, { "epoch": 4.42, "learning_rate": 0.00028611023622047245, "loss": 1.6291, "step": 650 }, { "epoch": 4.76, "learning_rate": 0.0002813858267716535, "loss": 1.2291, "step": 700 }, { "epoch": 4.76, "eval_cer": 0.30572313712701776, "eval_loss": 0.8816090226173401, "eval_runtime": 24.4305, "eval_samples_per_second": 24.068, "eval_steps_per_second": 3.029, "step": 700 }, { "epoch": 5.1, "learning_rate": 0.0002766614173228346, "loss": 1.0153, "step": 750 }, { "epoch": 5.44, "learning_rate": 0.0002719370078740157, "loss": 0.8879, "step": 800 }, { "epoch": 5.44, "eval_cer": 0.23668677645524214, "eval_loss": 0.6693652868270874, "eval_runtime": 24.5154, "eval_samples_per_second": 23.985, "eval_steps_per_second": 3.019, "step": 800 }, { "epoch": 5.78, "learning_rate": 0.00026721259842519684, "loss": 0.8337, "step": 850 }, { "epoch": 6.12, "learning_rate": 0.00026248818897637796, "loss": 0.7717, "step": 900 }, { "epoch": 6.12, "eval_cer": 0.20955486711234306, "eval_loss": 0.5637524127960205, "eval_runtime": 24.5794, "eval_samples_per_second": 23.922, "eval_steps_per_second": 3.011, "step": 900 }, { "epoch": 6.46, "learning_rate": 0.00025776377952755903, "loss": 0.6711, "step": 950 }, { "epoch": 6.8, "learning_rate": 0.00025303937007874016, "loss": 0.7003, "step": 1000 }, { "epoch": 6.8, "eval_cer": 0.19279308658079244, "eval_loss": 0.5100580453872681, "eval_runtime": 24.6049, "eval_samples_per_second": 23.898, "eval_steps_per_second": 3.008, "step": 1000 }, { "epoch": 7.14, "learning_rate": 0.0002483149606299212, "loss": 0.6349, "step": 1050 }, { "epoch": 7.48, "learning_rate": 0.00024359055118110235, "loss": 0.5935, "step": 1100 }, { "epoch": 7.48, "eval_cer": 0.17746616663949127, "eval_loss": 0.46862325072288513, "eval_runtime": 24.2024, "eval_samples_per_second": 24.295, "eval_steps_per_second": 3.058, "step": 1100 }, { "epoch": 7.82, "learning_rate": 0.00023886614173228342, "loss": 0.576, "step": 1150 }, { "epoch": 8.16, "learning_rate": 0.00023414173228346455, "loss": 0.5239, "step": 1200 }, { "epoch": 8.16, "eval_cer": 0.1918799934779064, "eval_loss": 0.4271094799041748, "eval_runtime": 24.6968, "eval_samples_per_second": 23.809, "eval_steps_per_second": 2.996, "step": 1200 }, { "epoch": 8.5, "learning_rate": 0.00022941732283464564, "loss": 0.5207, "step": 1250 }, { "epoch": 8.84, "learning_rate": 0.00022469291338582677, "loss": 0.4855, "step": 1300 }, { "epoch": 8.84, "eval_cer": 0.17550953856187837, "eval_loss": 0.40729257464408875, "eval_runtime": 24.885, "eval_samples_per_second": 23.629, "eval_steps_per_second": 2.974, "step": 1300 }, { "epoch": 9.18, "learning_rate": 0.00021996850393700784, "loss": 0.4621, "step": 1350 }, { "epoch": 9.52, "learning_rate": 0.00021524409448818896, "loss": 0.4583, "step": 1400 }, { "epoch": 9.52, "eval_cer": 0.1760313060492418, "eval_loss": 0.41417357325553894, "eval_runtime": 24.8425, "eval_samples_per_second": 23.669, "eval_steps_per_second": 2.979, "step": 1400 }, { "epoch": 9.86, "learning_rate": 0.00021051968503937006, "loss": 0.4415, "step": 1450 }, { "epoch": 10.2, "learning_rate": 0.00020579527559055118, "loss": 0.417, "step": 1500 }, { "epoch": 10.2, "eval_cer": 0.17156367193869232, "eval_loss": 0.3833578824996948, "eval_runtime": 24.455, "eval_samples_per_second": 24.044, "eval_steps_per_second": 3.026, "step": 1500 }, { "epoch": 10.54, "learning_rate": 0.00020107086614173225, "loss": 0.3959, "step": 1550 }, { "epoch": 10.88, "learning_rate": 0.00019634645669291338, "loss": 0.4074, "step": 1600 }, { "epoch": 10.88, "eval_cer": 0.16301972933311593, "eval_loss": 0.3626195192337036, "eval_runtime": 24.5184, "eval_samples_per_second": 23.982, "eval_steps_per_second": 3.018, "step": 1600 }, { "epoch": 11.22, "learning_rate": 0.00019162204724409448, "loss": 0.3584, "step": 1650 }, { "epoch": 11.56, "learning_rate": 0.0001868976377952756, "loss": 0.3682, "step": 1700 }, { "epoch": 11.56, "eval_cer": 0.15685635088863525, "eval_loss": 0.36927542090415955, "eval_runtime": 24.3751, "eval_samples_per_second": 24.123, "eval_steps_per_second": 3.036, "step": 1700 } ], "max_steps": 3675, "num_train_epochs": 25, "total_flos": 6.617213289199883e+18, "trial_name": null, "trial_params": null }