{ "best_metric": null, "best_model_checkpoint": null, "epoch": 22.448979591836736, "global_step": 3300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.34, "learning_rate": 2.8199999999999998e-05, "loss": 14.6495, "step": 50 }, { "epoch": 0.68, "learning_rate": 5.82e-05, "loss": 8.9314, "step": 100 }, { "epoch": 0.68, "eval_cer": 0.9940322843632806, "eval_loss": 4.78579044342041, "eval_runtime": 24.1442, "eval_samples_per_second": 24.354, "eval_steps_per_second": 3.065, "step": 100 }, { "epoch": 1.02, "learning_rate": 8.819999999999999e-05, "loss": 4.2225, "step": 150 }, { "epoch": 1.36, "learning_rate": 0.0001182, "loss": 3.2937, "step": 200 }, { "epoch": 1.36, "eval_cer": 0.9940322843632806, "eval_loss": 3.2489850521087646, "eval_runtime": 24.3318, "eval_samples_per_second": 24.166, "eval_steps_per_second": 3.041, "step": 200 }, { "epoch": 1.7, "learning_rate": 0.0001482, "loss": 3.2508, "step": 250 }, { "epoch": 2.04, "learning_rate": 0.00017819999999999997, "loss": 3.2412, "step": 300 }, { "epoch": 2.04, "eval_cer": 0.9940322843632806, "eval_loss": 3.222844123840332, "eval_runtime": 24.3579, "eval_samples_per_second": 24.14, "eval_steps_per_second": 3.038, "step": 300 }, { "epoch": 2.38, "learning_rate": 0.00020819999999999996, "loss": 3.1855, "step": 350 }, { "epoch": 2.72, "learning_rate": 0.0002382, "loss": 3.0985, "step": 400 }, { "epoch": 2.72, "eval_cer": 0.9940322843632806, "eval_loss": 3.1458029747009277, "eval_runtime": 24.7673, "eval_samples_per_second": 23.741, "eval_steps_per_second": 2.988, "step": 400 }, { "epoch": 3.06, "learning_rate": 0.00026819999999999996, "loss": 3.0739, "step": 450 }, { "epoch": 3.4, "learning_rate": 0.0002982, "loss": 3.0209, "step": 500 }, { "epoch": 3.4, "eval_cer": 0.9938692320234795, "eval_loss": 3.004169225692749, "eval_runtime": 24.3542, "eval_samples_per_second": 24.144, "eval_steps_per_second": 3.038, "step": 500 }, { "epoch": 3.74, "learning_rate": 0.0002955590551181102, "loss": 2.9536, "step": 550 }, { "epoch": 4.08, "learning_rate": 0.0002908346456692913, "loss": 2.518, "step": 600 }, { "epoch": 4.08, "eval_cer": 0.5400945703570846, "eval_loss": 1.770609736442566, "eval_runtime": 24.8378, "eval_samples_per_second": 23.674, "eval_steps_per_second": 2.979, "step": 600 }, { "epoch": 4.42, "learning_rate": 0.00028611023622047245, "loss": 1.6291, "step": 650 }, { "epoch": 4.76, "learning_rate": 0.0002813858267716535, "loss": 1.2291, "step": 700 }, { "epoch": 4.76, "eval_cer": 0.30572313712701776, "eval_loss": 0.8816090226173401, "eval_runtime": 24.4305, "eval_samples_per_second": 24.068, "eval_steps_per_second": 3.029, "step": 700 }, { "epoch": 5.1, "learning_rate": 0.0002766614173228346, "loss": 1.0153, "step": 750 }, { "epoch": 5.44, "learning_rate": 0.0002719370078740157, "loss": 0.8879, "step": 800 }, { "epoch": 5.44, "eval_cer": 0.23668677645524214, "eval_loss": 0.6693652868270874, "eval_runtime": 24.5154, "eval_samples_per_second": 23.985, "eval_steps_per_second": 3.019, "step": 800 }, { "epoch": 5.78, "learning_rate": 0.00026721259842519684, "loss": 0.8337, "step": 850 }, { "epoch": 6.12, "learning_rate": 0.00026248818897637796, "loss": 0.7717, "step": 900 }, { "epoch": 6.12, "eval_cer": 0.20955486711234306, "eval_loss": 0.5637524127960205, "eval_runtime": 24.5794, "eval_samples_per_second": 23.922, "eval_steps_per_second": 3.011, "step": 900 }, { "epoch": 6.46, "learning_rate": 0.00025776377952755903, "loss": 0.6711, "step": 950 }, { "epoch": 6.8, "learning_rate": 0.00025303937007874016, "loss": 0.7003, "step": 1000 }, { "epoch": 6.8, "eval_cer": 0.19279308658079244, "eval_loss": 0.5100580453872681, "eval_runtime": 24.6049, "eval_samples_per_second": 23.898, "eval_steps_per_second": 3.008, "step": 1000 }, { "epoch": 7.14, "learning_rate": 0.0002483149606299212, "loss": 0.6349, "step": 1050 }, { "epoch": 7.48, "learning_rate": 0.00024359055118110235, "loss": 0.5935, "step": 1100 }, { "epoch": 7.48, "eval_cer": 0.17746616663949127, "eval_loss": 0.46862325072288513, "eval_runtime": 24.2024, "eval_samples_per_second": 24.295, "eval_steps_per_second": 3.058, "step": 1100 }, { "epoch": 7.82, "learning_rate": 0.00023886614173228342, "loss": 0.576, "step": 1150 }, { "epoch": 8.16, "learning_rate": 0.00023414173228346455, "loss": 0.5239, "step": 1200 }, { "epoch": 8.16, "eval_cer": 0.1918799934779064, "eval_loss": 0.4271094799041748, "eval_runtime": 24.6968, "eval_samples_per_second": 23.809, "eval_steps_per_second": 2.996, "step": 1200 }, { "epoch": 8.5, "learning_rate": 0.00022941732283464564, "loss": 0.5207, "step": 1250 }, { "epoch": 8.84, "learning_rate": 0.00022469291338582677, "loss": 0.4855, "step": 1300 }, { "epoch": 8.84, "eval_cer": 0.17550953856187837, "eval_loss": 0.40729257464408875, "eval_runtime": 24.885, "eval_samples_per_second": 23.629, "eval_steps_per_second": 2.974, "step": 1300 }, { "epoch": 9.18, "learning_rate": 0.00021996850393700784, "loss": 0.4621, "step": 1350 }, { "epoch": 9.52, "learning_rate": 0.00021524409448818896, "loss": 0.4583, "step": 1400 }, { "epoch": 9.52, "eval_cer": 0.1760313060492418, "eval_loss": 0.41417357325553894, "eval_runtime": 24.8425, "eval_samples_per_second": 23.669, "eval_steps_per_second": 2.979, "step": 1400 }, { "epoch": 9.86, "learning_rate": 0.00021051968503937006, "loss": 0.4415, "step": 1450 }, { "epoch": 10.2, "learning_rate": 0.00020579527559055118, "loss": 0.417, "step": 1500 }, { "epoch": 10.2, "eval_cer": 0.17156367193869232, "eval_loss": 0.3833578824996948, "eval_runtime": 24.455, "eval_samples_per_second": 24.044, "eval_steps_per_second": 3.026, "step": 1500 }, { "epoch": 10.54, "learning_rate": 0.00020107086614173225, "loss": 0.3959, "step": 1550 }, { "epoch": 10.88, "learning_rate": 0.00019634645669291338, "loss": 0.4074, "step": 1600 }, { "epoch": 10.88, "eval_cer": 0.16301972933311593, "eval_loss": 0.3626195192337036, "eval_runtime": 24.5184, "eval_samples_per_second": 23.982, "eval_steps_per_second": 3.018, "step": 1600 }, { "epoch": 11.22, "learning_rate": 0.00019162204724409448, "loss": 0.3584, "step": 1650 }, { "epoch": 11.56, "learning_rate": 0.0001868976377952756, "loss": 0.3682, "step": 1700 }, { "epoch": 11.56, "eval_cer": 0.15685635088863525, "eval_loss": 0.36927542090415955, "eval_runtime": 24.3751, "eval_samples_per_second": 24.123, "eval_steps_per_second": 3.036, "step": 1700 }, { "epoch": 11.9, "learning_rate": 0.00018217322834645667, "loss": 0.3455, "step": 1750 }, { "epoch": 12.24, "learning_rate": 0.00017744881889763777, "loss": 0.3245, "step": 1800 }, { "epoch": 12.24, "eval_cer": 0.15816076960704387, "eval_loss": 0.3740461468696594, "eval_runtime": 24.7162, "eval_samples_per_second": 23.79, "eval_steps_per_second": 2.994, "step": 1800 }, { "epoch": 12.59, "learning_rate": 0.0001727244094488189, "loss": 0.3208, "step": 1850 }, { "epoch": 12.93, "learning_rate": 0.000168, "loss": 0.3063, "step": 1900 }, { "epoch": 12.93, "eval_cer": 0.15904125224196966, "eval_loss": 0.3622555434703827, "eval_runtime": 24.4729, "eval_samples_per_second": 24.027, "eval_steps_per_second": 3.024, "step": 1900 }, { "epoch": 13.27, "learning_rate": 0.0001632755905511811, "loss": 0.3019, "step": 1950 }, { "epoch": 13.61, "learning_rate": 0.00015855118110236219, "loss": 0.2945, "step": 2000 }, { "epoch": 13.61, "eval_cer": 0.16634599706505787, "eval_loss": 0.3725011348724365, "eval_runtime": 25.0023, "eval_samples_per_second": 23.518, "eval_steps_per_second": 2.96, "step": 2000 }, { "epoch": 13.95, "learning_rate": 0.0001538267716535433, "loss": 0.279, "step": 2050 }, { "epoch": 14.29, "learning_rate": 0.0001491023622047244, "loss": 0.2674, "step": 2100 }, { "epoch": 14.29, "eval_cer": 0.15731289744007826, "eval_loss": 0.3531067371368408, "eval_runtime": 24.8381, "eval_samples_per_second": 23.673, "eval_steps_per_second": 2.979, "step": 2100 }, { "epoch": 14.63, "learning_rate": 0.0001443779527559055, "loss": 0.2584, "step": 2150 }, { "epoch": 14.97, "learning_rate": 0.0001396535433070866, "loss": 0.2796, "step": 2200 }, { "epoch": 14.97, "eval_cer": 0.14808413500733736, "eval_loss": 0.3606802523136139, "eval_runtime": 24.8151, "eval_samples_per_second": 23.695, "eval_steps_per_second": 2.982, "step": 2200 }, { "epoch": 15.31, "learning_rate": 0.0001349291338582677, "loss": 0.2462, "step": 2250 }, { "epoch": 15.65, "learning_rate": 0.0001302047244094488, "loss": 0.256, "step": 2300 }, { "epoch": 15.65, "eval_cer": 0.15819338007500408, "eval_loss": 0.3580550253391266, "eval_runtime": 24.5695, "eval_samples_per_second": 23.932, "eval_steps_per_second": 3.012, "step": 2300 }, { "epoch": 15.99, "learning_rate": 0.00012548031496062992, "loss": 0.2524, "step": 2350 }, { "epoch": 16.33, "learning_rate": 0.00012075590551181102, "loss": 0.2219, "step": 2400 }, { "epoch": 16.33, "eval_cer": 0.14801891407141693, "eval_loss": 0.35925593972206116, "eval_runtime": 24.982, "eval_samples_per_second": 23.537, "eval_steps_per_second": 2.962, "step": 2400 }, { "epoch": 16.67, "learning_rate": 0.0001160314960629921, "loss": 0.2364, "step": 2450 }, { "epoch": 17.01, "learning_rate": 0.00011130708661417321, "loss": 0.2291, "step": 2500 }, { "epoch": 17.01, "eval_cer": 0.1471058209685309, "eval_loss": 0.35567909479141235, "eval_runtime": 24.4749, "eval_samples_per_second": 24.025, "eval_steps_per_second": 3.024, "step": 2500 }, { "epoch": 17.35, "learning_rate": 0.00010658267716535431, "loss": 0.2045, "step": 2550 }, { "epoch": 17.69, "learning_rate": 0.00010185826771653542, "loss": 0.2172, "step": 2600 }, { "epoch": 17.69, "eval_cer": 0.14792108266753629, "eval_loss": 0.3606509566307068, "eval_runtime": 25.1105, "eval_samples_per_second": 23.416, "eval_steps_per_second": 2.947, "step": 2600 }, { "epoch": 18.03, "learning_rate": 9.713385826771652e-05, "loss": 0.2271, "step": 2650 }, { "epoch": 18.37, "learning_rate": 9.240944881889763e-05, "loss": 0.1858, "step": 2700 }, { "epoch": 18.37, "eval_cer": 0.15144301320723952, "eval_loss": 0.3589307963848114, "eval_runtime": 24.5005, "eval_samples_per_second": 24.0, "eval_steps_per_second": 3.02, "step": 2700 }, { "epoch": 18.71, "learning_rate": 8.768503937007873e-05, "loss": 0.1995, "step": 2750 }, { "epoch": 19.05, "learning_rate": 8.296062992125984e-05, "loss": 0.1872, "step": 2800 }, { "epoch": 19.05, "eval_cer": 0.14766019892385457, "eval_loss": 0.36663514375686646, "eval_runtime": 24.4383, "eval_samples_per_second": 24.061, "eval_steps_per_second": 3.028, "step": 2800 }, { "epoch": 19.39, "learning_rate": 7.823622047244094e-05, "loss": 0.177, "step": 2850 }, { "epoch": 19.73, "learning_rate": 7.351181102362205e-05, "loss": 0.1855, "step": 2900 }, { "epoch": 19.73, "eval_cer": 0.1432577857492255, "eval_loss": 0.3651330769062042, "eval_runtime": 24.7826, "eval_samples_per_second": 23.726, "eval_steps_per_second": 2.986, "step": 2900 }, { "epoch": 20.07, "learning_rate": 6.878740157480315e-05, "loss": 0.1773, "step": 2950 }, { "epoch": 20.41, "learning_rate": 6.406299212598424e-05, "loss": 0.185, "step": 3000 }, { "epoch": 20.41, "eval_cer": 0.1447578672753954, "eval_loss": 0.36554473638534546, "eval_runtime": 24.7061, "eval_samples_per_second": 23.8, "eval_steps_per_second": 2.995, "step": 3000 }, { "epoch": 20.75, "learning_rate": 5.933858267716535e-05, "loss": 0.1735, "step": 3050 }, { "epoch": 21.09, "learning_rate": 5.461417322834645e-05, "loss": 0.1599, "step": 3100 }, { "epoch": 21.09, "eval_cer": 0.14867112343062122, "eval_loss": 0.37344449758529663, "eval_runtime": 24.8752, "eval_samples_per_second": 23.638, "eval_steps_per_second": 2.975, "step": 3100 }, { "epoch": 21.43, "learning_rate": 4.9889763779527555e-05, "loss": 0.1578, "step": 3150 }, { "epoch": 21.77, "learning_rate": 4.516535433070866e-05, "loss": 0.1742, "step": 3200 }, { "epoch": 21.77, "eval_cer": 0.148638512962661, "eval_loss": 0.3685232102870941, "eval_runtime": 25.1361, "eval_samples_per_second": 23.393, "eval_steps_per_second": 2.944, "step": 3200 }, { "epoch": 22.11, "learning_rate": 4.0440944881889764e-05, "loss": 0.1666, "step": 3250 }, { "epoch": 22.45, "learning_rate": 3.571653543307086e-05, "loss": 0.1605, "step": 3300 }, { "epoch": 22.45, "eval_cer": 0.14746453611609325, "eval_loss": 0.3691498637199402, "eval_runtime": 24.7916, "eval_samples_per_second": 23.718, "eval_steps_per_second": 2.985, "step": 3300 } ], "max_steps": 3675, "num_train_epochs": 25, "total_flos": 1.2812925033719341e+19, "trial_name": null, "trial_params": null }