|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 18.367346938775512, |
|
"global_step": 2700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.8199999999999998e-05, |
|
"loss": 14.6495, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5.82e-05, |
|
"loss": 8.9314, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_cer": 0.9940322843632806, |
|
"eval_loss": 4.78579044342041, |
|
"eval_runtime": 24.1442, |
|
"eval_samples_per_second": 24.354, |
|
"eval_steps_per_second": 3.065, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 8.819999999999999e-05, |
|
"loss": 4.2225, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0001182, |
|
"loss": 3.2937, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_cer": 0.9940322843632806, |
|
"eval_loss": 3.2489850521087646, |
|
"eval_runtime": 24.3318, |
|
"eval_samples_per_second": 24.166, |
|
"eval_steps_per_second": 3.041, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0001482, |
|
"loss": 3.2508, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00017819999999999997, |
|
"loss": 3.2412, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_cer": 0.9940322843632806, |
|
"eval_loss": 3.222844123840332, |
|
"eval_runtime": 24.3579, |
|
"eval_samples_per_second": 24.14, |
|
"eval_steps_per_second": 3.038, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00020819999999999996, |
|
"loss": 3.1855, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0002382, |
|
"loss": 3.0985, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_cer": 0.9940322843632806, |
|
"eval_loss": 3.1458029747009277, |
|
"eval_runtime": 24.7673, |
|
"eval_samples_per_second": 23.741, |
|
"eval_steps_per_second": 2.988, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.00026819999999999996, |
|
"loss": 3.0739, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.0002982, |
|
"loss": 3.0209, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_cer": 0.9938692320234795, |
|
"eval_loss": 3.004169225692749, |
|
"eval_runtime": 24.3542, |
|
"eval_samples_per_second": 24.144, |
|
"eval_steps_per_second": 3.038, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.0002955590551181102, |
|
"loss": 2.9536, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.0002908346456692913, |
|
"loss": 2.518, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_cer": 0.5400945703570846, |
|
"eval_loss": 1.770609736442566, |
|
"eval_runtime": 24.8378, |
|
"eval_samples_per_second": 23.674, |
|
"eval_steps_per_second": 2.979, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.00028611023622047245, |
|
"loss": 1.6291, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 0.0002813858267716535, |
|
"loss": 1.2291, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"eval_cer": 0.30572313712701776, |
|
"eval_loss": 0.8816090226173401, |
|
"eval_runtime": 24.4305, |
|
"eval_samples_per_second": 24.068, |
|
"eval_steps_per_second": 3.029, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.0002766614173228346, |
|
"loss": 1.0153, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.0002719370078740157, |
|
"loss": 0.8879, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"eval_cer": 0.23668677645524214, |
|
"eval_loss": 0.6693652868270874, |
|
"eval_runtime": 24.5154, |
|
"eval_samples_per_second": 23.985, |
|
"eval_steps_per_second": 3.019, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 0.00026721259842519684, |
|
"loss": 0.8337, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.00026248818897637796, |
|
"loss": 0.7717, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"eval_cer": 0.20955486711234306, |
|
"eval_loss": 0.5637524127960205, |
|
"eval_runtime": 24.5794, |
|
"eval_samples_per_second": 23.922, |
|
"eval_steps_per_second": 3.011, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 0.00025776377952755903, |
|
"loss": 0.6711, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.00025303937007874016, |
|
"loss": 0.7003, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_cer": 0.19279308658079244, |
|
"eval_loss": 0.5100580453872681, |
|
"eval_runtime": 24.6049, |
|
"eval_samples_per_second": 23.898, |
|
"eval_steps_per_second": 3.008, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.0002483149606299212, |
|
"loss": 0.6349, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.00024359055118110235, |
|
"loss": 0.5935, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_cer": 0.17746616663949127, |
|
"eval_loss": 0.46862325072288513, |
|
"eval_runtime": 24.2024, |
|
"eval_samples_per_second": 24.295, |
|
"eval_steps_per_second": 3.058, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 0.00023886614173228342, |
|
"loss": 0.576, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 0.00023414173228346455, |
|
"loss": 0.5239, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"eval_cer": 0.1918799934779064, |
|
"eval_loss": 0.4271094799041748, |
|
"eval_runtime": 24.6968, |
|
"eval_samples_per_second": 23.809, |
|
"eval_steps_per_second": 2.996, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 0.00022941732283464564, |
|
"loss": 0.5207, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 0.00022469291338582677, |
|
"loss": 0.4855, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"eval_cer": 0.17550953856187837, |
|
"eval_loss": 0.40729257464408875, |
|
"eval_runtime": 24.885, |
|
"eval_samples_per_second": 23.629, |
|
"eval_steps_per_second": 2.974, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 0.00021996850393700784, |
|
"loss": 0.4621, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 0.00021524409448818896, |
|
"loss": 0.4583, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"eval_cer": 0.1760313060492418, |
|
"eval_loss": 0.41417357325553894, |
|
"eval_runtime": 24.8425, |
|
"eval_samples_per_second": 23.669, |
|
"eval_steps_per_second": 2.979, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 0.00021051968503937006, |
|
"loss": 0.4415, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 0.00020579527559055118, |
|
"loss": 0.417, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"eval_cer": 0.17156367193869232, |
|
"eval_loss": 0.3833578824996948, |
|
"eval_runtime": 24.455, |
|
"eval_samples_per_second": 24.044, |
|
"eval_steps_per_second": 3.026, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 0.00020107086614173225, |
|
"loss": 0.3959, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 0.00019634645669291338, |
|
"loss": 0.4074, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"eval_cer": 0.16301972933311593, |
|
"eval_loss": 0.3626195192337036, |
|
"eval_runtime": 24.5184, |
|
"eval_samples_per_second": 23.982, |
|
"eval_steps_per_second": 3.018, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.00019162204724409448, |
|
"loss": 0.3584, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"learning_rate": 0.0001868976377952756, |
|
"loss": 0.3682, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"eval_cer": 0.15685635088863525, |
|
"eval_loss": 0.36927542090415955, |
|
"eval_runtime": 24.3751, |
|
"eval_samples_per_second": 24.123, |
|
"eval_steps_per_second": 3.036, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 0.00018217322834645667, |
|
"loss": 0.3455, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 0.00017744881889763777, |
|
"loss": 0.3245, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"eval_cer": 0.15816076960704387, |
|
"eval_loss": 0.3740461468696594, |
|
"eval_runtime": 24.7162, |
|
"eval_samples_per_second": 23.79, |
|
"eval_steps_per_second": 2.994, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 0.0001727244094488189, |
|
"loss": 0.3208, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 0.000168, |
|
"loss": 0.3063, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"eval_cer": 0.15904125224196966, |
|
"eval_loss": 0.3622555434703827, |
|
"eval_runtime": 24.4729, |
|
"eval_samples_per_second": 24.027, |
|
"eval_steps_per_second": 3.024, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 13.27, |
|
"learning_rate": 0.0001632755905511811, |
|
"loss": 0.3019, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 0.00015855118110236219, |
|
"loss": 0.2945, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"eval_cer": 0.16634599706505787, |
|
"eval_loss": 0.3725011348724365, |
|
"eval_runtime": 25.0023, |
|
"eval_samples_per_second": 23.518, |
|
"eval_steps_per_second": 2.96, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"learning_rate": 0.0001538267716535433, |
|
"loss": 0.279, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 0.0001491023622047244, |
|
"loss": 0.2674, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"eval_cer": 0.15731289744007826, |
|
"eval_loss": 0.3531067371368408, |
|
"eval_runtime": 24.8381, |
|
"eval_samples_per_second": 23.673, |
|
"eval_steps_per_second": 2.979, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 0.0001443779527559055, |
|
"loss": 0.2584, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 0.0001396535433070866, |
|
"loss": 0.2796, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_cer": 0.14808413500733736, |
|
"eval_loss": 0.3606802523136139, |
|
"eval_runtime": 24.8151, |
|
"eval_samples_per_second": 23.695, |
|
"eval_steps_per_second": 2.982, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 0.0001349291338582677, |
|
"loss": 0.2462, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 0.0001302047244094488, |
|
"loss": 0.256, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"eval_cer": 0.15819338007500408, |
|
"eval_loss": 0.3580550253391266, |
|
"eval_runtime": 24.5695, |
|
"eval_samples_per_second": 23.932, |
|
"eval_steps_per_second": 3.012, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 0.00012548031496062992, |
|
"loss": 0.2524, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 16.33, |
|
"learning_rate": 0.00012075590551181102, |
|
"loss": 0.2219, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 16.33, |
|
"eval_cer": 0.14801891407141693, |
|
"eval_loss": 0.35925593972206116, |
|
"eval_runtime": 24.982, |
|
"eval_samples_per_second": 23.537, |
|
"eval_steps_per_second": 2.962, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 0.0001160314960629921, |
|
"loss": 0.2364, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 0.00011130708661417321, |
|
"loss": 0.2291, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"eval_cer": 0.1471058209685309, |
|
"eval_loss": 0.35567909479141235, |
|
"eval_runtime": 24.4749, |
|
"eval_samples_per_second": 24.025, |
|
"eval_steps_per_second": 3.024, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"learning_rate": 0.00010658267716535431, |
|
"loss": 0.2045, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 17.69, |
|
"learning_rate": 0.00010185826771653542, |
|
"loss": 0.2172, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 17.69, |
|
"eval_cer": 0.14792108266753629, |
|
"eval_loss": 0.3606509566307068, |
|
"eval_runtime": 25.1105, |
|
"eval_samples_per_second": 23.416, |
|
"eval_steps_per_second": 2.947, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"learning_rate": 9.713385826771652e-05, |
|
"loss": 0.2271, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"learning_rate": 9.240944881889763e-05, |
|
"loss": 0.1858, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"eval_cer": 0.15144301320723952, |
|
"eval_loss": 0.3589307963848114, |
|
"eval_runtime": 24.5005, |
|
"eval_samples_per_second": 24.0, |
|
"eval_steps_per_second": 3.02, |
|
"step": 2700 |
|
} |
|
], |
|
"max_steps": 3675, |
|
"num_train_epochs": 25, |
|
"total_flos": 1.0494071921698755e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|