|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 29.930420896543858, |
|
"eval_steps": 300, |
|
"global_step": 24600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.900000000000002e-06, |
|
"loss": 19.7382, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_cer": 1.0, |
|
"eval_loss": 6.821648120880127, |
|
"eval_runtime": 52.3193, |
|
"eval_samples_per_second": 42.451, |
|
"eval_steps_per_second": 5.314, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.9900000000000003e-05, |
|
"loss": 9.8181, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_cer": 1.0, |
|
"eval_loss": 6.651111602783203, |
|
"eval_runtime": 43.577, |
|
"eval_samples_per_second": 50.967, |
|
"eval_steps_per_second": 6.38, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.9900000000000002e-05, |
|
"loss": 9.5191, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_cer": 0.9806448402826152, |
|
"eval_loss": 6.58424186706543, |
|
"eval_runtime": 43.1655, |
|
"eval_samples_per_second": 51.453, |
|
"eval_steps_per_second": 6.44, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.99e-05, |
|
"loss": 8.6238, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_cer": 0.9216837496268285, |
|
"eval_loss": 6.142301082611084, |
|
"eval_runtime": 42.9764, |
|
"eval_samples_per_second": 51.68, |
|
"eval_steps_per_second": 6.469, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.99e-05, |
|
"loss": 6.883, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_cer": 0.850830928450592, |
|
"eval_loss": 3.596842050552368, |
|
"eval_runtime": 42.8348, |
|
"eval_samples_per_second": 51.85, |
|
"eval_steps_per_second": 6.49, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 4.93579766536965e-05, |
|
"loss": 4.0838, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_cer": 0.48343118718280426, |
|
"eval_loss": 2.5516390800476074, |
|
"eval_runtime": 42.9354, |
|
"eval_samples_per_second": 51.729, |
|
"eval_steps_per_second": 6.475, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.87094682230869e-05, |
|
"loss": 3.167, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_cer": 0.4450691611105583, |
|
"eval_loss": 2.2739391326904297, |
|
"eval_runtime": 42.8894, |
|
"eval_samples_per_second": 51.784, |
|
"eval_steps_per_second": 6.482, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 4.806312148724601e-05, |
|
"loss": 2.826, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_cer": 0.4178525226390686, |
|
"eval_loss": 2.0223917961120605, |
|
"eval_runtime": 42.9677, |
|
"eval_samples_per_second": 51.69, |
|
"eval_steps_per_second": 6.47, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 4.7414613056636405e-05, |
|
"loss": 2.6955, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_cer": 0.4174544730818987, |
|
"eval_loss": 1.9600275754928589, |
|
"eval_runtime": 42.8412, |
|
"eval_samples_per_second": 51.843, |
|
"eval_steps_per_second": 6.489, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 4.676610462602681e-05, |
|
"loss": 2.5812, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_cer": 0.40994128769031746, |
|
"eval_loss": 1.769142985343933, |
|
"eval_runtime": 42.9361, |
|
"eval_samples_per_second": 51.728, |
|
"eval_steps_per_second": 6.475, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.611975789018591e-05, |
|
"loss": 2.4952, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_cer": 0.4013832222111653, |
|
"eval_loss": 1.8323670625686646, |
|
"eval_runtime": 42.7115, |
|
"eval_samples_per_second": 52.0, |
|
"eval_steps_per_second": 6.509, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 4.547124945957631e-05, |
|
"loss": 2.3938, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_cer": 0.37799781072743555, |
|
"eval_loss": 1.7351980209350586, |
|
"eval_runtime": 42.7436, |
|
"eval_samples_per_second": 51.961, |
|
"eval_steps_per_second": 6.504, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 4.482490272373541e-05, |
|
"loss": 2.3584, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_cer": 0.3678475470196039, |
|
"eval_loss": 1.64540696144104, |
|
"eval_runtime": 42.7445, |
|
"eval_samples_per_second": 51.96, |
|
"eval_steps_per_second": 6.504, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 4.417639429312581e-05, |
|
"loss": 2.325, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"eval_cer": 0.38352074833316746, |
|
"eval_loss": 1.6946874856948853, |
|
"eval_runtime": 42.4882, |
|
"eval_samples_per_second": 52.273, |
|
"eval_steps_per_second": 6.543, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 4.3527885862516214e-05, |
|
"loss": 2.2454, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"eval_cer": 0.34078017713205294, |
|
"eval_loss": 1.5765234231948853, |
|
"eval_runtime": 42.1243, |
|
"eval_samples_per_second": 52.725, |
|
"eval_steps_per_second": 6.6, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 4.287937743190661e-05, |
|
"loss": 2.1954, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"eval_cer": 0.37093243108767043, |
|
"eval_loss": 1.603211760520935, |
|
"eval_runtime": 42.6116, |
|
"eval_samples_per_second": 52.122, |
|
"eval_steps_per_second": 6.524, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 4.223086900129702e-05, |
|
"loss": 2.1492, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"eval_cer": 0.3447606727037516, |
|
"eval_loss": 1.6078611612319946, |
|
"eval_runtime": 42.9188, |
|
"eval_samples_per_second": 51.749, |
|
"eval_steps_per_second": 6.477, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 4.1582360570687426e-05, |
|
"loss": 2.1655, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"eval_cer": 0.33555577669419845, |
|
"eval_loss": 1.4955742359161377, |
|
"eval_runtime": 42.6136, |
|
"eval_samples_per_second": 52.12, |
|
"eval_steps_per_second": 6.524, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 4.093385214007782e-05, |
|
"loss": 2.1393, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"eval_cer": 0.331625037317146, |
|
"eval_loss": 1.4772567749023438, |
|
"eval_runtime": 42.6929, |
|
"eval_samples_per_second": 52.023, |
|
"eval_steps_per_second": 6.512, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 4.028534370946823e-05, |
|
"loss": 2.1027, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"eval_cer": 0.341427007662454, |
|
"eval_loss": 1.5089548826217651, |
|
"eval_runtime": 42.7699, |
|
"eval_samples_per_second": 51.929, |
|
"eval_steps_per_second": 6.5, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 3.9636835278858624e-05, |
|
"loss": 2.0824, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"eval_cer": 0.34575579659667627, |
|
"eval_loss": 1.5948169231414795, |
|
"eval_runtime": 42.6031, |
|
"eval_samples_per_second": 52.132, |
|
"eval_steps_per_second": 6.525, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 3.899048854301773e-05, |
|
"loss": 2.061, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"eval_cer": 0.35058214747736094, |
|
"eval_loss": 1.4923882484436035, |
|
"eval_runtime": 42.6516, |
|
"eval_samples_per_second": 52.073, |
|
"eval_steps_per_second": 6.518, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 3.8341980112408135e-05, |
|
"loss": 2.0212, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"eval_cer": 0.33247089262613194, |
|
"eval_loss": 1.4590569734573364, |
|
"eval_runtime": 42.5489, |
|
"eval_samples_per_second": 52.199, |
|
"eval_steps_per_second": 6.534, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 3.769347168179853e-05, |
|
"loss": 2.0504, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"eval_cer": 0.3344611404119813, |
|
"eval_loss": 1.4551000595092773, |
|
"eval_runtime": 42.7689, |
|
"eval_samples_per_second": 51.93, |
|
"eval_steps_per_second": 6.5, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 3.7044963251188936e-05, |
|
"loss": 2.0113, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"eval_cer": 0.3344113842173351, |
|
"eval_loss": 1.4068984985351562, |
|
"eval_runtime": 42.6741, |
|
"eval_samples_per_second": 52.046, |
|
"eval_steps_per_second": 6.514, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 3.639645482057933e-05, |
|
"loss": 2.0057, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"eval_cer": 0.3230669718379938, |
|
"eval_loss": 1.443265676498413, |
|
"eval_runtime": 42.7855, |
|
"eval_samples_per_second": 51.91, |
|
"eval_steps_per_second": 6.498, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 3.574794638996974e-05, |
|
"loss": 1.9741, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"eval_cer": 0.3216240421932531, |
|
"eval_loss": 1.426885724067688, |
|
"eval_runtime": 42.8301, |
|
"eval_samples_per_second": 51.856, |
|
"eval_steps_per_second": 6.491, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 3.509943795936014e-05, |
|
"loss": 1.936, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"eval_cer": 0.3227186784754702, |
|
"eval_loss": 1.3612221479415894, |
|
"eval_runtime": 43.0542, |
|
"eval_samples_per_second": 51.586, |
|
"eval_steps_per_second": 6.457, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"learning_rate": 3.4450929528750544e-05, |
|
"loss": 1.9416, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"eval_cer": 0.3027664444223306, |
|
"eval_loss": 1.363100290298462, |
|
"eval_runtime": 43.1271, |
|
"eval_samples_per_second": 51.499, |
|
"eval_steps_per_second": 6.446, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 3.380242109814095e-05, |
|
"loss": 1.9425, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"eval_cer": 0.3038610807045477, |
|
"eval_loss": 1.3716000318527222, |
|
"eval_runtime": 43.286, |
|
"eval_samples_per_second": 51.31, |
|
"eval_steps_per_second": 6.422, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 3.3153912667531345e-05, |
|
"loss": 1.9351, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"eval_cer": 0.31490695591601153, |
|
"eval_loss": 1.3932286500930786, |
|
"eval_runtime": 43.3029, |
|
"eval_samples_per_second": 51.29, |
|
"eval_steps_per_second": 6.42, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"learning_rate": 3.250540423692175e-05, |
|
"loss": 1.9046, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"eval_cer": 0.3329684545725943, |
|
"eval_loss": 1.4470584392547607, |
|
"eval_runtime": 42.9923, |
|
"eval_samples_per_second": 51.66, |
|
"eval_steps_per_second": 6.466, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 3.185905750108085e-05, |
|
"loss": 1.8587, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"eval_cer": 0.3056523037118121, |
|
"eval_loss": 1.3519924879074097, |
|
"eval_runtime": 42.7004, |
|
"eval_samples_per_second": 52.014, |
|
"eval_steps_per_second": 6.51, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"learning_rate": 3.1210549070471253e-05, |
|
"loss": 1.8699, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"eval_cer": 0.3289879590008956, |
|
"eval_loss": 1.4434651136398315, |
|
"eval_runtime": 42.904, |
|
"eval_samples_per_second": 51.767, |
|
"eval_steps_per_second": 6.48, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 3.056204063986166e-05, |
|
"loss": 1.8328, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"eval_cer": 0.31356353866056325, |
|
"eval_loss": 1.361649751663208, |
|
"eval_runtime": 42.7673, |
|
"eval_samples_per_second": 51.932, |
|
"eval_steps_per_second": 6.5, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"learning_rate": 2.9913532209252054e-05, |
|
"loss": 1.8136, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"eval_cer": 0.2943078913324709, |
|
"eval_loss": 1.3512203693389893, |
|
"eval_runtime": 42.5723, |
|
"eval_samples_per_second": 52.17, |
|
"eval_steps_per_second": 6.53, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 2.9265023778642458e-05, |
|
"loss": 1.8099, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"eval_cer": 0.2956513085879192, |
|
"eval_loss": 1.3534834384918213, |
|
"eval_runtime": 42.854, |
|
"eval_samples_per_second": 51.827, |
|
"eval_steps_per_second": 6.487, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"learning_rate": 2.861651534803286e-05, |
|
"loss": 1.8021, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"eval_cer": 0.2981888745148771, |
|
"eval_loss": 1.3732918500900269, |
|
"eval_runtime": 42.5792, |
|
"eval_samples_per_second": 52.162, |
|
"eval_steps_per_second": 6.529, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 2.7968006917423263e-05, |
|
"loss": 1.7809, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"eval_cer": 0.30804060105483133, |
|
"eval_loss": 1.3088232278823853, |
|
"eval_runtime": 42.9072, |
|
"eval_samples_per_second": 51.763, |
|
"eval_steps_per_second": 6.479, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 2.731949848681366e-05, |
|
"loss": 1.7734, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"eval_cer": 0.28858592894815405, |
|
"eval_loss": 1.320089340209961, |
|
"eval_runtime": 42.6671, |
|
"eval_samples_per_second": 52.054, |
|
"eval_steps_per_second": 6.516, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 2.6670990056204063e-05, |
|
"loss": 1.7646, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_cer": 0.3268981988257538, |
|
"eval_loss": 1.3471167087554932, |
|
"eval_runtime": 42.7924, |
|
"eval_samples_per_second": 51.902, |
|
"eval_steps_per_second": 6.496, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 2.602248162559447e-05, |
|
"loss": 1.733, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"eval_cer": 0.30321425017414666, |
|
"eval_loss": 1.3437916040420532, |
|
"eval_runtime": 42.7066, |
|
"eval_samples_per_second": 52.006, |
|
"eval_steps_per_second": 6.51, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"learning_rate": 2.5373973194984868e-05, |
|
"loss": 1.7182, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"eval_cer": 0.2999800975221415, |
|
"eval_loss": 1.3310909271240234, |
|
"eval_runtime": 42.787, |
|
"eval_samples_per_second": 51.908, |
|
"eval_steps_per_second": 6.497, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 2.472546476437527e-05, |
|
"loss": 1.7071, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"eval_cer": 0.3073937705244303, |
|
"eval_loss": 1.2641910314559937, |
|
"eval_runtime": 42.6973, |
|
"eval_samples_per_second": 52.017, |
|
"eval_steps_per_second": 6.511, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 16.43, |
|
"learning_rate": 2.4076956333765675e-05, |
|
"loss": 1.7196, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 16.43, |
|
"eval_cer": 0.2859488506319037, |
|
"eval_loss": 1.2662409543991089, |
|
"eval_runtime": 42.6819, |
|
"eval_samples_per_second": 52.036, |
|
"eval_steps_per_second": 6.513, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"learning_rate": 2.3428447903156076e-05, |
|
"loss": 1.7264, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"eval_cer": 0.2878893422231068, |
|
"eval_loss": 1.2460156679153442, |
|
"eval_runtime": 42.7771, |
|
"eval_samples_per_second": 51.92, |
|
"eval_steps_per_second": 6.499, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"learning_rate": 2.2782101167315176e-05, |
|
"loss": 1.6875, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"eval_cer": 0.2931137426609613, |
|
"eval_loss": 1.3022774457931519, |
|
"eval_runtime": 42.5345, |
|
"eval_samples_per_second": 52.216, |
|
"eval_steps_per_second": 6.536, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 2.2133592736705577e-05, |
|
"loss": 1.6659, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"eval_cer": 0.2927654492984377, |
|
"eval_loss": 1.32107675075531, |
|
"eval_runtime": 42.821, |
|
"eval_samples_per_second": 51.867, |
|
"eval_steps_per_second": 6.492, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"learning_rate": 2.148508430609598e-05, |
|
"loss": 1.6694, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"eval_cer": 0.2882873917802766, |
|
"eval_loss": 1.3291140794754028, |
|
"eval_runtime": 42.7715, |
|
"eval_samples_per_second": 51.927, |
|
"eval_steps_per_second": 6.5, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"learning_rate": 2.0836575875486384e-05, |
|
"loss": 1.643, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"eval_cer": 0.294755697084287, |
|
"eval_loss": 1.2615532875061035, |
|
"eval_runtime": 42.8646, |
|
"eval_samples_per_second": 51.814, |
|
"eval_steps_per_second": 6.486, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 18.62, |
|
"learning_rate": 2.0188067444876785e-05, |
|
"loss": 1.676, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 18.62, |
|
"eval_cer": 0.2835107970942382, |
|
"eval_loss": 1.2185758352279663, |
|
"eval_runtime": 42.7823, |
|
"eval_samples_per_second": 51.914, |
|
"eval_steps_per_second": 6.498, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"learning_rate": 1.9539559014267185e-05, |
|
"loss": 1.6397, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_cer": 0.2810727435565728, |
|
"eval_loss": 1.3059513568878174, |
|
"eval_runtime": 42.9668, |
|
"eval_samples_per_second": 51.691, |
|
"eval_steps_per_second": 6.47, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 1.8893212278426286e-05, |
|
"loss": 1.6347, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"eval_cer": 0.28838690416956914, |
|
"eval_loss": 1.2377227544784546, |
|
"eval_runtime": 42.8094, |
|
"eval_samples_per_second": 51.881, |
|
"eval_steps_per_second": 6.494, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"learning_rate": 1.824470384781669e-05, |
|
"loss": 1.6328, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"eval_cer": 0.27599761170265696, |
|
"eval_loss": 1.2721112966537476, |
|
"eval_runtime": 42.6795, |
|
"eval_samples_per_second": 52.039, |
|
"eval_steps_per_second": 6.514, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 20.08, |
|
"learning_rate": 1.7596195417207094e-05, |
|
"loss": 1.6092, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 20.08, |
|
"eval_cer": 0.28231664842272863, |
|
"eval_loss": 1.2696741819381714, |
|
"eval_runtime": 42.6768, |
|
"eval_samples_per_second": 52.042, |
|
"eval_steps_per_second": 6.514, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 20.44, |
|
"learning_rate": 1.6947686986597494e-05, |
|
"loss": 1.5737, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 20.44, |
|
"eval_cer": 0.28306299134242213, |
|
"eval_loss": 1.2230887413024902, |
|
"eval_runtime": 42.9425, |
|
"eval_samples_per_second": 51.72, |
|
"eval_steps_per_second": 6.474, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 20.81, |
|
"learning_rate": 1.6299178555987894e-05, |
|
"loss": 1.6166, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 20.81, |
|
"eval_cer": 0.2663449099412877, |
|
"eval_loss": 1.2277541160583496, |
|
"eval_runtime": 42.813, |
|
"eval_samples_per_second": 51.877, |
|
"eval_steps_per_second": 6.493, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 21.17, |
|
"learning_rate": 1.56506701253783e-05, |
|
"loss": 1.5964, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 21.17, |
|
"eval_cer": 0.27355955816499156, |
|
"eval_loss": 1.2313120365142822, |
|
"eval_runtime": 42.7309, |
|
"eval_samples_per_second": 51.976, |
|
"eval_steps_per_second": 6.506, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 21.54, |
|
"learning_rate": 1.5002161694768699e-05, |
|
"loss": 1.5237, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 21.54, |
|
"eval_cer": 0.27863469001890734, |
|
"eval_loss": 1.2411593198776245, |
|
"eval_runtime": 42.9368, |
|
"eval_samples_per_second": 51.727, |
|
"eval_steps_per_second": 6.475, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 21.9, |
|
"learning_rate": 1.4353653264159101e-05, |
|
"loss": 1.5419, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 21.9, |
|
"eval_cer": 0.28634690018907355, |
|
"eval_loss": 1.2718561887741089, |
|
"eval_runtime": 42.5781, |
|
"eval_samples_per_second": 52.163, |
|
"eval_steps_per_second": 6.529, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 22.27, |
|
"learning_rate": 1.3705144833549505e-05, |
|
"loss": 1.5654, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 22.27, |
|
"eval_cer": 0.26734003383421234, |
|
"eval_loss": 1.2373576164245605, |
|
"eval_runtime": 42.5574, |
|
"eval_samples_per_second": 52.188, |
|
"eval_steps_per_second": 6.532, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 22.63, |
|
"learning_rate": 1.3060959792477304e-05, |
|
"loss": 1.5331, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 22.63, |
|
"eval_cer": 0.2708727236540949, |
|
"eval_loss": 1.197614073753357, |
|
"eval_runtime": 42.6921, |
|
"eval_samples_per_second": 52.024, |
|
"eval_steps_per_second": 6.512, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 1.2412451361867706e-05, |
|
"loss": 1.5378, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_cer": 0.26838491392178326, |
|
"eval_loss": 1.1672557592391968, |
|
"eval_runtime": 42.9497, |
|
"eval_samples_per_second": 51.712, |
|
"eval_steps_per_second": 6.473, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 23.36, |
|
"learning_rate": 1.1763942931258106e-05, |
|
"loss": 1.4972, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 23.36, |
|
"eval_cer": 0.26938003781470793, |
|
"eval_loss": 1.1548832654953003, |
|
"eval_runtime": 42.5425, |
|
"eval_samples_per_second": 52.207, |
|
"eval_steps_per_second": 6.535, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 23.73, |
|
"learning_rate": 1.1115434500648508e-05, |
|
"loss": 1.5112, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 23.73, |
|
"eval_cer": 0.2684844263110757, |
|
"eval_loss": 1.2580962181091309, |
|
"eval_runtime": 43.0178, |
|
"eval_samples_per_second": 51.63, |
|
"eval_steps_per_second": 6.462, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 24.09, |
|
"learning_rate": 1.046692607003891e-05, |
|
"loss": 1.5026, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 24.09, |
|
"eval_cer": 0.26957906259329284, |
|
"eval_loss": 1.2475780248641968, |
|
"eval_runtime": 42.8521, |
|
"eval_samples_per_second": 51.829, |
|
"eval_steps_per_second": 6.487, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 24.46, |
|
"learning_rate": 9.818417639429313e-06, |
|
"loss": 1.5062, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 24.46, |
|
"eval_cer": 0.2755995621454871, |
|
"eval_loss": 1.2111254930496216, |
|
"eval_runtime": 42.9059, |
|
"eval_samples_per_second": 51.764, |
|
"eval_steps_per_second": 6.479, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 24.82, |
|
"learning_rate": 9.169909208819715e-06, |
|
"loss": 1.4816, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 24.82, |
|
"eval_cer": 0.26345905065180614, |
|
"eval_loss": 1.2007070779800415, |
|
"eval_runtime": 42.6871, |
|
"eval_samples_per_second": 52.03, |
|
"eval_steps_per_second": 6.513, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 25.19, |
|
"learning_rate": 8.521400778210117e-06, |
|
"loss": 1.4836, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 25.19, |
|
"eval_cer": 0.27435565727933126, |
|
"eval_loss": 1.2548900842666626, |
|
"eval_runtime": 42.6925, |
|
"eval_samples_per_second": 52.023, |
|
"eval_steps_per_second": 6.512, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 25.55, |
|
"learning_rate": 7.87289234760052e-06, |
|
"loss": 1.479, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 25.55, |
|
"eval_cer": 0.26987759976117026, |
|
"eval_loss": 1.1535056829452515, |
|
"eval_runtime": 42.5922, |
|
"eval_samples_per_second": 52.146, |
|
"eval_steps_per_second": 6.527, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 25.92, |
|
"learning_rate": 7.2243839169909205e-06, |
|
"loss": 1.493, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 25.92, |
|
"eval_cer": 0.26972833117723155, |
|
"eval_loss": 1.198728084564209, |
|
"eval_runtime": 42.6034, |
|
"eval_samples_per_second": 52.132, |
|
"eval_steps_per_second": 6.525, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 26.28, |
|
"learning_rate": 6.5758754863813235e-06, |
|
"loss": 1.4524, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 26.28, |
|
"eval_cer": 0.27470395064185493, |
|
"eval_loss": 1.2245545387268066, |
|
"eval_runtime": 42.9242, |
|
"eval_samples_per_second": 51.742, |
|
"eval_steps_per_second": 6.477, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 26.65, |
|
"learning_rate": 5.927367055771725e-06, |
|
"loss": 1.4569, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 26.65, |
|
"eval_cer": 0.2605234351676784, |
|
"eval_loss": 1.1879122257232666, |
|
"eval_runtime": 42.634, |
|
"eval_samples_per_second": 52.095, |
|
"eval_steps_per_second": 6.521, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 5.278858625162128e-06, |
|
"loss": 1.4535, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"eval_cer": 0.27281321524529806, |
|
"eval_loss": 1.2265853881835938, |
|
"eval_runtime": 42.7816, |
|
"eval_samples_per_second": 51.915, |
|
"eval_steps_per_second": 6.498, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 27.38, |
|
"learning_rate": 4.63035019455253e-06, |
|
"loss": 1.4452, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 27.38, |
|
"eval_cer": 0.2566424519852722, |
|
"eval_loss": 1.1812487840652466, |
|
"eval_runtime": 42.5285, |
|
"eval_samples_per_second": 52.224, |
|
"eval_steps_per_second": 6.537, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 27.74, |
|
"learning_rate": 3.981841763942931e-06, |
|
"loss": 1.4513, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 27.74, |
|
"eval_cer": 0.26286197631605135, |
|
"eval_loss": 1.1672886610031128, |
|
"eval_runtime": 43.0029, |
|
"eval_samples_per_second": 51.648, |
|
"eval_steps_per_second": 6.465, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 28.11, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.4561, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 28.11, |
|
"eval_cer": 0.26181709622848043, |
|
"eval_loss": 1.1963270902633667, |
|
"eval_runtime": 42.7473, |
|
"eval_samples_per_second": 51.957, |
|
"eval_steps_per_second": 6.503, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 28.47, |
|
"learning_rate": 2.6848249027237355e-06, |
|
"loss": 1.4357, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 28.47, |
|
"eval_cer": 0.26913125684147676, |
|
"eval_loss": 1.201293706893921, |
|
"eval_runtime": 42.6759, |
|
"eval_samples_per_second": 52.043, |
|
"eval_steps_per_second": 6.514, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 28.84, |
|
"learning_rate": 2.0363164721141376e-06, |
|
"loss": 1.4427, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 28.84, |
|
"eval_cer": 0.2726141904667131, |
|
"eval_loss": 1.2448346614837646, |
|
"eval_runtime": 42.4761, |
|
"eval_samples_per_second": 52.288, |
|
"eval_steps_per_second": 6.545, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"learning_rate": 1.3899697362732382e-06, |
|
"loss": 1.4171, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"eval_cer": 0.26684247188775, |
|
"eval_loss": 1.2063277959823608, |
|
"eval_runtime": 42.7033, |
|
"eval_samples_per_second": 52.01, |
|
"eval_steps_per_second": 6.51, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 29.57, |
|
"learning_rate": 7.414613056636403e-07, |
|
"loss": 1.4639, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 29.57, |
|
"eval_cer": 0.26694198427704247, |
|
"eval_loss": 1.2228556871414185, |
|
"eval_runtime": 42.5785, |
|
"eval_samples_per_second": 52.162, |
|
"eval_steps_per_second": 6.529, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 29.93, |
|
"learning_rate": 9.295287505404236e-08, |
|
"loss": 1.4234, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 29.93, |
|
"eval_cer": 0.2594785550801075, |
|
"eval_loss": 1.1955249309539795, |
|
"eval_runtime": 43.02, |
|
"eval_samples_per_second": 51.627, |
|
"eval_steps_per_second": 6.462, |
|
"step": 24600 |
|
} |
|
], |
|
"logging_steps": 300, |
|
"max_steps": 24630, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 600, |
|
"total_flos": 3.9184197928838064e+20, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|