|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 11.680164252309797, |
|
"eval_steps": 300, |
|
"global_step": 9600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.96e-05, |
|
"loss": 14.3929, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_cer": 0.9869306169517587, |
|
"eval_loss": 7.0630784034729, |
|
"eval_runtime": 45.4918, |
|
"eval_samples_per_second": 48.822, |
|
"eval_steps_per_second": 6.111, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.895209580838324e-05, |
|
"loss": 8.6544, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_cer": 0.8301460695752451, |
|
"eval_loss": 5.885794639587402, |
|
"eval_runtime": 39.1068, |
|
"eval_samples_per_second": 56.793, |
|
"eval_steps_per_second": 7.109, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.575491873396066e-05, |
|
"loss": 4.2963, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_cer": 0.5212857966557756, |
|
"eval_loss": 2.0850701332092285, |
|
"eval_runtime": 39.3849, |
|
"eval_samples_per_second": 56.392, |
|
"eval_steps_per_second": 7.059, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 9.256843455945254e-05, |
|
"loss": 2.416, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_cer": 0.42461080146069574, |
|
"eval_loss": 1.6146297454833984, |
|
"eval_runtime": 39.2918, |
|
"eval_samples_per_second": 56.526, |
|
"eval_steps_per_second": 7.075, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 8.936056458511549e-05, |
|
"loss": 2.0879, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_cer": 0.38155871612531234, |
|
"eval_loss": 1.3970342874526978, |
|
"eval_runtime": 39.3238, |
|
"eval_samples_per_second": 56.48, |
|
"eval_steps_per_second": 7.07, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.615269461077845e-05, |
|
"loss": 1.8739, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_cer": 0.3361522198731501, |
|
"eval_loss": 1.259849190711975, |
|
"eval_runtime": 39.8822, |
|
"eval_samples_per_second": 55.689, |
|
"eval_steps_per_second": 6.971, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.29448246364414e-05, |
|
"loss": 1.7464, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_cer": 0.3469632904093792, |
|
"eval_loss": 1.1771619319915771, |
|
"eval_runtime": 39.4164, |
|
"eval_samples_per_second": 56.347, |
|
"eval_steps_per_second": 7.053, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.973695466210437e-05, |
|
"loss": 1.7025, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_cer": 0.31635594849125503, |
|
"eval_loss": 1.1029127836227417, |
|
"eval_runtime": 39.436, |
|
"eval_samples_per_second": 56.319, |
|
"eval_steps_per_second": 7.049, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 7.652908468776732e-05, |
|
"loss": 1.5982, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_cer": 0.3095329617528349, |
|
"eval_loss": 1.049662709236145, |
|
"eval_runtime": 39.3386, |
|
"eval_samples_per_second": 56.459, |
|
"eval_steps_per_second": 7.067, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 7.332121471343029e-05, |
|
"loss": 1.5632, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_cer": 0.30030751489525276, |
|
"eval_loss": 1.013907790184021, |
|
"eval_runtime": 39.4665, |
|
"eval_samples_per_second": 56.276, |
|
"eval_steps_per_second": 7.044, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 7.011334473909324e-05, |
|
"loss": 1.5063, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_cer": 0.28858350951374206, |
|
"eval_loss": 0.9488086104393005, |
|
"eval_runtime": 39.4805, |
|
"eval_samples_per_second": 56.256, |
|
"eval_steps_per_second": 7.041, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 6.69054747647562e-05, |
|
"loss": 1.4507, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_cer": 0.272583125120123, |
|
"eval_loss": 0.9192214012145996, |
|
"eval_runtime": 39.3363, |
|
"eval_samples_per_second": 56.462, |
|
"eval_steps_per_second": 7.067, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 6.369760479041916e-05, |
|
"loss": 1.4029, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_cer": 0.2764751105131655, |
|
"eval_loss": 0.9218717217445374, |
|
"eval_runtime": 39.3534, |
|
"eval_samples_per_second": 56.437, |
|
"eval_steps_per_second": 7.064, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 6.050042771599658e-05, |
|
"loss": 1.3126, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"eval_cer": 0.27450509321545263, |
|
"eval_loss": 0.9048557281494141, |
|
"eval_runtime": 39.4768, |
|
"eval_samples_per_second": 56.261, |
|
"eval_steps_per_second": 7.042, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 5.729255774165954e-05, |
|
"loss": 1.3053, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"eval_cer": 0.2652315971554872, |
|
"eval_loss": 0.8531870245933533, |
|
"eval_runtime": 39.4442, |
|
"eval_samples_per_second": 56.307, |
|
"eval_steps_per_second": 7.048, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 5.40846877673225e-05, |
|
"loss": 1.2989, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"eval_cer": 0.24903901595233519, |
|
"eval_loss": 0.8212350010871887, |
|
"eval_runtime": 39.4345, |
|
"eval_samples_per_second": 56.321, |
|
"eval_steps_per_second": 7.05, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 5.087681779298546e-05, |
|
"loss": 1.2403, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"eval_cer": 0.26124351335767826, |
|
"eval_loss": 0.8196715116500854, |
|
"eval_runtime": 39.3565, |
|
"eval_samples_per_second": 56.433, |
|
"eval_steps_per_second": 7.064, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 4.766894781864842e-05, |
|
"loss": 1.1903, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"eval_cer": 0.2607630213338459, |
|
"eval_loss": 0.8173399567604065, |
|
"eval_runtime": 39.5209, |
|
"eval_samples_per_second": 56.198, |
|
"eval_steps_per_second": 7.034, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 4.446107784431138e-05, |
|
"loss": 1.2313, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"eval_cer": 0.24995195079761676, |
|
"eval_loss": 0.8241677284240723, |
|
"eval_runtime": 39.4127, |
|
"eval_samples_per_second": 56.352, |
|
"eval_steps_per_second": 7.054, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 4.125320786997434e-05, |
|
"loss": 1.1554, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"eval_cer": 0.24529117816644244, |
|
"eval_loss": 0.7795117497444153, |
|
"eval_runtime": 39.3516, |
|
"eval_samples_per_second": 56.44, |
|
"eval_steps_per_second": 7.065, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 3.80453378956373e-05, |
|
"loss": 1.1243, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"eval_cer": 0.2526427061310782, |
|
"eval_loss": 0.782616913318634, |
|
"eval_runtime": 39.456, |
|
"eval_samples_per_second": 56.291, |
|
"eval_steps_per_second": 7.046, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 3.483746792130026e-05, |
|
"loss": 1.099, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"eval_cer": 0.23010763021333847, |
|
"eval_loss": 0.7462431192398071, |
|
"eval_runtime": 39.3849, |
|
"eval_samples_per_second": 56.392, |
|
"eval_steps_per_second": 7.059, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 3.1629597946963216e-05, |
|
"loss": 1.0777, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"eval_cer": 0.22544685758216412, |
|
"eval_loss": 0.7633857131004333, |
|
"eval_runtime": 39.2653, |
|
"eval_samples_per_second": 56.564, |
|
"eval_steps_per_second": 7.08, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 2.8421727972626178e-05, |
|
"loss": 1.0901, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"eval_cer": 0.23880453584470498, |
|
"eval_loss": 0.7462579607963562, |
|
"eval_runtime": 39.2693, |
|
"eval_samples_per_second": 56.558, |
|
"eval_steps_per_second": 7.079, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 2.5224550898203592e-05, |
|
"loss": 1.0049, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"eval_cer": 0.22155487218912165, |
|
"eval_loss": 0.7342504858970642, |
|
"eval_runtime": 39.3738, |
|
"eval_samples_per_second": 56.408, |
|
"eval_steps_per_second": 7.061, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 2.2016680923866555e-05, |
|
"loss": 1.0011, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"eval_cer": 0.22674418604651161, |
|
"eval_loss": 0.7101256251335144, |
|
"eval_runtime": 39.3531, |
|
"eval_samples_per_second": 56.438, |
|
"eval_steps_per_second": 7.064, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 1.8808810949529513e-05, |
|
"loss": 1.0084, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"eval_cer": 0.22189121660580435, |
|
"eval_loss": 0.698137640953064, |
|
"eval_runtime": 39.3187, |
|
"eval_samples_per_second": 56.487, |
|
"eval_steps_per_second": 7.07, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 1.5600940975192476e-05, |
|
"loss": 0.9547, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"eval_cer": 0.22222756102248703, |
|
"eval_loss": 0.7049764394760132, |
|
"eval_runtime": 39.2625, |
|
"eval_samples_per_second": 56.568, |
|
"eval_steps_per_second": 7.081, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"learning_rate": 1.2393071000855433e-05, |
|
"loss": 0.9304, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"eval_cer": 0.2266961368441284, |
|
"eval_loss": 0.6863571405410767, |
|
"eval_runtime": 39.4555, |
|
"eval_samples_per_second": 56.291, |
|
"eval_steps_per_second": 7.046, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 9.185201026518392e-06, |
|
"loss": 0.9044, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"eval_cer": 0.2206419373438401, |
|
"eval_loss": 0.6961002945899963, |
|
"eval_runtime": 39.4203, |
|
"eval_samples_per_second": 56.341, |
|
"eval_steps_per_second": 7.052, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 5.977331052181352e-06, |
|
"loss": 0.9054, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"eval_cer": 0.21146453968864118, |
|
"eval_loss": 0.6892764568328857, |
|
"eval_runtime": 39.4357, |
|
"eval_samples_per_second": 56.32, |
|
"eval_steps_per_second": 7.049, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"learning_rate": 2.7694610778443115e-06, |
|
"loss": 0.9067, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"eval_cer": 0.21718239477224677, |
|
"eval_loss": 0.682004451751709, |
|
"eval_runtime": 39.4317, |
|
"eval_samples_per_second": 56.325, |
|
"eval_steps_per_second": 7.05, |
|
"step": 9600 |
|
} |
|
], |
|
"logging_steps": 300, |
|
"max_steps": 9852, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 12, |
|
"save_steps": 600, |
|
"total_flos": 1.5092741663533305e+20, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|