|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 11.999599519423308, |
|
"global_step": 7488, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.836092715231788e-05, |
|
"loss": 7.1097, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_cer": 0.9965361196010141, |
|
"eval_loss": 3.3989405632019043, |
|
"eval_runtime": 296.7149, |
|
"eval_samples_per_second": 12.952, |
|
"eval_steps_per_second": 1.621, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.339403973509933e-05, |
|
"loss": 3.0235, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_cer": 0.31626033596322883, |
|
"eval_loss": 1.3183486461639404, |
|
"eval_runtime": 296.8438, |
|
"eval_samples_per_second": 12.946, |
|
"eval_steps_per_second": 1.62, |
|
"eval_wer": 0.7976831955321887, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.84271523178808e-05, |
|
"loss": 1.1419, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_cer": 0.19131423697490937, |
|
"eval_loss": 0.6415870785713196, |
|
"eval_runtime": 297.4006, |
|
"eval_samples_per_second": 12.922, |
|
"eval_steps_per_second": 1.617, |
|
"eval_wer": 0.5543314644734832, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.346026490066225e-05, |
|
"loss": 0.8242, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_cer": 0.16081218755182788, |
|
"eval_loss": 0.5063228011131287, |
|
"eval_runtime": 296.5772, |
|
"eval_samples_per_second": 12.958, |
|
"eval_steps_per_second": 1.622, |
|
"eval_wer": 0.4804206025436855, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.683774834437087e-05, |
|
"loss": 0.6876, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_cer": 0.1387068495747151, |
|
"eval_loss": 0.44008567929267883, |
|
"eval_runtime": 296.3457, |
|
"eval_samples_per_second": 12.968, |
|
"eval_steps_per_second": 1.623, |
|
"eval_wer": 0.42799799297541397, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 7.021523178807947e-05, |
|
"loss": 0.5868, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_cer": 0.12487975928163575, |
|
"eval_loss": 0.39397352933883667, |
|
"eval_runtime": 295.9984, |
|
"eval_samples_per_second": 12.983, |
|
"eval_steps_per_second": 1.625, |
|
"eval_wer": 0.3906716987717882, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 6.359271523178809e-05, |
|
"loss": 0.5285, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"eval_cer": 0.1200464377946786, |
|
"eval_loss": 0.36609867215156555, |
|
"eval_runtime": 296.3002, |
|
"eval_samples_per_second": 12.97, |
|
"eval_steps_per_second": 1.623, |
|
"eval_wer": 0.3762952943999651, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 6.433863885839737e-05, |
|
"loss": 0.5, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"eval_cer": 0.11358305494349279, |
|
"eval_loss": 0.35275039076805115, |
|
"eval_runtime": 294.0242, |
|
"eval_samples_per_second": 13.07, |
|
"eval_steps_per_second": 1.636, |
|
"eval_wer": 0.36098082418900934, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 5.885016465422613e-05, |
|
"loss": 0.4538, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_cer": 0.10862653114412302, |
|
"eval_loss": 0.34030598402023315, |
|
"eval_runtime": 295.0902, |
|
"eval_samples_per_second": 13.023, |
|
"eval_steps_per_second": 1.63, |
|
"eval_wer": 0.34854599794934443, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 5.336169045005489e-05, |
|
"loss": 0.4165, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"eval_cer": 0.1061861776482574, |
|
"eval_loss": 0.33348363637924194, |
|
"eval_runtime": 294.4649, |
|
"eval_samples_per_second": 13.051, |
|
"eval_steps_per_second": 1.633, |
|
"eval_wer": 0.3438774842383124, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 4.787321624588365e-05, |
|
"loss": 0.3989, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"eval_cer": 0.1035941905371147, |
|
"eval_loss": 0.3264155387878418, |
|
"eval_runtime": 294.7693, |
|
"eval_samples_per_second": 13.037, |
|
"eval_steps_per_second": 1.632, |
|
"eval_wer": 0.3339950697004734, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 4.2384742041712404e-05, |
|
"loss": 0.3679, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_cer": 0.10127703935366171, |
|
"eval_loss": 0.3256165385246277, |
|
"eval_runtime": 293.9503, |
|
"eval_samples_per_second": 13.074, |
|
"eval_steps_per_second": 1.636, |
|
"eval_wer": 0.3287375379044045, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 3.689626783754117e-05, |
|
"loss": 0.3517, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"eval_cer": 0.10020612694576729, |
|
"eval_loss": 0.3212122619152069, |
|
"eval_runtime": 293.3063, |
|
"eval_samples_per_second": 13.102, |
|
"eval_steps_per_second": 1.64, |
|
"eval_wer": 0.32230196993826216, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 3.142151481888035e-05, |
|
"loss": 0.3357, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_cer": 0.09861871253583529, |
|
"eval_loss": 0.317330539226532, |
|
"eval_runtime": 293.5785, |
|
"eval_samples_per_second": 13.09, |
|
"eval_steps_per_second": 1.638, |
|
"eval_wer": 0.3196404808132813, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 2.5933040614709113e-05, |
|
"loss": 0.3225, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"eval_cer": 0.09852868007676452, |
|
"eval_loss": 0.31416377425193787, |
|
"eval_runtime": 294.148, |
|
"eval_samples_per_second": 13.065, |
|
"eval_steps_per_second": 1.635, |
|
"eval_wer": 0.31769890268112305, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 2.044456641053787e-05, |
|
"loss": 0.3057, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"eval_cer": 0.09754780012794086, |
|
"eval_loss": 0.31993794441223145, |
|
"eval_runtime": 293.1881, |
|
"eval_samples_per_second": 13.108, |
|
"eval_steps_per_second": 1.641, |
|
"eval_wer": 0.3155828006719169, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 1.4956092206366632e-05, |
|
"loss": 0.2972, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"eval_cer": 0.09668064539057503, |
|
"eval_loss": 0.3139244616031647, |
|
"eval_runtime": 292.3994, |
|
"eval_samples_per_second": 13.143, |
|
"eval_steps_per_second": 1.645, |
|
"eval_wer": 0.3127686031545191, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 9.46761800219539e-06, |
|
"loss": 0.2881, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"eval_cer": 0.09570450399222878, |
|
"eval_loss": 0.31835824251174927, |
|
"eval_runtime": 293.4248, |
|
"eval_samples_per_second": 13.097, |
|
"eval_steps_per_second": 1.639, |
|
"eval_wer": 0.31065250114531295, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 3.979143798024149e-06, |
|
"loss": 0.2791, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"eval_cer": 0.09584192195607363, |
|
"eval_loss": 0.3184186816215515, |
|
"eval_runtime": 293.2834, |
|
"eval_samples_per_second": 13.103, |
|
"eval_steps_per_second": 1.64, |
|
"eval_wer": 0.31036889984511007, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"step": 7488, |
|
"total_flos": 5.018412278085267e+19, |
|
"train_loss": 0.24150397533025497, |
|
"train_runtime": 25397.5173, |
|
"train_samples_per_second": 18.876, |
|
"train_steps_per_second": 0.295 |
|
} |
|
], |
|
"max_steps": 7488, |
|
"num_train_epochs": 12, |
|
"total_flos": 5.018412278085267e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|