|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"global_step": 64050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.82612411254273e-05, |
|
"loss": 41.6673, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.6662509441375732, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.1005, |
|
"eval_samples_per_second": 25.924, |
|
"eval_steps_per_second": 3.333, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.124375493031818e-05, |
|
"loss": 3.3366, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.467925548553467, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.5159, |
|
"eval_samples_per_second": 24.66, |
|
"eval_steps_per_second": 3.171, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.422626873520904e-05, |
|
"loss": 3.2357, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.2729125022888184, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.8374, |
|
"eval_samples_per_second": 26.794, |
|
"eval_steps_per_second": 3.445, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 7.721042597948987e-05, |
|
"loss": 3.0695, |
|
"step": 17080 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 0.9965367965367965, |
|
"eval_loss": 3.2063045501708984, |
|
"eval_new_wer": 0.9952380952380953, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.0285, |
|
"eval_samples_per_second": 26.157, |
|
"eval_steps_per_second": 3.363, |
|
"step": 17080 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 7.019622666316067e-05, |
|
"loss": 2.9729, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 0.987012987012987, |
|
"eval_loss": 3.182396411895752, |
|
"eval_new_wer": 0.9809523809523809, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.1314, |
|
"eval_samples_per_second": 25.826, |
|
"eval_steps_per_second": 3.32, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 6.318202734683146e-05, |
|
"loss": 2.9161, |
|
"step": 25620 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 0.9766233766233766, |
|
"eval_loss": 3.1507506370544434, |
|
"eval_new_wer": 0.9619047619047619, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.2738, |
|
"eval_samples_per_second": 25.381, |
|
"eval_steps_per_second": 3.263, |
|
"step": 25620 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 5.616782803050224e-05, |
|
"loss": 2.8792, |
|
"step": 29890 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 0.9688311688311688, |
|
"eval_loss": 3.118381977081299, |
|
"eval_new_wer": 0.9476190476190476, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.1799, |
|
"eval_samples_per_second": 25.673, |
|
"eval_steps_per_second": 3.301, |
|
"step": 29890 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.9153628714173024e-05, |
|
"loss": 2.8532, |
|
"step": 34160 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 0.9679653679653679, |
|
"eval_loss": 3.119230031967163, |
|
"eval_new_wer": 0.9476190476190476, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.122, |
|
"eval_samples_per_second": 25.856, |
|
"eval_steps_per_second": 3.324, |
|
"step": 34160 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.213942939784381e-05, |
|
"loss": 2.8319, |
|
"step": 38430 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 0.9679653679653679, |
|
"eval_loss": 3.1226253509521484, |
|
"eval_new_wer": 0.9476190476190476, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.203, |
|
"eval_samples_per_second": 25.6, |
|
"eval_steps_per_second": 3.291, |
|
"step": 38430 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3.512358664212464e-05, |
|
"loss": 2.8189, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 0.9636363636363636, |
|
"eval_loss": 3.1021082401275635, |
|
"eval_new_wer": 0.9428571428571428, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.2497, |
|
"eval_samples_per_second": 25.455, |
|
"eval_steps_per_second": 3.273, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2.8107743886405467e-05, |
|
"loss": 2.8081, |
|
"step": 46970 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 0.9584415584415584, |
|
"eval_loss": 3.1021568775177, |
|
"eval_new_wer": 0.9333333333333333, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.1922, |
|
"eval_samples_per_second": 25.634, |
|
"eval_steps_per_second": 3.296, |
|
"step": 46970 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2.1095188009466215e-05, |
|
"loss": 2.8027, |
|
"step": 51240 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 0.9575757575757575, |
|
"eval_loss": 3.1064510345458984, |
|
"eval_new_wer": 0.9333333333333333, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.1815, |
|
"eval_samples_per_second": 25.668, |
|
"eval_steps_per_second": 3.3, |
|
"step": 51240 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 1.4080988693136998e-05, |
|
"loss": 2.7939, |
|
"step": 55510 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 0.9575757575757575, |
|
"eval_loss": 3.102414608001709, |
|
"eval_new_wer": 0.9333333333333333, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 9.239, |
|
"eval_samples_per_second": 22.73, |
|
"eval_steps_per_second": 2.922, |
|
"step": 55510 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 7.066789376807783e-06, |
|
"loss": 2.7917, |
|
"step": 59780 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 0.9584415584415584, |
|
"eval_loss": 3.100308895111084, |
|
"eval_new_wer": 0.9333333333333333, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 9.2123, |
|
"eval_samples_per_second": 22.796, |
|
"eval_steps_per_second": 2.931, |
|
"step": 59780 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 5.094662108861425e-08, |
|
"loss": 2.7903, |
|
"step": 64050 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 0.9575757575757575, |
|
"eval_loss": 3.104132890701294, |
|
"eval_new_wer": 0.9333333333333333, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.8255, |
|
"eval_samples_per_second": 23.795, |
|
"eval_steps_per_second": 3.059, |
|
"step": 64050 |
|
} |
|
], |
|
"max_steps": 64050, |
|
"num_train_epochs": 15, |
|
"total_flos": 5.076674134512955e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|