large_base_M07 / last-checkpoint /trainer_state.json
mprzibilla's picture
Training in progress, epoch 15
8ca518a
raw
history blame
6.69 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"global_step": 64050,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 9.82612411254273e-05,
"loss": 41.6673,
"step": 4270
},
{
"epoch": 1.0,
"eval_cer": 1.0,
"eval_loss": 3.6662509441375732,
"eval_new_wer": 1.0,
"eval_old_wer": 1.0,
"eval_runtime": 8.1005,
"eval_samples_per_second": 25.924,
"eval_steps_per_second": 3.333,
"step": 4270
},
{
"epoch": 2.0,
"learning_rate": 9.124375493031818e-05,
"loss": 3.3366,
"step": 8540
},
{
"epoch": 2.0,
"eval_cer": 1.0,
"eval_loss": 3.467925548553467,
"eval_new_wer": 1.0,
"eval_old_wer": 1.0,
"eval_runtime": 8.5159,
"eval_samples_per_second": 24.66,
"eval_steps_per_second": 3.171,
"step": 8540
},
{
"epoch": 3.0,
"learning_rate": 8.422626873520904e-05,
"loss": 3.2357,
"step": 12810
},
{
"epoch": 3.0,
"eval_cer": 1.0,
"eval_loss": 3.2729125022888184,
"eval_new_wer": 1.0,
"eval_old_wer": 1.0,
"eval_runtime": 7.8374,
"eval_samples_per_second": 26.794,
"eval_steps_per_second": 3.445,
"step": 12810
},
{
"epoch": 4.0,
"learning_rate": 7.721042597948987e-05,
"loss": 3.0695,
"step": 17080
},
{
"epoch": 4.0,
"eval_cer": 0.9965367965367965,
"eval_loss": 3.2063045501708984,
"eval_new_wer": 0.9952380952380953,
"eval_old_wer": 1.0,
"eval_runtime": 8.0285,
"eval_samples_per_second": 26.157,
"eval_steps_per_second": 3.363,
"step": 17080
},
{
"epoch": 5.0,
"learning_rate": 7.019622666316067e-05,
"loss": 2.9729,
"step": 21350
},
{
"epoch": 5.0,
"eval_cer": 0.987012987012987,
"eval_loss": 3.182396411895752,
"eval_new_wer": 0.9809523809523809,
"eval_old_wer": 1.0,
"eval_runtime": 8.1314,
"eval_samples_per_second": 25.826,
"eval_steps_per_second": 3.32,
"step": 21350
},
{
"epoch": 6.0,
"learning_rate": 6.318202734683146e-05,
"loss": 2.9161,
"step": 25620
},
{
"epoch": 6.0,
"eval_cer": 0.9766233766233766,
"eval_loss": 3.1507506370544434,
"eval_new_wer": 0.9619047619047619,
"eval_old_wer": 1.0,
"eval_runtime": 8.2738,
"eval_samples_per_second": 25.381,
"eval_steps_per_second": 3.263,
"step": 25620
},
{
"epoch": 7.0,
"learning_rate": 5.616782803050224e-05,
"loss": 2.8792,
"step": 29890
},
{
"epoch": 7.0,
"eval_cer": 0.9688311688311688,
"eval_loss": 3.118381977081299,
"eval_new_wer": 0.9476190476190476,
"eval_old_wer": 1.0,
"eval_runtime": 8.1799,
"eval_samples_per_second": 25.673,
"eval_steps_per_second": 3.301,
"step": 29890
},
{
"epoch": 8.0,
"learning_rate": 4.9153628714173024e-05,
"loss": 2.8532,
"step": 34160
},
{
"epoch": 8.0,
"eval_cer": 0.9679653679653679,
"eval_loss": 3.119230031967163,
"eval_new_wer": 0.9476190476190476,
"eval_old_wer": 1.0,
"eval_runtime": 8.122,
"eval_samples_per_second": 25.856,
"eval_steps_per_second": 3.324,
"step": 34160
},
{
"epoch": 9.0,
"learning_rate": 4.213942939784381e-05,
"loss": 2.8319,
"step": 38430
},
{
"epoch": 9.0,
"eval_cer": 0.9679653679653679,
"eval_loss": 3.1226253509521484,
"eval_new_wer": 0.9476190476190476,
"eval_old_wer": 1.0,
"eval_runtime": 8.203,
"eval_samples_per_second": 25.6,
"eval_steps_per_second": 3.291,
"step": 38430
},
{
"epoch": 10.0,
"learning_rate": 3.512358664212464e-05,
"loss": 2.8189,
"step": 42700
},
{
"epoch": 10.0,
"eval_cer": 0.9636363636363636,
"eval_loss": 3.1021082401275635,
"eval_new_wer": 0.9428571428571428,
"eval_old_wer": 1.0,
"eval_runtime": 8.2497,
"eval_samples_per_second": 25.455,
"eval_steps_per_second": 3.273,
"step": 42700
},
{
"epoch": 11.0,
"learning_rate": 2.8107743886405467e-05,
"loss": 2.8081,
"step": 46970
},
{
"epoch": 11.0,
"eval_cer": 0.9584415584415584,
"eval_loss": 3.1021568775177,
"eval_new_wer": 0.9333333333333333,
"eval_old_wer": 1.0,
"eval_runtime": 8.1922,
"eval_samples_per_second": 25.634,
"eval_steps_per_second": 3.296,
"step": 46970
},
{
"epoch": 12.0,
"learning_rate": 2.1095188009466215e-05,
"loss": 2.8027,
"step": 51240
},
{
"epoch": 12.0,
"eval_cer": 0.9575757575757575,
"eval_loss": 3.1064510345458984,
"eval_new_wer": 0.9333333333333333,
"eval_old_wer": 1.0,
"eval_runtime": 8.1815,
"eval_samples_per_second": 25.668,
"eval_steps_per_second": 3.3,
"step": 51240
},
{
"epoch": 13.0,
"learning_rate": 1.4080988693136998e-05,
"loss": 2.7939,
"step": 55510
},
{
"epoch": 13.0,
"eval_cer": 0.9575757575757575,
"eval_loss": 3.102414608001709,
"eval_new_wer": 0.9333333333333333,
"eval_old_wer": 1.0,
"eval_runtime": 9.239,
"eval_samples_per_second": 22.73,
"eval_steps_per_second": 2.922,
"step": 55510
},
{
"epoch": 14.0,
"learning_rate": 7.066789376807783e-06,
"loss": 2.7917,
"step": 59780
},
{
"epoch": 14.0,
"eval_cer": 0.9584415584415584,
"eval_loss": 3.100308895111084,
"eval_new_wer": 0.9333333333333333,
"eval_old_wer": 1.0,
"eval_runtime": 9.2123,
"eval_samples_per_second": 22.796,
"eval_steps_per_second": 2.931,
"step": 59780
},
{
"epoch": 15.0,
"learning_rate": 5.094662108861425e-08,
"loss": 2.7903,
"step": 64050
},
{
"epoch": 15.0,
"eval_cer": 0.9575757575757575,
"eval_loss": 3.104132890701294,
"eval_new_wer": 0.9333333333333333,
"eval_old_wer": 1.0,
"eval_runtime": 8.8255,
"eval_samples_per_second": 23.795,
"eval_steps_per_second": 3.059,
"step": 64050
}
],
"max_steps": 64050,
"num_train_epochs": 15,
"total_flos": 5.076674134512955e+19,
"trial_name": null,
"trial_params": null
}