med_v1_F02 / last-checkpoint /trainer_state.json
mprzibilla's picture
Training in progress, epoch 15
d5a3068
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"global_step": 4830,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 9.83874482458052e-05,
"loss": 22.5987,
"step": 322
},
{
"epoch": 1.0,
"eval_cer": 1.0,
"eval_loss": 3.259235143661499,
"eval_new_wer": 1.0,
"eval_old_wer": 1.0,
"eval_runtime": 8.6332,
"eval_samples_per_second": 24.325,
"eval_steps_per_second": 3.127,
"step": 322
},
{
"epoch": 2.0,
"learning_rate": 9.13706689910656e-05,
"loss": 3.2658,
"step": 644
},
{
"epoch": 2.0,
"eval_cer": 1.0,
"eval_loss": 3.177554130554199,
"eval_new_wer": 1.0,
"eval_old_wer": 1.0,
"eval_runtime": 8.5905,
"eval_samples_per_second": 24.446,
"eval_steps_per_second": 3.143,
"step": 644
},
{
"epoch": 3.0,
"learning_rate": 8.4353889736326e-05,
"loss": 3.0366,
"step": 966
},
{
"epoch": 3.0,
"eval_cer": 1.0,
"eval_loss": 3.093811511993408,
"eval_new_wer": 1.0,
"eval_old_wer": 1.0,
"eval_runtime": 9.0154,
"eval_samples_per_second": 23.293,
"eval_steps_per_second": 2.995,
"step": 966
},
{
"epoch": 4.0,
"learning_rate": 7.73371104815864e-05,
"loss": 2.635,
"step": 1288
},
{
"epoch": 4.0,
"eval_cer": 1.0,
"eval_loss": 3.0094852447509766,
"eval_new_wer": 1.0,
"eval_old_wer": 1.0,
"eval_runtime": 8.5468,
"eval_samples_per_second": 24.571,
"eval_steps_per_second": 3.159,
"step": 1288
},
{
"epoch": 5.0,
"learning_rate": 7.032033122684682e-05,
"loss": 2.0662,
"step": 1610
},
{
"epoch": 5.0,
"eval_cer": 0.9774891774891775,
"eval_loss": 2.8005685806274414,
"eval_new_wer": 0.9952380952380953,
"eval_old_wer": 1.0,
"eval_runtime": 8.9981,
"eval_samples_per_second": 23.338,
"eval_steps_per_second": 3.001,
"step": 1610
},
{
"epoch": 6.0,
"learning_rate": 6.330355197210721e-05,
"loss": 1.5835,
"step": 1932
},
{
"epoch": 6.0,
"eval_cer": 0.7350649350649351,
"eval_loss": 2.7513883113861084,
"eval_new_wer": 0.5238095238095238,
"eval_old_wer": 1.0,
"eval_runtime": 8.5668,
"eval_samples_per_second": 24.513,
"eval_steps_per_second": 3.152,
"step": 1932
},
{
"epoch": 7.0,
"learning_rate": 5.628677271736762e-05,
"loss": 1.204,
"step": 2254
},
{
"epoch": 7.0,
"eval_cer": 0.7177489177489178,
"eval_loss": 3.089792013168335,
"eval_new_wer": 0.5666666666666667,
"eval_old_wer": 1.0,
"eval_runtime": 8.7621,
"eval_samples_per_second": 23.967,
"eval_steps_per_second": 3.081,
"step": 2254
},
{
"epoch": 8.0,
"learning_rate": 4.9269993462628025e-05,
"loss": 0.8525,
"step": 2576
},
{
"epoch": 8.0,
"eval_cer": 0.625974025974026,
"eval_loss": 3.1094284057617188,
"eval_new_wer": 0.35714285714285715,
"eval_old_wer": 1.0,
"eval_runtime": 8.5847,
"eval_samples_per_second": 24.462,
"eval_steps_per_second": 3.145,
"step": 2576
},
{
"epoch": 9.0,
"learning_rate": 4.225321420788843e-05,
"loss": 0.6166,
"step": 2898
},
{
"epoch": 9.0,
"eval_cer": 0.6337662337662338,
"eval_loss": 3.5997140407562256,
"eval_new_wer": 0.35714285714285715,
"eval_old_wer": 1.0,
"eval_runtime": 8.5651,
"eval_samples_per_second": 24.518,
"eval_steps_per_second": 3.152,
"step": 2898
},
{
"epoch": 10.0,
"learning_rate": 3.5236434953148835e-05,
"loss": 0.4461,
"step": 3220
},
{
"epoch": 10.0,
"eval_cer": 0.6562770562770562,
"eval_loss": 3.8030993938446045,
"eval_new_wer": 0.3523809523809524,
"eval_old_wer": 0.9952380952380953,
"eval_runtime": 8.652,
"eval_samples_per_second": 24.272,
"eval_steps_per_second": 3.121,
"step": 3220
},
{
"epoch": 11.0,
"learning_rate": 2.8219655698409243e-05,
"loss": 0.3257,
"step": 3542
},
{
"epoch": 11.0,
"eval_cer": 0.6060606060606061,
"eval_loss": 4.010402202606201,
"eval_new_wer": 0.3238095238095238,
"eval_old_wer": 0.9857142857142858,
"eval_runtime": 9.2292,
"eval_samples_per_second": 22.754,
"eval_steps_per_second": 2.925,
"step": 3542
},
{
"epoch": 12.0,
"learning_rate": 2.1202876443669645e-05,
"loss": 0.254,
"step": 3864
},
{
"epoch": 12.0,
"eval_cer": 0.6164502164502165,
"eval_loss": 3.9872653484344482,
"eval_new_wer": 0.3238095238095238,
"eval_old_wer": 0.9857142857142858,
"eval_runtime": 8.5518,
"eval_samples_per_second": 24.556,
"eval_steps_per_second": 3.157,
"step": 3864
},
{
"epoch": 13.0,
"learning_rate": 1.418609718893005e-05,
"loss": 0.1988,
"step": 4186
},
{
"epoch": 13.0,
"eval_cer": 0.5922077922077922,
"eval_loss": 3.9551496505737305,
"eval_new_wer": 0.2619047619047619,
"eval_old_wer": 0.9714285714285714,
"eval_runtime": 8.0772,
"eval_samples_per_second": 25.999,
"eval_steps_per_second": 3.343,
"step": 4186
},
{
"epoch": 14.0,
"learning_rate": 7.1693179341904555e-06,
"loss": 0.1663,
"step": 4508
},
{
"epoch": 14.0,
"eval_cer": 0.6095238095238096,
"eval_loss": 4.294742107391357,
"eval_new_wer": 0.2904761904761905,
"eval_old_wer": 0.9809523809523809,
"eval_runtime": 8.1055,
"eval_samples_per_second": 25.908,
"eval_steps_per_second": 3.331,
"step": 4508
},
{
"epoch": 15.0,
"learning_rate": 1.5253867945086075e-07,
"loss": 0.1398,
"step": 4830
},
{
"epoch": 15.0,
"eval_cer": 0.6207792207792208,
"eval_loss": 4.198614120483398,
"eval_new_wer": 0.319047619047619,
"eval_old_wer": 0.9904761904761905,
"eval_runtime": 8.1622,
"eval_samples_per_second": 25.728,
"eval_steps_per_second": 3.308,
"step": 4830
}
],
"max_steps": 4830,
"num_train_epochs": 15,
"total_flos": 3.586056332009567e+18,
"trial_name": null,
"trial_params": null
}