|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"global_step": 4830, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.83874482458052e-05, |
|
"loss": 22.5987, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.259235143661499, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.6332, |
|
"eval_samples_per_second": 24.325, |
|
"eval_steps_per_second": 3.127, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.13706689910656e-05, |
|
"loss": 3.2658, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.177554130554199, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.5905, |
|
"eval_samples_per_second": 24.446, |
|
"eval_steps_per_second": 3.143, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.4353889736326e-05, |
|
"loss": 3.0366, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.093811511993408, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 9.0154, |
|
"eval_samples_per_second": 23.293, |
|
"eval_steps_per_second": 2.995, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 7.73371104815864e-05, |
|
"loss": 2.635, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.0094852447509766, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.5468, |
|
"eval_samples_per_second": 24.571, |
|
"eval_steps_per_second": 3.159, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 7.032033122684682e-05, |
|
"loss": 2.0662, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 0.9774891774891775, |
|
"eval_loss": 2.8005685806274414, |
|
"eval_new_wer": 0.9952380952380953, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.9981, |
|
"eval_samples_per_second": 23.338, |
|
"eval_steps_per_second": 3.001, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 6.330355197210721e-05, |
|
"loss": 1.5835, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 0.7350649350649351, |
|
"eval_loss": 2.7513883113861084, |
|
"eval_new_wer": 0.5238095238095238, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.5668, |
|
"eval_samples_per_second": 24.513, |
|
"eval_steps_per_second": 3.152, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 5.628677271736762e-05, |
|
"loss": 1.204, |
|
"step": 2254 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 0.7177489177489178, |
|
"eval_loss": 3.089792013168335, |
|
"eval_new_wer": 0.5666666666666667, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.7621, |
|
"eval_samples_per_second": 23.967, |
|
"eval_steps_per_second": 3.081, |
|
"step": 2254 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.9269993462628025e-05, |
|
"loss": 0.8525, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 0.625974025974026, |
|
"eval_loss": 3.1094284057617188, |
|
"eval_new_wer": 0.35714285714285715, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.5847, |
|
"eval_samples_per_second": 24.462, |
|
"eval_steps_per_second": 3.145, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.225321420788843e-05, |
|
"loss": 0.6166, |
|
"step": 2898 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 0.6337662337662338, |
|
"eval_loss": 3.5997140407562256, |
|
"eval_new_wer": 0.35714285714285715, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.5651, |
|
"eval_samples_per_second": 24.518, |
|
"eval_steps_per_second": 3.152, |
|
"step": 2898 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3.5236434953148835e-05, |
|
"loss": 0.4461, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 0.6562770562770562, |
|
"eval_loss": 3.8030993938446045, |
|
"eval_new_wer": 0.3523809523809524, |
|
"eval_old_wer": 0.9952380952380953, |
|
"eval_runtime": 8.652, |
|
"eval_samples_per_second": 24.272, |
|
"eval_steps_per_second": 3.121, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2.8219655698409243e-05, |
|
"loss": 0.3257, |
|
"step": 3542 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 0.6060606060606061, |
|
"eval_loss": 4.010402202606201, |
|
"eval_new_wer": 0.3238095238095238, |
|
"eval_old_wer": 0.9857142857142858, |
|
"eval_runtime": 9.2292, |
|
"eval_samples_per_second": 22.754, |
|
"eval_steps_per_second": 2.925, |
|
"step": 3542 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2.1202876443669645e-05, |
|
"loss": 0.254, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 0.6164502164502165, |
|
"eval_loss": 3.9872653484344482, |
|
"eval_new_wer": 0.3238095238095238, |
|
"eval_old_wer": 0.9857142857142858, |
|
"eval_runtime": 8.5518, |
|
"eval_samples_per_second": 24.556, |
|
"eval_steps_per_second": 3.157, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 1.418609718893005e-05, |
|
"loss": 0.1988, |
|
"step": 4186 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 0.5922077922077922, |
|
"eval_loss": 3.9551496505737305, |
|
"eval_new_wer": 0.2619047619047619, |
|
"eval_old_wer": 0.9714285714285714, |
|
"eval_runtime": 8.0772, |
|
"eval_samples_per_second": 25.999, |
|
"eval_steps_per_second": 3.343, |
|
"step": 4186 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 7.1693179341904555e-06, |
|
"loss": 0.1663, |
|
"step": 4508 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 0.6095238095238096, |
|
"eval_loss": 4.294742107391357, |
|
"eval_new_wer": 0.2904761904761905, |
|
"eval_old_wer": 0.9809523809523809, |
|
"eval_runtime": 8.1055, |
|
"eval_samples_per_second": 25.908, |
|
"eval_steps_per_second": 3.331, |
|
"step": 4508 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 1.5253867945086075e-07, |
|
"loss": 0.1398, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 0.6207792207792208, |
|
"eval_loss": 4.198614120483398, |
|
"eval_new_wer": 0.319047619047619, |
|
"eval_old_wer": 0.9904761904761905, |
|
"eval_runtime": 8.1622, |
|
"eval_samples_per_second": 25.728, |
|
"eval_steps_per_second": 3.308, |
|
"step": 4830 |
|
} |
|
], |
|
"max_steps": 4830, |
|
"num_train_epochs": 15, |
|
"total_flos": 3.586056332009567e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|