|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"global_step": 64050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.826288456481726e-05, |
|
"loss": 37.9017, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.8080713748931885, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.7466, |
|
"eval_samples_per_second": 27.109, |
|
"eval_steps_per_second": 3.485, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.124539836970814e-05, |
|
"loss": 3.3383, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.6899821758270264, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.6904, |
|
"eval_samples_per_second": 27.307, |
|
"eval_steps_per_second": 3.511, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.4227912174599e-05, |
|
"loss": 3.2564, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 0.9982683982683983, |
|
"eval_loss": 3.388317823410034, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.7217, |
|
"eval_samples_per_second": 27.196, |
|
"eval_steps_per_second": 3.497, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 7.721042597948987e-05, |
|
"loss": 3.104, |
|
"step": 17080 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 0.9939393939393939, |
|
"eval_loss": 3.202686071395874, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.9831, |
|
"eval_samples_per_second": 26.306, |
|
"eval_steps_per_second": 3.382, |
|
"step": 17080 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 7.019622666316067e-05, |
|
"loss": 2.997, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 0.9904761904761905, |
|
"eval_loss": 3.177474021911621, |
|
"eval_new_wer": 0.9904761904761905, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.7653, |
|
"eval_samples_per_second": 27.043, |
|
"eval_steps_per_second": 3.477, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 6.318202734683146e-05, |
|
"loss": 2.9391, |
|
"step": 25620 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 0.9861471861471861, |
|
"eval_loss": 3.1568691730499268, |
|
"eval_new_wer": 0.9809523809523809, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.7615, |
|
"eval_samples_per_second": 27.057, |
|
"eval_steps_per_second": 3.479, |
|
"step": 25620 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 5.616782803050224e-05, |
|
"loss": 2.8989, |
|
"step": 29890 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 0.9766233766233766, |
|
"eval_loss": 3.164285182952881, |
|
"eval_new_wer": 0.9714285714285714, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.8832, |
|
"eval_samples_per_second": 26.639, |
|
"eval_steps_per_second": 3.425, |
|
"step": 29890 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.9153628714173024e-05, |
|
"loss": 2.8731, |
|
"step": 34160 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 0.9670995670995671, |
|
"eval_loss": 3.1436803340911865, |
|
"eval_new_wer": 0.9523809523809523, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.7748, |
|
"eval_samples_per_second": 27.01, |
|
"eval_steps_per_second": 3.473, |
|
"step": 34160 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.213778595845386e-05, |
|
"loss": 2.8549, |
|
"step": 38430 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 0.9645021645021645, |
|
"eval_loss": 3.1454625129699707, |
|
"eval_new_wer": 0.9428571428571428, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.8309, |
|
"eval_samples_per_second": 26.817, |
|
"eval_steps_per_second": 3.448, |
|
"step": 38430 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3.512358664212464e-05, |
|
"loss": 2.8407, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 0.9627705627705627, |
|
"eval_loss": 3.1407470703125, |
|
"eval_new_wer": 0.9380952380952381, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.787, |
|
"eval_samples_per_second": 26.968, |
|
"eval_steps_per_second": 3.467, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2.8107743886405467e-05, |
|
"loss": 2.8301, |
|
"step": 46970 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 0.9601731601731601, |
|
"eval_loss": 3.141080141067505, |
|
"eval_new_wer": 0.9333333333333333, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.8547, |
|
"eval_samples_per_second": 26.735, |
|
"eval_steps_per_second": 3.437, |
|
"step": 46970 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2.1093544570076258e-05, |
|
"loss": 2.8215, |
|
"step": 51240 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 0.9601731601731601, |
|
"eval_loss": 3.1361286640167236, |
|
"eval_new_wer": 0.9380952380952381, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.7971, |
|
"eval_samples_per_second": 26.933, |
|
"eval_steps_per_second": 3.463, |
|
"step": 51240 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 1.4079345253747043e-05, |
|
"loss": 2.8178, |
|
"step": 55510 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 0.9567099567099567, |
|
"eval_loss": 3.1265242099761963, |
|
"eval_new_wer": 0.9285714285714286, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.8518, |
|
"eval_samples_per_second": 26.746, |
|
"eval_steps_per_second": 3.439, |
|
"step": 55510 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 7.065145937417829e-06, |
|
"loss": 2.816, |
|
"step": 59780 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 0.9575757575757575, |
|
"eval_loss": 3.1279547214508057, |
|
"eval_new_wer": 0.9238095238095239, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.6197, |
|
"eval_samples_per_second": 24.363, |
|
"eval_steps_per_second": 3.132, |
|
"step": 59780 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 4.930318169865895e-08, |
|
"loss": 2.8106, |
|
"step": 64050 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 0.9593073593073593, |
|
"eval_loss": 3.131213426589966, |
|
"eval_new_wer": 0.9333333333333333, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 8.5832, |
|
"eval_samples_per_second": 24.466, |
|
"eval_steps_per_second": 3.146, |
|
"step": 64050 |
|
} |
|
], |
|
"max_steps": 64050, |
|
"num_train_epochs": 15, |
|
"total_flos": 5.079400634281409e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|