|
{ |
|
"best_metric": 0.16926079988479614, |
|
"best_model_checkpoint": "./checkpoint-2000", |
|
"epoch": 28.97666068222621, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.0202e-05, |
|
"loss": 6.1347, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.0102e-05, |
|
"loss": 3.041, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 3.0002000000000002e-05, |
|
"loss": 1.8943, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 3.9902e-05, |
|
"loss": 1.473, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 4.9802e-05, |
|
"loss": 1.255, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_cer": 0.07577137076378351, |
|
"eval_loss": 0.29775065183639526, |
|
"eval_runtime": 14.4182, |
|
"eval_samples_per_second": 23.234, |
|
"eval_steps_per_second": 0.416, |
|
"eval_wer": 0.42935206869633097, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1289, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0964, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0491, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0342, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0058, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"eval_cer": 0.04825493171471927, |
|
"eval_loss": 0.18830506503582, |
|
"eval_runtime": 13.5348, |
|
"eval_samples_per_second": 24.751, |
|
"eval_steps_per_second": 0.443, |
|
"eval_wer": 0.28376268540202965, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9735, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9772, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 18.83, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9625, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 20.29, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9492, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 21.73, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9371, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 21.73, |
|
"eval_cer": 0.045725847243297923, |
|
"eval_loss": 0.1812673956155777, |
|
"eval_runtime": 13.3792, |
|
"eval_samples_per_second": 25.039, |
|
"eval_steps_per_second": 0.448, |
|
"eval_wer": 0.26268540202966434, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 23.19, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9325, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 24.63, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9169, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9168, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 27.53, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9111, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 28.98, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8999, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 28.98, |
|
"eval_cer": 0.042943854324734444, |
|
"eval_loss": 0.16926079988479614, |
|
"eval_runtime": 13.6397, |
|
"eval_samples_per_second": 24.561, |
|
"eval_steps_per_second": 0.44, |
|
"eval_wer": 0.2373145979703357, |
|
"step": 2000 |
|
} |
|
], |
|
"max_steps": 5000, |
|
"num_train_epochs": 73, |
|
"total_flos": 1.8112953812816295e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|