|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 3.4299999999999993e-05, |
|
"loss": 9.7952, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 6.929999999999999e-05, |
|
"loss": 3.6097, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 6.657e-05, |
|
"loss": 3.0981, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 6.306999999999999e-05, |
|
"loss": 3.0036, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"learning_rate": 5.9569999999999994e-05, |
|
"loss": 2.8617, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"eval_loss": 2.626376152038574, |
|
"eval_runtime": 13.7667, |
|
"eval_samples_per_second": 21.864, |
|
"eval_steps_per_second": 0.726, |
|
"eval_wer": 1.0012568077084205, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 5.607e-05, |
|
"loss": 2.4115, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 31.82, |
|
"learning_rate": 5.256999999999999e-05, |
|
"loss": 1.8202, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"learning_rate": 4.906999999999999e-05, |
|
"loss": 1.5225, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 40.91, |
|
"learning_rate": 4.557e-05, |
|
"loss": 1.3681, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"learning_rate": 4.2069999999999995e-05, |
|
"loss": 1.2716, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"eval_loss": 0.6217501163482666, |
|
"eval_runtime": 13.948, |
|
"eval_samples_per_second": 21.58, |
|
"eval_steps_per_second": 0.717, |
|
"eval_wer": 0.6941767909509845, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 3.857e-05, |
|
"loss": 1.2079, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 54.55, |
|
"learning_rate": 3.5069999999999995e-05, |
|
"loss": 1.154, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 59.09, |
|
"learning_rate": 3.157e-05, |
|
"loss": 1.1124, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 63.64, |
|
"learning_rate": 2.807e-05, |
|
"loss": 1.087, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 68.18, |
|
"learning_rate": 2.4569999999999997e-05, |
|
"loss": 1.049, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 68.18, |
|
"eval_loss": 0.544226348400116, |
|
"eval_runtime": 13.3563, |
|
"eval_samples_per_second": 22.536, |
|
"eval_steps_per_second": 0.749, |
|
"eval_wer": 0.6367825722664432, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"learning_rate": 2.1069999999999996e-05, |
|
"loss": 1.023, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 77.27, |
|
"learning_rate": 1.757e-05, |
|
"loss": 1.0046, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 81.82, |
|
"learning_rate": 1.4069999999999999e-05, |
|
"loss": 0.9863, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 86.36, |
|
"learning_rate": 1.0569999999999999e-05, |
|
"loss": 0.9772, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"learning_rate": 7.07e-06, |
|
"loss": 0.9632, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"eval_loss": 0.5364273190498352, |
|
"eval_runtime": 13.6559, |
|
"eval_samples_per_second": 22.042, |
|
"eval_steps_per_second": 0.732, |
|
"eval_wer": 0.6242144951822372, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 95.45, |
|
"learning_rate": 3.5699999999999993e-06, |
|
"loss": 0.9443, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 6.999999999999999e-08, |
|
"loss": 0.9485, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 2200, |
|
"total_flos": 1.4967759850398351e+19, |
|
"train_loss": 1.9645296894420277, |
|
"train_runtime": 5564.2506, |
|
"train_samples_per_second": 12.652, |
|
"train_steps_per_second": 0.395 |
|
} |
|
], |
|
"max_steps": 2200, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.4967759850398351e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|