|
{ |
|
"best_metric": 0.27719950675964355, |
|
"best_model_checkpoint": "./xls-r-300m-nyanja-model_v1/checkpoint-1000", |
|
"epoch": 5.0, |
|
"global_step": 1580, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9000000000000005e-05, |
|
"loss": 9.53, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.900000000000001e-05, |
|
"loss": 3.1099, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.000149, |
|
"loss": 2.7322, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.000199, |
|
"loss": 1.2227, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.000249, |
|
"loss": 0.7585, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 0.35743284225463867, |
|
"eval_runtime": 290.0599, |
|
"eval_samples_per_second": 5.806, |
|
"eval_steps_per_second": 0.727, |
|
"eval_wer": 0.9679334916864608, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.000299, |
|
"loss": 0.6269, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.00034899999999999997, |
|
"loss": 0.5478, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.00039900000000000005, |
|
"loss": 0.5208, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.000449, |
|
"loss": 0.5211, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.000499, |
|
"loss": 0.4736, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_loss": 0.27719950675964355, |
|
"eval_runtime": 292.8589, |
|
"eval_samples_per_second": 5.75, |
|
"eval_steps_per_second": 0.72, |
|
"eval_wer": 0.9073634204275535, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.000549, |
|
"loss": 0.4533, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 0.000599, |
|
"loss": 0.4563, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.0006490000000000001, |
|
"loss": 0.4883, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.000699, |
|
"loss": 0.465, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.000749, |
|
"loss": 0.4776, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_loss": 0.28532925248146057, |
|
"eval_runtime": 291.534, |
|
"eval_samples_per_second": 5.776, |
|
"eval_steps_per_second": 0.724, |
|
"eval_wer": 0.9578384798099763, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 1580, |
|
"total_flos": 7.433221301093351e+18, |
|
"train_loss": 1.4388394899006132, |
|
"train_runtime": 7577.7535, |
|
"train_samples_per_second": 3.334, |
|
"train_steps_per_second": 0.209 |
|
} |
|
], |
|
"max_steps": 1580, |
|
"num_train_epochs": 5, |
|
"total_flos": 7.433221301093351e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|