|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.9504950495049505, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.6674917491749177e-05, |
|
"loss": 1.9722, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.093264102935791, |
|
"eval_runtime": 1.5252, |
|
"eval_samples_per_second": 1433.916, |
|
"eval_steps_per_second": 45.24, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.3349834983498351e-05, |
|
"loss": 1.9372, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.9881993532180786, |
|
"eval_runtime": 1.5777, |
|
"eval_samples_per_second": 1386.229, |
|
"eval_steps_per_second": 43.736, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.0024752475247525e-05, |
|
"loss": 1.9278, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.013523578643799, |
|
"eval_runtime": 1.6453, |
|
"eval_samples_per_second": 1329.222, |
|
"eval_steps_per_second": 41.937, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 6.6996699669967e-06, |
|
"loss": 1.9295, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.9506467580795288, |
|
"eval_runtime": 1.7203, |
|
"eval_samples_per_second": 1271.257, |
|
"eval_steps_per_second": 40.108, |
|
"step": 1616 |
|
} |
|
], |
|
"max_steps": 2424, |
|
"num_train_epochs": 6, |
|
"total_flos": 2119255202506752.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|