|
{ |
|
"best_metric": 0.5592061877250671, |
|
"best_model_checkpoint": "output/checkpoint-200", |
|
"epoch": 0.201765447667087, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.5465, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.7053, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.6561679790026247, |
|
"eval_loss": 0.6764907836914062, |
|
"eval_runtime": 59.7766, |
|
"eval_samples_per_second": 6.374, |
|
"eval_steps_per_second": 1.606, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00015666666666666666, |
|
"loss": 0.6576, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.7217847769028871, |
|
"eval_loss": 0.574826717376709, |
|
"eval_runtime": 59.8708, |
|
"eval_samples_per_second": 6.364, |
|
"eval_steps_per_second": 1.603, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.5874, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.7480314960629921, |
|
"eval_loss": 0.5501317381858826, |
|
"eval_runtime": 55.7487, |
|
"eval_samples_per_second": 6.834, |
|
"eval_steps_per_second": 1.722, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002948780487804878, |
|
"loss": 0.5775, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.7401574803149606, |
|
"eval_loss": 0.5592061877250671, |
|
"eval_runtime": 54.7265, |
|
"eval_samples_per_second": 6.962, |
|
"eval_steps_per_second": 1.754, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1000, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 3.444343869136896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|