|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3e-05, |
|
"loss": 2.0369, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6128205128205129, |
|
"eval_loss": 1.2953312397003174, |
|
"eval_runtime": 4.1114, |
|
"eval_samples_per_second": 121.614, |
|
"eval_steps_per_second": 15.323, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 27.2, |
|
"eval_f1": 37.44218938149973, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3264, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2821, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6146153846153846, |
|
"eval_loss": 1.2741013765335083, |
|
"eval_runtime": 4.8302, |
|
"eval_samples_per_second": 103.516, |
|
"eval_steps_per_second": 13.043, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 25.6, |
|
"eval_f1": 38.026666666666664, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2348, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1987, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6161538461538462, |
|
"eval_loss": 1.2714948654174805, |
|
"eval_runtime": 4.868, |
|
"eval_samples_per_second": 102.712, |
|
"eval_steps_per_second": 12.942, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 28.0, |
|
"eval_f1": 40.6962091503268, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1537, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 3e-05, |
|
"loss": 1.066, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6151282051282051, |
|
"eval_loss": 1.3011157512664795, |
|
"eval_runtime": 4.407, |
|
"eval_samples_per_second": 113.455, |
|
"eval_steps_per_second": 14.295, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 26.2, |
|
"eval_f1": 38.19190476190477, |
|
"step": 750 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 9350, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 2.7990414006943744e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|