|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1914, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 1.798, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6067341772151899, |
|
"eval_loss": 1.8213441371917725, |
|
"eval_runtime": 4.8116, |
|
"eval_samples_per_second": 103.916, |
|
"eval_steps_per_second": 13.093, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 25.4, |
|
"eval_f1": 35.50992875545509, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7534, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7163, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6077215189873417, |
|
"eval_loss": 1.8046445846557617, |
|
"eval_runtime": 5.1228, |
|
"eval_samples_per_second": 97.603, |
|
"eval_steps_per_second": 12.298, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 25.8, |
|
"eval_f1": 35.76454372424963, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5854, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5869, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6071392405063291, |
|
"eval_loss": 1.829284906387329, |
|
"eval_runtime": 4.5692, |
|
"eval_samples_per_second": 109.429, |
|
"eval_steps_per_second": 13.788, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 24.4, |
|
"eval_f1": 34.46831890331891, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5039, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4165, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4349, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6042784810126582, |
|
"eval_loss": 1.89736008644104, |
|
"eval_runtime": 4.5698, |
|
"eval_samples_per_second": 109.415, |
|
"eval_steps_per_second": 13.786, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 22.8, |
|
"eval_f1": 32.991320346320364, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2624, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3111, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.601493670886076, |
|
"eval_loss": 1.9769378900527954, |
|
"eval_runtime": 4.7168, |
|
"eval_samples_per_second": 106.005, |
|
"eval_steps_per_second": 13.357, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 22.6, |
|
"eval_f1": 32.46891774891776, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 5e-05, |
|
"loss": 1.226, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1608, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 5e-05, |
|
"loss": 1.197, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.599240506329114, |
|
"eval_loss": 2.0634872913360596, |
|
"eval_runtime": 4.4047, |
|
"eval_samples_per_second": 113.515, |
|
"eval_steps_per_second": 14.303, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 21.4, |
|
"eval_f1": 29.995800865800874, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0336, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0729, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5975443037974684, |
|
"eval_loss": 2.1523237228393555, |
|
"eval_runtime": 4.4025, |
|
"eval_samples_per_second": 113.571, |
|
"eval_steps_per_second": 14.31, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 20.8, |
|
"eval_f1": 29.054994058229354, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0005, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9437, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9833, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5947341772151898, |
|
"eval_loss": 2.2639756202697754, |
|
"eval_runtime": 4.7031, |
|
"eval_samples_per_second": 106.312, |
|
"eval_steps_per_second": 13.395, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 20.8, |
|
"eval_f1": 28.71228732052262, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.823, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8672, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5924050632911393, |
|
"eval_loss": 2.3642578125, |
|
"eval_runtime": 4.4051, |
|
"eval_samples_per_second": 113.505, |
|
"eval_steps_per_second": 14.302, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 22.4, |
|
"eval_f1": 30.14020120686362, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8032, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7566, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7883, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5908101265822785, |
|
"eval_loss": 2.4598007202148438, |
|
"eval_runtime": 4.5544, |
|
"eval_samples_per_second": 109.783, |
|
"eval_steps_per_second": 13.833, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 21.4, |
|
"eval_f1": 29.271013039248334, |
|
"step": 2500 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 12500, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 1.5979216471955866e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|