|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 3703, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5e-05, |
|
"loss": 1.6355, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4037, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4183, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3881, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3891, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6137948717948718, |
|
"eval_loss": 1.3015451431274414, |
|
"eval_runtime": 4.1821, |
|
"eval_samples_per_second": 119.557, |
|
"eval_steps_per_second": 15.064, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 23.2, |
|
"eval_f1": 35.17014759120022, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3808, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 1.348, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3627, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3473, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3633, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6165641025641025, |
|
"eval_loss": 1.2854759693145752, |
|
"eval_runtime": 4.1064, |
|
"eval_samples_per_second": 121.761, |
|
"eval_steps_per_second": 15.342, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 25.8, |
|
"eval_f1": 37.509047619047635, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3358, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2917, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2948, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3144, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2929, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6176923076923077, |
|
"eval_loss": 1.2953603267669678, |
|
"eval_runtime": 4.0992, |
|
"eval_samples_per_second": 121.974, |
|
"eval_steps_per_second": 15.369, |
|
"step": 1587 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 25.0, |
|
"eval_f1": 37.08253968253969, |
|
"step": 1587 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2906, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2216, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2459, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2474, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2518, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2361, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6045128205128205, |
|
"eval_loss": 1.3488503694534302, |
|
"eval_runtime": 4.2015, |
|
"eval_samples_per_second": 119.004, |
|
"eval_steps_per_second": 14.995, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 25.0, |
|
"eval_f1": 36.82126984126986, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1856, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1556, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1828, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1687, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1856, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6125128205128205, |
|
"eval_loss": 1.3968186378479004, |
|
"eval_runtime": 4.3081, |
|
"eval_samples_per_second": 116.06, |
|
"eval_steps_per_second": 14.624, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 24.6, |
|
"eval_f1": 37.079523809523806, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1376, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1079, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0954, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1317, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1098, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6114871794871795, |
|
"eval_loss": 1.4720605611801147, |
|
"eval_runtime": 4.5, |
|
"eval_samples_per_second": 111.11, |
|
"eval_steps_per_second": 14.0, |
|
"step": 3174 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 24.4, |
|
"eval_f1": 35.78015873015873, |
|
"step": 3174 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0945, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0309, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0389, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0547, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0618, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0753, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6076410256410256, |
|
"eval_loss": 1.5797967910766602, |
|
"eval_runtime": 4.8128, |
|
"eval_samples_per_second": 103.891, |
|
"eval_steps_per_second": 13.09, |
|
"step": 3703 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 21.8, |
|
"eval_f1": 33.15111111111111, |
|
"step": 3703 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 26450, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 6.255076307642614e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|