{
  "best_metric": 74.36995381137226,
  "best_model_checkpoint": "/home/jcanete/ft-data/all_results/tar/albeto_base_6/epochs_2_bs_16_lr_5e-5/checkpoint-8700",
  "epoch": 2.0,
  "global_step": 10970,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "eval_exact_match": 40.719016083254495,
      "eval_f1": 58.77945049839472,
      "step": 300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.773473108477667e-05,
      "loss": 2.5693,
      "step": 500
    },
    {
      "epoch": 0.11,
      "eval_exact_match": 45.44938505203406,
      "eval_f1": 63.936960472857905,
      "step": 600
    },
    {
      "epoch": 0.16,
      "eval_exact_match": 48.42005676442763,
      "eval_f1": 66.7942849285666,
      "step": 900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.545578851412944e-05,
      "loss": 1.9654,
      "step": 1000
    },
    {
      "epoch": 0.22,
      "eval_exact_match": 49.725638599810786,
      "eval_f1": 68.05815355034528,
      "step": 1200
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.3176845943482223e-05,
      "loss": 1.8622,
      "step": 1500
    },
    {
      "epoch": 0.27,
      "eval_exact_match": 50.37842951750237,
      "eval_f1": 68.12485071659977,
      "step": 1500
    },
    {
      "epoch": 0.33,
      "eval_exact_match": 51.59886471144749,
      "eval_f1": 69.3844758904365,
      "step": 1800
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.0897903372835004e-05,
      "loss": 1.8064,
      "step": 2000
    },
    {
      "epoch": 0.38,
      "eval_exact_match": 52.92336802270577,
      "eval_f1": 71.05206739179368,
      "step": 2100
    },
    {
      "epoch": 0.44,
      "eval_exact_match": 52.535477767265846,
      "eval_f1": 70.11029061313148,
      "step": 2400
    },
    {
      "epoch": 0.46,
      "learning_rate": 3.8618960802187785e-05,
      "loss": 1.7133,
      "step": 2500
    },
    {
      "epoch": 0.49,
      "eval_exact_match": 52.904446546830656,
      "eval_f1": 70.98616483675112,
      "step": 2700
    },
    {
      "epoch": 0.55,
      "learning_rate": 3.6340018231540566e-05,
      "loss": 1.7045,
      "step": 3000
    },
    {
      "epoch": 0.55,
      "eval_exact_match": 54.276253547776726,
      "eval_f1": 71.66909074175824,
      "step": 3000
    },
    {
      "epoch": 0.6,
      "eval_exact_match": 54.46546830652791,
      "eval_f1": 71.54479654097987,
      "step": 3300
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.406107566089335e-05,
      "loss": 1.6732,
      "step": 3500
    },
    {
      "epoch": 0.66,
      "eval_exact_match": 55.0236518448439,
      "eval_f1": 72.43165471889594,
      "step": 3600
    },
    {
      "epoch": 0.71,
      "eval_exact_match": 55.21286660359508,
      "eval_f1": 72.64020525017258,
      "step": 3900
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.178213309024613e-05,
      "loss": 1.6218,
      "step": 4000
    },
    {
      "epoch": 0.77,
      "eval_exact_match": 55.581835383159884,
      "eval_f1": 73.12194401332194,
      "step": 4200
    },
    {
      "epoch": 0.82,
      "learning_rate": 2.9503190519598906e-05,
      "loss": 1.6245,
      "step": 4500
    },
    {
      "epoch": 0.82,
      "eval_exact_match": 55.65752128666036,
      "eval_f1": 72.86127958084641,
      "step": 4500
    },
    {
      "epoch": 0.88,
      "eval_exact_match": 55.96026490066225,
      "eval_f1": 73.4084850665819,
      "step": 4800
    },
    {
      "epoch": 0.91,
      "learning_rate": 2.7224247948951686e-05,
      "loss": 1.6021,
      "step": 5000
    },
    {
      "epoch": 0.93,
      "eval_exact_match": 56.40491958372753,
      "eval_f1": 73.57999833438744,
      "step": 5100
    },
    {
      "epoch": 0.98,
      "eval_exact_match": 56.34815515610217,
      "eval_f1": 73.92632868373076,
      "step": 5400
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4949863263445765e-05,
      "loss": 1.5458,
      "step": 5500
    },
    {
      "epoch": 1.04,
      "eval_exact_match": 56.7360454115421,
      "eval_f1": 73.96946840195473,
      "step": 5700
    },
    {
      "epoch": 1.09,
      "learning_rate": 2.2670920692798542e-05,
      "loss": 1.2682,
      "step": 6000
    },
    {
      "epoch": 1.09,
      "eval_exact_match": 56.71712393566698,
      "eval_f1": 73.83807242592808,
      "step": 6000
    },
    {
      "epoch": 1.15,
      "eval_exact_match": 56.61305581835383,
      "eval_f1": 73.47868870604212,
      "step": 6300
    },
    {
      "epoch": 1.19,
      "learning_rate": 2.0391978122151323e-05,
      "loss": 1.2702,
      "step": 6500
    },
    {
      "epoch": 1.2,
      "eval_exact_match": 56.08325449385052,
      "eval_f1": 73.87945111610915,
      "step": 6600
    },
    {
      "epoch": 1.26,
      "eval_exact_match": 56.395458845789975,
      "eval_f1": 73.26116080395376,
      "step": 6900
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.8113035551504104e-05,
      "loss": 1.2553,
      "step": 7000
    },
    {
      "epoch": 1.31,
      "eval_exact_match": 56.50898770104068,
      "eval_f1": 73.92339178593369,
      "step": 7200
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.583409298085688e-05,
      "loss": 1.2378,
      "step": 7500
    },
    {
      "epoch": 1.37,
      "eval_exact_match": 56.471144749290445,
      "eval_f1": 74.01594585064186,
      "step": 7500
    },
    {
      "epoch": 1.42,
      "eval_exact_match": 56.61305581835383,
      "eval_f1": 74.11650132906428,
      "step": 7800
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.3555150410209664e-05,
      "loss": 1.3004,
      "step": 8000
    },
    {
      "epoch": 1.48,
      "eval_exact_match": 56.64143803216651,
      "eval_f1": 74.03120203258294,
      "step": 8100
    },
    {
      "epoch": 1.53,
      "eval_exact_match": 56.84011352885525,
      "eval_f1": 74.3235867027388,
      "step": 8400
    },
    {
      "epoch": 1.55,
      "learning_rate": 1.1276207839562443e-05,
      "loss": 1.2482,
      "step": 8500
    },
    {
      "epoch": 1.59,
      "eval_exact_match": 57.28476821192053,
      "eval_f1": 74.36995381137226,
      "step": 8700
    },
    {
      "epoch": 1.64,
      "learning_rate": 8.997265268915224e-06,
      "loss": 1.2677,
      "step": 9000
    },
    {
      "epoch": 1.64,
      "eval_exact_match": 57.17123935666982,
      "eval_f1": 74.31360595646503,
      "step": 9000
    },
    {
      "epoch": 1.7,
      "eval_exact_match": 56.9914853358562,
      "eval_f1": 74.27345402299513,
      "step": 9300
    },
    {
      "epoch": 1.73,
      "learning_rate": 6.7183226982680034e-06,
      "loss": 1.257,
      "step": 9500
    },
    {
      "epoch": 1.75,
      "eval_exact_match": 56.80227057710501,
      "eval_f1": 74.05651657911156,
      "step": 9600
    },
    {
      "epoch": 1.8,
      "eval_exact_match": 57.05771050141911,
      "eval_f1": 74.23683269095346,
      "step": 9900
    },
    {
      "epoch": 1.82,
      "learning_rate": 4.448495897903373e-06,
      "loss": 1.2459,
      "step": 10000
    },
    {
      "epoch": 1.86,
      "eval_exact_match": 57.237464522232735,
      "eval_f1": 74.29842936518452,
      "step": 10200
    },
    {
      "epoch": 1.91,
      "learning_rate": 2.169553327256153e-06,
      "loss": 1.2465,
      "step": 10500
    },
    {
      "epoch": 1.91,
      "eval_exact_match": 57.17123935666982,
      "eval_f1": 74.35402701072998,
      "step": 10500
    },
    {
      "epoch": 1.97,
      "eval_exact_match": 57.142857142857146,
      "eval_f1": 74.2724134993522,
      "step": 10800
    },
    {
      "epoch": 2.0,
      "step": 10970,
      "total_flos": 2544773915225664.0,
      "train_loss": 1.524520064486953,
      "train_runtime": 1743.0774,
      "train_samples_per_second": 100.681,
      "train_steps_per_second": 6.293
    }
  ],
  "max_steps": 10970,
  "num_train_epochs": 2,
  "total_flos": 2544773915225664.0,
  "trial_name": null,
  "trial_params": null
}