|
{ |
|
"best_metric": 66.26645652732091, |
|
"best_model_checkpoint": "/data/jcanete/all_results/tar/albeto_tiny/epochs_4_bs_16_lr_5e-5/checkpoint-10800", |
|
"epoch": 4.0, |
|
"global_step": 21940, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"eval_exact_match": 25.279091769157993, |
|
"eval_f1": 41.71460503665182, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8867365542388335e-05, |
|
"loss": 3.1064, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_exact_match": 33.77483443708609, |
|
"eval_f1": 51.462032531677195, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_exact_match": 37.78618732261116, |
|
"eval_f1": 55.147295523401176, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7727894257064726e-05, |
|
"loss": 2.4193, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_exact_match": 39.735099337748345, |
|
"eval_f1": 57.70943484117446, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.6588422971741116e-05, |
|
"loss": 2.2494, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_exact_match": 40.69063386944182, |
|
"eval_f1": 58.161449729726456, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_exact_match": 41.72185430463576, |
|
"eval_f1": 59.240651392335586, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.544895168641751e-05, |
|
"loss": 2.1889, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_exact_match": 41.627246925260174, |
|
"eval_f1": 60.05133975647362, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_exact_match": 43.33964049195837, |
|
"eval_f1": 61.334205952553624, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.431175934366454e-05, |
|
"loss": 2.0954, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_exact_match": 44.1438032166509, |
|
"eval_f1": 61.6109819191811, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.317228805834093e-05, |
|
"loss": 2.0585, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_exact_match": 44.55061494796594, |
|
"eval_f1": 62.74097437748746, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_exact_match": 45.14664143803217, |
|
"eval_f1": 62.716820973684904, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.203281677301732e-05, |
|
"loss": 2.0252, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_exact_match": 45.22232734153264, |
|
"eval_f1": 63.111925308146255, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_exact_match": 45.13718070009461, |
|
"eval_f1": 63.12474940531198, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.0893345487693714e-05, |
|
"loss": 1.9861, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_exact_match": 46.21570482497635, |
|
"eval_f1": 64.00817566022319, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.975615314494075e-05, |
|
"loss": 1.9881, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_exact_match": 46.72658467360454, |
|
"eval_f1": 64.45739871415881, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_exact_match": 46.31031220435194, |
|
"eval_f1": 64.03851598846974, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.861668185961714e-05, |
|
"loss": 1.9619, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_exact_match": 46.57521286660359, |
|
"eval_f1": 64.71289725300291, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_exact_match": 46.80227057710501, |
|
"eval_f1": 64.89896140882486, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.747721057429353e-05, |
|
"loss": 1.8961, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_exact_match": 46.868495742667925, |
|
"eval_f1": 64.9566553926742, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.633773928896992e-05, |
|
"loss": 1.6613, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_exact_match": 47.05771050141911, |
|
"eval_f1": 65.16060187033897, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_exact_match": 47.010406811731315, |
|
"eval_f1": 64.63527459887219, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.520054694621696e-05, |
|
"loss": 1.6724, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_exact_match": 46.44276253547777, |
|
"eval_f1": 64.6485660215026, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_exact_match": 47.02932828760643, |
|
"eval_f1": 65.25056698142618, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.406107566089335e-05, |
|
"loss": 1.6622, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_exact_match": 47.38883632923368, |
|
"eval_f1": 65.14765281416761, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.292160437556974e-05, |
|
"loss": 1.6765, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_exact_match": 47.52128666035951, |
|
"eval_f1": 65.23959133286817, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_exact_match": 47.24692526017029, |
|
"eval_f1": 64.89498224105805, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.178213309024613e-05, |
|
"loss": 1.6831, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_exact_match": 47.42667928098392, |
|
"eval_f1": 65.43419870447086, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_exact_match": 47.237464522232735, |
|
"eval_f1": 65.29424940770944, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 3.0644940747493164e-05, |
|
"loss": 1.6652, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_exact_match": 47.74834437086093, |
|
"eval_f1": 65.62503101399187, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.9505469462169554e-05, |
|
"loss": 1.6622, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_exact_match": 48.04162724692526, |
|
"eval_f1": 66.13531062019833, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_exact_match": 47.6631977294229, |
|
"eval_f1": 65.69495765526571, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.836599817684594e-05, |
|
"loss": 1.6282, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_exact_match": 48.15515610217597, |
|
"eval_f1": 65.85406198831797, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_exact_match": 48.34437086092715, |
|
"eval_f1": 65.82648855586231, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.7226526891522335e-05, |
|
"loss": 1.6311, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_exact_match": 48.17407757805109, |
|
"eval_f1": 65.95821129767383, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.6089334548769374e-05, |
|
"loss": 1.6879, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_exact_match": 48.070009460737936, |
|
"eval_f1": 65.61674709032437, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_exact_match": 48.33491012298959, |
|
"eval_f1": 66.26645652732091, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.4949863263445765e-05, |
|
"loss": 1.6127, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_exact_match": 47.776726584673604, |
|
"eval_f1": 65.67716714026636, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_exact_match": 46.97256385998108, |
|
"eval_f1": 65.35488377881, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.3810391978122152e-05, |
|
"loss": 1.3442, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_exact_match": 47.25638599810785, |
|
"eval_f1": 65.11588968827414, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.2670920692798542e-05, |
|
"loss": 1.3419, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_exact_match": 47.918637653736994, |
|
"eval_f1": 65.76384490998669, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_exact_match": 47.71996215704825, |
|
"eval_f1": 65.51523648050365, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.1533728350045578e-05, |
|
"loss": 1.3392, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_exact_match": 47.81456953642384, |
|
"eval_f1": 65.72328974412012, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_exact_match": 46.868495742667925, |
|
"eval_f1": 64.95043374963731, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.0396536007292617e-05, |
|
"loss": 1.3365, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_exact_match": 47.47398297067171, |
|
"eval_f1": 65.46687084562042, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.9257064721969007e-05, |
|
"loss": 1.3863, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_exact_match": 47.64427625354778, |
|
"eval_f1": 65.54344572464974, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_exact_match": 47.70104068117313, |
|
"eval_f1": 65.29014560945414, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.8117593436645398e-05, |
|
"loss": 1.364, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_exact_match": 47.918637653736994, |
|
"eval_f1": 65.66170183373373, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_exact_match": 48.33491012298959, |
|
"eval_f1": 65.94289128043252, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.697812215132179e-05, |
|
"loss": 1.3864, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_exact_match": 48.24976348155156, |
|
"eval_f1": 65.78253411845539, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.583865086599818e-05, |
|
"loss": 1.3636, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_exact_match": 48.438978240302745, |
|
"eval_f1": 66.07660779936401, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_exact_match": 48.16461684011353, |
|
"eval_f1": 66.2498588216324, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.469917958067457e-05, |
|
"loss": 1.3726, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_exact_match": 47.95648060548723, |
|
"eval_f1": 65.7260651497257, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_exact_match": 47.88079470198676, |
|
"eval_f1": 66.01379305937202, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.355970829535096e-05, |
|
"loss": 1.379, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_exact_match": 48.240302743614, |
|
"eval_f1": 65.53479147043561, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.2422515952597995e-05, |
|
"loss": 1.3401, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_exact_match": 48.221381267738884, |
|
"eval_f1": 65.86023587215384, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"eval_exact_match": 47.615894039735096, |
|
"eval_f1": 65.12853949917962, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.1283044667274386e-05, |
|
"loss": 1.1281, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_exact_match": 47.12393566698202, |
|
"eval_f1": 65.15991467915553, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_exact_match": 47.21854304635762, |
|
"eval_f1": 65.02752189426162, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.0143573381950776e-05, |
|
"loss": 1.1161, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_exact_match": 47.13339640491959, |
|
"eval_f1": 65.2429493800908, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 9.004102096627165e-06, |
|
"loss": 1.1337, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_exact_match": 47.010406811731315, |
|
"eval_f1": 65.03577782296541, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_exact_match": 47.379375591296125, |
|
"eval_f1": 65.13654689416043, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 7.864630811303556e-06, |
|
"loss": 1.1477, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_exact_match": 47.35099337748344, |
|
"eval_f1": 64.86398318568581, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_exact_match": 47.30368968779565, |
|
"eval_f1": 65.05463221900013, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 6.725159525979946e-06, |
|
"loss": 1.1414, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_exact_match": 46.97256385998108, |
|
"eval_f1": 65.11680930447929, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 5.587967183226983e-06, |
|
"loss": 1.1454, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_exact_match": 47.13339640491959, |
|
"eval_f1": 65.165125839751, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_exact_match": 47.20908230842006, |
|
"eval_f1": 65.09005212297234, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 4.448495897903373e-06, |
|
"loss": 1.1318, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"eval_exact_match": 46.95364238410596, |
|
"eval_f1": 65.02854138983005, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_exact_match": 46.773888363292336, |
|
"eval_f1": 64.98586781544525, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3.3090246125797635e-06, |
|
"loss": 1.124, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_exact_match": 46.76442762535478, |
|
"eval_f1": 65.02603195736303, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.169553327256153e-06, |
|
"loss": 1.1301, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_exact_match": 46.88741721854305, |
|
"eval_f1": 65.03187807653661, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_exact_match": 47.086092715231786, |
|
"eval_f1": 65.13748531716237, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.0323609845031905e-06, |
|
"loss": 1.1298, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_exact_match": 47.095553453169344, |
|
"eval_f1": 65.03629129514118, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_exact_match": 47.095553453169344, |
|
"eval_f1": 65.08861259333764, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 21940, |
|
"total_flos": 859515753161088.0, |
|
"train_loss": 1.5832800059288115, |
|
"train_runtime": 4642.7737, |
|
"train_samples_per_second": 75.599, |
|
"train_steps_per_second": 4.726 |
|
} |
|
], |
|
"max_steps": 21940, |
|
"num_train_epochs": 4, |
|
"total_flos": 859515753161088.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|