|
{ |
|
"best_metric": 76.88812735673795, |
|
"best_model_checkpoint": "/data/jcanete/all_results/tar/albeto_large/epochs_4_bs_16_lr_5e-6/checkpoint-15600", |
|
"epoch": 3.99990883398669, |
|
"global_step": 21936, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"eval_exact_match": 41.7123935666982, |
|
"eval_f1": 60.29401087357971, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.887171772428884e-06, |
|
"loss": 2.7603, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_exact_match": 47.88079470198676, |
|
"eval_f1": 66.39084020086572, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_exact_match": 50.52034058656575, |
|
"eval_f1": 68.74096558529118, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.773203865791394e-06, |
|
"loss": 1.9271, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_exact_match": 50.91769157994324, |
|
"eval_f1": 70.11766866179605, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.659235959153902e-06, |
|
"loss": 1.8139, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_exact_match": 52.469252601702934, |
|
"eval_f1": 70.3737951692374, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_exact_match": 53.90728476821192, |
|
"eval_f1": 71.8131971248393, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.5452680525164115e-06, |
|
"loss": 1.7447, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_exact_match": 54.00189214758751, |
|
"eval_f1": 72.7403357532424, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_exact_match": 54.7682119205298, |
|
"eval_f1": 72.50269883199105, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.431300145878921e-06, |
|
"loss": 1.6775, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_exact_match": 55.50614947965941, |
|
"eval_f1": 73.58281516410962, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.3173322392414305e-06, |
|
"loss": 1.6541, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_exact_match": 56.19678334910123, |
|
"eval_f1": 74.02233505494875, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_exact_match": 56.2724692526017, |
|
"eval_f1": 73.79619452732652, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.203364332603939e-06, |
|
"loss": 1.6342, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_exact_match": 56.177861873226114, |
|
"eval_f1": 73.84756635429511, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_exact_match": 57.1996215704825, |
|
"eval_f1": 74.85348692600742, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.089396425966448e-06, |
|
"loss": 1.5771, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_exact_match": 57.49290444654683, |
|
"eval_f1": 75.1912922642055, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.975428519328957e-06, |
|
"loss": 1.5866, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_exact_match": 57.74834437086093, |
|
"eval_f1": 75.25632518139247, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_exact_match": 57.47398297067171, |
|
"eval_f1": 75.12925316562874, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.861460612691467e-06, |
|
"loss": 1.559, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_exact_match": 57.51182592242195, |
|
"eval_f1": 74.90745642100545, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_exact_match": 57.74834437086093, |
|
"eval_f1": 75.43001455522467, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7477206418672507e-06, |
|
"loss": 1.5324, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_exact_match": 58.19299905392621, |
|
"eval_f1": 75.72737754816276, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.63375273522976e-06, |
|
"loss": 1.4262, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_exact_match": 58.42951750236519, |
|
"eval_f1": 75.89034035367936, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_exact_match": 57.852412488174075, |
|
"eval_f1": 75.61206860863662, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.5197848285922685e-06, |
|
"loss": 1.4304, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_exact_match": 58.50520340586566, |
|
"eval_f1": 75.88296862635806, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_exact_match": 58.27814569536424, |
|
"eval_f1": 75.90558506335344, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.4058169219547776e-06, |
|
"loss": 1.4139, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_exact_match": 58.666035950804165, |
|
"eval_f1": 75.93228571278655, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.291849015317287e-06, |
|
"loss": 1.4231, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_exact_match": 58.372753074739826, |
|
"eval_f1": 75.62654338728922, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_exact_match": 58.98770104068117, |
|
"eval_f1": 76.04881705292577, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.1778811086797962e-06, |
|
"loss": 1.4247, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_exact_match": 58.760643330179754, |
|
"eval_f1": 76.17322174292238, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_exact_match": 58.77956480605487, |
|
"eval_f1": 76.19180002443377, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 3.0639132020423053e-06, |
|
"loss": 1.4018, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_exact_match": 58.87417218543046, |
|
"eval_f1": 76.19402653807724, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.949945295404814e-06, |
|
"loss": 1.4026, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_exact_match": 59.205298013245034, |
|
"eval_f1": 76.31775571296018, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_exact_match": 58.79848628192999, |
|
"eval_f1": 76.05081038788326, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.835977388767323e-06, |
|
"loss": 1.3831, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_exact_match": 58.902554399243144, |
|
"eval_f1": 76.05042386760238, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_exact_match": 59.11069063386944, |
|
"eval_f1": 76.2790864798194, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.7222374179431077e-06, |
|
"loss": 1.3788, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_exact_match": 59.13907284768212, |
|
"eval_f1": 76.1649200485559, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.608269511305617e-06, |
|
"loss": 1.4271, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_exact_match": 59.271523178807946, |
|
"eval_f1": 76.31194456888919, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_exact_match": 59.0728476821192, |
|
"eval_f1": 76.3883200778918, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.4943016046681255e-06, |
|
"loss": 1.3668, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_exact_match": 58.968779564806056, |
|
"eval_f1": 76.36879525654898, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_exact_match": 59.10122989593188, |
|
"eval_f1": 76.61210895681596, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.3803336980306346e-06, |
|
"loss": 1.2558, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_exact_match": 59.04446546830653, |
|
"eval_f1": 76.39711273020981, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.266593727206419e-06, |
|
"loss": 1.2579, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_exact_match": 58.94039735099338, |
|
"eval_f1": 76.52845917449174, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_exact_match": 59.432355723746454, |
|
"eval_f1": 76.44161025565445, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.152625820568928e-06, |
|
"loss": 1.2599, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_exact_match": 59.29990539262062, |
|
"eval_f1": 76.46886303091172, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_exact_match": 59.508041627246925, |
|
"eval_f1": 76.69876166788403, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.038657913931437e-06, |
|
"loss": 1.2296, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_exact_match": 58.9593188268685, |
|
"eval_f1": 76.41311355011096, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.924690007293946e-06, |
|
"loss": 1.2978, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_exact_match": 59.39451277199622, |
|
"eval_f1": 76.57721571752349, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_exact_match": 59.40397350993378, |
|
"eval_f1": 76.60409469987792, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.8109500364697303e-06, |
|
"loss": 1.2498, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_exact_match": 59.508041627246925, |
|
"eval_f1": 76.75425085048117, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_exact_match": 59.45127719962157, |
|
"eval_f1": 76.45228535462638, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.6969821298322394e-06, |
|
"loss": 1.2848, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_exact_match": 59.640491958372756, |
|
"eval_f1": 76.65492303716947, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.5830142231947485e-06, |
|
"loss": 1.2707, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_exact_match": 59.69725638599811, |
|
"eval_f1": 76.61197101582836, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_exact_match": 59.356669820245976, |
|
"eval_f1": 76.54400801502746, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.4690463165572578e-06, |
|
"loss": 1.2673, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_exact_match": 59.86754966887417, |
|
"eval_f1": 76.88812735673795, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_exact_match": 59.56480605487228, |
|
"eval_f1": 76.78104792015891, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.3553063457330418e-06, |
|
"loss": 1.2817, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_exact_match": 59.33774834437086, |
|
"eval_f1": 76.65914116498631, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.2413384390955507e-06, |
|
"loss": 1.238, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_exact_match": 59.75402081362346, |
|
"eval_f1": 76.71234598258661, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"eval_exact_match": 59.52696310312204, |
|
"eval_f1": 76.58652846423291, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.1273705324580598e-06, |
|
"loss": 1.1811, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_exact_match": 59.120151371807, |
|
"eval_f1": 76.43831302372423, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_exact_match": 59.31882686849574, |
|
"eval_f1": 76.330035615797, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.013630561633844e-06, |
|
"loss": 1.1526, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_exact_match": 59.46073793755913, |
|
"eval_f1": 76.62045789210767, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 8.996626549963532e-07, |
|
"loss": 1.1639, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_exact_match": 59.602649006622514, |
|
"eval_f1": 76.58821728052384, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_exact_match": 59.5364238410596, |
|
"eval_f1": 76.48138372732446, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 7.856947483588622e-07, |
|
"loss": 1.1736, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_exact_match": 59.39451277199622, |
|
"eval_f1": 76.44759593814429, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_exact_match": 59.57426679280984, |
|
"eval_f1": 76.59464831626804, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 6.717268417213713e-07, |
|
"loss": 1.1616, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_exact_match": 59.24314096499527, |
|
"eval_f1": 76.46679748121056, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 5.577589350838805e-07, |
|
"loss": 1.1862, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_exact_match": 59.356669820245976, |
|
"eval_f1": 76.48581357909573, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_exact_match": 59.11069063386944, |
|
"eval_f1": 76.28951562925242, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 4.4379102844638954e-07, |
|
"loss": 1.1581, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_exact_match": 59.23368022705771, |
|
"eval_f1": 76.38635194011843, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_exact_match": 59.205298013245034, |
|
"eval_f1": 76.5121674711018, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3.298231218088986e-07, |
|
"loss": 1.1714, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_exact_match": 59.3755912961211, |
|
"eval_f1": 76.52009882433762, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.1585521517140777e-07, |
|
"loss": 1.1563, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_exact_match": 59.33774834437086, |
|
"eval_f1": 76.54016429214948, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_exact_match": 59.432355723746454, |
|
"eval_f1": 76.54804939590761, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.0188730853391687e-07, |
|
"loss": 1.1882, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_exact_match": 59.34720908230842, |
|
"eval_f1": 76.51599845264303, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_exact_match": 59.34720908230842, |
|
"eval_f1": 76.5156670132116, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 21936, |
|
"total_flos": 7984258357129344.0, |
|
"train_loss": 1.4025724537610833, |
|
"train_runtime": 13926.1742, |
|
"train_samples_per_second": 25.203, |
|
"train_steps_per_second": 1.575 |
|
} |
|
], |
|
"max_steps": 21936, |
|
"num_train_epochs": 4, |
|
"total_flos": 7984258357129344.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|