{ "best_metric": 76.88812735673795, "best_model_checkpoint": "/data/jcanete/all_results/tar/albeto_large/epochs_4_bs_16_lr_5e-6/checkpoint-15600", "epoch": 3.99990883398669, "global_step": 21936, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "eval_exact_match": 41.7123935666982, "eval_f1": 60.29401087357971, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.887171772428884e-06, "loss": 2.7603, "step": 500 }, { "epoch": 0.11, "eval_exact_match": 47.88079470198676, "eval_f1": 66.39084020086572, "step": 600 }, { "epoch": 0.16, "eval_exact_match": 50.52034058656575, "eval_f1": 68.74096558529118, "step": 900 }, { "epoch": 0.18, "learning_rate": 4.773203865791394e-06, "loss": 1.9271, "step": 1000 }, { "epoch": 0.22, "eval_exact_match": 50.91769157994324, "eval_f1": 70.11766866179605, "step": 1200 }, { "epoch": 0.27, "learning_rate": 4.659235959153902e-06, "loss": 1.8139, "step": 1500 }, { "epoch": 0.27, "eval_exact_match": 52.469252601702934, "eval_f1": 70.3737951692374, "step": 1500 }, { "epoch": 0.33, "eval_exact_match": 53.90728476821192, "eval_f1": 71.8131971248393, "step": 1800 }, { "epoch": 0.36, "learning_rate": 4.5452680525164115e-06, "loss": 1.7447, "step": 2000 }, { "epoch": 0.38, "eval_exact_match": 54.00189214758751, "eval_f1": 72.7403357532424, "step": 2100 }, { "epoch": 0.44, "eval_exact_match": 54.7682119205298, "eval_f1": 72.50269883199105, "step": 2400 }, { "epoch": 0.46, "learning_rate": 4.431300145878921e-06, "loss": 1.6775, "step": 2500 }, { "epoch": 0.49, "eval_exact_match": 55.50614947965941, "eval_f1": 73.58281516410962, "step": 2700 }, { "epoch": 0.55, "learning_rate": 4.3173322392414305e-06, "loss": 1.6541, "step": 3000 }, { "epoch": 0.55, "eval_exact_match": 56.19678334910123, "eval_f1": 74.02233505494875, "step": 3000 }, { "epoch": 0.6, "eval_exact_match": 56.2724692526017, "eval_f1": 73.79619452732652, "step": 3300 }, { "epoch": 0.64, "learning_rate": 4.203364332603939e-06, "loss": 1.6342, "step": 3500 }, { "epoch": 0.66, "eval_exact_match": 56.177861873226114, "eval_f1": 73.84756635429511, "step": 3600 }, { "epoch": 0.71, "eval_exact_match": 57.1996215704825, "eval_f1": 74.85348692600742, "step": 3900 }, { "epoch": 0.73, "learning_rate": 4.089396425966448e-06, "loss": 1.5771, "step": 4000 }, { "epoch": 0.77, "eval_exact_match": 57.49290444654683, "eval_f1": 75.1912922642055, "step": 4200 }, { "epoch": 0.82, "learning_rate": 3.975428519328957e-06, "loss": 1.5866, "step": 4500 }, { "epoch": 0.82, "eval_exact_match": 57.74834437086093, "eval_f1": 75.25632518139247, "step": 4500 }, { "epoch": 0.88, "eval_exact_match": 57.47398297067171, "eval_f1": 75.12925316562874, "step": 4800 }, { "epoch": 0.91, "learning_rate": 3.861460612691467e-06, "loss": 1.559, "step": 5000 }, { "epoch": 0.93, "eval_exact_match": 57.51182592242195, "eval_f1": 74.90745642100545, "step": 5100 }, { "epoch": 0.98, "eval_exact_match": 57.74834437086093, "eval_f1": 75.43001455522467, "step": 5400 }, { "epoch": 1.0, "learning_rate": 3.7477206418672507e-06, "loss": 1.5324, "step": 5500 }, { "epoch": 1.04, "eval_exact_match": 58.19299905392621, "eval_f1": 75.72737754816276, "step": 5700 }, { "epoch": 1.09, "learning_rate": 3.63375273522976e-06, "loss": 1.4262, "step": 6000 }, { "epoch": 1.09, "eval_exact_match": 58.42951750236519, "eval_f1": 75.89034035367936, "step": 6000 }, { "epoch": 1.15, "eval_exact_match": 57.852412488174075, "eval_f1": 75.61206860863662, "step": 6300 }, { "epoch": 1.19, "learning_rate": 3.5197848285922685e-06, "loss": 1.4304, "step": 6500 }, { "epoch": 1.2, "eval_exact_match": 58.50520340586566, "eval_f1": 75.88296862635806, "step": 6600 }, { "epoch": 1.26, "eval_exact_match": 58.27814569536424, "eval_f1": 75.90558506335344, "step": 6900 }, { "epoch": 1.28, "learning_rate": 3.4058169219547776e-06, "loss": 1.4139, "step": 7000 }, { "epoch": 1.31, "eval_exact_match": 58.666035950804165, "eval_f1": 75.93228571278655, "step": 7200 }, { "epoch": 1.37, "learning_rate": 3.291849015317287e-06, "loss": 1.4231, "step": 7500 }, { "epoch": 1.37, "eval_exact_match": 58.372753074739826, "eval_f1": 75.62654338728922, "step": 7500 }, { "epoch": 1.42, "eval_exact_match": 58.98770104068117, "eval_f1": 76.04881705292577, "step": 7800 }, { "epoch": 1.46, "learning_rate": 3.1778811086797962e-06, "loss": 1.4247, "step": 8000 }, { "epoch": 1.48, "eval_exact_match": 58.760643330179754, "eval_f1": 76.17322174292238, "step": 8100 }, { "epoch": 1.53, "eval_exact_match": 58.77956480605487, "eval_f1": 76.19180002443377, "step": 8400 }, { "epoch": 1.55, "learning_rate": 3.0639132020423053e-06, "loss": 1.4018, "step": 8500 }, { "epoch": 1.59, "eval_exact_match": 58.87417218543046, "eval_f1": 76.19402653807724, "step": 8700 }, { "epoch": 1.64, "learning_rate": 2.949945295404814e-06, "loss": 1.4026, "step": 9000 }, { "epoch": 1.64, "eval_exact_match": 59.205298013245034, "eval_f1": 76.31775571296018, "step": 9000 }, { "epoch": 1.7, "eval_exact_match": 58.79848628192999, "eval_f1": 76.05081038788326, "step": 9300 }, { "epoch": 1.73, "learning_rate": 2.835977388767323e-06, "loss": 1.3831, "step": 9500 }, { "epoch": 1.75, "eval_exact_match": 58.902554399243144, "eval_f1": 76.05042386760238, "step": 9600 }, { "epoch": 1.81, "eval_exact_match": 59.11069063386944, "eval_f1": 76.2790864798194, "step": 9900 }, { "epoch": 1.82, "learning_rate": 2.7222374179431077e-06, "loss": 1.3788, "step": 10000 }, { "epoch": 1.86, "eval_exact_match": 59.13907284768212, "eval_f1": 76.1649200485559, "step": 10200 }, { "epoch": 1.91, "learning_rate": 2.608269511305617e-06, "loss": 1.4271, "step": 10500 }, { "epoch": 1.91, "eval_exact_match": 59.271523178807946, "eval_f1": 76.31194456888919, "step": 10500 }, { "epoch": 1.97, "eval_exact_match": 59.0728476821192, "eval_f1": 76.3883200778918, "step": 10800 }, { "epoch": 2.01, "learning_rate": 2.4943016046681255e-06, "loss": 1.3668, "step": 11000 }, { "epoch": 2.02, "eval_exact_match": 58.968779564806056, "eval_f1": 76.36879525654898, "step": 11100 }, { "epoch": 2.08, "eval_exact_match": 59.10122989593188, "eval_f1": 76.61210895681596, "step": 11400 }, { "epoch": 2.1, "learning_rate": 2.3803336980306346e-06, "loss": 1.2558, "step": 11500 }, { "epoch": 2.13, "eval_exact_match": 59.04446546830653, "eval_f1": 76.39711273020981, "step": 11700 }, { "epoch": 2.19, "learning_rate": 2.266593727206419e-06, "loss": 1.2579, "step": 12000 }, { "epoch": 2.19, "eval_exact_match": 58.94039735099338, "eval_f1": 76.52845917449174, "step": 12000 }, { "epoch": 2.24, "eval_exact_match": 59.432355723746454, "eval_f1": 76.44161025565445, "step": 12300 }, { "epoch": 2.28, "learning_rate": 2.152625820568928e-06, "loss": 1.2599, "step": 12500 }, { "epoch": 2.3, "eval_exact_match": 59.29990539262062, "eval_f1": 76.46886303091172, "step": 12600 }, { "epoch": 2.35, "eval_exact_match": 59.508041627246925, "eval_f1": 76.69876166788403, "step": 12900 }, { "epoch": 2.37, "learning_rate": 2.038657913931437e-06, "loss": 1.2296, "step": 13000 }, { "epoch": 2.41, "eval_exact_match": 58.9593188268685, "eval_f1": 76.41311355011096, "step": 13200 }, { "epoch": 2.46, "learning_rate": 1.924690007293946e-06, "loss": 1.2978, "step": 13500 }, { "epoch": 2.46, "eval_exact_match": 59.39451277199622, "eval_f1": 76.57721571752349, "step": 13500 }, { "epoch": 2.52, "eval_exact_match": 59.40397350993378, "eval_f1": 76.60409469987792, "step": 13800 }, { "epoch": 2.55, "learning_rate": 1.8109500364697303e-06, "loss": 1.2498, "step": 14000 }, { "epoch": 2.57, "eval_exact_match": 59.508041627246925, "eval_f1": 76.75425085048117, "step": 14100 }, { "epoch": 2.63, "eval_exact_match": 59.45127719962157, "eval_f1": 76.45228535462638, "step": 14400 }, { "epoch": 2.64, "learning_rate": 1.6969821298322394e-06, "loss": 1.2848, "step": 14500 }, { "epoch": 2.68, "eval_exact_match": 59.640491958372756, "eval_f1": 76.65492303716947, "step": 14700 }, { "epoch": 2.74, "learning_rate": 1.5830142231947485e-06, "loss": 1.2707, "step": 15000 }, { "epoch": 2.74, "eval_exact_match": 59.69725638599811, "eval_f1": 76.61197101582836, "step": 15000 }, { "epoch": 2.79, "eval_exact_match": 59.356669820245976, "eval_f1": 76.54400801502746, "step": 15300 }, { "epoch": 2.83, "learning_rate": 1.4690463165572578e-06, "loss": 1.2673, "step": 15500 }, { "epoch": 2.84, "eval_exact_match": 59.86754966887417, "eval_f1": 76.88812735673795, "step": 15600 }, { "epoch": 2.9, "eval_exact_match": 59.56480605487228, "eval_f1": 76.78104792015891, "step": 15900 }, { "epoch": 2.92, "learning_rate": 1.3553063457330418e-06, "loss": 1.2817, "step": 16000 }, { "epoch": 2.95, "eval_exact_match": 59.33774834437086, "eval_f1": 76.65914116498631, "step": 16200 }, { "epoch": 3.01, "learning_rate": 1.2413384390955507e-06, "loss": 1.238, "step": 16500 }, { "epoch": 3.01, "eval_exact_match": 59.75402081362346, "eval_f1": 76.71234598258661, "step": 16500 }, { "epoch": 3.06, "eval_exact_match": 59.52696310312204, "eval_f1": 76.58652846423291, "step": 16800 }, { "epoch": 3.1, "learning_rate": 1.1273705324580598e-06, "loss": 1.1811, "step": 17000 }, { "epoch": 3.12, "eval_exact_match": 59.120151371807, "eval_f1": 76.43831302372423, "step": 17100 }, { "epoch": 3.17, "eval_exact_match": 59.31882686849574, "eval_f1": 76.330035615797, "step": 17400 }, { "epoch": 3.19, "learning_rate": 1.013630561633844e-06, "loss": 1.1526, "step": 17500 }, { "epoch": 3.23, "eval_exact_match": 59.46073793755913, "eval_f1": 76.62045789210767, "step": 17700 }, { "epoch": 3.28, "learning_rate": 8.996626549963532e-07, "loss": 1.1639, "step": 18000 }, { "epoch": 3.28, "eval_exact_match": 59.602649006622514, "eval_f1": 76.58821728052384, "step": 18000 }, { "epoch": 3.34, "eval_exact_match": 59.5364238410596, "eval_f1": 76.48138372732446, "step": 18300 }, { "epoch": 3.37, "learning_rate": 7.856947483588622e-07, "loss": 1.1736, "step": 18500 }, { "epoch": 3.39, "eval_exact_match": 59.39451277199622, "eval_f1": 76.44759593814429, "step": 18600 }, { "epoch": 3.45, "eval_exact_match": 59.57426679280984, "eval_f1": 76.59464831626804, "step": 18900 }, { "epoch": 3.46, "learning_rate": 6.717268417213713e-07, "loss": 1.1616, "step": 19000 }, { "epoch": 3.5, "eval_exact_match": 59.24314096499527, "eval_f1": 76.46679748121056, "step": 19200 }, { "epoch": 3.56, "learning_rate": 5.577589350838805e-07, "loss": 1.1862, "step": 19500 }, { "epoch": 3.56, "eval_exact_match": 59.356669820245976, "eval_f1": 76.48581357909573, "step": 19500 }, { "epoch": 3.61, "eval_exact_match": 59.11069063386944, "eval_f1": 76.28951562925242, "step": 19800 }, { "epoch": 3.65, "learning_rate": 4.4379102844638954e-07, "loss": 1.1581, "step": 20000 }, { "epoch": 3.67, "eval_exact_match": 59.23368022705771, "eval_f1": 76.38635194011843, "step": 20100 }, { "epoch": 3.72, "eval_exact_match": 59.205298013245034, "eval_f1": 76.5121674711018, "step": 20400 }, { "epoch": 3.74, "learning_rate": 3.298231218088986e-07, "loss": 1.1714, "step": 20500 }, { "epoch": 3.77, "eval_exact_match": 59.3755912961211, "eval_f1": 76.52009882433762, "step": 20700 }, { "epoch": 3.83, "learning_rate": 2.1585521517140777e-07, "loss": 1.1563, "step": 21000 }, { "epoch": 3.83, "eval_exact_match": 59.33774834437086, "eval_f1": 76.54016429214948, "step": 21000 }, { "epoch": 3.88, "eval_exact_match": 59.432355723746454, "eval_f1": 76.54804939590761, "step": 21300 }, { "epoch": 3.92, "learning_rate": 1.0188730853391687e-07, "loss": 1.1882, "step": 21500 }, { "epoch": 3.94, "eval_exact_match": 59.34720908230842, "eval_f1": 76.51599845264303, "step": 21600 }, { "epoch": 3.99, "eval_exact_match": 59.34720908230842, "eval_f1": 76.5156670132116, "step": 21900 }, { "epoch": 4.0, "step": 21936, "total_flos": 7984258357129344.0, "train_loss": 1.4025724537610833, "train_runtime": 13926.1742, "train_samples_per_second": 25.203, "train_steps_per_second": 1.575 } ], "max_steps": 21936, "num_train_epochs": 4, "total_flos": 7984258357129344.0, "trial_name": null, "trial_params": null }