{ "best_metric": 66.26645652732091, "best_model_checkpoint": "/data/jcanete/all_results/tar/albeto_tiny/epochs_4_bs_16_lr_5e-5/checkpoint-10800", "epoch": 4.0, "global_step": 21940, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "eval_exact_match": 25.279091769157993, "eval_f1": 41.71460503665182, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.8867365542388335e-05, "loss": 3.1064, "step": 500 }, { "epoch": 0.11, "eval_exact_match": 33.77483443708609, "eval_f1": 51.462032531677195, "step": 600 }, { "epoch": 0.16, "eval_exact_match": 37.78618732261116, "eval_f1": 55.147295523401176, "step": 900 }, { "epoch": 0.18, "learning_rate": 4.7727894257064726e-05, "loss": 2.4193, "step": 1000 }, { "epoch": 0.22, "eval_exact_match": 39.735099337748345, "eval_f1": 57.70943484117446, "step": 1200 }, { "epoch": 0.27, "learning_rate": 4.6588422971741116e-05, "loss": 2.2494, "step": 1500 }, { "epoch": 0.27, "eval_exact_match": 40.69063386944182, "eval_f1": 58.161449729726456, "step": 1500 }, { "epoch": 0.33, "eval_exact_match": 41.72185430463576, "eval_f1": 59.240651392335586, "step": 1800 }, { "epoch": 0.36, "learning_rate": 4.544895168641751e-05, "loss": 2.1889, "step": 2000 }, { "epoch": 0.38, "eval_exact_match": 41.627246925260174, "eval_f1": 60.05133975647362, "step": 2100 }, { "epoch": 0.44, "eval_exact_match": 43.33964049195837, "eval_f1": 61.334205952553624, "step": 2400 }, { "epoch": 0.46, "learning_rate": 4.431175934366454e-05, "loss": 2.0954, "step": 2500 }, { "epoch": 0.49, "eval_exact_match": 44.1438032166509, "eval_f1": 61.6109819191811, "step": 2700 }, { "epoch": 0.55, "learning_rate": 4.317228805834093e-05, "loss": 2.0585, "step": 3000 }, { "epoch": 0.55, "eval_exact_match": 44.55061494796594, "eval_f1": 62.74097437748746, "step": 3000 }, { "epoch": 0.6, "eval_exact_match": 45.14664143803217, "eval_f1": 62.716820973684904, "step": 3300 }, { "epoch": 0.64, "learning_rate": 4.203281677301732e-05, "loss": 2.0252, "step": 3500 }, { "epoch": 0.66, "eval_exact_match": 45.22232734153264, "eval_f1": 63.111925308146255, "step": 3600 }, { "epoch": 0.71, "eval_exact_match": 45.13718070009461, "eval_f1": 63.12474940531198, "step": 3900 }, { "epoch": 0.73, "learning_rate": 4.0893345487693714e-05, "loss": 1.9861, "step": 4000 }, { "epoch": 0.77, "eval_exact_match": 46.21570482497635, "eval_f1": 64.00817566022319, "step": 4200 }, { "epoch": 0.82, "learning_rate": 3.975615314494075e-05, "loss": 1.9881, "step": 4500 }, { "epoch": 0.82, "eval_exact_match": 46.72658467360454, "eval_f1": 64.45739871415881, "step": 4500 }, { "epoch": 0.88, "eval_exact_match": 46.31031220435194, "eval_f1": 64.03851598846974, "step": 4800 }, { "epoch": 0.91, "learning_rate": 3.861668185961714e-05, "loss": 1.9619, "step": 5000 }, { "epoch": 0.93, "eval_exact_match": 46.57521286660359, "eval_f1": 64.71289725300291, "step": 5100 }, { "epoch": 0.98, "eval_exact_match": 46.80227057710501, "eval_f1": 64.89896140882486, "step": 5400 }, { "epoch": 1.0, "learning_rate": 3.747721057429353e-05, "loss": 1.8961, "step": 5500 }, { "epoch": 1.04, "eval_exact_match": 46.868495742667925, "eval_f1": 64.9566553926742, "step": 5700 }, { "epoch": 1.09, "learning_rate": 3.633773928896992e-05, "loss": 1.6613, "step": 6000 }, { "epoch": 1.09, "eval_exact_match": 47.05771050141911, "eval_f1": 65.16060187033897, "step": 6000 }, { "epoch": 1.15, "eval_exact_match": 47.010406811731315, "eval_f1": 64.63527459887219, "step": 6300 }, { "epoch": 1.19, "learning_rate": 3.520054694621696e-05, "loss": 1.6724, "step": 6500 }, { "epoch": 1.2, "eval_exact_match": 46.44276253547777, "eval_f1": 64.6485660215026, "step": 6600 }, { "epoch": 1.26, "eval_exact_match": 47.02932828760643, "eval_f1": 65.25056698142618, "step": 6900 }, { "epoch": 1.28, "learning_rate": 3.406107566089335e-05, "loss": 1.6622, "step": 7000 }, { "epoch": 1.31, "eval_exact_match": 47.38883632923368, "eval_f1": 65.14765281416761, "step": 7200 }, { "epoch": 1.37, "learning_rate": 3.292160437556974e-05, "loss": 1.6765, "step": 7500 }, { "epoch": 1.37, "eval_exact_match": 47.52128666035951, "eval_f1": 65.23959133286817, "step": 7500 }, { "epoch": 1.42, "eval_exact_match": 47.24692526017029, "eval_f1": 64.89498224105805, "step": 7800 }, { "epoch": 1.46, "learning_rate": 3.178213309024613e-05, "loss": 1.6831, "step": 8000 }, { "epoch": 1.48, "eval_exact_match": 47.42667928098392, "eval_f1": 65.43419870447086, "step": 8100 }, { "epoch": 1.53, "eval_exact_match": 47.237464522232735, "eval_f1": 65.29424940770944, "step": 8400 }, { "epoch": 1.55, "learning_rate": 3.0644940747493164e-05, "loss": 1.6652, "step": 8500 }, { "epoch": 1.59, "eval_exact_match": 47.74834437086093, "eval_f1": 65.62503101399187, "step": 8700 }, { "epoch": 1.64, "learning_rate": 2.9505469462169554e-05, "loss": 1.6622, "step": 9000 }, { "epoch": 1.64, "eval_exact_match": 48.04162724692526, "eval_f1": 66.13531062019833, "step": 9000 }, { "epoch": 1.7, "eval_exact_match": 47.6631977294229, "eval_f1": 65.69495765526571, "step": 9300 }, { "epoch": 1.73, "learning_rate": 2.836599817684594e-05, "loss": 1.6282, "step": 9500 }, { "epoch": 1.75, "eval_exact_match": 48.15515610217597, "eval_f1": 65.85406198831797, "step": 9600 }, { "epoch": 1.8, "eval_exact_match": 48.34437086092715, "eval_f1": 65.82648855586231, "step": 9900 }, { "epoch": 1.82, "learning_rate": 2.7226526891522335e-05, "loss": 1.6311, "step": 10000 }, { "epoch": 1.86, "eval_exact_match": 48.17407757805109, "eval_f1": 65.95821129767383, "step": 10200 }, { "epoch": 1.91, "learning_rate": 2.6089334548769374e-05, "loss": 1.6879, "step": 10500 }, { "epoch": 1.91, "eval_exact_match": 48.070009460737936, "eval_f1": 65.61674709032437, "step": 10500 }, { "epoch": 1.97, "eval_exact_match": 48.33491012298959, "eval_f1": 66.26645652732091, "step": 10800 }, { "epoch": 2.01, "learning_rate": 2.4949863263445765e-05, "loss": 1.6127, "step": 11000 }, { "epoch": 2.02, "eval_exact_match": 47.776726584673604, "eval_f1": 65.67716714026636, "step": 11100 }, { "epoch": 2.08, "eval_exact_match": 46.97256385998108, "eval_f1": 65.35488377881, "step": 11400 }, { "epoch": 2.1, "learning_rate": 2.3810391978122152e-05, "loss": 1.3442, "step": 11500 }, { "epoch": 2.13, "eval_exact_match": 47.25638599810785, "eval_f1": 65.11588968827414, "step": 11700 }, { "epoch": 2.19, "learning_rate": 2.2670920692798542e-05, "loss": 1.3419, "step": 12000 }, { "epoch": 2.19, "eval_exact_match": 47.918637653736994, "eval_f1": 65.76384490998669, "step": 12000 }, { "epoch": 2.24, "eval_exact_match": 47.71996215704825, "eval_f1": 65.51523648050365, "step": 12300 }, { "epoch": 2.28, "learning_rate": 2.1533728350045578e-05, "loss": 1.3392, "step": 12500 }, { "epoch": 2.3, "eval_exact_match": 47.81456953642384, "eval_f1": 65.72328974412012, "step": 12600 }, { "epoch": 2.35, "eval_exact_match": 46.868495742667925, "eval_f1": 64.95043374963731, "step": 12900 }, { "epoch": 2.37, "learning_rate": 2.0396536007292617e-05, "loss": 1.3365, "step": 13000 }, { "epoch": 2.41, "eval_exact_match": 47.47398297067171, "eval_f1": 65.46687084562042, "step": 13200 }, { "epoch": 2.46, "learning_rate": 1.9257064721969007e-05, "loss": 1.3863, "step": 13500 }, { "epoch": 2.46, "eval_exact_match": 47.64427625354778, "eval_f1": 65.54344572464974, "step": 13500 }, { "epoch": 2.52, "eval_exact_match": 47.70104068117313, "eval_f1": 65.29014560945414, "step": 13800 }, { "epoch": 2.55, "learning_rate": 1.8117593436645398e-05, "loss": 1.364, "step": 14000 }, { "epoch": 2.57, "eval_exact_match": 47.918637653736994, "eval_f1": 65.66170183373373, "step": 14100 }, { "epoch": 2.63, "eval_exact_match": 48.33491012298959, "eval_f1": 65.94289128043252, "step": 14400 }, { "epoch": 2.64, "learning_rate": 1.697812215132179e-05, "loss": 1.3864, "step": 14500 }, { "epoch": 2.68, "eval_exact_match": 48.24976348155156, "eval_f1": 65.78253411845539, "step": 14700 }, { "epoch": 2.73, "learning_rate": 1.583865086599818e-05, "loss": 1.3636, "step": 15000 }, { "epoch": 2.73, "eval_exact_match": 48.438978240302745, "eval_f1": 66.07660779936401, "step": 15000 }, { "epoch": 2.79, "eval_exact_match": 48.16461684011353, "eval_f1": 66.2498588216324, "step": 15300 }, { "epoch": 2.83, "learning_rate": 1.469917958067457e-05, "loss": 1.3726, "step": 15500 }, { "epoch": 2.84, "eval_exact_match": 47.95648060548723, "eval_f1": 65.7260651497257, "step": 15600 }, { "epoch": 2.9, "eval_exact_match": 47.88079470198676, "eval_f1": 66.01379305937202, "step": 15900 }, { "epoch": 2.92, "learning_rate": 1.355970829535096e-05, "loss": 1.379, "step": 16000 }, { "epoch": 2.95, "eval_exact_match": 48.240302743614, "eval_f1": 65.53479147043561, "step": 16200 }, { "epoch": 3.01, "learning_rate": 1.2422515952597995e-05, "loss": 1.3401, "step": 16500 }, { "epoch": 3.01, "eval_exact_match": 48.221381267738884, "eval_f1": 65.86023587215384, "step": 16500 }, { "epoch": 3.06, "eval_exact_match": 47.615894039735096, "eval_f1": 65.12853949917962, "step": 16800 }, { "epoch": 3.1, "learning_rate": 1.1283044667274386e-05, "loss": 1.1281, "step": 17000 }, { "epoch": 3.12, "eval_exact_match": 47.12393566698202, "eval_f1": 65.15991467915553, "step": 17100 }, { "epoch": 3.17, "eval_exact_match": 47.21854304635762, "eval_f1": 65.02752189426162, "step": 17400 }, { "epoch": 3.19, "learning_rate": 1.0143573381950776e-05, "loss": 1.1161, "step": 17500 }, { "epoch": 3.23, "eval_exact_match": 47.13339640491959, "eval_f1": 65.2429493800908, "step": 17700 }, { "epoch": 3.28, "learning_rate": 9.004102096627165e-06, "loss": 1.1337, "step": 18000 }, { "epoch": 3.28, "eval_exact_match": 47.010406811731315, "eval_f1": 65.03577782296541, "step": 18000 }, { "epoch": 3.34, "eval_exact_match": 47.379375591296125, "eval_f1": 65.13654689416043, "step": 18300 }, { "epoch": 3.37, "learning_rate": 7.864630811303556e-06, "loss": 1.1477, "step": 18500 }, { "epoch": 3.39, "eval_exact_match": 47.35099337748344, "eval_f1": 64.86398318568581, "step": 18600 }, { "epoch": 3.45, "eval_exact_match": 47.30368968779565, "eval_f1": 65.05463221900013, "step": 18900 }, { "epoch": 3.46, "learning_rate": 6.725159525979946e-06, "loss": 1.1414, "step": 19000 }, { "epoch": 3.5, "eval_exact_match": 46.97256385998108, "eval_f1": 65.11680930447929, "step": 19200 }, { "epoch": 3.56, "learning_rate": 5.587967183226983e-06, "loss": 1.1454, "step": 19500 }, { "epoch": 3.56, "eval_exact_match": 47.13339640491959, "eval_f1": 65.165125839751, "step": 19500 }, { "epoch": 3.61, "eval_exact_match": 47.20908230842006, "eval_f1": 65.09005212297234, "step": 19800 }, { "epoch": 3.65, "learning_rate": 4.448495897903373e-06, "loss": 1.1318, "step": 20000 }, { "epoch": 3.66, "eval_exact_match": 46.95364238410596, "eval_f1": 65.02854138983005, "step": 20100 }, { "epoch": 3.72, "eval_exact_match": 46.773888363292336, "eval_f1": 64.98586781544525, "step": 20400 }, { "epoch": 3.74, "learning_rate": 3.3090246125797635e-06, "loss": 1.124, "step": 20500 }, { "epoch": 3.77, "eval_exact_match": 46.76442762535478, "eval_f1": 65.02603195736303, "step": 20700 }, { "epoch": 3.83, "learning_rate": 2.169553327256153e-06, "loss": 1.1301, "step": 21000 }, { "epoch": 3.83, "eval_exact_match": 46.88741721854305, "eval_f1": 65.03187807653661, "step": 21000 }, { "epoch": 3.88, "eval_exact_match": 47.086092715231786, "eval_f1": 65.13748531716237, "step": 21300 }, { "epoch": 3.92, "learning_rate": 1.0323609845031905e-06, "loss": 1.1298, "step": 21500 }, { "epoch": 3.94, "eval_exact_match": 47.095553453169344, "eval_f1": 65.03629129514118, "step": 21600 }, { "epoch": 3.99, "eval_exact_match": 47.095553453169344, "eval_f1": 65.08861259333764, "step": 21900 }, { "epoch": 4.0, "step": 21940, "total_flos": 859515753161088.0, "train_loss": 1.5832800059288115, "train_runtime": 4642.7737, "train_samples_per_second": 75.599, "train_steps_per_second": 4.726 } ], "max_steps": 21940, "num_train_epochs": 4, "total_flos": 859515753161088.0, "trial_name": null, "trial_params": null }