{ "best_metric": 66.41312856742334, "best_model_checkpoint": "/data/jcanete/all_results/mlqa/beto_uncased/epochs_4_bs_16_lr_3e-5/checkpoint-9000", "epoch": 4.0, "global_step": 20508, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "eval_exact_match": 23.4, "eval_f1": 39.87710850110139, "step": 300 }, { "epoch": 0.1, "learning_rate": 2.9271503803393797e-05, "loss": 3.1626, "step": 500 }, { "epoch": 0.12, "eval_exact_match": 28.4, "eval_f1": 46.509597922029684, "step": 600 }, { "epoch": 0.18, "eval_exact_match": 33.0, "eval_f1": 52.66068755924698, "step": 900 }, { "epoch": 0.2, "learning_rate": 2.8540081919251023e-05, "loss": 2.4525, "step": 1000 }, { "epoch": 0.23, "eval_exact_match": 35.4, "eval_f1": 56.28184887315056, "step": 1200 }, { "epoch": 0.29, "learning_rate": 2.780866003510825e-05, "loss": 2.3232, "step": 1500 }, { "epoch": 0.29, "eval_exact_match": 35.4, "eval_f1": 56.05406960312001, "step": 1500 }, { "epoch": 0.35, "eval_exact_match": 33.2, "eval_f1": 56.31194498874277, "step": 1800 }, { "epoch": 0.39, "learning_rate": 2.707723815096548e-05, "loss": 2.2057, "step": 2000 }, { "epoch": 0.41, "eval_exact_match": 37.6, "eval_f1": 60.7500417692224, "step": 2100 }, { "epoch": 0.47, "eval_exact_match": 38.6, "eval_f1": 61.9826845317547, "step": 2400 }, { "epoch": 0.49, "learning_rate": 2.634727911059099e-05, "loss": 2.1759, "step": 2500 }, { "epoch": 0.53, "eval_exact_match": 39.8, "eval_f1": 64.26755886141255, "step": 2700 }, { "epoch": 0.59, "learning_rate": 2.5615857226448216e-05, "loss": 2.115, "step": 3000 }, { "epoch": 0.59, "eval_exact_match": 35.6, "eval_f1": 58.87759205203179, "step": 3000 }, { "epoch": 0.64, "eval_exact_match": 38.0, "eval_f1": 63.1853755598259, "step": 3300 }, { "epoch": 0.68, "learning_rate": 2.4884435342305442e-05, "loss": 2.0455, "step": 3500 }, { "epoch": 0.7, "eval_exact_match": 38.6, "eval_f1": 62.17141554579418, "step": 3600 }, { "epoch": 0.76, "eval_exact_match": 40.8, "eval_f1": 63.814839462786935, "step": 3900 }, { "epoch": 0.78, "learning_rate": 2.415301345816267e-05, "loss": 2.0378, "step": 4000 }, { "epoch": 0.82, "eval_exact_match": 38.2, "eval_f1": 63.452901777349766, "step": 4200 }, { "epoch": 0.88, "learning_rate": 2.3421591574019895e-05, "loss": 1.9839, "step": 4500 }, { "epoch": 0.88, "eval_exact_match": 40.6, "eval_f1": 64.348618765079, "step": 4500 }, { "epoch": 0.94, "eval_exact_match": 39.4, "eval_f1": 62.57530588719265, "step": 4800 }, { "epoch": 0.98, "learning_rate": 2.269163253364541e-05, "loss": 1.9543, "step": 5000 }, { "epoch": 0.99, "eval_exact_match": 37.8, "eval_f1": 62.47954703797214, "step": 5100 }, { "epoch": 1.05, "eval_exact_match": 38.8, "eval_f1": 62.7759255252296, "step": 5400 }, { "epoch": 1.07, "learning_rate": 2.196167349327092e-05, "loss": 1.7389, "step": 5500 }, { "epoch": 1.11, "eval_exact_match": 40.0, "eval_f1": 63.39625245328542, "step": 5700 }, { "epoch": 1.17, "learning_rate": 2.1230251609128147e-05, "loss": 1.6611, "step": 6000 }, { "epoch": 1.17, "eval_exact_match": 40.0, "eval_f1": 63.10882532261943, "step": 6000 }, { "epoch": 1.23, "eval_exact_match": 39.0, "eval_f1": 63.122784024749606, "step": 6300 }, { "epoch": 1.27, "learning_rate": 2.0498829724985373e-05, "loss": 1.6013, "step": 6500 }, { "epoch": 1.29, "eval_exact_match": 40.4, "eval_f1": 64.3832532899268, "step": 6600 }, { "epoch": 1.35, "eval_exact_match": 41.2, "eval_f1": 65.07150560199364, "step": 6900 }, { "epoch": 1.37, "learning_rate": 1.97674078408426e-05, "loss": 1.6499, "step": 7000 }, { "epoch": 1.4, "eval_exact_match": 40.8, "eval_f1": 63.757805668028354, "step": 7200 }, { "epoch": 1.46, "learning_rate": 1.9035985956699825e-05, "loss": 1.6441, "step": 7500 }, { "epoch": 1.46, "eval_exact_match": 40.0, "eval_f1": 64.51824146230119, "step": 7500 }, { "epoch": 1.52, "eval_exact_match": 41.8, "eval_f1": 64.5385854544337, "step": 7800 }, { "epoch": 1.56, "learning_rate": 1.830456407255705e-05, "loss": 1.6383, "step": 8000 }, { "epoch": 1.58, "eval_exact_match": 41.4, "eval_f1": 65.35549998349387, "step": 8100 }, { "epoch": 1.64, "eval_exact_match": 40.6, "eval_f1": 64.45859595523329, "step": 8400 }, { "epoch": 1.66, "learning_rate": 1.7573142188414278e-05, "loss": 1.5992, "step": 8500 }, { "epoch": 1.7, "eval_exact_match": 42.6, "eval_f1": 66.32210814293916, "step": 8700 }, { "epoch": 1.76, "learning_rate": 1.6841720304271504e-05, "loss": 1.6092, "step": 9000 }, { "epoch": 1.76, "eval_exact_match": 44.0, "eval_f1": 66.41312856742334, "step": 9000 }, { "epoch": 1.81, "eval_exact_match": 42.0, "eval_f1": 65.70423087204887, "step": 9300 }, { "epoch": 1.85, "learning_rate": 1.611029842012873e-05, "loss": 1.637, "step": 9500 }, { "epoch": 1.87, "eval_exact_match": 43.0, "eval_f1": 65.2546958366835, "step": 9600 }, { "epoch": 1.93, "eval_exact_match": 42.2, "eval_f1": 66.10494154148009, "step": 9900 }, { "epoch": 1.95, "learning_rate": 1.538033937975424e-05, "loss": 1.5978, "step": 10000 }, { "epoch": 1.99, "eval_exact_match": 40.0, "eval_f1": 63.806088000887634, "step": 10200 }, { "epoch": 2.05, "learning_rate": 1.4650380339379754e-05, "loss": 1.4462, "step": 10500 }, { "epoch": 2.05, "eval_exact_match": 40.6, "eval_f1": 65.01325773791683, "step": 10500 }, { "epoch": 2.11, "eval_exact_match": 41.2, "eval_f1": 64.50214370703992, "step": 10800 }, { "epoch": 2.15, "learning_rate": 1.3918958455236982e-05, "loss": 1.2958, "step": 11000 }, { "epoch": 2.17, "eval_exact_match": 40.6, "eval_f1": 65.36716761169852, "step": 11100 }, { "epoch": 2.22, "eval_exact_match": 40.8, "eval_f1": 64.11070880878887, "step": 11400 }, { "epoch": 2.24, "learning_rate": 1.3187536571094208e-05, "loss": 1.3027, "step": 11500 }, { "epoch": 2.28, "eval_exact_match": 39.2, "eval_f1": 63.682573407854235, "step": 11700 }, { "epoch": 2.34, "learning_rate": 1.2456114686951432e-05, "loss": 1.2731, "step": 12000 }, { "epoch": 2.34, "eval_exact_match": 39.8, "eval_f1": 63.59941485060206, "step": 12000 }, { "epoch": 2.4, "eval_exact_match": 37.8, "eval_f1": 62.92318349722304, "step": 12300 }, { "epoch": 2.44, "learning_rate": 1.172469280280866e-05, "loss": 1.3022, "step": 12500 }, { "epoch": 2.46, "eval_exact_match": 39.4, "eval_f1": 64.09272643867237, "step": 12600 }, { "epoch": 2.52, "eval_exact_match": 40.6, "eval_f1": 65.27753571541867, "step": 12900 }, { "epoch": 2.54, "learning_rate": 1.0993270918665886e-05, "loss": 1.3036, "step": 13000 }, { "epoch": 2.57, "eval_exact_match": 40.4, "eval_f1": 64.22110332511052, "step": 13200 }, { "epoch": 2.63, "learning_rate": 1.0261849034523113e-05, "loss": 1.2821, "step": 13500 }, { "epoch": 2.63, "eval_exact_match": 39.8, "eval_f1": 63.332714021581985, "step": 13500 }, { "epoch": 2.69, "eval_exact_match": 39.6, "eval_f1": 63.578162829605446, "step": 13800 }, { "epoch": 2.73, "learning_rate": 9.531889994148626e-06, "loss": 1.2877, "step": 14000 }, { "epoch": 2.75, "eval_exact_match": 40.0, "eval_f1": 63.35184925186776, "step": 14100 }, { "epoch": 2.81, "eval_exact_match": 40.2, "eval_f1": 64.16356452794574, "step": 14400 }, { "epoch": 2.83, "learning_rate": 8.800468110005852e-06, "loss": 1.2603, "step": 14500 }, { "epoch": 2.87, "eval_exact_match": 41.2, "eval_f1": 64.11806249593292, "step": 14700 }, { "epoch": 2.93, "learning_rate": 8.069046225863078e-06, "loss": 1.2756, "step": 15000 }, { "epoch": 2.93, "eval_exact_match": 40.4, "eval_f1": 63.824419143757055, "step": 15000 }, { "epoch": 2.98, "eval_exact_match": 40.6, "eval_f1": 64.24451284277517, "step": 15300 }, { "epoch": 3.02, "learning_rate": 7.337624341720305e-06, "loss": 1.2121, "step": 15500 }, { "epoch": 3.04, "eval_exact_match": 40.0, "eval_f1": 63.53412990760953, "step": 15600 }, { "epoch": 3.1, "eval_exact_match": 39.0, "eval_f1": 62.80074251846947, "step": 15900 }, { "epoch": 3.12, "learning_rate": 6.606202457577531e-06, "loss": 1.0572, "step": 16000 }, { "epoch": 3.16, "eval_exact_match": 39.6, "eval_f1": 63.656775539732564, "step": 16200 }, { "epoch": 3.22, "learning_rate": 5.874780573434757e-06, "loss": 1.0379, "step": 16500 }, { "epoch": 3.22, "eval_exact_match": 40.2, "eval_f1": 63.43415180275925, "step": 16500 }, { "epoch": 3.28, "eval_exact_match": 40.6, "eval_f1": 64.05233541882214, "step": 16800 }, { "epoch": 3.32, "learning_rate": 5.143358689291984e-06, "loss": 1.0491, "step": 17000 }, { "epoch": 3.34, "eval_exact_match": 38.4, "eval_f1": 63.21753166648462, "step": 17100 }, { "epoch": 3.39, "eval_exact_match": 38.4, "eval_f1": 63.12095402408347, "step": 17400 }, { "epoch": 3.41, "learning_rate": 4.4119368051492096e-06, "loss": 1.0581, "step": 17500 }, { "epoch": 3.45, "eval_exact_match": 40.2, "eval_f1": 64.48156998952923, "step": 17700 }, { "epoch": 3.51, "learning_rate": 3.681977764774722e-06, "loss": 1.0731, "step": 18000 }, { "epoch": 3.51, "eval_exact_match": 40.0, "eval_f1": 64.22656695334973, "step": 18000 }, { "epoch": 3.57, "eval_exact_match": 40.0, "eval_f1": 63.74403257605126, "step": 18300 }, { "epoch": 3.61, "learning_rate": 2.9505558806319486e-06, "loss": 1.0486, "step": 18500 }, { "epoch": 3.63, "eval_exact_match": 39.8, "eval_f1": 63.72890744409074, "step": 18600 }, { "epoch": 3.69, "eval_exact_match": 40.6, "eval_f1": 64.93728425900885, "step": 18900 }, { "epoch": 3.71, "learning_rate": 2.219133996489175e-06, "loss": 1.0584, "step": 19000 }, { "epoch": 3.74, "eval_exact_match": 39.8, "eval_f1": 64.31547047147733, "step": 19200 }, { "epoch": 3.8, "learning_rate": 1.4877121123464014e-06, "loss": 1.0494, "step": 19500 }, { "epoch": 3.8, "eval_exact_match": 40.6, "eval_f1": 64.73621892117005, "step": 19500 }, { "epoch": 3.86, "eval_exact_match": 40.2, "eval_f1": 64.61480926659202, "step": 19800 }, { "epoch": 3.9, "learning_rate": 7.577530719719134e-07, "loss": 1.058, "step": 20000 }, { "epoch": 3.92, "eval_exact_match": 40.4, "eval_f1": 64.58939713486411, "step": 20100 }, { "epoch": 3.98, "eval_exact_match": 40.4, "eval_f1": 64.5227927048114, "step": 20400 }, { "epoch": 4.0, "learning_rate": 2.633118782913985e-08, "loss": 1.0348, "step": 20500 }, { "epoch": 4.0, "step": 20508, "total_flos": 5.891975668325875e+16, "train_loss": 1.551024980565505, "train_runtime": 35295.3339, "train_samples_per_second": 9.296, "train_steps_per_second": 0.581 } ], "max_steps": 20508, "num_train_epochs": 4, "total_flos": 5.891975668325875e+16, "trial_name": null, "trial_params": null }