josecannete's picture
adding model finetuned on QA (MLQA)
343a691
{
"best_metric": 66.41312856742334,
"best_model_checkpoint": "/data/jcanete/all_results/mlqa/beto_uncased/epochs_4_bs_16_lr_3e-5/checkpoint-9000",
"epoch": 4.0,
"global_step": 20508,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"eval_exact_match": 23.4,
"eval_f1": 39.87710850110139,
"step": 300
},
{
"epoch": 0.1,
"learning_rate": 2.9271503803393797e-05,
"loss": 3.1626,
"step": 500
},
{
"epoch": 0.12,
"eval_exact_match": 28.4,
"eval_f1": 46.509597922029684,
"step": 600
},
{
"epoch": 0.18,
"eval_exact_match": 33.0,
"eval_f1": 52.66068755924698,
"step": 900
},
{
"epoch": 0.2,
"learning_rate": 2.8540081919251023e-05,
"loss": 2.4525,
"step": 1000
},
{
"epoch": 0.23,
"eval_exact_match": 35.4,
"eval_f1": 56.28184887315056,
"step": 1200
},
{
"epoch": 0.29,
"learning_rate": 2.780866003510825e-05,
"loss": 2.3232,
"step": 1500
},
{
"epoch": 0.29,
"eval_exact_match": 35.4,
"eval_f1": 56.05406960312001,
"step": 1500
},
{
"epoch": 0.35,
"eval_exact_match": 33.2,
"eval_f1": 56.31194498874277,
"step": 1800
},
{
"epoch": 0.39,
"learning_rate": 2.707723815096548e-05,
"loss": 2.2057,
"step": 2000
},
{
"epoch": 0.41,
"eval_exact_match": 37.6,
"eval_f1": 60.7500417692224,
"step": 2100
},
{
"epoch": 0.47,
"eval_exact_match": 38.6,
"eval_f1": 61.9826845317547,
"step": 2400
},
{
"epoch": 0.49,
"learning_rate": 2.634727911059099e-05,
"loss": 2.1759,
"step": 2500
},
{
"epoch": 0.53,
"eval_exact_match": 39.8,
"eval_f1": 64.26755886141255,
"step": 2700
},
{
"epoch": 0.59,
"learning_rate": 2.5615857226448216e-05,
"loss": 2.115,
"step": 3000
},
{
"epoch": 0.59,
"eval_exact_match": 35.6,
"eval_f1": 58.87759205203179,
"step": 3000
},
{
"epoch": 0.64,
"eval_exact_match": 38.0,
"eval_f1": 63.1853755598259,
"step": 3300
},
{
"epoch": 0.68,
"learning_rate": 2.4884435342305442e-05,
"loss": 2.0455,
"step": 3500
},
{
"epoch": 0.7,
"eval_exact_match": 38.6,
"eval_f1": 62.17141554579418,
"step": 3600
},
{
"epoch": 0.76,
"eval_exact_match": 40.8,
"eval_f1": 63.814839462786935,
"step": 3900
},
{
"epoch": 0.78,
"learning_rate": 2.415301345816267e-05,
"loss": 2.0378,
"step": 4000
},
{
"epoch": 0.82,
"eval_exact_match": 38.2,
"eval_f1": 63.452901777349766,
"step": 4200
},
{
"epoch": 0.88,
"learning_rate": 2.3421591574019895e-05,
"loss": 1.9839,
"step": 4500
},
{
"epoch": 0.88,
"eval_exact_match": 40.6,
"eval_f1": 64.348618765079,
"step": 4500
},
{
"epoch": 0.94,
"eval_exact_match": 39.4,
"eval_f1": 62.57530588719265,
"step": 4800
},
{
"epoch": 0.98,
"learning_rate": 2.269163253364541e-05,
"loss": 1.9543,
"step": 5000
},
{
"epoch": 0.99,
"eval_exact_match": 37.8,
"eval_f1": 62.47954703797214,
"step": 5100
},
{
"epoch": 1.05,
"eval_exact_match": 38.8,
"eval_f1": 62.7759255252296,
"step": 5400
},
{
"epoch": 1.07,
"learning_rate": 2.196167349327092e-05,
"loss": 1.7389,
"step": 5500
},
{
"epoch": 1.11,
"eval_exact_match": 40.0,
"eval_f1": 63.39625245328542,
"step": 5700
},
{
"epoch": 1.17,
"learning_rate": 2.1230251609128147e-05,
"loss": 1.6611,
"step": 6000
},
{
"epoch": 1.17,
"eval_exact_match": 40.0,
"eval_f1": 63.10882532261943,
"step": 6000
},
{
"epoch": 1.23,
"eval_exact_match": 39.0,
"eval_f1": 63.122784024749606,
"step": 6300
},
{
"epoch": 1.27,
"learning_rate": 2.0498829724985373e-05,
"loss": 1.6013,
"step": 6500
},
{
"epoch": 1.29,
"eval_exact_match": 40.4,
"eval_f1": 64.3832532899268,
"step": 6600
},
{
"epoch": 1.35,
"eval_exact_match": 41.2,
"eval_f1": 65.07150560199364,
"step": 6900
},
{
"epoch": 1.37,
"learning_rate": 1.97674078408426e-05,
"loss": 1.6499,
"step": 7000
},
{
"epoch": 1.4,
"eval_exact_match": 40.8,
"eval_f1": 63.757805668028354,
"step": 7200
},
{
"epoch": 1.46,
"learning_rate": 1.9035985956699825e-05,
"loss": 1.6441,
"step": 7500
},
{
"epoch": 1.46,
"eval_exact_match": 40.0,
"eval_f1": 64.51824146230119,
"step": 7500
},
{
"epoch": 1.52,
"eval_exact_match": 41.8,
"eval_f1": 64.5385854544337,
"step": 7800
},
{
"epoch": 1.56,
"learning_rate": 1.830456407255705e-05,
"loss": 1.6383,
"step": 8000
},
{
"epoch": 1.58,
"eval_exact_match": 41.4,
"eval_f1": 65.35549998349387,
"step": 8100
},
{
"epoch": 1.64,
"eval_exact_match": 40.6,
"eval_f1": 64.45859595523329,
"step": 8400
},
{
"epoch": 1.66,
"learning_rate": 1.7573142188414278e-05,
"loss": 1.5992,
"step": 8500
},
{
"epoch": 1.7,
"eval_exact_match": 42.6,
"eval_f1": 66.32210814293916,
"step": 8700
},
{
"epoch": 1.76,
"learning_rate": 1.6841720304271504e-05,
"loss": 1.6092,
"step": 9000
},
{
"epoch": 1.76,
"eval_exact_match": 44.0,
"eval_f1": 66.41312856742334,
"step": 9000
},
{
"epoch": 1.81,
"eval_exact_match": 42.0,
"eval_f1": 65.70423087204887,
"step": 9300
},
{
"epoch": 1.85,
"learning_rate": 1.611029842012873e-05,
"loss": 1.637,
"step": 9500
},
{
"epoch": 1.87,
"eval_exact_match": 43.0,
"eval_f1": 65.2546958366835,
"step": 9600
},
{
"epoch": 1.93,
"eval_exact_match": 42.2,
"eval_f1": 66.10494154148009,
"step": 9900
},
{
"epoch": 1.95,
"learning_rate": 1.538033937975424e-05,
"loss": 1.5978,
"step": 10000
},
{
"epoch": 1.99,
"eval_exact_match": 40.0,
"eval_f1": 63.806088000887634,
"step": 10200
},
{
"epoch": 2.05,
"learning_rate": 1.4650380339379754e-05,
"loss": 1.4462,
"step": 10500
},
{
"epoch": 2.05,
"eval_exact_match": 40.6,
"eval_f1": 65.01325773791683,
"step": 10500
},
{
"epoch": 2.11,
"eval_exact_match": 41.2,
"eval_f1": 64.50214370703992,
"step": 10800
},
{
"epoch": 2.15,
"learning_rate": 1.3918958455236982e-05,
"loss": 1.2958,
"step": 11000
},
{
"epoch": 2.17,
"eval_exact_match": 40.6,
"eval_f1": 65.36716761169852,
"step": 11100
},
{
"epoch": 2.22,
"eval_exact_match": 40.8,
"eval_f1": 64.11070880878887,
"step": 11400
},
{
"epoch": 2.24,
"learning_rate": 1.3187536571094208e-05,
"loss": 1.3027,
"step": 11500
},
{
"epoch": 2.28,
"eval_exact_match": 39.2,
"eval_f1": 63.682573407854235,
"step": 11700
},
{
"epoch": 2.34,
"learning_rate": 1.2456114686951432e-05,
"loss": 1.2731,
"step": 12000
},
{
"epoch": 2.34,
"eval_exact_match": 39.8,
"eval_f1": 63.59941485060206,
"step": 12000
},
{
"epoch": 2.4,
"eval_exact_match": 37.8,
"eval_f1": 62.92318349722304,
"step": 12300
},
{
"epoch": 2.44,
"learning_rate": 1.172469280280866e-05,
"loss": 1.3022,
"step": 12500
},
{
"epoch": 2.46,
"eval_exact_match": 39.4,
"eval_f1": 64.09272643867237,
"step": 12600
},
{
"epoch": 2.52,
"eval_exact_match": 40.6,
"eval_f1": 65.27753571541867,
"step": 12900
},
{
"epoch": 2.54,
"learning_rate": 1.0993270918665886e-05,
"loss": 1.3036,
"step": 13000
},
{
"epoch": 2.57,
"eval_exact_match": 40.4,
"eval_f1": 64.22110332511052,
"step": 13200
},
{
"epoch": 2.63,
"learning_rate": 1.0261849034523113e-05,
"loss": 1.2821,
"step": 13500
},
{
"epoch": 2.63,
"eval_exact_match": 39.8,
"eval_f1": 63.332714021581985,
"step": 13500
},
{
"epoch": 2.69,
"eval_exact_match": 39.6,
"eval_f1": 63.578162829605446,
"step": 13800
},
{
"epoch": 2.73,
"learning_rate": 9.531889994148626e-06,
"loss": 1.2877,
"step": 14000
},
{
"epoch": 2.75,
"eval_exact_match": 40.0,
"eval_f1": 63.35184925186776,
"step": 14100
},
{
"epoch": 2.81,
"eval_exact_match": 40.2,
"eval_f1": 64.16356452794574,
"step": 14400
},
{
"epoch": 2.83,
"learning_rate": 8.800468110005852e-06,
"loss": 1.2603,
"step": 14500
},
{
"epoch": 2.87,
"eval_exact_match": 41.2,
"eval_f1": 64.11806249593292,
"step": 14700
},
{
"epoch": 2.93,
"learning_rate": 8.069046225863078e-06,
"loss": 1.2756,
"step": 15000
},
{
"epoch": 2.93,
"eval_exact_match": 40.4,
"eval_f1": 63.824419143757055,
"step": 15000
},
{
"epoch": 2.98,
"eval_exact_match": 40.6,
"eval_f1": 64.24451284277517,
"step": 15300
},
{
"epoch": 3.02,
"learning_rate": 7.337624341720305e-06,
"loss": 1.2121,
"step": 15500
},
{
"epoch": 3.04,
"eval_exact_match": 40.0,
"eval_f1": 63.53412990760953,
"step": 15600
},
{
"epoch": 3.1,
"eval_exact_match": 39.0,
"eval_f1": 62.80074251846947,
"step": 15900
},
{
"epoch": 3.12,
"learning_rate": 6.606202457577531e-06,
"loss": 1.0572,
"step": 16000
},
{
"epoch": 3.16,
"eval_exact_match": 39.6,
"eval_f1": 63.656775539732564,
"step": 16200
},
{
"epoch": 3.22,
"learning_rate": 5.874780573434757e-06,
"loss": 1.0379,
"step": 16500
},
{
"epoch": 3.22,
"eval_exact_match": 40.2,
"eval_f1": 63.43415180275925,
"step": 16500
},
{
"epoch": 3.28,
"eval_exact_match": 40.6,
"eval_f1": 64.05233541882214,
"step": 16800
},
{
"epoch": 3.32,
"learning_rate": 5.143358689291984e-06,
"loss": 1.0491,
"step": 17000
},
{
"epoch": 3.34,
"eval_exact_match": 38.4,
"eval_f1": 63.21753166648462,
"step": 17100
},
{
"epoch": 3.39,
"eval_exact_match": 38.4,
"eval_f1": 63.12095402408347,
"step": 17400
},
{
"epoch": 3.41,
"learning_rate": 4.4119368051492096e-06,
"loss": 1.0581,
"step": 17500
},
{
"epoch": 3.45,
"eval_exact_match": 40.2,
"eval_f1": 64.48156998952923,
"step": 17700
},
{
"epoch": 3.51,
"learning_rate": 3.681977764774722e-06,
"loss": 1.0731,
"step": 18000
},
{
"epoch": 3.51,
"eval_exact_match": 40.0,
"eval_f1": 64.22656695334973,
"step": 18000
},
{
"epoch": 3.57,
"eval_exact_match": 40.0,
"eval_f1": 63.74403257605126,
"step": 18300
},
{
"epoch": 3.61,
"learning_rate": 2.9505558806319486e-06,
"loss": 1.0486,
"step": 18500
},
{
"epoch": 3.63,
"eval_exact_match": 39.8,
"eval_f1": 63.72890744409074,
"step": 18600
},
{
"epoch": 3.69,
"eval_exact_match": 40.6,
"eval_f1": 64.93728425900885,
"step": 18900
},
{
"epoch": 3.71,
"learning_rate": 2.219133996489175e-06,
"loss": 1.0584,
"step": 19000
},
{
"epoch": 3.74,
"eval_exact_match": 39.8,
"eval_f1": 64.31547047147733,
"step": 19200
},
{
"epoch": 3.8,
"learning_rate": 1.4877121123464014e-06,
"loss": 1.0494,
"step": 19500
},
{
"epoch": 3.8,
"eval_exact_match": 40.6,
"eval_f1": 64.73621892117005,
"step": 19500
},
{
"epoch": 3.86,
"eval_exact_match": 40.2,
"eval_f1": 64.61480926659202,
"step": 19800
},
{
"epoch": 3.9,
"learning_rate": 7.577530719719134e-07,
"loss": 1.058,
"step": 20000
},
{
"epoch": 3.92,
"eval_exact_match": 40.4,
"eval_f1": 64.58939713486411,
"step": 20100
},
{
"epoch": 3.98,
"eval_exact_match": 40.4,
"eval_f1": 64.5227927048114,
"step": 20400
},
{
"epoch": 4.0,
"learning_rate": 2.633118782913985e-08,
"loss": 1.0348,
"step": 20500
},
{
"epoch": 4.0,
"step": 20508,
"total_flos": 5.891975668325875e+16,
"train_loss": 1.551024980565505,
"train_runtime": 35295.3339,
"train_samples_per_second": 9.296,
"train_steps_per_second": 0.581
}
],
"max_steps": 20508,
"num_train_epochs": 4,
"total_flos": 5.891975668325875e+16,
"trial_name": null,
"trial_params": null
}