josecannete's picture
adding model finetuned on QA (TAR)
5d22d0c
raw
history blame
No virus
15.8 kB
{
"best_metric": 66.26645652732091,
"best_model_checkpoint": "/data/jcanete/all_results/tar/albeto_tiny/epochs_4_bs_16_lr_5e-5/checkpoint-10800",
"epoch": 4.0,
"global_step": 21940,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"eval_exact_match": 25.279091769157993,
"eval_f1": 41.71460503665182,
"step": 300
},
{
"epoch": 0.09,
"learning_rate": 4.8867365542388335e-05,
"loss": 3.1064,
"step": 500
},
{
"epoch": 0.11,
"eval_exact_match": 33.77483443708609,
"eval_f1": 51.462032531677195,
"step": 600
},
{
"epoch": 0.16,
"eval_exact_match": 37.78618732261116,
"eval_f1": 55.147295523401176,
"step": 900
},
{
"epoch": 0.18,
"learning_rate": 4.7727894257064726e-05,
"loss": 2.4193,
"step": 1000
},
{
"epoch": 0.22,
"eval_exact_match": 39.735099337748345,
"eval_f1": 57.70943484117446,
"step": 1200
},
{
"epoch": 0.27,
"learning_rate": 4.6588422971741116e-05,
"loss": 2.2494,
"step": 1500
},
{
"epoch": 0.27,
"eval_exact_match": 40.69063386944182,
"eval_f1": 58.161449729726456,
"step": 1500
},
{
"epoch": 0.33,
"eval_exact_match": 41.72185430463576,
"eval_f1": 59.240651392335586,
"step": 1800
},
{
"epoch": 0.36,
"learning_rate": 4.544895168641751e-05,
"loss": 2.1889,
"step": 2000
},
{
"epoch": 0.38,
"eval_exact_match": 41.627246925260174,
"eval_f1": 60.05133975647362,
"step": 2100
},
{
"epoch": 0.44,
"eval_exact_match": 43.33964049195837,
"eval_f1": 61.334205952553624,
"step": 2400
},
{
"epoch": 0.46,
"learning_rate": 4.431175934366454e-05,
"loss": 2.0954,
"step": 2500
},
{
"epoch": 0.49,
"eval_exact_match": 44.1438032166509,
"eval_f1": 61.6109819191811,
"step": 2700
},
{
"epoch": 0.55,
"learning_rate": 4.317228805834093e-05,
"loss": 2.0585,
"step": 3000
},
{
"epoch": 0.55,
"eval_exact_match": 44.55061494796594,
"eval_f1": 62.74097437748746,
"step": 3000
},
{
"epoch": 0.6,
"eval_exact_match": 45.14664143803217,
"eval_f1": 62.716820973684904,
"step": 3300
},
{
"epoch": 0.64,
"learning_rate": 4.203281677301732e-05,
"loss": 2.0252,
"step": 3500
},
{
"epoch": 0.66,
"eval_exact_match": 45.22232734153264,
"eval_f1": 63.111925308146255,
"step": 3600
},
{
"epoch": 0.71,
"eval_exact_match": 45.13718070009461,
"eval_f1": 63.12474940531198,
"step": 3900
},
{
"epoch": 0.73,
"learning_rate": 4.0893345487693714e-05,
"loss": 1.9861,
"step": 4000
},
{
"epoch": 0.77,
"eval_exact_match": 46.21570482497635,
"eval_f1": 64.00817566022319,
"step": 4200
},
{
"epoch": 0.82,
"learning_rate": 3.975615314494075e-05,
"loss": 1.9881,
"step": 4500
},
{
"epoch": 0.82,
"eval_exact_match": 46.72658467360454,
"eval_f1": 64.45739871415881,
"step": 4500
},
{
"epoch": 0.88,
"eval_exact_match": 46.31031220435194,
"eval_f1": 64.03851598846974,
"step": 4800
},
{
"epoch": 0.91,
"learning_rate": 3.861668185961714e-05,
"loss": 1.9619,
"step": 5000
},
{
"epoch": 0.93,
"eval_exact_match": 46.57521286660359,
"eval_f1": 64.71289725300291,
"step": 5100
},
{
"epoch": 0.98,
"eval_exact_match": 46.80227057710501,
"eval_f1": 64.89896140882486,
"step": 5400
},
{
"epoch": 1.0,
"learning_rate": 3.747721057429353e-05,
"loss": 1.8961,
"step": 5500
},
{
"epoch": 1.04,
"eval_exact_match": 46.868495742667925,
"eval_f1": 64.9566553926742,
"step": 5700
},
{
"epoch": 1.09,
"learning_rate": 3.633773928896992e-05,
"loss": 1.6613,
"step": 6000
},
{
"epoch": 1.09,
"eval_exact_match": 47.05771050141911,
"eval_f1": 65.16060187033897,
"step": 6000
},
{
"epoch": 1.15,
"eval_exact_match": 47.010406811731315,
"eval_f1": 64.63527459887219,
"step": 6300
},
{
"epoch": 1.19,
"learning_rate": 3.520054694621696e-05,
"loss": 1.6724,
"step": 6500
},
{
"epoch": 1.2,
"eval_exact_match": 46.44276253547777,
"eval_f1": 64.6485660215026,
"step": 6600
},
{
"epoch": 1.26,
"eval_exact_match": 47.02932828760643,
"eval_f1": 65.25056698142618,
"step": 6900
},
{
"epoch": 1.28,
"learning_rate": 3.406107566089335e-05,
"loss": 1.6622,
"step": 7000
},
{
"epoch": 1.31,
"eval_exact_match": 47.38883632923368,
"eval_f1": 65.14765281416761,
"step": 7200
},
{
"epoch": 1.37,
"learning_rate": 3.292160437556974e-05,
"loss": 1.6765,
"step": 7500
},
{
"epoch": 1.37,
"eval_exact_match": 47.52128666035951,
"eval_f1": 65.23959133286817,
"step": 7500
},
{
"epoch": 1.42,
"eval_exact_match": 47.24692526017029,
"eval_f1": 64.89498224105805,
"step": 7800
},
{
"epoch": 1.46,
"learning_rate": 3.178213309024613e-05,
"loss": 1.6831,
"step": 8000
},
{
"epoch": 1.48,
"eval_exact_match": 47.42667928098392,
"eval_f1": 65.43419870447086,
"step": 8100
},
{
"epoch": 1.53,
"eval_exact_match": 47.237464522232735,
"eval_f1": 65.29424940770944,
"step": 8400
},
{
"epoch": 1.55,
"learning_rate": 3.0644940747493164e-05,
"loss": 1.6652,
"step": 8500
},
{
"epoch": 1.59,
"eval_exact_match": 47.74834437086093,
"eval_f1": 65.62503101399187,
"step": 8700
},
{
"epoch": 1.64,
"learning_rate": 2.9505469462169554e-05,
"loss": 1.6622,
"step": 9000
},
{
"epoch": 1.64,
"eval_exact_match": 48.04162724692526,
"eval_f1": 66.13531062019833,
"step": 9000
},
{
"epoch": 1.7,
"eval_exact_match": 47.6631977294229,
"eval_f1": 65.69495765526571,
"step": 9300
},
{
"epoch": 1.73,
"learning_rate": 2.836599817684594e-05,
"loss": 1.6282,
"step": 9500
},
{
"epoch": 1.75,
"eval_exact_match": 48.15515610217597,
"eval_f1": 65.85406198831797,
"step": 9600
},
{
"epoch": 1.8,
"eval_exact_match": 48.34437086092715,
"eval_f1": 65.82648855586231,
"step": 9900
},
{
"epoch": 1.82,
"learning_rate": 2.7226526891522335e-05,
"loss": 1.6311,
"step": 10000
},
{
"epoch": 1.86,
"eval_exact_match": 48.17407757805109,
"eval_f1": 65.95821129767383,
"step": 10200
},
{
"epoch": 1.91,
"learning_rate": 2.6089334548769374e-05,
"loss": 1.6879,
"step": 10500
},
{
"epoch": 1.91,
"eval_exact_match": 48.070009460737936,
"eval_f1": 65.61674709032437,
"step": 10500
},
{
"epoch": 1.97,
"eval_exact_match": 48.33491012298959,
"eval_f1": 66.26645652732091,
"step": 10800
},
{
"epoch": 2.01,
"learning_rate": 2.4949863263445765e-05,
"loss": 1.6127,
"step": 11000
},
{
"epoch": 2.02,
"eval_exact_match": 47.776726584673604,
"eval_f1": 65.67716714026636,
"step": 11100
},
{
"epoch": 2.08,
"eval_exact_match": 46.97256385998108,
"eval_f1": 65.35488377881,
"step": 11400
},
{
"epoch": 2.1,
"learning_rate": 2.3810391978122152e-05,
"loss": 1.3442,
"step": 11500
},
{
"epoch": 2.13,
"eval_exact_match": 47.25638599810785,
"eval_f1": 65.11588968827414,
"step": 11700
},
{
"epoch": 2.19,
"learning_rate": 2.2670920692798542e-05,
"loss": 1.3419,
"step": 12000
},
{
"epoch": 2.19,
"eval_exact_match": 47.918637653736994,
"eval_f1": 65.76384490998669,
"step": 12000
},
{
"epoch": 2.24,
"eval_exact_match": 47.71996215704825,
"eval_f1": 65.51523648050365,
"step": 12300
},
{
"epoch": 2.28,
"learning_rate": 2.1533728350045578e-05,
"loss": 1.3392,
"step": 12500
},
{
"epoch": 2.3,
"eval_exact_match": 47.81456953642384,
"eval_f1": 65.72328974412012,
"step": 12600
},
{
"epoch": 2.35,
"eval_exact_match": 46.868495742667925,
"eval_f1": 64.95043374963731,
"step": 12900
},
{
"epoch": 2.37,
"learning_rate": 2.0396536007292617e-05,
"loss": 1.3365,
"step": 13000
},
{
"epoch": 2.41,
"eval_exact_match": 47.47398297067171,
"eval_f1": 65.46687084562042,
"step": 13200
},
{
"epoch": 2.46,
"learning_rate": 1.9257064721969007e-05,
"loss": 1.3863,
"step": 13500
},
{
"epoch": 2.46,
"eval_exact_match": 47.64427625354778,
"eval_f1": 65.54344572464974,
"step": 13500
},
{
"epoch": 2.52,
"eval_exact_match": 47.70104068117313,
"eval_f1": 65.29014560945414,
"step": 13800
},
{
"epoch": 2.55,
"learning_rate": 1.8117593436645398e-05,
"loss": 1.364,
"step": 14000
},
{
"epoch": 2.57,
"eval_exact_match": 47.918637653736994,
"eval_f1": 65.66170183373373,
"step": 14100
},
{
"epoch": 2.63,
"eval_exact_match": 48.33491012298959,
"eval_f1": 65.94289128043252,
"step": 14400
},
{
"epoch": 2.64,
"learning_rate": 1.697812215132179e-05,
"loss": 1.3864,
"step": 14500
},
{
"epoch": 2.68,
"eval_exact_match": 48.24976348155156,
"eval_f1": 65.78253411845539,
"step": 14700
},
{
"epoch": 2.73,
"learning_rate": 1.583865086599818e-05,
"loss": 1.3636,
"step": 15000
},
{
"epoch": 2.73,
"eval_exact_match": 48.438978240302745,
"eval_f1": 66.07660779936401,
"step": 15000
},
{
"epoch": 2.79,
"eval_exact_match": 48.16461684011353,
"eval_f1": 66.2498588216324,
"step": 15300
},
{
"epoch": 2.83,
"learning_rate": 1.469917958067457e-05,
"loss": 1.3726,
"step": 15500
},
{
"epoch": 2.84,
"eval_exact_match": 47.95648060548723,
"eval_f1": 65.7260651497257,
"step": 15600
},
{
"epoch": 2.9,
"eval_exact_match": 47.88079470198676,
"eval_f1": 66.01379305937202,
"step": 15900
},
{
"epoch": 2.92,
"learning_rate": 1.355970829535096e-05,
"loss": 1.379,
"step": 16000
},
{
"epoch": 2.95,
"eval_exact_match": 48.240302743614,
"eval_f1": 65.53479147043561,
"step": 16200
},
{
"epoch": 3.01,
"learning_rate": 1.2422515952597995e-05,
"loss": 1.3401,
"step": 16500
},
{
"epoch": 3.01,
"eval_exact_match": 48.221381267738884,
"eval_f1": 65.86023587215384,
"step": 16500
},
{
"epoch": 3.06,
"eval_exact_match": 47.615894039735096,
"eval_f1": 65.12853949917962,
"step": 16800
},
{
"epoch": 3.1,
"learning_rate": 1.1283044667274386e-05,
"loss": 1.1281,
"step": 17000
},
{
"epoch": 3.12,
"eval_exact_match": 47.12393566698202,
"eval_f1": 65.15991467915553,
"step": 17100
},
{
"epoch": 3.17,
"eval_exact_match": 47.21854304635762,
"eval_f1": 65.02752189426162,
"step": 17400
},
{
"epoch": 3.19,
"learning_rate": 1.0143573381950776e-05,
"loss": 1.1161,
"step": 17500
},
{
"epoch": 3.23,
"eval_exact_match": 47.13339640491959,
"eval_f1": 65.2429493800908,
"step": 17700
},
{
"epoch": 3.28,
"learning_rate": 9.004102096627165e-06,
"loss": 1.1337,
"step": 18000
},
{
"epoch": 3.28,
"eval_exact_match": 47.010406811731315,
"eval_f1": 65.03577782296541,
"step": 18000
},
{
"epoch": 3.34,
"eval_exact_match": 47.379375591296125,
"eval_f1": 65.13654689416043,
"step": 18300
},
{
"epoch": 3.37,
"learning_rate": 7.864630811303556e-06,
"loss": 1.1477,
"step": 18500
},
{
"epoch": 3.39,
"eval_exact_match": 47.35099337748344,
"eval_f1": 64.86398318568581,
"step": 18600
},
{
"epoch": 3.45,
"eval_exact_match": 47.30368968779565,
"eval_f1": 65.05463221900013,
"step": 18900
},
{
"epoch": 3.46,
"learning_rate": 6.725159525979946e-06,
"loss": 1.1414,
"step": 19000
},
{
"epoch": 3.5,
"eval_exact_match": 46.97256385998108,
"eval_f1": 65.11680930447929,
"step": 19200
},
{
"epoch": 3.56,
"learning_rate": 5.587967183226983e-06,
"loss": 1.1454,
"step": 19500
},
{
"epoch": 3.56,
"eval_exact_match": 47.13339640491959,
"eval_f1": 65.165125839751,
"step": 19500
},
{
"epoch": 3.61,
"eval_exact_match": 47.20908230842006,
"eval_f1": 65.09005212297234,
"step": 19800
},
{
"epoch": 3.65,
"learning_rate": 4.448495897903373e-06,
"loss": 1.1318,
"step": 20000
},
{
"epoch": 3.66,
"eval_exact_match": 46.95364238410596,
"eval_f1": 65.02854138983005,
"step": 20100
},
{
"epoch": 3.72,
"eval_exact_match": 46.773888363292336,
"eval_f1": 64.98586781544525,
"step": 20400
},
{
"epoch": 3.74,
"learning_rate": 3.3090246125797635e-06,
"loss": 1.124,
"step": 20500
},
{
"epoch": 3.77,
"eval_exact_match": 46.76442762535478,
"eval_f1": 65.02603195736303,
"step": 20700
},
{
"epoch": 3.83,
"learning_rate": 2.169553327256153e-06,
"loss": 1.1301,
"step": 21000
},
{
"epoch": 3.83,
"eval_exact_match": 46.88741721854305,
"eval_f1": 65.03187807653661,
"step": 21000
},
{
"epoch": 3.88,
"eval_exact_match": 47.086092715231786,
"eval_f1": 65.13748531716237,
"step": 21300
},
{
"epoch": 3.92,
"learning_rate": 1.0323609845031905e-06,
"loss": 1.1298,
"step": 21500
},
{
"epoch": 3.94,
"eval_exact_match": 47.095553453169344,
"eval_f1": 65.03629129514118,
"step": 21600
},
{
"epoch": 3.99,
"eval_exact_match": 47.095553453169344,
"eval_f1": 65.08861259333764,
"step": 21900
},
{
"epoch": 4.0,
"step": 21940,
"total_flos": 859515753161088.0,
"train_loss": 1.5832800059288115,
"train_runtime": 4642.7737,
"train_samples_per_second": 75.599,
"train_steps_per_second": 4.726
}
],
"max_steps": 21940,
"num_train_epochs": 4,
"total_flos": 859515753161088.0,
"trial_name": null,
"trial_params": null
}