InfoCSE-bert-base-rtd / trainer_state.json
Ma787639046's picture
init
96d71cc
raw
history blame
65.1 kB
{
"best_metric": 0.8583291200488784,
"best_model_checkpoint": "condenser-bert-base-uncased_mix_0.00001_cls_output_dir",
"epoch": 2.0,
"global_step": 31252,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"electra_acc": 97.8496,
"electra_fix_acc": 0.9447,
"electra_rep_acc": 0.0242,
"epoch": 0.0,
"learning_rate": 6.999776014335083e-06,
"loss": 0.0046,
"neg_sim": -0.0126,
"pos_sim": 0.6661,
"step": 1
},
{
"epoch": 0.01,
"eval_avg_sts": 0.6467281681849183,
"eval_sickr_spearman": 0.6493190116334423,
"eval_stsb_spearman": 0.6441373247363943,
"step": 125
},
{
"epoch": 0.02,
"eval_avg_sts": 0.6766102197245321,
"eval_sickr_spearman": 0.6747404807226937,
"eval_stsb_spearman": 0.6784799587263706,
"step": 250
},
{
"epoch": 0.02,
"eval_avg_sts": 0.6927318121134369,
"eval_sickr_spearman": 0.6849395969264682,
"eval_stsb_spearman": 0.7005240273004057,
"step": 375
},
{
"electra_acc": 111.3766,
"electra_fix_acc": 0.9699,
"electra_rep_acc": 0.4695,
"epoch": 0.03,
"learning_rate": 6.888007167541277e-06,
"loss": 0.0027,
"neg_sim": -0.0136,
"pos_sim": 0.7499,
"step": 500
},
{
"epoch": 0.03,
"eval_avg_sts": 0.70406785938114,
"eval_sickr_spearman": 0.6912377231336422,
"eval_stsb_spearman": 0.716897995628638,
"step": 500
},
{
"epoch": 0.04,
"eval_avg_sts": 0.7079843162896757,
"eval_sickr_spearman": 0.6933601214439895,
"eval_stsb_spearman": 0.7226085111353618,
"step": 625
},
{
"epoch": 0.05,
"eval_avg_sts": 0.721992790338854,
"eval_sickr_spearman": 0.7041983394834997,
"eval_stsb_spearman": 0.7397872411942082,
"step": 750
},
{
"epoch": 0.06,
"eval_avg_sts": 0.7371963659388359,
"eval_sickr_spearman": 0.7161540010979675,
"eval_stsb_spearman": 0.7582387307797042,
"step": 875
},
{
"electra_acc": 115.0045,
"electra_fix_acc": 0.9686,
"electra_rep_acc": 0.6116,
"epoch": 0.06,
"learning_rate": 6.776014335082554e-06,
"loss": 0.002,
"neg_sim": -0.0139,
"pos_sim": 0.7675,
"step": 1000
},
{
"epoch": 0.06,
"eval_avg_sts": 0.7419491613050363,
"eval_sickr_spearman": 0.7199929829426116,
"eval_stsb_spearman": 0.763905339667461,
"step": 1000
},
{
"epoch": 0.07,
"eval_avg_sts": 0.7503468807257706,
"eval_sickr_spearman": 0.7253389886259777,
"eval_stsb_spearman": 0.7753547728255635,
"step": 1125
},
{
"epoch": 0.08,
"eval_avg_sts": 0.755381635152361,
"eval_sickr_spearman": 0.7283938147085431,
"eval_stsb_spearman": 0.782369455596179,
"step": 1250
},
{
"epoch": 0.09,
"eval_avg_sts": 0.7557951064861299,
"eval_sickr_spearman": 0.7286210018183454,
"eval_stsb_spearman": 0.7829692111539145,
"step": 1375
},
{
"electra_acc": 115.5115,
"electra_fix_acc": 0.9698,
"electra_rep_acc": 0.6302,
"epoch": 0.1,
"learning_rate": 6.6640215026238325e-06,
"loss": 0.0017,
"neg_sim": -0.014,
"pos_sim": 0.7738,
"step": 1500
},
{
"epoch": 0.1,
"eval_avg_sts": 0.7624381952103951,
"eval_sickr_spearman": 0.7329619086730178,
"eval_stsb_spearman": 0.7919144817477726,
"step": 1500
},
{
"epoch": 0.1,
"eval_avg_sts": 0.765407531009575,
"eval_sickr_spearman": 0.7350173996590618,
"eval_stsb_spearman": 0.7957976623600883,
"step": 1625
},
{
"epoch": 0.11,
"eval_avg_sts": 0.7663640946087169,
"eval_sickr_spearman": 0.7356288836114704,
"eval_stsb_spearman": 0.7970993056059634,
"step": 1750
},
{
"epoch": 0.12,
"eval_avg_sts": 0.7654280480014255,
"eval_sickr_spearman": 0.7361625571795493,
"eval_stsb_spearman": 0.7946935388233016,
"step": 1875
},
{
"electra_acc": 115.9922,
"electra_fix_acc": 0.971,
"electra_rep_acc": 0.641,
"epoch": 0.13,
"learning_rate": 6.55202867016511e-06,
"loss": 0.0016,
"neg_sim": -0.014,
"pos_sim": 0.7751,
"step": 2000
},
{
"epoch": 0.13,
"eval_avg_sts": 0.7672864050175499,
"eval_sickr_spearman": 0.7369789418106929,
"eval_stsb_spearman": 0.7975938682244069,
"step": 2000
},
{
"epoch": 0.14,
"eval_avg_sts": 0.7747771973615925,
"eval_sickr_spearman": 0.7388607043028352,
"eval_stsb_spearman": 0.8106936904203497,
"step": 2125
},
{
"epoch": 0.14,
"eval_avg_sts": 0.7774171758966744,
"eval_sickr_spearman": 0.7346681655205053,
"eval_stsb_spearman": 0.8201661862728435,
"step": 2250
},
{
"epoch": 0.15,
"eval_avg_sts": 0.7804372234935246,
"eval_sickr_spearman": 0.7373037281186174,
"eval_stsb_spearman": 0.8235707188684319,
"step": 2375
},
{
"electra_acc": 116.1787,
"electra_fix_acc": 0.971,
"electra_rep_acc": 0.6492,
"epoch": 0.16,
"learning_rate": 6.440035837706387e-06,
"loss": 0.0018,
"neg_sim": -0.0139,
"pos_sim": 0.7692,
"step": 2500
},
{
"epoch": 0.16,
"eval_avg_sts": 0.7810767988630156,
"eval_sickr_spearman": 0.7360565525386754,
"eval_stsb_spearman": 0.8260970451873557,
"step": 2500
},
{
"epoch": 0.17,
"eval_avg_sts": 0.7810534850927242,
"eval_sickr_spearman": 0.737071930023077,
"eval_stsb_spearman": 0.8250350401623715,
"step": 2625
},
{
"epoch": 0.18,
"eval_avg_sts": 0.7840637132723827,
"eval_sickr_spearman": 0.7369239942306477,
"eval_stsb_spearman": 0.8312034323141176,
"step": 2750
},
{
"epoch": 0.18,
"eval_avg_sts": 0.7807650924553761,
"eval_sickr_spearman": 0.736852331827302,
"eval_stsb_spearman": 0.8246778530834501,
"step": 2875
},
{
"electra_acc": 116.4021,
"electra_fix_acc": 0.9711,
"electra_rep_acc": 0.6575,
"epoch": 0.19,
"learning_rate": 6.328043005247664e-06,
"loss": 0.0017,
"neg_sim": -0.014,
"pos_sim": 0.778,
"step": 3000
},
{
"epoch": 0.19,
"eval_avg_sts": 0.7812090218351604,
"eval_sickr_spearman": 0.7374819235049528,
"eval_stsb_spearman": 0.824936120165368,
"step": 3000
},
{
"epoch": 0.2,
"eval_avg_sts": 0.7792184022991786,
"eval_sickr_spearman": 0.735666443932795,
"eval_stsb_spearman": 0.8227703606655621,
"step": 3125
},
{
"epoch": 0.21,
"eval_avg_sts": 0.7868425974055346,
"eval_sickr_spearman": 0.7406432345394065,
"eval_stsb_spearman": 0.8330419602716627,
"step": 3250
},
{
"epoch": 0.22,
"eval_avg_sts": 0.7879152712258332,
"eval_sickr_spearman": 0.74201269730361,
"eval_stsb_spearman": 0.8338178451480563,
"step": 3375
},
{
"electra_acc": 116.4857,
"electra_fix_acc": 0.9706,
"electra_rep_acc": 0.6652,
"epoch": 0.22,
"learning_rate": 6.2160501727889415e-06,
"loss": 0.0016,
"neg_sim": -0.014,
"pos_sim": 0.7831,
"step": 3500
},
{
"epoch": 0.22,
"eval_avg_sts": 0.7874505134915601,
"eval_sickr_spearman": 0.7415818102926086,
"eval_stsb_spearman": 0.8333192166905118,
"step": 3500
},
{
"epoch": 0.23,
"eval_avg_sts": 0.7885881180801638,
"eval_sickr_spearman": 0.745498890708191,
"eval_stsb_spearman": 0.8316773454521367,
"step": 3625
},
{
"epoch": 0.24,
"eval_avg_sts": 0.786897125097908,
"eval_sickr_spearman": 0.7427211560498221,
"eval_stsb_spearman": 0.8310730941459938,
"step": 3750
},
{
"epoch": 0.25,
"eval_avg_sts": 0.7884858023873396,
"eval_sickr_spearman": 0.7450858232358233,
"eval_stsb_spearman": 0.831885781538856,
"step": 3875
},
{
"electra_acc": 116.6361,
"electra_fix_acc": 0.9706,
"electra_rep_acc": 0.6699,
"epoch": 0.26,
"learning_rate": 6.104057340330219e-06,
"loss": 0.0016,
"neg_sim": -0.014,
"pos_sim": 0.7846,
"step": 4000
},
{
"epoch": 0.26,
"eval_avg_sts": 0.7883005639004375,
"eval_sickr_spearman": 0.7467304081490642,
"eval_stsb_spearman": 0.8298707196518108,
"step": 4000
},
{
"epoch": 0.26,
"eval_avg_sts": 0.7890018606938185,
"eval_sickr_spearman": 0.7466418868291138,
"eval_stsb_spearman": 0.8313618345585232,
"step": 4125
},
{
"epoch": 0.27,
"eval_avg_sts": 0.794939161048242,
"eval_sickr_spearman": 0.7542289276433795,
"eval_stsb_spearman": 0.8356493944531046,
"step": 4250
},
{
"epoch": 0.28,
"eval_avg_sts": 0.7950124813232355,
"eval_sickr_spearman": 0.7546040025145097,
"eval_stsb_spearman": 0.8354209601319615,
"step": 4375
},
{
"electra_acc": 116.6274,
"electra_fix_acc": 0.9702,
"electra_rep_acc": 0.6733,
"epoch": 0.29,
"learning_rate": 5.992064507871497e-06,
"loss": 0.0016,
"neg_sim": -0.0141,
"pos_sim": 0.7871,
"step": 4500
},
{
"epoch": 0.29,
"eval_avg_sts": 0.7938800981934206,
"eval_sickr_spearman": 0.7533463081233527,
"eval_stsb_spearman": 0.8344138882634886,
"step": 4500
},
{
"epoch": 0.3,
"eval_avg_sts": 0.794615358586916,
"eval_sickr_spearman": 0.7546161063520722,
"eval_stsb_spearman": 0.8346146108217597,
"step": 4625
},
{
"epoch": 0.3,
"eval_avg_sts": 0.7945912313688637,
"eval_sickr_spearman": 0.7546232629861865,
"eval_stsb_spearman": 0.8345591997515407,
"step": 4750
},
{
"epoch": 0.31,
"eval_avg_sts": 0.7926504221766928,
"eval_sickr_spearman": 0.7513844777851131,
"eval_stsb_spearman": 0.8339163665682726,
"step": 4875
},
{
"electra_acc": 116.73,
"electra_fix_acc": 0.9701,
"electra_rep_acc": 0.6783,
"epoch": 0.32,
"learning_rate": 5.880071675412773e-06,
"loss": 0.0015,
"neg_sim": -0.0141,
"pos_sim": 0.7904,
"step": 5000
},
{
"epoch": 0.32,
"eval_avg_sts": 0.7934518878705803,
"eval_sickr_spearman": 0.7517016271479089,
"eval_stsb_spearman": 0.8352021485932517,
"step": 5000
},
{
"epoch": 0.33,
"eval_avg_sts": 0.7947730330766363,
"eval_sickr_spearman": 0.7522364054313209,
"eval_stsb_spearman": 0.8373096607219516,
"step": 5125
},
{
"epoch": 0.34,
"eval_avg_sts": 0.7957221976017312,
"eval_sickr_spearman": 0.7467118201128077,
"eval_stsb_spearman": 0.8447325750906549,
"step": 5250
},
{
"epoch": 0.34,
"eval_avg_sts": 0.7897362921108364,
"eval_sickr_spearman": 0.7382444172402828,
"eval_stsb_spearman": 0.8412281669813901,
"step": 5375
},
{
"electra_acc": 116.8512,
"electra_fix_acc": 0.9704,
"electra_rep_acc": 0.6807,
"epoch": 0.35,
"learning_rate": 5.7680788429540505e-06,
"loss": 0.0016,
"neg_sim": -0.014,
"pos_sim": 0.7834,
"step": 5500
},
{
"epoch": 0.35,
"eval_avg_sts": 0.791664811568233,
"eval_sickr_spearman": 0.7412088968210431,
"eval_stsb_spearman": 0.8421207263154228,
"step": 5500
},
{
"epoch": 0.36,
"eval_avg_sts": 0.791979531720516,
"eval_sickr_spearman": 0.7422650046794644,
"eval_stsb_spearman": 0.8416940587615678,
"step": 5625
},
{
"epoch": 0.37,
"eval_avg_sts": 0.7909414570864208,
"eval_sickr_spearman": 0.7407966939085013,
"eval_stsb_spearman": 0.8410862202643404,
"step": 5750
},
{
"epoch": 0.38,
"eval_avg_sts": 0.7916033250892136,
"eval_sickr_spearman": 0.7420070776647416,
"eval_stsb_spearman": 0.8411995725136856,
"step": 5875
},
{
"electra_acc": 116.9386,
"electra_fix_acc": 0.9702,
"electra_rep_acc": 0.6839,
"epoch": 0.38,
"learning_rate": 5.656086010495328e-06,
"loss": 0.0014,
"neg_sim": -0.014,
"pos_sim": 0.7867,
"step": 6000
},
{
"epoch": 0.38,
"eval_avg_sts": 0.7939838516948429,
"eval_sickr_spearman": 0.7457015819562598,
"eval_stsb_spearman": 0.8422661214334259,
"step": 6000
},
{
"epoch": 0.39,
"eval_avg_sts": 0.7897949803868374,
"eval_sickr_spearman": 0.7381373559151772,
"eval_stsb_spearman": 0.8414526048584976,
"step": 6125
},
{
"epoch": 0.4,
"eval_avg_sts": 0.792929873578434,
"eval_sickr_spearman": 0.7426651998166468,
"eval_stsb_spearman": 0.8431945473402213,
"step": 6250
},
{
"epoch": 0.41,
"eval_avg_sts": 0.7941689576028486,
"eval_sickr_spearman": 0.7447726604544468,
"eval_stsb_spearman": 0.8435652547512503,
"step": 6375
},
{
"electra_acc": 116.9876,
"electra_fix_acc": 0.9702,
"electra_rep_acc": 0.6869,
"epoch": 0.42,
"learning_rate": 5.544093178036605e-06,
"loss": 0.0014,
"neg_sim": -0.0141,
"pos_sim": 0.7923,
"step": 6500
},
{
"epoch": 0.42,
"eval_avg_sts": 0.7943810788058159,
"eval_sickr_spearman": 0.7449998475642491,
"eval_stsb_spearman": 0.8437623100473826,
"step": 6500
},
{
"epoch": 0.42,
"eval_avg_sts": 0.7943037355857338,
"eval_sickr_spearman": 0.7452223756572118,
"eval_stsb_spearman": 0.8433850955142557,
"step": 6625
},
{
"epoch": 0.43,
"eval_avg_sts": 0.7941830032512052,
"eval_sickr_spearman": 0.7457148385402568,
"eval_stsb_spearman": 0.8426511679621534,
"step": 6750
},
{
"epoch": 0.44,
"eval_avg_sts": 0.7945109790018022,
"eval_sickr_spearman": 0.7458948591084468,
"eval_stsb_spearman": 0.8431270988951577,
"step": 6875
},
{
"electra_acc": 117.061,
"electra_fix_acc": 0.9704,
"electra_rep_acc": 0.6883,
"epoch": 0.45,
"learning_rate": 5.432100345577883e-06,
"loss": 0.0014,
"neg_sim": -0.0142,
"pos_sim": 0.8,
"step": 7000
},
{
"epoch": 0.45,
"eval_avg_sts": 0.7988457805161364,
"eval_sickr_spearman": 0.7515535952932767,
"eval_stsb_spearman": 0.846137965738996,
"step": 7000
},
{
"epoch": 0.46,
"eval_avg_sts": 0.7981158834848314,
"eval_sickr_spearman": 0.75075815222236,
"eval_stsb_spearman": 0.845473614747303,
"step": 7125
},
{
"epoch": 0.46,
"eval_avg_sts": 0.7984443557563039,
"eval_sickr_spearman": 0.7501957080245197,
"eval_stsb_spearman": 0.8466930034880882,
"step": 7250
},
{
"epoch": 0.47,
"eval_avg_sts": 0.7981127107574888,
"eval_sickr_spearman": 0.750318523550897,
"eval_stsb_spearman": 0.8459068979640807,
"step": 7375
},
{
"electra_acc": 117.069,
"electra_fix_acc": 0.9701,
"electra_rep_acc": 0.6888,
"epoch": 0.48,
"learning_rate": 5.32010751311916e-06,
"loss": 0.0015,
"neg_sim": -0.0142,
"pos_sim": 0.8001,
"step": 7500
},
{
"epoch": 0.48,
"eval_avg_sts": 0.7955868437200273,
"eval_sickr_spearman": 0.7487511246176671,
"eval_stsb_spearman": 0.8424225628223875,
"step": 7500
},
{
"epoch": 0.49,
"eval_avg_sts": 0.7949766204622221,
"eval_sickr_spearman": 0.7481150007102207,
"eval_stsb_spearman": 0.8418382402142234,
"step": 7625
},
{
"epoch": 0.5,
"eval_avg_sts": 0.7950165968598919,
"eval_sickr_spearman": 0.7485309980797762,
"eval_stsb_spearman": 0.8415021956400077,
"step": 7750
},
{
"epoch": 0.5,
"eval_avg_sts": 0.7952250496118516,
"eval_sickr_spearman": 0.7486211044260741,
"eval_stsb_spearman": 0.8418289947976292,
"step": 7875
},
{
"electra_acc": 117.1619,
"electra_fix_acc": 0.9703,
"electra_rep_acc": 0.6937,
"epoch": 0.51,
"learning_rate": 5.2081146806604376e-06,
"loss": 0.0014,
"neg_sim": -0.0142,
"pos_sim": 0.7987,
"step": 8000
},
{
"epoch": 0.51,
"eval_avg_sts": 0.7952021451003002,
"eval_sickr_spearman": 0.7491326836874914,
"eval_stsb_spearman": 0.8412716065131091,
"step": 8000
},
{
"epoch": 0.52,
"eval_avg_sts": 0.7951490971039081,
"eval_sickr_spearman": 0.749455164502547,
"eval_stsb_spearman": 0.8408430297052691,
"step": 8125
},
{
"epoch": 0.53,
"eval_avg_sts": 0.7950116620178889,
"eval_sickr_spearman": 0.749741093649408,
"eval_stsb_spearman": 0.8402822303863696,
"step": 8250
},
{
"epoch": 0.54,
"eval_avg_sts": 0.7951408099684255,
"eval_sickr_spearman": 0.7501430179062422,
"eval_stsb_spearman": 0.8401386020306089,
"step": 8375
},
{
"electra_acc": 117.2641,
"electra_fix_acc": 0.9702,
"electra_rep_acc": 0.6954,
"epoch": 0.54,
"learning_rate": 5.096121848201715e-06,
"loss": 0.0014,
"neg_sim": -0.0142,
"pos_sim": 0.806,
"step": 8500
},
{
"epoch": 0.54,
"eval_avg_sts": 0.7949691120514141,
"eval_sickr_spearman": 0.7501533445930513,
"eval_stsb_spearman": 0.8397848795097769,
"step": 8500
},
{
"epoch": 0.55,
"eval_avg_sts": 0.7947062336358759,
"eval_sickr_spearman": 0.7503012323543793,
"eval_stsb_spearman": 0.8391112349173724,
"step": 8625
},
{
"epoch": 0.56,
"eval_avg_sts": 0.7944181183521635,
"eval_sickr_spearman": 0.7493049712483499,
"eval_stsb_spearman": 0.8395312654559769,
"step": 8750
},
{
"epoch": 0.57,
"eval_avg_sts": 0.7950044953143297,
"eval_sickr_spearman": 0.7496116017999308,
"eval_stsb_spearman": 0.8403973888287286,
"step": 8875
},
{
"electra_acc": 117.2706,
"electra_fix_acc": 0.9705,
"electra_rep_acc": 0.6956,
"epoch": 0.58,
"learning_rate": 4.984129015742992e-06,
"loss": 0.0014,
"neg_sim": -0.0143,
"pos_sim": 0.8093,
"step": 9000
},
{
"epoch": 0.58,
"eval_avg_sts": 0.7950848432290827,
"eval_sickr_spearman": 0.7493472386176154,
"eval_stsb_spearman": 0.8408224478405498,
"step": 9000
},
{
"epoch": 0.58,
"eval_avg_sts": 0.794847808534162,
"eval_sickr_spearman": 0.7489566016696193,
"eval_stsb_spearman": 0.8407390153987048,
"step": 9125
},
{
"epoch": 0.59,
"eval_avg_sts": 0.795644348129942,
"eval_sickr_spearman": 0.7493907547955183,
"eval_stsb_spearman": 0.8418979414643657,
"step": 9250
},
{
"epoch": 0.6,
"eval_avg_sts": 0.7966154624139274,
"eval_sickr_spearman": 0.7486175020934662,
"eval_stsb_spearman": 0.8446134227343888,
"step": 9375
},
{
"electra_acc": 117.2471,
"electra_fix_acc": 0.9703,
"electra_rep_acc": 0.6966,
"epoch": 0.61,
"learning_rate": 4.872136183284269e-06,
"loss": 0.0015,
"neg_sim": -0.0143,
"pos_sim": 0.8085,
"step": 9500
},
{
"epoch": 0.61,
"eval_avg_sts": 0.7962741237962319,
"eval_sickr_spearman": 0.7489400789707246,
"eval_stsb_spearman": 0.8436081686217393,
"step": 9500
},
{
"epoch": 0.62,
"eval_avg_sts": 0.7966016709380292,
"eval_sickr_spearman": 0.7497793264061527,
"eval_stsb_spearman": 0.8434240154699058,
"step": 9625
},
{
"epoch": 0.62,
"eval_avg_sts": 0.7969314966558516,
"eval_sickr_spearman": 0.750271933382502,
"eval_stsb_spearman": 0.8435910599292011,
"step": 9750
},
{
"epoch": 0.63,
"eval_avg_sts": 0.7965405773738363,
"eval_sickr_spearman": 0.7509994124448837,
"eval_stsb_spearman": 0.842081742302789,
"step": 9875
},
{
"electra_acc": 117.3599,
"electra_fix_acc": 0.9704,
"electra_rep_acc": 0.6998,
"epoch": 0.64,
"learning_rate": 4.760143350825547e-06,
"loss": 0.0014,
"neg_sim": -0.0143,
"pos_sim": 0.8077,
"step": 10000
},
{
"epoch": 0.64,
"eval_avg_sts": 0.7964394231656002,
"eval_sickr_spearman": 0.7509571931067197,
"eval_stsb_spearman": 0.8419216532244808,
"step": 10000
},
{
"epoch": 0.65,
"eval_avg_sts": 0.7967246593169031,
"eval_sickr_spearman": 0.7512494143278691,
"eval_stsb_spearman": 0.842199904305937,
"step": 10125
},
{
"epoch": 0.66,
"eval_avg_sts": 0.798107636355013,
"eval_sickr_spearman": 0.750279041985515,
"eval_stsb_spearman": 0.845936230724511,
"step": 10250
},
{
"epoch": 0.66,
"eval_avg_sts": 0.7980710535062169,
"eval_sickr_spearman": 0.7505661719099119,
"eval_stsb_spearman": 0.8455759351025218,
"step": 10375
},
{
"electra_acc": 117.3933,
"electra_fix_acc": 0.9703,
"electra_rep_acc": 0.7006,
"epoch": 0.67,
"learning_rate": 4.648150518366825e-06,
"loss": 0.0014,
"neg_sim": -0.0143,
"pos_sim": 0.8081,
"step": 10500
},
{
"epoch": 0.67,
"eval_avg_sts": 0.7985924819917696,
"eval_sickr_spearman": 0.7519674312632674,
"eval_stsb_spearman": 0.8452175327202718,
"step": 10500
},
{
"epoch": 0.68,
"eval_avg_sts": 0.7991692973398108,
"eval_sickr_spearman": 0.7515644683598212,
"eval_stsb_spearman": 0.8467741263198004,
"step": 10625
},
{
"epoch": 0.69,
"eval_avg_sts": 0.7987755187970038,
"eval_sickr_spearman": 0.751040334943309,
"eval_stsb_spearman": 0.8465107026506987,
"step": 10750
},
{
"epoch": 0.7,
"eval_avg_sts": 0.7986197258898549,
"eval_sickr_spearman": 0.7514156980010478,
"eval_stsb_spearman": 0.8458237537786619,
"step": 10875
},
{
"electra_acc": 117.4302,
"electra_fix_acc": 0.9705,
"electra_rep_acc": 0.7012,
"epoch": 0.7,
"learning_rate": 4.536157685908102e-06,
"loss": 0.0013,
"neg_sim": -0.0143,
"pos_sim": 0.8102,
"step": 11000
},
{
"epoch": 0.7,
"eval_avg_sts": 0.7996251236145968,
"eval_sickr_spearman": 0.7529341052108109,
"eval_stsb_spearman": 0.8463161420183827,
"step": 11000
},
{
"epoch": 0.71,
"eval_avg_sts": 0.7994727880295167,
"eval_sickr_spearman": 0.752963500244891,
"eval_stsb_spearman": 0.8459820758141423,
"step": 11125
},
{
"epoch": 0.72,
"eval_avg_sts": 0.7988993639342742,
"eval_sickr_spearman": 0.7527381383169434,
"eval_stsb_spearman": 0.845060589551605,
"step": 11250
},
{
"epoch": 0.73,
"eval_avg_sts": 0.7949700279790515,
"eval_sickr_spearman": 0.7504287549286974,
"eval_stsb_spearman": 0.8395113010294055,
"step": 11375
},
{
"electra_acc": 117.3738,
"electra_fix_acc": 0.9702,
"electra_rep_acc": 0.6995,
"epoch": 0.74,
"learning_rate": 4.424164853449379e-06,
"loss": 0.0014,
"neg_sim": -0.0143,
"pos_sim": 0.8157,
"step": 11500
},
{
"epoch": 0.74,
"eval_avg_sts": 0.7945615778286725,
"eval_sickr_spearman": 0.7499855239246266,
"eval_stsb_spearman": 0.8391376317327186,
"step": 11500
},
{
"epoch": 0.74,
"eval_avg_sts": 0.7934292933863014,
"eval_sickr_spearman": 0.7480520799673368,
"eval_stsb_spearman": 0.838806506805266,
"step": 11625
},
{
"epoch": 0.75,
"eval_avg_sts": 0.7937723514656398,
"eval_sickr_spearman": 0.7481406973494901,
"eval_stsb_spearman": 0.8394040055817897,
"step": 11750
},
{
"epoch": 0.76,
"eval_avg_sts": 0.7945405318685601,
"eval_sickr_spearman": 0.7490017989360725,
"eval_stsb_spearman": 0.8400792648010476,
"step": 11875
},
{
"electra_acc": 117.5412,
"electra_fix_acc": 0.9707,
"electra_rep_acc": 0.7042,
"epoch": 0.77,
"learning_rate": 4.312172020990656e-06,
"loss": 0.0016,
"neg_sim": -0.0143,
"pos_sim": 0.812,
"step": 12000
},
{
"epoch": 0.77,
"eval_avg_sts": 0.7960514566703415,
"eval_sickr_spearman": 0.7499365802322611,
"eval_stsb_spearman": 0.8421663331084219,
"step": 12000
},
{
"epoch": 0.78,
"eval_avg_sts": 0.7951893349270129,
"eval_sickr_spearman": 0.7490124138094904,
"eval_stsb_spearman": 0.8413662560445356,
"step": 12125
},
{
"epoch": 0.78,
"eval_avg_sts": 0.7937236608939515,
"eval_sickr_spearman": 0.7449763603556459,
"eval_stsb_spearman": 0.8424709614322572,
"step": 12250
},
{
"epoch": 0.79,
"eval_avg_sts": 0.7934627772608434,
"eval_sickr_spearman": 0.746114793521932,
"eval_stsb_spearman": 0.8408107609997548,
"step": 12375
},
{
"electra_acc": 117.4273,
"electra_fix_acc": 0.9702,
"electra_rep_acc": 0.7033,
"epoch": 0.8,
"learning_rate": 4.2001791885319345e-06,
"loss": 0.0016,
"neg_sim": -0.0143,
"pos_sim": 0.8084,
"step": 12500
},
{
"epoch": 0.8,
"eval_avg_sts": 0.793506856374318,
"eval_sickr_spearman": 0.7463023069419462,
"eval_stsb_spearman": 0.8407114058066898,
"step": 12500
},
{
"epoch": 0.81,
"eval_avg_sts": 0.7929774718082235,
"eval_sickr_spearman": 0.7465257476258366,
"eval_stsb_spearman": 0.8394291959906105,
"step": 12625
},
{
"epoch": 0.82,
"eval_avg_sts": 0.7907041469753885,
"eval_sickr_spearman": 0.7438328358926072,
"eval_stsb_spearman": 0.8375754580581698,
"step": 12750
},
{
"epoch": 0.82,
"eval_avg_sts": 0.7919377238754862,
"eval_sickr_spearman": 0.744254116683321,
"eval_stsb_spearman": 0.8396213310676515,
"step": 12875
},
{
"electra_acc": 117.4657,
"electra_fix_acc": 0.9701,
"electra_rep_acc": 0.7049,
"epoch": 0.83,
"learning_rate": 4.088186356073212e-06,
"loss": 0.0014,
"neg_sim": -0.0143,
"pos_sim": 0.8091,
"step": 13000
},
{
"epoch": 0.83,
"eval_avg_sts": 0.7950524625796482,
"eval_sickr_spearman": 0.7459233415515996,
"eval_stsb_spearman": 0.8441815836076968,
"step": 13000
},
{
"epoch": 0.84,
"eval_avg_sts": 0.7953046984165892,
"eval_sickr_spearman": 0.7460563877025831,
"eval_stsb_spearman": 0.8445530091305953,
"step": 13125
},
{
"epoch": 0.85,
"eval_avg_sts": 0.7960686224710865,
"eval_sickr_spearman": 0.745027801665286,
"eval_stsb_spearman": 0.8471094432768869,
"step": 13250
},
{
"epoch": 0.86,
"eval_avg_sts": 0.7964731409284048,
"eval_sickr_spearman": 0.7453551336215867,
"eval_stsb_spearman": 0.847591148235223,
"step": 13375
},
{
"electra_acc": 117.5481,
"electra_fix_acc": 0.9705,
"electra_rep_acc": 0.7049,
"epoch": 0.86,
"learning_rate": 3.976193523614489e-06,
"loss": 0.0014,
"neg_sim": -0.0143,
"pos_sim": 0.8115,
"step": 13500
},
{
"epoch": 0.86,
"eval_avg_sts": 0.7966766934403904,
"eval_sickr_spearman": 0.7463476483017039,
"eval_stsb_spearman": 0.8470057385790768,
"step": 13500
},
{
"epoch": 0.87,
"eval_avg_sts": 0.7966025495961442,
"eval_sickr_spearman": 0.7463816062904207,
"eval_stsb_spearman": 0.8468234929018676,
"step": 13625
},
{
"epoch": 0.88,
"eval_avg_sts": 0.7967818015292757,
"eval_sickr_spearman": 0.746880793527667,
"eval_stsb_spearman": 0.8466828095308844,
"step": 13750
},
{
"epoch": 0.89,
"eval_avg_sts": 0.7969535576693652,
"eval_sickr_spearman": 0.7471762808637142,
"eval_stsb_spearman": 0.8467308344750161,
"step": 13875
},
{
"electra_acc": 117.5482,
"electra_fix_acc": 0.9705,
"electra_rep_acc": 0.7068,
"epoch": 0.9,
"learning_rate": 3.864200691155766e-06,
"loss": 0.0013,
"neg_sim": -0.0143,
"pos_sim": 0.8142,
"step": 14000
},
{
"epoch": 0.9,
"eval_avg_sts": 0.7965629720194758,
"eval_sickr_spearman": 0.7468308350906325,
"eval_stsb_spearman": 0.8462951089483192,
"step": 14000
},
{
"epoch": 0.9,
"eval_avg_sts": 0.7965266307017043,
"eval_sickr_spearman": 0.7447568101909722,
"eval_stsb_spearman": 0.8482964512124362,
"step": 14125
},
{
"epoch": 0.91,
"eval_avg_sts": 0.7965480662045883,
"eval_sickr_spearman": 0.7448633912050633,
"eval_stsb_spearman": 0.8482327412041132,
"step": 14250
},
{
"epoch": 0.92,
"eval_avg_sts": 0.7964806384496487,
"eval_sickr_spearman": 0.745111231688484,
"eval_stsb_spearman": 0.8478500452108133,
"step": 14375
},
{
"electra_acc": 117.5208,
"electra_fix_acc": 0.9701,
"electra_rep_acc": 0.7071,
"epoch": 0.93,
"learning_rate": 3.7522078586970435e-06,
"loss": 0.0013,
"neg_sim": -0.0143,
"pos_sim": 0.8155,
"step": 14500
},
{
"epoch": 0.93,
"eval_avg_sts": 0.7971949399590785,
"eval_sickr_spearman": 0.7457434650767139,
"eval_stsb_spearman": 0.8486464148414431,
"step": 14500
},
{
"epoch": 0.94,
"eval_avg_sts": 0.7970029281099968,
"eval_sickr_spearman": 0.7458370776934168,
"eval_stsb_spearman": 0.8481687785265768,
"step": 14625
},
{
"epoch": 0.94,
"eval_avg_sts": 0.7991675032273975,
"eval_sickr_spearman": 0.7483727836316391,
"eval_stsb_spearman": 0.8499622228231559,
"step": 14750
},
{
"epoch": 0.95,
"eval_avg_sts": 0.79901349058513,
"eval_sickr_spearman": 0.748446895621158,
"eval_stsb_spearman": 0.8495800855491022,
"step": 14875
},
{
"electra_acc": 117.6139,
"electra_fix_acc": 0.9705,
"electra_rep_acc": 0.7091,
"epoch": 0.96,
"learning_rate": 3.6402150262383203e-06,
"loss": 0.0014,
"neg_sim": -0.0143,
"pos_sim": 0.8156,
"step": 15000
},
{
"epoch": 0.96,
"eval_avg_sts": 0.79914478688965,
"eval_sickr_spearman": 0.7468535118620501,
"eval_stsb_spearman": 0.8514360619172497,
"step": 15000
},
{
"epoch": 0.97,
"eval_avg_sts": 0.7991571351120899,
"eval_sickr_spearman": 0.7471076924508606,
"eval_stsb_spearman": 0.8512065777733191,
"step": 15125
},
{
"epoch": 0.98,
"eval_avg_sts": 0.8012474062999755,
"eval_sickr_spearman": 0.7502938355647578,
"eval_stsb_spearman": 0.8522009770351932,
"step": 15250
},
{
"epoch": 0.98,
"eval_avg_sts": 0.7993104516704408,
"eval_sickr_spearman": 0.7512376467080167,
"eval_stsb_spearman": 0.8473832566328647,
"step": 15375
},
{
"electra_acc": 117.5407,
"electra_fix_acc": 0.9702,
"electra_rep_acc": 0.7085,
"epoch": 0.99,
"learning_rate": 3.5282221937795976e-06,
"loss": 0.0015,
"neg_sim": -0.0143,
"pos_sim": 0.8109,
"step": 15500
},
{
"epoch": 0.99,
"eval_avg_sts": 0.7991830405974262,
"eval_sickr_spearman": 0.7510609883169274,
"eval_stsb_spearman": 0.847305092877925,
"step": 15500
},
{
"epoch": 1.0,
"eval_avg_sts": 0.7993142209919086,
"eval_sickr_spearman": 0.750953302587503,
"eval_stsb_spearman": 0.8476751393963143,
"step": 15625
},
{
"epoch": 1.01,
"eval_avg_sts": 0.79998042307191,
"eval_sickr_spearman": 0.7521459148362114,
"eval_stsb_spearman": 0.8478149313076085,
"step": 15750
},
{
"epoch": 1.02,
"eval_avg_sts": 0.8000408218838944,
"eval_sickr_spearman": 0.7522030718469228,
"eval_stsb_spearman": 0.847878571920866,
"step": 15875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.02,
"learning_rate": 3.4162293613208757e-06,
"loss": 0.0013,
"neg_sim": NaN,
"pos_sim": 0.8075,
"step": 16000
},
{
"epoch": 1.02,
"eval_avg_sts": 0.7998575815186055,
"eval_sickr_spearman": 0.7520637336216508,
"eval_stsb_spearman": 0.8476514294155602,
"step": 16000
},
{
"epoch": 1.03,
"eval_avg_sts": 0.7998976758754202,
"eval_sickr_spearman": 0.752151678568384,
"eval_stsb_spearman": 0.8476436731824563,
"step": 16125
},
{
"epoch": 1.04,
"eval_avg_sts": 0.7993375115090717,
"eval_sickr_spearman": 0.7512065225542848,
"eval_stsb_spearman": 0.8474685004638588,
"step": 16250
},
{
"epoch": 1.05,
"eval_avg_sts": 0.8005700206739319,
"eval_sickr_spearman": 0.7500328285448524,
"eval_stsb_spearman": 0.8511072128030113,
"step": 16375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.06,
"learning_rate": 3.304236528862153e-06,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.8139,
"step": 16500
},
{
"epoch": 1.06,
"eval_avg_sts": 0.800932566485014,
"eval_sickr_spearman": 0.7492046823085472,
"eval_stsb_spearman": 0.8526604506614809,
"step": 16500
},
{
"epoch": 1.06,
"eval_avg_sts": 0.8020790163363096,
"eval_sickr_spearman": 0.7523164732774181,
"eval_stsb_spearman": 0.851841559395201,
"step": 16625
},
{
"epoch": 1.07,
"eval_avg_sts": 0.8034376489964099,
"eval_sickr_spearman": 0.7546485273455429,
"eval_stsb_spearman": 0.8522267706472768,
"step": 16750
},
{
"epoch": 1.08,
"eval_avg_sts": 0.8035121251749653,
"eval_sickr_spearman": 0.7551173589266803,
"eval_stsb_spearman": 0.8519068914232504,
"step": 16875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.09,
"learning_rate": 3.19224369640343e-06,
"loss": 0.0015,
"neg_sim": NaN,
"pos_sim": 0.8123,
"step": 17000
},
{
"epoch": 1.09,
"eval_avg_sts": 0.8034498780822419,
"eval_sickr_spearman": 0.755224660407293,
"eval_stsb_spearman": 0.8516750957571909,
"step": 17000
},
{
"epoch": 1.1,
"eval_avg_sts": 0.8032831913725695,
"eval_sickr_spearman": 0.7549221124993344,
"eval_stsb_spearman": 0.8516442702458048,
"step": 17125
},
{
"epoch": 1.1,
"eval_avg_sts": 0.8025478170596181,
"eval_sickr_spearman": 0.7531311768200115,
"eval_stsb_spearman": 0.8519644572992248,
"step": 17250
},
{
"epoch": 1.11,
"eval_avg_sts": 0.8023474610208348,
"eval_sickr_spearman": 0.7529551428332407,
"eval_stsb_spearman": 0.8517397792084288,
"step": 17375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.12,
"learning_rate": 3.0802508639447074e-06,
"loss": 0.0015,
"neg_sim": NaN,
"pos_sim": 0.8124,
"step": 17500
},
{
"epoch": 1.12,
"eval_avg_sts": 0.8036377678015303,
"eval_sickr_spearman": 0.7540948728392658,
"eval_stsb_spearman": 0.8531806627637948,
"step": 17500
},
{
"epoch": 1.13,
"eval_avg_sts": 0.803436785093961,
"eval_sickr_spearman": 0.7537820462755993,
"eval_stsb_spearman": 0.8530915239123227,
"step": 17625
},
{
"epoch": 1.14,
"eval_avg_sts": 0.8034543332861956,
"eval_sickr_spearman": 0.753641363179487,
"eval_stsb_spearman": 0.8532673033929044,
"step": 17750
},
{
"epoch": 1.14,
"eval_avg_sts": 0.8031959700775069,
"eval_sickr_spearman": 0.7533919857008204,
"eval_stsb_spearman": 0.8529999544541933,
"step": 17875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.15,
"learning_rate": 2.9682580314859847e-06,
"loss": 0.0013,
"neg_sim": NaN,
"pos_sim": 0.8138,
"step": 18000
},
{
"epoch": 1.15,
"eval_avg_sts": 0.8033449509317736,
"eval_sickr_spearman": 0.7535802676184579,
"eval_stsb_spearman": 0.8531096342450895,
"step": 18000
},
{
"epoch": 1.16,
"eval_avg_sts": 0.803034323911963,
"eval_sickr_spearman": 0.7532389586116386,
"eval_stsb_spearman": 0.8528296892122876,
"step": 18125
},
{
"epoch": 1.17,
"eval_avg_sts": 0.8030336070807917,
"eval_sickr_spearman": 0.753408940679628,
"eval_stsb_spearman": 0.8526582734819556,
"step": 18250
},
{
"epoch": 1.18,
"eval_avg_sts": 0.8042466753465274,
"eval_sickr_spearman": 0.754791275779017,
"eval_stsb_spearman": 0.8537020749140378,
"step": 18375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.18,
"learning_rate": 2.8562651990272623e-06,
"loss": 0.0013,
"neg_sim": NaN,
"pos_sim": 0.8154,
"step": 18500
},
{
"epoch": 1.18,
"eval_avg_sts": 0.8041866669158593,
"eval_sickr_spearman": 0.7544020317329624,
"eval_stsb_spearman": 0.8539713020987562,
"step": 18500
},
{
"epoch": 1.19,
"eval_avg_sts": 0.8040941868331957,
"eval_sickr_spearman": 0.7544721091099607,
"eval_stsb_spearman": 0.8537162645564307,
"step": 18625
},
{
"epoch": 1.2,
"eval_avg_sts": 0.8041380482719174,
"eval_sickr_spearman": 0.7546320526777496,
"eval_stsb_spearman": 0.8536440438660853,
"step": 18750
},
{
"epoch": 1.21,
"eval_avg_sts": 0.8038765751251531,
"eval_sickr_spearman": 0.7544602934590069,
"eval_stsb_spearman": 0.8532928567912992,
"step": 18875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.22,
"learning_rate": 2.7442723665685396e-06,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.8165,
"step": 19000
},
{
"epoch": 1.22,
"eval_avg_sts": 0.8066267695432612,
"eval_sickr_spearman": 0.7576009030887407,
"eval_stsb_spearman": 0.8556526359977817,
"step": 19000
},
{
"epoch": 1.22,
"eval_avg_sts": 0.8079773606909346,
"eval_sickr_spearman": 0.7579651709620474,
"eval_stsb_spearman": 0.8579895504198217,
"step": 19125
},
{
"epoch": 1.23,
"eval_avg_sts": 0.8076160011490612,
"eval_sickr_spearman": 0.7573030141976216,
"eval_stsb_spearman": 0.8579289881005008,
"step": 19250
},
{
"epoch": 1.24,
"eval_avg_sts": 0.8076237482649742,
"eval_sickr_spearman": 0.7578992242597729,
"eval_stsb_spearman": 0.8573482722701754,
"step": 19375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.25,
"learning_rate": 2.632279534109817e-06,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.817,
"step": 19500
},
{
"epoch": 1.25,
"eval_avg_sts": 0.8074421455549705,
"eval_sickr_spearman": 0.7572703530486437,
"eval_stsb_spearman": 0.8576139380612975,
"step": 19500
},
{
"epoch": 1.26,
"eval_avg_sts": 0.8072772138963991,
"eval_sickr_spearman": 0.7571532532233376,
"eval_stsb_spearman": 0.8574011745694605,
"step": 19625
},
{
"epoch": 1.26,
"eval_avg_sts": 0.8073302425810744,
"eval_sickr_spearman": 0.7571811592932731,
"eval_stsb_spearman": 0.8574793258688757,
"step": 19750
},
{
"epoch": 1.27,
"eval_avg_sts": 0.8072437293168806,
"eval_sickr_spearman": 0.7571653090297986,
"eval_stsb_spearman": 0.8573221496039627,
"step": 19875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.28,
"learning_rate": 2.5202867016510945e-06,
"loss": 0.0013,
"neg_sim": NaN,
"pos_sim": 0.819,
"step": 20000
},
{
"epoch": 1.28,
"eval_avg_sts": 0.8077588380658864,
"eval_sickr_spearman": 0.7571885560828946,
"eval_stsb_spearman": 0.8583291200488784,
"step": 20000
},
{
"epoch": 1.29,
"eval_avg_sts": 0.8073697057890559,
"eval_sickr_spearman": 0.7569122331563212,
"eval_stsb_spearman": 0.8578271784217905,
"step": 20125
},
{
"epoch": 1.3,
"eval_avg_sts": 0.8072112242255804,
"eval_sickr_spearman": 0.7568430683702502,
"eval_stsb_spearman": 0.8575793800809108,
"step": 20250
},
{
"epoch": 1.3,
"eval_avg_sts": 0.8070283089413652,
"eval_sickr_spearman": 0.7565875429105995,
"eval_stsb_spearman": 0.8574690749721309,
"step": 20375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.31,
"learning_rate": 2.4082938691923718e-06,
"loss": 0.0013,
"neg_sim": NaN,
"pos_sim": 0.8202,
"step": 20500
},
{
"epoch": 1.31,
"eval_avg_sts": 0.806707361091437,
"eval_sickr_spearman": 0.7562538228178075,
"eval_stsb_spearman": 0.8571608993650666,
"step": 20500
},
{
"epoch": 1.32,
"eval_avg_sts": 0.8065118723390399,
"eval_sickr_spearman": 0.7561714014477396,
"eval_stsb_spearman": 0.85685234323034,
"step": 20625
},
{
"epoch": 1.33,
"eval_avg_sts": 0.8067084472431667,
"eval_sickr_spearman": 0.756075627431472,
"eval_stsb_spearman": 0.8573412670548614,
"step": 20750
},
{
"epoch": 1.34,
"eval_avg_sts": 0.8066874969794837,
"eval_sickr_spearman": 0.7560982500802493,
"eval_stsb_spearman": 0.857276743878718,
"step": 20875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.34,
"learning_rate": 2.296301036733649e-06,
"loss": 0.0013,
"neg_sim": NaN,
"pos_sim": 0.8217,
"step": 21000
},
{
"epoch": 1.34,
"eval_avg_sts": 0.80597633722049,
"eval_sickr_spearman": 0.7559074225142358,
"eval_stsb_spearman": 0.8560452519267441,
"step": 21000
},
{
"epoch": 1.35,
"eval_avg_sts": 0.8058007667522757,
"eval_sickr_spearman": 0.7558859045807915,
"eval_stsb_spearman": 0.8557156289237601,
"step": 21125
},
{
"epoch": 1.36,
"eval_avg_sts": 0.8052139487149964,
"eval_sickr_spearman": 0.7551075125508855,
"eval_stsb_spearman": 0.8553203848791073,
"step": 21250
},
{
"epoch": 1.37,
"eval_avg_sts": 0.8050337032021436,
"eval_sickr_spearman": 0.7547609681540094,
"eval_stsb_spearman": 0.8553064382502779,
"step": 21375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.38,
"learning_rate": 2.1843082042749267e-06,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.8233,
"step": 21500
},
{
"epoch": 1.38,
"eval_avg_sts": 0.8048704441406708,
"eval_sickr_spearman": 0.7537258018558153,
"eval_stsb_spearman": 0.8560150864255263,
"step": 21500
},
{
"epoch": 1.38,
"eval_avg_sts": 0.8048149155952563,
"eval_sickr_spearman": 0.7537628338350241,
"eval_stsb_spearman": 0.8558669973554885,
"step": 21625
},
{
"epoch": 1.39,
"eval_avg_sts": 0.8049705872600981,
"eval_sickr_spearman": 0.7530663348330701,
"eval_stsb_spearman": 0.8568748396871261,
"step": 21750
},
{
"epoch": 1.4,
"eval_avg_sts": 0.8048803781742058,
"eval_sickr_spearman": 0.7529998117575781,
"eval_stsb_spearman": 0.8567609445908335,
"step": 21875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.41,
"learning_rate": 2.072315371816204e-06,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.8232,
"step": 22000
},
{
"epoch": 1.41,
"eval_avg_sts": 0.8048396134318049,
"eval_sickr_spearman": 0.7529523570293574,
"eval_stsb_spearman": 0.8567268698342524,
"step": 22000
},
{
"epoch": 1.42,
"eval_avg_sts": 0.8052282014222374,
"eval_sickr_spearman": 0.754343433789208,
"eval_stsb_spearman": 0.8561129690552667,
"step": 22125
},
{
"epoch": 1.42,
"eval_avg_sts": 0.8052272637457882,
"eval_sickr_spearman": 0.7546001119952933,
"eval_stsb_spearman": 0.855854415496283,
"step": 22250
},
{
"epoch": 1.43,
"eval_avg_sts": 0.8050641056280918,
"eval_sickr_spearman": 0.7546412266181242,
"eval_stsb_spearman": 0.8554869846380595,
"step": 22375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.44,
"learning_rate": 1.9603225393574807e-06,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.8233,
"step": 22500
},
{
"epoch": 1.44,
"eval_avg_sts": 0.8049566807526001,
"eval_sickr_spearman": 0.7546373841300092,
"eval_stsb_spearman": 0.8552759773751909,
"step": 22500
},
{
"epoch": 1.45,
"eval_avg_sts": 0.8022531756326772,
"eval_sickr_spearman": 0.7524902498024213,
"eval_stsb_spearman": 0.8520161014629332,
"step": 22625
},
{
"epoch": 1.46,
"eval_avg_sts": 0.80224163626539,
"eval_sickr_spearman": 0.7525507689902333,
"eval_stsb_spearman": 0.8519325035405467,
"step": 22750
},
{
"epoch": 1.46,
"eval_avg_sts": 0.8021754313869522,
"eval_sickr_spearman": 0.7525084055587649,
"eval_stsb_spearman": 0.8518424572151397,
"step": 22875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.47,
"learning_rate": 1.8483297068987584e-06,
"loss": 0.0015,
"neg_sim": NaN,
"pos_sim": 0.8241,
"step": 23000
},
{
"epoch": 1.47,
"eval_avg_sts": 0.8024969817513419,
"eval_sickr_spearman": 0.752065078492491,
"eval_stsb_spearman": 0.8529288850101928,
"step": 23000
},
{
"epoch": 1.48,
"eval_avg_sts": 0.8024605472785751,
"eval_sickr_spearman": 0.7519645974282826,
"eval_stsb_spearman": 0.8529564971288677,
"step": 23125
},
{
"epoch": 1.49,
"eval_avg_sts": 0.8024320134196474,
"eval_sickr_spearman": 0.751913444305251,
"eval_stsb_spearman": 0.8529505825340439,
"step": 23250
},
{
"epoch": 1.5,
"eval_avg_sts": 0.8024048624411102,
"eval_sickr_spearman": 0.7518156049516216,
"eval_stsb_spearman": 0.8529941199305989,
"step": 23375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.5,
"learning_rate": 1.7363368744400357e-06,
"loss": 0.0013,
"neg_sim": NaN,
"pos_sim": 0.8244,
"step": 23500
},
{
"epoch": 1.5,
"eval_avg_sts": 0.802379657071767,
"eval_sickr_spearman": 0.7517114254926023,
"eval_stsb_spearman": 0.8530478886509316,
"step": 23500
},
{
"epoch": 1.51,
"eval_avg_sts": 0.8022866209294395,
"eval_sickr_spearman": 0.7517222805215273,
"eval_stsb_spearman": 0.8528509613373516,
"step": 23625
},
{
"epoch": 1.52,
"eval_avg_sts": 0.8029982809362839,
"eval_sickr_spearman": 0.7519777099189752,
"eval_stsb_spearman": 0.8540188519535926,
"step": 23750
},
{
"epoch": 1.53,
"eval_avg_sts": 0.8029407406070306,
"eval_sickr_spearman": 0.7520159907068213,
"eval_stsb_spearman": 0.8538654905072398,
"step": 23875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.54,
"learning_rate": 1.6243440419813131e-06,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.8241,
"step": 24000
},
{
"epoch": 1.54,
"eval_avg_sts": 0.8026597102203472,
"eval_sickr_spearman": 0.7517222805215273,
"eval_stsb_spearman": 0.8535971399191672,
"step": 24000
},
{
"epoch": 1.54,
"eval_avg_sts": 0.8025235294672179,
"eval_sickr_spearman": 0.7517699273741539,
"eval_stsb_spearman": 0.8532771315602818,
"step": 24125
},
{
"epoch": 1.55,
"eval_avg_sts": 0.8024910901785833,
"eval_sickr_spearman": 0.7517942311114816,
"eval_stsb_spearman": 0.8531879492456851,
"step": 24250
},
{
"epoch": 1.56,
"eval_avg_sts": 0.8022646880892426,
"eval_sickr_spearman": 0.7516456709147336,
"eval_stsb_spearman": 0.8528837052637515,
"step": 24375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.57,
"learning_rate": 1.5123512095225906e-06,
"loss": 0.0013,
"neg_sim": NaN,
"pos_sim": 0.8242,
"step": 24500
},
{
"epoch": 1.57,
"eval_avg_sts": 0.8021439174552474,
"eval_sickr_spearman": 0.7515729518271562,
"eval_stsb_spearman": 0.8527148830833385,
"step": 24500
},
{
"epoch": 1.58,
"eval_avg_sts": 0.8023944530487281,
"eval_sickr_spearman": 0.7522279999885693,
"eval_stsb_spearman": 0.8525609061088869,
"step": 24625
},
{
"epoch": 1.58,
"eval_avg_sts": 0.8023338277994898,
"eval_sickr_spearman": 0.7522989419253933,
"eval_stsb_spearman": 0.8523687136735865,
"step": 24750
},
{
"epoch": 1.59,
"eval_avg_sts": 0.8021448888713669,
"eval_sickr_spearman": 0.7524111906094542,
"eval_stsb_spearman": 0.8518785871332797,
"step": 24875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.6,
"learning_rate": 1.4003583770638678e-06,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.825,
"step": 25000
},
{
"epoch": 1.6,
"eval_avg_sts": 0.8020245919486174,
"eval_sickr_spearman": 0.7524232944470166,
"eval_stsb_spearman": 0.8516258894502182,
"step": 25000
},
{
"epoch": 1.61,
"eval_avg_sts": 0.8027255384898491,
"eval_sickr_spearman": 0.753412110732323,
"eval_stsb_spearman": 0.8520389662473753,
"step": 25125
},
{
"epoch": 1.62,
"eval_avg_sts": 0.8025519191789285,
"eval_sickr_spearman": 0.7524277133083489,
"eval_stsb_spearman": 0.8526761250495083,
"step": 25250
},
{
"epoch": 1.62,
"eval_avg_sts": 0.8020490496082332,
"eval_sickr_spearman": 0.7522735334727325,
"eval_stsb_spearman": 0.8518245657437339,
"step": 25375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.63,
"learning_rate": 1.2883655446051453e-06,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.8247,
"step": 25500
},
{
"epoch": 1.63,
"eval_avg_sts": 0.8019816352585948,
"eval_sickr_spearman": 0.7522344361561619,
"eval_stsb_spearman": 0.8517288343610276,
"step": 25500
},
{
"epoch": 1.64,
"eval_avg_sts": 0.8013792896117395,
"eval_sickr_spearman": 0.7513900974239813,
"eval_stsb_spearman": 0.8513684817994976,
"step": 25625
},
{
"epoch": 1.65,
"eval_avg_sts": 0.8012957662635658,
"eval_sickr_spearman": 0.7513326522266613,
"eval_stsb_spearman": 0.8512588803004703,
"step": 25750
},
{
"epoch": 1.66,
"eval_avg_sts": 0.8012601775924122,
"eval_sickr_spearman": 0.751396101311661,
"eval_stsb_spearman": 0.8511242538731633,
"step": 25875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.66,
"learning_rate": 1.1763727121464225e-06,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.825,
"step": 26000
},
{
"epoch": 1.66,
"eval_avg_sts": 0.80119947875736,
"eval_sickr_spearman": 0.7513846699095188,
"eval_stsb_spearman": 0.8510142876052013,
"step": 26000
},
{
"epoch": 1.67,
"eval_avg_sts": 0.8011593246270685,
"eval_sickr_spearman": 0.751327464867706,
"eval_stsb_spearman": 0.8509911843864311,
"step": 26125
},
{
"epoch": 1.68,
"eval_avg_sts": 0.8011629421191884,
"eval_sickr_spearman": 0.7513375033679065,
"eval_stsb_spearman": 0.8509883808704702,
"step": 26250
},
{
"epoch": 1.69,
"eval_avg_sts": 0.801390135441459,
"eval_sickr_spearman": 0.7513261680279671,
"eval_stsb_spearman": 0.8514541028549507,
"step": 26375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.7,
"learning_rate": 1.0643798796877e-06,
"loss": 0.0013,
"neg_sim": NaN,
"pos_sim": 0.8251,
"step": 26500
},
{
"epoch": 1.7,
"eval_avg_sts": 0.8011107242729139,
"eval_sickr_spearman": 0.7510607481614202,
"eval_stsb_spearman": 0.8511607003844076,
"step": 26500
},
{
"epoch": 1.7,
"eval_avg_sts": 0.8017416851794723,
"eval_sickr_spearman": 0.7518608022180748,
"eval_stsb_spearman": 0.8516225681408698,
"step": 26625
},
{
"epoch": 1.71,
"eval_avg_sts": 0.8012848966551385,
"eval_sickr_spearman": 0.7518499471891499,
"eval_stsb_spearman": 0.8507198461211269,
"step": 26750
},
{
"epoch": 1.72,
"eval_avg_sts": 0.8018531908928693,
"eval_sickr_spearman": 0.7526851600120571,
"eval_stsb_spearman": 0.8510212217736816,
"step": 26875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.73,
"learning_rate": 9.523870472289774e-07,
"loss": 0.0015,
"neg_sim": NaN,
"pos_sim": 0.8252,
"step": 27000
},
{
"epoch": 1.73,
"eval_avg_sts": 0.8022264378652083,
"eval_sickr_spearman": 0.7527636908629084,
"eval_stsb_spearman": 0.8516891848675082,
"step": 27000
},
{
"epoch": 1.74,
"eval_avg_sts": 0.8022941628228328,
"eval_sickr_spearman": 0.7528165731055918,
"eval_stsb_spearman": 0.851771752540074,
"step": 27125
},
{
"epoch": 1.74,
"eval_avg_sts": 0.802330897941351,
"eval_sickr_spearman": 0.7527780521622385,
"eval_stsb_spearman": 0.8518837437204637,
"step": 27250
},
{
"epoch": 1.75,
"eval_avg_sts": 0.8023123636818033,
"eval_sickr_spearman": 0.75282886906756,
"eval_stsb_spearman": 0.8517958582960464,
"step": 27375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.76,
"learning_rate": 8.403942147702547e-07,
"loss": 0.0013,
"neg_sim": NaN,
"pos_sim": 0.8254,
"step": 27500
},
{
"epoch": 1.76,
"eval_avg_sts": 0.8022805236271966,
"eval_sickr_spearman": 0.7531723875050453,
"eval_stsb_spearman": 0.8513886597493479,
"step": 27500
},
{
"epoch": 1.77,
"eval_avg_sts": 0.8021820813348178,
"eval_sickr_spearman": 0.7531067289893795,
"eval_stsb_spearman": 0.8512574336802562,
"step": 27625
},
{
"epoch": 1.78,
"eval_avg_sts": 0.8021045835263574,
"eval_sickr_spearman": 0.7530147494301255,
"eval_stsb_spearman": 0.8511944176225893,
"step": 27750
},
{
"epoch": 1.78,
"eval_avg_sts": 0.8021931793671637,
"eval_sickr_spearman": 0.7531494766696594,
"eval_stsb_spearman": 0.8512368820646681,
"step": 27875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.79,
"learning_rate": 7.284013823115321e-07,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.8249,
"step": 28000
},
{
"epoch": 1.79,
"eval_avg_sts": 0.8019519952437764,
"eval_sickr_spearman": 0.752846640575092,
"eval_stsb_spearman": 0.8510573499124608,
"step": 28000
},
{
"epoch": 1.8,
"eval_avg_sts": 0.8020597161190861,
"eval_sickr_spearman": 0.7531391019517488,
"eval_stsb_spearman": 0.8509803302864235,
"step": 28125
},
{
"epoch": 1.81,
"eval_avg_sts": 0.8017160897849254,
"eval_sickr_spearman": 0.7524687799000784,
"eval_stsb_spearman": 0.8509633996697724,
"step": 28250
},
{
"epoch": 1.82,
"eval_avg_sts": 0.8019391531942626,
"eval_sickr_spearman": 0.7527104243714136,
"eval_stsb_spearman": 0.8511678820171117,
"step": 28375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.82,
"learning_rate": 6.164085498528094e-07,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.8248,
"step": 28500
},
{
"epoch": 1.82,
"eval_avg_sts": 0.8019393193841213,
"eval_sickr_spearman": 0.7527119133355582,
"eval_stsb_spearman": 0.8511667254326846,
"step": 28500
},
{
"epoch": 1.83,
"eval_avg_sts": 0.8019492124313456,
"eval_sickr_spearman": 0.7531468829901817,
"eval_stsb_spearman": 0.8507515418725097,
"step": 28625
},
{
"epoch": 1.84,
"eval_avg_sts": 0.8018476761350394,
"eval_sickr_spearman": 0.753012059688445,
"eval_stsb_spearman": 0.8506832925816338,
"step": 28750
},
{
"epoch": 1.85,
"eval_avg_sts": 0.8021637081346856,
"eval_sickr_spearman": 0.7534332924480571,
"eval_stsb_spearman": 0.8508941238213141,
"step": 28875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.86,
"learning_rate": 5.044157173940868e-07,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.8253,
"step": 29000
},
{
"epoch": 1.86,
"eval_avg_sts": 0.8021424201726441,
"eval_sickr_spearman": 0.7534233980411609,
"eval_stsb_spearman": 0.8508614423041275,
"step": 29000
},
{
"epoch": 1.86,
"eval_avg_sts": 0.8021059378722697,
"eval_sickr_spearman": 0.753409276897338,
"eval_stsb_spearman": 0.8508025988472012,
"step": 29125
},
{
"epoch": 1.87,
"eval_avg_sts": 0.802109297825347,
"eval_sickr_spearman": 0.7534563954078488,
"eval_stsb_spearman": 0.8507622002428452,
"step": 29250
},
{
"epoch": 1.88,
"eval_avg_sts": 0.802140899314784,
"eval_sickr_spearman": 0.7535362711295405,
"eval_stsb_spearman": 0.8507455275000276,
"step": 29375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.89,
"learning_rate": 3.9242288493536413e-07,
"loss": 0.0013,
"neg_sim": NaN,
"pos_sim": 0.8247,
"step": 29500
},
{
"epoch": 1.89,
"eval_avg_sts": 0.8021494286648734,
"eval_sickr_spearman": 0.7535442923234806,
"eval_stsb_spearman": 0.850754565006266,
"step": 29500
},
{
"epoch": 1.9,
"eval_avg_sts": 0.802139809482882,
"eval_sickr_spearman": 0.7535292585887305,
"eval_stsb_spearman": 0.8507503603770336,
"step": 29625
},
{
"epoch": 1.9,
"eval_avg_sts": 0.8021667358363417,
"eval_sickr_spearman": 0.7535602866802595,
"eval_stsb_spearman": 0.8507731849924238,
"step": 29750
},
{
"epoch": 1.91,
"eval_avg_sts": 0.802187610188689,
"eval_sickr_spearman": 0.7535883368434994,
"eval_stsb_spearman": 0.8507868835338787,
"step": 29875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.92,
"learning_rate": 2.804300524766415e-07,
"loss": 0.0013,
"neg_sim": NaN,
"pos_sim": 0.8251,
"step": 30000
},
{
"epoch": 1.92,
"eval_avg_sts": 0.802156902456721,
"eval_sickr_spearman": 0.7535403537731628,
"eval_stsb_spearman": 0.8507734511402791,
"step": 30000
},
{
"epoch": 1.93,
"eval_avg_sts": 0.8024228445195005,
"eval_sickr_spearman": 0.7538071665416515,
"eval_stsb_spearman": 0.8510385224973495,
"step": 30125
},
{
"epoch": 1.94,
"eval_avg_sts": 0.802384678105279,
"eval_sickr_spearman": 0.753726522322337,
"eval_stsb_spearman": 0.8510428338882212,
"step": 30250
},
{
"epoch": 1.94,
"eval_avg_sts": 0.8023422930470364,
"eval_sickr_spearman": 0.7536587504382077,
"eval_stsb_spearman": 0.8510258356558651,
"step": 30375
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.95,
"learning_rate": 1.6843722001791884e-07,
"loss": 0.0014,
"neg_sim": NaN,
"pos_sim": 0.8246,
"step": 30500
},
{
"epoch": 1.95,
"eval_avg_sts": 0.8023481067748586,
"eval_sickr_spearman": 0.7536835344865497,
"eval_stsb_spearman": 0.8510126790631675,
"step": 30500
},
{
"epoch": 1.96,
"eval_avg_sts": 0.8023432918069697,
"eval_sickr_spearman": 0.7536910273383741,
"eval_stsb_spearman": 0.8509955562755652,
"step": 30625
},
{
"epoch": 1.97,
"eval_avg_sts": 0.8023505057059448,
"eval_sickr_spearman": 0.7537099996234421,
"eval_stsb_spearman": 0.8509910117884474,
"step": 30750
},
{
"epoch": 1.98,
"eval_avg_sts": 0.8023412928143316,
"eval_sickr_spearman": 0.7536968391016481,
"eval_stsb_spearman": 0.8509857465270153,
"step": 30875
},
{
"electra_acc": NaN,
"electra_fix_acc": NaN,
"electra_rep_acc": NaN,
"epoch": 1.98,
"learning_rate": 5.64443875591962e-08,
"loss": 0.0013,
"neg_sim": NaN,
"pos_sim": 0.8246,
"step": 31000
},
{
"epoch": 1.98,
"eval_avg_sts": 0.8023353086079636,
"eval_sickr_spearman": 0.7536697975915385,
"eval_stsb_spearman": 0.8510008196243888,
"step": 31000
},
{
"epoch": 1.99,
"eval_avg_sts": 0.8023565101430139,
"eval_sickr_spearman": 0.7536807486826663,
"eval_stsb_spearman": 0.8510322716033614,
"step": 31125
},
{
"epoch": 2.0,
"eval_avg_sts": 0.802324478822219,
"eval_sickr_spearman": 0.7536380010023865,
"eval_stsb_spearman": 0.8510109566420515,
"step": 31250
},
{
"epoch": 2.0,
"step": 31252,
"train_runtime": 14615.2615,
"train_samples_per_second": 2.138
}
],
"max_steps": 31252,
"num_train_epochs": 2,
"total_flos": 603571104573277440,
"trial_name": null,
"trial_params": null
}