|
{ |
|
"best_metric": 0.8583291200488784, |
|
"best_model_checkpoint": "condenser-bert-base-uncased_mix_0.00001_cls_output_dir", |
|
"epoch": 2.0, |
|
"global_step": 31252, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"electra_acc": 97.8496, |
|
"electra_fix_acc": 0.9447, |
|
"electra_rep_acc": 0.0242, |
|
"epoch": 0.0, |
|
"learning_rate": 6.999776014335083e-06, |
|
"loss": 0.0046, |
|
"neg_sim": -0.0126, |
|
"pos_sim": 0.6661, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_avg_sts": 0.6467281681849183, |
|
"eval_sickr_spearman": 0.6493190116334423, |
|
"eval_stsb_spearman": 0.6441373247363943, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_avg_sts": 0.6766102197245321, |
|
"eval_sickr_spearman": 0.6747404807226937, |
|
"eval_stsb_spearman": 0.6784799587263706, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_avg_sts": 0.6927318121134369, |
|
"eval_sickr_spearman": 0.6849395969264682, |
|
"eval_stsb_spearman": 0.7005240273004057, |
|
"step": 375 |
|
}, |
|
{ |
|
"electra_acc": 111.3766, |
|
"electra_fix_acc": 0.9699, |
|
"electra_rep_acc": 0.4695, |
|
"epoch": 0.03, |
|
"learning_rate": 6.888007167541277e-06, |
|
"loss": 0.0027, |
|
"neg_sim": -0.0136, |
|
"pos_sim": 0.7499, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_avg_sts": 0.70406785938114, |
|
"eval_sickr_spearman": 0.6912377231336422, |
|
"eval_stsb_spearman": 0.716897995628638, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_avg_sts": 0.7079843162896757, |
|
"eval_sickr_spearman": 0.6933601214439895, |
|
"eval_stsb_spearman": 0.7226085111353618, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_avg_sts": 0.721992790338854, |
|
"eval_sickr_spearman": 0.7041983394834997, |
|
"eval_stsb_spearman": 0.7397872411942082, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_avg_sts": 0.7371963659388359, |
|
"eval_sickr_spearman": 0.7161540010979675, |
|
"eval_stsb_spearman": 0.7582387307797042, |
|
"step": 875 |
|
}, |
|
{ |
|
"electra_acc": 115.0045, |
|
"electra_fix_acc": 0.9686, |
|
"electra_rep_acc": 0.6116, |
|
"epoch": 0.06, |
|
"learning_rate": 6.776014335082554e-06, |
|
"loss": 0.002, |
|
"neg_sim": -0.0139, |
|
"pos_sim": 0.7675, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_avg_sts": 0.7419491613050363, |
|
"eval_sickr_spearman": 0.7199929829426116, |
|
"eval_stsb_spearman": 0.763905339667461, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_avg_sts": 0.7503468807257706, |
|
"eval_sickr_spearman": 0.7253389886259777, |
|
"eval_stsb_spearman": 0.7753547728255635, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_avg_sts": 0.755381635152361, |
|
"eval_sickr_spearman": 0.7283938147085431, |
|
"eval_stsb_spearman": 0.782369455596179, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_avg_sts": 0.7557951064861299, |
|
"eval_sickr_spearman": 0.7286210018183454, |
|
"eval_stsb_spearman": 0.7829692111539145, |
|
"step": 1375 |
|
}, |
|
{ |
|
"electra_acc": 115.5115, |
|
"electra_fix_acc": 0.9698, |
|
"electra_rep_acc": 0.6302, |
|
"epoch": 0.1, |
|
"learning_rate": 6.6640215026238325e-06, |
|
"loss": 0.0017, |
|
"neg_sim": -0.014, |
|
"pos_sim": 0.7738, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_avg_sts": 0.7624381952103951, |
|
"eval_sickr_spearman": 0.7329619086730178, |
|
"eval_stsb_spearman": 0.7919144817477726, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_avg_sts": 0.765407531009575, |
|
"eval_sickr_spearman": 0.7350173996590618, |
|
"eval_stsb_spearman": 0.7957976623600883, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_avg_sts": 0.7663640946087169, |
|
"eval_sickr_spearman": 0.7356288836114704, |
|
"eval_stsb_spearman": 0.7970993056059634, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_avg_sts": 0.7654280480014255, |
|
"eval_sickr_spearman": 0.7361625571795493, |
|
"eval_stsb_spearman": 0.7946935388233016, |
|
"step": 1875 |
|
}, |
|
{ |
|
"electra_acc": 115.9922, |
|
"electra_fix_acc": 0.971, |
|
"electra_rep_acc": 0.641, |
|
"epoch": 0.13, |
|
"learning_rate": 6.55202867016511e-06, |
|
"loss": 0.0016, |
|
"neg_sim": -0.014, |
|
"pos_sim": 0.7751, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_avg_sts": 0.7672864050175499, |
|
"eval_sickr_spearman": 0.7369789418106929, |
|
"eval_stsb_spearman": 0.7975938682244069, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_avg_sts": 0.7747771973615925, |
|
"eval_sickr_spearman": 0.7388607043028352, |
|
"eval_stsb_spearman": 0.8106936904203497, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_avg_sts": 0.7774171758966744, |
|
"eval_sickr_spearman": 0.7346681655205053, |
|
"eval_stsb_spearman": 0.8201661862728435, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_avg_sts": 0.7804372234935246, |
|
"eval_sickr_spearman": 0.7373037281186174, |
|
"eval_stsb_spearman": 0.8235707188684319, |
|
"step": 2375 |
|
}, |
|
{ |
|
"electra_acc": 116.1787, |
|
"electra_fix_acc": 0.971, |
|
"electra_rep_acc": 0.6492, |
|
"epoch": 0.16, |
|
"learning_rate": 6.440035837706387e-06, |
|
"loss": 0.0018, |
|
"neg_sim": -0.0139, |
|
"pos_sim": 0.7692, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_avg_sts": 0.7810767988630156, |
|
"eval_sickr_spearman": 0.7360565525386754, |
|
"eval_stsb_spearman": 0.8260970451873557, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_avg_sts": 0.7810534850927242, |
|
"eval_sickr_spearman": 0.737071930023077, |
|
"eval_stsb_spearman": 0.8250350401623715, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_avg_sts": 0.7840637132723827, |
|
"eval_sickr_spearman": 0.7369239942306477, |
|
"eval_stsb_spearman": 0.8312034323141176, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_avg_sts": 0.7807650924553761, |
|
"eval_sickr_spearman": 0.736852331827302, |
|
"eval_stsb_spearman": 0.8246778530834501, |
|
"step": 2875 |
|
}, |
|
{ |
|
"electra_acc": 116.4021, |
|
"electra_fix_acc": 0.9711, |
|
"electra_rep_acc": 0.6575, |
|
"epoch": 0.19, |
|
"learning_rate": 6.328043005247664e-06, |
|
"loss": 0.0017, |
|
"neg_sim": -0.014, |
|
"pos_sim": 0.778, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_avg_sts": 0.7812090218351604, |
|
"eval_sickr_spearman": 0.7374819235049528, |
|
"eval_stsb_spearman": 0.824936120165368, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_avg_sts": 0.7792184022991786, |
|
"eval_sickr_spearman": 0.735666443932795, |
|
"eval_stsb_spearman": 0.8227703606655621, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_avg_sts": 0.7868425974055346, |
|
"eval_sickr_spearman": 0.7406432345394065, |
|
"eval_stsb_spearman": 0.8330419602716627, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_avg_sts": 0.7879152712258332, |
|
"eval_sickr_spearman": 0.74201269730361, |
|
"eval_stsb_spearman": 0.8338178451480563, |
|
"step": 3375 |
|
}, |
|
{ |
|
"electra_acc": 116.4857, |
|
"electra_fix_acc": 0.9706, |
|
"electra_rep_acc": 0.6652, |
|
"epoch": 0.22, |
|
"learning_rate": 6.2160501727889415e-06, |
|
"loss": 0.0016, |
|
"neg_sim": -0.014, |
|
"pos_sim": 0.7831, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_avg_sts": 0.7874505134915601, |
|
"eval_sickr_spearman": 0.7415818102926086, |
|
"eval_stsb_spearman": 0.8333192166905118, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_avg_sts": 0.7885881180801638, |
|
"eval_sickr_spearman": 0.745498890708191, |
|
"eval_stsb_spearman": 0.8316773454521367, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_avg_sts": 0.786897125097908, |
|
"eval_sickr_spearman": 0.7427211560498221, |
|
"eval_stsb_spearman": 0.8310730941459938, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_avg_sts": 0.7884858023873396, |
|
"eval_sickr_spearman": 0.7450858232358233, |
|
"eval_stsb_spearman": 0.831885781538856, |
|
"step": 3875 |
|
}, |
|
{ |
|
"electra_acc": 116.6361, |
|
"electra_fix_acc": 0.9706, |
|
"electra_rep_acc": 0.6699, |
|
"epoch": 0.26, |
|
"learning_rate": 6.104057340330219e-06, |
|
"loss": 0.0016, |
|
"neg_sim": -0.014, |
|
"pos_sim": 0.7846, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_avg_sts": 0.7883005639004375, |
|
"eval_sickr_spearman": 0.7467304081490642, |
|
"eval_stsb_spearman": 0.8298707196518108, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_avg_sts": 0.7890018606938185, |
|
"eval_sickr_spearman": 0.7466418868291138, |
|
"eval_stsb_spearman": 0.8313618345585232, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_avg_sts": 0.794939161048242, |
|
"eval_sickr_spearman": 0.7542289276433795, |
|
"eval_stsb_spearman": 0.8356493944531046, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_avg_sts": 0.7950124813232355, |
|
"eval_sickr_spearman": 0.7546040025145097, |
|
"eval_stsb_spearman": 0.8354209601319615, |
|
"step": 4375 |
|
}, |
|
{ |
|
"electra_acc": 116.6274, |
|
"electra_fix_acc": 0.9702, |
|
"electra_rep_acc": 0.6733, |
|
"epoch": 0.29, |
|
"learning_rate": 5.992064507871497e-06, |
|
"loss": 0.0016, |
|
"neg_sim": -0.0141, |
|
"pos_sim": 0.7871, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_avg_sts": 0.7938800981934206, |
|
"eval_sickr_spearman": 0.7533463081233527, |
|
"eval_stsb_spearman": 0.8344138882634886, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_avg_sts": 0.794615358586916, |
|
"eval_sickr_spearman": 0.7546161063520722, |
|
"eval_stsb_spearman": 0.8346146108217597, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_avg_sts": 0.7945912313688637, |
|
"eval_sickr_spearman": 0.7546232629861865, |
|
"eval_stsb_spearman": 0.8345591997515407, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_avg_sts": 0.7926504221766928, |
|
"eval_sickr_spearman": 0.7513844777851131, |
|
"eval_stsb_spearman": 0.8339163665682726, |
|
"step": 4875 |
|
}, |
|
{ |
|
"electra_acc": 116.73, |
|
"electra_fix_acc": 0.9701, |
|
"electra_rep_acc": 0.6783, |
|
"epoch": 0.32, |
|
"learning_rate": 5.880071675412773e-06, |
|
"loss": 0.0015, |
|
"neg_sim": -0.0141, |
|
"pos_sim": 0.7904, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_avg_sts": 0.7934518878705803, |
|
"eval_sickr_spearman": 0.7517016271479089, |
|
"eval_stsb_spearman": 0.8352021485932517, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_avg_sts": 0.7947730330766363, |
|
"eval_sickr_spearman": 0.7522364054313209, |
|
"eval_stsb_spearman": 0.8373096607219516, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_avg_sts": 0.7957221976017312, |
|
"eval_sickr_spearman": 0.7467118201128077, |
|
"eval_stsb_spearman": 0.8447325750906549, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_avg_sts": 0.7897362921108364, |
|
"eval_sickr_spearman": 0.7382444172402828, |
|
"eval_stsb_spearman": 0.8412281669813901, |
|
"step": 5375 |
|
}, |
|
{ |
|
"electra_acc": 116.8512, |
|
"electra_fix_acc": 0.9704, |
|
"electra_rep_acc": 0.6807, |
|
"epoch": 0.35, |
|
"learning_rate": 5.7680788429540505e-06, |
|
"loss": 0.0016, |
|
"neg_sim": -0.014, |
|
"pos_sim": 0.7834, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_avg_sts": 0.791664811568233, |
|
"eval_sickr_spearman": 0.7412088968210431, |
|
"eval_stsb_spearman": 0.8421207263154228, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_avg_sts": 0.791979531720516, |
|
"eval_sickr_spearman": 0.7422650046794644, |
|
"eval_stsb_spearman": 0.8416940587615678, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_avg_sts": 0.7909414570864208, |
|
"eval_sickr_spearman": 0.7407966939085013, |
|
"eval_stsb_spearman": 0.8410862202643404, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_avg_sts": 0.7916033250892136, |
|
"eval_sickr_spearman": 0.7420070776647416, |
|
"eval_stsb_spearman": 0.8411995725136856, |
|
"step": 5875 |
|
}, |
|
{ |
|
"electra_acc": 116.9386, |
|
"electra_fix_acc": 0.9702, |
|
"electra_rep_acc": 0.6839, |
|
"epoch": 0.38, |
|
"learning_rate": 5.656086010495328e-06, |
|
"loss": 0.0014, |
|
"neg_sim": -0.014, |
|
"pos_sim": 0.7867, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_avg_sts": 0.7939838516948429, |
|
"eval_sickr_spearman": 0.7457015819562598, |
|
"eval_stsb_spearman": 0.8422661214334259, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_avg_sts": 0.7897949803868374, |
|
"eval_sickr_spearman": 0.7381373559151772, |
|
"eval_stsb_spearman": 0.8414526048584976, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_avg_sts": 0.792929873578434, |
|
"eval_sickr_spearman": 0.7426651998166468, |
|
"eval_stsb_spearman": 0.8431945473402213, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_avg_sts": 0.7941689576028486, |
|
"eval_sickr_spearman": 0.7447726604544468, |
|
"eval_stsb_spearman": 0.8435652547512503, |
|
"step": 6375 |
|
}, |
|
{ |
|
"electra_acc": 116.9876, |
|
"electra_fix_acc": 0.9702, |
|
"electra_rep_acc": 0.6869, |
|
"epoch": 0.42, |
|
"learning_rate": 5.544093178036605e-06, |
|
"loss": 0.0014, |
|
"neg_sim": -0.0141, |
|
"pos_sim": 0.7923, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_avg_sts": 0.7943810788058159, |
|
"eval_sickr_spearman": 0.7449998475642491, |
|
"eval_stsb_spearman": 0.8437623100473826, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_avg_sts": 0.7943037355857338, |
|
"eval_sickr_spearman": 0.7452223756572118, |
|
"eval_stsb_spearman": 0.8433850955142557, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_avg_sts": 0.7941830032512052, |
|
"eval_sickr_spearman": 0.7457148385402568, |
|
"eval_stsb_spearman": 0.8426511679621534, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_avg_sts": 0.7945109790018022, |
|
"eval_sickr_spearman": 0.7458948591084468, |
|
"eval_stsb_spearman": 0.8431270988951577, |
|
"step": 6875 |
|
}, |
|
{ |
|
"electra_acc": 117.061, |
|
"electra_fix_acc": 0.9704, |
|
"electra_rep_acc": 0.6883, |
|
"epoch": 0.45, |
|
"learning_rate": 5.432100345577883e-06, |
|
"loss": 0.0014, |
|
"neg_sim": -0.0142, |
|
"pos_sim": 0.8, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_avg_sts": 0.7988457805161364, |
|
"eval_sickr_spearman": 0.7515535952932767, |
|
"eval_stsb_spearman": 0.846137965738996, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_avg_sts": 0.7981158834848314, |
|
"eval_sickr_spearman": 0.75075815222236, |
|
"eval_stsb_spearman": 0.845473614747303, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_avg_sts": 0.7984443557563039, |
|
"eval_sickr_spearman": 0.7501957080245197, |
|
"eval_stsb_spearman": 0.8466930034880882, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_avg_sts": 0.7981127107574888, |
|
"eval_sickr_spearman": 0.750318523550897, |
|
"eval_stsb_spearman": 0.8459068979640807, |
|
"step": 7375 |
|
}, |
|
{ |
|
"electra_acc": 117.069, |
|
"electra_fix_acc": 0.9701, |
|
"electra_rep_acc": 0.6888, |
|
"epoch": 0.48, |
|
"learning_rate": 5.32010751311916e-06, |
|
"loss": 0.0015, |
|
"neg_sim": -0.0142, |
|
"pos_sim": 0.8001, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_avg_sts": 0.7955868437200273, |
|
"eval_sickr_spearman": 0.7487511246176671, |
|
"eval_stsb_spearman": 0.8424225628223875, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_avg_sts": 0.7949766204622221, |
|
"eval_sickr_spearman": 0.7481150007102207, |
|
"eval_stsb_spearman": 0.8418382402142234, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_avg_sts": 0.7950165968598919, |
|
"eval_sickr_spearman": 0.7485309980797762, |
|
"eval_stsb_spearman": 0.8415021956400077, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_avg_sts": 0.7952250496118516, |
|
"eval_sickr_spearman": 0.7486211044260741, |
|
"eval_stsb_spearman": 0.8418289947976292, |
|
"step": 7875 |
|
}, |
|
{ |
|
"electra_acc": 117.1619, |
|
"electra_fix_acc": 0.9703, |
|
"electra_rep_acc": 0.6937, |
|
"epoch": 0.51, |
|
"learning_rate": 5.2081146806604376e-06, |
|
"loss": 0.0014, |
|
"neg_sim": -0.0142, |
|
"pos_sim": 0.7987, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_avg_sts": 0.7952021451003002, |
|
"eval_sickr_spearman": 0.7491326836874914, |
|
"eval_stsb_spearman": 0.8412716065131091, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_avg_sts": 0.7951490971039081, |
|
"eval_sickr_spearman": 0.749455164502547, |
|
"eval_stsb_spearman": 0.8408430297052691, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_avg_sts": 0.7950116620178889, |
|
"eval_sickr_spearman": 0.749741093649408, |
|
"eval_stsb_spearman": 0.8402822303863696, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_avg_sts": 0.7951408099684255, |
|
"eval_sickr_spearman": 0.7501430179062422, |
|
"eval_stsb_spearman": 0.8401386020306089, |
|
"step": 8375 |
|
}, |
|
{ |
|
"electra_acc": 117.2641, |
|
"electra_fix_acc": 0.9702, |
|
"electra_rep_acc": 0.6954, |
|
"epoch": 0.54, |
|
"learning_rate": 5.096121848201715e-06, |
|
"loss": 0.0014, |
|
"neg_sim": -0.0142, |
|
"pos_sim": 0.806, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_avg_sts": 0.7949691120514141, |
|
"eval_sickr_spearman": 0.7501533445930513, |
|
"eval_stsb_spearman": 0.8397848795097769, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_avg_sts": 0.7947062336358759, |
|
"eval_sickr_spearman": 0.7503012323543793, |
|
"eval_stsb_spearman": 0.8391112349173724, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_avg_sts": 0.7944181183521635, |
|
"eval_sickr_spearman": 0.7493049712483499, |
|
"eval_stsb_spearman": 0.8395312654559769, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_avg_sts": 0.7950044953143297, |
|
"eval_sickr_spearman": 0.7496116017999308, |
|
"eval_stsb_spearman": 0.8403973888287286, |
|
"step": 8875 |
|
}, |
|
{ |
|
"electra_acc": 117.2706, |
|
"electra_fix_acc": 0.9705, |
|
"electra_rep_acc": 0.6956, |
|
"epoch": 0.58, |
|
"learning_rate": 4.984129015742992e-06, |
|
"loss": 0.0014, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.8093, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_avg_sts": 0.7950848432290827, |
|
"eval_sickr_spearman": 0.7493472386176154, |
|
"eval_stsb_spearman": 0.8408224478405498, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_avg_sts": 0.794847808534162, |
|
"eval_sickr_spearman": 0.7489566016696193, |
|
"eval_stsb_spearman": 0.8407390153987048, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_avg_sts": 0.795644348129942, |
|
"eval_sickr_spearman": 0.7493907547955183, |
|
"eval_stsb_spearman": 0.8418979414643657, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_avg_sts": 0.7966154624139274, |
|
"eval_sickr_spearman": 0.7486175020934662, |
|
"eval_stsb_spearman": 0.8446134227343888, |
|
"step": 9375 |
|
}, |
|
{ |
|
"electra_acc": 117.2471, |
|
"electra_fix_acc": 0.9703, |
|
"electra_rep_acc": 0.6966, |
|
"epoch": 0.61, |
|
"learning_rate": 4.872136183284269e-06, |
|
"loss": 0.0015, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.8085, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_avg_sts": 0.7962741237962319, |
|
"eval_sickr_spearman": 0.7489400789707246, |
|
"eval_stsb_spearman": 0.8436081686217393, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_avg_sts": 0.7966016709380292, |
|
"eval_sickr_spearman": 0.7497793264061527, |
|
"eval_stsb_spearman": 0.8434240154699058, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_avg_sts": 0.7969314966558516, |
|
"eval_sickr_spearman": 0.750271933382502, |
|
"eval_stsb_spearman": 0.8435910599292011, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_avg_sts": 0.7965405773738363, |
|
"eval_sickr_spearman": 0.7509994124448837, |
|
"eval_stsb_spearman": 0.842081742302789, |
|
"step": 9875 |
|
}, |
|
{ |
|
"electra_acc": 117.3599, |
|
"electra_fix_acc": 0.9704, |
|
"electra_rep_acc": 0.6998, |
|
"epoch": 0.64, |
|
"learning_rate": 4.760143350825547e-06, |
|
"loss": 0.0014, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.8077, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_avg_sts": 0.7964394231656002, |
|
"eval_sickr_spearman": 0.7509571931067197, |
|
"eval_stsb_spearman": 0.8419216532244808, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_avg_sts": 0.7967246593169031, |
|
"eval_sickr_spearman": 0.7512494143278691, |
|
"eval_stsb_spearman": 0.842199904305937, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_avg_sts": 0.798107636355013, |
|
"eval_sickr_spearman": 0.750279041985515, |
|
"eval_stsb_spearman": 0.845936230724511, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_avg_sts": 0.7980710535062169, |
|
"eval_sickr_spearman": 0.7505661719099119, |
|
"eval_stsb_spearman": 0.8455759351025218, |
|
"step": 10375 |
|
}, |
|
{ |
|
"electra_acc": 117.3933, |
|
"electra_fix_acc": 0.9703, |
|
"electra_rep_acc": 0.7006, |
|
"epoch": 0.67, |
|
"learning_rate": 4.648150518366825e-06, |
|
"loss": 0.0014, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.8081, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_avg_sts": 0.7985924819917696, |
|
"eval_sickr_spearman": 0.7519674312632674, |
|
"eval_stsb_spearman": 0.8452175327202718, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_avg_sts": 0.7991692973398108, |
|
"eval_sickr_spearman": 0.7515644683598212, |
|
"eval_stsb_spearman": 0.8467741263198004, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_avg_sts": 0.7987755187970038, |
|
"eval_sickr_spearman": 0.751040334943309, |
|
"eval_stsb_spearman": 0.8465107026506987, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_avg_sts": 0.7986197258898549, |
|
"eval_sickr_spearman": 0.7514156980010478, |
|
"eval_stsb_spearman": 0.8458237537786619, |
|
"step": 10875 |
|
}, |
|
{ |
|
"electra_acc": 117.4302, |
|
"electra_fix_acc": 0.9705, |
|
"electra_rep_acc": 0.7012, |
|
"epoch": 0.7, |
|
"learning_rate": 4.536157685908102e-06, |
|
"loss": 0.0013, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.8102, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_avg_sts": 0.7996251236145968, |
|
"eval_sickr_spearman": 0.7529341052108109, |
|
"eval_stsb_spearman": 0.8463161420183827, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_avg_sts": 0.7994727880295167, |
|
"eval_sickr_spearman": 0.752963500244891, |
|
"eval_stsb_spearman": 0.8459820758141423, |
|
"step": 11125 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_avg_sts": 0.7988993639342742, |
|
"eval_sickr_spearman": 0.7527381383169434, |
|
"eval_stsb_spearman": 0.845060589551605, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_avg_sts": 0.7949700279790515, |
|
"eval_sickr_spearman": 0.7504287549286974, |
|
"eval_stsb_spearman": 0.8395113010294055, |
|
"step": 11375 |
|
}, |
|
{ |
|
"electra_acc": 117.3738, |
|
"electra_fix_acc": 0.9702, |
|
"electra_rep_acc": 0.6995, |
|
"epoch": 0.74, |
|
"learning_rate": 4.424164853449379e-06, |
|
"loss": 0.0014, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.8157, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_avg_sts": 0.7945615778286725, |
|
"eval_sickr_spearman": 0.7499855239246266, |
|
"eval_stsb_spearman": 0.8391376317327186, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_avg_sts": 0.7934292933863014, |
|
"eval_sickr_spearman": 0.7480520799673368, |
|
"eval_stsb_spearman": 0.838806506805266, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_avg_sts": 0.7937723514656398, |
|
"eval_sickr_spearman": 0.7481406973494901, |
|
"eval_stsb_spearman": 0.8394040055817897, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_avg_sts": 0.7945405318685601, |
|
"eval_sickr_spearman": 0.7490017989360725, |
|
"eval_stsb_spearman": 0.8400792648010476, |
|
"step": 11875 |
|
}, |
|
{ |
|
"electra_acc": 117.5412, |
|
"electra_fix_acc": 0.9707, |
|
"electra_rep_acc": 0.7042, |
|
"epoch": 0.77, |
|
"learning_rate": 4.312172020990656e-06, |
|
"loss": 0.0016, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.812, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_avg_sts": 0.7960514566703415, |
|
"eval_sickr_spearman": 0.7499365802322611, |
|
"eval_stsb_spearman": 0.8421663331084219, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_avg_sts": 0.7951893349270129, |
|
"eval_sickr_spearman": 0.7490124138094904, |
|
"eval_stsb_spearman": 0.8413662560445356, |
|
"step": 12125 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_avg_sts": 0.7937236608939515, |
|
"eval_sickr_spearman": 0.7449763603556459, |
|
"eval_stsb_spearman": 0.8424709614322572, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_avg_sts": 0.7934627772608434, |
|
"eval_sickr_spearman": 0.746114793521932, |
|
"eval_stsb_spearman": 0.8408107609997548, |
|
"step": 12375 |
|
}, |
|
{ |
|
"electra_acc": 117.4273, |
|
"electra_fix_acc": 0.9702, |
|
"electra_rep_acc": 0.7033, |
|
"epoch": 0.8, |
|
"learning_rate": 4.2001791885319345e-06, |
|
"loss": 0.0016, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.8084, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_avg_sts": 0.793506856374318, |
|
"eval_sickr_spearman": 0.7463023069419462, |
|
"eval_stsb_spearman": 0.8407114058066898, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_avg_sts": 0.7929774718082235, |
|
"eval_sickr_spearman": 0.7465257476258366, |
|
"eval_stsb_spearman": 0.8394291959906105, |
|
"step": 12625 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_avg_sts": 0.7907041469753885, |
|
"eval_sickr_spearman": 0.7438328358926072, |
|
"eval_stsb_spearman": 0.8375754580581698, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_avg_sts": 0.7919377238754862, |
|
"eval_sickr_spearman": 0.744254116683321, |
|
"eval_stsb_spearman": 0.8396213310676515, |
|
"step": 12875 |
|
}, |
|
{ |
|
"electra_acc": 117.4657, |
|
"electra_fix_acc": 0.9701, |
|
"electra_rep_acc": 0.7049, |
|
"epoch": 0.83, |
|
"learning_rate": 4.088186356073212e-06, |
|
"loss": 0.0014, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.8091, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_avg_sts": 0.7950524625796482, |
|
"eval_sickr_spearman": 0.7459233415515996, |
|
"eval_stsb_spearman": 0.8441815836076968, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_avg_sts": 0.7953046984165892, |
|
"eval_sickr_spearman": 0.7460563877025831, |
|
"eval_stsb_spearman": 0.8445530091305953, |
|
"step": 13125 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_avg_sts": 0.7960686224710865, |
|
"eval_sickr_spearman": 0.745027801665286, |
|
"eval_stsb_spearman": 0.8471094432768869, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_avg_sts": 0.7964731409284048, |
|
"eval_sickr_spearman": 0.7453551336215867, |
|
"eval_stsb_spearman": 0.847591148235223, |
|
"step": 13375 |
|
}, |
|
{ |
|
"electra_acc": 117.5481, |
|
"electra_fix_acc": 0.9705, |
|
"electra_rep_acc": 0.7049, |
|
"epoch": 0.86, |
|
"learning_rate": 3.976193523614489e-06, |
|
"loss": 0.0014, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.8115, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_avg_sts": 0.7966766934403904, |
|
"eval_sickr_spearman": 0.7463476483017039, |
|
"eval_stsb_spearman": 0.8470057385790768, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_avg_sts": 0.7966025495961442, |
|
"eval_sickr_spearman": 0.7463816062904207, |
|
"eval_stsb_spearman": 0.8468234929018676, |
|
"step": 13625 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_avg_sts": 0.7967818015292757, |
|
"eval_sickr_spearman": 0.746880793527667, |
|
"eval_stsb_spearman": 0.8466828095308844, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_avg_sts": 0.7969535576693652, |
|
"eval_sickr_spearman": 0.7471762808637142, |
|
"eval_stsb_spearman": 0.8467308344750161, |
|
"step": 13875 |
|
}, |
|
{ |
|
"electra_acc": 117.5482, |
|
"electra_fix_acc": 0.9705, |
|
"electra_rep_acc": 0.7068, |
|
"epoch": 0.9, |
|
"learning_rate": 3.864200691155766e-06, |
|
"loss": 0.0013, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.8142, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_avg_sts": 0.7965629720194758, |
|
"eval_sickr_spearman": 0.7468308350906325, |
|
"eval_stsb_spearman": 0.8462951089483192, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_avg_sts": 0.7965266307017043, |
|
"eval_sickr_spearman": 0.7447568101909722, |
|
"eval_stsb_spearman": 0.8482964512124362, |
|
"step": 14125 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_avg_sts": 0.7965480662045883, |
|
"eval_sickr_spearman": 0.7448633912050633, |
|
"eval_stsb_spearman": 0.8482327412041132, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_avg_sts": 0.7964806384496487, |
|
"eval_sickr_spearman": 0.745111231688484, |
|
"eval_stsb_spearman": 0.8478500452108133, |
|
"step": 14375 |
|
}, |
|
{ |
|
"electra_acc": 117.5208, |
|
"electra_fix_acc": 0.9701, |
|
"electra_rep_acc": 0.7071, |
|
"epoch": 0.93, |
|
"learning_rate": 3.7522078586970435e-06, |
|
"loss": 0.0013, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.8155, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_avg_sts": 0.7971949399590785, |
|
"eval_sickr_spearman": 0.7457434650767139, |
|
"eval_stsb_spearman": 0.8486464148414431, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_avg_sts": 0.7970029281099968, |
|
"eval_sickr_spearman": 0.7458370776934168, |
|
"eval_stsb_spearman": 0.8481687785265768, |
|
"step": 14625 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_avg_sts": 0.7991675032273975, |
|
"eval_sickr_spearman": 0.7483727836316391, |
|
"eval_stsb_spearman": 0.8499622228231559, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_avg_sts": 0.79901349058513, |
|
"eval_sickr_spearman": 0.748446895621158, |
|
"eval_stsb_spearman": 0.8495800855491022, |
|
"step": 14875 |
|
}, |
|
{ |
|
"electra_acc": 117.6139, |
|
"electra_fix_acc": 0.9705, |
|
"electra_rep_acc": 0.7091, |
|
"epoch": 0.96, |
|
"learning_rate": 3.6402150262383203e-06, |
|
"loss": 0.0014, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.8156, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_avg_sts": 0.79914478688965, |
|
"eval_sickr_spearman": 0.7468535118620501, |
|
"eval_stsb_spearman": 0.8514360619172497, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_avg_sts": 0.7991571351120899, |
|
"eval_sickr_spearman": 0.7471076924508606, |
|
"eval_stsb_spearman": 0.8512065777733191, |
|
"step": 15125 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_avg_sts": 0.8012474062999755, |
|
"eval_sickr_spearman": 0.7502938355647578, |
|
"eval_stsb_spearman": 0.8522009770351932, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_avg_sts": 0.7993104516704408, |
|
"eval_sickr_spearman": 0.7512376467080167, |
|
"eval_stsb_spearman": 0.8473832566328647, |
|
"step": 15375 |
|
}, |
|
{ |
|
"electra_acc": 117.5407, |
|
"electra_fix_acc": 0.9702, |
|
"electra_rep_acc": 0.7085, |
|
"epoch": 0.99, |
|
"learning_rate": 3.5282221937795976e-06, |
|
"loss": 0.0015, |
|
"neg_sim": -0.0143, |
|
"pos_sim": 0.8109, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_avg_sts": 0.7991830405974262, |
|
"eval_sickr_spearman": 0.7510609883169274, |
|
"eval_stsb_spearman": 0.847305092877925, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_avg_sts": 0.7993142209919086, |
|
"eval_sickr_spearman": 0.750953302587503, |
|
"eval_stsb_spearman": 0.8476751393963143, |
|
"step": 15625 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_avg_sts": 0.79998042307191, |
|
"eval_sickr_spearman": 0.7521459148362114, |
|
"eval_stsb_spearman": 0.8478149313076085, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_avg_sts": 0.8000408218838944, |
|
"eval_sickr_spearman": 0.7522030718469228, |
|
"eval_stsb_spearman": 0.847878571920866, |
|
"step": 15875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.02, |
|
"learning_rate": 3.4162293613208757e-06, |
|
"loss": 0.0013, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8075, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_avg_sts": 0.7998575815186055, |
|
"eval_sickr_spearman": 0.7520637336216508, |
|
"eval_stsb_spearman": 0.8476514294155602, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_avg_sts": 0.7998976758754202, |
|
"eval_sickr_spearman": 0.752151678568384, |
|
"eval_stsb_spearman": 0.8476436731824563, |
|
"step": 16125 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_avg_sts": 0.7993375115090717, |
|
"eval_sickr_spearman": 0.7512065225542848, |
|
"eval_stsb_spearman": 0.8474685004638588, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_avg_sts": 0.8005700206739319, |
|
"eval_sickr_spearman": 0.7500328285448524, |
|
"eval_stsb_spearman": 0.8511072128030113, |
|
"step": 16375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.06, |
|
"learning_rate": 3.304236528862153e-06, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8139, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_avg_sts": 0.800932566485014, |
|
"eval_sickr_spearman": 0.7492046823085472, |
|
"eval_stsb_spearman": 0.8526604506614809, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_avg_sts": 0.8020790163363096, |
|
"eval_sickr_spearman": 0.7523164732774181, |
|
"eval_stsb_spearman": 0.851841559395201, |
|
"step": 16625 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_avg_sts": 0.8034376489964099, |
|
"eval_sickr_spearman": 0.7546485273455429, |
|
"eval_stsb_spearman": 0.8522267706472768, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_avg_sts": 0.8035121251749653, |
|
"eval_sickr_spearman": 0.7551173589266803, |
|
"eval_stsb_spearman": 0.8519068914232504, |
|
"step": 16875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.09, |
|
"learning_rate": 3.19224369640343e-06, |
|
"loss": 0.0015, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8123, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_avg_sts": 0.8034498780822419, |
|
"eval_sickr_spearman": 0.755224660407293, |
|
"eval_stsb_spearman": 0.8516750957571909, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_avg_sts": 0.8032831913725695, |
|
"eval_sickr_spearman": 0.7549221124993344, |
|
"eval_stsb_spearman": 0.8516442702458048, |
|
"step": 17125 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_avg_sts": 0.8025478170596181, |
|
"eval_sickr_spearman": 0.7531311768200115, |
|
"eval_stsb_spearman": 0.8519644572992248, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_avg_sts": 0.8023474610208348, |
|
"eval_sickr_spearman": 0.7529551428332407, |
|
"eval_stsb_spearman": 0.8517397792084288, |
|
"step": 17375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.12, |
|
"learning_rate": 3.0802508639447074e-06, |
|
"loss": 0.0015, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8124, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_avg_sts": 0.8036377678015303, |
|
"eval_sickr_spearman": 0.7540948728392658, |
|
"eval_stsb_spearman": 0.8531806627637948, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_avg_sts": 0.803436785093961, |
|
"eval_sickr_spearman": 0.7537820462755993, |
|
"eval_stsb_spearman": 0.8530915239123227, |
|
"step": 17625 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_avg_sts": 0.8034543332861956, |
|
"eval_sickr_spearman": 0.753641363179487, |
|
"eval_stsb_spearman": 0.8532673033929044, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_avg_sts": 0.8031959700775069, |
|
"eval_sickr_spearman": 0.7533919857008204, |
|
"eval_stsb_spearman": 0.8529999544541933, |
|
"step": 17875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.15, |
|
"learning_rate": 2.9682580314859847e-06, |
|
"loss": 0.0013, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8138, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_avg_sts": 0.8033449509317736, |
|
"eval_sickr_spearman": 0.7535802676184579, |
|
"eval_stsb_spearman": 0.8531096342450895, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_avg_sts": 0.803034323911963, |
|
"eval_sickr_spearman": 0.7532389586116386, |
|
"eval_stsb_spearman": 0.8528296892122876, |
|
"step": 18125 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_avg_sts": 0.8030336070807917, |
|
"eval_sickr_spearman": 0.753408940679628, |
|
"eval_stsb_spearman": 0.8526582734819556, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_avg_sts": 0.8042466753465274, |
|
"eval_sickr_spearman": 0.754791275779017, |
|
"eval_stsb_spearman": 0.8537020749140378, |
|
"step": 18375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.18, |
|
"learning_rate": 2.8562651990272623e-06, |
|
"loss": 0.0013, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8154, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_avg_sts": 0.8041866669158593, |
|
"eval_sickr_spearman": 0.7544020317329624, |
|
"eval_stsb_spearman": 0.8539713020987562, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_avg_sts": 0.8040941868331957, |
|
"eval_sickr_spearman": 0.7544721091099607, |
|
"eval_stsb_spearman": 0.8537162645564307, |
|
"step": 18625 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_avg_sts": 0.8041380482719174, |
|
"eval_sickr_spearman": 0.7546320526777496, |
|
"eval_stsb_spearman": 0.8536440438660853, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_avg_sts": 0.8038765751251531, |
|
"eval_sickr_spearman": 0.7544602934590069, |
|
"eval_stsb_spearman": 0.8532928567912992, |
|
"step": 18875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.22, |
|
"learning_rate": 2.7442723665685396e-06, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8165, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_avg_sts": 0.8066267695432612, |
|
"eval_sickr_spearman": 0.7576009030887407, |
|
"eval_stsb_spearman": 0.8556526359977817, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_avg_sts": 0.8079773606909346, |
|
"eval_sickr_spearman": 0.7579651709620474, |
|
"eval_stsb_spearman": 0.8579895504198217, |
|
"step": 19125 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_avg_sts": 0.8076160011490612, |
|
"eval_sickr_spearman": 0.7573030141976216, |
|
"eval_stsb_spearman": 0.8579289881005008, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_avg_sts": 0.8076237482649742, |
|
"eval_sickr_spearman": 0.7578992242597729, |
|
"eval_stsb_spearman": 0.8573482722701754, |
|
"step": 19375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.25, |
|
"learning_rate": 2.632279534109817e-06, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.817, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_avg_sts": 0.8074421455549705, |
|
"eval_sickr_spearman": 0.7572703530486437, |
|
"eval_stsb_spearman": 0.8576139380612975, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_avg_sts": 0.8072772138963991, |
|
"eval_sickr_spearman": 0.7571532532233376, |
|
"eval_stsb_spearman": 0.8574011745694605, |
|
"step": 19625 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_avg_sts": 0.8073302425810744, |
|
"eval_sickr_spearman": 0.7571811592932731, |
|
"eval_stsb_spearman": 0.8574793258688757, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_avg_sts": 0.8072437293168806, |
|
"eval_sickr_spearman": 0.7571653090297986, |
|
"eval_stsb_spearman": 0.8573221496039627, |
|
"step": 19875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.28, |
|
"learning_rate": 2.5202867016510945e-06, |
|
"loss": 0.0013, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.819, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_avg_sts": 0.8077588380658864, |
|
"eval_sickr_spearman": 0.7571885560828946, |
|
"eval_stsb_spearman": 0.8583291200488784, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_avg_sts": 0.8073697057890559, |
|
"eval_sickr_spearman": 0.7569122331563212, |
|
"eval_stsb_spearman": 0.8578271784217905, |
|
"step": 20125 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_avg_sts": 0.8072112242255804, |
|
"eval_sickr_spearman": 0.7568430683702502, |
|
"eval_stsb_spearman": 0.8575793800809108, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_avg_sts": 0.8070283089413652, |
|
"eval_sickr_spearman": 0.7565875429105995, |
|
"eval_stsb_spearman": 0.8574690749721309, |
|
"step": 20375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.31, |
|
"learning_rate": 2.4082938691923718e-06, |
|
"loss": 0.0013, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8202, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_avg_sts": 0.806707361091437, |
|
"eval_sickr_spearman": 0.7562538228178075, |
|
"eval_stsb_spearman": 0.8571608993650666, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_avg_sts": 0.8065118723390399, |
|
"eval_sickr_spearman": 0.7561714014477396, |
|
"eval_stsb_spearman": 0.85685234323034, |
|
"step": 20625 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_avg_sts": 0.8067084472431667, |
|
"eval_sickr_spearman": 0.756075627431472, |
|
"eval_stsb_spearman": 0.8573412670548614, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_avg_sts": 0.8066874969794837, |
|
"eval_sickr_spearman": 0.7560982500802493, |
|
"eval_stsb_spearman": 0.857276743878718, |
|
"step": 20875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.34, |
|
"learning_rate": 2.296301036733649e-06, |
|
"loss": 0.0013, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8217, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_avg_sts": 0.80597633722049, |
|
"eval_sickr_spearman": 0.7559074225142358, |
|
"eval_stsb_spearman": 0.8560452519267441, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_avg_sts": 0.8058007667522757, |
|
"eval_sickr_spearman": 0.7558859045807915, |
|
"eval_stsb_spearman": 0.8557156289237601, |
|
"step": 21125 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_avg_sts": 0.8052139487149964, |
|
"eval_sickr_spearman": 0.7551075125508855, |
|
"eval_stsb_spearman": 0.8553203848791073, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_avg_sts": 0.8050337032021436, |
|
"eval_sickr_spearman": 0.7547609681540094, |
|
"eval_stsb_spearman": 0.8553064382502779, |
|
"step": 21375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.38, |
|
"learning_rate": 2.1843082042749267e-06, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8233, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_avg_sts": 0.8048704441406708, |
|
"eval_sickr_spearman": 0.7537258018558153, |
|
"eval_stsb_spearman": 0.8560150864255263, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_avg_sts": 0.8048149155952563, |
|
"eval_sickr_spearman": 0.7537628338350241, |
|
"eval_stsb_spearman": 0.8558669973554885, |
|
"step": 21625 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_avg_sts": 0.8049705872600981, |
|
"eval_sickr_spearman": 0.7530663348330701, |
|
"eval_stsb_spearman": 0.8568748396871261, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_avg_sts": 0.8048803781742058, |
|
"eval_sickr_spearman": 0.7529998117575781, |
|
"eval_stsb_spearman": 0.8567609445908335, |
|
"step": 21875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.41, |
|
"learning_rate": 2.072315371816204e-06, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8232, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_avg_sts": 0.8048396134318049, |
|
"eval_sickr_spearman": 0.7529523570293574, |
|
"eval_stsb_spearman": 0.8567268698342524, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_avg_sts": 0.8052282014222374, |
|
"eval_sickr_spearman": 0.754343433789208, |
|
"eval_stsb_spearman": 0.8561129690552667, |
|
"step": 22125 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_avg_sts": 0.8052272637457882, |
|
"eval_sickr_spearman": 0.7546001119952933, |
|
"eval_stsb_spearman": 0.855854415496283, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_avg_sts": 0.8050641056280918, |
|
"eval_sickr_spearman": 0.7546412266181242, |
|
"eval_stsb_spearman": 0.8554869846380595, |
|
"step": 22375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.44, |
|
"learning_rate": 1.9603225393574807e-06, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8233, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_avg_sts": 0.8049566807526001, |
|
"eval_sickr_spearman": 0.7546373841300092, |
|
"eval_stsb_spearman": 0.8552759773751909, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_avg_sts": 0.8022531756326772, |
|
"eval_sickr_spearman": 0.7524902498024213, |
|
"eval_stsb_spearman": 0.8520161014629332, |
|
"step": 22625 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_avg_sts": 0.80224163626539, |
|
"eval_sickr_spearman": 0.7525507689902333, |
|
"eval_stsb_spearman": 0.8519325035405467, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_avg_sts": 0.8021754313869522, |
|
"eval_sickr_spearman": 0.7525084055587649, |
|
"eval_stsb_spearman": 0.8518424572151397, |
|
"step": 22875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.47, |
|
"learning_rate": 1.8483297068987584e-06, |
|
"loss": 0.0015, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8241, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_avg_sts": 0.8024969817513419, |
|
"eval_sickr_spearman": 0.752065078492491, |
|
"eval_stsb_spearman": 0.8529288850101928, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_avg_sts": 0.8024605472785751, |
|
"eval_sickr_spearman": 0.7519645974282826, |
|
"eval_stsb_spearman": 0.8529564971288677, |
|
"step": 23125 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_avg_sts": 0.8024320134196474, |
|
"eval_sickr_spearman": 0.751913444305251, |
|
"eval_stsb_spearman": 0.8529505825340439, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_avg_sts": 0.8024048624411102, |
|
"eval_sickr_spearman": 0.7518156049516216, |
|
"eval_stsb_spearman": 0.8529941199305989, |
|
"step": 23375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.5, |
|
"learning_rate": 1.7363368744400357e-06, |
|
"loss": 0.0013, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8244, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_avg_sts": 0.802379657071767, |
|
"eval_sickr_spearman": 0.7517114254926023, |
|
"eval_stsb_spearman": 0.8530478886509316, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_avg_sts": 0.8022866209294395, |
|
"eval_sickr_spearman": 0.7517222805215273, |
|
"eval_stsb_spearman": 0.8528509613373516, |
|
"step": 23625 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_avg_sts": 0.8029982809362839, |
|
"eval_sickr_spearman": 0.7519777099189752, |
|
"eval_stsb_spearman": 0.8540188519535926, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_avg_sts": 0.8029407406070306, |
|
"eval_sickr_spearman": 0.7520159907068213, |
|
"eval_stsb_spearman": 0.8538654905072398, |
|
"step": 23875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.54, |
|
"learning_rate": 1.6243440419813131e-06, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8241, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_avg_sts": 0.8026597102203472, |
|
"eval_sickr_spearman": 0.7517222805215273, |
|
"eval_stsb_spearman": 0.8535971399191672, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_avg_sts": 0.8025235294672179, |
|
"eval_sickr_spearman": 0.7517699273741539, |
|
"eval_stsb_spearman": 0.8532771315602818, |
|
"step": 24125 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_avg_sts": 0.8024910901785833, |
|
"eval_sickr_spearman": 0.7517942311114816, |
|
"eval_stsb_spearman": 0.8531879492456851, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_avg_sts": 0.8022646880892426, |
|
"eval_sickr_spearman": 0.7516456709147336, |
|
"eval_stsb_spearman": 0.8528837052637515, |
|
"step": 24375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.57, |
|
"learning_rate": 1.5123512095225906e-06, |
|
"loss": 0.0013, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8242, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_avg_sts": 0.8021439174552474, |
|
"eval_sickr_spearman": 0.7515729518271562, |
|
"eval_stsb_spearman": 0.8527148830833385, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_avg_sts": 0.8023944530487281, |
|
"eval_sickr_spearman": 0.7522279999885693, |
|
"eval_stsb_spearman": 0.8525609061088869, |
|
"step": 24625 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_avg_sts": 0.8023338277994898, |
|
"eval_sickr_spearman": 0.7522989419253933, |
|
"eval_stsb_spearman": 0.8523687136735865, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_avg_sts": 0.8021448888713669, |
|
"eval_sickr_spearman": 0.7524111906094542, |
|
"eval_stsb_spearman": 0.8518785871332797, |
|
"step": 24875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.6, |
|
"learning_rate": 1.4003583770638678e-06, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.825, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_avg_sts": 0.8020245919486174, |
|
"eval_sickr_spearman": 0.7524232944470166, |
|
"eval_stsb_spearman": 0.8516258894502182, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_avg_sts": 0.8027255384898491, |
|
"eval_sickr_spearman": 0.753412110732323, |
|
"eval_stsb_spearman": 0.8520389662473753, |
|
"step": 25125 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_avg_sts": 0.8025519191789285, |
|
"eval_sickr_spearman": 0.7524277133083489, |
|
"eval_stsb_spearman": 0.8526761250495083, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_avg_sts": 0.8020490496082332, |
|
"eval_sickr_spearman": 0.7522735334727325, |
|
"eval_stsb_spearman": 0.8518245657437339, |
|
"step": 25375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.63, |
|
"learning_rate": 1.2883655446051453e-06, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8247, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_avg_sts": 0.8019816352585948, |
|
"eval_sickr_spearman": 0.7522344361561619, |
|
"eval_stsb_spearman": 0.8517288343610276, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_avg_sts": 0.8013792896117395, |
|
"eval_sickr_spearman": 0.7513900974239813, |
|
"eval_stsb_spearman": 0.8513684817994976, |
|
"step": 25625 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_avg_sts": 0.8012957662635658, |
|
"eval_sickr_spearman": 0.7513326522266613, |
|
"eval_stsb_spearman": 0.8512588803004703, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_avg_sts": 0.8012601775924122, |
|
"eval_sickr_spearman": 0.751396101311661, |
|
"eval_stsb_spearman": 0.8511242538731633, |
|
"step": 25875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.66, |
|
"learning_rate": 1.1763727121464225e-06, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.825, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_avg_sts": 0.80119947875736, |
|
"eval_sickr_spearman": 0.7513846699095188, |
|
"eval_stsb_spearman": 0.8510142876052013, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_avg_sts": 0.8011593246270685, |
|
"eval_sickr_spearman": 0.751327464867706, |
|
"eval_stsb_spearman": 0.8509911843864311, |
|
"step": 26125 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_avg_sts": 0.8011629421191884, |
|
"eval_sickr_spearman": 0.7513375033679065, |
|
"eval_stsb_spearman": 0.8509883808704702, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_avg_sts": 0.801390135441459, |
|
"eval_sickr_spearman": 0.7513261680279671, |
|
"eval_stsb_spearman": 0.8514541028549507, |
|
"step": 26375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.7, |
|
"learning_rate": 1.0643798796877e-06, |
|
"loss": 0.0013, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8251, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_avg_sts": 0.8011107242729139, |
|
"eval_sickr_spearman": 0.7510607481614202, |
|
"eval_stsb_spearman": 0.8511607003844076, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_avg_sts": 0.8017416851794723, |
|
"eval_sickr_spearman": 0.7518608022180748, |
|
"eval_stsb_spearman": 0.8516225681408698, |
|
"step": 26625 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_avg_sts": 0.8012848966551385, |
|
"eval_sickr_spearman": 0.7518499471891499, |
|
"eval_stsb_spearman": 0.8507198461211269, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_avg_sts": 0.8018531908928693, |
|
"eval_sickr_spearman": 0.7526851600120571, |
|
"eval_stsb_spearman": 0.8510212217736816, |
|
"step": 26875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.73, |
|
"learning_rate": 9.523870472289774e-07, |
|
"loss": 0.0015, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8252, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_avg_sts": 0.8022264378652083, |
|
"eval_sickr_spearman": 0.7527636908629084, |
|
"eval_stsb_spearman": 0.8516891848675082, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_avg_sts": 0.8022941628228328, |
|
"eval_sickr_spearman": 0.7528165731055918, |
|
"eval_stsb_spearman": 0.851771752540074, |
|
"step": 27125 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_avg_sts": 0.802330897941351, |
|
"eval_sickr_spearman": 0.7527780521622385, |
|
"eval_stsb_spearman": 0.8518837437204637, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_avg_sts": 0.8023123636818033, |
|
"eval_sickr_spearman": 0.75282886906756, |
|
"eval_stsb_spearman": 0.8517958582960464, |
|
"step": 27375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.76, |
|
"learning_rate": 8.403942147702547e-07, |
|
"loss": 0.0013, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8254, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_avg_sts": 0.8022805236271966, |
|
"eval_sickr_spearman": 0.7531723875050453, |
|
"eval_stsb_spearman": 0.8513886597493479, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_avg_sts": 0.8021820813348178, |
|
"eval_sickr_spearman": 0.7531067289893795, |
|
"eval_stsb_spearman": 0.8512574336802562, |
|
"step": 27625 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_avg_sts": 0.8021045835263574, |
|
"eval_sickr_spearman": 0.7530147494301255, |
|
"eval_stsb_spearman": 0.8511944176225893, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_avg_sts": 0.8021931793671637, |
|
"eval_sickr_spearman": 0.7531494766696594, |
|
"eval_stsb_spearman": 0.8512368820646681, |
|
"step": 27875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.79, |
|
"learning_rate": 7.284013823115321e-07, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8249, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_avg_sts": 0.8019519952437764, |
|
"eval_sickr_spearman": 0.752846640575092, |
|
"eval_stsb_spearman": 0.8510573499124608, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_avg_sts": 0.8020597161190861, |
|
"eval_sickr_spearman": 0.7531391019517488, |
|
"eval_stsb_spearman": 0.8509803302864235, |
|
"step": 28125 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_avg_sts": 0.8017160897849254, |
|
"eval_sickr_spearman": 0.7524687799000784, |
|
"eval_stsb_spearman": 0.8509633996697724, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_avg_sts": 0.8019391531942626, |
|
"eval_sickr_spearman": 0.7527104243714136, |
|
"eval_stsb_spearman": 0.8511678820171117, |
|
"step": 28375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.82, |
|
"learning_rate": 6.164085498528094e-07, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8248, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_avg_sts": 0.8019393193841213, |
|
"eval_sickr_spearman": 0.7527119133355582, |
|
"eval_stsb_spearman": 0.8511667254326846, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_avg_sts": 0.8019492124313456, |
|
"eval_sickr_spearman": 0.7531468829901817, |
|
"eval_stsb_spearman": 0.8507515418725097, |
|
"step": 28625 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_avg_sts": 0.8018476761350394, |
|
"eval_sickr_spearman": 0.753012059688445, |
|
"eval_stsb_spearman": 0.8506832925816338, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_avg_sts": 0.8021637081346856, |
|
"eval_sickr_spearman": 0.7534332924480571, |
|
"eval_stsb_spearman": 0.8508941238213141, |
|
"step": 28875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.86, |
|
"learning_rate": 5.044157173940868e-07, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8253, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_avg_sts": 0.8021424201726441, |
|
"eval_sickr_spearman": 0.7534233980411609, |
|
"eval_stsb_spearman": 0.8508614423041275, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_avg_sts": 0.8021059378722697, |
|
"eval_sickr_spearman": 0.753409276897338, |
|
"eval_stsb_spearman": 0.8508025988472012, |
|
"step": 29125 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_avg_sts": 0.802109297825347, |
|
"eval_sickr_spearman": 0.7534563954078488, |
|
"eval_stsb_spearman": 0.8507622002428452, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_avg_sts": 0.802140899314784, |
|
"eval_sickr_spearman": 0.7535362711295405, |
|
"eval_stsb_spearman": 0.8507455275000276, |
|
"step": 29375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.89, |
|
"learning_rate": 3.9242288493536413e-07, |
|
"loss": 0.0013, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8247, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_avg_sts": 0.8021494286648734, |
|
"eval_sickr_spearman": 0.7535442923234806, |
|
"eval_stsb_spearman": 0.850754565006266, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_avg_sts": 0.802139809482882, |
|
"eval_sickr_spearman": 0.7535292585887305, |
|
"eval_stsb_spearman": 0.8507503603770336, |
|
"step": 29625 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_avg_sts": 0.8021667358363417, |
|
"eval_sickr_spearman": 0.7535602866802595, |
|
"eval_stsb_spearman": 0.8507731849924238, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_avg_sts": 0.802187610188689, |
|
"eval_sickr_spearman": 0.7535883368434994, |
|
"eval_stsb_spearman": 0.8507868835338787, |
|
"step": 29875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.92, |
|
"learning_rate": 2.804300524766415e-07, |
|
"loss": 0.0013, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8251, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_avg_sts": 0.802156902456721, |
|
"eval_sickr_spearman": 0.7535403537731628, |
|
"eval_stsb_spearman": 0.8507734511402791, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_avg_sts": 0.8024228445195005, |
|
"eval_sickr_spearman": 0.7538071665416515, |
|
"eval_stsb_spearman": 0.8510385224973495, |
|
"step": 30125 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_avg_sts": 0.802384678105279, |
|
"eval_sickr_spearman": 0.753726522322337, |
|
"eval_stsb_spearman": 0.8510428338882212, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_avg_sts": 0.8023422930470364, |
|
"eval_sickr_spearman": 0.7536587504382077, |
|
"eval_stsb_spearman": 0.8510258356558651, |
|
"step": 30375 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.95, |
|
"learning_rate": 1.6843722001791884e-07, |
|
"loss": 0.0014, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8246, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_avg_sts": 0.8023481067748586, |
|
"eval_sickr_spearman": 0.7536835344865497, |
|
"eval_stsb_spearman": 0.8510126790631675, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_avg_sts": 0.8023432918069697, |
|
"eval_sickr_spearman": 0.7536910273383741, |
|
"eval_stsb_spearman": 0.8509955562755652, |
|
"step": 30625 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_avg_sts": 0.8023505057059448, |
|
"eval_sickr_spearman": 0.7537099996234421, |
|
"eval_stsb_spearman": 0.8509910117884474, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_avg_sts": 0.8023412928143316, |
|
"eval_sickr_spearman": 0.7536968391016481, |
|
"eval_stsb_spearman": 0.8509857465270153, |
|
"step": 30875 |
|
}, |
|
{ |
|
"electra_acc": NaN, |
|
"electra_fix_acc": NaN, |
|
"electra_rep_acc": NaN, |
|
"epoch": 1.98, |
|
"learning_rate": 5.64443875591962e-08, |
|
"loss": 0.0013, |
|
"neg_sim": NaN, |
|
"pos_sim": 0.8246, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_avg_sts": 0.8023353086079636, |
|
"eval_sickr_spearman": 0.7536697975915385, |
|
"eval_stsb_spearman": 0.8510008196243888, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_avg_sts": 0.8023565101430139, |
|
"eval_sickr_spearman": 0.7536807486826663, |
|
"eval_stsb_spearman": 0.8510322716033614, |
|
"step": 31125 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_avg_sts": 0.802324478822219, |
|
"eval_sickr_spearman": 0.7536380010023865, |
|
"eval_stsb_spearman": 0.8510109566420515, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 31252, |
|
"train_runtime": 14615.2615, |
|
"train_samples_per_second": 2.138 |
|
} |
|
], |
|
"max_steps": 31252, |
|
"num_train_epochs": 2, |
|
"total_flos": 603571104573277440, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|