{ "best_metric": 0.8359799918026352, "best_model_checkpoint": "result/simcse-celectra-amlp-dmlp-bs128-lr2e-6-mask0.40-elew0.01-roberta-base", "epoch": 1.0, "global_step": 7813, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "electra_acc": 0.2448, "electra_fix_acc": 0.006, "electra_rep_acc": 0.9912, "epoch": 0.0, "learning_rate": 1.9997440163829513e-06, "loss": 8.6888, "neg_sim": 0.2041, "pos_sim": 0.3651, "step": 1 }, { "epoch": 0.02, "eval_avg_sts": 0.7368351609735859, "eval_sickr_spearman": 0.6964049570574558, "eval_stsb_spearman": 0.7772653648897162, "step": 125 }, { "epoch": 0.03, "eval_avg_sts": 0.7530561409181189, "eval_sickr_spearman": 0.707297882521504, "eval_stsb_spearman": 0.7988143993147339, "step": 250 }, { "epoch": 0.05, "eval_avg_sts": 0.7605110871922394, "eval_sickr_spearman": 0.7124740982612865, "eval_stsb_spearman": 0.8085480761231921, "step": 375 }, { "electra_acc": 0.7243, "electra_fix_acc": 0.8981, "electra_rep_acc": 0.2064, "epoch": 0.06, "learning_rate": 1.8720081914757456e-06, "loss": 0.6062, "neg_sim": 0.0055, "pos_sim": 0.6593, "step": 500 }, { "epoch": 0.06, "eval_avg_sts": 0.763939631028596, "eval_sickr_spearman": 0.7155249377624325, "eval_stsb_spearman": 0.8123543242947595, "step": 500 }, { "epoch": 0.08, "eval_avg_sts": 0.7670128200581756, "eval_sickr_spearman": 0.7176028112417467, "eval_stsb_spearman": 0.8164228288746044, "step": 625 }, { "epoch": 0.1, "eval_avg_sts": 0.7683647856842559, "eval_sickr_spearman": 0.7182528161375087, "eval_stsb_spearman": 0.8184767552310033, "step": 750 }, { "epoch": 0.11, "eval_avg_sts": 0.7703405632893722, "eval_sickr_spearman": 0.7201300637061159, "eval_stsb_spearman": 0.8205510628726285, "step": 875 }, { "electra_acc": 0.7954, "electra_fix_acc": 0.9411, "electra_rep_acc": 0.3621, "epoch": 0.13, "learning_rate": 1.744016382951491e-06, "loss": 0.0043, "neg_sim": -0.0066, "pos_sim": 0.7176, "step": 1000 }, { "epoch": 0.13, "eval_avg_sts": 0.7716371834988085, "eval_sickr_spearman": 0.7207027865596639, "eval_stsb_spearman": 0.8225715804379532, "step": 1000 }, { "epoch": 0.14, "eval_avg_sts": 0.7719569589534554, "eval_sickr_spearman": 0.7195236710504597, "eval_stsb_spearman": 0.8243902468564511, "step": 1125 }, { "epoch": 0.16, "eval_avg_sts": 0.7725905596083784, "eval_sickr_spearman": 0.7204897205936844, "eval_stsb_spearman": 0.8246913986230725, "step": 1250 }, { "epoch": 0.18, "eval_avg_sts": 0.7740128875580021, "eval_sickr_spearman": 0.7209413090094056, "eval_stsb_spearman": 0.8270844661065986, "step": 1375 }, { "electra_acc": 0.806, "electra_fix_acc": 0.9446, "electra_rep_acc": 0.3921, "epoch": 0.19, "learning_rate": 1.6160245744272365e-06, "loss": 0.0039, "neg_sim": -0.0067, "pos_sim": 0.7288, "step": 1500 }, { "epoch": 0.19, "eval_avg_sts": 0.7747104419219604, "eval_sickr_spearman": 0.721527768757965, "eval_stsb_spearman": 0.8278931150859556, "step": 1500 }, { "epoch": 0.21, "eval_avg_sts": 0.7750669451240817, "eval_sickr_spearman": 0.722242135329654, "eval_stsb_spearman": 0.8278917549185095, "step": 1625 }, { "epoch": 0.22, "eval_avg_sts": 0.7751917311744843, "eval_sickr_spearman": 0.7214598527805315, "eval_stsb_spearman": 0.8289236095684369, "step": 1750 }, { "epoch": 0.24, "eval_avg_sts": 0.775110379872699, "eval_sickr_spearman": 0.7211095139266419, "eval_stsb_spearman": 0.8291112458187563, "step": 1875 }, { "electra_acc": 0.8111, "electra_fix_acc": 0.9455, "electra_rep_acc": 0.4077, "epoch": 0.26, "learning_rate": 1.4880327659029822e-06, "loss": 0.0037, "neg_sim": -0.0067, "pos_sim": 0.7361, "step": 2000 }, { "epoch": 0.26, "eval_avg_sts": 0.7752389174925427, "eval_sickr_spearman": 0.7207011535022151, "eval_stsb_spearman": 0.8297766814828704, "step": 2000 }, { "epoch": 0.27, "eval_avg_sts": 0.7759291537103485, "eval_sickr_spearman": 0.7218097593545081, "eval_stsb_spearman": 0.8300485480661889, "step": 2125 }, { "epoch": 0.29, "eval_avg_sts": 0.7759100192724719, "eval_sickr_spearman": 0.7214468843831432, "eval_stsb_spearman": 0.8303731541618006, "step": 2250 }, { "epoch": 0.3, "eval_avg_sts": 0.77580052054564, "eval_sickr_spearman": 0.7208077825474077, "eval_stsb_spearman": 0.8307932585438723, "step": 2375 }, { "electra_acc": 0.8137, "electra_fix_acc": 0.9463, "electra_rep_acc": 0.4184, "epoch": 0.32, "learning_rate": 1.3600409573787276e-06, "loss": 0.0036, "neg_sim": -0.0067, "pos_sim": 0.7422, "step": 2500 }, { "epoch": 0.32, "eval_avg_sts": 0.7764057696201115, "eval_sickr_spearman": 0.7207564372999703, "eval_stsb_spearman": 0.8320551019402528, "step": 2500 }, { "epoch": 0.34, "eval_avg_sts": 0.7771167451325478, "eval_sickr_spearman": 0.7216867036726237, "eval_stsb_spearman": 0.8325467865924719, "step": 2625 }, { "epoch": 0.35, "eval_avg_sts": 0.777409082133846, "eval_sickr_spearman": 0.7218441496231378, "eval_stsb_spearman": 0.8329740146445542, "step": 2750 }, { "epoch": 0.37, "eval_avg_sts": 0.7776325118235485, "eval_sickr_spearman": 0.722264277667417, "eval_stsb_spearman": 0.8330007459796801, "step": 2875 }, { "electra_acc": 0.8163, "electra_fix_acc": 0.9464, "electra_rep_acc": 0.4284, "epoch": 0.38, "learning_rate": 1.2320491488544733e-06, "loss": 0.0034, "neg_sim": -0.0068, "pos_sim": 0.7471, "step": 3000 }, { "epoch": 0.38, "eval_avg_sts": 0.7777138423021386, "eval_sickr_spearman": 0.7227461256770441, "eval_stsb_spearman": 0.832681558927233, "step": 3000 }, { "epoch": 0.4, "eval_avg_sts": 0.7781702830872564, "eval_sickr_spearman": 0.7235760070476919, "eval_stsb_spearman": 0.832764559126821, "step": 3125 }, { "epoch": 0.42, "eval_avg_sts": 0.7788742472264717, "eval_sickr_spearman": 0.7235008864050426, "eval_stsb_spearman": 0.8342476080479008, "step": 3250 }, { "epoch": 0.43, "eval_avg_sts": 0.7786540883853343, "eval_sickr_spearman": 0.7228896906392426, "eval_stsb_spearman": 0.8344184861314262, "step": 3375 }, { "electra_acc": 0.8185, "electra_fix_acc": 0.9468, "electra_rep_acc": 0.4365, "epoch": 0.45, "learning_rate": 1.104057340330219e-06, "loss": 0.0034, "neg_sim": -0.0068, "pos_sim": 0.7518, "step": 3500 }, { "epoch": 0.45, "eval_avg_sts": 0.7785390972296262, "eval_sickr_spearman": 0.7228380572051966, "eval_stsb_spearman": 0.8342401372540559, "step": 3500 }, { "epoch": 0.46, "eval_avg_sts": 0.778413022922875, "eval_sickr_spearman": 0.7219714320419488, "eval_stsb_spearman": 0.8348546138038011, "step": 3625 }, { "epoch": 0.48, "eval_avg_sts": 0.778801601085773, "eval_sickr_spearman": 0.722748959512029, "eval_stsb_spearman": 0.8348542426595171, "step": 3750 }, { "epoch": 0.5, "eval_avg_sts": 0.7782529644249387, "eval_sickr_spearman": 0.7217000082877221, "eval_stsb_spearman": 0.8348059205621555, "step": 3875 }, { "electra_acc": 0.8202, "electra_fix_acc": 0.9468, "electra_rep_acc": 0.4426, "epoch": 0.51, "learning_rate": 9.760655318059644e-07, "loss": 0.0033, "neg_sim": -0.0068, "pos_sim": 0.7559, "step": 4000 }, { "epoch": 0.51, "eval_avg_sts": 0.7782394657347366, "eval_sickr_spearman": 0.7217301237883238, "eval_stsb_spearman": 0.8347488076811493, "step": 4000 }, { "epoch": 0.53, "eval_avg_sts": 0.7782895079814026, "eval_sickr_spearman": 0.7220120663537655, "eval_stsb_spearman": 0.8345669496090398, "step": 4125 }, { "epoch": 0.54, "eval_avg_sts": 0.778080439519264, "eval_sickr_spearman": 0.7216004398144409, "eval_stsb_spearman": 0.834560439224087, "step": 4250 }, { "epoch": 0.56, "eval_avg_sts": 0.7782284276808015, "eval_sickr_spearman": 0.7216616794687745, "eval_stsb_spearman": 0.8347951758928285, "step": 4375 }, { "electra_acc": 0.8217, "electra_fix_acc": 0.9477, "electra_rep_acc": 0.4458, "epoch": 0.58, "learning_rate": 8.480737232817099e-07, "loss": 0.0032, "neg_sim": -0.0068, "pos_sim": 0.7596, "step": 4500 }, { "epoch": 0.58, "eval_avg_sts": 0.7781400153641456, "eval_sickr_spearman": 0.7214844927355691, "eval_stsb_spearman": 0.8347955379927221, "step": 4500 }, { "epoch": 0.59, "eval_avg_sts": 0.7783651714396398, "eval_sickr_spearman": 0.721677865949959, "eval_stsb_spearman": 0.8350524769293207, "step": 4625 }, { "epoch": 0.61, "eval_avg_sts": 0.7779810461862591, "eval_sickr_spearman": 0.7215376151337598, "eval_stsb_spearman": 0.8344244772387583, "step": 4750 }, { "epoch": 0.62, "eval_avg_sts": 0.7774863632137369, "eval_sickr_spearman": 0.7212506225972007, "eval_stsb_spearman": 0.833722103830273, "step": 4875 }, { "electra_acc": 0.8222, "electra_fix_acc": 0.9476, "electra_rep_acc": 0.4478, "epoch": 0.64, "learning_rate": 7.200819147574555e-07, "loss": 0.0033, "neg_sim": -0.0068, "pos_sim": 0.7614, "step": 5000 }, { "epoch": 0.64, "eval_avg_sts": 0.7780441928626822, "eval_sickr_spearman": 0.7222703776172996, "eval_stsb_spearman": 0.833818008108065, "step": 5000 }, { "epoch": 0.66, "eval_avg_sts": 0.7772624437857663, "eval_sickr_spearman": 0.7211198886445525, "eval_stsb_spearman": 0.8334049989269801, "step": 5125 }, { "epoch": 0.67, "eval_avg_sts": 0.777433289264948, "eval_sickr_spearman": 0.7208792047952461, "eval_stsb_spearman": 0.83398737373465, "step": 5250 }, { "epoch": 0.69, "eval_avg_sts": 0.778471716959233, "eval_sickr_spearman": 0.7219667730251094, "eval_stsb_spearman": 0.8349766608933566, "step": 5375 }, { "electra_acc": 0.824, "electra_fix_acc": 0.9479, "electra_rep_acc": 0.4538, "epoch": 0.7, "learning_rate": 5.920901062332011e-07, "loss": 0.0033, "neg_sim": -0.0068, "pos_sim": 0.7634, "step": 5500 }, { "epoch": 0.7, "eval_avg_sts": 0.7783835165868276, "eval_sickr_spearman": 0.7220774366828226, "eval_stsb_spearman": 0.8346895964908326, "step": 5500 }, { "epoch": 0.72, "eval_avg_sts": 0.7777215174417764, "eval_sickr_spearman": 0.7211491395853283, "eval_stsb_spearman": 0.8342938952982245, "step": 5625 }, { "epoch": 0.74, "eval_avg_sts": 0.7783006770868415, "eval_sickr_spearman": 0.7218946783418506, "eval_stsb_spearman": 0.8347066758318323, "step": 5750 }, { "epoch": 0.75, "eval_avg_sts": 0.7786525912567952, "eval_sickr_spearman": 0.7218885303608666, "eval_stsb_spearman": 0.8354166521527239, "step": 5875 }, { "electra_acc": 0.8245, "electra_fix_acc": 0.9479, "electra_rep_acc": 0.4557, "epoch": 0.77, "learning_rate": 4.640982977089466e-07, "loss": 0.0033, "neg_sim": -0.0068, "pos_sim": 0.7639, "step": 6000 }, { "epoch": 0.77, "eval_avg_sts": 0.7786598191346372, "eval_sickr_spearman": 0.7214591323140099, "eval_stsb_spearman": 0.8358605059552645, "step": 6000 }, { "epoch": 0.78, "eval_avg_sts": 0.7785391387872032, "eval_sickr_spearman": 0.7212359798167284, "eval_stsb_spearman": 0.8358422977576779, "step": 6125 }, { "epoch": 0.8, "eval_avg_sts": 0.7780962443354171, "eval_sickr_spearman": 0.7207366965172793, "eval_stsb_spearman": 0.835455792153555, "step": 6250 }, { "epoch": 0.82, "eval_avg_sts": 0.7787170644410477, "eval_sickr_spearman": 0.7214541370794603, "eval_stsb_spearman": 0.8359799918026352, "step": 6375 }, { "electra_acc": 0.8249, "electra_fix_acc": 0.9477, "electra_rep_acc": 0.458, "epoch": 0.83, "learning_rate": 3.361064891846921e-07, "loss": 0.0034, "neg_sim": -0.0068, "pos_sim": 0.765, "step": 6500 }, { "epoch": 0.83, "eval_avg_sts": 0.7787359930993909, "eval_sickr_spearman": 0.7215321395881958, "eval_stsb_spearman": 0.835939846610586, "step": 6500 }, { "epoch": 0.85, "eval_avg_sts": 0.778662230635321, "eval_sickr_spearman": 0.7217551960232744, "eval_stsb_spearman": 0.8355692652473675, "step": 6625 }, { "epoch": 0.86, "eval_avg_sts": 0.7788844288300487, "eval_sickr_spearman": 0.7221871330355174, "eval_stsb_spearman": 0.8355817246245801, "step": 6750 }, { "epoch": 0.88, "eval_avg_sts": 0.7789522767655956, "eval_sickr_spearman": 0.722305728507958, "eval_stsb_spearman": 0.8355988250232333, "step": 6875 }, { "electra_acc": 0.8251, "electra_fix_acc": 0.9476, "electra_rep_acc": 0.4589, "epoch": 0.9, "learning_rate": 2.0811468066043772e-07, "loss": 0.0034, "neg_sim": -0.0069, "pos_sim": 0.7655, "step": 7000 }, { "epoch": 0.9, "eval_avg_sts": 0.7787617595098588, "eval_sickr_spearman": 0.721966628931805, "eval_stsb_spearman": 0.8355568900879128, "step": 7000 }, { "epoch": 0.91, "eval_avg_sts": 0.7786478565950568, "eval_sickr_spearman": 0.7216480386359659, "eval_stsb_spearman": 0.8356476745541477, "step": 7125 }, { "epoch": 0.93, "eval_avg_sts": 0.7787981847358625, "eval_sickr_spearman": 0.7219965523080009, "eval_stsb_spearman": 0.8355998171637242, "step": 7250 }, { "epoch": 0.94, "eval_avg_sts": 0.7788565954562792, "eval_sickr_spearman": 0.7221892050558691, "eval_stsb_spearman": 0.8355239858566892, "step": 7375 }, { "electra_acc": 0.8254, "electra_fix_acc": 0.9477, "electra_rep_acc": 0.4607, "epoch": 0.96, "learning_rate": 8.012287213618328e-08, "loss": 0.0032, "neg_sim": -0.0069, "pos_sim": 0.7665, "step": 7500 }, { "epoch": 0.96, "eval_avg_sts": 0.7788488501171182, "eval_sickr_spearman": 0.7221623076390639, "eval_stsb_spearman": 0.8355353925951726, "step": 7500 }, { "epoch": 0.98, "eval_avg_sts": 0.7788989946596425, "eval_sickr_spearman": 0.7222944411991201, "eval_stsb_spearman": 0.8355035481201649, "step": 7625 }, { "epoch": 0.99, "eval_avg_sts": 0.7788451325075824, "eval_sickr_spearman": 0.7221886767137534, "eval_stsb_spearman": 0.8355015883014113, "step": 7750 }, { "epoch": 1.0, "step": 7813, "train_runtime": 7033.0789, "train_samples_per_second": 1.111 } ], "max_steps": 7813, "num_train_epochs": 1, "total_flos": 285611882411596800, "trial_name": null, "trial_params": null }