|
{ |
|
"best_metric": 0.8433710108449586, |
|
"best_model_checkpoint": "result/my-unsup-simcse-roberta-base/SupCon-48-0.2-3e-6-0.07-0.00", |
|
"epoch": 1.0, |
|
"global_step": 7813, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"eval_avg_sts": 0.7648697833708831, |
|
"eval_sickr_spearman": 0.7122734723505795, |
|
"eval_stsb_spearman": 0.8174660943911868, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_avg_sts": 0.7713268000072269, |
|
"eval_sickr_spearman": 0.7159774387690809, |
|
"eval_stsb_spearman": 0.8266761612453729, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_avg_sts": 0.7717212045194138, |
|
"eval_sickr_spearman": 0.716408085624575, |
|
"eval_stsb_spearman": 0.8270343234142526, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.872008191475746e-05, |
|
"loss": 0.1356, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_avg_sts": 0.7714897999409676, |
|
"eval_sickr_spearman": 0.714245581344527, |
|
"eval_stsb_spearman": 0.8287340185374082, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_avg_sts": 0.7683945074315737, |
|
"eval_sickr_spearman": 0.707755714980412, |
|
"eval_stsb_spearman": 0.8290332998827354, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_avg_sts": 0.7712141135572375, |
|
"eval_sickr_spearman": 0.7136527334594763, |
|
"eval_stsb_spearman": 0.8287754936549987, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_avg_sts": 0.7754955425035636, |
|
"eval_sickr_spearman": 0.7167608740646381, |
|
"eval_stsb_spearman": 0.8342302109424891, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7440163829514915e-05, |
|
"loss": 0.0005, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_avg_sts": 0.7749643365975677, |
|
"eval_sickr_spearman": 0.7165658197616978, |
|
"eval_stsb_spearman": 0.8333628534334376, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_avg_sts": 0.7747453579869918, |
|
"eval_sickr_spearman": 0.707071944220339, |
|
"eval_stsb_spearman": 0.8424187717536445, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_avg_sts": 0.779538087156316, |
|
"eval_sickr_spearman": 0.7169955059951632, |
|
"eval_stsb_spearman": 0.8420806683174688, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_avg_sts": 0.780701521846623, |
|
"eval_sickr_spearman": 0.7190910068887052, |
|
"eval_stsb_spearman": 0.8423120368045408, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.616024574427237e-05, |
|
"loss": 0.0003, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_avg_sts": 0.7822508597322496, |
|
"eval_sickr_spearman": 0.725160745208541, |
|
"eval_stsb_spearman": 0.8393409742559582, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_avg_sts": 0.7806747469546087, |
|
"eval_sickr_spearman": 0.7225573634483924, |
|
"eval_stsb_spearman": 0.8387921304608251, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_avg_sts": 0.783739242864979, |
|
"eval_sickr_spearman": 0.7298345731433808, |
|
"eval_stsb_spearman": 0.8376439125865772, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_avg_sts": 0.7819519916053913, |
|
"eval_sickr_spearman": 0.7310624707355461, |
|
"eval_stsb_spearman": 0.8328415124752365, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4880327659029823e-05, |
|
"loss": 0.0003, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_avg_sts": 0.7824129920157619, |
|
"eval_sickr_spearman": 0.731245325138721, |
|
"eval_stsb_spearman": 0.8335806588928026, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_avg_sts": 0.786146631865762, |
|
"eval_sickr_spearman": 0.7289222528865652, |
|
"eval_stsb_spearman": 0.8433710108449586, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_avg_sts": 0.7797286001800978, |
|
"eval_sickr_spearman": 0.7224563060109667, |
|
"eval_stsb_spearman": 0.8370008943492291, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_avg_sts": 0.780005893079553, |
|
"eval_sickr_spearman": 0.7195845264559818, |
|
"eval_stsb_spearman": 0.8404272597031243, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3600409573787278e-05, |
|
"loss": 0.0003, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_avg_sts": 0.7815935533979442, |
|
"eval_sickr_spearman": 0.7247260157094245, |
|
"eval_stsb_spearman": 0.8384610910864639, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_avg_sts": 0.7790885863000977, |
|
"eval_sickr_spearman": 0.719636736263245, |
|
"eval_stsb_spearman": 0.8385404363369504, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_avg_sts": 0.7786611915735007, |
|
"eval_sickr_spearman": 0.720594044146008, |
|
"eval_stsb_spearman": 0.8367283390009934, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_avg_sts": 0.7740761960283342, |
|
"eval_sickr_spearman": 0.7133921647341745, |
|
"eval_stsb_spearman": 0.8347602273224938, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2320491488544735e-05, |
|
"loss": 0.0003, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_avg_sts": 0.7730594558706145, |
|
"eval_sickr_spearman": 0.7107198103123609, |
|
"eval_stsb_spearman": 0.8353991014288681, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_avg_sts": 0.7735160682870633, |
|
"eval_sickr_spearman": 0.7115497397141101, |
|
"eval_stsb_spearman": 0.8354823968600166, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_avg_sts": 0.7696881215537197, |
|
"eval_sickr_spearman": 0.7034571235261068, |
|
"eval_stsb_spearman": 0.8359191195813325, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_avg_sts": 0.7731307195393922, |
|
"eval_sickr_spearman": 0.7140373184886913, |
|
"eval_stsb_spearman": 0.8322241205900931, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.104057340330219e-05, |
|
"loss": 0.0003, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_avg_sts": 0.7744383399418961, |
|
"eval_sickr_spearman": 0.7103547259103297, |
|
"eval_stsb_spearman": 0.8385219539734627, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_avg_sts": 0.7749032723185794, |
|
"eval_sickr_spearman": 0.7106885420653246, |
|
"eval_stsb_spearman": 0.839118002571834, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_avg_sts": 0.7729790005154229, |
|
"eval_sickr_spearman": 0.7092490979863258, |
|
"eval_stsb_spearman": 0.8367089030445198, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_avg_sts": 0.7675103682931668, |
|
"eval_sickr_spearman": 0.7009157018868134, |
|
"eval_stsb_spearman": 0.83410503469952, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.760655318059645e-06, |
|
"loss": 0.0003, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_avg_sts": 0.7600618545185335, |
|
"eval_sickr_spearman": 0.6939802029635556, |
|
"eval_stsb_spearman": 0.8261435060735114, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_avg_sts": 0.7611442447284995, |
|
"eval_sickr_spearman": 0.6946870766834202, |
|
"eval_stsb_spearman": 0.8276014127735787, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_avg_sts": 0.765201470252675, |
|
"eval_sickr_spearman": 0.6980777842583424, |
|
"eval_stsb_spearman": 0.8323251562470075, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_avg_sts": 0.7601537112935712, |
|
"eval_sickr_spearman": 0.6892433277086275, |
|
"eval_stsb_spearman": 0.831064094878515, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.4807372328171e-06, |
|
"loss": 0.0002, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_avg_sts": 0.7572602743940218, |
|
"eval_sickr_spearman": 0.6881403895263041, |
|
"eval_stsb_spearman": 0.8263801592617394, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_avg_sts": 0.7620805832344215, |
|
"eval_sickr_spearman": 0.6991238944111545, |
|
"eval_stsb_spearman": 0.8250372720576886, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_avg_sts": 0.7638147878715696, |
|
"eval_sickr_spearman": 0.6998619475523625, |
|
"eval_stsb_spearman": 0.8277676281907768, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_avg_sts": 0.762036570031333, |
|
"eval_sickr_spearman": 0.6968174001255049, |
|
"eval_stsb_spearman": 0.8272557399371612, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.200819147574556e-06, |
|
"loss": 0.0002, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_avg_sts": 0.7616123775261012, |
|
"eval_sickr_spearman": 0.6975154841538064, |
|
"eval_stsb_spearman": 0.8257092708983962, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_avg_sts": 0.7572440478626198, |
|
"eval_sickr_spearman": 0.6909725434226024, |
|
"eval_stsb_spearman": 0.8235155523026372, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_avg_sts": 0.7615420687641667, |
|
"eval_sickr_spearman": 0.7007691109652242, |
|
"eval_stsb_spearman": 0.8223150265631094, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_avg_sts": 0.7622736227072948, |
|
"eval_sickr_spearman": 0.70394151718411, |
|
"eval_stsb_spearman": 0.8206057282304795, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5.920901062332012e-06, |
|
"loss": 0.0002, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_avg_sts": 0.7598959005042131, |
|
"eval_sickr_spearman": 0.7014216615093624, |
|
"eval_stsb_spearman": 0.8183701394990638, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_avg_sts": 0.7617340962416614, |
|
"eval_sickr_spearman": 0.705636198536151, |
|
"eval_stsb_spearman": 0.8178319939471719, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_avg_sts": 0.7602681846424262, |
|
"eval_sickr_spearman": 0.6992537297148518, |
|
"eval_stsb_spearman": 0.8212826395700007, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_avg_sts": 0.7600256686195976, |
|
"eval_sickr_spearman": 0.6983496883235836, |
|
"eval_stsb_spearman": 0.8217016489156117, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.640982977089467e-06, |
|
"loss": 0.0003, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_avg_sts": 0.7581145273334985, |
|
"eval_sickr_spearman": 0.6957373727785674, |
|
"eval_stsb_spearman": 0.8204916818884296, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_avg_sts": 0.759467788986251, |
|
"eval_sickr_spearman": 0.6969048167301223, |
|
"eval_stsb_spearman": 0.8220307612423797, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_avg_sts": 0.758567409617105, |
|
"eval_sickr_spearman": 0.6932303894390052, |
|
"eval_stsb_spearman": 0.8239044297952048, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_avg_sts": 0.7593640477073755, |
|
"eval_sickr_spearman": 0.6939898572149447, |
|
"eval_stsb_spearman": 0.8247382381998064, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.3610648918469217e-06, |
|
"loss": 0.0002, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_avg_sts": 0.7596970769949833, |
|
"eval_sickr_spearman": 0.6966290701767661, |
|
"eval_stsb_spearman": 0.8227650838132007, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_avg_sts": 0.7608506682812288, |
|
"eval_sickr_spearman": 0.6957939053849601, |
|
"eval_stsb_spearman": 0.8259074311774975, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_avg_sts": 0.7601011463122427, |
|
"eval_sickr_spearman": 0.6948619579237564, |
|
"eval_stsb_spearman": 0.8253403347007289, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_avg_sts": 0.7613624100018648, |
|
"eval_sickr_spearman": 0.6960120626476921, |
|
"eval_stsb_spearman": 0.8267127573560374, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.0811468066043775e-06, |
|
"loss": 0.0002, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_avg_sts": 0.7632853767372183, |
|
"eval_sickr_spearman": 0.6985597763612739, |
|
"eval_stsb_spearman": 0.8280109771131627, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_avg_sts": 0.761317305690945, |
|
"eval_sickr_spearman": 0.6965424220697717, |
|
"eval_stsb_spearman": 0.8260921893121185, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_avg_sts": 0.7612243104925751, |
|
"eval_sickr_spearman": 0.6965610101060282, |
|
"eval_stsb_spearman": 0.8258876108791219, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_avg_sts": 0.7611869303629479, |
|
"eval_sickr_spearman": 0.6964085113589622, |
|
"eval_stsb_spearman": 0.8259653493669338, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.012287213618329e-07, |
|
"loss": 0.0002, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_avg_sts": 0.761389925660918, |
|
"eval_sickr_spearman": 0.6968965073495735, |
|
"eval_stsb_spearman": 0.8258833439722626, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_avg_sts": 0.7614202638478765, |
|
"eval_sickr_spearman": 0.6971455005794286, |
|
"eval_stsb_spearman": 0.8256950271163245, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_avg_sts": 0.7612860433632234, |
|
"eval_sickr_spearman": 0.6969965561338691, |
|
"eval_stsb_spearman": 0.8255755305925775, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 7813, |
|
"total_flos": 48546397602349056, |
|
"train_runtime": 6045.9604, |
|
"train_samples_per_second": 1.292 |
|
} |
|
], |
|
"max_steps": 7813, |
|
"num_train_epochs": 1, |
|
"total_flos": 48546397602349056, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|