|
{ |
|
"best_metric": 0.8488700693681294, |
|
"best_model_checkpoint": "result/my-sup-simcse-roberta-large_filtered_final_augx_0517_275578", |
|
"epoch": 3.0, |
|
"global_step": 1617, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"eval_avg_sts": 0.5474616885388701, |
|
"eval_sickr_spearman": 0.5431389131459742, |
|
"eval_stsb_spearman": 0.551784463931766, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_avg_sts": 0.8135416256253626, |
|
"eval_sickr_spearman": 0.7961428840645369, |
|
"eval_stsb_spearman": 0.8309403671861882, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_avg_sts": 0.8290864392720618, |
|
"eval_sickr_spearman": 0.8119339251910409, |
|
"eval_stsb_spearman": 0.8462389533530829, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_avg_sts": 0.8323075552549601, |
|
"eval_sickr_spearman": 0.8165074947010796, |
|
"eval_stsb_spearman": 0.8481076158088408, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_avg_sts": 0.8343864858126124, |
|
"eval_sickr_spearman": 0.8194722624684485, |
|
"eval_stsb_spearman": 0.8493007091567765, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_avg_sts": 0.8378187523831807, |
|
"eval_sickr_spearman": 0.8246357979974512, |
|
"eval_stsb_spearman": 0.8510017067689102, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_avg_sts": 0.8378495980528481, |
|
"eval_sickr_spearman": 0.8251682227568927, |
|
"eval_stsb_spearman": 0.8505309733488035, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_avg_sts": 0.8368235864957789, |
|
"eval_sickr_spearman": 0.8220182470623785, |
|
"eval_stsb_spearman": 0.8516289259291794, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_avg_sts": 0.8395252530386117, |
|
"eval_sickr_spearman": 0.8250073185670752, |
|
"eval_stsb_spearman": 0.8540431875101483, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_avg_sts": 0.8379672358563708, |
|
"eval_sickr_spearman": 0.8226389049551619, |
|
"eval_stsb_spearman": 0.8532955667575798, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_avg_sts": 0.8411570915382243, |
|
"eval_sickr_spearman": 0.827407480737039, |
|
"eval_stsb_spearman": 0.8549067023394095, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_avg_sts": 0.8387757918913767, |
|
"eval_sickr_spearman": 0.8256738461617319, |
|
"eval_stsb_spearman": 0.8518777376210215, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_avg_sts": 0.841718922029721, |
|
"eval_sickr_spearman": 0.8281196378780621, |
|
"eval_stsb_spearman": 0.8553182061813799, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_avg_sts": 0.8436536599024109, |
|
"eval_sickr_spearman": 0.8272171815131412, |
|
"eval_stsb_spearman": 0.8600901382916806, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_avg_sts": 0.8402882236960711, |
|
"eval_sickr_spearman": 0.8261090079407611, |
|
"eval_stsb_spearman": 0.854467439451381, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_avg_sts": 0.8418557779216453, |
|
"eval_sickr_spearman": 0.8260196700920862, |
|
"eval_stsb_spearman": 0.8576918857512046, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_avg_sts": 0.844240769517365, |
|
"eval_sickr_spearman": 0.8310969897940077, |
|
"eval_stsb_spearman": 0.8573845492407224, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_avg_sts": 0.8400099841918098, |
|
"eval_sickr_spearman": 0.8256829240399036, |
|
"eval_stsb_spearman": 0.8543370443437162, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_avg_sts": 0.8443099698070835, |
|
"eval_sickr_spearman": 0.8276297686744947, |
|
"eval_stsb_spearman": 0.8609901709396723, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.907854050711195e-06, |
|
"loss": 0.524, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_avg_sts": 0.8402789651908493, |
|
"eval_sickr_spearman": 0.8248319570157246, |
|
"eval_stsb_spearman": 0.8557259733659741, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_avg_sts": 0.8461724605713624, |
|
"eval_sickr_spearman": 0.8330373021988031, |
|
"eval_stsb_spearman": 0.8593076189439217, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_avg_sts": 0.8433982001182605, |
|
"eval_sickr_spearman": 0.8278159852547703, |
|
"eval_stsb_spearman": 0.8589804149817508, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_avg_sts": 0.8449614978763837, |
|
"eval_sickr_spearman": 0.830460289513344, |
|
"eval_stsb_spearman": 0.8594627062394233, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_avg_sts": 0.8455395546247182, |
|
"eval_sickr_spearman": 0.8325919578262689, |
|
"eval_stsb_spearman": 0.8584871514231674, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_avg_sts": 0.8446314587494791, |
|
"eval_sickr_spearman": 0.8281194937847577, |
|
"eval_stsb_spearman": 0.8611434237142005, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_avg_sts": 0.8450148030554683, |
|
"eval_sickr_spearman": 0.8291058604838908, |
|
"eval_stsb_spearman": 0.8609237456270458, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_avg_sts": 0.8426975130677071, |
|
"eval_sickr_spearman": 0.8277219883892558, |
|
"eval_stsb_spearman": 0.8576730377461584, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_avg_sts": 0.8422411960347308, |
|
"eval_sickr_spearman": 0.8265289438606347, |
|
"eval_stsb_spearman": 0.8579534482088269, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_avg_sts": 0.8429589706818645, |
|
"eval_sickr_spearman": 0.8250001619329609, |
|
"eval_stsb_spearman": 0.8609177794307681, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_avg_sts": 0.8412654164348794, |
|
"eval_sickr_spearman": 0.8244867094585874, |
|
"eval_stsb_spearman": 0.8580441234111714, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_avg_sts": 0.8460400500676946, |
|
"eval_sickr_spearman": 0.8282549895219145, |
|
"eval_stsb_spearman": 0.8638251106134746, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_avg_sts": 0.8419603634534258, |
|
"eval_sickr_spearman": 0.8271723684954995, |
|
"eval_stsb_spearman": 0.8567483584113522, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_avg_sts": 0.8470667141064947, |
|
"eval_sickr_spearman": 0.8315851779090248, |
|
"eval_stsb_spearman": 0.8625482503039646, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_avg_sts": 0.8445324992544034, |
|
"eval_sickr_spearman": 0.8299879516618016, |
|
"eval_stsb_spearman": 0.8590770468470054, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_avg_sts": 0.847555914825822, |
|
"eval_sickr_spearman": 0.8307021261090849, |
|
"eval_stsb_spearman": 0.8644097035425593, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_avg_sts": 0.846777031506697, |
|
"eval_sickr_spearman": 0.8312180762007332, |
|
"eval_stsb_spearman": 0.8623359868126609, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_avg_sts": 0.8488700693681294, |
|
"eval_sickr_spearman": 0.8319676976008779, |
|
"eval_stsb_spearman": 0.8657724411353809, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_avg_sts": 0.8447138350851642, |
|
"eval_sickr_spearman": 0.8262617468433344, |
|
"eval_stsb_spearman": 0.8631659233269939, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_avg_sts": 0.8447763720006275, |
|
"eval_sickr_spearman": 0.8259132812024007, |
|
"eval_stsb_spearman": 0.8636394627988543, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.815708101422388e-06, |
|
"loss": 0.2157, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_avg_sts": 0.8460663020851612, |
|
"eval_sickr_spearman": 0.8274893257338897, |
|
"eval_stsb_spearman": 0.8646432784364327, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_avg_sts": 0.8426466827719674, |
|
"eval_sickr_spearman": 0.8228687337755433, |
|
"eval_stsb_spearman": 0.8624246317683915, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_avg_sts": 0.8469945788874567, |
|
"eval_sickr_spearman": 0.8279081569384299, |
|
"eval_stsb_spearman": 0.8660810008364834, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_avg_sts": 0.844677174344695, |
|
"eval_sickr_spearman": 0.8273461450205025, |
|
"eval_stsb_spearman": 0.8620082036688875, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_avg_sts": 0.8446326323236237, |
|
"eval_sickr_spearman": 0.8279530179871731, |
|
"eval_stsb_spearman": 0.8613122466600744, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_avg_sts": 0.8468692691266899, |
|
"eval_sickr_spearman": 0.8292202225364149, |
|
"eval_stsb_spearman": 0.8645183157169649, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_avg_sts": 0.8471682137327401, |
|
"eval_sickr_spearman": 0.8307442013539448, |
|
"eval_stsb_spearman": 0.8635922261115354, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_avg_sts": 0.8446425133608689, |
|
"eval_sickr_spearman": 0.8279184355941376, |
|
"eval_stsb_spearman": 0.8613665911276003, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_avg_sts": 0.8447830471539575, |
|
"eval_sickr_spearman": 0.826634035910581, |
|
"eval_stsb_spearman": 0.8629320583973338, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_avg_sts": 0.8443517239023315, |
|
"eval_sickr_spearman": 0.8245701875128868, |
|
"eval_stsb_spearman": 0.8641332602917762, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_avg_sts": 0.8436894469588044, |
|
"eval_sickr_spearman": 0.8249284034674125, |
|
"eval_stsb_spearman": 0.8624504904501964, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_avg_sts": 0.8446249300083088, |
|
"eval_sickr_spearman": 0.8268909542721736, |
|
"eval_stsb_spearman": 0.862358905744444, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_avg_sts": 0.8460631153640206, |
|
"eval_sickr_spearman": 0.8276132459755998, |
|
"eval_stsb_spearman": 0.8645129847524413, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_avg_sts": 0.8450409206315745, |
|
"eval_sickr_spearman": 0.8269633851731423, |
|
"eval_stsb_spearman": 0.8631184560900067, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_avg_sts": 0.8444144648283911, |
|
"eval_sickr_spearman": 0.8256842208796424, |
|
"eval_stsb_spearman": 0.8631447087771397, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_avg_sts": 0.8441319770667891, |
|
"eval_sickr_spearman": 0.8250716322119007, |
|
"eval_stsb_spearman": 0.8631923219216775, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_avg_sts": 0.8440921655811744, |
|
"eval_sickr_spearman": 0.825079557343638, |
|
"eval_stsb_spearman": 0.8631047738187106, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_avg_sts": 0.8442932554429321, |
|
"eval_sickr_spearman": 0.8250880108174912, |
|
"eval_stsb_spearman": 0.863498500068373, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_avg_sts": 0.8458752869787667, |
|
"eval_sickr_spearman": 0.8263863875515662, |
|
"eval_stsb_spearman": 0.865364186405967, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_avg_sts": 0.8455124701644118, |
|
"eval_sickr_spearman": 0.8256285528330758, |
|
"eval_stsb_spearman": 0.8653963874957478, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 7.235621521335808e-07, |
|
"loss": 0.2017, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_avg_sts": 0.8463352416025529, |
|
"eval_sickr_spearman": 0.8273291420105935, |
|
"eval_stsb_spearman": 0.8653413411945124, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_avg_sts": 0.8451320979354053, |
|
"eval_sickr_spearman": 0.8257100135811147, |
|
"eval_stsb_spearman": 0.8645541822896958, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_avg_sts": 0.8447004499326362, |
|
"eval_sickr_spearman": 0.8251015273595114, |
|
"eval_stsb_spearman": 0.8642993725057609, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_avg_sts": 0.8451416986583431, |
|
"eval_sickr_spearman": 0.8259361920377868, |
|
"eval_stsb_spearman": 0.8643472052788993, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_avg_sts": 0.8452566149686973, |
|
"eval_sickr_spearman": 0.8260989214094591, |
|
"eval_stsb_spearman": 0.8644143085279357, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1617, |
|
"train_runtime": 3774.3125, |
|
"train_samples_per_second": 0.428 |
|
} |
|
], |
|
"max_steps": 1617, |
|
"num_train_epochs": 3, |
|
"total_flos": 388517232930716160, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|