|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3054830287206268e-08, |
|
"logits/chosen": 0.9550814628601074, |
|
"logits/rejected": 1.0664727687835693, |
|
"logps/chosen": -190.47879028320312, |
|
"logps/rejected": -177.6958770751953, |
|
"loss": 0.1031, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": 1.021599531173706, |
|
"logits/rejected": 1.0737736225128174, |
|
"logps/chosen": -277.8912048339844, |
|
"logps/rejected": -268.34259033203125, |
|
"loss": 0.0514, |
|
"rewards/accuracies": 0.4166666567325592, |
|
"rewards/chosen": 2.9820108466083184e-05, |
|
"rewards/margins": 0.000656133983284235, |
|
"rewards/rejected": -0.0006263138493523002, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": 1.0539672374725342, |
|
"logits/rejected": 1.035296082496643, |
|
"logps/chosen": -258.02105712890625, |
|
"logps/rejected": -219.51577758789062, |
|
"loss": 0.0679, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.00037693610647693276, |
|
"rewards/margins": -0.0003669637371785939, |
|
"rewards/rejected": -9.972270163416397e-06, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.9164490861618804e-07, |
|
"logits/chosen": 0.9785920977592468, |
|
"logits/rejected": 0.9956333041191101, |
|
"logps/chosen": -234.4257354736328, |
|
"logps/rejected": -216.3408660888672, |
|
"loss": 0.0522, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.00037702807458117604, |
|
"rewards/margins": 0.0003918584552593529, |
|
"rewards/rejected": -0.0007688865880481899, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.221932114882506e-07, |
|
"logits/chosen": 1.0598526000976562, |
|
"logits/rejected": 1.0610239505767822, |
|
"logps/chosen": -269.3299865722656, |
|
"logps/rejected": -236.5482635498047, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.0001463999942643568, |
|
"rewards/margins": 0.0003663330862764269, |
|
"rewards/rejected": -0.0005127330077812076, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.527415143603135e-07, |
|
"logits/chosen": 1.0115251541137695, |
|
"logits/rejected": 1.0492277145385742, |
|
"logps/chosen": -245.1737518310547, |
|
"logps/rejected": -241.9782257080078, |
|
"loss": 0.053, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.0008561966242268682, |
|
"rewards/margins": 0.00045777196646668017, |
|
"rewards/rejected": -0.0013139685615897179, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.832898172323761e-07, |
|
"logits/chosen": 0.9759989976882935, |
|
"logits/rejected": 1.09335196018219, |
|
"logps/chosen": -283.7034912109375, |
|
"logps/rejected": -234.171142578125, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.0002724650257732719, |
|
"rewards/margins": 0.0006013559177517891, |
|
"rewards/rejected": -0.0003288908628746867, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.138381201044387e-07, |
|
"logits/chosen": 1.0061399936676025, |
|
"logits/rejected": 1.0819300413131714, |
|
"logps/chosen": -272.0354919433594, |
|
"logps/rejected": -231.0594482421875, |
|
"loss": 0.0533, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.0003669198777060956, |
|
"rewards/margins": -0.0001511875307187438, |
|
"rewards/rejected": -0.00021573244885075837, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0443864229765013e-06, |
|
"logits/chosen": 1.0220763683319092, |
|
"logits/rejected": 1.0622212886810303, |
|
"logps/chosen": -283.91650390625, |
|
"logps/rejected": -261.65411376953125, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.00052777084056288, |
|
"rewards/margins": -0.0005939611000940204, |
|
"rewards/rejected": 6.619028135901317e-05, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1749347258485642e-06, |
|
"logits/chosen": 1.0424718856811523, |
|
"logits/rejected": 1.092550277709961, |
|
"logps/chosen": -278.462890625, |
|
"logps/rejected": -235.7613983154297, |
|
"loss": 0.0596, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.0010484650265425444, |
|
"rewards/margins": -0.0007164698326960206, |
|
"rewards/rejected": -0.0003319952520541847, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.305483028720627e-06, |
|
"logits/chosen": 0.9916040301322937, |
|
"logits/rejected": 1.066935420036316, |
|
"logps/chosen": -237.2812957763672, |
|
"logps/rejected": -218.4796905517578, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -4.606993752531707e-05, |
|
"rewards/margins": 0.0002796413318719715, |
|
"rewards/rejected": -0.000325711298501119, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_logits/chosen": 0.9710860252380371, |
|
"eval_logits/rejected": 1.0635499954223633, |
|
"eval_logps/chosen": -277.5683288574219, |
|
"eval_logps/rejected": -243.89227294921875, |
|
"eval_loss": 0.053576212376356125, |
|
"eval_rewards/accuracies": 0.47450000047683716, |
|
"eval_rewards/chosen": -0.00021531998936552554, |
|
"eval_rewards/margins": 0.0005480629042722285, |
|
"eval_rewards/rejected": -0.0007633829372934997, |
|
"eval_runtime": 539.1486, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.927, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4360313315926894e-06, |
|
"logits/chosen": 0.9959138035774231, |
|
"logits/rejected": 1.0810822248458862, |
|
"logps/chosen": -283.58575439453125, |
|
"logps/rejected": -250.1833038330078, |
|
"loss": 0.0529, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0003468608483672142, |
|
"rewards/margins": 0.0019003556808456779, |
|
"rewards/rejected": -0.0015534948324784636, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5665796344647521e-06, |
|
"logits/chosen": 1.0288623571395874, |
|
"logits/rejected": 1.0744774341583252, |
|
"logps/chosen": -227.82470703125, |
|
"logps/rejected": -234.0697479248047, |
|
"loss": 0.0857, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.0001596544898347929, |
|
"rewards/margins": -0.0005016528302803636, |
|
"rewards/rejected": 0.0003419983549974859, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6971279373368146e-06, |
|
"logits/chosen": 1.04789137840271, |
|
"logits/rejected": 1.0942102670669556, |
|
"logps/chosen": -282.67510986328125, |
|
"logps/rejected": -239.3311309814453, |
|
"loss": 0.0449, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00027361814863979816, |
|
"rewards/margins": 0.002355109201744199, |
|
"rewards/rejected": -0.002081490820273757, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8276762402088774e-06, |
|
"logits/chosen": 1.0264707803726196, |
|
"logits/rejected": 1.02583646774292, |
|
"logps/chosen": -264.01715087890625, |
|
"logps/rejected": -237.10549926757812, |
|
"loss": 0.0474, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0003395401581656188, |
|
"rewards/margins": 0.001898492919281125, |
|
"rewards/rejected": -0.0015589528484269977, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9582245430809403e-06, |
|
"logits/chosen": 1.0069233179092407, |
|
"logits/rejected": 1.0264513492584229, |
|
"logps/chosen": -262.6693420410156, |
|
"logps/rejected": -235.0095977783203, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.00045030430192127824, |
|
"rewards/margins": 0.0019102304941043258, |
|
"rewards/rejected": -0.0014599261339753866, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0887728459530026e-06, |
|
"logits/chosen": 0.9561678171157837, |
|
"logits/rejected": 1.085860252380371, |
|
"logps/chosen": -258.2762451171875, |
|
"logps/rejected": -240.168701171875, |
|
"loss": 0.0571, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0003362145507708192, |
|
"rewards/margins": 0.002913826610893011, |
|
"rewards/rejected": -0.0025776117108762264, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.2193211488250653e-06, |
|
"logits/chosen": 0.996711254119873, |
|
"logits/rejected": 1.0722663402557373, |
|
"logps/chosen": -268.49578857421875, |
|
"logps/rejected": -218.3070831298828, |
|
"loss": 0.0502, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0006658965139649808, |
|
"rewards/margins": 0.0029082505498081446, |
|
"rewards/rejected": -0.0022423542104661465, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3498694516971284e-06, |
|
"logits/chosen": 0.9853906631469727, |
|
"logits/rejected": 1.033320665359497, |
|
"logps/chosen": -272.53961181640625, |
|
"logps/rejected": -237.8509979248047, |
|
"loss": 0.0638, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0014408377464860678, |
|
"rewards/margins": 0.003102297894656658, |
|
"rewards/rejected": -0.0016614599153399467, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4804177545691907e-06, |
|
"logits/chosen": 0.9930588006973267, |
|
"logits/rejected": 1.0107576847076416, |
|
"logps/chosen": -269.4462890625, |
|
"logps/rejected": -235.57852172851562, |
|
"loss": 0.0604, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0017847366398200393, |
|
"rewards/margins": 0.004315118305385113, |
|
"rewards/rejected": -0.0025303815491497517, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.610966057441254e-06, |
|
"logits/chosen": 1.0219703912734985, |
|
"logits/rejected": 1.1328296661376953, |
|
"logps/chosen": -278.0802917480469, |
|
"logps/rejected": -249.68466186523438, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.004124884493649006, |
|
"rewards/margins": 0.005005924496799707, |
|
"rewards/rejected": -0.000881039712112397, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": 0.9688093662261963, |
|
"eval_logits/rejected": 1.0617414712905884, |
|
"eval_logps/chosen": -277.1978759765625, |
|
"eval_logps/rejected": -243.9651336669922, |
|
"eval_loss": 0.05182640627026558, |
|
"eval_rewards/accuracies": 0.5879999995231628, |
|
"eval_rewards/chosen": 0.0034893574193120003, |
|
"eval_rewards/margins": 0.00498173339292407, |
|
"eval_rewards/rejected": -0.0014923758571967483, |
|
"eval_runtime": 539.156, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.927, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.741514360313316e-06, |
|
"logits/chosen": 1.0123722553253174, |
|
"logits/rejected": 1.0923728942871094, |
|
"logps/chosen": -260.80499267578125, |
|
"logps/rejected": -233.2253875732422, |
|
"loss": 0.0394, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.002722758101299405, |
|
"rewards/margins": 0.004253658466041088, |
|
"rewards/rejected": -0.0015309008304029703, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.872062663185379e-06, |
|
"logits/chosen": 1.040575385093689, |
|
"logits/rejected": 1.1116924285888672, |
|
"logps/chosen": -277.50433349609375, |
|
"logps/rejected": -243.0937042236328, |
|
"loss": 0.0443, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.004540332593023777, |
|
"rewards/margins": 0.005531441420316696, |
|
"rewards/rejected": -0.0009911099914461374, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0026109660574416e-06, |
|
"logits/chosen": 1.029170036315918, |
|
"logits/rejected": 1.0374505519866943, |
|
"logps/chosen": -268.8113708496094, |
|
"logps/rejected": -275.160400390625, |
|
"loss": 0.0402, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.005796975456178188, |
|
"rewards/margins": 0.006743866018950939, |
|
"rewards/rejected": -0.0009468902135267854, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1331592689295043e-06, |
|
"logits/chosen": 1.027822732925415, |
|
"logits/rejected": 1.0454634428024292, |
|
"logps/chosen": -271.84796142578125, |
|
"logps/rejected": -231.60018920898438, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.009604343213140965, |
|
"rewards/margins": 0.010616883635520935, |
|
"rewards/rejected": -0.001012541470117867, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.263707571801567e-06, |
|
"logits/chosen": 0.9246234893798828, |
|
"logits/rejected": 1.081726312637329, |
|
"logps/chosen": -262.35052490234375, |
|
"logps/rejected": -207.2003631591797, |
|
"loss": 0.0461, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.005290716886520386, |
|
"rewards/margins": 0.007529892958700657, |
|
"rewards/rejected": -0.002239175606518984, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3942558746736293e-06, |
|
"logits/chosen": 1.0157970190048218, |
|
"logits/rejected": 1.0061004161834717, |
|
"logps/chosen": -255.8148956298828, |
|
"logps/rejected": -249.33810424804688, |
|
"loss": 0.0541, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.005560068879276514, |
|
"rewards/margins": 0.00910879485309124, |
|
"rewards/rejected": -0.003548725973814726, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.524804177545692e-06, |
|
"logits/chosen": 0.9183789491653442, |
|
"logits/rejected": 1.0651085376739502, |
|
"logps/chosen": -250.3922119140625, |
|
"logps/rejected": -225.31845092773438, |
|
"loss": 0.0573, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.004733686335384846, |
|
"rewards/margins": 0.010133610107004642, |
|
"rewards/rejected": -0.005399924702942371, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6553524804177547e-06, |
|
"logits/chosen": 0.9533087015151978, |
|
"logits/rejected": 0.9936316609382629, |
|
"logps/chosen": -262.45989990234375, |
|
"logps/rejected": -245.0988006591797, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.009047111496329308, |
|
"rewards/margins": 0.01569160632789135, |
|
"rewards/rejected": -0.006644496228545904, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.7859007832898174e-06, |
|
"logits/chosen": 0.9586070775985718, |
|
"logits/rejected": 1.0487323999404907, |
|
"logps/chosen": -258.58087158203125, |
|
"logps/rejected": -229.2060546875, |
|
"loss": 0.0527, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.009910664521157742, |
|
"rewards/margins": 0.01812123879790306, |
|
"rewards/rejected": -0.008210571482777596, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9164490861618806e-06, |
|
"logits/chosen": 0.9238823056221008, |
|
"logits/rejected": 1.0459530353546143, |
|
"logps/chosen": -257.22900390625, |
|
"logps/rejected": -227.42770385742188, |
|
"loss": 0.0564, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.008343839086592197, |
|
"rewards/margins": 0.01660408265888691, |
|
"rewards/rejected": -0.008260244503617287, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": 0.9498724341392517, |
|
"eval_logits/rejected": 1.0439953804016113, |
|
"eval_logps/chosen": -276.5095520019531, |
|
"eval_logps/rejected": -244.6271514892578, |
|
"eval_loss": 0.047470785677433014, |
|
"eval_rewards/accuracies": 0.6175000071525574, |
|
"eval_rewards/chosen": 0.010372455231845379, |
|
"eval_rewards/margins": 0.018484672531485558, |
|
"eval_rewards/rejected": -0.008112218230962753, |
|
"eval_runtime": 539.0567, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.928, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.046997389033943e-06, |
|
"logits/chosen": 0.9125510454177856, |
|
"logits/rejected": 1.0743194818496704, |
|
"logps/chosen": -256.52203369140625, |
|
"logps/rejected": -227.2289581298828, |
|
"loss": 0.0457, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.013696533627808094, |
|
"rewards/margins": 0.022891724482178688, |
|
"rewards/rejected": -0.009195187129080296, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.177545691906005e-06, |
|
"logits/chosen": 0.8873203992843628, |
|
"logits/rejected": 1.0165441036224365, |
|
"logps/chosen": -282.78265380859375, |
|
"logps/rejected": -257.1055603027344, |
|
"loss": 0.0416, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.009485239163041115, |
|
"rewards/margins": 0.01556326448917389, |
|
"rewards/rejected": -0.006078026257455349, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.308093994778068e-06, |
|
"logits/chosen": 0.967635989189148, |
|
"logits/rejected": 1.0755988359451294, |
|
"logps/chosen": -278.8580017089844, |
|
"logps/rejected": -244.1697235107422, |
|
"loss": 0.0547, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0037938461173325777, |
|
"rewards/margins": 0.02032056823372841, |
|
"rewards/rejected": -0.01652671955525875, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4386422976501306e-06, |
|
"logits/chosen": 0.9581148028373718, |
|
"logits/rejected": 0.9901423454284668, |
|
"logps/chosen": -274.140625, |
|
"logps/rejected": -268.93115234375, |
|
"loss": 0.0454, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.004858436528593302, |
|
"rewards/margins": 0.02044074237346649, |
|
"rewards/rejected": -0.015582305379211903, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.569190600522193e-06, |
|
"logits/chosen": 0.974514365196228, |
|
"logits/rejected": 0.9625232815742493, |
|
"logps/chosen": -284.48089599609375, |
|
"logps/rejected": -250.8555908203125, |
|
"loss": 0.0553, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.008898411877453327, |
|
"rewards/margins": 0.02419520542025566, |
|
"rewards/rejected": -0.015296794474124908, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.699738903394257e-06, |
|
"logits/chosen": 0.9241229295730591, |
|
"logits/rejected": 1.0142980813980103, |
|
"logps/chosen": -301.9035949707031, |
|
"logps/rejected": -258.56298828125, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.00739449355751276, |
|
"rewards/margins": 0.02596624568104744, |
|
"rewards/rejected": -0.018571753054857254, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8302872062663196e-06, |
|
"logits/chosen": 0.9622675180435181, |
|
"logits/rejected": 0.9503853917121887, |
|
"logps/chosen": -305.0982971191406, |
|
"logps/rejected": -260.784423828125, |
|
"loss": 0.0439, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.008727970533072948, |
|
"rewards/margins": 0.026681995019316673, |
|
"rewards/rejected": -0.01795402355492115, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9608355091383814e-06, |
|
"logits/chosen": 0.9401235580444336, |
|
"logits/rejected": 1.042701005935669, |
|
"logps/chosen": -255.1713409423828, |
|
"logps/rejected": -223.62197875976562, |
|
"loss": 0.0635, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.010734304785728455, |
|
"rewards/margins": 0.0330788716673851, |
|
"rewards/rejected": -0.022344566881656647, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9999488562447675e-06, |
|
"logits/chosen": 0.9535024762153625, |
|
"logits/rejected": 0.9772897958755493, |
|
"logps/chosen": -298.8131103515625, |
|
"logps/rejected": -256.53302001953125, |
|
"loss": 0.0383, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.01018481608480215, |
|
"rewards/margins": 0.03526074439287186, |
|
"rewards/rejected": -0.02507592737674713, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999698361256577e-06, |
|
"logits/chosen": 0.9641995429992676, |
|
"logits/rejected": 0.9660250544548035, |
|
"logps/chosen": -278.9350891113281, |
|
"logps/rejected": -263.6481628417969, |
|
"loss": 0.0402, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0038296219427138567, |
|
"rewards/margins": 0.0380658321082592, |
|
"rewards/rejected": -0.03423621878027916, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": 0.8994618058204651, |
|
"eval_logits/rejected": 0.9931817650794983, |
|
"eval_logps/chosen": -277.37713623046875, |
|
"eval_logps/rejected": -246.910888671875, |
|
"eval_loss": 0.04383732005953789, |
|
"eval_rewards/accuracies": 0.6324999928474426, |
|
"eval_rewards/chosen": 0.0016969649586826563, |
|
"eval_rewards/margins": 0.03264675661921501, |
|
"eval_rewards/rejected": -0.03094978630542755, |
|
"eval_runtime": 539.0327, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.928, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999239142174581e-06, |
|
"logits/chosen": 0.8831006288528442, |
|
"logits/rejected": 0.8935713768005371, |
|
"logps/chosen": -293.3919982910156, |
|
"logps/rejected": -245.70181274414062, |
|
"loss": 0.0399, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0013237474486231804, |
|
"rewards/margins": 0.03327140584588051, |
|
"rewards/rejected": -0.03194766119122505, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.99857123734344e-06, |
|
"logits/chosen": 0.9195354580879211, |
|
"logits/rejected": 1.0214719772338867, |
|
"logps/chosen": -278.9452209472656, |
|
"logps/rejected": -246.7372589111328, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.0041291858069598675, |
|
"rewards/margins": 0.03341008350253105, |
|
"rewards/rejected": -0.03753926604986191, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997694702533016e-06, |
|
"logits/chosen": 0.8224805593490601, |
|
"logits/rejected": 0.9571186900138855, |
|
"logps/chosen": -265.44757080078125, |
|
"logps/rejected": -260.35748291015625, |
|
"loss": 0.0417, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.01658559963107109, |
|
"rewards/margins": 0.031039753928780556, |
|
"rewards/rejected": -0.0476253516972065, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996609610933713e-06, |
|
"logits/chosen": 0.8995935320854187, |
|
"logits/rejected": 0.8752401471138, |
|
"logps/chosen": -283.92718505859375, |
|
"logps/rejected": -262.1937561035156, |
|
"loss": 0.0446, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.02005813643336296, |
|
"rewards/margins": 0.027436578646302223, |
|
"rewards/rejected": -0.04749471694231033, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995316053150366e-06, |
|
"logits/chosen": 0.8733296394348145, |
|
"logits/rejected": 0.9702059626579285, |
|
"logps/chosen": -263.5385437011719, |
|
"logps/rejected": -237.3717803955078, |
|
"loss": 0.035, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.02067594602704048, |
|
"rewards/margins": 0.025421470403671265, |
|
"rewards/rejected": -0.046097420156002045, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9938141371946815e-06, |
|
"logits/chosen": 0.9152857661247253, |
|
"logits/rejected": 1.0352412462234497, |
|
"logps/chosen": -252.8705596923828, |
|
"logps/rejected": -253.1604766845703, |
|
"loss": 0.0578, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.00973983108997345, |
|
"rewards/margins": 0.04021871089935303, |
|
"rewards/rejected": -0.049958545714616776, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992103988476206e-06, |
|
"logits/chosen": 0.9032732844352722, |
|
"logits/rejected": 0.9913337826728821, |
|
"logps/chosen": -296.672119140625, |
|
"logps/rejected": -250.1068878173828, |
|
"loss": 0.0611, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.013237145729362965, |
|
"rewards/margins": 0.0339890792965889, |
|
"rewards/rejected": -0.047226227819919586, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990185749791866e-06, |
|
"logits/chosen": 0.852449893951416, |
|
"logits/rejected": 0.9530878067016602, |
|
"logps/chosen": -244.0757293701172, |
|
"logps/rejected": -213.8367919921875, |
|
"loss": 0.041, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.016842521727085114, |
|
"rewards/margins": 0.039869144558906555, |
|
"rewards/rejected": -0.05671166256070137, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9880595813140395e-06, |
|
"logits/chosen": 0.8507224321365356, |
|
"logits/rejected": 0.9859424829483032, |
|
"logps/chosen": -276.9876403808594, |
|
"logps/rejected": -239.509521484375, |
|
"loss": 0.0438, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.022139808163046837, |
|
"rewards/margins": 0.034604597836732864, |
|
"rewards/rejected": -0.05674440786242485, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985725660577184e-06, |
|
"logits/chosen": 0.9554396867752075, |
|
"logits/rejected": 0.9389545321464539, |
|
"logps/chosen": -246.587158203125, |
|
"logps/rejected": -236.51171875, |
|
"loss": 0.0421, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.031007111072540283, |
|
"rewards/margins": 0.036111582070589066, |
|
"rewards/rejected": -0.06711869686841965, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_logits/chosen": 0.8361961841583252, |
|
"eval_logits/rejected": 0.9295023679733276, |
|
"eval_logps/chosen": -281.6956481933594, |
|
"eval_logps/rejected": -251.91390991210938, |
|
"eval_loss": 0.041099708527326584, |
|
"eval_rewards/accuracies": 0.6194999814033508, |
|
"eval_rewards/chosen": -0.0414884127676487, |
|
"eval_rewards/margins": 0.03949163854122162, |
|
"eval_rewards/rejected": -0.08098004758358002, |
|
"eval_runtime": 539.1317, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.927, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983184182463009e-06, |
|
"logits/chosen": 0.8190703392028809, |
|
"logits/rejected": 0.9820553064346313, |
|
"logps/chosen": -286.83819580078125, |
|
"logps/rejected": -225.30502319335938, |
|
"loss": 0.0481, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.034673698246479034, |
|
"rewards/margins": 0.04182344675064087, |
|
"rewards/rejected": -0.0764971375465393, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980435359184203e-06, |
|
"logits/chosen": 0.8918254971504211, |
|
"logits/rejected": 0.9686266779899597, |
|
"logps/chosen": -291.6520080566406, |
|
"logps/rejected": -257.6617126464844, |
|
"loss": 0.0324, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.034312546253204346, |
|
"rewards/margins": 0.04724326729774475, |
|
"rewards/rejected": -0.0815558210015297, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9774794202667236e-06, |
|
"logits/chosen": 0.8753170967102051, |
|
"logits/rejected": 0.9559276700019836, |
|
"logps/chosen": -268.8739929199219, |
|
"logps/rejected": -239.81484985351562, |
|
"loss": 0.0294, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.028610479086637497, |
|
"rewards/margins": 0.04722968488931656, |
|
"rewards/rejected": -0.07584016025066376, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.974316612530615e-06, |
|
"logits/chosen": 0.8446584939956665, |
|
"logits/rejected": 0.9035196304321289, |
|
"logps/chosen": -245.0946502685547, |
|
"logps/rejected": -227.1122589111328, |
|
"loss": 0.0412, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.03522849082946777, |
|
"rewards/margins": 0.045352503657341, |
|
"rewards/rejected": -0.08058099448680878, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970947200069416e-06, |
|
"logits/chosen": 0.8838707208633423, |
|
"logits/rejected": 0.9225630760192871, |
|
"logps/chosen": -267.9327087402344, |
|
"logps/rejected": -232.60745239257812, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.05179372429847717, |
|
"rewards/margins": 0.05676066875457764, |
|
"rewards/rejected": -0.10855438560247421, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967371464228096e-06, |
|
"logits/chosen": 0.9162457585334778, |
|
"logits/rejected": 0.9888601303100586, |
|
"logps/chosen": -288.94476318359375, |
|
"logps/rejected": -267.3609313964844, |
|
"loss": 0.0424, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.05239032953977585, |
|
"rewards/margins": 0.040877897292375565, |
|
"rewards/rejected": -0.09326823055744171, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963589703579569e-06, |
|
"logits/chosen": 0.8896541595458984, |
|
"logits/rejected": 1.071001410484314, |
|
"logps/chosen": -271.9025573730469, |
|
"logps/rejected": -227.18283081054688, |
|
"loss": 0.0489, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.039633095264434814, |
|
"rewards/margins": 0.06890513002872467, |
|
"rewards/rejected": -0.10853822529315948, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9596022338997615e-06, |
|
"logits/chosen": 0.9138982892036438, |
|
"logits/rejected": 0.8892068862915039, |
|
"logps/chosen": -261.0631408691406, |
|
"logps/rejected": -233.52206420898438, |
|
"loss": 0.0463, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04562286287546158, |
|
"rewards/margins": 0.03961200267076492, |
|
"rewards/rejected": -0.0852348655462265, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.955409388141243e-06, |
|
"logits/chosen": 0.9704087972640991, |
|
"logits/rejected": 0.9119867086410522, |
|
"logps/chosen": -272.70599365234375, |
|
"logps/rejected": -236.0823211669922, |
|
"loss": 0.0383, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04884126037359238, |
|
"rewards/margins": 0.03933250904083252, |
|
"rewards/rejected": -0.0881737768650055, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.951011516405429e-06, |
|
"logits/chosen": 0.9375241994857788, |
|
"logits/rejected": 0.9329082369804382, |
|
"logps/chosen": -243.24472045898438, |
|
"logps/rejected": -266.67962646484375, |
|
"loss": 0.0439, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.06609703600406647, |
|
"rewards/margins": 0.03910304233431816, |
|
"rewards/rejected": -0.10520007461309433, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": 0.8607339262962341, |
|
"eval_logits/rejected": 0.952020525932312, |
|
"eval_logps/chosen": -284.5547180175781, |
|
"eval_logps/rejected": -255.50050354003906, |
|
"eval_loss": 0.03948886692523956, |
|
"eval_rewards/accuracies": 0.6175000071525574, |
|
"eval_rewards/chosen": -0.07007911801338196, |
|
"eval_rewards/margins": 0.046766627579927444, |
|
"eval_rewards/rejected": -0.11684573441743851, |
|
"eval_runtime": 539.068, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.928, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.946408985913344e-06, |
|
"logits/chosen": 0.9299923777580261, |
|
"logits/rejected": 0.949097752571106, |
|
"logps/chosen": -251.2418212890625, |
|
"logps/rejected": -229.9620819091797, |
|
"loss": 0.0418, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08045734465122223, |
|
"rewards/margins": 0.039681874215602875, |
|
"rewards/rejected": -0.1201392188668251, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.941602180974958e-06, |
|
"logits/chosen": 0.9066370129585266, |
|
"logits/rejected": 0.9455870389938354, |
|
"logps/chosen": -293.0451354980469, |
|
"logps/rejected": -237.9638214111328, |
|
"loss": 0.0459, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.07144733518362045, |
|
"rewards/margins": 0.0446074940264225, |
|
"rewards/rejected": -0.11605483293533325, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.936591502957101e-06, |
|
"logits/chosen": 0.8010427355766296, |
|
"logits/rejected": 0.9337188005447388, |
|
"logps/chosen": -301.9491882324219, |
|
"logps/rejected": -257.299560546875, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.049509234726428986, |
|
"rewards/margins": 0.03574910759925842, |
|
"rewards/rejected": -0.085258349776268, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931377370249946e-06, |
|
"logits/chosen": 0.9087220430374146, |
|
"logits/rejected": 0.9815553426742554, |
|
"logps/chosen": -279.0362548828125, |
|
"logps/rejected": -235.7110137939453, |
|
"loss": 0.0311, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0523492693901062, |
|
"rewards/margins": 0.03988610580563545, |
|
"rewards/rejected": -0.09223536401987076, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.925960218232073e-06, |
|
"logits/chosen": 0.9217544794082642, |
|
"logits/rejected": 1.0427885055541992, |
|
"logps/chosen": -308.48004150390625, |
|
"logps/rejected": -286.24566650390625, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.05535319447517395, |
|
"rewards/margins": 0.054420508444309235, |
|
"rewards/rejected": -0.10977371037006378, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.920340499234116e-06, |
|
"logits/chosen": 0.8749852180480957, |
|
"logits/rejected": 0.9889238476753235, |
|
"logps/chosen": -288.8839416503906, |
|
"logps/rejected": -217.01760864257812, |
|
"loss": 0.0459, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.051978230476379395, |
|
"rewards/margins": 0.03869297355413437, |
|
"rewards/rejected": -0.09067119657993317, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.914518682500995e-06, |
|
"logits/chosen": 0.955339252948761, |
|
"logits/rejected": 0.939558207988739, |
|
"logps/chosen": -282.4548034667969, |
|
"logps/rejected": -278.93646240234375, |
|
"loss": 0.0427, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.05497046187520027, |
|
"rewards/margins": 0.046811606734991074, |
|
"rewards/rejected": -0.10178206861019135, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9084952541527315e-06, |
|
"logits/chosen": 0.9087344408035278, |
|
"logits/rejected": 0.9323067665100098, |
|
"logps/chosen": -258.89776611328125, |
|
"logps/rejected": -246.7029266357422, |
|
"loss": 0.0366, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03954412043094635, |
|
"rewards/margins": 0.0509122833609581, |
|
"rewards/rejected": -0.09045641124248505, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.902270717143858e-06, |
|
"logits/chosen": 0.8921301960945129, |
|
"logits/rejected": 0.9605242013931274, |
|
"logps/chosen": -288.9732666015625, |
|
"logps/rejected": -252.2064971923828, |
|
"loss": 0.0353, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03391667455434799, |
|
"rewards/margins": 0.05217113345861435, |
|
"rewards/rejected": -0.08608780801296234, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.895845591221427e-06, |
|
"logits/chosen": 0.8141648173332214, |
|
"logits/rejected": 0.9764218330383301, |
|
"logps/chosen": -251.57406616210938, |
|
"logps/rejected": -232.8114776611328, |
|
"loss": 0.0363, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.04507957026362419, |
|
"rewards/margins": 0.047078561037778854, |
|
"rewards/rejected": -0.09215812385082245, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_logits/chosen": 0.894931435585022, |
|
"eval_logits/rejected": 0.9895482063293457, |
|
"eval_logps/chosen": -281.16192626953125, |
|
"eval_logps/rejected": -251.89256286621094, |
|
"eval_loss": 0.03899623081088066, |
|
"eval_rewards/accuracies": 0.6309999823570251, |
|
"eval_rewards/chosen": -0.036151450127363205, |
|
"eval_rewards/margins": 0.04461483657360077, |
|
"eval_rewards/rejected": -0.08076628297567368, |
|
"eval_runtime": 539.1732, |
|
"eval_samples_per_second": 3.709, |
|
"eval_steps_per_second": 0.927, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8892204128816e-06, |
|
"logits/chosen": 0.9831956028938293, |
|
"logits/rejected": 1.0133693218231201, |
|
"logps/chosen": -286.310546875, |
|
"logps/rejected": -283.8514099121094, |
|
"loss": 0.035, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.034512270241975784, |
|
"rewards/margins": 0.047017090022563934, |
|
"rewards/rejected": -0.08152935653924942, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882395735324864e-06, |
|
"logits/chosen": 0.8945713043212891, |
|
"logits/rejected": 0.8778280019760132, |
|
"logps/chosen": -281.03704833984375, |
|
"logps/rejected": -255.6659698486328, |
|
"loss": 0.0347, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.027182284742593765, |
|
"rewards/margins": 0.04411619156599045, |
|
"rewards/rejected": -0.07129846513271332, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.87537212840983e-06, |
|
"logits/chosen": 0.9152518510818481, |
|
"logits/rejected": 0.9284723997116089, |
|
"logps/chosen": -302.0511169433594, |
|
"logps/rejected": -256.92071533203125, |
|
"loss": 0.0389, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02550722099840641, |
|
"rewards/margins": 0.049370888620615005, |
|
"rewards/rejected": -0.07487811148166656, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8681501786056545e-06, |
|
"logits/chosen": 0.8889036178588867, |
|
"logits/rejected": 0.9711803197860718, |
|
"logps/chosen": -273.5214538574219, |
|
"logps/rejected": -221.85977172851562, |
|
"loss": 0.0501, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.03598209470510483, |
|
"rewards/margins": 0.03736092895269394, |
|
"rewards/rejected": -0.07334302365779877, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.860730488943068e-06, |
|
"logits/chosen": 0.9261956214904785, |
|
"logits/rejected": 0.9333757162094116, |
|
"logps/chosen": -279.0644226074219, |
|
"logps/rejected": -245.6189422607422, |
|
"loss": 0.0386, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.016734056174755096, |
|
"rewards/margins": 0.05964844301342964, |
|
"rewards/rejected": -0.07638250291347504, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.853113678964022e-06, |
|
"logits/chosen": 0.9745391607284546, |
|
"logits/rejected": 0.9919463396072388, |
|
"logps/chosen": -249.7964630126953, |
|
"logps/rejected": -235.2704315185547, |
|
"loss": 0.0336, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.029250269755721092, |
|
"rewards/margins": 0.05691809579730034, |
|
"rewards/rejected": -0.08616836369037628, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.845300384669958e-06, |
|
"logits/chosen": 0.9550157785415649, |
|
"logits/rejected": 0.9655323028564453, |
|
"logps/chosen": -266.6517028808594, |
|
"logps/rejected": -224.41366577148438, |
|
"loss": 0.0503, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04020417481660843, |
|
"rewards/margins": 0.03571712225675583, |
|
"rewards/rejected": -0.07592129707336426, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.837291258468701e-06, |
|
"logits/chosen": 0.8910456895828247, |
|
"logits/rejected": 0.9127016067504883, |
|
"logps/chosen": -280.16632080078125, |
|
"logps/rejected": -249.5512237548828, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.04826400801539421, |
|
"rewards/margins": 0.05860195308923721, |
|
"rewards/rejected": -0.10686596482992172, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.829086969119984e-06, |
|
"logits/chosen": 0.8841217756271362, |
|
"logits/rejected": 0.9553950428962708, |
|
"logps/chosen": -249.34146118164062, |
|
"logps/rejected": -241.98623657226562, |
|
"loss": 0.0306, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.04528197646141052, |
|
"rewards/margins": 0.04499911516904831, |
|
"rewards/rejected": -0.09028108417987823, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.820688201679605e-06, |
|
"logits/chosen": 0.9039901494979858, |
|
"logits/rejected": 0.9560089111328125, |
|
"logps/chosen": -262.4331359863281, |
|
"logps/rejected": -234.8318328857422, |
|
"loss": 0.0402, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.04591182619333267, |
|
"rewards/margins": 0.0473395399749279, |
|
"rewards/rejected": -0.09325136244297028, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": 0.9000641107559204, |
|
"eval_logits/rejected": 0.9937340021133423, |
|
"eval_logps/chosen": -282.6900939941406, |
|
"eval_logps/rejected": -253.87200927734375, |
|
"eval_loss": 0.03816115856170654, |
|
"eval_rewards/accuracies": 0.621999979019165, |
|
"eval_rewards/chosen": -0.05143279209733009, |
|
"eval_rewards/margins": 0.049128152430057526, |
|
"eval_rewards/rejected": -0.10056094080209732, |
|
"eval_runtime": 538.996, |
|
"eval_samples_per_second": 3.711, |
|
"eval_steps_per_second": 0.928, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8120956574422315e-06, |
|
"logits/chosen": 0.985218346118927, |
|
"logits/rejected": 0.9626695513725281, |
|
"logps/chosen": -285.3841247558594, |
|
"logps/rejected": -283.31024169921875, |
|
"loss": 0.0464, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.05067628622055054, |
|
"rewards/margins": 0.054511237889528275, |
|
"rewards/rejected": -0.10518752038478851, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.803310053882831e-06, |
|
"logits/chosen": 0.916561484336853, |
|
"logits/rejected": 0.9501992464065552, |
|
"logps/chosen": -215.0513458251953, |
|
"logps/rejected": -205.7407989501953, |
|
"loss": 0.0444, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05079100281000137, |
|
"rewards/margins": 0.042504359036684036, |
|
"rewards/rejected": -0.09329536557197571, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.794332124596775e-06, |
|
"logits/chosen": 0.9133389592170715, |
|
"logits/rejected": 0.9860326647758484, |
|
"logps/chosen": -280.44476318359375, |
|
"logps/rejected": -256.5655212402344, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.06920581310987473, |
|
"rewards/margins": 0.037345677614212036, |
|
"rewards/rejected": -0.10655149072408676, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.785162619238575e-06, |
|
"logits/chosen": 0.9965925216674805, |
|
"logits/rejected": 1.0270875692367554, |
|
"logps/chosen": -269.73480224609375, |
|
"logps/rejected": -253.419677734375, |
|
"loss": 0.0389, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.0890035331249237, |
|
"rewards/margins": 0.05010632425546646, |
|
"rewards/rejected": -0.13910984992980957, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.775802303459288e-06, |
|
"logits/chosen": 0.9339388012886047, |
|
"logits/rejected": 1.0584567785263062, |
|
"logps/chosen": -296.1967468261719, |
|
"logps/rejected": -267.1576232910156, |
|
"loss": 0.0395, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.09242481738328934, |
|
"rewards/margins": 0.05164768174290657, |
|
"rewards/rejected": -0.1440725028514862, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.766251958842589e-06, |
|
"logits/chosen": 0.9728446006774902, |
|
"logits/rejected": 1.0179331302642822, |
|
"logps/chosen": -308.83551025390625, |
|
"logps/rejected": -272.84796142578125, |
|
"loss": 0.0325, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08283834159374237, |
|
"rewards/margins": 0.047348491847515106, |
|
"rewards/rejected": -0.13018682599067688, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7565123828395066e-06, |
|
"logits/chosen": 0.9140795469284058, |
|
"logits/rejected": 1.0925973653793335, |
|
"logps/chosen": -319.13250732421875, |
|
"logps/rejected": -295.2183837890625, |
|
"loss": 0.0217, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.05658285692334175, |
|
"rewards/margins": 0.04898856207728386, |
|
"rewards/rejected": -0.10557142645120621, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.746584388701831e-06, |
|
"logits/chosen": 1.016638994216919, |
|
"logits/rejected": 1.144810438156128, |
|
"logps/chosen": -261.43853759765625, |
|
"logps/rejected": -228.3360595703125, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.05251890420913696, |
|
"rewards/margins": 0.07163821160793304, |
|
"rewards/rejected": -0.12415711581707001, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.736468805414218e-06, |
|
"logits/chosen": 1.0147383213043213, |
|
"logits/rejected": 1.1735047101974487, |
|
"logps/chosen": -297.06878662109375, |
|
"logps/rejected": -270.4248046875, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.045632537454366684, |
|
"rewards/margins": 0.08101598918437958, |
|
"rewards/rejected": -0.12664853036403656, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7261664776249595e-06, |
|
"logits/chosen": 1.0279176235198975, |
|
"logits/rejected": 1.0230156183242798, |
|
"logps/chosen": -273.8691101074219, |
|
"logps/rejected": -256.41912841796875, |
|
"loss": 0.0381, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.053291238844394684, |
|
"rewards/margins": 0.059307873249053955, |
|
"rewards/rejected": -0.11259911209344864, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": 0.9533628225326538, |
|
"eval_logits/rejected": 1.0464704036712646, |
|
"eval_logps/chosen": -283.0850830078125, |
|
"eval_logps/rejected": -254.8046875, |
|
"eval_loss": 0.03756963834166527, |
|
"eval_rewards/accuracies": 0.6315000057220459, |
|
"eval_rewards/chosen": -0.05538267269730568, |
|
"eval_rewards/margins": 0.054504893720149994, |
|
"eval_rewards/rejected": -0.10988757014274597, |
|
"eval_runtime": 538.9934, |
|
"eval_samples_per_second": 3.711, |
|
"eval_steps_per_second": 0.928, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.715678265575463e-06, |
|
"logits/chosen": 1.033189058303833, |
|
"logits/rejected": 1.1023738384246826, |
|
"logps/chosen": -313.0371398925781, |
|
"logps/rejected": -296.1219482421875, |
|
"loss": 0.0362, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.05556187033653259, |
|
"rewards/margins": 0.036861807107925415, |
|
"rewards/rejected": -0.09242367744445801, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.705005045028415e-06, |
|
"logits/chosen": 1.0147944688796997, |
|
"logits/rejected": 1.0735704898834229, |
|
"logps/chosen": -336.9757995605469, |
|
"logps/rejected": -290.46820068359375, |
|
"loss": 0.0291, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.05297808721661568, |
|
"rewards/margins": 0.06880663335323334, |
|
"rewards/rejected": -0.12178472429513931, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.694147707194659e-06, |
|
"logits/chosen": 1.0121935606002808, |
|
"logits/rejected": 1.0971285104751587, |
|
"logps/chosen": -291.9516296386719, |
|
"logps/rejected": -246.9907684326172, |
|
"loss": 0.0394, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.0664924904704094, |
|
"rewards/margins": 0.05605294555425644, |
|
"rewards/rejected": -0.12254543602466583, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.683107158658782e-06, |
|
"logits/chosen": 0.976874053478241, |
|
"logits/rejected": 0.9745148420333862, |
|
"logps/chosen": -267.6925354003906, |
|
"logps/rejected": -239.68063354492188, |
|
"loss": 0.0418, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06668306887149811, |
|
"rewards/margins": 0.06082264333963394, |
|
"rewards/rejected": -0.12750570476055145, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.671884321303407e-06, |
|
"logits/chosen": 0.9618045687675476, |
|
"logits/rejected": 0.9925098419189453, |
|
"logps/chosen": -293.56109619140625, |
|
"logps/rejected": -280.6075439453125, |
|
"loss": 0.036, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.08693472295999527, |
|
"rewards/margins": 0.055363357067108154, |
|
"rewards/rejected": -0.14229807257652283, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.660480132232224e-06, |
|
"logits/chosen": 0.9112693071365356, |
|
"logits/rejected": 0.9766784906387329, |
|
"logps/chosen": -284.80621337890625, |
|
"logps/rejected": -257.4582824707031, |
|
"loss": 0.0321, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.08965489268302917, |
|
"rewards/margins": 0.05001254007220268, |
|
"rewards/rejected": -0.13966743648052216, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6488955436917414e-06, |
|
"logits/chosen": 0.9918138384819031, |
|
"logits/rejected": 1.0566661357879639, |
|
"logps/chosen": -285.3071594238281, |
|
"logps/rejected": -222.27645874023438, |
|
"loss": 0.0308, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.07797913253307343, |
|
"rewards/margins": 0.035942643880844116, |
|
"rewards/rejected": -0.11392178386449814, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6371315229917644e-06, |
|
"logits/chosen": 0.9261223077774048, |
|
"logits/rejected": 0.991136908531189, |
|
"logps/chosen": -268.17742919921875, |
|
"logps/rejected": -251.2354278564453, |
|
"loss": 0.0314, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.055728018283843994, |
|
"rewards/margins": 0.05425562709569931, |
|
"rewards/rejected": -0.1099836453795433, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.625189052424638e-06, |
|
"logits/chosen": 0.9730453491210938, |
|
"logits/rejected": 1.032597303390503, |
|
"logps/chosen": -249.26864624023438, |
|
"logps/rejected": -216.4525909423828, |
|
"loss": 0.0417, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04507770389318466, |
|
"rewards/margins": 0.03995997831225395, |
|
"rewards/rejected": -0.08503767102956772, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.613069129183218e-06, |
|
"logits/chosen": 0.9484704732894897, |
|
"logits/rejected": 0.9633451700210571, |
|
"logps/chosen": -264.92633056640625, |
|
"logps/rejected": -257.33941650390625, |
|
"loss": 0.0421, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.04708124324679375, |
|
"rewards/margins": 0.04397277534008026, |
|
"rewards/rejected": -0.09105401486158371, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 0.9447739124298096, |
|
"eval_logits/rejected": 1.0398797988891602, |
|
"eval_logps/chosen": -281.62677001953125, |
|
"eval_logps/rejected": -253.11135864257812, |
|
"eval_loss": 0.0373673252761364, |
|
"eval_rewards/accuracies": 0.6269999742507935, |
|
"eval_rewards/chosen": -0.040799498558044434, |
|
"eval_rewards/margins": 0.05215470865368843, |
|
"eval_rewards/rejected": -0.09295421838760376, |
|
"eval_runtime": 539.0882, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.927, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.600772765277607e-06, |
|
"logits/chosen": 0.9384505152702332, |
|
"logits/rejected": 1.036522388458252, |
|
"logps/chosen": -300.46435546875, |
|
"logps/rejected": -254.4575958251953, |
|
"loss": 0.0411, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.04996770992875099, |
|
"rewards/margins": 0.05875014141201973, |
|
"rewards/rejected": -0.10871784389019012, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.588300987450652e-06, |
|
"logits/chosen": 1.0055780410766602, |
|
"logits/rejected": 1.0831704139709473, |
|
"logps/chosen": -283.211181640625, |
|
"logps/rejected": -268.2267150878906, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05758129805326462, |
|
"rewards/margins": 0.062214724719524384, |
|
"rewards/rejected": -0.1197960153222084, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5756548370922136e-06, |
|
"logits/chosen": 0.991938591003418, |
|
"logits/rejected": 1.0386155843734741, |
|
"logps/chosen": -283.02081298828125, |
|
"logps/rejected": -256.51434326171875, |
|
"loss": 0.0317, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.052321650087833405, |
|
"rewards/margins": 0.03847939521074295, |
|
"rewards/rejected": -0.09080104529857635, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562835370152206e-06, |
|
"logits/chosen": 0.9276207089424133, |
|
"logits/rejected": 1.045037865638733, |
|
"logps/chosen": -254.05892944335938, |
|
"logps/rejected": -240.1755828857422, |
|
"loss": 0.0383, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.04577355459332466, |
|
"rewards/margins": 0.036837171763181686, |
|
"rewards/rejected": -0.08261072635650635, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.54984365705243e-06, |
|
"logits/chosen": 1.0454961061477661, |
|
"logits/rejected": 1.0517162084579468, |
|
"logps/chosen": -255.4774627685547, |
|
"logps/rejected": -232.04190063476562, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.03626035898923874, |
|
"rewards/margins": 0.043095506727695465, |
|
"rewards/rejected": -0.07935585826635361, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536680782597191e-06, |
|
"logits/chosen": 0.9645137786865234, |
|
"logits/rejected": 1.0626866817474365, |
|
"logps/chosen": -298.6683349609375, |
|
"logps/rejected": -269.98724365234375, |
|
"loss": 0.0301, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.02404281124472618, |
|
"rewards/margins": 0.05357781797647476, |
|
"rewards/rejected": -0.07762061804533005, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.523347845882718e-06, |
|
"logits/chosen": 1.0478742122650146, |
|
"logits/rejected": 1.112269639968872, |
|
"logps/chosen": -258.76458740234375, |
|
"logps/rejected": -208.38107299804688, |
|
"loss": 0.0384, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.03418079391121864, |
|
"rewards/margins": 0.05562075227499008, |
|
"rewards/rejected": -0.08980154246091843, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.50984596020539e-06, |
|
"logits/chosen": 0.9761837720870972, |
|
"logits/rejected": 1.0493123531341553, |
|
"logps/chosen": -300.64105224609375, |
|
"logps/rejected": -265.405517578125, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03310775384306908, |
|
"rewards/margins": 0.04876155033707619, |
|
"rewards/rejected": -0.08186930418014526, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4961762529687745e-06, |
|
"logits/chosen": 0.9403045773506165, |
|
"logits/rejected": 1.0544614791870117, |
|
"logps/chosen": -305.84027099609375, |
|
"logps/rejected": -234.25341796875, |
|
"loss": 0.0407, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.04744723066687584, |
|
"rewards/margins": 0.04316466301679611, |
|
"rewards/rejected": -0.09061190485954285, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.482339865589492e-06, |
|
"logits/chosen": 0.8826369047164917, |
|
"logits/rejected": 1.0571672916412354, |
|
"logps/chosen": -266.90252685546875, |
|
"logps/rejected": -235.89877319335938, |
|
"loss": 0.0393, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.05000295490026474, |
|
"rewards/margins": 0.058589059859514236, |
|
"rewards/rejected": -0.10859201848506927, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": 0.9608851075172424, |
|
"eval_logits/rejected": 1.0557035207748413, |
|
"eval_logps/chosen": -283.3030700683594, |
|
"eval_logps/rejected": -254.34910583496094, |
|
"eval_loss": 0.03702974691987038, |
|
"eval_rewards/accuracies": 0.6284999847412109, |
|
"eval_rewards/chosen": -0.05756256729364395, |
|
"eval_rewards/margins": 0.04776925593614578, |
|
"eval_rewards/rejected": -0.10533181577920914, |
|
"eval_runtime": 539.1778, |
|
"eval_samples_per_second": 3.709, |
|
"eval_steps_per_second": 0.927, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.468337953401909e-06, |
|
"logits/chosen": 1.0197701454162598, |
|
"logits/rejected": 1.0784578323364258, |
|
"logps/chosen": -299.9832458496094, |
|
"logps/rejected": -247.8249969482422, |
|
"loss": 0.039, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04413250833749771, |
|
"rewards/margins": 0.051008790731430054, |
|
"rewards/rejected": -0.09514130651950836, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.45417168556166e-06, |
|
"logits/chosen": 0.9736183285713196, |
|
"logits/rejected": 1.0494579076766968, |
|
"logps/chosen": -261.1857604980469, |
|
"logps/rejected": -262.29241943359375, |
|
"loss": 0.0333, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.03721706196665764, |
|
"rewards/margins": 0.04604244977235794, |
|
"rewards/rejected": -0.08325951546430588, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.439842244948036e-06, |
|
"logits/chosen": 0.9458913803100586, |
|
"logits/rejected": 1.0195437669754028, |
|
"logps/chosen": -261.69512939453125, |
|
"logps/rejected": -244.91513061523438, |
|
"loss": 0.0403, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03936644643545151, |
|
"rewards/margins": 0.04326556995511055, |
|
"rewards/rejected": -0.08263202011585236, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.425350828065204e-06, |
|
"logits/chosen": 1.0116994380950928, |
|
"logits/rejected": 1.0178403854370117, |
|
"logps/chosen": -282.7467956542969, |
|
"logps/rejected": -266.43963623046875, |
|
"loss": 0.0363, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03308098763227463, |
|
"rewards/margins": 0.048868577927351, |
|
"rewards/rejected": -0.08194957673549652, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.410698644942303e-06, |
|
"logits/chosen": 1.0054022073745728, |
|
"logits/rejected": 1.0613911151885986, |
|
"logps/chosen": -281.3987731933594, |
|
"logps/rejected": -285.315673828125, |
|
"loss": 0.0349, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.031625766307115555, |
|
"rewards/margins": 0.03699468821287155, |
|
"rewards/rejected": -0.0686204582452774, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.395886919032406e-06, |
|
"logits/chosen": 1.0045572519302368, |
|
"logits/rejected": 1.0691404342651367, |
|
"logps/chosen": -274.28741455078125, |
|
"logps/rejected": -232.10574340820312, |
|
"loss": 0.0388, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03036217950284481, |
|
"rewards/margins": 0.04830170422792435, |
|
"rewards/rejected": -0.0786639004945755, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.380916887110366e-06, |
|
"logits/chosen": 0.9450492858886719, |
|
"logits/rejected": 1.0655186176300049, |
|
"logps/chosen": -251.56259155273438, |
|
"logps/rejected": -223.6671600341797, |
|
"loss": 0.0337, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.03250129520893097, |
|
"rewards/margins": 0.06061319261789322, |
|
"rewards/rejected": -0.09311448037624359, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.365789799169539e-06, |
|
"logits/chosen": 1.004997968673706, |
|
"logits/rejected": 0.9879255294799805, |
|
"logps/chosen": -313.8050842285156, |
|
"logps/rejected": -263.55718994140625, |
|
"loss": 0.0399, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0565299317240715, |
|
"rewards/margins": 0.057772088795900345, |
|
"rewards/rejected": -0.11430201679468155, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.350506918317416e-06, |
|
"logits/chosen": 0.9306057095527649, |
|
"logits/rejected": 0.8915265798568726, |
|
"logps/chosen": -307.5498962402344, |
|
"logps/rejected": -276.47088623046875, |
|
"loss": 0.0349, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.047587279230356216, |
|
"rewards/margins": 0.03898516297340393, |
|
"rewards/rejected": -0.08657244592905045, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.335069520670149e-06, |
|
"logits/chosen": 0.9720792770385742, |
|
"logits/rejected": 1.0783460140228271, |
|
"logps/chosen": -281.57232666015625, |
|
"logps/rejected": -217.4803009033203, |
|
"loss": 0.0533, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.052179861813783646, |
|
"rewards/margins": 0.06668353080749512, |
|
"rewards/rejected": -0.11886338889598846, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": 0.9417441487312317, |
|
"eval_logits/rejected": 1.0367752313613892, |
|
"eval_logps/chosen": -283.6021728515625, |
|
"eval_logps/rejected": -255.3543701171875, |
|
"eval_loss": 0.0369240865111351, |
|
"eval_rewards/accuracies": 0.6209999918937683, |
|
"eval_rewards/chosen": -0.0605538934469223, |
|
"eval_rewards/margins": 0.05483054369688034, |
|
"eval_rewards/rejected": -0.11538443714380264, |
|
"eval_runtime": 538.9866, |
|
"eval_samples_per_second": 3.711, |
|
"eval_steps_per_second": 0.928, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.319478895246e-06, |
|
"logits/chosen": 0.9820619821548462, |
|
"logits/rejected": 0.9384095072746277, |
|
"logps/chosen": -279.18499755859375, |
|
"logps/rejected": -242.414794921875, |
|
"loss": 0.0357, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.05051257088780403, |
|
"rewards/margins": 0.04647805169224739, |
|
"rewards/rejected": -0.09699061512947083, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.303736343857704e-06, |
|
"logits/chosen": 1.0831791162490845, |
|
"logits/rejected": 1.034220814704895, |
|
"logps/chosen": -269.3099365234375, |
|
"logps/rejected": -252.9770965576172, |
|
"loss": 0.0464, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.052128423005342484, |
|
"rewards/margins": 0.06647459417581558, |
|
"rewards/rejected": -0.11860301345586777, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.287843181003772e-06, |
|
"logits/chosen": 0.9971106648445129, |
|
"logits/rejected": 1.1152498722076416, |
|
"logps/chosen": -281.20208740234375, |
|
"logps/rejected": -239.54006958007812, |
|
"loss": 0.0394, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.03529990836977959, |
|
"rewards/margins": 0.0698903501033783, |
|
"rewards/rejected": -0.10519025474786758, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.27180073375873e-06, |
|
"logits/chosen": 0.9784267544746399, |
|
"logits/rejected": 1.0021319389343262, |
|
"logps/chosen": -281.3310241699219, |
|
"logps/rejected": -254.4761962890625, |
|
"loss": 0.038, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03572789579629898, |
|
"rewards/margins": 0.06249629333615303, |
|
"rewards/rejected": -0.09822418540716171, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.255610341662304e-06, |
|
"logits/chosen": 0.9751097559928894, |
|
"logits/rejected": 1.0435435771942139, |
|
"logps/chosen": -306.3908386230469, |
|
"logps/rejected": -268.491455078125, |
|
"loss": 0.0425, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.01923917606472969, |
|
"rewards/margins": 0.06259562820196152, |
|
"rewards/rejected": -0.0818348079919815, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2392733566075764e-06, |
|
"logits/chosen": 0.9983538389205933, |
|
"logits/rejected": 1.0708853006362915, |
|
"logps/chosen": -243.330322265625, |
|
"logps/rejected": -224.99075317382812, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0249100960791111, |
|
"rewards/margins": 0.06250262260437012, |
|
"rewards/rejected": -0.08741272240877151, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2227911427280975e-06, |
|
"logits/chosen": 0.9960931539535522, |
|
"logits/rejected": 1.0461426973342896, |
|
"logps/chosen": -286.45440673828125, |
|
"logps/rejected": -271.23980712890625, |
|
"loss": 0.0393, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.02666524052619934, |
|
"rewards/margins": 0.048846714198589325, |
|
"rewards/rejected": -0.07551195472478867, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.206165076283983e-06, |
|
"logits/chosen": 0.9975628852844238, |
|
"logits/rejected": 1.0400108098983765, |
|
"logps/chosen": -238.9987030029297, |
|
"logps/rejected": -228.86312866210938, |
|
"loss": 0.0446, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.023251879960298538, |
|
"rewards/margins": 0.04423128813505173, |
|
"rewards/rejected": -0.06748317182064056, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.189396545546995e-06, |
|
"logits/chosen": 1.0215797424316406, |
|
"logits/rejected": 1.1224019527435303, |
|
"logps/chosen": -289.75323486328125, |
|
"logps/rejected": -280.547607421875, |
|
"loss": 0.034, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.019200313836336136, |
|
"rewards/margins": 0.03457511216402054, |
|
"rewards/rejected": -0.053775422275066376, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.172486950684627e-06, |
|
"logits/chosen": 1.0255308151245117, |
|
"logits/rejected": 1.0572230815887451, |
|
"logps/chosen": -279.2245178222656, |
|
"logps/rejected": -261.9948425292969, |
|
"loss": 0.0392, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02723405882716179, |
|
"rewards/margins": 0.047797515988349915, |
|
"rewards/rejected": -0.075031578540802, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": 0.9659793972969055, |
|
"eval_logits/rejected": 1.0633821487426758, |
|
"eval_logps/chosen": -279.6128845214844, |
|
"eval_logps/rejected": -250.95762634277344, |
|
"eval_loss": 0.0366741381585598, |
|
"eval_rewards/accuracies": 0.6119999885559082, |
|
"eval_rewards/chosen": -0.020660726353526115, |
|
"eval_rewards/margins": 0.05075635015964508, |
|
"eval_rewards/rejected": -0.07141707837581635, |
|
"eval_runtime": 539.1224, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.927, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.155437703643182e-06, |
|
"logits/chosen": 0.9241663217544556, |
|
"logits/rejected": 0.9624761343002319, |
|
"logps/chosen": -284.2118225097656, |
|
"logps/rejected": -250.8708953857422, |
|
"loss": 0.0311, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.01345390360802412, |
|
"rewards/margins": 0.06398328393697739, |
|
"rewards/rejected": -0.07743719965219498, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.138250228029882e-06, |
|
"logits/chosen": 1.0098600387573242, |
|
"logits/rejected": 1.132021188735962, |
|
"logps/chosen": -233.54672241210938, |
|
"logps/rejected": -216.55630493164062, |
|
"loss": 0.0418, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.014631894417107105, |
|
"rewards/margins": 0.07016023248434067, |
|
"rewards/rejected": -0.0847921296954155, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.120925958993994e-06, |
|
"logits/chosen": 0.9781894683837891, |
|
"logits/rejected": 1.0409111976623535, |
|
"logps/chosen": -295.82403564453125, |
|
"logps/rejected": -249.65530395507812, |
|
"loss": 0.0414, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.026302725076675415, |
|
"rewards/margins": 0.0485377199947834, |
|
"rewards/rejected": -0.07484044134616852, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.103466343106999e-06, |
|
"logits/chosen": 0.9926727414131165, |
|
"logits/rejected": 1.0091572999954224, |
|
"logps/chosen": -274.465576171875, |
|
"logps/rejected": -243.0918731689453, |
|
"loss": 0.0381, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.019631439819931984, |
|
"rewards/margins": 0.047348715364933014, |
|
"rewards/rejected": -0.06698014587163925, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.085872838241797e-06, |
|
"logits/chosen": 1.0105888843536377, |
|
"logits/rejected": 1.042271614074707, |
|
"logps/chosen": -286.24774169921875, |
|
"logps/rejected": -284.87847900390625, |
|
"loss": 0.0349, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.010948913171887398, |
|
"rewards/margins": 0.058446235954761505, |
|
"rewards/rejected": -0.06939514726400375, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.06814691345098e-06, |
|
"logits/chosen": 0.9800373911857605, |
|
"logits/rejected": 1.1013528108596802, |
|
"logps/chosen": -303.1207275390625, |
|
"logps/rejected": -257.15679931640625, |
|
"loss": 0.0439, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.03485560044646263, |
|
"rewards/margins": 0.04474693164229393, |
|
"rewards/rejected": -0.07960253953933716, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.050290048844171e-06, |
|
"logits/chosen": 1.0052525997161865, |
|
"logits/rejected": 1.061704158782959, |
|
"logps/chosen": -256.0435791015625, |
|
"logps/rejected": -225.7267303466797, |
|
"loss": 0.0437, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.014286425895988941, |
|
"rewards/margins": 0.03906578570604324, |
|
"rewards/rejected": -0.05335221439599991, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.032303735464422e-06, |
|
"logits/chosen": 0.9533084034919739, |
|
"logits/rejected": 1.0529754161834717, |
|
"logps/chosen": -289.2705078125, |
|
"logps/rejected": -257.12225341796875, |
|
"loss": 0.0421, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.001376188127323985, |
|
"rewards/margins": 0.05364586040377617, |
|
"rewards/rejected": -0.05502205342054367, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.014189475163727e-06, |
|
"logits/chosen": 0.9958294630050659, |
|
"logits/rejected": 1.0700651407241821, |
|
"logps/chosen": -266.70538330078125, |
|
"logps/rejected": -240.39126586914062, |
|
"loss": 0.0486, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.004494071938097477, |
|
"rewards/margins": 0.0475679449737072, |
|
"rewards/rejected": -0.0520620159804821, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.995948780477605e-06, |
|
"logits/chosen": 1.0267126560211182, |
|
"logits/rejected": 1.0607044696807861, |
|
"logps/chosen": -287.0250549316406, |
|
"logps/rejected": -246.72103881835938, |
|
"loss": 0.0432, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.006947031710296869, |
|
"rewards/margins": 0.038351211696863174, |
|
"rewards/rejected": -0.04529824107885361, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": 0.9482428431510925, |
|
"eval_logits/rejected": 1.0463390350341797, |
|
"eval_logps/chosen": -279.0111999511719, |
|
"eval_logps/rejected": -250.108154296875, |
|
"eval_loss": 0.036706726998090744, |
|
"eval_rewards/accuracies": 0.6259999871253967, |
|
"eval_rewards/chosen": -0.014643603935837746, |
|
"eval_rewards/margins": 0.0482785664498806, |
|
"eval_rewards/rejected": -0.0629221647977829, |
|
"eval_runtime": 539.1963, |
|
"eval_samples_per_second": 3.709, |
|
"eval_steps_per_second": 0.927, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.977583174498816e-06, |
|
"logits/chosen": 0.9617465138435364, |
|
"logits/rejected": 1.0087401866912842, |
|
"logps/chosen": -274.5213317871094, |
|
"logps/rejected": -242.7264404296875, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.014896227046847343, |
|
"rewards/margins": 0.038757093250751495, |
|
"rewards/rejected": -0.05365331843495369, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.959094190750172e-06, |
|
"logits/chosen": 0.9357368350028992, |
|
"logits/rejected": 1.0426499843597412, |
|
"logps/chosen": -288.6091003417969, |
|
"logps/rejected": -259.63323974609375, |
|
"loss": 0.0462, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.030341049656271935, |
|
"rewards/margins": 0.037225984036922455, |
|
"rewards/rejected": -0.06756703555583954, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9404833730564975e-06, |
|
"logits/chosen": 1.026078224182129, |
|
"logits/rejected": 1.0588185787200928, |
|
"logps/chosen": -250.0701141357422, |
|
"logps/rejected": -250.2508087158203, |
|
"loss": 0.0495, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.03756723925471306, |
|
"rewards/margins": 0.042705655097961426, |
|
"rewards/rejected": -0.08027289807796478, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.921752275415712e-06, |
|
"logits/chosen": 1.0129649639129639, |
|
"logits/rejected": 1.0660035610198975, |
|
"logps/chosen": -242.80191040039062, |
|
"logps/rejected": -215.3003387451172, |
|
"loss": 0.0364, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.041556812822818756, |
|
"rewards/margins": 0.0497397780418396, |
|
"rewards/rejected": -0.09129659831523895, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.902902461869079e-06, |
|
"logits/chosen": 0.9659714698791504, |
|
"logits/rejected": 1.106687307357788, |
|
"logps/chosen": -273.6470642089844, |
|
"logps/rejected": -246.69326782226562, |
|
"loss": 0.0399, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.050172436982393265, |
|
"rewards/margins": 0.060846518725156784, |
|
"rewards/rejected": -0.11101895570755005, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.883935506370605e-06, |
|
"logits/chosen": 0.941813588142395, |
|
"logits/rejected": 1.0076746940612793, |
|
"logps/chosen": -282.126708984375, |
|
"logps/rejected": -244.9275665283203, |
|
"loss": 0.0352, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.05347307771444321, |
|
"rewards/margins": 0.053630221635103226, |
|
"rewards/rejected": -0.10710330307483673, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.864852992655617e-06, |
|
"logits/chosen": 0.9045804142951965, |
|
"logits/rejected": 1.0152260065078735, |
|
"logps/chosen": -270.4555969238281, |
|
"logps/rejected": -260.31390380859375, |
|
"loss": 0.0396, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.045248690992593765, |
|
"rewards/margins": 0.0546412356197834, |
|
"rewards/rejected": -0.09988992661237717, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.845656514108516e-06, |
|
"logits/chosen": 0.9225580096244812, |
|
"logits/rejected": 0.9809015989303589, |
|
"logps/chosen": -297.4796142578125, |
|
"logps/rejected": -250.21530151367188, |
|
"loss": 0.0295, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.041134659200906754, |
|
"rewards/margins": 0.06729653477668762, |
|
"rewards/rejected": -0.10843118280172348, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.826347673629738e-06, |
|
"logits/chosen": 0.9812415838241577, |
|
"logits/rejected": 1.0070959329605103, |
|
"logps/chosen": -247.45516967773438, |
|
"logps/rejected": -231.81179809570312, |
|
"loss": 0.0372, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04904834181070328, |
|
"rewards/margins": 0.05793965980410576, |
|
"rewards/rejected": -0.10698799788951874, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8069280835019062e-06, |
|
"logits/chosen": 0.9496526718139648, |
|
"logits/rejected": 1.0348269939422607, |
|
"logps/chosen": -325.68267822265625, |
|
"logps/rejected": -257.0068054199219, |
|
"loss": 0.0304, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.035646189004182816, |
|
"rewards/margins": 0.04558128863573074, |
|
"rewards/rejected": -0.08122747391462326, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": 0.949572741985321, |
|
"eval_logits/rejected": 1.0471240282058716, |
|
"eval_logps/chosen": -282.7773132324219, |
|
"eval_logps/rejected": -254.43394470214844, |
|
"eval_loss": 0.03586630895733833, |
|
"eval_rewards/accuracies": 0.6359999775886536, |
|
"eval_rewards/chosen": -0.052304789423942566, |
|
"eval_rewards/margins": 0.05387549474835396, |
|
"eval_rewards/rejected": -0.10618028789758682, |
|
"eval_runtime": 539.1493, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.927, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7873993652552077e-06, |
|
"logits/chosen": 0.9462829828262329, |
|
"logits/rejected": 1.0417277812957764, |
|
"logps/chosen": -263.7369689941406, |
|
"logps/rejected": -241.88095092773438, |
|
"loss": 0.036, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.059374719858169556, |
|
"rewards/margins": 0.021540379151701927, |
|
"rewards/rejected": -0.08091510832309723, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7677631495319953e-06, |
|
"logits/chosen": 0.9473394155502319, |
|
"logits/rejected": 1.016614556312561, |
|
"logps/chosen": -263.25238037109375, |
|
"logps/rejected": -217.35693359375, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04990892857313156, |
|
"rewards/margins": 0.03492476046085358, |
|
"rewards/rejected": -0.08483369648456573, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.748021075950633e-06, |
|
"logits/chosen": 1.0440593957901, |
|
"logits/rejected": 1.0327621698379517, |
|
"logps/chosen": -307.784423828125, |
|
"logps/rejected": -280.920654296875, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.05705567076802254, |
|
"rewards/margins": 0.05376668646931648, |
|
"rewards/rejected": -0.11082235723733902, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7281747929685824e-06, |
|
"logits/chosen": 0.9668010473251343, |
|
"logits/rejected": 1.1068073511123657, |
|
"logps/chosen": -292.89801025390625, |
|
"logps/rejected": -257.0223388671875, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.04842069000005722, |
|
"rewards/margins": 0.043641868978738785, |
|
"rewards/rejected": -0.0920625552535057, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.7082259577447604e-06, |
|
"logits/chosen": 0.9432889223098755, |
|
"logits/rejected": 1.05801522731781, |
|
"logps/chosen": -324.9328308105469, |
|
"logps/rejected": -255.90939331054688, |
|
"loss": 0.0319, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.05441862344741821, |
|
"rewards/margins": 0.053149156272411346, |
|
"rewards/rejected": -0.10756777226924896, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6881762360011688e-06, |
|
"logits/chosen": 1.0232698917388916, |
|
"logits/rejected": 1.055396318435669, |
|
"logps/chosen": -291.96514892578125, |
|
"logps/rejected": -249.9015350341797, |
|
"loss": 0.0428, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.06017423793673515, |
|
"rewards/margins": 0.05147537589073181, |
|
"rewards/rejected": -0.11164961010217667, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.668027301883802e-06, |
|
"logits/chosen": 0.9909790754318237, |
|
"logits/rejected": 1.0502822399139404, |
|
"logps/chosen": -266.43011474609375, |
|
"logps/rejected": -224.57601928710938, |
|
"loss": 0.033, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.05004848912358284, |
|
"rewards/margins": 0.05305255576968193, |
|
"rewards/rejected": -0.10310103744268417, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.64778083782286e-06, |
|
"logits/chosen": 1.0207841396331787, |
|
"logits/rejected": 1.0920627117156982, |
|
"logps/chosen": -270.4779968261719, |
|
"logps/rejected": -278.3066101074219, |
|
"loss": 0.0354, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.038508545607328415, |
|
"rewards/margins": 0.05329999327659607, |
|
"rewards/rejected": -0.09180854260921478, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.627438534392268e-06, |
|
"logits/chosen": 0.9771720767021179, |
|
"logits/rejected": 1.0112661123275757, |
|
"logps/chosen": -264.5091552734375, |
|
"logps/rejected": -213.95535278320312, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.03601064160466194, |
|
"rewards/margins": 0.04099656641483307, |
|
"rewards/rejected": -0.07700721174478531, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.607002090168506e-06, |
|
"logits/chosen": 0.9807453155517578, |
|
"logits/rejected": 1.0391424894332886, |
|
"logps/chosen": -261.9506530761719, |
|
"logps/rejected": -238.798828125, |
|
"loss": 0.0436, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.03824782371520996, |
|
"rewards/margins": 0.041073787957429886, |
|
"rewards/rejected": -0.07932160794734955, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": 0.9584904909133911, |
|
"eval_logits/rejected": 1.0586249828338623, |
|
"eval_logps/chosen": -280.7698669433594, |
|
"eval_logps/rejected": -252.26162719726562, |
|
"eval_loss": 0.03589407727122307, |
|
"eval_rewards/accuracies": 0.6340000033378601, |
|
"eval_rewards/chosen": -0.03223072364926338, |
|
"eval_rewards/margins": 0.05222645774483681, |
|
"eval_rewards/rejected": -0.08445718139410019, |
|
"eval_runtime": 538.9153, |
|
"eval_samples_per_second": 3.711, |
|
"eval_steps_per_second": 0.928, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.586473211588787e-06, |
|
"logits/chosen": 0.9628156423568726, |
|
"logits/rejected": 1.0911314487457275, |
|
"logps/chosen": -301.49798583984375, |
|
"logps/rejected": -244.82583618164062, |
|
"loss": 0.0432, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.025343740358948708, |
|
"rewards/margins": 0.0616876594722271, |
|
"rewards/rejected": -0.08703140914440155, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5658536128085623e-06, |
|
"logits/chosen": 0.9632102847099304, |
|
"logits/rejected": 1.0290127992630005, |
|
"logps/chosen": -252.96798706054688, |
|
"logps/rejected": -247.17739868164062, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.03504541888833046, |
|
"rewards/margins": 0.056658290326595306, |
|
"rewards/rejected": -0.09170371294021606, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.545145015558399e-06, |
|
"logits/chosen": 0.9802696108818054, |
|
"logits/rejected": 1.0333788394927979, |
|
"logps/chosen": -302.0663146972656, |
|
"logps/rejected": -291.22027587890625, |
|
"loss": 0.0495, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03271108120679855, |
|
"rewards/margins": 0.047079406678676605, |
|
"rewards/rejected": -0.07979048788547516, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5243491490002056e-06, |
|
"logits/chosen": 0.9676446914672852, |
|
"logits/rejected": 1.0016578435897827, |
|
"logps/chosen": -277.4888000488281, |
|
"logps/rejected": -244.8700714111328, |
|
"loss": 0.0395, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.036362119019031525, |
|
"rewards/margins": 0.042628705501556396, |
|
"rewards/rejected": -0.07899081707000732, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.503467749582857e-06, |
|
"logits/chosen": 0.955204963684082, |
|
"logits/rejected": 0.9823210835456848, |
|
"logps/chosen": -251.63296508789062, |
|
"logps/rejected": -207.33932495117188, |
|
"loss": 0.0357, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0370803065598011, |
|
"rewards/margins": 0.05566862225532532, |
|
"rewards/rejected": -0.09274892508983612, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4825025608971947e-06, |
|
"logits/chosen": 0.9128938913345337, |
|
"logits/rejected": 1.0501888990402222, |
|
"logps/chosen": -319.082275390625, |
|
"logps/rejected": -253.41708374023438, |
|
"loss": 0.0321, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04241309314966202, |
|
"rewards/margins": 0.05771785229444504, |
|
"rewards/rejected": -0.10013093799352646, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4614553335304407e-06, |
|
"logits/chosen": 0.9686363935470581, |
|
"logits/rejected": 1.0950806140899658, |
|
"logps/chosen": -281.142333984375, |
|
"logps/rejected": -243.0208740234375, |
|
"loss": 0.0393, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.05112028867006302, |
|
"rewards/margins": 0.06849656254053116, |
|
"rewards/rejected": -0.11961684376001358, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4403278249200222e-06, |
|
"logits/chosen": 0.9172054529190063, |
|
"logits/rejected": 1.0074876546859741, |
|
"logps/chosen": -262.1912536621094, |
|
"logps/rejected": -244.34036254882812, |
|
"loss": 0.038, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.050746072083711624, |
|
"rewards/margins": 0.04991786926984787, |
|
"rewards/rejected": -0.10066394507884979, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4191217992068293e-06, |
|
"logits/chosen": 0.9850749969482422, |
|
"logits/rejected": 0.9730724096298218, |
|
"logps/chosen": -262.35345458984375, |
|
"logps/rejected": -260.61895751953125, |
|
"loss": 0.0328, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.06715109199285507, |
|
"rewards/margins": 0.05269361659884453, |
|
"rewards/rejected": -0.1198447123169899, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.3978390270879056e-06, |
|
"logits/chosen": 0.9383825063705444, |
|
"logits/rejected": 1.040351152420044, |
|
"logps/chosen": -299.37066650390625, |
|
"logps/rejected": -256.8703918457031, |
|
"loss": 0.0405, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.047446489334106445, |
|
"rewards/margins": 0.05738651007413864, |
|
"rewards/rejected": -0.10483300685882568, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": 0.9321679472923279, |
|
"eval_logits/rejected": 1.0312304496765137, |
|
"eval_logps/chosen": -282.8528747558594, |
|
"eval_logps/rejected": -254.86965942382812, |
|
"eval_loss": 0.03552675619721413, |
|
"eval_rewards/accuracies": 0.6334999799728394, |
|
"eval_rewards/chosen": -0.05306074023246765, |
|
"eval_rewards/margins": 0.05747658386826515, |
|
"eval_rewards/rejected": -0.1105373352766037, |
|
"eval_runtime": 539.0945, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.927, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3764812856685995e-06, |
|
"logits/chosen": 0.9679194688796997, |
|
"logits/rejected": 0.9970897436141968, |
|
"logps/chosen": -294.0511169433594, |
|
"logps/rejected": -265.82025146484375, |
|
"loss": 0.0297, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.04057580232620239, |
|
"rewards/margins": 0.06438260525465012, |
|
"rewards/rejected": -0.1049584150314331, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": 0.9520149230957031, |
|
"logits/rejected": 0.9898689985275269, |
|
"logps/chosen": -327.1080627441406, |
|
"logps/rejected": -308.2015686035156, |
|
"loss": 0.0303, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.045171838253736496, |
|
"rewards/margins": 0.06369863450527191, |
|
"rewards/rejected": -0.1088704839348793, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3335480345008907e-06, |
|
"logits/chosen": 0.9297927618026733, |
|
"logits/rejected": 1.0150766372680664, |
|
"logps/chosen": -266.30914306640625, |
|
"logps/rejected": -229.8489532470703, |
|
"loss": 0.0462, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.0414881631731987, |
|
"rewards/margins": 0.0664457231760025, |
|
"rewards/rejected": -0.1079338937997818, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3119761096666055e-06, |
|
"logits/chosen": 1.0308209657669067, |
|
"logits/rejected": 1.0126601457595825, |
|
"logps/chosen": -317.2650451660156, |
|
"logps/rejected": -265.3246765136719, |
|
"loss": 0.04, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.06478316336870193, |
|
"rewards/margins": 0.04602901265025139, |
|
"rewards/rejected": -0.11081217229366302, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.290336385060832e-06, |
|
"logits/chosen": 0.9886214137077332, |
|
"logits/rejected": 0.9748364686965942, |
|
"logps/chosen": -289.01776123046875, |
|
"logps/rejected": -267.6888427734375, |
|
"loss": 0.0375, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.05019726604223251, |
|
"rewards/margins": 0.06619967520236969, |
|
"rewards/rejected": -0.1163969412446022, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.268630667594348e-06, |
|
"logits/chosen": 0.9688504934310913, |
|
"logits/rejected": 1.0535883903503418, |
|
"logps/chosen": -284.36700439453125, |
|
"logps/rejected": -266.2789306640625, |
|
"loss": 0.033, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.05331147834658623, |
|
"rewards/margins": 0.05179852992296219, |
|
"rewards/rejected": -0.10510998964309692, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2468607696883147e-06, |
|
"logits/chosen": 0.9664725065231323, |
|
"logits/rejected": 1.0324318408966064, |
|
"logps/chosen": -302.47113037109375, |
|
"logps/rejected": -258.18365478515625, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.054223883897066116, |
|
"rewards/margins": 0.06361141800880432, |
|
"rewards/rejected": -0.11783530563116074, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.225028509122944e-06, |
|
"logits/chosen": 0.9445089101791382, |
|
"logits/rejected": 1.0780378580093384, |
|
"logps/chosen": -306.8780212402344, |
|
"logps/rejected": -259.9053955078125, |
|
"loss": 0.0198, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.02363925240933895, |
|
"rewards/margins": 0.06653538346290588, |
|
"rewards/rejected": -0.09017463773488998, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2031357088857083e-06, |
|
"logits/chosen": 1.0200811624526978, |
|
"logits/rejected": 1.0404508113861084, |
|
"logps/chosen": -275.5518493652344, |
|
"logps/rejected": -261.8308410644531, |
|
"loss": 0.0388, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04572081193327904, |
|
"rewards/margins": 0.060186631977558136, |
|
"rewards/rejected": -0.10590744018554688, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.181184197019127e-06, |
|
"logits/chosen": 1.0228135585784912, |
|
"logits/rejected": 0.9979068636894226, |
|
"logps/chosen": -282.7207946777344, |
|
"logps/rejected": -236.26962280273438, |
|
"loss": 0.0352, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.03686892241239548, |
|
"rewards/margins": 0.06626715511083603, |
|
"rewards/rejected": -0.1031360775232315, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": 0.9538518190383911, |
|
"eval_logits/rejected": 1.053285002708435, |
|
"eval_logps/chosen": -281.23944091796875, |
|
"eval_logps/rejected": -253.37205505371094, |
|
"eval_loss": 0.03543499857187271, |
|
"eval_rewards/accuracies": 0.621999979019165, |
|
"eval_rewards/chosen": -0.0369262732565403, |
|
"eval_rewards/margins": 0.05863497406244278, |
|
"eval_rewards/rejected": -0.09556125104427338, |
|
"eval_runtime": 539.174, |
|
"eval_samples_per_second": 3.709, |
|
"eval_steps_per_second": 0.927, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.159175806468126e-06, |
|
"logits/chosen": 0.9597219228744507, |
|
"logits/rejected": 1.0474979877471924, |
|
"logps/chosen": -284.355712890625, |
|
"logps/rejected": -276.75836181640625, |
|
"loss": 0.0293, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.030187245458364487, |
|
"rewards/margins": 0.052719276398420334, |
|
"rewards/rejected": -0.08290652930736542, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1371123749269804e-06, |
|
"logits/chosen": 1.0226430892944336, |
|
"logits/rejected": 1.1090997457504272, |
|
"logps/chosen": -282.0672607421875, |
|
"logps/rejected": -252.12680053710938, |
|
"loss": 0.0356, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.027882922440767288, |
|
"rewards/margins": 0.07020456343889236, |
|
"rewards/rejected": -0.09808747470378876, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.114995744685877e-06, |
|
"logits/chosen": 1.0181407928466797, |
|
"logits/rejected": 0.9908281564712524, |
|
"logps/chosen": -258.36480712890625, |
|
"logps/rejected": -240.61569213867188, |
|
"loss": 0.0308, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.017861558124423027, |
|
"rewards/margins": 0.07849525660276413, |
|
"rewards/rejected": -0.0963568240404129, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0928277624770743e-06, |
|
"logits/chosen": 0.9835958480834961, |
|
"logits/rejected": 1.0549378395080566, |
|
"logps/chosen": -285.7489013671875, |
|
"logps/rejected": -232.4912109375, |
|
"loss": 0.0297, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.011253233067691326, |
|
"rewards/margins": 0.08058271557092667, |
|
"rewards/rejected": -0.09183595329523087, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.070610279320708e-06, |
|
"logits/chosen": 0.9505823850631714, |
|
"logits/rejected": 1.0996264219284058, |
|
"logps/chosen": -286.63714599609375, |
|
"logps/rejected": -233.0901336669922, |
|
"loss": 0.0359, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.030136246234178543, |
|
"rewards/margins": 0.05022455379366875, |
|
"rewards/rejected": -0.0803607925772667, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0483451503702264e-06, |
|
"logits/chosen": 1.0030499696731567, |
|
"logits/rejected": 1.0658283233642578, |
|
"logps/chosen": -287.8111877441406, |
|
"logps/rejected": -264.3537292480469, |
|
"loss": 0.043, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.020662177354097366, |
|
"rewards/margins": 0.05197754502296448, |
|
"rewards/rejected": -0.07263971865177155, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0260342347574916e-06, |
|
"logits/chosen": 1.0229995250701904, |
|
"logits/rejected": 1.0849655866622925, |
|
"logps/chosen": -299.0933837890625, |
|
"logps/rejected": -248.93527221679688, |
|
"loss": 0.0295, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.036781955510377884, |
|
"rewards/margins": 0.0717974454164505, |
|
"rewards/rejected": -0.10857941210269928, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0036793954375358e-06, |
|
"logits/chosen": 0.9629353284835815, |
|
"logits/rejected": 1.0485321283340454, |
|
"logps/chosen": -279.51690673828125, |
|
"logps/rejected": -242.5545196533203, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.01313974242657423, |
|
"rewards/margins": 0.07288579642772675, |
|
"rewards/rejected": -0.08602554351091385, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.981282499033009e-06, |
|
"logits/chosen": 0.9632253646850586, |
|
"logits/rejected": 1.0230735540390015, |
|
"logps/chosen": -293.87286376953125, |
|
"logps/rejected": -243.94613647460938, |
|
"loss": 0.0359, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.022772405296564102, |
|
"rewards/margins": 0.05214250087738037, |
|
"rewards/rejected": -0.07491490244865417, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9588454156783163e-06, |
|
"logits/chosen": 1.0420585870742798, |
|
"logits/rejected": 1.01620614528656, |
|
"logps/chosen": -277.5963439941406, |
|
"logps/rejected": -251.56881713867188, |
|
"loss": 0.0392, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.020175188779830933, |
|
"rewards/margins": 0.05704687908291817, |
|
"rewards/rejected": -0.0772220641374588, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_logits/chosen": 0.9508064389228821, |
|
"eval_logits/rejected": 1.0498266220092773, |
|
"eval_logps/chosen": -280.359375, |
|
"eval_logps/rejected": -252.41934204101562, |
|
"eval_loss": 0.03548915684223175, |
|
"eval_rewards/accuracies": 0.6209999918937683, |
|
"eval_rewards/chosen": -0.028125399723649025, |
|
"eval_rewards/margins": 0.05790869519114494, |
|
"eval_rewards/rejected": -0.08603409677743912, |
|
"eval_runtime": 539.0909, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.927, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9363700188634597e-06, |
|
"logits/chosen": 0.9709011912345886, |
|
"logits/rejected": 1.0315817594528198, |
|
"logps/chosen": -271.8665771484375, |
|
"logps/rejected": -249.4420928955078, |
|
"loss": 0.031, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.03171471506357193, |
|
"rewards/margins": 0.04039089381694794, |
|
"rewards/rejected": -0.07210560888051987, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9138581852776053e-06, |
|
"logits/chosen": 0.9720270037651062, |
|
"logits/rejected": 1.086348056793213, |
|
"logps/chosen": -276.81109619140625, |
|
"logps/rejected": -262.7275085449219, |
|
"loss": 0.0384, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.02635134384036064, |
|
"rewards/margins": 0.07241298258304596, |
|
"rewards/rejected": -0.0987643226981163, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8913117946523805e-06, |
|
"logits/chosen": 1.0553147792816162, |
|
"logits/rejected": 1.024316430091858, |
|
"logps/chosen": -261.1982116699219, |
|
"logps/rejected": -221.4432373046875, |
|
"loss": 0.0406, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.041058607399463654, |
|
"rewards/margins": 0.05773719400167465, |
|
"rewards/rejected": -0.0987958088517189, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8687327296049126e-06, |
|
"logits/chosen": 0.9523100852966309, |
|
"logits/rejected": 1.032663345336914, |
|
"logps/chosen": -280.92706298828125, |
|
"logps/rejected": -259.6051940917969, |
|
"loss": 0.0323, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.02694753371179104, |
|
"rewards/margins": 0.04815928265452385, |
|
"rewards/rejected": -0.07510681450366974, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8461228754806376e-06, |
|
"logits/chosen": 0.9898034930229187, |
|
"logits/rejected": 1.073132038116455, |
|
"logps/chosen": -250.3188018798828, |
|
"logps/rejected": -242.313232421875, |
|
"loss": 0.0325, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.03240308165550232, |
|
"rewards/margins": 0.05959530547261238, |
|
"rewards/rejected": -0.0919983834028244, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.823484120195865e-06, |
|
"logits/chosen": 1.0166945457458496, |
|
"logits/rejected": 0.9883760213851929, |
|
"logps/chosen": -209.08822631835938, |
|
"logps/rejected": -223.0111846923828, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.029585499316453934, |
|
"rewards/margins": 0.055918287485837936, |
|
"rewards/rejected": -0.08550377935171127, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.8008183540801486e-06, |
|
"logits/chosen": 0.9123330116271973, |
|
"logits/rejected": 1.0141832828521729, |
|
"logps/chosen": -288.05084228515625, |
|
"logps/rejected": -267.970458984375, |
|
"loss": 0.0307, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.030767519026994705, |
|
"rewards/margins": 0.04243772476911545, |
|
"rewards/rejected": -0.07320524752140045, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7781274697184353e-06, |
|
"logits/chosen": 0.9999529123306274, |
|
"logits/rejected": 1.0701429843902588, |
|
"logps/chosen": -274.42816162109375, |
|
"logps/rejected": -255.3940887451172, |
|
"loss": 0.0341, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03205768018960953, |
|
"rewards/margins": 0.0669126957654953, |
|
"rewards/rejected": -0.09897039085626602, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7554133617930397e-06, |
|
"logits/chosen": 1.0071890354156494, |
|
"logits/rejected": 1.0170022249221802, |
|
"logps/chosen": -272.82794189453125, |
|
"logps/rejected": -268.2226867675781, |
|
"loss": 0.035, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03548605367541313, |
|
"rewards/margins": 0.03869449347257614, |
|
"rewards/rejected": -0.07418055832386017, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7326779269254363e-06, |
|
"logits/chosen": 0.945580780506134, |
|
"logits/rejected": 1.0344423055648804, |
|
"logps/chosen": -254.6405792236328, |
|
"logps/rejected": -229.8059844970703, |
|
"loss": 0.0368, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.025666531175374985, |
|
"rewards/margins": 0.0334133505821228, |
|
"rewards/rejected": -0.059079885482788086, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 0.9577403664588928, |
|
"eval_logits/rejected": 1.0563304424285889, |
|
"eval_logps/chosen": -279.8615417480469, |
|
"eval_logps/rejected": -251.5159149169922, |
|
"eval_loss": 0.0354422889649868, |
|
"eval_rewards/accuracies": 0.6299999952316284, |
|
"eval_rewards/chosen": -0.023147189989686012, |
|
"eval_rewards/margins": 0.053852878510951996, |
|
"eval_rewards/rejected": -0.07700006663799286, |
|
"eval_runtime": 539.213, |
|
"eval_samples_per_second": 3.709, |
|
"eval_steps_per_second": 0.927, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7099230635178954e-06, |
|
"logits/chosen": 0.9786656498908997, |
|
"logits/rejected": 1.0951110124588013, |
|
"logps/chosen": -250.3948974609375, |
|
"logps/rejected": -213.85202026367188, |
|
"loss": 0.0432, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.015626171603798866, |
|
"rewards/margins": 0.0794781818985939, |
|
"rewards/rejected": -0.09510435163974762, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6871506715949608e-06, |
|
"logits/chosen": 1.0892430543899536, |
|
"logits/rejected": 1.11086106300354, |
|
"logps/chosen": -277.72430419921875, |
|
"logps/rejected": -263.95916748046875, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.028381651267409325, |
|
"rewards/margins": 0.07892771810293198, |
|
"rewards/rejected": -0.10730937868356705, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6643626526448063e-06, |
|
"logits/chosen": 1.0042452812194824, |
|
"logits/rejected": 1.097080945968628, |
|
"logps/chosen": -245.4960479736328, |
|
"logps/rejected": -252.0270233154297, |
|
"loss": 0.0336, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.027020296081900597, |
|
"rewards/margins": 0.0670652836561203, |
|
"rewards/rejected": -0.09408558160066605, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6415609094604562e-06, |
|
"logits/chosen": 0.9564634561538696, |
|
"logits/rejected": 1.0937979221343994, |
|
"logps/chosen": -304.175537109375, |
|
"logps/rejected": -259.8273010253906, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.035849399864673615, |
|
"rewards/margins": 0.067509725689888, |
|
"rewards/rejected": -0.10335911810398102, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.618747345980904e-06, |
|
"logits/chosen": 0.9391676783561707, |
|
"logits/rejected": 1.0340735912322998, |
|
"logps/chosen": -289.8169860839844, |
|
"logps/rejected": -250.7114715576172, |
|
"loss": 0.0301, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.02765297330915928, |
|
"rewards/margins": 0.06791722774505615, |
|
"rewards/rejected": -0.09557019919157028, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.595923867132136e-06, |
|
"logits/chosen": 1.0433982610702515, |
|
"logits/rejected": 1.1006929874420166, |
|
"logps/chosen": -227.78182983398438, |
|
"logps/rejected": -231.31103515625, |
|
"loss": 0.0364, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.024369673803448677, |
|
"rewards/margins": 0.061494432389736176, |
|
"rewards/rejected": -0.0858640968799591, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5730923786680672e-06, |
|
"logits/chosen": 1.0085296630859375, |
|
"logits/rejected": 1.0929278135299683, |
|
"logps/chosen": -275.10479736328125, |
|
"logps/rejected": -230.3509521484375, |
|
"loss": 0.0339, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.031941868364810944, |
|
"rewards/margins": 0.05682260915637016, |
|
"rewards/rejected": -0.08876447379589081, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5502547870114137e-06, |
|
"logits/chosen": 1.0008578300476074, |
|
"logits/rejected": 1.0413917303085327, |
|
"logps/chosen": -231.7599639892578, |
|
"logps/rejected": -252.7259063720703, |
|
"loss": 0.0245, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.024501001462340355, |
|
"rewards/margins": 0.04970362037420273, |
|
"rewards/rejected": -0.07420462369918823, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.527412999094507e-06, |
|
"logits/chosen": 1.006413459777832, |
|
"logits/rejected": 1.048346996307373, |
|
"logps/chosen": -283.4499816894531, |
|
"logps/rejected": -256.1369323730469, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03352126479148865, |
|
"rewards/margins": 0.05489424616098404, |
|
"rewards/rejected": -0.08841550350189209, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.504568922200064e-06, |
|
"logits/chosen": 0.9541667699813843, |
|
"logits/rejected": 1.0082509517669678, |
|
"logps/chosen": -265.67510986328125, |
|
"logps/rejected": -248.1321258544922, |
|
"loss": 0.0326, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.037846438586711884, |
|
"rewards/margins": 0.04263025149703026, |
|
"rewards/rejected": -0.08047669380903244, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_logits/chosen": 0.9759756326675415, |
|
"eval_logits/rejected": 1.075065016746521, |
|
"eval_logps/chosen": -281.1431884765625, |
|
"eval_logps/rejected": -252.96302795410156, |
|
"eval_loss": 0.03518374264240265, |
|
"eval_rewards/accuracies": 0.6299999952316284, |
|
"eval_rewards/chosen": -0.03596383333206177, |
|
"eval_rewards/margins": 0.055507466197013855, |
|
"eval_rewards/rejected": -0.09147130697965622, |
|
"eval_runtime": 539.005, |
|
"eval_samples_per_second": 3.711, |
|
"eval_steps_per_second": 0.928, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4817244638019333e-06, |
|
"logits/chosen": 0.9972022175788879, |
|
"logits/rejected": 1.1107069253921509, |
|
"logps/chosen": -259.2168884277344, |
|
"logps/rejected": -252.2943878173828, |
|
"loss": 0.0356, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.034331098198890686, |
|
"rewards/margins": 0.06633375585079193, |
|
"rewards/rejected": -0.10066483914852142, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4588815314058155e-06, |
|
"logits/chosen": 0.979550838470459, |
|
"logits/rejected": 1.015853762626648, |
|
"logps/chosen": -308.5239562988281, |
|
"logps/rejected": -290.33526611328125, |
|
"loss": 0.0363, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.033022016286849976, |
|
"rewards/margins": 0.03916890174150467, |
|
"rewards/rejected": -0.07219092547893524, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4360420323899922e-06, |
|
"logits/chosen": 0.9291566610336304, |
|
"logits/rejected": 1.0274138450622559, |
|
"logps/chosen": -300.00830078125, |
|
"logps/rejected": -246.3135528564453, |
|
"loss": 0.0364, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.0385669507086277, |
|
"rewards/margins": 0.0751514807343483, |
|
"rewards/rejected": -0.1137184128165245, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4132078738460585e-06, |
|
"logits/chosen": 1.0100805759429932, |
|
"logits/rejected": 1.0035889148712158, |
|
"logps/chosen": -273.31329345703125, |
|
"logps/rejected": -260.4046630859375, |
|
"loss": 0.0345, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.042252346873283386, |
|
"rewards/margins": 0.04604203626513481, |
|
"rewards/rejected": -0.0882943868637085, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3903809624196826e-06, |
|
"logits/chosen": 1.0183110237121582, |
|
"logits/rejected": 1.1255147457122803, |
|
"logps/chosen": -260.5460205078125, |
|
"logps/rejected": -231.55355834960938, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.04934290796518326, |
|
"rewards/margins": 0.057183485478162766, |
|
"rewards/rejected": -0.10652639716863632, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3675632041513978e-06, |
|
"logits/chosen": 1.0892599821090698, |
|
"logits/rejected": 1.1318366527557373, |
|
"logps/chosen": -243.21102905273438, |
|
"logps/rejected": -225.78549194335938, |
|
"loss": 0.0351, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.05033104866743088, |
|
"rewards/margins": 0.03708335757255554, |
|
"rewards/rejected": -0.08741440623998642, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3447565043174533e-06, |
|
"logits/chosen": 1.0009520053863525, |
|
"logits/rejected": 0.9941670298576355, |
|
"logps/chosen": -273.72149658203125, |
|
"logps/rejected": -250.0045166015625, |
|
"loss": 0.0432, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.04186940938234329, |
|
"rewards/margins": 0.05003537982702255, |
|
"rewards/rejected": -0.09190478920936584, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.321962767270724e-06, |
|
"logits/chosen": 1.0270161628723145, |
|
"logits/rejected": 1.0816559791564941, |
|
"logps/chosen": -258.2284240722656, |
|
"logps/rejected": -254.64871215820312, |
|
"loss": 0.0411, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.04245874285697937, |
|
"rewards/margins": 0.06636542826890945, |
|
"rewards/rejected": -0.10882417112588882, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.299183896281692e-06, |
|
"logits/chosen": 0.9619997143745422, |
|
"logits/rejected": 1.1526567935943604, |
|
"logps/chosen": -272.2848815917969, |
|
"logps/rejected": -232.285888671875, |
|
"loss": 0.036, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.03838383033871651, |
|
"rewards/margins": 0.07473193854093552, |
|
"rewards/rejected": -0.11311577260494232, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2764217933795297e-06, |
|
"logits/chosen": 1.0228240489959717, |
|
"logits/rejected": 0.9570671319961548, |
|
"logps/chosen": -289.8697204589844, |
|
"logps/rejected": -259.46624755859375, |
|
"loss": 0.0368, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04657725244760513, |
|
"rewards/margins": 0.04652193933725357, |
|
"rewards/rejected": -0.0930991917848587, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": 0.9639849066734314, |
|
"eval_logits/rejected": 1.064185619354248, |
|
"eval_logps/chosen": -281.45947265625, |
|
"eval_logps/rejected": -253.4691162109375, |
|
"eval_loss": 0.0351751483976841, |
|
"eval_rewards/accuracies": 0.6345000267028809, |
|
"eval_rewards/chosen": -0.03912654146552086, |
|
"eval_rewards/margins": 0.057405244559049606, |
|
"eval_rewards/rejected": -0.09653179347515106, |
|
"eval_runtime": 539.0332, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.928, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2536783591932786e-06, |
|
"logits/chosen": 1.1154290437698364, |
|
"logits/rejected": 1.016729712486267, |
|
"logps/chosen": -273.88702392578125, |
|
"logps/rejected": -253.8929901123047, |
|
"loss": 0.0307, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.034655872732400894, |
|
"rewards/margins": 0.0648706778883934, |
|
"rewards/rejected": -0.0995265543460846, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.230955492793149e-06, |
|
"logits/chosen": 1.015891194343567, |
|
"logits/rejected": 1.1429827213287354, |
|
"logps/chosen": -274.21929931640625, |
|
"logps/rejected": -235.86441040039062, |
|
"loss": 0.0409, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.025577425956726074, |
|
"rewards/margins": 0.05668734759092331, |
|
"rewards/rejected": -0.08226476609706879, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.208255091531947e-06, |
|
"logits/chosen": 1.0092315673828125, |
|
"logits/rejected": 1.031456708908081, |
|
"logps/chosen": -266.92181396484375, |
|
"logps/rejected": -221.04495239257812, |
|
"loss": 0.0424, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.02549988031387329, |
|
"rewards/margins": 0.06533181667327881, |
|
"rewards/rejected": -0.0908316969871521, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1855790508866435e-06, |
|
"logits/chosen": 0.9829255938529968, |
|
"logits/rejected": 1.0525522232055664, |
|
"logps/chosen": -292.2393798828125, |
|
"logps/rejected": -274.6322326660156, |
|
"loss": 0.0217, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.024583814665675163, |
|
"rewards/margins": 0.04256455600261688, |
|
"rewards/rejected": -0.0671483725309372, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.162929264300107e-06, |
|
"logits/chosen": 1.036615014076233, |
|
"logits/rejected": 1.1294059753417969, |
|
"logps/chosen": -254.82858276367188, |
|
"logps/rejected": -225.80612182617188, |
|
"loss": 0.0454, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03245388716459274, |
|
"rewards/margins": 0.05736144259572029, |
|
"rewards/rejected": -0.08981534093618393, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1403076230230006e-06, |
|
"logits/chosen": 1.020202875137329, |
|
"logits/rejected": 1.0851867198944092, |
|
"logps/chosen": -270.38360595703125, |
|
"logps/rejected": -208.3204345703125, |
|
"loss": 0.0419, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.040919043123722076, |
|
"rewards/margins": 0.04137270897626877, |
|
"rewards/rejected": -0.08229174464941025, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.11771601595586e-06, |
|
"logits/chosen": 1.0087039470672607, |
|
"logits/rejected": 1.0265519618988037, |
|
"logps/chosen": -259.2693786621094, |
|
"logps/rejected": -261.12091064453125, |
|
"loss": 0.0282, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.017938809469342232, |
|
"rewards/margins": 0.04762765020132065, |
|
"rewards/rejected": -0.06556645780801773, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0951563294913737e-06, |
|
"logits/chosen": 0.9380186796188354, |
|
"logits/rejected": 1.0882261991500854, |
|
"logps/chosen": -263.954345703125, |
|
"logps/rejected": -221.87899780273438, |
|
"loss": 0.0331, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.033337127417325974, |
|
"rewards/margins": 0.04793107137084007, |
|
"rewards/rejected": -0.08126820623874664, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0726304473568693e-06, |
|
"logits/chosen": 1.0044240951538086, |
|
"logits/rejected": 1.0548676252365112, |
|
"logps/chosen": -250.0818634033203, |
|
"logps/rejected": -269.6571960449219, |
|
"loss": 0.0306, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.025777125731110573, |
|
"rewards/margins": 0.05805457755923271, |
|
"rewards/rejected": -0.08383170515298843, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.050140250457023e-06, |
|
"logits/chosen": 0.9972747564315796, |
|
"logits/rejected": 0.9751909375190735, |
|
"logps/chosen": -250.11572265625, |
|
"logps/rejected": -239.9993438720703, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.019835414364933968, |
|
"rewards/margins": 0.05462411791086197, |
|
"rewards/rejected": -0.07445952296257019, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_logits/chosen": 0.9676101803779602, |
|
"eval_logits/rejected": 1.0684521198272705, |
|
"eval_logps/chosen": -280.0627746582031, |
|
"eval_logps/rejected": -251.82423400878906, |
|
"eval_loss": 0.03506240248680115, |
|
"eval_rewards/accuracies": 0.6330000162124634, |
|
"eval_rewards/chosen": -0.025159668177366257, |
|
"eval_rewards/margins": 0.05492350831627846, |
|
"eval_rewards/rejected": -0.08008317649364471, |
|
"eval_runtime": 538.9096, |
|
"eval_samples_per_second": 3.711, |
|
"eval_steps_per_second": 0.928, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0276876167168042e-06, |
|
"logits/chosen": 0.9994446039199829, |
|
"logits/rejected": 1.1471552848815918, |
|
"logps/chosen": -285.36883544921875, |
|
"logps/rejected": -264.88861083984375, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.021740112453699112, |
|
"rewards/margins": 0.062014125287532806, |
|
"rewards/rejected": -0.08375424146652222, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0052744209246682e-06, |
|
"logits/chosen": 0.9745758771896362, |
|
"logits/rejected": 1.0511457920074463, |
|
"logps/chosen": -258.8453674316406, |
|
"logps/rejected": -252.9406280517578, |
|
"loss": 0.0345, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.029231492429971695, |
|
"rewards/margins": 0.04611852020025253, |
|
"rewards/rejected": -0.07535000890493393, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9829025345760127e-06, |
|
"logits/chosen": 0.9900597333908081, |
|
"logits/rejected": 1.0469316244125366, |
|
"logps/chosen": -315.5625305175781, |
|
"logps/rejected": -272.3370361328125, |
|
"loss": 0.0276, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.026207396760582924, |
|
"rewards/margins": 0.05628987401723862, |
|
"rewards/rejected": -0.08249727636575699, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9605738257169115e-06, |
|
"logits/chosen": 1.0605112314224243, |
|
"logits/rejected": 1.0293577909469604, |
|
"logps/chosen": -249.8504638671875, |
|
"logps/rejected": -250.99655151367188, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.038626205176115036, |
|
"rewards/margins": 0.04553366079926491, |
|
"rewards/rejected": -0.08415986597537994, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9382901587881275e-06, |
|
"logits/chosen": 0.9544248580932617, |
|
"logits/rejected": 1.0862176418304443, |
|
"logps/chosen": -259.606689453125, |
|
"logps/rejected": -246.25537109375, |
|
"loss": 0.0303, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03279640153050423, |
|
"rewards/margins": 0.049646954983472824, |
|
"rewards/rejected": -0.08244334906339645, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.916053394469437e-06, |
|
"logits/chosen": 0.9803518056869507, |
|
"logits/rejected": 1.0460015535354614, |
|
"logps/chosen": -227.25918579101562, |
|
"logps/rejected": -231.8365478515625, |
|
"loss": 0.0443, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.025378238409757614, |
|
"rewards/margins": 0.05420111492276192, |
|
"rewards/rejected": -0.07957935333251953, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8938653895242604e-06, |
|
"logits/chosen": 0.9943161010742188, |
|
"logits/rejected": 1.0234358310699463, |
|
"logps/chosen": -242.02169799804688, |
|
"logps/rejected": -225.488525390625, |
|
"loss": 0.0387, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.0067793638445436954, |
|
"rewards/margins": 0.06738194823265076, |
|
"rewards/rejected": -0.07416132837533951, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8717279966446267e-06, |
|
"logits/chosen": 0.9629790186882019, |
|
"logits/rejected": 1.0603584051132202, |
|
"logps/chosen": -270.99151611328125, |
|
"logps/rejected": -255.68594360351562, |
|
"loss": 0.0378, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.018644142895936966, |
|
"rewards/margins": 0.057522498071193695, |
|
"rewards/rejected": -0.07616663724184036, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8496430642964698e-06, |
|
"logits/chosen": 0.9359694719314575, |
|
"logits/rejected": 1.0587961673736572, |
|
"logps/chosen": -254.59310913085938, |
|
"logps/rejected": -238.82177734375, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.014369276352226734, |
|
"rewards/margins": 0.056524503976106644, |
|
"rewards/rejected": -0.0708937793970108, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.827612436565286e-06, |
|
"logits/chosen": 0.947625458240509, |
|
"logits/rejected": 1.0177226066589355, |
|
"logps/chosen": -253.84335327148438, |
|
"logps/rejected": -233.13687133789062, |
|
"loss": 0.0341, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.021341439336538315, |
|
"rewards/margins": 0.062166161835193634, |
|
"rewards/rejected": -0.08350759744644165, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": 0.940484881401062, |
|
"eval_logits/rejected": 1.0420405864715576, |
|
"eval_logps/chosen": -279.9447021484375, |
|
"eval_logps/rejected": -251.8425750732422, |
|
"eval_loss": 0.0352231003344059, |
|
"eval_rewards/accuracies": 0.6320000290870667, |
|
"eval_rewards/chosen": -0.023978877812623978, |
|
"eval_rewards/margins": 0.05628751218318939, |
|
"eval_rewards/rejected": -0.08026638627052307, |
|
"eval_runtime": 539.1415, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.927, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8056379530021492e-06, |
|
"logits/chosen": 0.9212465286254883, |
|
"logits/rejected": 1.0563522577285767, |
|
"logps/chosen": -255.9601287841797, |
|
"logps/rejected": -224.5750274658203, |
|
"loss": 0.044, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.024405932053923607, |
|
"rewards/margins": 0.059529535472393036, |
|
"rewards/rejected": -0.0839354619383812, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7837214484701154e-06, |
|
"logits/chosen": 1.0158764123916626, |
|
"logits/rejected": 1.0589698553085327, |
|
"logps/chosen": -272.985107421875, |
|
"logps/rejected": -264.05865478515625, |
|
"loss": 0.0395, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.02469342015683651, |
|
"rewards/margins": 0.07536058127880096, |
|
"rewards/rejected": -0.10005400329828262, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7618647529910043e-06, |
|
"logits/chosen": 0.9243197441101074, |
|
"logits/rejected": 1.1003185510635376, |
|
"logps/chosen": -260.38006591796875, |
|
"logps/rejected": -240.22866821289062, |
|
"loss": 0.0376, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.01998872682452202, |
|
"rewards/margins": 0.06689772009849548, |
|
"rewards/rejected": -0.0868864506483078, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7400696915925996e-06, |
|
"logits/chosen": 0.9673307538032532, |
|
"logits/rejected": 1.0932881832122803, |
|
"logps/chosen": -277.5309753417969, |
|
"logps/rejected": -269.5796813964844, |
|
"loss": 0.0248, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.010230915620923042, |
|
"rewards/margins": 0.076592355966568, |
|
"rewards/rejected": -0.08682326972484589, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.718338084156254e-06, |
|
"logits/chosen": 0.9628580212593079, |
|
"logits/rejected": 1.0081040859222412, |
|
"logps/chosen": -276.6579895019531, |
|
"logps/rejected": -248.1109161376953, |
|
"loss": 0.03, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.01852061226963997, |
|
"rewards/margins": 0.05575736239552498, |
|
"rewards/rejected": -0.07427798211574554, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.6966717452649372e-06, |
|
"logits/chosen": 0.9334288835525513, |
|
"logits/rejected": 1.002824068069458, |
|
"logps/chosen": -279.92205810546875, |
|
"logps/rejected": -271.9393615722656, |
|
"loss": 0.0263, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.017314914613962173, |
|
"rewards/margins": 0.059098273515701294, |
|
"rewards/rejected": -0.07641319185495377, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6750724840517103e-06, |
|
"logits/chosen": 0.9612905383110046, |
|
"logits/rejected": 1.0620540380477905, |
|
"logps/chosen": -294.86712646484375, |
|
"logps/rejected": -253.3771514892578, |
|
"loss": 0.0423, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.017185799777507782, |
|
"rewards/margins": 0.07057368010282516, |
|
"rewards/rejected": -0.08775947988033295, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6535421040486686e-06, |
|
"logits/chosen": 0.9643794894218445, |
|
"logits/rejected": 1.0106008052825928, |
|
"logps/chosen": -256.61090087890625, |
|
"logps/rejected": -232.49862670898438, |
|
"loss": 0.0411, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.018548784777522087, |
|
"rewards/margins": 0.054013751447200775, |
|
"rewards/rejected": -0.07256253063678741, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6320824030363458e-06, |
|
"logits/chosen": 0.8658086657524109, |
|
"logits/rejected": 1.1067800521850586, |
|
"logps/chosen": -268.14715576171875, |
|
"logps/rejected": -246.0467529296875, |
|
"loss": 0.0403, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.025564253330230713, |
|
"rewards/margins": 0.06752271950244904, |
|
"rewards/rejected": -0.09308697283267975, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6106951728936028e-06, |
|
"logits/chosen": 1.0061540603637695, |
|
"logits/rejected": 1.109466314315796, |
|
"logps/chosen": -231.77230834960938, |
|
"logps/rejected": -221.00439453125, |
|
"loss": 0.0488, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03300454095005989, |
|
"rewards/margins": 0.05412193387746811, |
|
"rewards/rejected": -0.0871264785528183, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": 0.9377838373184204, |
|
"eval_logits/rejected": 1.039380669593811, |
|
"eval_logps/chosen": -280.7594299316406, |
|
"eval_logps/rejected": -252.99684143066406, |
|
"eval_loss": 0.035037338733673096, |
|
"eval_rewards/accuracies": 0.6340000033378601, |
|
"eval_rewards/chosen": -0.03212602436542511, |
|
"eval_rewards/margins": 0.059683240950107574, |
|
"eval_rewards/rejected": -0.09180926531553268, |
|
"eval_runtime": 539.216, |
|
"eval_samples_per_second": 3.709, |
|
"eval_steps_per_second": 0.927, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5893821994479996e-06, |
|
"logits/chosen": 1.040175199508667, |
|
"logits/rejected": 1.122536301612854, |
|
"logps/chosen": -279.53924560546875, |
|
"logps/rejected": -262.05816650390625, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.026052657514810562, |
|
"rewards/margins": 0.08227355033159256, |
|
"rewards/rejected": -0.10832621157169342, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5681452623266868e-06, |
|
"logits/chosen": 0.9631746411323547, |
|
"logits/rejected": 1.0485169887542725, |
|
"logps/chosen": -300.1724548339844, |
|
"logps/rejected": -261.44134521484375, |
|
"loss": 0.0336, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.030287887901067734, |
|
"rewards/margins": 0.05683339759707451, |
|
"rewards/rejected": -0.08712128549814224, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5469861348078014e-06, |
|
"logits/chosen": 0.9725979566574097, |
|
"logits/rejected": 1.0084983110427856, |
|
"logps/chosen": -267.7999572753906, |
|
"logps/rejected": -241.93508911132812, |
|
"loss": 0.0452, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.027236010879278183, |
|
"rewards/margins": 0.05542879179120064, |
|
"rewards/rejected": -0.08266480267047882, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5259065836724035e-06, |
|
"logits/chosen": 0.9311686754226685, |
|
"logits/rejected": 1.0091297626495361, |
|
"logps/chosen": -268.8539123535156, |
|
"logps/rejected": -257.43206787109375, |
|
"loss": 0.0284, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0198749378323555, |
|
"rewards/margins": 0.05286857485771179, |
|
"rewards/rejected": -0.07274351269006729, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5049083690569456e-06, |
|
"logits/chosen": 0.8933135867118835, |
|
"logits/rejected": 1.0143239498138428, |
|
"logps/chosen": -276.6771545410156, |
|
"logps/rejected": -236.60525512695312, |
|
"loss": 0.0394, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.040222011506557465, |
|
"rewards/margins": 0.06946249306201935, |
|
"rewards/rejected": -0.10968450456857681, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4839932443063057e-06, |
|
"logits/chosen": 1.0430926084518433, |
|
"logits/rejected": 1.0419895648956299, |
|
"logps/chosen": -241.27578735351562, |
|
"logps/rejected": -215.1394500732422, |
|
"loss": 0.0433, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04254927858710289, |
|
"rewards/margins": 0.054975200444459915, |
|
"rewards/rejected": -0.0975244790315628, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4631629558273803e-06, |
|
"logits/chosen": 0.9953416585922241, |
|
"logits/rejected": 1.0442326068878174, |
|
"logps/chosen": -291.2084045410156, |
|
"logps/rejected": -256.3158264160156, |
|
"loss": 0.0382, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.02477596327662468, |
|
"rewards/margins": 0.07663208246231079, |
|
"rewards/rejected": -0.10140804201364517, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4424192429432657e-06, |
|
"logits/chosen": 1.0064821243286133, |
|
"logits/rejected": 1.0545318126678467, |
|
"logps/chosen": -307.71038818359375, |
|
"logps/rejected": -225.4966583251953, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.026431897655129433, |
|
"rewards/margins": 0.06981770694255829, |
|
"rewards/rejected": -0.09624960273504257, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.421763837748016e-06, |
|
"logits/chosen": 0.9946783185005188, |
|
"logits/rejected": 1.0760682821273804, |
|
"logps/chosen": -267.80877685546875, |
|
"logps/rejected": -273.41168212890625, |
|
"loss": 0.0416, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.04798274114727974, |
|
"rewards/margins": 0.05912737920880318, |
|
"rewards/rejected": -0.10711012035608292, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.401198464962021e-06, |
|
"logits/chosen": 1.0298030376434326, |
|
"logits/rejected": 0.9537370800971985, |
|
"logps/chosen": -272.90863037109375, |
|
"logps/rejected": -239.8860321044922, |
|
"loss": 0.0279, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03228624537587166, |
|
"rewards/margins": 0.05655151605606079, |
|
"rewards/rejected": -0.08883775770664215, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": 0.9350239038467407, |
|
"eval_logits/rejected": 1.0360502004623413, |
|
"eval_logps/chosen": -281.37646484375, |
|
"eval_logps/rejected": -253.77207946777344, |
|
"eval_loss": 0.03485475853085518, |
|
"eval_rewards/accuracies": 0.6315000057220459, |
|
"eval_rewards/chosen": -0.038296524435281754, |
|
"eval_rewards/margins": 0.06126519292593002, |
|
"eval_rewards/rejected": -0.09956171363592148, |
|
"eval_runtime": 539.0411, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.928, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3807248417879896e-06, |
|
"logits/chosen": 0.9518525004386902, |
|
"logits/rejected": 0.9893406629562378, |
|
"logps/chosen": -258.8128356933594, |
|
"logps/rejected": -219.7599639892578, |
|
"loss": 0.0339, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02622481808066368, |
|
"rewards/margins": 0.05761373043060303, |
|
"rewards/rejected": -0.083838552236557, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3603446777675665e-06, |
|
"logits/chosen": 0.9631742238998413, |
|
"logits/rejected": 0.9899166822433472, |
|
"logps/chosen": -260.05377197265625, |
|
"logps/rejected": -240.6715087890625, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.033961087465286255, |
|
"rewards/margins": 0.05184347182512283, |
|
"rewards/rejected": -0.08580456674098969, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3400596746385817e-06, |
|
"logits/chosen": 0.9519561529159546, |
|
"logits/rejected": 0.9998300671577454, |
|
"logps/chosen": -290.88958740234375, |
|
"logps/rejected": -255.8434600830078, |
|
"loss": 0.0278, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.036170247942209244, |
|
"rewards/margins": 0.07095544040203094, |
|
"rewards/rejected": -0.10712568461894989, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3198715261929587e-06, |
|
"logits/chosen": 0.9674153327941895, |
|
"logits/rejected": 1.0848486423492432, |
|
"logps/chosen": -322.2984924316406, |
|
"logps/rejected": -246.0180206298828, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.029658863320946693, |
|
"rewards/margins": 0.04690408706665039, |
|
"rewards/rejected": -0.07656295597553253, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.2997819181352823e-06, |
|
"logits/chosen": 0.979369044303894, |
|
"logits/rejected": 0.9796104431152344, |
|
"logps/chosen": -299.4449462890625, |
|
"logps/rejected": -267.0638122558594, |
|
"loss": 0.0319, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.033346764743328094, |
|
"rewards/margins": 0.05519961193203926, |
|
"rewards/rejected": -0.08854638040065765, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2797925279420454e-06, |
|
"logits/chosen": 1.0160847902297974, |
|
"logits/rejected": 1.0748382806777954, |
|
"logps/chosen": -291.21661376953125, |
|
"logps/rejected": -258.7221374511719, |
|
"loss": 0.0387, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.040911462157964706, |
|
"rewards/margins": 0.046320244669914246, |
|
"rewards/rejected": -0.08723169565200806, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2599050247215764e-06, |
|
"logits/chosen": 0.9987077713012695, |
|
"logits/rejected": 1.0722219944000244, |
|
"logps/chosen": -286.01446533203125, |
|
"logps/rejected": -262.60101318359375, |
|
"loss": 0.0249, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.023940104991197586, |
|
"rewards/margins": 0.07719768583774567, |
|
"rewards/rejected": -0.10113777965307236, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2401210690746705e-06, |
|
"logits/chosen": 0.949263870716095, |
|
"logits/rejected": 1.113747239112854, |
|
"logps/chosen": -264.36834716796875, |
|
"logps/rejected": -241.06570434570312, |
|
"loss": 0.0276, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.029313066974282265, |
|
"rewards/margins": 0.047565605491399765, |
|
"rewards/rejected": -0.07687868177890778, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2204423129559306e-06, |
|
"logits/chosen": 0.8770235180854797, |
|
"logits/rejected": 1.056774377822876, |
|
"logps/chosen": -296.3006896972656, |
|
"logps/rejected": -270.3150939941406, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.026878798380494118, |
|
"rewards/margins": 0.04574307054281235, |
|
"rewards/rejected": -0.07262186706066132, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.20087039953583e-06, |
|
"logits/chosen": 0.9897807240486145, |
|
"logits/rejected": 1.0115458965301514, |
|
"logps/chosen": -281.16778564453125, |
|
"logps/rejected": -259.2882080078125, |
|
"loss": 0.0427, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.01994657889008522, |
|
"rewards/margins": 0.06008830666542053, |
|
"rewards/rejected": -0.08003488928079605, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_logits/chosen": 0.9318579435348511, |
|
"eval_logits/rejected": 1.0336464643478394, |
|
"eval_logps/chosen": -280.66436767578125, |
|
"eval_logps/rejected": -252.92898559570312, |
|
"eval_loss": 0.03483254089951515, |
|
"eval_rewards/accuracies": 0.6309999823570251, |
|
"eval_rewards/chosen": -0.031175779178738594, |
|
"eval_rewards/margins": 0.05995478481054306, |
|
"eval_rewards/rejected": -0.0911305621266365, |
|
"eval_runtime": 539.1556, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.927, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.181406963063507e-06, |
|
"logits/chosen": 0.987993061542511, |
|
"logits/rejected": 0.9600605964660645, |
|
"logps/chosen": -277.44580078125, |
|
"logps/rejected": -232.87661743164062, |
|
"loss": 0.0366, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.025253716856241226, |
|
"rewards/margins": 0.05700179934501648, |
|
"rewards/rejected": -0.082255519926548, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1620536287303052e-06, |
|
"logits/chosen": 0.9831315279006958, |
|
"logits/rejected": 1.0662561655044556, |
|
"logps/chosen": -261.1286315917969, |
|
"logps/rejected": -243.3155059814453, |
|
"loss": 0.0467, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.03405457362532616, |
|
"rewards/margins": 0.06027429178357124, |
|
"rewards/rejected": -0.0943288654088974, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1428120125340717e-06, |
|
"logits/chosen": 1.0292747020721436, |
|
"logits/rejected": 0.947592556476593, |
|
"logps/chosen": -272.7021484375, |
|
"logps/rejected": -243.03701782226562, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02782285585999489, |
|
"rewards/margins": 0.05716438964009285, |
|
"rewards/rejected": -0.08498723804950714, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.123683721144223e-06, |
|
"logits/chosen": 0.977290153503418, |
|
"logits/rejected": 1.0009124279022217, |
|
"logps/chosen": -272.8582458496094, |
|
"logps/rejected": -241.51687622070312, |
|
"loss": 0.0348, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.03376708924770355, |
|
"rewards/margins": 0.07209788262844086, |
|
"rewards/rejected": -0.105864979326725, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1046703517675848e-06, |
|
"logits/chosen": 0.973223865032196, |
|
"logits/rejected": 0.9701916575431824, |
|
"logps/chosen": -247.44888305664062, |
|
"logps/rejected": -250.46353149414062, |
|
"loss": 0.0384, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.028101569041609764, |
|
"rewards/margins": 0.05619729310274124, |
|
"rewards/rejected": -0.08429885655641556, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.085773492015028e-06, |
|
"logits/chosen": 0.960196852684021, |
|
"logits/rejected": 1.067030668258667, |
|
"logps/chosen": -288.462158203125, |
|
"logps/rejected": -235.9557647705078, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.03159577399492264, |
|
"rewards/margins": 0.07470157742500305, |
|
"rewards/rejected": -0.10629735141992569, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0669947197689034e-06, |
|
"logits/chosen": 0.9508602023124695, |
|
"logits/rejected": 1.0008846521377563, |
|
"logps/chosen": -229.83627319335938, |
|
"logps/rejected": -243.19570922851562, |
|
"loss": 0.0447, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.04542272537946701, |
|
"rewards/margins": 0.0430048331618309, |
|
"rewards/rejected": -0.08842755109071732, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.048335603051291e-06, |
|
"logits/chosen": 0.9108030200004578, |
|
"logits/rejected": 1.035298228263855, |
|
"logps/chosen": -272.658935546875, |
|
"logps/rejected": -255.37905883789062, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.028898200020194054, |
|
"rewards/margins": 0.07144349068403244, |
|
"rewards/rejected": -0.10034169256687164, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0297976998930665e-06, |
|
"logits/chosen": 0.9145883321762085, |
|
"logits/rejected": 0.9523155093193054, |
|
"logps/chosen": -278.5872497558594, |
|
"logps/rejected": -242.91748046875, |
|
"loss": 0.0345, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.023207422345876694, |
|
"rewards/margins": 0.05597452074289322, |
|
"rewards/rejected": -0.07918194681406021, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0113825582038078e-06, |
|
"logits/chosen": 0.9705532193183899, |
|
"logits/rejected": 1.0568289756774902, |
|
"logps/chosen": -271.0597839355469, |
|
"logps/rejected": -225.5278778076172, |
|
"loss": 0.0331, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03660514950752258, |
|
"rewards/margins": 0.05592336505651474, |
|
"rewards/rejected": -0.09252851456403732, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": 0.9335169196128845, |
|
"eval_logits/rejected": 1.035439372062683, |
|
"eval_logps/chosen": -280.4610900878906, |
|
"eval_logps/rejected": -252.53688049316406, |
|
"eval_loss": 0.0349029041826725, |
|
"eval_rewards/accuracies": 0.6290000081062317, |
|
"eval_rewards/chosen": -0.0291427094489336, |
|
"eval_rewards/margins": 0.058066822588443756, |
|
"eval_rewards/rejected": -0.08720952272415161, |
|
"eval_runtime": 539.1069, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.927, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.930917156425477e-07, |
|
"logits/chosen": 0.9502407908439636, |
|
"logits/rejected": 1.072632908821106, |
|
"logps/chosen": -246.87118530273438, |
|
"logps/rejected": -225.63467407226562, |
|
"loss": 0.0405, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.018824134021997452, |
|
"rewards/margins": 0.0712006688117981, |
|
"rewards/rejected": -0.09002481400966644, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.749266994893756e-07, |
|
"logits/chosen": 0.9383459091186523, |
|
"logits/rejected": 1.0246083736419678, |
|
"logps/chosen": -242.57315063476562, |
|
"logps/rejected": -255.66812133789062, |
|
"loss": 0.0387, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.029849324375391006, |
|
"rewards/margins": 0.04549198970198631, |
|
"rewards/rejected": -0.07534130662679672, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.56889026517913e-07, |
|
"logits/chosen": 0.9737696647644043, |
|
"logits/rejected": 1.0874649286270142, |
|
"logps/chosen": -272.6422424316406, |
|
"logps/rejected": -241.7522430419922, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.027452822774648666, |
|
"rewards/margins": 0.06477371603250504, |
|
"rewards/rejected": -0.0922265350818634, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.389802028686617e-07, |
|
"logits/chosen": 0.9742962121963501, |
|
"logits/rejected": 1.0039392709732056, |
|
"logps/chosen": -219.0965118408203, |
|
"logps/rejected": -192.56277465820312, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.029790541157126427, |
|
"rewards/margins": 0.05616752430796623, |
|
"rewards/rejected": -0.08595806360244751, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.212017239232427e-07, |
|
"logits/chosen": 0.951312243938446, |
|
"logits/rejected": 1.0733340978622437, |
|
"logps/chosen": -271.9151916503906, |
|
"logps/rejected": -280.4118957519531, |
|
"loss": 0.0357, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.01736624911427498, |
|
"rewards/margins": 0.08112471550703049, |
|
"rewards/rejected": -0.09849096834659576, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.03555074179533e-07, |
|
"logits/chosen": 0.9455773234367371, |
|
"logits/rejected": 1.05752432346344, |
|
"logps/chosen": -267.56500244140625, |
|
"logps/rejected": -246.6649932861328, |
|
"loss": 0.0291, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.02218179777264595, |
|
"rewards/margins": 0.07102219760417938, |
|
"rewards/rejected": -0.09320400655269623, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.860417271277067e-07, |
|
"logits/chosen": 0.9300365447998047, |
|
"logits/rejected": 1.022707462310791, |
|
"logps/chosen": -242.83029174804688, |
|
"logps/rejected": -241.719970703125, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.027262404561042786, |
|
"rewards/margins": 0.0598478689789772, |
|
"rewards/rejected": -0.08711027354001999, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.686631451272029e-07, |
|
"logits/chosen": 0.9614574313163757, |
|
"logits/rejected": 0.9695903658866882, |
|
"logps/chosen": -269.4878845214844, |
|
"logps/rejected": -222.73403930664062, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.035077519714832306, |
|
"rewards/margins": 0.054178714752197266, |
|
"rewards/rejected": -0.08925624191761017, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.514207792846168e-07, |
|
"logits/chosen": 1.0225694179534912, |
|
"logits/rejected": 1.0496468544006348, |
|
"logps/chosen": -252.6670379638672, |
|
"logps/rejected": -257.1852111816406, |
|
"loss": 0.0488, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.02861052379012108, |
|
"rewards/margins": 0.060965172946453094, |
|
"rewards/rejected": -0.08957569301128387, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.343160693325356e-07, |
|
"logits/chosen": 0.9750697016716003, |
|
"logits/rejected": 1.0202362537384033, |
|
"logps/chosen": -232.67269897460938, |
|
"logps/rejected": -227.71505737304688, |
|
"loss": 0.0415, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03148717060685158, |
|
"rewards/margins": 0.05822090432047844, |
|
"rewards/rejected": -0.08970808237791061, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_logits/chosen": 0.9227569699287415, |
|
"eval_logits/rejected": 1.0247799158096313, |
|
"eval_logps/chosen": -280.527587890625, |
|
"eval_logps/rejected": -252.64686584472656, |
|
"eval_loss": 0.03488382324576378, |
|
"eval_rewards/accuracies": 0.6315000057220459, |
|
"eval_rewards/chosen": -0.02980780228972435, |
|
"eval_rewards/margins": 0.0585014745593071, |
|
"eval_rewards/rejected": -0.08830928802490234, |
|
"eval_runtime": 539.0433, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.928, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.173504435093174e-07, |
|
"logits/chosen": 0.9547770619392395, |
|
"logits/rejected": 0.9979850649833679, |
|
"logps/chosen": -285.94573974609375, |
|
"logps/rejected": -269.0919494628906, |
|
"loss": 0.0381, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.032671697437763214, |
|
"rewards/margins": 0.04868137463927269, |
|
"rewards/rejected": -0.0813530758023262, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.00525318439836e-07, |
|
"logits/chosen": 1.0028339624404907, |
|
"logits/rejected": 1.0430415868759155, |
|
"logps/chosen": -271.57373046875, |
|
"logps/rejected": -232.68222045898438, |
|
"loss": 0.0312, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.03152400627732277, |
|
"rewards/margins": 0.061547745019197464, |
|
"rewards/rejected": -0.09307174384593964, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.838420990171927e-07, |
|
"logits/chosen": 0.9217666387557983, |
|
"logits/rejected": 1.0288439989089966, |
|
"logps/chosen": -313.1500549316406, |
|
"logps/rejected": -267.90985107421875, |
|
"loss": 0.0292, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.022273462265729904, |
|
"rewards/margins": 0.06082174926996231, |
|
"rewards/rejected": -0.08309520781040192, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.673021782854084e-07, |
|
"logits/chosen": 0.9779103994369507, |
|
"logits/rejected": 0.9794729351997375, |
|
"logps/chosen": -250.5184783935547, |
|
"logps/rejected": -264.76287841796875, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.028768843039870262, |
|
"rewards/margins": 0.060712385922670364, |
|
"rewards/rejected": -0.08948123455047607, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.509069373231039e-07, |
|
"logits/chosen": 0.9786937832832336, |
|
"logits/rejected": 0.9728788137435913, |
|
"logps/chosen": -294.4608459472656, |
|
"logps/rejected": -271.31146240234375, |
|
"loss": 0.0281, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.019416136667132378, |
|
"rewards/margins": 0.06897474080324173, |
|
"rewards/rejected": -0.08839087188243866, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.346577451281822e-07, |
|
"logits/chosen": 0.9356600046157837, |
|
"logits/rejected": 1.0507375001907349, |
|
"logps/chosen": -251.129638671875, |
|
"logps/rejected": -232.9912567138672, |
|
"loss": 0.0454, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.03892569988965988, |
|
"rewards/margins": 0.061569105833768845, |
|
"rewards/rejected": -0.10049480199813843, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.185559585035138e-07, |
|
"logits/chosen": 0.9160947799682617, |
|
"logits/rejected": 0.9816699028015137, |
|
"logps/chosen": -286.2984924316406, |
|
"logps/rejected": -260.083984375, |
|
"loss": 0.0455, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.01913222298026085, |
|
"rewards/margins": 0.05808521434664726, |
|
"rewards/rejected": -0.07721744477748871, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.026029219436504e-07, |
|
"logits/chosen": 0.9672778844833374, |
|
"logits/rejected": 1.019814372062683, |
|
"logps/chosen": -306.3282470703125, |
|
"logps/rejected": -272.06280517578125, |
|
"loss": 0.0323, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.02404235675930977, |
|
"rewards/margins": 0.06635276973247528, |
|
"rewards/rejected": -0.09039512276649475, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.867999675225523e-07, |
|
"logits/chosen": 0.9749993085861206, |
|
"logits/rejected": 0.9944796562194824, |
|
"logps/chosen": -297.462646484375, |
|
"logps/rejected": -249.9281768798828, |
|
"loss": 0.0296, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.019663769751787186, |
|
"rewards/margins": 0.053207218647003174, |
|
"rewards/rejected": -0.07287098467350006, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.711484147823663e-07, |
|
"logits/chosen": 0.9646242260932922, |
|
"logits/rejected": 0.9969871640205383, |
|
"logps/chosen": -270.52301025390625, |
|
"logps/rejected": -241.90933227539062, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.02490374445915222, |
|
"rewards/margins": 0.06743566691875458, |
|
"rewards/rejected": -0.0923394113779068, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": 0.927689790725708, |
|
"eval_logits/rejected": 1.030518889427185, |
|
"eval_logps/chosen": -280.2290954589844, |
|
"eval_logps/rejected": -252.4009246826172, |
|
"eval_loss": 0.034884098917245865, |
|
"eval_rewards/accuracies": 0.6294999718666077, |
|
"eval_rewards/chosen": -0.026823006570339203, |
|
"eval_rewards/margins": 0.059027016162872314, |
|
"eval_rewards/rejected": -0.08585001528263092, |
|
"eval_runtime": 539.1402, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.927, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.556495706232413e-07, |
|
"logits/chosen": 0.9685632586479187, |
|
"logits/rejected": 1.0489590167999268, |
|
"logps/chosen": -274.52239990234375, |
|
"logps/rejected": -239.2083740234375, |
|
"loss": 0.0369, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.01814478076994419, |
|
"rewards/margins": 0.06804581731557846, |
|
"rewards/rejected": -0.0861906185746193, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.403047291942057e-07, |
|
"logits/chosen": 0.9458588361740112, |
|
"logits/rejected": 0.9609957933425903, |
|
"logps/chosen": -278.5867919921875, |
|
"logps/rejected": -264.2691955566406, |
|
"loss": 0.0291, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.01274092961102724, |
|
"rewards/margins": 0.05699128657579422, |
|
"rewards/rejected": -0.06973221898078918, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.251151717851023e-07, |
|
"logits/chosen": 0.9313241243362427, |
|
"logits/rejected": 0.9795175790786743, |
|
"logps/chosen": -291.05364990234375, |
|
"logps/rejected": -275.7203063964844, |
|
"loss": 0.0411, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.02996246889233589, |
|
"rewards/margins": 0.08125524967908859, |
|
"rewards/rejected": -0.11121772229671478, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.100821667196041e-07, |
|
"logits/chosen": 0.9516725540161133, |
|
"logits/rejected": 1.033613920211792, |
|
"logps/chosen": -304.0897216796875, |
|
"logps/rejected": -248.3233642578125, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.012124654836952686, |
|
"rewards/margins": 0.06473545730113983, |
|
"rewards/rejected": -0.07686010748147964, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.952069692493062e-07, |
|
"logits/chosen": 0.8957148790359497, |
|
"logits/rejected": 1.0140354633331299, |
|
"logps/chosen": -300.5768127441406, |
|
"logps/rejected": -266.0682373046875, |
|
"loss": 0.0293, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.006201650947332382, |
|
"rewards/margins": 0.07337900996208191, |
|
"rewards/rejected": -0.07958065718412399, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.80490821448918e-07, |
|
"logits/chosen": 0.9774069786071777, |
|
"logits/rejected": 1.0049412250518799, |
|
"logps/chosen": -222.11856079101562, |
|
"logps/rejected": -206.6851348876953, |
|
"loss": 0.0289, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.029702406376600266, |
|
"rewards/margins": 0.059780023992061615, |
|
"rewards/rejected": -0.08948242664337158, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.659349521125459e-07, |
|
"logits/chosen": 0.945693850517273, |
|
"logits/rejected": 1.0435597896575928, |
|
"logps/chosen": -238.2971649169922, |
|
"logps/rejected": -243.50106811523438, |
|
"loss": 0.0443, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.02947511151432991, |
|
"rewards/margins": 0.07011254131793976, |
|
"rewards/rejected": -0.09958765655755997, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.5154057665109e-07, |
|
"logits/chosen": 0.9794226884841919, |
|
"logits/rejected": 1.0414973497390747, |
|
"logps/chosen": -280.0814208984375, |
|
"logps/rejected": -243.6100311279297, |
|
"loss": 0.0303, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.022864539176225662, |
|
"rewards/margins": 0.06399150937795639, |
|
"rewards/rejected": -0.08685605973005295, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.373088969907586e-07, |
|
"logits/chosen": 0.9371223449707031, |
|
"logits/rejected": 1.0262264013290405, |
|
"logps/chosen": -260.27728271484375, |
|
"logps/rejected": -258.8297424316406, |
|
"loss": 0.0314, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.02002180740237236, |
|
"rewards/margins": 0.06138715147972107, |
|
"rewards/rejected": -0.08140896260738373, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.23241101472709e-07, |
|
"logits/chosen": 1.0079492330551147, |
|
"logits/rejected": 0.9960187673568726, |
|
"logps/chosen": -255.51211547851562, |
|
"logps/rejected": -243.93319702148438, |
|
"loss": 0.0362, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.02185986004769802, |
|
"rewards/margins": 0.057953812181949615, |
|
"rewards/rejected": -0.07981367409229279, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_logits/chosen": 0.9270116090774536, |
|
"eval_logits/rejected": 1.0295790433883667, |
|
"eval_logps/chosen": -280.18609619140625, |
|
"eval_logps/rejected": -252.307861328125, |
|
"eval_loss": 0.03481233865022659, |
|
"eval_rewards/accuracies": 0.6305000185966492, |
|
"eval_rewards/chosen": -0.026392878964543343, |
|
"eval_rewards/margins": 0.058526668697595596, |
|
"eval_rewards/rejected": -0.08491955697536469, |
|
"eval_runtime": 539.1609, |
|
"eval_samples_per_second": 3.709, |
|
"eval_steps_per_second": 0.927, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.09338364753818e-07, |
|
"logits/chosen": 0.9137361645698547, |
|
"logits/rejected": 1.0361106395721436, |
|
"logps/chosen": -293.02032470703125, |
|
"logps/rejected": -264.98883056640625, |
|
"loss": 0.0454, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.01966175064444542, |
|
"rewards/margins": 0.090638667345047, |
|
"rewards/rejected": -0.11030042171478271, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.956018477086005e-07, |
|
"logits/chosen": 1.0527143478393555, |
|
"logits/rejected": 1.0417900085449219, |
|
"logps/chosen": -281.76318359375, |
|
"logps/rejected": -230.76205444335938, |
|
"loss": 0.0363, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.02277226559817791, |
|
"rewards/margins": 0.055615413933992386, |
|
"rewards/rejected": -0.07838768512010574, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.820326973322764e-07, |
|
"logits/chosen": 0.9705474972724915, |
|
"logits/rejected": 1.0229809284210205, |
|
"logps/chosen": -295.6360778808594, |
|
"logps/rejected": -221.80020141601562, |
|
"loss": 0.033, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.01131758838891983, |
|
"rewards/margins": 0.0728193074464798, |
|
"rewards/rejected": -0.08413688838481903, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.686320466449981e-07, |
|
"logits/chosen": 0.9558774828910828, |
|
"logits/rejected": 1.006240725517273, |
|
"logps/chosen": -237.345947265625, |
|
"logps/rejected": -222.2642364501953, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.022244829684495926, |
|
"rewards/margins": 0.07573308050632477, |
|
"rewards/rejected": -0.0979778990149498, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.554010145972418e-07, |
|
"logits/chosen": 0.956200897693634, |
|
"logits/rejected": 1.0314735174179077, |
|
"logps/chosen": -285.91473388671875, |
|
"logps/rejected": -229.6345977783203, |
|
"loss": 0.0312, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.015750734135508537, |
|
"rewards/margins": 0.0518513098359108, |
|
"rewards/rejected": -0.06760205328464508, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.4234070597637455e-07, |
|
"logits/chosen": 0.9090474843978882, |
|
"logits/rejected": 1.0826170444488525, |
|
"logps/chosen": -270.742919921875, |
|
"logps/rejected": -254.69363403320312, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.029270146042108536, |
|
"rewards/margins": 0.04612868279218674, |
|
"rewards/rejected": -0.07539881765842438, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.2945221131440783e-07, |
|
"logits/chosen": 0.975232720375061, |
|
"logits/rejected": 1.02248215675354, |
|
"logps/chosen": -245.90774536132812, |
|
"logps/rejected": -221.59896850585938, |
|
"loss": 0.045, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.025399979203939438, |
|
"rewards/margins": 0.0584503710269928, |
|
"rewards/rejected": -0.08385033905506134, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.167366067969381e-07, |
|
"logits/chosen": 0.9106703996658325, |
|
"logits/rejected": 0.9676691293716431, |
|
"logps/chosen": -272.5838317871094, |
|
"logps/rejected": -259.0162658691406, |
|
"loss": 0.0395, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.017812054604291916, |
|
"rewards/margins": 0.05891140550374985, |
|
"rewards/rejected": -0.07672347128391266, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.041949541732826e-07, |
|
"logits/chosen": 0.9986382722854614, |
|
"logits/rejected": 1.0691004991531372, |
|
"logps/chosen": -296.02490234375, |
|
"logps/rejected": -245.5988311767578, |
|
"loss": 0.0447, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.02657441236078739, |
|
"rewards/margins": 0.05036981776356697, |
|
"rewards/rejected": -0.07694423198699951, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9182830066782614e-07, |
|
"logits/chosen": 0.97566157579422, |
|
"logits/rejected": 0.9876262545585632, |
|
"logps/chosen": -282.0150146484375, |
|
"logps/rejected": -245.9921875, |
|
"loss": 0.0412, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03303904086351395, |
|
"rewards/margins": 0.04882500693202019, |
|
"rewards/rejected": -0.08186405152082443, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 0.9313199520111084, |
|
"eval_logits/rejected": 1.0337715148925781, |
|
"eval_logps/chosen": -280.28759765625, |
|
"eval_logps/rejected": -252.42367553710938, |
|
"eval_loss": 0.03475377336144447, |
|
"eval_rewards/accuracies": 0.6259999871253967, |
|
"eval_rewards/chosen": -0.027407577261328697, |
|
"eval_rewards/margins": 0.05866991728544235, |
|
"eval_rewards/rejected": -0.0860774889588356, |
|
"eval_runtime": 539.1084, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.927, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.796376788925771e-07, |
|
"logits/chosen": 0.9154554605484009, |
|
"logits/rejected": 1.0822858810424805, |
|
"logps/chosen": -247.672119140625, |
|
"logps/rejected": -225.2171630859375, |
|
"loss": 0.0476, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03178101405501366, |
|
"rewards/margins": 0.0625949501991272, |
|
"rewards/rejected": -0.09437596052885056, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.676241067609465e-07, |
|
"logits/chosen": 0.9667531847953796, |
|
"logits/rejected": 0.9605924487113953, |
|
"logps/chosen": -242.40115356445312, |
|
"logps/rejected": -237.51455688476562, |
|
"loss": 0.0326, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.02729959413409233, |
|
"rewards/margins": 0.06457889080047607, |
|
"rewards/rejected": -0.0918785035610199, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5578858740274976e-07, |
|
"logits/chosen": 0.9239116907119751, |
|
"logits/rejected": 1.0316154956817627, |
|
"logps/chosen": -300.83447265625, |
|
"logps/rejected": -260.6551513671875, |
|
"loss": 0.035, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.020300351083278656, |
|
"rewards/margins": 0.06348638236522675, |
|
"rewards/rejected": -0.0837867259979248, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.44132109080447e-07, |
|
"logits/chosen": 0.9275467991828918, |
|
"logits/rejected": 1.080038070678711, |
|
"logps/chosen": -259.93646240234375, |
|
"logps/rejected": -237.24990844726562, |
|
"loss": 0.0382, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.03450951725244522, |
|
"rewards/margins": 0.05054662749171257, |
|
"rewards/rejected": -0.08505614101886749, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3265564510662344e-07, |
|
"logits/chosen": 0.9719650149345398, |
|
"logits/rejected": 1.0536506175994873, |
|
"logps/chosen": -254.2252960205078, |
|
"logps/rejected": -218.9423828125, |
|
"loss": 0.0369, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.024826010689139366, |
|
"rewards/margins": 0.05269388109445572, |
|
"rewards/rejected": -0.07751990109682083, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.213601537627195e-07, |
|
"logits/chosen": 0.9981171488761902, |
|
"logits/rejected": 1.0061393976211548, |
|
"logps/chosen": -260.99664306640625, |
|
"logps/rejected": -252.4512939453125, |
|
"loss": 0.0306, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.028290893882513046, |
|
"rewards/margins": 0.059080712497234344, |
|
"rewards/rejected": -0.08737160265445709, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.1024657821901063e-07, |
|
"logits/chosen": 0.9618428945541382, |
|
"logits/rejected": 0.9869762659072876, |
|
"logps/chosen": -255.50851440429688, |
|
"logps/rejected": -237.8030548095703, |
|
"loss": 0.0263, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.023725476115942, |
|
"rewards/margins": 0.06770970672369003, |
|
"rewards/rejected": -0.09143517911434174, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9931584645585654e-07, |
|
"logits/chosen": 1.0205438137054443, |
|
"logits/rejected": 1.0499489307403564, |
|
"logps/chosen": -285.1611328125, |
|
"logps/rejected": -280.9693908691406, |
|
"loss": 0.0364, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.02360624074935913, |
|
"rewards/margins": 0.07233406603336334, |
|
"rewards/rejected": -0.09594030678272247, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.885688711862136e-07, |
|
"logits/chosen": 0.9615311622619629, |
|
"logits/rejected": 1.010024905204773, |
|
"logps/chosen": -257.10546875, |
|
"logps/rejected": -222.49844360351562, |
|
"loss": 0.0356, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.022061806172132492, |
|
"rewards/margins": 0.044591888785362244, |
|
"rewards/rejected": -0.06665369868278503, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7800654977942486e-07, |
|
"logits/chosen": 0.9196559190750122, |
|
"logits/rejected": 1.0562456846237183, |
|
"logps/chosen": -294.05804443359375, |
|
"logps/rejected": -225.5612030029297, |
|
"loss": 0.0485, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.020386073738336563, |
|
"rewards/margins": 0.06678882986307144, |
|
"rewards/rejected": -0.0871749073266983, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/chosen": 0.9336137771606445, |
|
"eval_logits/rejected": 1.0358667373657227, |
|
"eval_logps/chosen": -279.9648132324219, |
|
"eval_logps/rejected": -252.05458068847656, |
|
"eval_loss": 0.034665048122406006, |
|
"eval_rewards/accuracies": 0.6269999742507935, |
|
"eval_rewards/chosen": -0.02418021857738495, |
|
"eval_rewards/margins": 0.05820634588599205, |
|
"eval_rewards/rejected": -0.0823865681886673, |
|
"eval_runtime": 538.8853, |
|
"eval_samples_per_second": 3.711, |
|
"eval_steps_per_second": 0.928, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6762976418628797e-07, |
|
"logits/chosen": 0.9499009847640991, |
|
"logits/rejected": 0.961447536945343, |
|
"logps/chosen": -252.6084747314453, |
|
"logps/rejected": -231.51760864257812, |
|
"loss": 0.0302, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.020922133699059486, |
|
"rewards/margins": 0.055929750204086304, |
|
"rewards/rejected": -0.07685188204050064, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5743938086541354e-07, |
|
"logits/chosen": 0.9769983291625977, |
|
"logits/rejected": 1.0180495977401733, |
|
"logps/chosen": -297.083740234375, |
|
"logps/rejected": -251.3445281982422, |
|
"loss": 0.0258, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.013064468279480934, |
|
"rewards/margins": 0.08387977629899979, |
|
"rewards/rejected": -0.09694425016641617, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4743625071087574e-07, |
|
"logits/chosen": 0.9248872995376587, |
|
"logits/rejected": 1.083939552307129, |
|
"logps/chosen": -269.6517639160156, |
|
"logps/rejected": -246.59970092773438, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.029898881912231445, |
|
"rewards/margins": 0.06900982558727264, |
|
"rewards/rejected": -0.09890870004892349, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3762120898116498e-07, |
|
"logits/chosen": 0.9920533895492554, |
|
"logits/rejected": 1.0261324644088745, |
|
"logps/chosen": -267.4979553222656, |
|
"logps/rejected": -244.2932891845703, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0195697583258152, |
|
"rewards/margins": 0.03883281350135803, |
|
"rewards/rejected": -0.058402568101882935, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2799507522944048e-07, |
|
"logits/chosen": 1.0060142278671265, |
|
"logits/rejected": 1.0250886678695679, |
|
"logps/chosen": -296.4592590332031, |
|
"logps/rejected": -239.81381225585938, |
|
"loss": 0.0321, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.026712555438280106, |
|
"rewards/margins": 0.03180098533630371, |
|
"rewards/rejected": -0.058513544499874115, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1855865323510056e-07, |
|
"logits/chosen": 0.978489100933075, |
|
"logits/rejected": 1.032293677330017, |
|
"logps/chosen": -289.8194580078125, |
|
"logps/rejected": -266.70330810546875, |
|
"loss": 0.032, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.015113875269889832, |
|
"rewards/margins": 0.06603299081325531, |
|
"rewards/rejected": -0.08114685118198395, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0931273093666575e-07, |
|
"logits/chosen": 1.0238964557647705, |
|
"logits/rejected": 1.0438212156295776, |
|
"logps/chosen": -267.2007141113281, |
|
"logps/rejected": -265.8624572753906, |
|
"loss": 0.0403, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.018140006810426712, |
|
"rewards/margins": 0.06415996700525284, |
|
"rewards/rejected": -0.08229997754096985, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.002580803659873e-07, |
|
"logits/chosen": 1.0145037174224854, |
|
"logits/rejected": 1.0263590812683105, |
|
"logps/chosen": -293.4837951660156, |
|
"logps/rejected": -262.3972473144531, |
|
"loss": 0.0357, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.03515145182609558, |
|
"rewards/margins": 0.043677233159542084, |
|
"rewards/rejected": -0.07882869243621826, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.913954575837826e-07, |
|
"logits/chosen": 1.0240306854248047, |
|
"logits/rejected": 1.094536542892456, |
|
"logps/chosen": -291.9563293457031, |
|
"logps/rejected": -263.8965148925781, |
|
"loss": 0.048, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.02634221874177456, |
|
"rewards/margins": 0.06700630486011505, |
|
"rewards/rejected": -0.09334851801395416, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.827256026165028e-07, |
|
"logits/chosen": 1.024501919746399, |
|
"logits/rejected": 1.0359153747558594, |
|
"logps/chosen": -271.8841247558594, |
|
"logps/rejected": -239.6262664794922, |
|
"loss": 0.0376, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.026435798034071922, |
|
"rewards/margins": 0.07295812666416168, |
|
"rewards/rejected": -0.09939391911029816, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": 0.9353539347648621, |
|
"eval_logits/rejected": 1.0377308130264282, |
|
"eval_logps/chosen": -280.1902160644531, |
|
"eval_logps/rejected": -252.35890197753906, |
|
"eval_loss": 0.03463303670287132, |
|
"eval_rewards/accuracies": 0.6309999823570251, |
|
"eval_rewards/chosen": -0.026434103026986122, |
|
"eval_rewards/margins": 0.0589958056807518, |
|
"eval_rewards/rejected": -0.08542990684509277, |
|
"eval_runtime": 538.4849, |
|
"eval_samples_per_second": 3.714, |
|
"eval_steps_per_second": 0.929, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7424923939454274e-07, |
|
"logits/chosen": 1.0309066772460938, |
|
"logits/rejected": 1.0446019172668457, |
|
"logps/chosen": -282.263427734375, |
|
"logps/rejected": -256.1120300292969, |
|
"loss": 0.0385, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.02827179990708828, |
|
"rewards/margins": 0.05899345874786377, |
|
"rewards/rejected": -0.0872652679681778, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6596707569179304e-07, |
|
"logits/chosen": 0.9415512084960938, |
|
"logits/rejected": 1.0031477212905884, |
|
"logps/chosen": -267.2585754394531, |
|
"logps/rejected": -228.9310302734375, |
|
"loss": 0.0309, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.020049545913934708, |
|
"rewards/margins": 0.05747220665216446, |
|
"rewards/rejected": -0.07752174139022827, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.578798030665385e-07, |
|
"logits/chosen": 0.9566577076911926, |
|
"logits/rejected": 0.9886308908462524, |
|
"logps/chosen": -287.398193359375, |
|
"logps/rejected": -231.68643188476562, |
|
"loss": 0.0351, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.030035072937607765, |
|
"rewards/margins": 0.03915448114275932, |
|
"rewards/rejected": -0.06918954849243164, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.499880968037165e-07, |
|
"logits/chosen": 0.9171876907348633, |
|
"logits/rejected": 1.0982364416122437, |
|
"logps/chosen": -283.79156494140625, |
|
"logps/rejected": -253.9375, |
|
"loss": 0.0395, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.02584235928952694, |
|
"rewards/margins": 0.07793084532022476, |
|
"rewards/rejected": -0.10377321392297745, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4229261585852805e-07, |
|
"logits/chosen": 0.9618092775344849, |
|
"logits/rejected": 1.1071628332138062, |
|
"logps/chosen": -258.87579345703125, |
|
"logps/rejected": -243.849365234375, |
|
"loss": 0.0413, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.031573228538036346, |
|
"rewards/margins": 0.07379934191703796, |
|
"rewards/rejected": -0.10537256300449371, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3479400280141886e-07, |
|
"logits/chosen": 0.9881182909011841, |
|
"logits/rejected": 0.9810377359390259, |
|
"logps/chosen": -240.4054718017578, |
|
"logps/rejected": -220.0426483154297, |
|
"loss": 0.0372, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.019237659871578217, |
|
"rewards/margins": 0.041987188160419464, |
|
"rewards/rejected": -0.06122484803199768, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2749288376442044e-07, |
|
"logits/chosen": 1.032110571861267, |
|
"logits/rejected": 1.0473930835723877, |
|
"logps/chosen": -249.2979736328125, |
|
"logps/rejected": -242.74606323242188, |
|
"loss": 0.0334, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.030945682898163795, |
|
"rewards/margins": 0.05242834612727165, |
|
"rewards/rejected": -0.0833740234375, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.203898683888713e-07, |
|
"logits/chosen": 0.9928043484687805, |
|
"logits/rejected": 1.0439598560333252, |
|
"logps/chosen": -255.74020385742188, |
|
"logps/rejected": -241.57583618164062, |
|
"loss": 0.0381, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.03444141149520874, |
|
"rewards/margins": 0.06323965638875961, |
|
"rewards/rejected": -0.09768106043338776, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1348554977451132e-07, |
|
"logits/chosen": 1.0385875701904297, |
|
"logits/rejected": 1.0472261905670166, |
|
"logps/chosen": -243.3946075439453, |
|
"logps/rejected": -253.4709014892578, |
|
"loss": 0.0353, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.03410564363002777, |
|
"rewards/margins": 0.0681803822517395, |
|
"rewards/rejected": -0.10228602588176727, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0678050442995802e-07, |
|
"logits/chosen": 0.9589599370956421, |
|
"logits/rejected": 0.9904863238334656, |
|
"logps/chosen": -262.4172058105469, |
|
"logps/rejected": -220.4215545654297, |
|
"loss": 0.0352, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.03152045980095863, |
|
"rewards/margins": 0.0699472576379776, |
|
"rewards/rejected": -0.10146770626306534, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": 0.9392337203025818, |
|
"eval_logits/rejected": 1.0417654514312744, |
|
"eval_logps/chosen": -280.20367431640625, |
|
"eval_logps/rejected": -252.37255859375, |
|
"eval_loss": 0.03462912142276764, |
|
"eval_rewards/accuracies": 0.6259999871253967, |
|
"eval_rewards/chosen": -0.026568960398435593, |
|
"eval_rewards/margins": 0.058997511863708496, |
|
"eval_rewards/rejected": -0.08556646853685379, |
|
"eval_runtime": 538.5332, |
|
"eval_samples_per_second": 3.714, |
|
"eval_steps_per_second": 0.928, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0027529222456755e-07, |
|
"logits/chosen": 0.9966568946838379, |
|
"logits/rejected": 0.9782639741897583, |
|
"logps/chosen": -264.34344482421875, |
|
"logps/rejected": -254.0737762451172, |
|
"loss": 0.0298, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.030217718333005905, |
|
"rewards/margins": 0.05717161297798157, |
|
"rewards/rejected": -0.08738932758569717, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.397045634168766e-08, |
|
"logits/chosen": 0.933295726776123, |
|
"logits/rejected": 1.088179349899292, |
|
"logps/chosen": -275.3388977050781, |
|
"logps/rejected": -235.18685913085938, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.014920748770236969, |
|
"rewards/margins": 0.06524350494146347, |
|
"rewards/rejected": -0.08016424626111984, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.78665232332998e-08, |
|
"logits/chosen": 0.9489814639091492, |
|
"logits/rejected": 1.066748857498169, |
|
"logps/chosen": -260.3916320800781, |
|
"logps/rejected": -258.72528076171875, |
|
"loss": 0.036, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.023377668112516403, |
|
"rewards/margins": 0.05880703777074814, |
|
"rewards/rejected": -0.08218470215797424, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.196400257606208e-08, |
|
"logits/chosen": 0.9277578592300415, |
|
"logits/rejected": 1.0134170055389404, |
|
"logps/chosen": -279.0546875, |
|
"logps/rejected": -249.3377685546875, |
|
"loss": 0.0347, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.018648769706487656, |
|
"rewards/margins": 0.04787365719676018, |
|
"rewards/rejected": -0.06652243435382843, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.626338722875076e-08, |
|
"logits/chosen": 1.015367031097412, |
|
"logits/rejected": 0.9656025171279907, |
|
"logps/chosen": -280.31109619140625, |
|
"logps/rejected": -254.50277709960938, |
|
"loss": 0.0347, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03300374746322632, |
|
"rewards/margins": 0.050588060170412064, |
|
"rewards/rejected": -0.08359180390834808, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.076515319110688e-08, |
|
"logits/chosen": 1.0340051651000977, |
|
"logits/rejected": 1.0160043239593506, |
|
"logps/chosen": -292.3948669433594, |
|
"logps/rejected": -252.76358032226562, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02604197897017002, |
|
"rewards/margins": 0.05999482423067093, |
|
"rewards/rejected": -0.0860368013381958, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.54697595640899e-08, |
|
"logits/chosen": 1.0719053745269775, |
|
"logits/rejected": 1.0086462497711182, |
|
"logps/chosen": -279.8869323730469, |
|
"logps/rejected": -261.8102111816406, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.023053551092743874, |
|
"rewards/margins": 0.05721823126077652, |
|
"rewards/rejected": -0.08027178794145584, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.037764851154426e-08, |
|
"logits/chosen": 0.9340742826461792, |
|
"logits/rejected": 1.0983483791351318, |
|
"logps/chosen": -279.68585205078125, |
|
"logps/rejected": -239.94100952148438, |
|
"loss": 0.0259, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.02659156359732151, |
|
"rewards/margins": 0.05400107428431511, |
|
"rewards/rejected": -0.08059263974428177, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.548924522327748e-08, |
|
"logits/chosen": 0.9091449975967407, |
|
"logits/rejected": 1.1083462238311768, |
|
"logps/chosen": -275.26678466796875, |
|
"logps/rejected": -258.33453369140625, |
|
"loss": 0.0285, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.019822830334305763, |
|
"rewards/margins": 0.06569498032331467, |
|
"rewards/rejected": -0.08551780879497528, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.0804957879556915e-08, |
|
"logits/chosen": 0.9512368440628052, |
|
"logits/rejected": 1.0698693990707397, |
|
"logps/chosen": -318.3247375488281, |
|
"logps/rejected": -260.92718505859375, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.0346975177526474, |
|
"rewards/margins": 0.056962646543979645, |
|
"rewards/rejected": -0.09166016429662704, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": 0.9390192031860352, |
|
"eval_logits/rejected": 1.0413662195205688, |
|
"eval_logps/chosen": -280.1781311035156, |
|
"eval_logps/rejected": -252.33770751953125, |
|
"eval_loss": 0.034663841128349304, |
|
"eval_rewards/accuracies": 0.6315000057220459, |
|
"eval_rewards/chosen": -0.026313286274671555, |
|
"eval_rewards/margins": 0.05890476703643799, |
|
"eval_rewards/rejected": -0.08521804958581924, |
|
"eval_runtime": 538.4137, |
|
"eval_samples_per_second": 3.715, |
|
"eval_steps_per_second": 0.929, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.632517761702815e-08, |
|
"logits/chosen": 1.0086432695388794, |
|
"logits/rejected": 1.0595029592514038, |
|
"logps/chosen": -285.149658203125, |
|
"logps/rejected": -251.66793823242188, |
|
"loss": 0.0434, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.018525902181863785, |
|
"rewards/margins": 0.078687384724617, |
|
"rewards/rejected": -0.09721329808235168, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.205027849605359e-08, |
|
"logits/chosen": 1.0005525350570679, |
|
"logits/rejected": 0.973240852355957, |
|
"logps/chosen": -231.69265747070312, |
|
"logps/rejected": -235.55581665039062, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.02195514738559723, |
|
"rewards/margins": 0.049081120640039444, |
|
"rewards/rejected": -0.07103626430034637, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": 0.9860151410102844, |
|
"logits/rejected": 1.0056957006454468, |
|
"logps/chosen": -345.712890625, |
|
"logps/rejected": -263.0353088378906, |
|
"loss": 0.0229, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02045946940779686, |
|
"rewards/margins": 0.05285441130399704, |
|
"rewards/rejected": -0.0733138769865036, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.411653435283158e-08, |
|
"logits/chosen": 0.9643945693969727, |
|
"logits/rejected": 1.0569902658462524, |
|
"logps/chosen": -294.2444763183594, |
|
"logps/rejected": -257.2134704589844, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.0335361547768116, |
|
"rewards/margins": 0.04753485321998596, |
|
"rewards/rejected": -0.08107100427150726, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.04583517959367e-08, |
|
"logits/chosen": 0.957710862159729, |
|
"logits/rejected": 0.983841598033905, |
|
"logps/chosen": -299.52471923828125, |
|
"logps/rejected": -250.6820831298828, |
|
"loss": 0.0363, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.008963306434452534, |
|
"rewards/margins": 0.08262725919485092, |
|
"rewards/rejected": -0.09159056842327118, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.7006375255985984e-08, |
|
"logits/chosen": 0.9700638055801392, |
|
"logits/rejected": 1.0240037441253662, |
|
"logps/chosen": -241.0752410888672, |
|
"logps/rejected": -215.42562866210938, |
|
"loss": 0.0298, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.026114290580153465, |
|
"rewards/margins": 0.048808712512254715, |
|
"rewards/rejected": -0.07492300122976303, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3760892972027328e-08, |
|
"logits/chosen": 0.9013713002204895, |
|
"logits/rejected": 1.0410950183868408, |
|
"logps/chosen": -290.4670104980469, |
|
"logps/rejected": -238.00454711914062, |
|
"loss": 0.0394, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.023938851431012154, |
|
"rewards/margins": 0.06292831152677536, |
|
"rewards/rejected": -0.08686716854572296, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.072217594089765e-08, |
|
"logits/chosen": 1.0121930837631226, |
|
"logits/rejected": 0.9898314476013184, |
|
"logps/chosen": -247.4111785888672, |
|
"logps/rejected": -232.48184204101562, |
|
"loss": 0.0394, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.021979061886668205, |
|
"rewards/margins": 0.0453622080385685, |
|
"rewards/rejected": -0.06734126806259155, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.789047789459375e-08, |
|
"logits/chosen": 0.9309120178222656, |
|
"logits/rejected": 1.0076799392700195, |
|
"logps/chosen": -279.5455627441406, |
|
"logps/rejected": -257.4979248046875, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.027569543570280075, |
|
"rewards/margins": 0.0605216808617115, |
|
"rewards/rejected": -0.08809121698141098, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5266035279088708e-08, |
|
"logits/chosen": 0.9448448419570923, |
|
"logits/rejected": 1.0074546337127686, |
|
"logps/chosen": -320.0146179199219, |
|
"logps/rejected": -246.0650634765625, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.023909619078040123, |
|
"rewards/margins": 0.05834698677062988, |
|
"rewards/rejected": -0.08225660026073456, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_logits/chosen": 0.9376645088195801, |
|
"eval_logits/rejected": 1.0399267673492432, |
|
"eval_logps/chosen": -280.2046813964844, |
|
"eval_logps/rejected": -252.3740997314453, |
|
"eval_loss": 0.03461700677871704, |
|
"eval_rewards/accuracies": 0.6309999823570251, |
|
"eval_rewards/chosen": -0.026578795164823532, |
|
"eval_rewards/margins": 0.05900290608406067, |
|
"eval_rewards/rejected": -0.0855816975235939, |
|
"eval_runtime": 538.2979, |
|
"eval_samples_per_second": 3.715, |
|
"eval_steps_per_second": 0.929, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2849067234584623e-08, |
|
"logits/chosen": 0.9224729537963867, |
|
"logits/rejected": 1.0151176452636719, |
|
"logps/chosen": -258.44451904296875, |
|
"logps/rejected": -240.2635498046875, |
|
"loss": 0.0317, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.02202828973531723, |
|
"rewards/margins": 0.055525414645671844, |
|
"rewards/rejected": -0.07755370438098907, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0639775577218625e-08, |
|
"logits/chosen": 0.9609501957893372, |
|
"logits/rejected": 1.0031676292419434, |
|
"logps/chosen": -288.76519775390625, |
|
"logps/rejected": -266.21478271484375, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.01723320782184601, |
|
"rewards/margins": 0.06596283614635468, |
|
"rewards/rejected": -0.08319603651762009, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.638344782207486e-09, |
|
"logits/chosen": 0.9164566993713379, |
|
"logits/rejected": 1.0248987674713135, |
|
"logps/chosen": -303.3111877441406, |
|
"logps/rejected": -282.0052185058594, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.036383308470249176, |
|
"rewards/margins": 0.0420096218585968, |
|
"rewards/rejected": -0.07839293777942657, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.84494196844715e-09, |
|
"logits/chosen": 0.9781646728515625, |
|
"logits/rejected": 1.0682637691497803, |
|
"logps/chosen": -265.3148193359375, |
|
"logps/rejected": -244.98489379882812, |
|
"loss": 0.0314, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.016114359721541405, |
|
"rewards/margins": 0.0599919855594635, |
|
"rewards/rejected": -0.07610634714365005, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.259716884556121e-09, |
|
"logits/chosen": 0.9260244369506836, |
|
"logits/rejected": 1.0235192775726318, |
|
"logps/chosen": -311.13995361328125, |
|
"logps/rejected": -255.76925659179688, |
|
"loss": 0.0216, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.019463708624243736, |
|
"rewards/margins": 0.06179703399538994, |
|
"rewards/rejected": -0.08126074075698853, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.882801896372967e-09, |
|
"logits/chosen": 0.8902843594551086, |
|
"logits/rejected": 1.0316295623779297, |
|
"logps/chosen": -282.79034423828125, |
|
"logps/rejected": -261.2944641113281, |
|
"loss": 0.0333, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.024235766381025314, |
|
"rewards/margins": 0.04566502943634987, |
|
"rewards/rejected": -0.06990079581737518, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7143119759026614e-09, |
|
"logits/chosen": 0.9450544118881226, |
|
"logits/rejected": 1.0837864875793457, |
|
"logps/chosen": -270.1015319824219, |
|
"logps/rejected": -273.8962707519531, |
|
"loss": 0.0297, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.007587161846458912, |
|
"rewards/margins": 0.06377876549959183, |
|
"rewards/rejected": -0.07136592268943787, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.754344691717591e-09, |
|
"logits/chosen": 0.9069304466247559, |
|
"logits/rejected": 0.979387640953064, |
|
"logps/chosen": -264.6932067871094, |
|
"logps/rejected": -218.8267822265625, |
|
"loss": 0.0347, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.011129969730973244, |
|
"rewards/margins": 0.06624534726142883, |
|
"rewards/rejected": -0.07737531512975693, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0029802008096335e-09, |
|
"logits/chosen": 1.0251834392547607, |
|
"logits/rejected": 0.9546536207199097, |
|
"logps/chosen": -267.88116455078125, |
|
"logps/rejected": -228.50390625, |
|
"loss": 0.0348, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.026254802942276, |
|
"rewards/margins": 0.06516362726688385, |
|
"rewards/rejected": -0.09141843020915985, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.602812418974534e-10, |
|
"logits/chosen": 0.9103859066963196, |
|
"logits/rejected": 1.032867670059204, |
|
"logps/chosen": -254.2440643310547, |
|
"logps/rejected": -240.1820831298828, |
|
"loss": 0.0298, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.03087850846350193, |
|
"rewards/margins": 0.050477832555770874, |
|
"rewards/rejected": -0.08135633170604706, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": 0.9387494921684265, |
|
"eval_logits/rejected": 1.0411853790283203, |
|
"eval_logps/chosen": -280.1766662597656, |
|
"eval_logps/rejected": -252.320068359375, |
|
"eval_loss": 0.03467794507741928, |
|
"eval_rewards/accuracies": 0.6274999976158142, |
|
"eval_rewards/chosen": -0.02629854343831539, |
|
"eval_rewards/margins": 0.058742720633745193, |
|
"eval_rewards/rejected": -0.08504127711057663, |
|
"eval_runtime": 538.3057, |
|
"eval_samples_per_second": 3.715, |
|
"eval_steps_per_second": 0.929, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.2629313018819312e-10, |
|
"logits/chosen": 0.9089424014091492, |
|
"logits/rejected": 1.0153186321258545, |
|
"logps/chosen": -302.22296142578125, |
|
"logps/rejected": -284.89483642578125, |
|
"loss": 0.0326, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.007386692799627781, |
|
"rewards/margins": 0.025576096028089523, |
|
"rewards/rejected": -0.03296279162168503, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0437535929996855e-12, |
|
"logits/chosen": 0.9965023994445801, |
|
"logits/rejected": 0.9472867250442505, |
|
"logps/chosen": -285.3251037597656, |
|
"logps/rejected": -266.6712341308594, |
|
"loss": 0.035, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.012905353680253029, |
|
"rewards/margins": 0.030642932280898094, |
|
"rewards/rejected": -0.04354828968644142, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0001892112643969555, |
|
"train_runtime": 195.8108, |
|
"train_samples_per_second": 312.215, |
|
"train_steps_per_second": 19.514 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |