|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3054830287206268e-08, |
|
"logits/chosen": 0.9550814628601074, |
|
"logits/rejected": 1.0664727687835693, |
|
"logps/chosen": -190.47879028320312, |
|
"logps/rejected": -177.6958770751953, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": 1.0219794511795044, |
|
"logits/rejected": 1.074777364730835, |
|
"logps/chosen": -277.9150695800781, |
|
"logps/rejected": -268.2773742675781, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4097222089767456, |
|
"rewards/chosen": -0.00020908194710500538, |
|
"rewards/margins": -0.000234946608543396, |
|
"rewards/rejected": 2.586471055110451e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": 1.0537651777267456, |
|
"logits/rejected": 1.0348730087280273, |
|
"logps/chosen": -258.07086181640625, |
|
"logps/rejected": -219.53591918945312, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.000874903635121882, |
|
"rewards/margins": -0.0006632002769038081, |
|
"rewards/rejected": -0.0002117032854584977, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.9164490861618804e-07, |
|
"logits/chosen": 0.9774575233459473, |
|
"logits/rejected": 0.9953697919845581, |
|
"logps/chosen": -234.4716796875, |
|
"logps/rejected": -216.33425903320312, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.0008366929250769317, |
|
"rewards/margins": -0.00013415786088444293, |
|
"rewards/rejected": -0.0007025349768809974, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.221932114882506e-07, |
|
"logits/chosen": 1.0602939128875732, |
|
"logits/rejected": 1.0611933469772339, |
|
"logps/chosen": -269.39129638671875, |
|
"logps/rejected": -236.54690551757812, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.000759485294111073, |
|
"rewards/margins": -0.00026061575044877827, |
|
"rewards/rejected": -0.0004988695727661252, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.527415143603135e-07, |
|
"logits/chosen": 1.012333631515503, |
|
"logits/rejected": 1.0495545864105225, |
|
"logps/chosen": -245.17025756835938, |
|
"logps/rejected": -241.9575653076172, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.0008211232488974929, |
|
"rewards/margins": 0.00028653821209445596, |
|
"rewards/rejected": -0.0011076615191996098, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.832898172323761e-07, |
|
"logits/chosen": 0.9769388437271118, |
|
"logits/rejected": 1.0936685800552368, |
|
"logps/chosen": -283.7574157714844, |
|
"logps/rejected": -234.1509552001953, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.00026667636120691895, |
|
"rewards/margins": -0.00013940571807324886, |
|
"rewards/rejected": -0.00012727065768558532, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.138381201044387e-07, |
|
"logits/chosen": 1.0060393810272217, |
|
"logits/rejected": 1.0819052457809448, |
|
"logps/chosen": -271.9745178222656, |
|
"logps/rejected": -231.0491943359375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.00024310217122547328, |
|
"rewards/margins": 0.00035628705518320203, |
|
"rewards/rejected": -0.00011318484030198306, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0443864229765013e-06, |
|
"logits/chosen": 1.0235029458999634, |
|
"logits/rejected": 1.062941312789917, |
|
"logps/chosen": -283.92523193359375, |
|
"logps/rejected": -261.73150634765625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.0006150220870040357, |
|
"rewards/margins": 9.248043352272362e-05, |
|
"rewards/rejected": -0.0007075025932863355, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1749347258485642e-06, |
|
"logits/chosen": 1.043299913406372, |
|
"logits/rejected": 1.0933589935302734, |
|
"logps/chosen": -278.43609619140625, |
|
"logps/rejected": -235.748046875, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.000780908390879631, |
|
"rewards/margins": -0.0005825563566759229, |
|
"rewards/rejected": -0.00019835206330753863, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.305483028720627e-06, |
|
"logits/chosen": 0.9930270910263062, |
|
"logits/rejected": 1.068216323852539, |
|
"logps/chosen": -237.21041870117188, |
|
"logps/rejected": -218.50753784179688, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.0006625339738093317, |
|
"rewards/margins": 0.0012667592382058501, |
|
"rewards/rejected": -0.0006042252061888576, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_logits/chosen": 0.9728049635887146, |
|
"eval_logits/rejected": 1.0653607845306396, |
|
"eval_logps/chosen": -277.5415954589844, |
|
"eval_logps/rejected": -243.8911895751953, |
|
"eval_loss": 0.6927526593208313, |
|
"eval_rewards/accuracies": 0.4950000047683716, |
|
"eval_rewards/chosen": 5.226604480412789e-05, |
|
"eval_rewards/margins": 0.0008048939635045826, |
|
"eval_rewards/rejected": -0.0007526279077865183, |
|
"eval_runtime": 545.5397, |
|
"eval_samples_per_second": 3.666, |
|
"eval_steps_per_second": 0.917, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4360313315926894e-06, |
|
"logits/chosen": 0.9969541430473328, |
|
"logits/rejected": 1.0819013118743896, |
|
"logps/chosen": -283.55718994140625, |
|
"logps/rejected": -250.1197967529297, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.0006327680312097073, |
|
"rewards/margins": 0.0015511559322476387, |
|
"rewards/rejected": -0.0009183877264149487, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5665796344647521e-06, |
|
"logits/chosen": 1.0308067798614502, |
|
"logits/rejected": 1.0752949714660645, |
|
"logps/chosen": -227.8155517578125, |
|
"logps/rejected": -234.15908813476562, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -6.81330930092372e-05, |
|
"rewards/margins": 0.00048325079842470586, |
|
"rewards/rejected": -0.0005513839423656464, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6971279373368146e-06, |
|
"logits/chosen": 1.049055814743042, |
|
"logits/rejected": 1.0949084758758545, |
|
"logps/chosen": -282.67279052734375, |
|
"logps/rejected": -239.2623291015625, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.00029724548221565783, |
|
"rewards/margins": 0.001690825680270791, |
|
"rewards/rejected": -0.0013935801107436419, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8276762402088774e-06, |
|
"logits/chosen": 1.026393175125122, |
|
"logits/rejected": 1.026254415512085, |
|
"logps/chosen": -264.0199279785156, |
|
"logps/rejected": -237.1195831298828, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0003116951265837997, |
|
"rewards/margins": 0.0020114013459533453, |
|
"rewards/rejected": -0.001699706306681037, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9582245430809403e-06, |
|
"logits/chosen": 1.0078837871551514, |
|
"logits/rejected": 1.0274178981781006, |
|
"logps/chosen": -262.63385009765625, |
|
"logps/rejected": -235.02658081054688, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0008050563046708703, |
|
"rewards/margins": 0.002434623194858432, |
|
"rewards/rejected": -0.0016295671230182052, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0887728459530026e-06, |
|
"logits/chosen": 0.9562094807624817, |
|
"logits/rejected": 1.085970163345337, |
|
"logps/chosen": -258.23858642578125, |
|
"logps/rejected": -240.0513916015625, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0007125878473743796, |
|
"rewards/margins": 0.002117170486599207, |
|
"rewards/rejected": -0.0014045826392248273, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.2193211488250653e-06, |
|
"logits/chosen": 0.9959365129470825, |
|
"logits/rejected": 1.0712645053863525, |
|
"logps/chosen": -268.434814453125, |
|
"logps/rejected": -218.3533172607422, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0012755919015035033, |
|
"rewards/margins": 0.0039804959669709206, |
|
"rewards/rejected": -0.0027049046475440264, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3498694516971284e-06, |
|
"logits/chosen": 0.9840243458747864, |
|
"logits/rejected": 1.0322699546813965, |
|
"logps/chosen": -272.4624938964844, |
|
"logps/rejected": -237.8259735107422, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0022116941399872303, |
|
"rewards/margins": 0.00362318754196167, |
|
"rewards/rejected": -0.0014114934019744396, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4804177545691907e-06, |
|
"logits/chosen": 0.9921697378158569, |
|
"logits/rejected": 1.0103299617767334, |
|
"logps/chosen": -269.4023742675781, |
|
"logps/rejected": -235.68417358398438, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.002223977353423834, |
|
"rewards/margins": 0.005811163689941168, |
|
"rewards/rejected": -0.003587186336517334, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.610966057441254e-06, |
|
"logits/chosen": 1.0245262384414673, |
|
"logits/rejected": 1.1350899934768677, |
|
"logps/chosen": -277.9847412109375, |
|
"logps/rejected": -249.67733764648438, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.005080274306237698, |
|
"rewards/margins": 0.00588814215734601, |
|
"rewards/rejected": -0.0008078686660155654, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": 0.9732384085655212, |
|
"eval_logits/rejected": 1.0658739805221558, |
|
"eval_logps/chosen": -277.0526123046875, |
|
"eval_logps/rejected": -243.96609497070312, |
|
"eval_loss": 0.6899830102920532, |
|
"eval_rewards/accuracies": 0.5830000042915344, |
|
"eval_rewards/chosen": 0.004941860679537058, |
|
"eval_rewards/margins": 0.006443744525313377, |
|
"eval_rewards/rejected": -0.0015018840786069632, |
|
"eval_runtime": 544.739, |
|
"eval_samples_per_second": 3.671, |
|
"eval_steps_per_second": 0.918, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.741514360313316e-06, |
|
"logits/chosen": 1.01772940158844, |
|
"logits/rejected": 1.0974925756454468, |
|
"logps/chosen": -260.6138610839844, |
|
"logps/rejected": -233.33798217773438, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.00463427510112524, |
|
"rewards/margins": 0.007291147019714117, |
|
"rewards/rejected": -0.0026568726170808077, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.872062663185379e-06, |
|
"logits/chosen": 1.0488287210464478, |
|
"logits/rejected": 1.1195874214172363, |
|
"logps/chosen": -277.33319091796875, |
|
"logps/rejected": -243.1424560546875, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.006252074148505926, |
|
"rewards/margins": 0.007730481214821339, |
|
"rewards/rejected": -0.001478406717069447, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0026109660574416e-06, |
|
"logits/chosen": 1.0392377376556396, |
|
"logits/rejected": 1.0487968921661377, |
|
"logps/chosen": -268.562255859375, |
|
"logps/rejected": -275.25787353515625, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.008288154378533363, |
|
"rewards/margins": 0.010209737345576286, |
|
"rewards/rejected": -0.0019215833162888885, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1331592689295043e-06, |
|
"logits/chosen": 1.0380384922027588, |
|
"logits/rejected": 1.0580257177352905, |
|
"logps/chosen": -271.28057861328125, |
|
"logps/rejected": -231.651123046875, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.015278227627277374, |
|
"rewards/margins": 0.016800139099359512, |
|
"rewards/rejected": -0.0015219092601910233, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.263707571801567e-06, |
|
"logits/chosen": 0.9396646618843079, |
|
"logits/rejected": 1.0964558124542236, |
|
"logps/chosen": -261.69482421875, |
|
"logps/rejected": -207.1199188232422, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.011847788468003273, |
|
"rewards/margins": 0.01328262872993946, |
|
"rewards/rejected": -0.0014348386321216822, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3942558746736293e-06, |
|
"logits/chosen": 1.0348241329193115, |
|
"logits/rejected": 1.0277621746063232, |
|
"logps/chosen": -255.30850219726562, |
|
"logps/rejected": -249.1978759765625, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.01062377355992794, |
|
"rewards/margins": 0.01277032308280468, |
|
"rewards/rejected": -0.002146549290046096, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.524804177545692e-06, |
|
"logits/chosen": 0.9442011713981628, |
|
"logits/rejected": 1.091737985610962, |
|
"logps/chosen": -249.87490844726562, |
|
"logps/rejected": -225.23934936523438, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.009906780906021595, |
|
"rewards/margins": 0.014515595510601997, |
|
"rewards/rejected": -0.004608814604580402, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6553524804177547e-06, |
|
"logits/chosen": 0.9841794967651367, |
|
"logits/rejected": 1.0294774770736694, |
|
"logps/chosen": -261.4816589355469, |
|
"logps/rejected": -244.85580444335938, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.01882908120751381, |
|
"rewards/margins": 0.023043904453516006, |
|
"rewards/rejected": -0.004214824177324772, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.7859007832898174e-06, |
|
"logits/chosen": 0.9971128702163696, |
|
"logits/rejected": 1.0910747051239014, |
|
"logps/chosen": -257.55096435546875, |
|
"logps/rejected": -229.0856170654297, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.02020946703851223, |
|
"rewards/margins": 0.027215411886572838, |
|
"rewards/rejected": -0.007005943916738033, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9164490861618806e-06, |
|
"logits/chosen": 0.972184956073761, |
|
"logits/rejected": 1.0939315557479858, |
|
"logps/chosen": -256.2604064941406, |
|
"logps/rejected": -227.18539428710938, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.01802998222410679, |
|
"rewards/margins": 0.023866988718509674, |
|
"rewards/rejected": -0.0058370064944028854, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": 1.0022695064544678, |
|
"eval_logits/rejected": 1.097358226776123, |
|
"eval_logps/chosen": -275.3941345214844, |
|
"eval_logps/rejected": -244.4587860107422, |
|
"eval_loss": 0.6800673604011536, |
|
"eval_rewards/accuracies": 0.6054999828338623, |
|
"eval_rewards/chosen": 0.021527061238884926, |
|
"eval_rewards/margins": 0.027955830097198486, |
|
"eval_rewards/rejected": -0.006428766064345837, |
|
"eval_runtime": 545.3023, |
|
"eval_samples_per_second": 3.668, |
|
"eval_steps_per_second": 0.917, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.046997389033943e-06, |
|
"logits/chosen": 0.9668880701065063, |
|
"logits/rejected": 1.1288020610809326, |
|
"logps/chosen": -255.2298583984375, |
|
"logps/rejected": -227.0789337158203, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.026618385687470436, |
|
"rewards/margins": 0.03431355208158493, |
|
"rewards/rejected": -0.0076951696537435055, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.177545691906005e-06, |
|
"logits/chosen": 0.9519135355949402, |
|
"logits/rejected": 1.0828487873077393, |
|
"logps/chosen": -281.8594970703125, |
|
"logps/rejected": -257.09490966796875, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.01871672458946705, |
|
"rewards/margins": 0.024688560515642166, |
|
"rewards/rejected": -0.00597183546051383, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.308093994778068e-06, |
|
"logits/chosen": 1.033220648765564, |
|
"logits/rejected": 1.1434317827224731, |
|
"logps/chosen": -278.22552490234375, |
|
"logps/rejected": -244.3297119140625, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.010118372738361359, |
|
"rewards/margins": 0.028245192021131516, |
|
"rewards/rejected": -0.018126821145415306, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4386422976501306e-06, |
|
"logits/chosen": 1.0310008525848389, |
|
"logits/rejected": 1.0641546249389648, |
|
"logps/chosen": -273.8054504394531, |
|
"logps/rejected": -269.79022216796875, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.008210320957005024, |
|
"rewards/margins": 0.03238377720117569, |
|
"rewards/rejected": -0.02417345717549324, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.569190600522193e-06, |
|
"logits/chosen": 1.0388667583465576, |
|
"logits/rejected": 1.034010887145996, |
|
"logps/chosen": -284.13043212890625, |
|
"logps/rejected": -252.32052612304688, |
|
"loss": 0.6737, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.012403028085827827, |
|
"rewards/margins": 0.042349107563495636, |
|
"rewards/rejected": -0.02994607947766781, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.699738903394257e-06, |
|
"logits/chosen": 1.0033949613571167, |
|
"logits/rejected": 1.097804307937622, |
|
"logps/chosen": -302.3703918457031, |
|
"logps/rejected": -261.0248718261719, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0027267406694591045, |
|
"rewards/margins": 0.04591723158955574, |
|
"rewards/rejected": -0.04319049045443535, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8302872062663196e-06, |
|
"logits/chosen": 1.0511659383773804, |
|
"logits/rejected": 1.042121171951294, |
|
"logps/chosen": -305.19305419921875, |
|
"logps/rejected": -264.2236328125, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.007779981009662151, |
|
"rewards/margins": 0.06012594699859619, |
|
"rewards/rejected": -0.052345968782901764, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9608355091383814e-06, |
|
"logits/chosen": 1.019500970840454, |
|
"logits/rejected": 1.1177808046340942, |
|
"logps/chosen": -256.4404296875, |
|
"logps/rejected": -228.1063232421875, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.001956403721123934, |
|
"rewards/margins": 0.06523159891366959, |
|
"rewards/rejected": -0.06718799471855164, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9999488562447675e-06, |
|
"logits/chosen": 1.0209654569625854, |
|
"logits/rejected": 1.0544273853302002, |
|
"logps/chosen": -299.9403991699219, |
|
"logps/rejected": -262.7241516113281, |
|
"loss": 0.6558, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.0010883864015340805, |
|
"rewards/margins": 0.08589887619018555, |
|
"rewards/rejected": -0.08698725700378418, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999698361256577e-06, |
|
"logits/chosen": 1.0261785984039307, |
|
"logits/rejected": 1.0322299003601074, |
|
"logps/chosen": -282.60333251953125, |
|
"logps/rejected": -271.5989685058594, |
|
"loss": 0.6574, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03285291790962219, |
|
"rewards/margins": 0.0808914452791214, |
|
"rewards/rejected": -0.11374436318874359, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": 0.9584712982177734, |
|
"eval_logits/rejected": 1.0540906190872192, |
|
"eval_logps/chosen": -282.07501220703125, |
|
"eval_logps/rejected": -255.61891174316406, |
|
"eval_loss": 0.6623440384864807, |
|
"eval_rewards/accuracies": 0.6054999828338623, |
|
"eval_rewards/chosen": -0.045281875878572464, |
|
"eval_rewards/margins": 0.07274789363145828, |
|
"eval_rewards/rejected": -0.11802978068590164, |
|
"eval_runtime": 544.6934, |
|
"eval_samples_per_second": 3.672, |
|
"eval_steps_per_second": 0.918, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999239142174581e-06, |
|
"logits/chosen": 0.9255741834640503, |
|
"logits/rejected": 0.9569520950317383, |
|
"logps/chosen": -298.91546630859375, |
|
"logps/rejected": -255.8893280029297, |
|
"loss": 0.6584, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.053911127150058746, |
|
"rewards/margins": 0.07991180568933487, |
|
"rewards/rejected": -0.13382293283939362, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.99857123734344e-06, |
|
"logits/chosen": 0.9512017965316772, |
|
"logits/rejected": 1.0617644786834717, |
|
"logps/chosen": -285.84527587890625, |
|
"logps/rejected": -258.625732421875, |
|
"loss": 0.6592, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.07313002645969391, |
|
"rewards/margins": 0.08329417556524277, |
|
"rewards/rejected": -0.15642420947551727, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997694702533016e-06, |
|
"logits/chosen": 0.8519557118415833, |
|
"logits/rejected": 0.9811371564865112, |
|
"logps/chosen": -277.22845458984375, |
|
"logps/rejected": -276.25067138671875, |
|
"loss": 0.6667, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.13439401984214783, |
|
"rewards/margins": 0.07216326892375946, |
|
"rewards/rejected": -0.20655731856822968, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996609610933713e-06, |
|
"logits/chosen": 0.9116706848144531, |
|
"logits/rejected": 0.8976380228996277, |
|
"logps/chosen": -296.78948974609375, |
|
"logps/rejected": -279.92169189453125, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.14868149161338806, |
|
"rewards/margins": 0.07609296590089798, |
|
"rewards/rejected": -0.22477443516254425, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995316053150366e-06, |
|
"logits/chosen": 0.8733074069023132, |
|
"logits/rejected": 0.9674992561340332, |
|
"logps/chosen": -276.9858093261719, |
|
"logps/rejected": -256.2315673828125, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.15514853596687317, |
|
"rewards/margins": 0.07954682409763336, |
|
"rewards/rejected": -0.23469536006450653, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9938141371946815e-06, |
|
"logits/chosen": 0.8927382230758667, |
|
"logits/rejected": 1.0055015087127686, |
|
"logps/chosen": -265.1849670410156, |
|
"logps/rejected": -274.2850036621094, |
|
"loss": 0.643, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.1328841745853424, |
|
"rewards/margins": 0.12832005321979523, |
|
"rewards/rejected": -0.26120424270629883, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992103988476206e-06, |
|
"logits/chosen": 0.8474317789077759, |
|
"logits/rejected": 0.9396875500679016, |
|
"logps/chosen": -310.89520263671875, |
|
"logps/rejected": -272.8279113769531, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.15546834468841553, |
|
"rewards/margins": 0.11896806955337524, |
|
"rewards/rejected": -0.27443641424179077, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990185749791866e-06, |
|
"logits/chosen": 0.7985225915908813, |
|
"logits/rejected": 0.8882298469543457, |
|
"logps/chosen": -259.30877685546875, |
|
"logps/rejected": -238.6549530029297, |
|
"loss": 0.6418, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1691729873418808, |
|
"rewards/margins": 0.13572049140930176, |
|
"rewards/rejected": -0.30489346385002136, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9880595813140395e-06, |
|
"logits/chosen": 0.7800701856613159, |
|
"logits/rejected": 0.8966795206069946, |
|
"logps/chosen": -295.586181640625, |
|
"logps/rejected": -269.9721374511719, |
|
"loss": 0.6344, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.20812523365020752, |
|
"rewards/margins": 0.15324506163597107, |
|
"rewards/rejected": -0.361370325088501, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985725660577184e-06, |
|
"logits/chosen": 0.8370100259780884, |
|
"logits/rejected": 0.8300473093986511, |
|
"logps/chosen": -268.3841247558594, |
|
"logps/rejected": -272.91815185546875, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2489767074584961, |
|
"rewards/margins": 0.1822068989276886, |
|
"rewards/rejected": -0.4311836361885071, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_logits/chosen": 0.7187003493309021, |
|
"eval_logits/rejected": 0.7971898317337036, |
|
"eval_logps/chosen": -310.10272216796875, |
|
"eval_logps/rejected": -292.38580322265625, |
|
"eval_loss": 0.6406594514846802, |
|
"eval_rewards/accuracies": 0.6044999957084656, |
|
"eval_rewards/chosen": -0.32555925846099854, |
|
"eval_rewards/margins": 0.16013950109481812, |
|
"eval_rewards/rejected": -0.48569872975349426, |
|
"eval_runtime": 545.4957, |
|
"eval_samples_per_second": 3.666, |
|
"eval_steps_per_second": 0.917, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983184182463009e-06, |
|
"logits/chosen": 0.6981757879257202, |
|
"logits/rejected": 0.8250112533569336, |
|
"logps/chosen": -314.55950927734375, |
|
"logps/rejected": -264.6604919433594, |
|
"loss": 0.6356, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.31188660860061646, |
|
"rewards/margins": 0.15816518664360046, |
|
"rewards/rejected": -0.4700518548488617, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980435359184203e-06, |
|
"logits/chosen": 0.7419403791427612, |
|
"logits/rejected": 0.8123974800109863, |
|
"logps/chosen": -326.6161193847656, |
|
"logps/rejected": -308.69049072265625, |
|
"loss": 0.6274, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3839537501335144, |
|
"rewards/margins": 0.20789018273353577, |
|
"rewards/rejected": -0.5918439626693726, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9774794202667236e-06, |
|
"logits/chosen": 0.6873521208763123, |
|
"logits/rejected": 0.7603924870491028, |
|
"logps/chosen": -304.0896301269531, |
|
"logps/rejected": -290.05841064453125, |
|
"loss": 0.6251, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.3807668089866638, |
|
"rewards/margins": 0.1975090503692627, |
|
"rewards/rejected": -0.5782758593559265, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.974316612530615e-06, |
|
"logits/chosen": 0.6311649084091187, |
|
"logits/rejected": 0.6694685816764832, |
|
"logps/chosen": -272.9194641113281, |
|
"logps/rejected": -275.1445007324219, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.3134765625, |
|
"rewards/margins": 0.24742670357227325, |
|
"rewards/rejected": -0.5609032511711121, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970947200069416e-06, |
|
"logits/chosen": 0.6108866333961487, |
|
"logits/rejected": 0.6520397663116455, |
|
"logps/chosen": -301.78326416015625, |
|
"logps/rejected": -287.75848388671875, |
|
"loss": 0.6147, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.3902989625930786, |
|
"rewards/margins": 0.26976609230041504, |
|
"rewards/rejected": -0.6600649952888489, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967371464228096e-06, |
|
"logits/chosen": 0.62494957447052, |
|
"logits/rejected": 0.6929062604904175, |
|
"logps/chosen": -332.30120849609375, |
|
"logps/rejected": -324.767333984375, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.4859549105167389, |
|
"rewards/margins": 0.181377574801445, |
|
"rewards/rejected": -0.6673325300216675, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963589703579569e-06, |
|
"logits/chosen": 0.568712592124939, |
|
"logits/rejected": 0.7027269601821899, |
|
"logps/chosen": -308.6661376953125, |
|
"logps/rejected": -291.0648498535156, |
|
"loss": 0.5775, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.407268762588501, |
|
"rewards/margins": 0.34008973836898804, |
|
"rewards/rejected": -0.747358500957489, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9596022338997615e-06, |
|
"logits/chosen": 0.5451105833053589, |
|
"logits/rejected": 0.532687246799469, |
|
"logps/chosen": -301.73736572265625, |
|
"logps/rejected": -290.80987548828125, |
|
"loss": 0.6317, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4523653984069824, |
|
"rewards/margins": 0.20574729144573212, |
|
"rewards/rejected": -0.6581127047538757, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.955409388141243e-06, |
|
"logits/chosen": 0.5721498131752014, |
|
"logits/rejected": 0.5243967771530151, |
|
"logps/chosen": -310.00701904296875, |
|
"logps/rejected": -296.6248779296875, |
|
"loss": 0.6067, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.42185163497924805, |
|
"rewards/margins": 0.2717475891113281, |
|
"rewards/rejected": -0.6935992240905762, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.951011516405429e-06, |
|
"logits/chosen": 0.5027920007705688, |
|
"logits/rejected": 0.5034610033035278, |
|
"logps/chosen": -286.9452209472656, |
|
"logps/rejected": -324.5048828125, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.5031018257141113, |
|
"rewards/margins": 0.1803509145975113, |
|
"rewards/rejected": -0.6834527254104614, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": 0.44342371821403503, |
|
"eval_logits/rejected": 0.5041319727897644, |
|
"eval_logps/chosen": -327.3852233886719, |
|
"eval_logps/rejected": -317.38275146484375, |
|
"eval_loss": 0.6309801340103149, |
|
"eval_rewards/accuracies": 0.6039999723434448, |
|
"eval_rewards/chosen": -0.4983837902545929, |
|
"eval_rewards/margins": 0.23728469014167786, |
|
"eval_rewards/rejected": -0.7356684803962708, |
|
"eval_runtime": 544.5805, |
|
"eval_samples_per_second": 3.673, |
|
"eval_steps_per_second": 0.918, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.946408985913344e-06, |
|
"logits/chosen": 0.4787842333316803, |
|
"logits/rejected": 0.49609264731407166, |
|
"logps/chosen": -300.0178527832031, |
|
"logps/rejected": -288.23297119140625, |
|
"loss": 0.6733, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.5682172775268555, |
|
"rewards/margins": 0.13463091850280762, |
|
"rewards/rejected": -0.7028483152389526, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.941602180974958e-06, |
|
"logits/chosen": 0.48600974678993225, |
|
"logits/rejected": 0.5271373987197876, |
|
"logps/chosen": -327.2370300292969, |
|
"logps/rejected": -296.0666198730469, |
|
"loss": 0.6103, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4133662283420563, |
|
"rewards/margins": 0.28371673822402954, |
|
"rewards/rejected": -0.6970829963684082, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.936591502957101e-06, |
|
"logits/chosen": 0.43321290612220764, |
|
"logits/rejected": 0.5329369306564331, |
|
"logps/chosen": -335.93365478515625, |
|
"logps/rejected": -309.68084716796875, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.3893541097640991, |
|
"rewards/margins": 0.21971730887889862, |
|
"rewards/rejected": -0.6090713739395142, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931377370249946e-06, |
|
"logits/chosen": 0.5257896184921265, |
|
"logits/rejected": 0.5845480561256409, |
|
"logps/chosen": -317.484619140625, |
|
"logps/rejected": -291.53948974609375, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.4368332028388977, |
|
"rewards/margins": 0.21368706226348877, |
|
"rewards/rejected": -0.6505202054977417, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.925960218232073e-06, |
|
"logits/chosen": 0.5211079716682434, |
|
"logits/rejected": 0.5993599891662598, |
|
"logps/chosen": -356.9900207519531, |
|
"logps/rejected": -359.18939208984375, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.540452778339386, |
|
"rewards/margins": 0.29875844717025757, |
|
"rewards/rejected": -0.8392112851142883, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.920340499234116e-06, |
|
"logits/chosen": 0.4190196394920349, |
|
"logits/rejected": 0.5146626234054565, |
|
"logps/chosen": -337.7661437988281, |
|
"logps/rejected": -283.05902099609375, |
|
"loss": 0.6362, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5408000349998474, |
|
"rewards/margins": 0.21028542518615723, |
|
"rewards/rejected": -0.7510854005813599, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.914518682500995e-06, |
|
"logits/chosen": 0.5096255540847778, |
|
"logits/rejected": 0.5135337114334106, |
|
"logps/chosen": -328.9342346191406, |
|
"logps/rejected": -347.03399658203125, |
|
"loss": 0.6074, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.51976478099823, |
|
"rewards/margins": 0.2629929780960083, |
|
"rewards/rejected": -0.7827577590942383, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9084952541527315e-06, |
|
"logits/chosen": 0.4741145968437195, |
|
"logits/rejected": 0.4941348135471344, |
|
"logps/chosen": -293.9762268066406, |
|
"logps/rejected": -308.02655029296875, |
|
"loss": 0.5911, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.39032864570617676, |
|
"rewards/margins": 0.3133639395236969, |
|
"rewards/rejected": -0.7036925554275513, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.902270717143858e-06, |
|
"logits/chosen": 0.44331198930740356, |
|
"logits/rejected": 0.4858936667442322, |
|
"logps/chosen": -326.63909912109375, |
|
"logps/rejected": -310.3343505859375, |
|
"loss": 0.6161, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.41057485342025757, |
|
"rewards/margins": 0.2567914128303528, |
|
"rewards/rejected": -0.6673663258552551, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.895845591221427e-06, |
|
"logits/chosen": 0.3671930730342865, |
|
"logits/rejected": 0.47033652663230896, |
|
"logps/chosen": -300.76873779296875, |
|
"logps/rejected": -305.1224060058594, |
|
"loss": 0.6238, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.537026047706604, |
|
"rewards/margins": 0.2782416045665741, |
|
"rewards/rejected": -0.8152676820755005, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_logits/chosen": 0.414013147354126, |
|
"eval_logits/rejected": 0.4768453538417816, |
|
"eval_logps/chosen": -328.9062805175781, |
|
"eval_logps/rejected": -321.1136779785156, |
|
"eval_loss": 0.6180335879325867, |
|
"eval_rewards/accuracies": 0.6175000071525574, |
|
"eval_rewards/chosen": -0.5135945081710815, |
|
"eval_rewards/margins": 0.25938284397125244, |
|
"eval_rewards/rejected": -0.7729774117469788, |
|
"eval_runtime": 544.9185, |
|
"eval_samples_per_second": 3.67, |
|
"eval_steps_per_second": 0.918, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8892204128816e-06, |
|
"logits/chosen": 0.4711977541446686, |
|
"logits/rejected": 0.5027458071708679, |
|
"logps/chosen": -335.2975769042969, |
|
"logps/rejected": -362.6507873535156, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5243827700614929, |
|
"rewards/margins": 0.3451407551765442, |
|
"rewards/rejected": -0.8695236444473267, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882395735324864e-06, |
|
"logits/chosen": 0.37723851203918457, |
|
"logits/rejected": 0.37279143929481506, |
|
"logps/chosen": -324.41204833984375, |
|
"logps/rejected": -320.57904052734375, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.4609324038028717, |
|
"rewards/margins": 0.25949710607528687, |
|
"rewards/rejected": -0.720429539680481, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.87537212840983e-06, |
|
"logits/chosen": 0.41040197014808655, |
|
"logits/rejected": 0.4262399673461914, |
|
"logps/chosen": -353.74359130859375, |
|
"logps/rejected": -339.08740234375, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5424319505691528, |
|
"rewards/margins": 0.3541128635406494, |
|
"rewards/rejected": -0.8965448141098022, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8681501786056545e-06, |
|
"logits/chosen": 0.37297043204307556, |
|
"logits/rejected": 0.42292696237564087, |
|
"logps/chosen": -331.2789306640625, |
|
"logps/rejected": -298.3685302734375, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.613556981086731, |
|
"rewards/margins": 0.22487401962280273, |
|
"rewards/rejected": -0.8384310007095337, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.860730488943068e-06, |
|
"logits/chosen": 0.4059647023677826, |
|
"logits/rejected": 0.41552990674972534, |
|
"logps/chosen": -330.83514404296875, |
|
"logps/rejected": -325.4556884765625, |
|
"loss": 0.5811, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5344411134719849, |
|
"rewards/margins": 0.34030893445014954, |
|
"rewards/rejected": -0.8747501373291016, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.853113678964022e-06, |
|
"logits/chosen": 0.4156310558319092, |
|
"logits/rejected": 0.4202690124511719, |
|
"logps/chosen": -309.32623291015625, |
|
"logps/rejected": -325.4747619628906, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6245480179786682, |
|
"rewards/margins": 0.3636637330055237, |
|
"rewards/rejected": -0.9882117509841919, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.845300384669958e-06, |
|
"logits/chosen": 0.36242616176605225, |
|
"logits/rejected": 0.37326696515083313, |
|
"logps/chosen": -326.7525939941406, |
|
"logps/rejected": -303.91986083984375, |
|
"loss": 0.6412, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.6412128210067749, |
|
"rewards/margins": 0.229770228266716, |
|
"rewards/rejected": -0.8709830045700073, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.837291258468701e-06, |
|
"logits/chosen": 0.3382135033607483, |
|
"logits/rejected": 0.34883031249046326, |
|
"logps/chosen": -334.1026306152344, |
|
"logps/rejected": -330.28582763671875, |
|
"loss": 0.5985, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5876267552375793, |
|
"rewards/margins": 0.3265851140022278, |
|
"rewards/rejected": -0.9142118692398071, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.829086969119984e-06, |
|
"logits/chosen": 0.305641233921051, |
|
"logits/rejected": 0.3537415862083435, |
|
"logps/chosen": -300.503662109375, |
|
"logps/rejected": -319.2359313964844, |
|
"loss": 0.6035, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5569041967391968, |
|
"rewards/margins": 0.3058740198612213, |
|
"rewards/rejected": -0.8627783060073853, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.820688201679605e-06, |
|
"logits/chosen": 0.3215860426425934, |
|
"logits/rejected": 0.36162325739860535, |
|
"logps/chosen": -309.1678161621094, |
|
"logps/rejected": -305.80645751953125, |
|
"loss": 0.6022, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5132586359977722, |
|
"rewards/margins": 0.2897386848926544, |
|
"rewards/rejected": -0.8029972910881042, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": 0.2919526994228363, |
|
"eval_logits/rejected": 0.3468739986419678, |
|
"eval_logps/chosen": -333.6270751953125, |
|
"eval_logps/rejected": -329.4936828613281, |
|
"eval_loss": 0.6146319508552551, |
|
"eval_rewards/accuracies": 0.609499990940094, |
|
"eval_rewards/chosen": -0.5608024001121521, |
|
"eval_rewards/margins": 0.29597532749176025, |
|
"eval_rewards/rejected": -0.8567777276039124, |
|
"eval_runtime": 544.2266, |
|
"eval_samples_per_second": 3.675, |
|
"eval_steps_per_second": 0.919, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8120956574422315e-06, |
|
"logits/chosen": 0.3398824632167816, |
|
"logits/rejected": 0.329068660736084, |
|
"logps/chosen": -338.9511413574219, |
|
"logps/rejected": -359.6614074707031, |
|
"loss": 0.6156, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5863468050956726, |
|
"rewards/margins": 0.282352089881897, |
|
"rewards/rejected": -0.8686988949775696, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.803310053882831e-06, |
|
"logits/chosen": 0.3125428557395935, |
|
"logits/rejected": 0.33215147256851196, |
|
"logps/chosen": -261.2579345703125, |
|
"logps/rejected": -275.7474365234375, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5128569602966309, |
|
"rewards/margins": 0.28050488233566284, |
|
"rewards/rejected": -0.7933619022369385, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.794332124596775e-06, |
|
"logits/chosen": 0.31949958205223083, |
|
"logits/rejected": 0.37735581398010254, |
|
"logps/chosen": -333.3515625, |
|
"logps/rejected": -333.65374755859375, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.5982740521430969, |
|
"rewards/margins": 0.2791600823402405, |
|
"rewards/rejected": -0.8774340748786926, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.785162619238575e-06, |
|
"logits/chosen": 0.36345189809799194, |
|
"logits/rejected": 0.39270249009132385, |
|
"logps/chosen": -318.712890625, |
|
"logps/rejected": -333.59381103515625, |
|
"loss": 0.588, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5787845849990845, |
|
"rewards/margins": 0.36206668615341187, |
|
"rewards/rejected": -0.9408512115478516, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.775802303459288e-06, |
|
"logits/chosen": 0.3391379714012146, |
|
"logits/rejected": 0.42467838525772095, |
|
"logps/chosen": -346.51043701171875, |
|
"logps/rejected": -349.189453125, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.595561683177948, |
|
"rewards/margins": 0.36882907152175903, |
|
"rewards/rejected": -0.9643908739089966, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.766251958842589e-06, |
|
"logits/chosen": 0.3606031835079193, |
|
"logits/rejected": 0.39103537797927856, |
|
"logps/chosen": -363.67010498046875, |
|
"logps/rejected": -356.0177001953125, |
|
"loss": 0.5942, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6311839818954468, |
|
"rewards/margins": 0.330700546503067, |
|
"rewards/rejected": -0.9618844985961914, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7565123828395066e-06, |
|
"logits/chosen": 0.3178097605705261, |
|
"logits/rejected": 0.42083635926246643, |
|
"logps/chosen": -368.1545715332031, |
|
"logps/rejected": -372.1910095214844, |
|
"loss": 0.591, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5468029975891113, |
|
"rewards/margins": 0.32849472761154175, |
|
"rewards/rejected": -0.8752977252006531, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.746584388701831e-06, |
|
"logits/chosen": 0.33455774188041687, |
|
"logits/rejected": 0.43152570724487305, |
|
"logps/chosen": -313.480712890625, |
|
"logps/rejected": -316.8352355957031, |
|
"loss": 0.5702, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5729404091835022, |
|
"rewards/margins": 0.43620842695236206, |
|
"rewards/rejected": -1.0091488361358643, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.736468805414218e-06, |
|
"logits/chosen": 0.3555249571800232, |
|
"logits/rejected": 0.4613436758518219, |
|
"logps/chosen": -347.31915283203125, |
|
"logps/rejected": -359.3041076660156, |
|
"loss": 0.5614, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5481359362602234, |
|
"rewards/margins": 0.4673057496547699, |
|
"rewards/rejected": -1.015441656112671, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7261664776249595e-06, |
|
"logits/chosen": 0.31781864166259766, |
|
"logits/rejected": 0.32524436712265015, |
|
"logps/chosen": -334.6598205566406, |
|
"logps/rejected": -352.02569580078125, |
|
"loss": 0.5893, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6611984372138977, |
|
"rewards/margins": 0.40746697783470154, |
|
"rewards/rejected": -1.0686652660369873, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": 0.25759702920913696, |
|
"eval_logits/rejected": 0.31359511613845825, |
|
"eval_logps/chosen": -344.19696044921875, |
|
"eval_logps/rejected": -343.95404052734375, |
|
"eval_loss": 0.6058722138404846, |
|
"eval_rewards/accuracies": 0.6169999837875366, |
|
"eval_rewards/chosen": -0.6665017008781433, |
|
"eval_rewards/margins": 0.3348793387413025, |
|
"eval_rewards/rejected": -1.0013810396194458, |
|
"eval_runtime": 545.3642, |
|
"eval_samples_per_second": 3.667, |
|
"eval_steps_per_second": 0.917, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.715678265575463e-06, |
|
"logits/chosen": 0.29794925451278687, |
|
"logits/rejected": 0.34237638115882874, |
|
"logps/chosen": -375.47698974609375, |
|
"logps/rejected": -381.877685546875, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6799601912498474, |
|
"rewards/margins": 0.27002081274986267, |
|
"rewards/rejected": -0.9499810338020325, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.705005045028415e-06, |
|
"logits/chosen": 0.32408756017684937, |
|
"logits/rejected": 0.34640252590179443, |
|
"logps/chosen": -400.0458068847656, |
|
"logps/rejected": -393.19244384765625, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6836780905723572, |
|
"rewards/margins": 0.46534833312034607, |
|
"rewards/rejected": -1.1490265130996704, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.694147707194659e-06, |
|
"logits/chosen": 0.24671033024787903, |
|
"logits/rejected": 0.2881634533405304, |
|
"logps/chosen": -361.03912353515625, |
|
"logps/rejected": -349.8355712890625, |
|
"loss": 0.5887, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7573670744895935, |
|
"rewards/margins": 0.39362633228302, |
|
"rewards/rejected": -1.1509934663772583, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.683107158658782e-06, |
|
"logits/chosen": 0.19585518538951874, |
|
"logits/rejected": 0.19908159971237183, |
|
"logps/chosen": -330.7119445800781, |
|
"logps/rejected": -334.669921875, |
|
"loss": 0.5961, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6968773603439331, |
|
"rewards/margins": 0.3805212378501892, |
|
"rewards/rejected": -1.077398657798767, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.671884321303407e-06, |
|
"logits/chosen": 0.23753933608531952, |
|
"logits/rejected": 0.23777754604816437, |
|
"logps/chosen": -348.0587158203125, |
|
"logps/rejected": -371.2275695800781, |
|
"loss": 0.5923, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6319106817245483, |
|
"rewards/margins": 0.4165875315666199, |
|
"rewards/rejected": -1.048498272895813, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.660480132232224e-06, |
|
"logits/chosen": 0.2047911435365677, |
|
"logits/rejected": 0.23187002539634705, |
|
"logps/chosen": -346.5743713378906, |
|
"logps/rejected": -345.38970947265625, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.7073361873626709, |
|
"rewards/margins": 0.31164541840553284, |
|
"rewards/rejected": -1.018981695175171, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6488955436917414e-06, |
|
"logits/chosen": 0.2647855281829834, |
|
"logits/rejected": 0.3133198320865631, |
|
"logps/chosen": -343.3387756347656, |
|
"logps/rejected": -299.07171630859375, |
|
"loss": 0.6214, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6582953333854675, |
|
"rewards/margins": 0.22357884049415588, |
|
"rewards/rejected": -0.8818742036819458, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6371315229917644e-06, |
|
"logits/chosen": 0.2728188931941986, |
|
"logits/rejected": 0.3087821900844574, |
|
"logps/chosen": -320.83038330078125, |
|
"logps/rejected": -330.16864013671875, |
|
"loss": 0.5954, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.5822576284408569, |
|
"rewards/margins": 0.3170580267906189, |
|
"rewards/rejected": -0.8993157148361206, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.625189052424638e-06, |
|
"logits/chosen": 0.3076649308204651, |
|
"logits/rejected": 0.34865492582321167, |
|
"logps/chosen": -290.0766296386719, |
|
"logps/rejected": -279.2445068359375, |
|
"loss": 0.6127, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.4531574249267578, |
|
"rewards/margins": 0.25979921221733093, |
|
"rewards/rejected": -0.7129566669464111, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.613069129183218e-06, |
|
"logits/chosen": 0.2790106236934662, |
|
"logits/rejected": 0.2944566607475281, |
|
"logps/chosen": -318.7317810058594, |
|
"logps/rejected": -332.01373291015625, |
|
"loss": 0.6435, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.5851359367370605, |
|
"rewards/margins": 0.2526613771915436, |
|
"rewards/rejected": -0.8377972841262817, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 0.2765878736972809, |
|
"eval_logits/rejected": 0.3378385305404663, |
|
"eval_logps/chosen": -331.1562194824219, |
|
"eval_logps/rejected": -330.9462585449219, |
|
"eval_loss": 0.6007062792778015, |
|
"eval_rewards/accuracies": 0.6294999718666077, |
|
"eval_rewards/chosen": -0.5360942482948303, |
|
"eval_rewards/margins": 0.3352090120315552, |
|
"eval_rewards/rejected": -0.8713032007217407, |
|
"eval_runtime": 545.4218, |
|
"eval_samples_per_second": 3.667, |
|
"eval_steps_per_second": 0.917, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.600772765277607e-06, |
|
"logits/chosen": 0.27743756771087646, |
|
"logits/rejected": 0.34427809715270996, |
|
"logps/chosen": -349.8276672363281, |
|
"logps/rejected": -335.66961669921875, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5436006784439087, |
|
"rewards/margins": 0.3772372603416443, |
|
"rewards/rejected": -0.9208378791809082, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.588300987450652e-06, |
|
"logits/chosen": 0.3442625403404236, |
|
"logits/rejected": 0.4009414613246918, |
|
"logps/chosen": -328.8561096191406, |
|
"logps/rejected": -346.3475036621094, |
|
"loss": 0.5841, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.514030396938324, |
|
"rewards/margins": 0.38697338104248047, |
|
"rewards/rejected": -0.9010037183761597, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5756548370922136e-06, |
|
"logits/chosen": 0.34263429045677185, |
|
"logits/rejected": 0.3785105049610138, |
|
"logps/chosen": -328.25164794921875, |
|
"logps/rejected": -325.9219665527344, |
|
"loss": 0.6203, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5046300292015076, |
|
"rewards/margins": 0.2802472710609436, |
|
"rewards/rejected": -0.7848772406578064, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562835370152206e-06, |
|
"logits/chosen": 0.299723744392395, |
|
"logits/rejected": 0.3728645443916321, |
|
"logps/chosen": -298.93182373046875, |
|
"logps/rejected": -308.7339172363281, |
|
"loss": 0.6124, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.49450236558914185, |
|
"rewards/margins": 0.2736920416355133, |
|
"rewards/rejected": -0.7681943774223328, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.54984365705243e-06, |
|
"logits/chosen": 0.33221280574798584, |
|
"logits/rejected": 0.3348609209060669, |
|
"logps/chosen": -304.962646484375, |
|
"logps/rejected": -305.4454650878906, |
|
"loss": 0.6168, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5311123132705688, |
|
"rewards/margins": 0.2822791039943695, |
|
"rewards/rejected": -0.8133915066719055, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536680782597191e-06, |
|
"logits/chosen": 0.29144400358200073, |
|
"logits/rejected": 0.3640415370464325, |
|
"logps/chosen": -345.25677490234375, |
|
"logps/rejected": -343.65972900390625, |
|
"loss": 0.6002, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.48992738127708435, |
|
"rewards/margins": 0.32441776990890503, |
|
"rewards/rejected": -0.8143451809883118, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.523347845882718e-06, |
|
"logits/chosen": 0.3392336964607239, |
|
"logits/rejected": 0.3736265301704407, |
|
"logps/chosen": -311.3797302246094, |
|
"logps/rejected": -290.25030517578125, |
|
"loss": 0.5924, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.560332179069519, |
|
"rewards/margins": 0.34816139936447144, |
|
"rewards/rejected": -0.9084935188293457, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.50984596020539e-06, |
|
"logits/chosen": 0.3102160096168518, |
|
"logits/rejected": 0.34888216853141785, |
|
"logps/chosen": -352.9917297363281, |
|
"logps/rejected": -350.4587707519531, |
|
"loss": 0.589, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.5566148161888123, |
|
"rewards/margins": 0.3757871091365814, |
|
"rewards/rejected": -0.9324019551277161, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4961762529687745e-06, |
|
"logits/chosen": 0.27754610776901245, |
|
"logits/rejected": 0.3419993817806244, |
|
"logps/chosen": -365.9258117675781, |
|
"logps/rejected": -324.9228210449219, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.6483023762702942, |
|
"rewards/margins": 0.34900346398353577, |
|
"rewards/rejected": -0.9973058700561523, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.482339865589492e-06, |
|
"logits/chosen": 0.23795409500598907, |
|
"logits/rejected": 0.339063435792923, |
|
"logps/chosen": -321.9118957519531, |
|
"logps/rejected": -328.0565185546875, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6000961661338806, |
|
"rewards/margins": 0.43007296323776245, |
|
"rewards/rejected": -1.030169129371643, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": 0.28789982199668884, |
|
"eval_logits/rejected": 0.34160664677619934, |
|
"eval_logps/chosen": -345.95831298828125, |
|
"eval_logps/rejected": -346.8067932128906, |
|
"eval_loss": 0.5970672369003296, |
|
"eval_rewards/accuracies": 0.6194999814033508, |
|
"eval_rewards/chosen": -0.6841151714324951, |
|
"eval_rewards/margins": 0.3457929790019989, |
|
"eval_rewards/rejected": -1.0299081802368164, |
|
"eval_runtime": 545.4538, |
|
"eval_samples_per_second": 3.667, |
|
"eval_steps_per_second": 0.917, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.468337953401909e-06, |
|
"logits/chosen": 0.3074113726615906, |
|
"logits/rejected": 0.34023183584213257, |
|
"logps/chosen": -362.4942626953125, |
|
"logps/rejected": -334.08331298828125, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.6692425608634949, |
|
"rewards/margins": 0.2884816527366638, |
|
"rewards/rejected": -0.9577242136001587, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.45417168556166e-06, |
|
"logits/chosen": 0.25964727997779846, |
|
"logits/rejected": 0.30631059408187866, |
|
"logps/chosen": -312.24346923828125, |
|
"logps/rejected": -344.4534606933594, |
|
"loss": 0.5919, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.5477944612503052, |
|
"rewards/margins": 0.3570755422115326, |
|
"rewards/rejected": -0.9048700332641602, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.439842244948036e-06, |
|
"logits/chosen": 0.23340173065662384, |
|
"logits/rejected": 0.2836596369743347, |
|
"logps/chosen": -316.97265625, |
|
"logps/rejected": -322.099609375, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5921416878700256, |
|
"rewards/margins": 0.2623347342014313, |
|
"rewards/rejected": -0.8544764518737793, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.425350828065204e-06, |
|
"logits/chosen": 0.24592900276184082, |
|
"logits/rejected": 0.24909226596355438, |
|
"logps/chosen": -335.1441345214844, |
|
"logps/rejected": -347.17657470703125, |
|
"loss": 0.6054, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.5570545792579651, |
|
"rewards/margins": 0.3322640657424927, |
|
"rewards/rejected": -0.889318585395813, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.410698644942303e-06, |
|
"logits/chosen": 0.27134156227111816, |
|
"logits/rejected": 0.2923016846179962, |
|
"logps/chosen": -332.7898254394531, |
|
"logps/rejected": -365.0975036621094, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.5455362796783447, |
|
"rewards/margins": 0.32090216875076294, |
|
"rewards/rejected": -0.8664385080337524, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.395886919032406e-06, |
|
"logits/chosen": 0.22134526073932648, |
|
"logits/rejected": 0.2600460648536682, |
|
"logps/chosen": -330.41302490234375, |
|
"logps/rejected": -321.27239990234375, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.5916182994842529, |
|
"rewards/margins": 0.3787124752998352, |
|
"rewards/rejected": -0.9703305959701538, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.380916887110366e-06, |
|
"logits/chosen": 0.167360320687294, |
|
"logits/rejected": 0.23531296849250793, |
|
"logps/chosen": -317.62847900390625, |
|
"logps/rejected": -324.3927001953125, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.6931602358818054, |
|
"rewards/margins": 0.4072093963623047, |
|
"rewards/rejected": -1.1003696918487549, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.365789799169539e-06, |
|
"logits/chosen": 0.21306875348091125, |
|
"logits/rejected": 0.1979639232158661, |
|
"logps/chosen": -391.1867370605469, |
|
"logps/rejected": -379.17315673828125, |
|
"loss": 0.563, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8303465843200684, |
|
"rewards/margins": 0.44011443853378296, |
|
"rewards/rejected": -1.270461082458496, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.350506918317416e-06, |
|
"logits/chosen": 0.16972529888153076, |
|
"logits/rejected": 0.1481637805700302, |
|
"logps/chosen": -377.83966064453125, |
|
"logps/rejected": -376.23394775390625, |
|
"loss": 0.6126, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.7504847645759583, |
|
"rewards/margins": 0.33371812105178833, |
|
"rewards/rejected": -1.084202766418457, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.335069520670149e-06, |
|
"logits/chosen": 0.17195920646190643, |
|
"logits/rejected": 0.2304229438304901, |
|
"logps/chosen": -354.7908935546875, |
|
"logps/rejected": -338.830810546875, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7843652963638306, |
|
"rewards/margins": 0.5480031967163086, |
|
"rewards/rejected": -1.3323684930801392, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": 0.1477048397064209, |
|
"eval_logits/rejected": 0.19143778085708618, |
|
"eval_logps/chosen": -366.0687255859375, |
|
"eval_logps/rejected": -372.77557373046875, |
|
"eval_loss": 0.5971232652664185, |
|
"eval_rewards/accuracies": 0.628000020980835, |
|
"eval_rewards/chosen": -0.8852190971374512, |
|
"eval_rewards/margins": 0.404377281665802, |
|
"eval_rewards/rejected": -1.289596438407898, |
|
"eval_runtime": 545.4758, |
|
"eval_samples_per_second": 3.667, |
|
"eval_steps_per_second": 0.917, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.319478895246e-06, |
|
"logits/chosen": 0.16348786652088165, |
|
"logits/rejected": 0.13432151079177856, |
|
"logps/chosen": -354.9012756347656, |
|
"logps/rejected": -346.83770751953125, |
|
"loss": 0.6192, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.8076756596565247, |
|
"rewards/margins": 0.3335443139076233, |
|
"rewards/rejected": -1.141219973564148, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.303736343857704e-06, |
|
"logits/chosen": 0.2526926100254059, |
|
"logits/rejected": 0.21305808424949646, |
|
"logps/chosen": -342.25537109375, |
|
"logps/rejected": -363.7655334472656, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.781582236289978, |
|
"rewards/margins": 0.44490519165992737, |
|
"rewards/rejected": -1.2264875173568726, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.287843181003772e-06, |
|
"logits/chosen": 0.21736888587474823, |
|
"logits/rejected": 0.27333998680114746, |
|
"logps/chosen": -357.6294860839844, |
|
"logps/rejected": -358.72308349609375, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.799574077129364, |
|
"rewards/margins": 0.4974464476108551, |
|
"rewards/rejected": -1.2970205545425415, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.27180073375873e-06, |
|
"logits/chosen": 0.19267229735851288, |
|
"logits/rejected": 0.20701631903648376, |
|
"logps/chosen": -362.3644104003906, |
|
"logps/rejected": -376.9123229980469, |
|
"loss": 0.5756, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.8460612297058105, |
|
"rewards/margins": 0.47652435302734375, |
|
"rewards/rejected": -1.3225855827331543, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.255610341662304e-06, |
|
"logits/chosen": 0.2003142386674881, |
|
"logits/rejected": 0.24681635200977325, |
|
"logps/chosen": -385.9356384277344, |
|
"logps/rejected": -390.82818603515625, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8146869540214539, |
|
"rewards/margins": 0.4905151426792145, |
|
"rewards/rejected": -1.3052021265029907, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2392733566075764e-06, |
|
"logits/chosen": 0.21379859745502472, |
|
"logits/rejected": 0.2299179583787918, |
|
"logps/chosen": -327.9105529785156, |
|
"logps/rejected": -345.62481689453125, |
|
"loss": 0.5906, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8707124590873718, |
|
"rewards/margins": 0.42304062843322754, |
|
"rewards/rejected": -1.293752908706665, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2227911427280975e-06, |
|
"logits/chosen": 0.2143145352602005, |
|
"logits/rejected": 0.24731318652629852, |
|
"logps/chosen": -370.6251525878906, |
|
"logps/rejected": -387.04022216796875, |
|
"loss": 0.6156, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8683725595474243, |
|
"rewards/margins": 0.36514347791671753, |
|
"rewards/rejected": -1.233515977859497, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.206165076283983e-06, |
|
"logits/chosen": 0.15996266901493073, |
|
"logits/rejected": 0.1850823163986206, |
|
"logps/chosen": -320.31396484375, |
|
"logps/rejected": -331.081298828125, |
|
"loss": 0.636, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.8364042043685913, |
|
"rewards/margins": 0.2532604932785034, |
|
"rewards/rejected": -1.0896646976470947, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.189396545546995e-06, |
|
"logits/chosen": 0.2215954065322876, |
|
"logits/rejected": 0.27586954832077026, |
|
"logps/chosen": -358.5014343261719, |
|
"logps/rejected": -373.06488037109375, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.7066820859909058, |
|
"rewards/margins": 0.27226629853248596, |
|
"rewards/rejected": -0.9789482951164246, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.172486950684627e-06, |
|
"logits/chosen": 0.21022501587867737, |
|
"logits/rejected": 0.22880622744560242, |
|
"logps/chosen": -348.2843322753906, |
|
"logps/rejected": -368.8757629394531, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7178322076797485, |
|
"rewards/margins": 0.42600828409194946, |
|
"rewards/rejected": -1.1438405513763428, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": 0.16877275705337524, |
|
"eval_logits/rejected": 0.21647466719150543, |
|
"eval_logps/chosen": -349.3257141113281, |
|
"eval_logps/rejected": -354.08599853515625, |
|
"eval_loss": 0.5948539972305298, |
|
"eval_rewards/accuracies": 0.6315000057220459, |
|
"eval_rewards/chosen": -0.7177892327308655, |
|
"eval_rewards/margins": 0.38491156697273254, |
|
"eval_rewards/rejected": -1.1027007102966309, |
|
"eval_runtime": 545.5664, |
|
"eval_samples_per_second": 3.666, |
|
"eval_steps_per_second": 0.916, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.155437703643182e-06, |
|
"logits/chosen": 0.138587087392807, |
|
"logits/rejected": 0.1511545479297638, |
|
"logps/chosen": -347.5982971191406, |
|
"logps/rejected": -357.45428466796875, |
|
"loss": 0.5593, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6473184823989868, |
|
"rewards/margins": 0.49595245718955994, |
|
"rewards/rejected": -1.1432709693908691, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.138250228029882e-06, |
|
"logits/chosen": 0.1727471649646759, |
|
"logits/rejected": 0.22663483023643494, |
|
"logps/chosen": -302.2596740722656, |
|
"logps/rejected": -316.713134765625, |
|
"loss": 0.5862, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.7017611861228943, |
|
"rewards/margins": 0.384599506855011, |
|
"rewards/rejected": -1.0863606929779053, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.120925958993994e-06, |
|
"logits/chosen": 0.18980373442173004, |
|
"logits/rejected": 0.21815748512744904, |
|
"logps/chosen": -365.2742614746094, |
|
"logps/rejected": -347.21844482421875, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.7208053469657898, |
|
"rewards/margins": 0.3296663165092468, |
|
"rewards/rejected": -1.0504717826843262, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.103466343106999e-06, |
|
"logits/chosen": 0.17980042099952698, |
|
"logits/rejected": 0.18235398828983307, |
|
"logps/chosen": -345.6518249511719, |
|
"logps/rejected": -340.5759582519531, |
|
"loss": 0.6229, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7314938902854919, |
|
"rewards/margins": 0.3103274703025818, |
|
"rewards/rejected": -1.0418212413787842, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.085872838241797e-06, |
|
"logits/chosen": 0.22088384628295898, |
|
"logits/rejected": 0.22433125972747803, |
|
"logps/chosen": -352.87835693359375, |
|
"logps/rejected": -389.94769287109375, |
|
"loss": 0.5642, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6772549152374268, |
|
"rewards/margins": 0.4428323209285736, |
|
"rewards/rejected": -1.1200872659683228, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.06814691345098e-06, |
|
"logits/chosen": 0.20037512481212616, |
|
"logits/rejected": 0.26389187574386597, |
|
"logps/chosen": -373.4176940917969, |
|
"logps/rejected": -363.95367431640625, |
|
"loss": 0.5832, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7378250360488892, |
|
"rewards/margins": 0.40974587202072144, |
|
"rewards/rejected": -1.1475709676742554, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.050290048844171e-06, |
|
"logits/chosen": 0.216600701212883, |
|
"logits/rejected": 0.23960819840431213, |
|
"logps/chosen": -316.2613220214844, |
|
"logps/rejected": -314.7508850097656, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6164644360542297, |
|
"rewards/margins": 0.32712945342063904, |
|
"rewards/rejected": -0.9435938000679016, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.032303735464422e-06, |
|
"logits/chosen": 0.17721518874168396, |
|
"logits/rejected": 0.22735758125782013, |
|
"logps/chosen": -344.809326171875, |
|
"logps/rejected": -347.049072265625, |
|
"loss": 0.5851, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5567644238471985, |
|
"rewards/margins": 0.39752551913261414, |
|
"rewards/rejected": -0.9542900323867798, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.014189475163727e-06, |
|
"logits/chosen": 0.16344419121742249, |
|
"logits/rejected": 0.21651005744934082, |
|
"logps/chosen": -322.96533203125, |
|
"logps/rejected": -321.7102966308594, |
|
"loss": 0.6153, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.567093551158905, |
|
"rewards/margins": 0.2981587052345276, |
|
"rewards/rejected": -0.8652523159980774, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.995948780477605e-06, |
|
"logits/chosen": 0.19968202710151672, |
|
"logits/rejected": 0.21880850195884705, |
|
"logps/chosen": -346.12286376953125, |
|
"logps/rejected": -336.0102844238281, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.597925066947937, |
|
"rewards/margins": 0.34026554226875305, |
|
"rewards/rejected": -0.9381906390190125, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": 0.14649315178394318, |
|
"eval_logits/rejected": 0.19736629724502563, |
|
"eval_logps/chosen": -343.7120361328125, |
|
"eval_logps/rejected": -346.38848876953125, |
|
"eval_loss": 0.5935717821121216, |
|
"eval_rewards/accuracies": 0.6290000081062317, |
|
"eval_rewards/chosen": -0.6616523265838623, |
|
"eval_rewards/margins": 0.36407363414764404, |
|
"eval_rewards/rejected": -1.0257259607315063, |
|
"eval_runtime": 544.5501, |
|
"eval_samples_per_second": 3.673, |
|
"eval_steps_per_second": 0.918, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.977583174498816e-06, |
|
"logits/chosen": 0.1473083198070526, |
|
"logits/rejected": 0.16147521138191223, |
|
"logps/chosen": -336.35028076171875, |
|
"logps/rejected": -337.4244079589844, |
|
"loss": 0.5943, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.6331855654716492, |
|
"rewards/margins": 0.3674471378326416, |
|
"rewards/rejected": -1.0006327629089355, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.959094190750172e-06, |
|
"logits/chosen": 0.09590881317853928, |
|
"logits/rejected": 0.1392073780298233, |
|
"logps/chosen": -368.92742919921875, |
|
"logps/rejected": -366.66680908203125, |
|
"loss": 0.6168, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.8335236310958862, |
|
"rewards/margins": 0.3043787181377411, |
|
"rewards/rejected": -1.1379024982452393, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9404833730564975e-06, |
|
"logits/chosen": 0.12012658268213272, |
|
"logits/rejected": 0.13889259099960327, |
|
"logps/chosen": -338.89166259765625, |
|
"logps/rejected": -369.4378356933594, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.9257830381393433, |
|
"rewards/margins": 0.3463601768016815, |
|
"rewards/rejected": -1.2721431255340576, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.921752275415712e-06, |
|
"logits/chosen": 0.07003729790449142, |
|
"logits/rejected": 0.10457529872655869, |
|
"logps/chosen": -324.9673156738281, |
|
"logps/rejected": -325.56304931640625, |
|
"loss": 0.6099, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.8632106781005859, |
|
"rewards/margins": 0.33071261644363403, |
|
"rewards/rejected": -1.1939232349395752, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.902902461869079e-06, |
|
"logits/chosen": 0.04336017742753029, |
|
"logits/rejected": 0.107862189412117, |
|
"logps/chosen": -366.1966247558594, |
|
"logps/rejected": -370.04730224609375, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.9756678342819214, |
|
"rewards/margins": 0.36889126896858215, |
|
"rewards/rejected": -1.3445593118667603, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.883935506370605e-06, |
|
"logits/chosen": 0.03404034674167633, |
|
"logits/rejected": 0.07542404532432556, |
|
"logps/chosen": -363.59234619140625, |
|
"logps/rejected": -358.55291748046875, |
|
"loss": 0.6025, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.868129551410675, |
|
"rewards/margins": 0.37522757053375244, |
|
"rewards/rejected": -1.2433571815490723, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.864852992655617e-06, |
|
"logits/chosen": 0.029746342450380325, |
|
"logits/rejected": 0.08432164043188095, |
|
"logps/chosen": -335.89105224609375, |
|
"logps/rejected": -358.84063720703125, |
|
"loss": 0.5915, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6996029615402222, |
|
"rewards/margins": 0.38555437326431274, |
|
"rewards/rejected": -1.0851573944091797, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.845656514108516e-06, |
|
"logits/chosen": 0.0590134859085083, |
|
"logits/rejected": 0.08817918598651886, |
|
"logps/chosen": -366.0428771972656, |
|
"logps/rejected": -356.31353759765625, |
|
"loss": 0.5654, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7267670631408691, |
|
"rewards/margins": 0.442646324634552, |
|
"rewards/rejected": -1.1694133281707764, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.826347673629738e-06, |
|
"logits/chosen": 0.11978931725025177, |
|
"logits/rejected": 0.11140674352645874, |
|
"logps/chosen": -309.720947265625, |
|
"logps/rejected": -339.85491943359375, |
|
"loss": 0.5517, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.671705961227417, |
|
"rewards/margins": 0.5157135128974915, |
|
"rewards/rejected": -1.1874195337295532, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8069280835019062e-06, |
|
"logits/chosen": 0.06044895201921463, |
|
"logits/rejected": 0.091534823179245, |
|
"logps/chosen": -392.0270080566406, |
|
"logps/rejected": -360.0969543457031, |
|
"loss": 0.5843, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6990892887115479, |
|
"rewards/margins": 0.4130396246910095, |
|
"rewards/rejected": -1.1121289730072021, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": 0.058730266988277435, |
|
"eval_logits/rejected": 0.10037268698215485, |
|
"eval_logps/chosen": -366.15447998046875, |
|
"eval_logps/rejected": -374.1299133300781, |
|
"eval_loss": 0.5905080437660217, |
|
"eval_rewards/accuracies": 0.6334999799728394, |
|
"eval_rewards/chosen": -0.886076807975769, |
|
"eval_rewards/margins": 0.4170626699924469, |
|
"eval_rewards/rejected": -1.3031394481658936, |
|
"eval_runtime": 544.4729, |
|
"eval_samples_per_second": 3.673, |
|
"eval_steps_per_second": 0.918, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7873993652552077e-06, |
|
"logits/chosen": 0.05399291589856148, |
|
"logits/rejected": 0.11311284452676773, |
|
"logps/chosen": -355.0113830566406, |
|
"logps/rejected": -351.7707824707031, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.9721186757087708, |
|
"rewards/margins": 0.20769429206848145, |
|
"rewards/rejected": -1.1798131465911865, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7677631495319953e-06, |
|
"logits/chosen": 0.07790177315473557, |
|
"logits/rejected": 0.10548149049282074, |
|
"logps/chosen": -347.7140808105469, |
|
"logps/rejected": -331.2201843261719, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.8945258855819702, |
|
"rewards/margins": 0.32894036173820496, |
|
"rewards/rejected": -1.2234662771224976, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.748021075950633e-06, |
|
"logits/chosen": 0.17391851544380188, |
|
"logits/rejected": 0.1568259447813034, |
|
"logps/chosen": -404.0233459472656, |
|
"logps/rejected": -412.5658264160156, |
|
"loss": 0.5985, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.019444465637207, |
|
"rewards/margins": 0.4078293740749359, |
|
"rewards/rejected": -1.4272738695144653, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7281747929685824e-06, |
|
"logits/chosen": 0.1422004997730255, |
|
"logits/rejected": 0.21070496737957, |
|
"logps/chosen": -367.45281982421875, |
|
"logps/rejected": -362.89080810546875, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.793968677520752, |
|
"rewards/margins": 0.35677820444107056, |
|
"rewards/rejected": -1.1507470607757568, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.7082259577447604e-06, |
|
"logits/chosen": 0.14684629440307617, |
|
"logits/rejected": 0.1937289983034134, |
|
"logps/chosen": -405.5082702636719, |
|
"logps/rejected": -375.3370666503906, |
|
"loss": 0.5569, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8601733446121216, |
|
"rewards/margins": 0.4416710436344147, |
|
"rewards/rejected": -1.3018442392349243, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6881762360011688e-06, |
|
"logits/chosen": 0.1889687478542328, |
|
"logits/rejected": 0.2150932103395462, |
|
"logps/chosen": -373.6668395996094, |
|
"logps/rejected": -366.2331848144531, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8771917223930359, |
|
"rewards/margins": 0.3977741599082947, |
|
"rewards/rejected": -1.2749658823013306, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.668027301883802e-06, |
|
"logits/chosen": 0.16446110606193542, |
|
"logits/rejected": 0.18785087764263153, |
|
"logps/chosen": -351.145263671875, |
|
"logps/rejected": -343.36676025390625, |
|
"loss": 0.5739, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8971999883651733, |
|
"rewards/margins": 0.3938083052635193, |
|
"rewards/rejected": -1.2910082340240479, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.64778083782286e-06, |
|
"logits/chosen": 0.22796198725700378, |
|
"logits/rejected": 0.2517542243003845, |
|
"logps/chosen": -352.5816955566406, |
|
"logps/rejected": -400.53314208984375, |
|
"loss": 0.5811, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8595455884933472, |
|
"rewards/margins": 0.454528272151947, |
|
"rewards/rejected": -1.3140738010406494, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.627438534392268e-06, |
|
"logits/chosen": 0.18804603815078735, |
|
"logits/rejected": 0.19986502826213837, |
|
"logps/chosen": -342.622314453125, |
|
"logps/rejected": -325.24755859375, |
|
"loss": 0.5872, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.8171426057815552, |
|
"rewards/margins": 0.37278664112091064, |
|
"rewards/rejected": -1.1899292469024658, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.607002090168506e-06, |
|
"logits/chosen": 0.182193785905838, |
|
"logits/rejected": 0.20623533427715302, |
|
"logps/chosen": -343.2569274902344, |
|
"logps/rejected": -345.95123291015625, |
|
"loss": 0.6283, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8513103723526001, |
|
"rewards/margins": 0.2995356619358063, |
|
"rewards/rejected": -1.1508458852767944, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": 0.1738397777080536, |
|
"eval_logits/rejected": 0.22415193915367126, |
|
"eval_logps/chosen": -356.0013122558594, |
|
"eval_logps/rejected": -360.87457275390625, |
|
"eval_loss": 0.5882366895675659, |
|
"eval_rewards/accuracies": 0.6305000185966492, |
|
"eval_rewards/chosen": -0.7845450639724731, |
|
"eval_rewards/margins": 0.3860413432121277, |
|
"eval_rewards/rejected": -1.1705864667892456, |
|
"eval_runtime": 544.4759, |
|
"eval_samples_per_second": 3.673, |
|
"eval_steps_per_second": 0.918, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.586473211588787e-06, |
|
"logits/chosen": 0.21881847083568573, |
|
"logits/rejected": 0.2783322334289551, |
|
"logps/chosen": -363.8297424316406, |
|
"logps/rejected": -347.1457214355469, |
|
"loss": 0.5606, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.6486612558364868, |
|
"rewards/margins": 0.46156877279281616, |
|
"rewards/rejected": -1.1102300882339478, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5658536128085623e-06, |
|
"logits/chosen": 0.1464618593454361, |
|
"logits/rejected": 0.18708249926567078, |
|
"logps/chosen": -318.83502197265625, |
|
"logps/rejected": -347.23028564453125, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6937158107757568, |
|
"rewards/margins": 0.39851704239845276, |
|
"rewards/rejected": -1.0922327041625977, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.545145015558399e-06, |
|
"logits/chosen": 0.18751280009746552, |
|
"logits/rejected": 0.19455750286579132, |
|
"logps/chosen": -365.55816650390625, |
|
"logps/rejected": -388.07025146484375, |
|
"loss": 0.6032, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.6676291823387146, |
|
"rewards/margins": 0.3806609809398651, |
|
"rewards/rejected": -1.0482903718948364, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5243491490002056e-06, |
|
"logits/chosen": 0.1777867078781128, |
|
"logits/rejected": 0.18567804992198944, |
|
"logps/chosen": -341.19940185546875, |
|
"logps/rejected": -340.8212890625, |
|
"loss": 0.6013, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.6734684109687805, |
|
"rewards/margins": 0.36503511667251587, |
|
"rewards/rejected": -1.0385034084320068, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.503467749582857e-06, |
|
"logits/chosen": 0.15182527899742126, |
|
"logits/rejected": 0.15495124459266663, |
|
"logps/chosen": -309.76153564453125, |
|
"logps/rejected": -296.9736633300781, |
|
"loss": 0.5905, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.6183659434318542, |
|
"rewards/margins": 0.3707262873649597, |
|
"rewards/rejected": -0.989092230796814, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4825025608971947e-06, |
|
"logits/chosen": 0.1220487505197525, |
|
"logits/rejected": 0.18718338012695312, |
|
"logps/chosen": -390.09881591796875, |
|
"logps/rejected": -361.5142517089844, |
|
"loss": 0.5749, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7525783777236938, |
|
"rewards/margins": 0.42852458357810974, |
|
"rewards/rejected": -1.181102991104126, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4614553335304407e-06, |
|
"logits/chosen": 0.17326000332832336, |
|
"logits/rejected": 0.2386179268360138, |
|
"logps/chosen": -360.65924072265625, |
|
"logps/rejected": -356.8613586425781, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8462890386581421, |
|
"rewards/margins": 0.41173258423805237, |
|
"rewards/rejected": -1.258021593093872, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4403278249200222e-06, |
|
"logits/chosen": 0.1455180048942566, |
|
"logits/rejected": 0.17783500254154205, |
|
"logps/chosen": -328.61309814453125, |
|
"logps/rejected": -342.9682922363281, |
|
"loss": 0.5814, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.714964747428894, |
|
"rewards/margins": 0.37197887897491455, |
|
"rewards/rejected": -1.086943507194519, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4191217992068293e-06, |
|
"logits/chosen": 0.14492273330688477, |
|
"logits/rejected": 0.1336786448955536, |
|
"logps/chosen": -332.1494140625, |
|
"logps/rejected": -359.5680236816406, |
|
"loss": 0.5922, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7651108503341675, |
|
"rewards/margins": 0.34422460198402405, |
|
"rewards/rejected": -1.1093354225158691, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.3978390270879056e-06, |
|
"logits/chosen": 0.1335761845111847, |
|
"logits/rejected": 0.17906330525875092, |
|
"logps/chosen": -360.72698974609375, |
|
"logps/rejected": -350.4703674316406, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6610103249549866, |
|
"rewards/margins": 0.3798225224018097, |
|
"rewards/rejected": -1.040832757949829, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": 0.12588946521282196, |
|
"eval_logits/rejected": 0.1717854142189026, |
|
"eval_logps/chosen": -344.9545593261719, |
|
"eval_logps/rejected": -349.971923828125, |
|
"eval_loss": 0.5890761017799377, |
|
"eval_rewards/accuracies": 0.6309999823570251, |
|
"eval_rewards/chosen": -0.6740775108337402, |
|
"eval_rewards/margins": 0.3874828517436981, |
|
"eval_rewards/rejected": -1.0615602731704712, |
|
"eval_runtime": 544.4943, |
|
"eval_samples_per_second": 3.673, |
|
"eval_steps_per_second": 0.918, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3764812856685995e-06, |
|
"logits/chosen": 0.14172333478927612, |
|
"logits/rejected": 0.13182663917541504, |
|
"logps/chosen": -354.5355529785156, |
|
"logps/rejected": -362.7613830566406, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6454203724861145, |
|
"rewards/margins": 0.4289492070674896, |
|
"rewards/rejected": -1.0743694305419922, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": 0.12564226984977722, |
|
"logits/rejected": 0.13638147711753845, |
|
"logps/chosen": -399.1639404296875, |
|
"logps/rejected": -421.84246826171875, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7657305002212524, |
|
"rewards/margins": 0.4795493185520172, |
|
"rewards/rejected": -1.2452797889709473, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3335480345008907e-06, |
|
"logits/chosen": 0.10991547256708145, |
|
"logits/rejected": 0.14212583005428314, |
|
"logps/chosen": -335.371337890625, |
|
"logps/rejected": -338.5823669433594, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7321099638938904, |
|
"rewards/margins": 0.46315819025039673, |
|
"rewards/rejected": -1.1952682733535767, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3119761096666055e-06, |
|
"logits/chosen": 0.15486234426498413, |
|
"logits/rejected": 0.13218258321285248, |
|
"logps/chosen": -401.4673767089844, |
|
"logps/rejected": -383.27923583984375, |
|
"loss": 0.601, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.9068065881729126, |
|
"rewards/margins": 0.3835510313510895, |
|
"rewards/rejected": -1.2903575897216797, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.290336385060832e-06, |
|
"logits/chosen": 0.14742621779441833, |
|
"logits/rejected": 0.13394896686077118, |
|
"logps/chosen": -362.3402404785156, |
|
"logps/rejected": -378.41552734375, |
|
"loss": 0.5781, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7834215760231018, |
|
"rewards/margins": 0.44024190306663513, |
|
"rewards/rejected": -1.223663568496704, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.268630667594348e-06, |
|
"logits/chosen": 0.1237776055932045, |
|
"logits/rejected": 0.17092521488666534, |
|
"logps/chosen": -345.5756530761719, |
|
"logps/rejected": -362.49029541015625, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.6653984189033508, |
|
"rewards/margins": 0.4018256664276123, |
|
"rewards/rejected": -1.0672239065170288, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2468607696883147e-06, |
|
"logits/chosen": 0.14814460277557373, |
|
"logits/rejected": 0.16757544875144958, |
|
"logps/chosen": -364.9542541503906, |
|
"logps/rejected": -357.6173400878906, |
|
"loss": 0.5768, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6790551543235779, |
|
"rewards/margins": 0.43311649560928345, |
|
"rewards/rejected": -1.1121716499328613, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.225028509122944e-06, |
|
"logits/chosen": 0.1401061713695526, |
|
"logits/rejected": 0.2017345428466797, |
|
"logps/chosen": -364.52392578125, |
|
"logps/rejected": -358.08514404296875, |
|
"loss": 0.5438, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6000980734825134, |
|
"rewards/margins": 0.47187384963035583, |
|
"rewards/rejected": -1.0719718933105469, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2031357088857083e-06, |
|
"logits/chosen": 0.11304491758346558, |
|
"logits/rejected": 0.11087970435619354, |
|
"logps/chosen": -351.2516784667969, |
|
"logps/rejected": -373.64520263671875, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.802719235420227, |
|
"rewards/margins": 0.42133206129074097, |
|
"rewards/rejected": -1.2240512371063232, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.181184197019127e-06, |
|
"logits/chosen": 0.0998636931180954, |
|
"logits/rejected": 0.07577769458293915, |
|
"logps/chosen": -364.93353271484375, |
|
"logps/rejected": -355.5188903808594, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8589967489242554, |
|
"rewards/margins": 0.436631977558136, |
|
"rewards/rejected": -1.2956287860870361, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": 0.07607755810022354, |
|
"eval_logits/rejected": 0.11990514397621155, |
|
"eval_logps/chosen": -367.0340881347656, |
|
"eval_logps/rejected": -377.3439025878906, |
|
"eval_loss": 0.5855891704559326, |
|
"eval_rewards/accuracies": 0.6315000057220459, |
|
"eval_rewards/chosen": -0.8948729038238525, |
|
"eval_rewards/margins": 0.44040703773498535, |
|
"eval_rewards/rejected": -1.335280179977417, |
|
"eval_runtime": 544.5621, |
|
"eval_samples_per_second": 3.673, |
|
"eval_steps_per_second": 0.918, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.159175806468126e-06, |
|
"logits/chosen": 0.06636621057987213, |
|
"logits/rejected": 0.11029330641031265, |
|
"logps/chosen": -359.0235595703125, |
|
"logps/rejected": -389.5569763183594, |
|
"loss": 0.5938, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7768658995628357, |
|
"rewards/margins": 0.4340268075466156, |
|
"rewards/rejected": -1.2108927965164185, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1371123749269804e-06, |
|
"logits/chosen": 0.12099988758563995, |
|
"logits/rejected": 0.1557311713695526, |
|
"logps/chosen": -347.983154296875, |
|
"logps/rejected": -352.2116394042969, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6870421171188354, |
|
"rewards/margins": 0.41189369559288025, |
|
"rewards/rejected": -1.0989357233047485, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.114995744685877e-06, |
|
"logits/chosen": 0.09171854704618454, |
|
"logits/rejected": 0.06376312673091888, |
|
"logps/chosen": -322.3338623046875, |
|
"logps/rejected": -354.04266357421875, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6575523614883423, |
|
"rewards/margins": 0.5730739831924438, |
|
"rewards/rejected": -1.2306262254714966, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0928277624770743e-06, |
|
"logits/chosen": 0.08149708807468414, |
|
"logits/rejected": 0.10335078090429306, |
|
"logps/chosen": -356.908447265625, |
|
"logps/rejected": -348.29168701171875, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7228487730026245, |
|
"rewards/margins": 0.5269914865493774, |
|
"rewards/rejected": -1.249840259552002, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.070610279320708e-06, |
|
"logits/chosen": 0.06698820739984512, |
|
"logits/rejected": 0.12880167365074158, |
|
"logps/chosen": -370.1624450683594, |
|
"logps/rejected": -340.4905700683594, |
|
"loss": 0.6352, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.8653889894485474, |
|
"rewards/margins": 0.28897637128829956, |
|
"rewards/rejected": -1.1543653011322021, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0483451503702264e-06, |
|
"logits/chosen": 0.11194877326488495, |
|
"logits/rejected": 0.1369941681623459, |
|
"logps/chosen": -352.0809326171875, |
|
"logps/rejected": -356.8692321777344, |
|
"loss": 0.6048, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6633593440055847, |
|
"rewards/margins": 0.3344351649284363, |
|
"rewards/rejected": -0.997794508934021, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0260342347574916e-06, |
|
"logits/chosen": 0.1743788868188858, |
|
"logits/rejected": 0.20504312217235565, |
|
"logps/chosen": -359.6429748535156, |
|
"logps/rejected": -349.24249267578125, |
|
"loss": 0.5542, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.642277717590332, |
|
"rewards/margins": 0.46937379240989685, |
|
"rewards/rejected": -1.1116515398025513, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0036793954375358e-06, |
|
"logits/chosen": 0.11372777074575424, |
|
"logits/rejected": 0.14487095177173615, |
|
"logps/chosen": -341.3155212402344, |
|
"logps/rejected": -347.41705322265625, |
|
"loss": 0.5501, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6311262845993042, |
|
"rewards/margins": 0.5035245418548584, |
|
"rewards/rejected": -1.1346508264541626, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.981282499033009e-06, |
|
"logits/chosen": 0.09161444753408432, |
|
"logits/rejected": 0.11823789775371552, |
|
"logps/chosen": -359.84173583984375, |
|
"logps/rejected": -349.1728820800781, |
|
"loss": 0.5653, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.682461142539978, |
|
"rewards/margins": 0.44472140073776245, |
|
"rewards/rejected": -1.1271824836730957, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9588454156783163e-06, |
|
"logits/chosen": 0.13893449306488037, |
|
"logits/rejected": 0.10511553287506104, |
|
"logps/chosen": -345.8633728027344, |
|
"logps/rejected": -348.959228515625, |
|
"loss": 0.6072, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.7028459310531616, |
|
"rewards/margins": 0.3482803702354431, |
|
"rewards/rejected": -1.05112624168396, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_logits/chosen": 0.07725726068019867, |
|
"eval_logits/rejected": 0.12365095317363739, |
|
"eval_logps/chosen": -349.35150146484375, |
|
"eval_logps/rejected": -357.206298828125, |
|
"eval_loss": 0.5861266255378723, |
|
"eval_rewards/accuracies": 0.6269999742507935, |
|
"eval_rewards/chosen": -0.7180466055870056, |
|
"eval_rewards/margins": 0.4158574938774109, |
|
"eval_rewards/rejected": -1.1339040994644165, |
|
"eval_runtime": 545.0027, |
|
"eval_samples_per_second": 3.67, |
|
"eval_steps_per_second": 0.917, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9363700188634597e-06, |
|
"logits/chosen": 0.09426288306713104, |
|
"logits/rejected": 0.12164957821369171, |
|
"logps/chosen": -340.0146179199219, |
|
"logps/rejected": -344.6805419921875, |
|
"loss": 0.6206, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.7131950259208679, |
|
"rewards/margins": 0.31129521131515503, |
|
"rewards/rejected": -1.024490237236023, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9138581852776053e-06, |
|
"logits/chosen": 0.08359435945749283, |
|
"logits/rejected": 0.13281409442424774, |
|
"logps/chosen": -344.29339599609375, |
|
"logps/rejected": -368.0191955566406, |
|
"loss": 0.5695, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7011746168136597, |
|
"rewards/margins": 0.45050668716430664, |
|
"rewards/rejected": -1.1516811847686768, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8913117946523805e-06, |
|
"logits/chosen": 0.1351374387741089, |
|
"logits/rejected": 0.10901328176259995, |
|
"logps/chosen": -330.74658203125, |
|
"logps/rejected": -321.38983154296875, |
|
"loss": 0.5932, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.7365423440933228, |
|
"rewards/margins": 0.3617197275161743, |
|
"rewards/rejected": -1.098262071609497, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8687327296049126e-06, |
|
"logits/chosen": 0.07262937724590302, |
|
"logits/rejected": 0.10712842643260956, |
|
"logps/chosen": -345.1435546875, |
|
"logps/rejected": -357.8013610839844, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6691123247146606, |
|
"rewards/margins": 0.3879561722278595, |
|
"rewards/rejected": -1.0570685863494873, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8461228754806376e-06, |
|
"logits/chosen": 0.05750247836112976, |
|
"logits/rejected": 0.10219607502222061, |
|
"logps/chosen": -311.95819091796875, |
|
"logps/rejected": -345.6867980957031, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6487970352172852, |
|
"rewards/margins": 0.4769372344017029, |
|
"rewards/rejected": -1.1257343292236328, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.823484120195865e-06, |
|
"logits/chosen": 0.07319775968790054, |
|
"logits/rejected": 0.042475730180740356, |
|
"logps/chosen": -262.2962646484375, |
|
"logps/rejected": -308.214599609375, |
|
"loss": 0.5832, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.5616660118103027, |
|
"rewards/margins": 0.37587177753448486, |
|
"rewards/rejected": -0.9375377893447876, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.8008183540801486e-06, |
|
"logits/chosen": -0.014851477928459644, |
|
"logits/rejected": 0.042431194335222244, |
|
"logps/chosen": -351.74517822265625, |
|
"logps/rejected": -362.99530029296875, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6677108407020569, |
|
"rewards/margins": 0.3557429313659668, |
|
"rewards/rejected": -1.023453712463379, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7781274697184353e-06, |
|
"logits/chosen": 0.008823997341096401, |
|
"logits/rejected": 0.035457246005535126, |
|
"logps/chosen": -338.5292053222656, |
|
"logps/rejected": -352.94232177734375, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6730680465698242, |
|
"rewards/margins": 0.4013848304748535, |
|
"rewards/rejected": -1.0744528770446777, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7554133617930397e-06, |
|
"logits/chosen": 0.019716525450348854, |
|
"logits/rejected": 0.012831945903599262, |
|
"logps/chosen": -341.1331787109375, |
|
"logps/rejected": -363.53704833984375, |
|
"loss": 0.6296, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.7185381650924683, |
|
"rewards/margins": 0.3087863326072693, |
|
"rewards/rejected": -1.0273244380950928, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7326779269254363e-06, |
|
"logits/chosen": -0.04277997091412544, |
|
"logits/rejected": -0.009286623448133469, |
|
"logps/chosen": -321.3934326171875, |
|
"logps/rejected": -318.6489562988281, |
|
"loss": 0.6338, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6931952834129333, |
|
"rewards/margins": 0.2543143033981323, |
|
"rewards/rejected": -0.9475095868110657, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -0.030136894434690475, |
|
"eval_logits/rejected": 0.00869889184832573, |
|
"eval_logps/chosen": -349.098388671875, |
|
"eval_logps/rejected": -356.585205078125, |
|
"eval_loss": 0.5851887464523315, |
|
"eval_rewards/accuracies": 0.6340000033378601, |
|
"eval_rewards/chosen": -0.7155155539512634, |
|
"eval_rewards/margins": 0.41217753291130066, |
|
"eval_rewards/rejected": -1.1276930570602417, |
|
"eval_runtime": 545.3798, |
|
"eval_samples_per_second": 3.667, |
|
"eval_steps_per_second": 0.917, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7099230635178954e-06, |
|
"logits/chosen": -0.03830226510763168, |
|
"logits/rejected": 0.004055617842823267, |
|
"logps/chosen": -313.728759765625, |
|
"logps/rejected": -323.69134521484375, |
|
"loss": 0.5366, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6489647030830383, |
|
"rewards/margins": 0.5445326566696167, |
|
"rewards/rejected": -1.1934973001480103, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6871506715949608e-06, |
|
"logits/chosen": 0.018227530643343925, |
|
"logits/rejected": 0.021982427686452866, |
|
"logps/chosen": -351.05743408203125, |
|
"logps/rejected": -378.8858337402344, |
|
"loss": 0.5654, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7617127895355225, |
|
"rewards/margins": 0.49486279487609863, |
|
"rewards/rejected": -1.256575584411621, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6643626526448063e-06, |
|
"logits/chosen": -0.030996423214673996, |
|
"logits/rejected": 0.006755811162292957, |
|
"logps/chosen": -323.4994201660156, |
|
"logps/rejected": -362.4649353027344, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8070539236068726, |
|
"rewards/margins": 0.3914108872413635, |
|
"rewards/rejected": -1.1984648704528809, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6415609094604562e-06, |
|
"logits/chosen": -0.05985324829816818, |
|
"logits/rejected": -0.0018987020011991262, |
|
"logps/chosen": -371.3141174316406, |
|
"logps/rejected": -374.3472595214844, |
|
"loss": 0.5541, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7072352170944214, |
|
"rewards/margins": 0.5413237810134888, |
|
"rewards/rejected": -1.2485589981079102, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.618747345980904e-06, |
|
"logits/chosen": -0.0632224753499031, |
|
"logits/rejected": -0.028610538691282272, |
|
"logps/chosen": -346.6549377441406, |
|
"logps/rejected": -351.1329345703125, |
|
"loss": 0.5581, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5960323214530945, |
|
"rewards/margins": 0.5037523508071899, |
|
"rewards/rejected": -1.0997846126556396, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.595923867132136e-06, |
|
"logits/chosen": -0.02200886234641075, |
|
"logits/rejected": -0.015088886022567749, |
|
"logps/chosen": -280.63067626953125, |
|
"logps/rejected": -327.4749755859375, |
|
"loss": 0.5462, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5528582334518433, |
|
"rewards/margins": 0.4946451187133789, |
|
"rewards/rejected": -1.0475032329559326, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5730923786680672e-06, |
|
"logits/chosen": -0.06903935968875885, |
|
"logits/rejected": -0.05149111896753311, |
|
"logps/chosen": -348.12213134765625, |
|
"logps/rejected": -344.7447509765625, |
|
"loss": 0.5743, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7621151208877563, |
|
"rewards/margins": 0.47058719396591187, |
|
"rewards/rejected": -1.232702374458313, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5502547870114137e-06, |
|
"logits/chosen": -0.07594850659370422, |
|
"logits/rejected": -0.05803506448864937, |
|
"logps/chosen": -301.5760498046875, |
|
"logps/rejected": -354.6790771484375, |
|
"loss": 0.5983, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.7226616740226746, |
|
"rewards/margins": 0.3710746169090271, |
|
"rewards/rejected": -1.0937364101409912, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.527412999094507e-06, |
|
"logits/chosen": -0.06949422508478165, |
|
"logits/rejected": -0.053729571402072906, |
|
"logps/chosen": -362.20013427734375, |
|
"logps/rejected": -371.27325439453125, |
|
"loss": 0.5922, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8210228681564331, |
|
"rewards/margins": 0.4187556207180023, |
|
"rewards/rejected": -1.2397785186767578, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.504568922200064e-06, |
|
"logits/chosen": -0.07110200822353363, |
|
"logits/rejected": -0.049906354397535324, |
|
"logps/chosen": -334.72540283203125, |
|
"logps/rejected": -360.26605224609375, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7283493280410767, |
|
"rewards/margins": 0.47346624732017517, |
|
"rewards/rejected": -1.2018156051635742, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_logits/chosen": -0.059490080922842026, |
|
"eval_logits/rejected": -0.022946665063500404, |
|
"eval_logps/chosen": -351.37261962890625, |
|
"eval_logps/rejected": -360.6402282714844, |
|
"eval_loss": 0.5859557390213013, |
|
"eval_rewards/accuracies": 0.6340000033378601, |
|
"eval_rewards/chosen": -0.7382580637931824, |
|
"eval_rewards/margins": 0.42998480796813965, |
|
"eval_rewards/rejected": -1.1682429313659668, |
|
"eval_runtime": 544.2836, |
|
"eval_samples_per_second": 3.675, |
|
"eval_steps_per_second": 0.919, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4817244638019333e-06, |
|
"logits/chosen": -0.032096896320581436, |
|
"logits/rejected": -0.0033429942559450865, |
|
"logps/chosen": -323.3070068359375, |
|
"logps/rejected": -365.35467529296875, |
|
"loss": 0.5343, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6752323508262634, |
|
"rewards/margins": 0.5560353994369507, |
|
"rewards/rejected": -1.2312676906585693, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4588815314058155e-06, |
|
"logits/chosen": -0.070304274559021, |
|
"logits/rejected": -0.0624236986041069, |
|
"logps/chosen": -384.3260803222656, |
|
"logps/rejected": -400.78741455078125, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7910431623458862, |
|
"rewards/margins": 0.3856695294380188, |
|
"rewards/rejected": -1.1767126321792603, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4360420323899922e-06, |
|
"logits/chosen": -0.10860241949558258, |
|
"logits/rejected": -0.08050969243049622, |
|
"logps/chosen": -387.05120849609375, |
|
"logps/rejected": -377.1029357910156, |
|
"loss": 0.5609, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9089959263801575, |
|
"rewards/margins": 0.5126160979270935, |
|
"rewards/rejected": -1.4216121435165405, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4132078738460585e-06, |
|
"logits/chosen": -0.06523410975933075, |
|
"logits/rejected": -0.08063043653964996, |
|
"logps/chosen": -368.7019348144531, |
|
"logps/rejected": -400.15985107421875, |
|
"loss": 0.5792, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9961389303207397, |
|
"rewards/margins": 0.48970723152160645, |
|
"rewards/rejected": -1.4858464002609253, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3903809624196826e-06, |
|
"logits/chosen": -0.06891070306301117, |
|
"logits/rejected": -0.04549848660826683, |
|
"logps/chosen": -364.5621032714844, |
|
"logps/rejected": -373.35076904296875, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.0895036458969116, |
|
"rewards/margins": 0.43499454855918884, |
|
"rewards/rejected": -1.5244982242584229, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3675632041513978e-06, |
|
"logits/chosen": -0.052164457738399506, |
|
"logits/rejected": -0.03239618241786957, |
|
"logps/chosen": -338.9453430175781, |
|
"logps/rejected": -358.5426025390625, |
|
"loss": 0.5825, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.007673740386963, |
|
"rewards/margins": 0.40731167793273926, |
|
"rewards/rejected": -1.4149854183197021, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3447565043174533e-06, |
|
"logits/chosen": -0.09212299436330795, |
|
"logits/rejected": -0.09307185560464859, |
|
"logps/chosen": -365.0643005371094, |
|
"logps/rejected": -378.61541748046875, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9552974700927734, |
|
"rewards/margins": 0.4227164387702942, |
|
"rewards/rejected": -1.3780138492584229, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.321962767270724e-06, |
|
"logits/chosen": -0.03847888484597206, |
|
"logits/rejected": -0.023943722248077393, |
|
"logps/chosen": -340.03472900390625, |
|
"logps/rejected": -382.23504638671875, |
|
"loss": 0.537, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8605219721794128, |
|
"rewards/margins": 0.5241655707359314, |
|
"rewards/rejected": -1.3846876621246338, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.299183896281692e-06, |
|
"logits/chosen": -0.10258199274539948, |
|
"logits/rejected": -0.038494259119033813, |
|
"logps/chosen": -355.46978759765625, |
|
"logps/rejected": -364.3291320800781, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.8702327609062195, |
|
"rewards/margins": 0.5633158087730408, |
|
"rewards/rejected": -1.4335486888885498, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2764217933795297e-06, |
|
"logits/chosen": -0.09681537002325058, |
|
"logits/rejected": -0.14781834185123444, |
|
"logps/chosen": -382.71356201171875, |
|
"logps/rejected": -385.5505676269531, |
|
"loss": 0.6103, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.9750157594680786, |
|
"rewards/margins": 0.3789269030094147, |
|
"rewards/rejected": -1.353942632675171, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -0.10652041435241699, |
|
"eval_logits/rejected": -0.07140377908945084, |
|
"eval_logps/chosen": -369.8921203613281, |
|
"eval_logps/rejected": -382.3634948730469, |
|
"eval_loss": 0.5820576548576355, |
|
"eval_rewards/accuracies": 0.6345000267028809, |
|
"eval_rewards/chosen": -0.9234529733657837, |
|
"eval_rewards/margins": 0.46202272176742554, |
|
"eval_rewards/rejected": -1.385475754737854, |
|
"eval_runtime": 545.4224, |
|
"eval_samples_per_second": 3.667, |
|
"eval_steps_per_second": 0.917, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2536783591932786e-06, |
|
"logits/chosen": -0.019483990967273712, |
|
"logits/rejected": -0.0737195685505867, |
|
"logps/chosen": -357.1891174316406, |
|
"logps/rejected": -374.19061279296875, |
|
"loss": 0.5947, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8676769137382507, |
|
"rewards/margins": 0.434825599193573, |
|
"rewards/rejected": -1.3025026321411133, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.230955492793149e-06, |
|
"logits/chosen": -0.07425358891487122, |
|
"logits/rejected": -0.042141932994127274, |
|
"logps/chosen": -356.19927978515625, |
|
"logps/rejected": -359.3014221191406, |
|
"loss": 0.5553, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.8453773260116577, |
|
"rewards/margins": 0.47125759720802307, |
|
"rewards/rejected": -1.3166348934173584, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.208255091531947e-06, |
|
"logits/chosen": -0.0675973892211914, |
|
"logits/rejected": -0.07175878435373306, |
|
"logps/chosen": -349.75653076171875, |
|
"logps/rejected": -339.6773986816406, |
|
"loss": 0.5812, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.8538468480110168, |
|
"rewards/margins": 0.4233093857765198, |
|
"rewards/rejected": -1.277156114578247, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1855790508866435e-06, |
|
"logits/chosen": -0.10004396736621857, |
|
"logits/rejected": -0.0791083425283432, |
|
"logps/chosen": -374.1147766113281, |
|
"logps/rejected": -397.65362548828125, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8433374166488647, |
|
"rewards/margins": 0.45402494072914124, |
|
"rewards/rejected": -1.2973623275756836, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.162929264300107e-06, |
|
"logits/chosen": -0.07542629539966583, |
|
"logits/rejected": -0.037315886467695236, |
|
"logps/chosen": -337.01116943359375, |
|
"logps/rejected": -340.6423034667969, |
|
"loss": 0.5868, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8542795181274414, |
|
"rewards/margins": 0.3838973939418793, |
|
"rewards/rejected": -1.238176941871643, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1403076230230006e-06, |
|
"logits/chosen": -0.04517320543527603, |
|
"logits/rejected": -0.029649287462234497, |
|
"logps/chosen": -352.4161376953125, |
|
"logps/rejected": -330.69232177734375, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.8612442016601562, |
|
"rewards/margins": 0.444766104221344, |
|
"rewards/rejected": -1.306010365486145, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.11771601595586e-06, |
|
"logits/chosen": -0.05350247770547867, |
|
"logits/rejected": -0.04654960706830025, |
|
"logps/chosen": -330.7650451660156, |
|
"logps/rejected": -376.1535339355469, |
|
"loss": 0.5635, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7328948974609375, |
|
"rewards/margins": 0.4829980731010437, |
|
"rewards/rejected": -1.215893030166626, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0951563294913737e-06, |
|
"logits/chosen": -0.14542168378829956, |
|
"logits/rejected": -0.09369027614593506, |
|
"logps/chosen": -351.79052734375, |
|
"logps/rejected": -338.8112487792969, |
|
"loss": 0.6275, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9116989374160767, |
|
"rewards/margins": 0.3388912081718445, |
|
"rewards/rejected": -1.250590205192566, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0726304473568693e-06, |
|
"logits/chosen": -0.08726407587528229, |
|
"logits/rejected": -0.07750742137432098, |
|
"logps/chosen": -325.08526611328125, |
|
"logps/rejected": -386.10626220703125, |
|
"loss": 0.5793, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7758111357688904, |
|
"rewards/margins": 0.4725108742713928, |
|
"rewards/rejected": -1.2483221292495728, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.050140250457023e-06, |
|
"logits/chosen": -0.07832643389701843, |
|
"logits/rejected": -0.0965295284986496, |
|
"logps/chosen": -324.20269775390625, |
|
"logps/rejected": -357.1085510253906, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7607049942016602, |
|
"rewards/margins": 0.48484665155410767, |
|
"rewards/rejected": -1.245551586151123, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_logits/chosen": -0.08411271125078201, |
|
"eval_logits/rejected": -0.048063673079013824, |
|
"eval_logps/chosen": -354.1103820800781, |
|
"eval_logps/rejected": -364.19696044921875, |
|
"eval_loss": 0.5836405158042908, |
|
"eval_rewards/accuracies": 0.6334999799728394, |
|
"eval_rewards/chosen": -0.76563560962677, |
|
"eval_rewards/margins": 0.438174843788147, |
|
"eval_rewards/rejected": -1.203810453414917, |
|
"eval_runtime": 544.4264, |
|
"eval_samples_per_second": 3.674, |
|
"eval_steps_per_second": 0.918, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0276876167168042e-06, |
|
"logits/chosen": -0.0398612916469574, |
|
"logits/rejected": 0.02817544713616371, |
|
"logps/chosen": -361.60784912109375, |
|
"logps/rejected": -378.54473876953125, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7841297388076782, |
|
"rewards/margins": 0.4361855089664459, |
|
"rewards/rejected": -1.2203152179718018, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0052744209246682e-06, |
|
"logits/chosen": -0.11950035393238068, |
|
"logits/rejected": -0.09671922028064728, |
|
"logps/chosen": -326.01458740234375, |
|
"logps/rejected": -356.91595458984375, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7009235620498657, |
|
"rewards/margins": 0.41417956352233887, |
|
"rewards/rejected": -1.1151031255722046, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9829025345760127e-06, |
|
"logits/chosen": -0.10013041645288467, |
|
"logits/rejected": -0.09448160976171494, |
|
"logps/chosen": -394.4470520019531, |
|
"logps/rejected": -384.83270263671875, |
|
"loss": 0.5991, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.8150526285171509, |
|
"rewards/margins": 0.39240118861198425, |
|
"rewards/rejected": -1.2074538469314575, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9605738257169115e-06, |
|
"logits/chosen": -0.02237033285200596, |
|
"logits/rejected": -0.04650190472602844, |
|
"logps/chosen": -323.2276916503906, |
|
"logps/rejected": -357.5064697265625, |
|
"loss": 0.6222, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.7723985910415649, |
|
"rewards/margins": 0.3768603801727295, |
|
"rewards/rejected": -1.1492589712142944, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9382901587881275e-06, |
|
"logits/chosen": -0.08437281101942062, |
|
"logits/rejected": -0.03884163498878479, |
|
"logps/chosen": -325.86907958984375, |
|
"logps/rejected": -347.9014892578125, |
|
"loss": 0.5916, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6954203844070435, |
|
"rewards/margins": 0.40348443388938904, |
|
"rewards/rejected": -1.0989047288894653, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.916053394469437e-06, |
|
"logits/chosen": -0.058065224438905716, |
|
"logits/rejected": -0.028360579162836075, |
|
"logps/chosen": -287.13189697265625, |
|
"logps/rejected": -332.1973571777344, |
|
"loss": 0.5753, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6241050958633423, |
|
"rewards/margins": 0.4590820372104645, |
|
"rewards/rejected": -1.083187222480774, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8938653895242604e-06, |
|
"logits/chosen": -0.03688746690750122, |
|
"logits/rejected": -0.03670436143875122, |
|
"logps/chosen": -300.11126708984375, |
|
"logps/rejected": -324.38092041015625, |
|
"loss": 0.5589, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5876749753952026, |
|
"rewards/margins": 0.47541046142578125, |
|
"rewards/rejected": -1.0630855560302734, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8717279966446267e-06, |
|
"logits/chosen": -0.09554643929004669, |
|
"logits/rejected": -0.03961080312728882, |
|
"logps/chosen": -335.5707702636719, |
|
"logps/rejected": -373.040283203125, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6644365787506104, |
|
"rewards/margins": 0.5852737426757812, |
|
"rewards/rejected": -1.2497103214263916, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8496430642964698e-06, |
|
"logits/chosen": -0.1166432723402977, |
|
"logits/rejected": -0.0716785341501236, |
|
"logps/chosen": -326.2940979003906, |
|
"logps/rejected": -342.4819030761719, |
|
"loss": 0.5844, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7313794493675232, |
|
"rewards/margins": 0.3761158585548401, |
|
"rewards/rejected": -1.1074954271316528, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.827612436565286e-06, |
|
"logits/chosen": -0.11490534245967865, |
|
"logits/rejected": -0.09440793097019196, |
|
"logps/chosen": -334.36724853515625, |
|
"logps/rejected": -351.0407409667969, |
|
"loss": 0.5846, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8265805244445801, |
|
"rewards/margins": 0.4359659254550934, |
|
"rewards/rejected": -1.262546420097351, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -0.11999206244945526, |
|
"eval_logits/rejected": -0.0871233344078064, |
|
"eval_logps/chosen": -365.27813720703125, |
|
"eval_logps/rejected": -377.25079345703125, |
|
"eval_loss": 0.5803542733192444, |
|
"eval_rewards/accuracies": 0.6334999799728394, |
|
"eval_rewards/chosen": -0.8773132562637329, |
|
"eval_rewards/margins": 0.45703527331352234, |
|
"eval_rewards/rejected": -1.3343485593795776, |
|
"eval_runtime": 545.1487, |
|
"eval_samples_per_second": 3.669, |
|
"eval_steps_per_second": 0.917, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8056379530021492e-06, |
|
"logits/chosen": -0.13810701668262482, |
|
"logits/rejected": -0.10457410663366318, |
|
"logps/chosen": -339.19525146484375, |
|
"logps/rejected": -346.99029541015625, |
|
"loss": 0.6025, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8567565679550171, |
|
"rewards/margins": 0.4513315260410309, |
|
"rewards/rejected": -1.3080880641937256, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7837214484701154e-06, |
|
"logits/chosen": -0.08872968703508377, |
|
"logits/rejected": -0.06984793394804001, |
|
"logps/chosen": -364.3599548339844, |
|
"logps/rejected": -402.4684753417969, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9384418725967407, |
|
"rewards/margins": 0.5457102060317993, |
|
"rewards/rejected": -1.48415207862854, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7618647529910043e-06, |
|
"logits/chosen": -0.11679291725158691, |
|
"logits/rejected": -0.02643129602074623, |
|
"logps/chosen": -353.79022216796875, |
|
"logps/rejected": -382.530517578125, |
|
"loss": 0.5517, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9540901184082031, |
|
"rewards/margins": 0.555814802646637, |
|
"rewards/rejected": -1.5099048614501953, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7400696915925996e-06, |
|
"logits/chosen": -0.09259249269962311, |
|
"logits/rejected": -0.04453275352716446, |
|
"logps/chosen": -364.08837890625, |
|
"logps/rejected": -407.524169921875, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.8758049011230469, |
|
"rewards/margins": 0.5904634594917297, |
|
"rewards/rejected": -1.4662683010101318, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.718338084156254e-06, |
|
"logits/chosen": -0.08697889745235443, |
|
"logits/rejected": -0.06519980728626251, |
|
"logps/chosen": -361.45709228515625, |
|
"logps/rejected": -372.6293029785156, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.866511344909668, |
|
"rewards/margins": 0.4529503881931305, |
|
"rewards/rejected": -1.3194618225097656, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.6966717452649372e-06, |
|
"logits/chosen": -0.1362774819135666, |
|
"logits/rejected": -0.11426180601119995, |
|
"logps/chosen": -371.2001037597656, |
|
"logps/rejected": -412.7559509277344, |
|
"loss": 0.5511, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.9300951957702637, |
|
"rewards/margins": 0.5544837713241577, |
|
"rewards/rejected": -1.4845788478851318, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6750724840517103e-06, |
|
"logits/chosen": -0.08611822873353958, |
|
"logits/rejected": -0.05831156298518181, |
|
"logps/chosen": -380.17535400390625, |
|
"logps/rejected": -386.8459777832031, |
|
"loss": 0.5452, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.8702682256698608, |
|
"rewards/margins": 0.552179217338562, |
|
"rewards/rejected": -1.4224474430084229, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6535421040486686e-06, |
|
"logits/chosen": -0.08759529888629913, |
|
"logits/rejected": -0.07618779689073563, |
|
"logps/chosen": -335.5084228515625, |
|
"logps/rejected": -350.1391296386719, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8075240850448608, |
|
"rewards/margins": 0.4414435029029846, |
|
"rewards/rejected": -1.2489675283432007, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6320824030363458e-06, |
|
"logits/chosen": -0.10661186277866364, |
|
"logits/rejected": -0.008265363052487373, |
|
"logps/chosen": -349.26861572265625, |
|
"logps/rejected": -366.3367614746094, |
|
"loss": 0.5722, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.8367792963981628, |
|
"rewards/margins": 0.4592077136039734, |
|
"rewards/rejected": -1.2959868907928467, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6106951728936028e-06, |
|
"logits/chosen": -0.0859353244304657, |
|
"logits/rejected": -0.04287604242563248, |
|
"logps/chosen": -309.8092346191406, |
|
"logps/rejected": -339.9555358886719, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8133733868598938, |
|
"rewards/margins": 0.4632648527622223, |
|
"rewards/rejected": -1.276638150215149, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -0.09223704785108566, |
|
"eval_logits/rejected": -0.0575764924287796, |
|
"eval_logps/chosen": -361.7434997558594, |
|
"eval_logps/rejected": -374.26409912109375, |
|
"eval_loss": 0.5833784341812134, |
|
"eval_rewards/accuracies": 0.6340000033378601, |
|
"eval_rewards/chosen": -0.8419671058654785, |
|
"eval_rewards/margins": 0.4625144600868225, |
|
"eval_rewards/rejected": -1.3044817447662354, |
|
"eval_runtime": 544.9858, |
|
"eval_samples_per_second": 3.67, |
|
"eval_steps_per_second": 0.917, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5893821994479996e-06, |
|
"logits/chosen": -0.038878120481967926, |
|
"logits/rejected": -0.01672372780740261, |
|
"logps/chosen": -355.44061279296875, |
|
"logps/rejected": -390.06854248046875, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7850661277770996, |
|
"rewards/margins": 0.603364109992981, |
|
"rewards/rejected": -1.3884302377700806, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5681452623266868e-06, |
|
"logits/chosen": -0.06229633837938309, |
|
"logits/rejected": -0.0467236191034317, |
|
"logps/chosen": -382.23602294921875, |
|
"logps/rejected": -387.97332763671875, |
|
"loss": 0.5756, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8509229421615601, |
|
"rewards/margins": 0.5015177726745605, |
|
"rewards/rejected": -1.3524408340454102, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5469861348078014e-06, |
|
"logits/chosen": -0.06945382058620453, |
|
"logits/rejected": -0.06613973528146744, |
|
"logps/chosen": -349.61444091796875, |
|
"logps/rejected": -358.06915283203125, |
|
"loss": 0.6046, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.8453807830810547, |
|
"rewards/margins": 0.39862456917762756, |
|
"rewards/rejected": -1.2440054416656494, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5259065836724035e-06, |
|
"logits/chosen": -0.08767147362232208, |
|
"logits/rejected": -0.06391812860965729, |
|
"logps/chosen": -341.8178405761719, |
|
"logps/rejected": -372.5906677246094, |
|
"loss": 0.5727, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7495141625404358, |
|
"rewards/margins": 0.47481536865234375, |
|
"rewards/rejected": -1.2243295907974243, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5049083690569456e-06, |
|
"logits/chosen": -0.10644855350255966, |
|
"logits/rejected": -0.0691339522600174, |
|
"logps/chosen": -362.25970458984375, |
|
"logps/rejected": -358.62139892578125, |
|
"loss": 0.591, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8960472941398621, |
|
"rewards/margins": 0.43379873037338257, |
|
"rewards/rejected": -1.3298461437225342, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4839932443063057e-06, |
|
"logits/chosen": -0.02317485585808754, |
|
"logits/rejected": -0.03716563433408737, |
|
"logps/chosen": -316.1383056640625, |
|
"logps/rejected": -324.560546875, |
|
"loss": 0.5806, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.7911745309829712, |
|
"rewards/margins": 0.4005607068538666, |
|
"rewards/rejected": -1.1917351484298706, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4631629558273803e-06, |
|
"logits/chosen": -0.044547874480485916, |
|
"logits/rejected": -0.05655717849731445, |
|
"logps/chosen": -363.1641845703125, |
|
"logps/rejected": -377.47503662109375, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.744333803653717, |
|
"rewards/margins": 0.5686662793159485, |
|
"rewards/rejected": -1.3130000829696655, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4424192429432657e-06, |
|
"logits/chosen": 0.012710513547062874, |
|
"logits/rejected": -0.007722015026956797, |
|
"logps/chosen": -382.29779052734375, |
|
"logps/rejected": -335.0196838378906, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.7723060846328735, |
|
"rewards/margins": 0.41917362809181213, |
|
"rewards/rejected": -1.1914796829223633, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.421763837748016e-06, |
|
"logits/chosen": -0.06088981777429581, |
|
"logits/rejected": -0.05146254226565361, |
|
"logps/chosen": -351.4329528808594, |
|
"logps/rejected": -388.3905944824219, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8842247724533081, |
|
"rewards/margins": 0.37267476320266724, |
|
"rewards/rejected": -1.2568994760513306, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.401198464962021e-06, |
|
"logits/chosen": -0.03741256147623062, |
|
"logits/rejected": -0.06474824994802475, |
|
"logps/chosen": -347.2456970214844, |
|
"logps/rejected": -356.04510498046875, |
|
"loss": 0.5565, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.775657057762146, |
|
"rewards/margins": 0.47477155923843384, |
|
"rewards/rejected": -1.250428557395935, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -0.06430140882730484, |
|
"eval_logits/rejected": -0.028463589027523994, |
|
"eval_logps/chosen": -357.6354675292969, |
|
"eval_logps/rejected": -369.3044128417969, |
|
"eval_loss": 0.5810229778289795, |
|
"eval_rewards/accuracies": 0.6345000267028809, |
|
"eval_rewards/chosen": -0.8008865714073181, |
|
"eval_rewards/margins": 0.45399823784828186, |
|
"eval_rewards/rejected": -1.2548847198486328, |
|
"eval_runtime": 545.1722, |
|
"eval_samples_per_second": 3.669, |
|
"eval_steps_per_second": 0.917, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3807248417879896e-06, |
|
"logits/chosen": -0.04454026371240616, |
|
"logits/rejected": -0.05448485538363457, |
|
"logps/chosen": -329.34661865234375, |
|
"logps/rejected": -327.6650695800781, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7315627336502075, |
|
"rewards/margins": 0.4313267767429352, |
|
"rewards/rejected": -1.1628895998001099, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3603446777675665e-06, |
|
"logits/chosen": -0.06595277786254883, |
|
"logits/rejected": -0.06403062492609024, |
|
"logps/chosen": -338.12213134765625, |
|
"logps/rejected": -354.1922607421875, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.8146448135375977, |
|
"rewards/margins": 0.40636712312698364, |
|
"rewards/rejected": -1.2210118770599365, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3400596746385817e-06, |
|
"logits/chosen": -0.04939567297697067, |
|
"logits/rejected": -0.0366520918905735, |
|
"logps/chosen": -371.56658935546875, |
|
"logps/rejected": -373.9921569824219, |
|
"loss": 0.588, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.8429405093193054, |
|
"rewards/margins": 0.44567233324050903, |
|
"rewards/rejected": -1.2886126041412354, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3198715261929587e-06, |
|
"logits/chosen": -0.04225140064954758, |
|
"logits/rejected": -0.0014917313819751143, |
|
"logps/chosen": -399.6859436035156, |
|
"logps/rejected": -362.3399658203125, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8035332560539246, |
|
"rewards/margins": 0.4362492561340332, |
|
"rewards/rejected": -1.2397825717926025, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.2997819181352823e-06, |
|
"logits/chosen": -0.0454365499317646, |
|
"logits/rejected": -0.055598776787519455, |
|
"logps/chosen": -379.8087463378906, |
|
"logps/rejected": -388.2283630371094, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8369846343994141, |
|
"rewards/margins": 0.4632073938846588, |
|
"rewards/rejected": -1.30019211769104, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2797925279420454e-06, |
|
"logits/chosen": -0.0032060600351542234, |
|
"logits/rejected": 0.0273395087569952, |
|
"logps/chosen": -372.40802001953125, |
|
"logps/rejected": -371.7253112792969, |
|
"loss": 0.62, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.8528251647949219, |
|
"rewards/margins": 0.36443841457366943, |
|
"rewards/rejected": -1.2172635793685913, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2599050247215764e-06, |
|
"logits/chosen": -0.01836007833480835, |
|
"logits/rejected": -0.006750327534973621, |
|
"logps/chosen": -360.90728759765625, |
|
"logps/rejected": -392.369140625, |
|
"loss": 0.5012, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7728686332702637, |
|
"rewards/margins": 0.625950813293457, |
|
"rewards/rejected": -1.3988194465637207, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2401210690746705e-06, |
|
"logits/chosen": -0.01354785542935133, |
|
"logits/rejected": 0.06222701072692871, |
|
"logps/chosen": -352.80438232421875, |
|
"logps/rejected": -359.9801330566406, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9136732220649719, |
|
"rewards/margins": 0.35234978795051575, |
|
"rewards/rejected": -1.2660231590270996, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2204423129559306e-06, |
|
"logits/chosen": -0.11001361906528473, |
|
"logits/rejected": -0.03707585483789444, |
|
"logps/chosen": -392.40118408203125, |
|
"logps/rejected": -391.91204833984375, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9878837466239929, |
|
"rewards/margins": 0.30070778727531433, |
|
"rewards/rejected": -1.2885915040969849, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.20087039953583e-06, |
|
"logits/chosen": -0.03697098046541214, |
|
"logits/rejected": -0.031819600611925125, |
|
"logps/chosen": -363.90045166015625, |
|
"logps/rejected": -389.9029541015625, |
|
"loss": 0.5614, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.8472734689712524, |
|
"rewards/margins": 0.538908839225769, |
|
"rewards/rejected": -1.386182188987732, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_logits/chosen": -0.06983576714992523, |
|
"eval_logits/rejected": -0.035794876515865326, |
|
"eval_logps/chosen": -372.7677307128906, |
|
"eval_logps/rejected": -385.643310546875, |
|
"eval_loss": 0.5781762599945068, |
|
"eval_rewards/accuracies": 0.6324999928474426, |
|
"eval_rewards/chosen": -0.9522089958190918, |
|
"eval_rewards/margins": 0.46606478095054626, |
|
"eval_rewards/rejected": -1.4182738065719604, |
|
"eval_runtime": 544.3902, |
|
"eval_samples_per_second": 3.674, |
|
"eval_steps_per_second": 0.918, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.181406963063507e-06, |
|
"logits/chosen": -0.035129472613334656, |
|
"logits/rejected": -0.05755387619137764, |
|
"logps/chosen": -368.37615966796875, |
|
"logps/rejected": -361.5987243652344, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9345572590827942, |
|
"rewards/margins": 0.43491941690444946, |
|
"rewards/rejected": -1.369476556777954, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1620536287303052e-06, |
|
"logits/chosen": -0.03167080506682396, |
|
"logits/rejected": -0.009920826181769371, |
|
"logps/chosen": -349.4122314453125, |
|
"logps/rejected": -378.13641357421875, |
|
"loss": 0.5641, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9168907403945923, |
|
"rewards/margins": 0.5256475210189819, |
|
"rewards/rejected": -1.4425382614135742, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1428120125340717e-06, |
|
"logits/chosen": -0.0319071002304554, |
|
"logits/rejected": -0.09542439877986908, |
|
"logps/chosen": -364.5460510253906, |
|
"logps/rejected": -374.586669921875, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.9462617039680481, |
|
"rewards/margins": 0.45422202348709106, |
|
"rewards/rejected": -1.4004836082458496, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.123683721144223e-06, |
|
"logits/chosen": -0.06852748990058899, |
|
"logits/rejected": -0.08457393199205399, |
|
"logps/chosen": -362.50433349609375, |
|
"logps/rejected": -380.47039794921875, |
|
"loss": 0.5369, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.930228590965271, |
|
"rewards/margins": 0.5651718378067017, |
|
"rewards/rejected": -1.4954004287719727, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1046703517675848e-06, |
|
"logits/chosen": -0.049139805138111115, |
|
"logits/rejected": -0.047232285141944885, |
|
"logps/chosen": -340.8487243652344, |
|
"logps/rejected": -377.1474304199219, |
|
"loss": 0.6067, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.9621003270149231, |
|
"rewards/margins": 0.389037549495697, |
|
"rewards/rejected": -1.3511378765106201, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.085773492015028e-06, |
|
"logits/chosen": -0.07854189723730087, |
|
"logits/rejected": -0.054329145699739456, |
|
"logps/chosen": -389.5224914550781, |
|
"logps/rejected": -377.756103515625, |
|
"loss": 0.5664, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0421987771987915, |
|
"rewards/margins": 0.48210200667381287, |
|
"rewards/rejected": -1.5243008136749268, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0669947197689034e-06, |
|
"logits/chosen": -0.06451062858104706, |
|
"logits/rejected": -0.047071583569049835, |
|
"logps/chosen": -326.9189758300781, |
|
"logps/rejected": -366.94000244140625, |
|
"loss": 0.641, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.016249656677246, |
|
"rewards/margins": 0.30962103605270386, |
|
"rewards/rejected": -1.3258706331253052, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.048335603051291e-06, |
|
"logits/chosen": -0.09542266279459, |
|
"logits/rejected": -0.05664687231183052, |
|
"logps/chosen": -363.3104248046875, |
|
"logps/rejected": -388.03424072265625, |
|
"loss": 0.5604, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9354135394096375, |
|
"rewards/margins": 0.4914797842502594, |
|
"rewards/rejected": -1.4268933534622192, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0297976998930665e-06, |
|
"logits/chosen": -0.07029617577791214, |
|
"logits/rejected": -0.07720007747411728, |
|
"logps/chosen": -372.33538818359375, |
|
"logps/rejected": -371.91656494140625, |
|
"loss": 0.6013, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.960688591003418, |
|
"rewards/margins": 0.40848368406295776, |
|
"rewards/rejected": -1.3691723346710205, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0113825582038078e-06, |
|
"logits/chosen": -0.051131028681993484, |
|
"logits/rejected": -0.03366447612643242, |
|
"logps/chosen": -368.6939392089844, |
|
"logps/rejected": -360.71551513671875, |
|
"loss": 0.608, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.0129467248916626, |
|
"rewards/margins": 0.43145838379859924, |
|
"rewards/rejected": -1.4444050788879395, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -0.05713397264480591, |
|
"eval_logits/rejected": -0.022933386266231537, |
|
"eval_logps/chosen": -371.3293151855469, |
|
"eval_logps/rejected": -383.7585144042969, |
|
"eval_loss": 0.5776425004005432, |
|
"eval_rewards/accuracies": 0.6359999775886536, |
|
"eval_rewards/chosen": -0.9378249645233154, |
|
"eval_rewards/margins": 0.46160098910331726, |
|
"eval_rewards/rejected": -1.399425983428955, |
|
"eval_runtime": 545.1619, |
|
"eval_samples_per_second": 3.669, |
|
"eval_steps_per_second": 0.917, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.930917156425477e-07, |
|
"logits/chosen": -0.035455573350191116, |
|
"logits/rejected": 0.024086173623800278, |
|
"logps/chosen": -332.34405517578125, |
|
"logps/rejected": -361.7284240722656, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8735530972480774, |
|
"rewards/margins": 0.5774091482162476, |
|
"rewards/rejected": -1.4509623050689697, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.749266994893756e-07, |
|
"logits/chosen": -0.05711311846971512, |
|
"logits/rejected": -0.02643768861889839, |
|
"logps/chosen": -336.5233459472656, |
|
"logps/rejected": -384.072265625, |
|
"loss": 0.6026, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.9693512916564941, |
|
"rewards/margins": 0.39003175497055054, |
|
"rewards/rejected": -1.359383225440979, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.56889026517913e-07, |
|
"logits/chosen": -0.03704976290464401, |
|
"logits/rejected": -0.0004162020923104137, |
|
"logps/chosen": -362.77703857421875, |
|
"logps/rejected": -375.52349853515625, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9288007616996765, |
|
"rewards/margins": 0.5011378526687622, |
|
"rewards/rejected": -1.429938554763794, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.389802028686617e-07, |
|
"logits/chosen": -0.03835465759038925, |
|
"logits/rejected": -0.016497110947966576, |
|
"logps/chosen": -301.2439270019531, |
|
"logps/rejected": -307.9148864746094, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.851264476776123, |
|
"rewards/margins": 0.3882144093513489, |
|
"rewards/rejected": -1.2394790649414062, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.212017239232427e-07, |
|
"logits/chosen": -0.021128101274371147, |
|
"logits/rejected": 0.029263263568282127, |
|
"logps/chosen": -350.95611572265625, |
|
"logps/rejected": -408.35638427734375, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8077753782272339, |
|
"rewards/margins": 0.570160448551178, |
|
"rewards/rejected": -1.3779360055923462, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.03555074179533e-07, |
|
"logits/chosen": -0.06519512832164764, |
|
"logits/rejected": -0.003372351173311472, |
|
"logps/chosen": -351.7237548828125, |
|
"logps/rejected": -381.6541442871094, |
|
"loss": 0.5374, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8637690544128418, |
|
"rewards/margins": 0.5793260335922241, |
|
"rewards/rejected": -1.443095088005066, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.860417271277067e-07, |
|
"logits/chosen": -0.02651647850871086, |
|
"logits/rejected": 4.415661169332452e-05, |
|
"logps/chosen": -319.85113525390625, |
|
"logps/rejected": -349.617919921875, |
|
"loss": 0.613, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.7974709868431091, |
|
"rewards/margins": 0.3686184287071228, |
|
"rewards/rejected": -1.1660895347595215, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.686631451272029e-07, |
|
"logits/chosen": -0.06533274054527283, |
|
"logits/rejected": -0.06797342002391815, |
|
"logps/chosen": -354.38055419921875, |
|
"logps/rejected": -345.04345703125, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8840041160583496, |
|
"rewards/margins": 0.4283463954925537, |
|
"rewards/rejected": -1.3123505115509033, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.514207792846168e-07, |
|
"logits/chosen": -0.019940800964832306, |
|
"logits/rejected": -0.029427438974380493, |
|
"logps/chosen": -332.21832275390625, |
|
"logps/rejected": -374.78057861328125, |
|
"loss": 0.5771, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8241235613822937, |
|
"rewards/margins": 0.44140610098838806, |
|
"rewards/rejected": -1.2655296325683594, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.343160693325356e-07, |
|
"logits/chosen": -0.06264803558588028, |
|
"logits/rejected": -0.056431032717227936, |
|
"logps/chosen": -308.91790771484375, |
|
"logps/rejected": -341.4127502441406, |
|
"loss": 0.588, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.7939392924308777, |
|
"rewards/margins": 0.4327456057071686, |
|
"rewards/rejected": -1.226684808731079, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_logits/chosen": -0.07915809750556946, |
|
"eval_logits/rejected": -0.04424045607447624, |
|
"eval_logps/chosen": -360.8502502441406, |
|
"eval_logps/rejected": -372.722412109375, |
|
"eval_loss": 0.5795174241065979, |
|
"eval_rewards/accuracies": 0.6345000267028809, |
|
"eval_rewards/chosen": -0.8330343961715698, |
|
"eval_rewards/margins": 0.4560302197933197, |
|
"eval_rewards/rejected": -1.2890645265579224, |
|
"eval_runtime": 545.3094, |
|
"eval_samples_per_second": 3.668, |
|
"eval_steps_per_second": 0.917, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.173504435093174e-07, |
|
"logits/chosen": -0.07090556621551514, |
|
"logits/rejected": -0.05571649596095085, |
|
"logps/chosen": -374.98504638671875, |
|
"logps/rejected": -389.0013732910156, |
|
"loss": 0.6176, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9230650067329407, |
|
"rewards/margins": 0.3573823869228363, |
|
"rewards/rejected": -1.2804473638534546, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.00525318439836e-07, |
|
"logits/chosen": -0.05975785106420517, |
|
"logits/rejected": -0.047933854162693024, |
|
"logps/chosen": -353.83294677734375, |
|
"logps/rejected": -353.1571350097656, |
|
"loss": 0.5915, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8541162610054016, |
|
"rewards/margins": 0.4437043070793152, |
|
"rewards/rejected": -1.2978206872940063, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.838420990171927e-07, |
|
"logits/chosen": -0.07875867187976837, |
|
"logits/rejected": -0.03914656117558479, |
|
"logps/chosen": -388.00396728515625, |
|
"logps/rejected": -384.4292907714844, |
|
"loss": 0.5651, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7708128690719604, |
|
"rewards/margins": 0.4774767756462097, |
|
"rewards/rejected": -1.2482898235321045, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.673021782854084e-07, |
|
"logits/chosen": -0.03692690655589104, |
|
"logits/rejected": -0.04455095902085304, |
|
"logps/chosen": -327.183837890625, |
|
"logps/rejected": -382.13055419921875, |
|
"loss": 0.5735, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.7954224348068237, |
|
"rewards/margins": 0.46773552894592285, |
|
"rewards/rejected": -1.2631580829620361, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.509069373231039e-07, |
|
"logits/chosen": -0.05857111141085625, |
|
"logits/rejected": -0.06518695503473282, |
|
"logps/chosen": -374.9933166503906, |
|
"logps/rejected": -397.5784912109375, |
|
"loss": 0.5719, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.8247407674789429, |
|
"rewards/margins": 0.5263202786445618, |
|
"rewards/rejected": -1.3510611057281494, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.346577451281822e-07, |
|
"logits/chosen": -0.07813980430364609, |
|
"logits/rejected": -0.044347889721393585, |
|
"logps/chosen": -332.4440612792969, |
|
"logps/rejected": -346.2649230957031, |
|
"loss": 0.6059, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8520699739456177, |
|
"rewards/margins": 0.38116154074668884, |
|
"rewards/rejected": -1.2332316637039185, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.185559585035138e-07, |
|
"logits/chosen": -0.06604515016078949, |
|
"logits/rejected": -0.038625769317150116, |
|
"logps/chosen": -364.8755798339844, |
|
"logps/rejected": -371.142822265625, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8049036264419556, |
|
"rewards/margins": 0.382902055978775, |
|
"rewards/rejected": -1.1878057718276978, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.026029219436504e-07, |
|
"logits/chosen": -0.05492454767227173, |
|
"logits/rejected": -0.057348012924194336, |
|
"logps/chosen": -383.4015197753906, |
|
"logps/rejected": -387.12176513671875, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7947750091552734, |
|
"rewards/margins": 0.4462098181247711, |
|
"rewards/rejected": -1.2409846782684326, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.867999675225523e-07, |
|
"logits/chosen": -0.03865230828523636, |
|
"logits/rejected": -0.05615551024675369, |
|
"logps/chosen": -369.0574645996094, |
|
"logps/rejected": -353.56817626953125, |
|
"loss": 0.6035, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7356120944023132, |
|
"rewards/margins": 0.3736583888530731, |
|
"rewards/rejected": -1.109270691871643, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.711484147823663e-07, |
|
"logits/chosen": -0.06074659898877144, |
|
"logits/rejected": -0.058938294649124146, |
|
"logps/chosen": -339.2735595703125, |
|
"logps/rejected": -357.4136047363281, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7124096751213074, |
|
"rewards/margins": 0.5349728465080261, |
|
"rewards/rejected": -1.247382402420044, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -0.064789317548275, |
|
"eval_logits/rejected": -0.029839608818292618, |
|
"eval_logps/chosen": -354.6904296875, |
|
"eval_logps/rejected": -365.1566162109375, |
|
"eval_loss": 0.5806678533554077, |
|
"eval_rewards/accuracies": 0.6340000033378601, |
|
"eval_rewards/chosen": -0.7714364528656006, |
|
"eval_rewards/margins": 0.4419707953929901, |
|
"eval_rewards/rejected": -1.213407278060913, |
|
"eval_runtime": 544.3782, |
|
"eval_samples_per_second": 3.674, |
|
"eval_steps_per_second": 0.918, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.556495706232413e-07, |
|
"logits/chosen": -0.04762962460517883, |
|
"logits/rejected": -0.026001859456300735, |
|
"logps/chosen": -345.75860595703125, |
|
"logps/rejected": -352.12677001953125, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7305065393447876, |
|
"rewards/margins": 0.48486828804016113, |
|
"rewards/rejected": -1.2153748273849487, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.403047291942057e-07, |
|
"logits/chosen": -0.06475412100553513, |
|
"logits/rejected": -0.04636804386973381, |
|
"logps/chosen": -351.92230224609375, |
|
"logps/rejected": -373.903564453125, |
|
"loss": 0.585, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.7460962533950806, |
|
"rewards/margins": 0.4199795126914978, |
|
"rewards/rejected": -1.1660758256912231, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.251151717851023e-07, |
|
"logits/chosen": -0.084681436419487, |
|
"logits/rejected": -0.07976353913545609, |
|
"logps/chosen": -377.3009033203125, |
|
"logps/rejected": -403.86688232421875, |
|
"loss": 0.603, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8924350738525391, |
|
"rewards/margins": 0.5002483129501343, |
|
"rewards/rejected": -1.3926832675933838, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.100821667196041e-07, |
|
"logits/chosen": -0.023926103487610817, |
|
"logits/rejected": -0.0013149696169421077, |
|
"logps/chosen": -373.28125, |
|
"logps/rejected": -354.6680908203125, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7040396332740784, |
|
"rewards/margins": 0.43626755475997925, |
|
"rewards/rejected": -1.1403071880340576, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.952069692493062e-07, |
|
"logits/chosen": -0.0607212670147419, |
|
"logits/rejected": -0.030897587537765503, |
|
"logps/chosen": -368.138916015625, |
|
"logps/rejected": -378.4091796875, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6818228960037231, |
|
"rewards/margins": 0.5211673974990845, |
|
"rewards/rejected": -1.2029902935028076, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.80490821448918e-07, |
|
"logits/chosen": -0.03866980969905853, |
|
"logits/rejected": -0.04572879895567894, |
|
"logps/chosen": -291.18768310546875, |
|
"logps/rejected": -308.963134765625, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7203935980796814, |
|
"rewards/margins": 0.39186891913414, |
|
"rewards/rejected": -1.112262487411499, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.659349521125459e-07, |
|
"logits/chosen": -0.08902426809072495, |
|
"logits/rejected": -0.05652226135134697, |
|
"logps/chosen": -306.17620849609375, |
|
"logps/rejected": -353.67633056640625, |
|
"loss": 0.5513, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7082656025886536, |
|
"rewards/margins": 0.4930747151374817, |
|
"rewards/rejected": -1.2013403177261353, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.5154057665109e-07, |
|
"logits/chosen": -0.055385757237672806, |
|
"logits/rejected": -0.04034740477800369, |
|
"logps/chosen": -354.96563720703125, |
|
"logps/rejected": -360.63677978515625, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7717067003250122, |
|
"rewards/margins": 0.48541682958602905, |
|
"rewards/rejected": -1.257123589515686, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.373088969907586e-07, |
|
"logits/chosen": -0.09343139082193375, |
|
"logits/rejected": -0.045906342566013336, |
|
"logps/chosen": -329.02557373046875, |
|
"logps/rejected": -373.9976501464844, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7075044512748718, |
|
"rewards/margins": 0.5255839228630066, |
|
"rewards/rejected": -1.2330883741378784, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.23241101472709e-07, |
|
"logits/chosen": -0.03690626472234726, |
|
"logits/rejected": -0.054735153913497925, |
|
"logps/chosen": -324.6033935546875, |
|
"logps/rejected": -346.44830322265625, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.7127725481987, |
|
"rewards/margins": 0.39219218492507935, |
|
"rewards/rejected": -1.1049648523330688, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_logits/chosen": -0.07098916918039322, |
|
"eval_logits/rejected": -0.03594445437192917, |
|
"eval_logps/chosen": -352.0881042480469, |
|
"eval_logps/rejected": -362.20758056640625, |
|
"eval_loss": 0.5816840529441833, |
|
"eval_rewards/accuracies": 0.6359999775886536, |
|
"eval_rewards/chosen": -0.7454131245613098, |
|
"eval_rewards/margins": 0.4385035037994385, |
|
"eval_rewards/rejected": -1.1839165687561035, |
|
"eval_runtime": 544.6044, |
|
"eval_samples_per_second": 3.672, |
|
"eval_steps_per_second": 0.918, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.09338364753818e-07, |
|
"logits/chosen": -0.09596423804759979, |
|
"logits/rejected": -0.0356723852455616, |
|
"logps/chosen": -363.4578857421875, |
|
"logps/rejected": -385.2988586425781, |
|
"loss": 0.5387, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.724037230014801, |
|
"rewards/margins": 0.5893635153770447, |
|
"rewards/rejected": -1.3134007453918457, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.956018477086005e-07, |
|
"logits/chosen": -0.023332688957452774, |
|
"logits/rejected": -0.05660749599337578, |
|
"logps/chosen": -351.01885986328125, |
|
"logps/rejected": -340.53472900390625, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7153294682502747, |
|
"rewards/margins": 0.46078458428382874, |
|
"rewards/rejected": -1.1761140823364258, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.820326973322764e-07, |
|
"logits/chosen": -0.028302233666181564, |
|
"logits/rejected": -0.03387041389942169, |
|
"logps/chosen": -360.9173889160156, |
|
"logps/rejected": -340.7845458984375, |
|
"loss": 0.5284, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.664130687713623, |
|
"rewards/margins": 0.609849750995636, |
|
"rewards/rejected": -1.2739803791046143, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.686320466449981e-07, |
|
"logits/chosen": -0.05822853371500969, |
|
"logits/rejected": -0.05845784395933151, |
|
"logps/chosen": -303.1192321777344, |
|
"logps/rejected": -334.73553466796875, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6799777746200562, |
|
"rewards/margins": 0.5427129864692688, |
|
"rewards/rejected": -1.2226907014846802, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.554010145972418e-07, |
|
"logits/chosen": -0.06433075666427612, |
|
"logits/rejected": -0.04098791256546974, |
|
"logps/chosen": -356.281982421875, |
|
"logps/rejected": -342.19537353515625, |
|
"loss": 0.581, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7194234132766724, |
|
"rewards/margins": 0.4737864136695862, |
|
"rewards/rejected": -1.1932098865509033, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.4234070597637455e-07, |
|
"logits/chosen": -0.07401960343122482, |
|
"logits/rejected": -0.006474087946116924, |
|
"logps/chosen": -350.6880798339844, |
|
"logps/rejected": -360.044677734375, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8287215232849121, |
|
"rewards/margins": 0.3001879155635834, |
|
"rewards/rejected": -1.1289094686508179, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.2945221131440783e-07, |
|
"logits/chosen": -0.05917246267199516, |
|
"logits/rejected": -0.042995236814022064, |
|
"logps/chosen": -318.0530700683594, |
|
"logps/rejected": -330.96832275390625, |
|
"loss": 0.5841, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.7468527555465698, |
|
"rewards/margins": 0.4306907057762146, |
|
"rewards/rejected": -1.1775435209274292, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.167366067969381e-07, |
|
"logits/chosen": -0.09087510406970978, |
|
"logits/rejected": -0.07458924502134323, |
|
"logps/chosen": -342.1099548339844, |
|
"logps/rejected": -375.07635498046875, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7130734324455261, |
|
"rewards/margins": 0.5242506861686707, |
|
"rewards/rejected": -1.2373241186141968, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.041949541732826e-07, |
|
"logits/chosen": -0.01006515882909298, |
|
"logits/rejected": 0.011426875367760658, |
|
"logps/chosen": -374.0743103027344, |
|
"logps/rejected": -352.95697021484375, |
|
"loss": 0.6166, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.8070684671401978, |
|
"rewards/margins": 0.3434571623802185, |
|
"rewards/rejected": -1.150525450706482, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9182830066782614e-07, |
|
"logits/chosen": -0.06810037791728973, |
|
"logits/rejected": -0.07136163860559464, |
|
"logps/chosen": -360.11090087890625, |
|
"logps/rejected": -351.59515380859375, |
|
"loss": 0.615, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.8139978647232056, |
|
"rewards/margins": 0.3238958418369293, |
|
"rewards/rejected": -1.1378936767578125, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -0.06448013335466385, |
|
"eval_logits/rejected": -0.029476074501872063, |
|
"eval_logps/chosen": -353.8468933105469, |
|
"eval_logps/rejected": -364.4670104980469, |
|
"eval_loss": 0.5805965662002563, |
|
"eval_rewards/accuracies": 0.6330000162124634, |
|
"eval_rewards/chosen": -0.7630012035369873, |
|
"eval_rewards/margins": 0.443509578704834, |
|
"eval_rewards/rejected": -1.2065109014511108, |
|
"eval_runtime": 545.1592, |
|
"eval_samples_per_second": 3.669, |
|
"eval_steps_per_second": 0.917, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.796376788925771e-07, |
|
"logits/chosen": -0.07999769598245621, |
|
"logits/rejected": -0.016174430027604103, |
|
"logps/chosen": -317.23468017578125, |
|
"logps/rejected": -333.4161682128906, |
|
"loss": 0.5777, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.7274065017700195, |
|
"rewards/margins": 0.44895920157432556, |
|
"rewards/rejected": -1.1763657331466675, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.676241067609465e-07, |
|
"logits/chosen": -0.065385602414608, |
|
"logits/rejected": -0.08215048164129257, |
|
"logps/chosen": -313.9781799316406, |
|
"logps/rejected": -344.7863464355469, |
|
"loss": 0.5884, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7430699467658997, |
|
"rewards/margins": 0.42152637243270874, |
|
"rewards/rejected": -1.1645963191986084, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5578858740274976e-07, |
|
"logits/chosen": -0.049977466464042664, |
|
"logits/rejected": -0.022836443036794662, |
|
"logps/chosen": -371.64337158203125, |
|
"logps/rejected": -372.68505859375, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7283893823623657, |
|
"rewards/margins": 0.4756964147090912, |
|
"rewards/rejected": -1.2040858268737793, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.44132109080447e-07, |
|
"logits/chosen": -0.07746875286102295, |
|
"logits/rejected": -0.013276422396302223, |
|
"logps/chosen": -342.2890625, |
|
"logps/rejected": -340.8277282714844, |
|
"loss": 0.6697, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.8580360412597656, |
|
"rewards/margins": 0.2627985179424286, |
|
"rewards/rejected": -1.1208345890045166, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3265564510662344e-07, |
|
"logits/chosen": -0.04408205673098564, |
|
"logits/rejected": -0.006346794776618481, |
|
"logps/chosen": -323.6293640136719, |
|
"logps/rejected": -328.1676025390625, |
|
"loss": 0.5655, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7188665866851807, |
|
"rewards/margins": 0.4509055018424988, |
|
"rewards/rejected": -1.1697720289230347, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.213601537627195e-07, |
|
"logits/chosen": -0.02172664925456047, |
|
"logits/rejected": -0.02120097354054451, |
|
"logps/chosen": -335.9650573730469, |
|
"logps/rejected": -357.8267517089844, |
|
"loss": 0.6194, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7779746055603027, |
|
"rewards/margins": 0.3631519675254822, |
|
"rewards/rejected": -1.1411265134811401, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.1024657821901063e-07, |
|
"logits/chosen": -0.013195675797760487, |
|
"logits/rejected": -0.027406075969338417, |
|
"logps/chosen": -323.6843566894531, |
|
"logps/rejected": -349.3821105957031, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7054836750030518, |
|
"rewards/margins": 0.501741886138916, |
|
"rewards/rejected": -1.2072255611419678, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9931584645585654e-07, |
|
"logits/chosen": 0.0001564420817885548, |
|
"logits/rejected": -0.0028263225685805082, |
|
"logps/chosen": -366.2295837402344, |
|
"logps/rejected": -400.4193115234375, |
|
"loss": 0.5951, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8342908024787903, |
|
"rewards/margins": 0.4561484754085541, |
|
"rewards/rejected": -1.2904393672943115, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.885688711862136e-07, |
|
"logits/chosen": -0.040136925876140594, |
|
"logits/rejected": -0.030243951827287674, |
|
"logps/chosen": -333.1867980957031, |
|
"logps/rejected": -329.26239013671875, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.7828749418258667, |
|
"rewards/margins": 0.3514179587364197, |
|
"rewards/rejected": -1.1342929601669312, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7800654977942486e-07, |
|
"logits/chosen": -0.05975431948900223, |
|
"logits/rejected": -0.024867946282029152, |
|
"logps/chosen": -372.686767578125, |
|
"logps/rejected": -331.25628662109375, |
|
"loss": 0.6211, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.8066738843917847, |
|
"rewards/margins": 0.33745184540748596, |
|
"rewards/rejected": -1.1441256999969482, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/chosen": -0.0585113987326622, |
|
"eval_logits/rejected": -0.0239940844476223, |
|
"eval_logps/chosen": -355.21856689453125, |
|
"eval_logps/rejected": -365.8819885253906, |
|
"eval_loss": 0.5793652534484863, |
|
"eval_rewards/accuracies": 0.6334999799728394, |
|
"eval_rewards/chosen": -0.7767176032066345, |
|
"eval_rewards/margins": 0.4439431130886078, |
|
"eval_rewards/rejected": -1.2206608057022095, |
|
"eval_runtime": 544.4721, |
|
"eval_samples_per_second": 3.673, |
|
"eval_steps_per_second": 0.918, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6762976418628797e-07, |
|
"logits/chosen": -0.03742988407611847, |
|
"logits/rejected": -0.038391679525375366, |
|
"logps/chosen": -328.2310791015625, |
|
"logps/rejected": -339.87530517578125, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.777148425579071, |
|
"rewards/margins": 0.3832804560661316, |
|
"rewards/rejected": -1.1604288816452026, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5743938086541354e-07, |
|
"logits/chosen": -0.038022901862859726, |
|
"logits/rejected": -0.04044175148010254, |
|
"logps/chosen": -369.03594970703125, |
|
"logps/rejected": -378.79937744140625, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7325864434242249, |
|
"rewards/margins": 0.6389061212539673, |
|
"rewards/rejected": -1.3714925050735474, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4743625071087574e-07, |
|
"logits/chosen": -0.08106885850429535, |
|
"logits/rejected": -0.0331001877784729, |
|
"logps/chosen": -350.80322265625, |
|
"logps/rejected": -370.1235656738281, |
|
"loss": 0.5724, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8414134979248047, |
|
"rewards/margins": 0.4927336573600769, |
|
"rewards/rejected": -1.3341472148895264, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3762120898116498e-07, |
|
"logits/chosen": -0.045328982174396515, |
|
"logits/rejected": -0.03163378685712814, |
|
"logps/chosen": -341.0201416015625, |
|
"logps/rejected": -346.0947265625, |
|
"loss": 0.616, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7547915577888489, |
|
"rewards/margins": 0.3216248154640198, |
|
"rewards/rejected": -1.0764163732528687, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2799507522944048e-07, |
|
"logits/chosen": 0.0050236075185239315, |
|
"logits/rejected": -0.00028944091172888875, |
|
"logps/chosen": -366.76171875, |
|
"logps/rejected": -338.931640625, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.729737401008606, |
|
"rewards/margins": 0.3199540972709656, |
|
"rewards/rejected": -1.0496914386749268, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1855865323510056e-07, |
|
"logits/chosen": -0.01202615536749363, |
|
"logits/rejected": 0.009629396721720695, |
|
"logps/chosen": -367.4909973144531, |
|
"logps/rejected": -385.5251159667969, |
|
"loss": 0.5707, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7918294668197632, |
|
"rewards/margins": 0.4775357246398926, |
|
"rewards/rejected": -1.2693650722503662, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0931273093666575e-07, |
|
"logits/chosen": 0.0005378912901505828, |
|
"logits/rejected": 0.012183221988379955, |
|
"logps/chosen": -339.87249755859375, |
|
"logps/rejected": -380.7689208984375, |
|
"loss": 0.5681, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7448574304580688, |
|
"rewards/margins": 0.48650699853897095, |
|
"rewards/rejected": -1.2313644886016846, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.002580803659873e-07, |
|
"logits/chosen": -0.02887069061398506, |
|
"logits/rejected": -0.045493822544813156, |
|
"logps/chosen": -373.1261901855469, |
|
"logps/rejected": -381.5206604003906, |
|
"loss": 0.5724, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.831575870513916, |
|
"rewards/margins": 0.43848705291748047, |
|
"rewards/rejected": -1.2700629234313965, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.913954575837826e-07, |
|
"logits/chosen": 0.019021127372980118, |
|
"logits/rejected": 0.05180368572473526, |
|
"logps/chosen": -373.37060546875, |
|
"logps/rejected": -386.58941650390625, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8404852151870728, |
|
"rewards/margins": 0.47979259490966797, |
|
"rewards/rejected": -1.3202778100967407, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.827256026165028e-07, |
|
"logits/chosen": -0.036004990339279175, |
|
"logits/rejected": -0.0617971308529377, |
|
"logps/chosen": -350.956298828125, |
|
"logps/rejected": -374.48272705078125, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8171570897102356, |
|
"rewards/margins": 0.6308014988899231, |
|
"rewards/rejected": -1.4479585886001587, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": -0.05579707399010658, |
|
"eval_logits/rejected": -0.022538870573043823, |
|
"eval_logps/chosen": -361.53656005859375, |
|
"eval_logps/rejected": -373.102783203125, |
|
"eval_loss": 0.5777334570884705, |
|
"eval_rewards/accuracies": 0.6320000290870667, |
|
"eval_rewards/chosen": -0.8398973941802979, |
|
"eval_rewards/margins": 0.45297136902809143, |
|
"eval_rewards/rejected": -1.292868733406067, |
|
"eval_runtime": 545.3246, |
|
"eval_samples_per_second": 3.668, |
|
"eval_steps_per_second": 0.917, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7424923939454274e-07, |
|
"logits/chosen": -0.002502134535461664, |
|
"logits/rejected": -0.018689513206481934, |
|
"logps/chosen": -365.55181884765625, |
|
"logps/rejected": -376.9024963378906, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8611559867858887, |
|
"rewards/margins": 0.43401390314102173, |
|
"rewards/rejected": -1.2951698303222656, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6596707569179304e-07, |
|
"logits/chosen": -0.0391705147922039, |
|
"logits/rejected": -0.029395347461104393, |
|
"logps/chosen": -344.93084716796875, |
|
"logps/rejected": -354.69110107421875, |
|
"loss": 0.5448, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7967727780342102, |
|
"rewards/margins": 0.5383495092391968, |
|
"rewards/rejected": -1.3351223468780518, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.578798030665385e-07, |
|
"logits/chosen": -0.02771691046655178, |
|
"logits/rejected": -0.03689311072230339, |
|
"logps/chosen": -369.16424560546875, |
|
"logps/rejected": -351.5723571777344, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8476957082748413, |
|
"rewards/margins": 0.4203530251979828, |
|
"rewards/rejected": -1.268048882484436, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.499880968037165e-07, |
|
"logits/chosen": -0.05807967856526375, |
|
"logits/rejected": 0.006117827724665403, |
|
"logps/chosen": -368.61480712890625, |
|
"logps/rejected": -385.88482666015625, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8740746378898621, |
|
"rewards/margins": 0.5491721034049988, |
|
"rewards/rejected": -1.4232467412948608, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4229261585852805e-07, |
|
"logits/chosen": -0.08000718057155609, |
|
"logits/rejected": -0.013188359327614307, |
|
"logps/chosen": -344.84844970703125, |
|
"logps/rejected": -366.2679138183594, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8913000822067261, |
|
"rewards/margins": 0.4382581114768982, |
|
"rewards/rejected": -1.3295581340789795, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3479400280141886e-07, |
|
"logits/chosen": -0.05246477201581001, |
|
"logits/rejected": -0.064031220972538, |
|
"logps/chosen": -311.46075439453125, |
|
"logps/rejected": -326.723876953125, |
|
"loss": 0.5766, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7297900915145874, |
|
"rewards/margins": 0.3982476592063904, |
|
"rewards/rejected": -1.128037691116333, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2749288376442044e-07, |
|
"logits/chosen": -0.013275760225951672, |
|
"logits/rejected": -0.015950758010149002, |
|
"logps/chosen": -327.622802734375, |
|
"logps/rejected": -358.119873046875, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.814193606376648, |
|
"rewards/margins": 0.4229184091091156, |
|
"rewards/rejected": -1.2371121644973755, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.203898683888713e-07, |
|
"logits/chosen": -0.010835160501301289, |
|
"logits/rejected": -0.0052233547903597355, |
|
"logps/chosen": -340.2196350097656, |
|
"logps/rejected": -364.1300048828125, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8792353868484497, |
|
"rewards/margins": 0.44398754835128784, |
|
"rewards/rejected": -1.3232228755950928, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1348554977451132e-07, |
|
"logits/chosen": -0.02504626475274563, |
|
"logits/rejected": -0.034282900393009186, |
|
"logps/chosen": -324.88525390625, |
|
"logps/rejected": -377.1581726074219, |
|
"loss": 0.5687, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8490117788314819, |
|
"rewards/margins": 0.49014702439308167, |
|
"rewards/rejected": -1.3391587734222412, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0678050442995802e-07, |
|
"logits/chosen": -0.06707514822483063, |
|
"logits/rejected": -0.05451773479580879, |
|
"logps/chosen": -336.59759521484375, |
|
"logps/rejected": -344.66265869140625, |
|
"loss": 0.5322, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7733240723609924, |
|
"rewards/margins": 0.5705543160438538, |
|
"rewards/rejected": -1.3438783884048462, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -0.05459979921579361, |
|
"eval_logits/rejected": -0.020987825468182564, |
|
"eval_logps/chosen": -360.141845703125, |
|
"eval_logps/rejected": -371.6272277832031, |
|
"eval_loss": 0.5779351592063904, |
|
"eval_rewards/accuracies": 0.6334999799728394, |
|
"eval_rewards/chosen": -0.825950562953949, |
|
"eval_rewards/margins": 0.4521624445915222, |
|
"eval_rewards/rejected": -1.2781130075454712, |
|
"eval_runtime": 544.5596, |
|
"eval_samples_per_second": 3.673, |
|
"eval_steps_per_second": 0.918, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0027529222456755e-07, |
|
"logits/chosen": -0.02216022089123726, |
|
"logits/rejected": -0.042939335107803345, |
|
"logps/chosen": -348.16436767578125, |
|
"logps/rejected": -375.9942321777344, |
|
"loss": 0.5861, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8684269189834595, |
|
"rewards/margins": 0.43816670775413513, |
|
"rewards/rejected": -1.306593656539917, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.397045634168766e-08, |
|
"logits/chosen": -0.06902258098125458, |
|
"logits/rejected": 0.005351958330720663, |
|
"logps/chosen": -349.08087158203125, |
|
"logps/rejected": -350.078125, |
|
"loss": 0.5637, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7523408532142639, |
|
"rewards/margins": 0.476735919713974, |
|
"rewards/rejected": -1.2290767431259155, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.78665232332998e-08, |
|
"logits/chosen": -0.04204776883125305, |
|
"logits/rejected": -0.0012271578889340162, |
|
"logps/chosen": -342.7869567871094, |
|
"logps/rejected": -374.901611328125, |
|
"loss": 0.5951, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8473308682441711, |
|
"rewards/margins": 0.3966173529624939, |
|
"rewards/rejected": -1.243948221206665, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.196400257606208e-08, |
|
"logits/chosen": -0.07394047826528549, |
|
"logits/rejected": -0.030526086688041687, |
|
"logps/chosen": -361.0905456542969, |
|
"logps/rejected": -363.9695739746094, |
|
"loss": 0.605, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.8390079736709595, |
|
"rewards/margins": 0.3738323748111725, |
|
"rewards/rejected": -1.21284019947052, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.626338722875076e-08, |
|
"logits/chosen": -0.025840681046247482, |
|
"logits/rejected": -0.05815352872014046, |
|
"logps/chosen": -363.9366149902344, |
|
"logps/rejected": -375.3655090332031, |
|
"loss": 0.5786, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8692588806152344, |
|
"rewards/margins": 0.4229603707790375, |
|
"rewards/rejected": -1.2922192811965942, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.076515319110688e-08, |
|
"logits/chosen": 0.011594533920288086, |
|
"logits/rejected": -0.023100445047020912, |
|
"logps/chosen": -372.5281066894531, |
|
"logps/rejected": -373.2308654785156, |
|
"loss": 0.5711, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.8273743391036987, |
|
"rewards/margins": 0.46333497762680054, |
|
"rewards/rejected": -1.290709376335144, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.54697595640899e-08, |
|
"logits/chosen": 0.024370530620217323, |
|
"logits/rejected": -0.025257308036088943, |
|
"logps/chosen": -357.7359924316406, |
|
"logps/rejected": -373.8916015625, |
|
"loss": 0.5944, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8015440702438354, |
|
"rewards/margins": 0.399541437625885, |
|
"rewards/rejected": -1.2010856866836548, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.037764851154426e-08, |
|
"logits/chosen": -0.061722129583358765, |
|
"logits/rejected": -0.017051083967089653, |
|
"logps/chosen": -358.57586669921875, |
|
"logps/rejected": -358.6295471191406, |
|
"loss": 0.5696, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8154916763305664, |
|
"rewards/margins": 0.4519859254360199, |
|
"rewards/rejected": -1.2674776315689087, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.548924522327748e-08, |
|
"logits/chosen": -0.08785736560821533, |
|
"logits/rejected": -0.004190725274384022, |
|
"logps/chosen": -355.2710876464844, |
|
"logps/rejected": -382.51019287109375, |
|
"loss": 0.5576, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.8198660016059875, |
|
"rewards/margins": 0.5074083805084229, |
|
"rewards/rejected": -1.3272743225097656, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.0804957879556915e-08, |
|
"logits/chosen": -0.0530325286090374, |
|
"logits/rejected": -0.013538384810090065, |
|
"logps/chosen": -400.12078857421875, |
|
"logps/rejected": -389.3551330566406, |
|
"loss": 0.5527, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8526579737663269, |
|
"rewards/margins": 0.523281455039978, |
|
"rewards/rejected": -1.3759396076202393, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -0.056537650525569916, |
|
"eval_logits/rejected": -0.022854406386613846, |
|
"eval_logps/chosen": -360.0846862792969, |
|
"eval_logps/rejected": -371.6083068847656, |
|
"eval_loss": 0.5779842734336853, |
|
"eval_rewards/accuracies": 0.6315000057220459, |
|
"eval_rewards/chosen": -0.8253786563873291, |
|
"eval_rewards/margins": 0.4525452256202698, |
|
"eval_rewards/rejected": -1.277923822402954, |
|
"eval_runtime": 544.3878, |
|
"eval_samples_per_second": 3.674, |
|
"eval_steps_per_second": 0.918, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.632517761702815e-08, |
|
"logits/chosen": -0.02192116342484951, |
|
"logits/rejected": -0.0027442649006843567, |
|
"logps/chosen": -359.52862548828125, |
|
"logps/rejected": -368.43524169921875, |
|
"loss": 0.5519, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.7623158097267151, |
|
"rewards/margins": 0.5025709271430969, |
|
"rewards/rejected": -1.2648866176605225, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.205027849605359e-08, |
|
"logits/chosen": -0.03419053927063942, |
|
"logits/rejected": -0.0502307191491127, |
|
"logps/chosen": -303.33673095703125, |
|
"logps/rejected": -344.5325012207031, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.7383955717086792, |
|
"rewards/margins": 0.42240723967552185, |
|
"rewards/rejected": -1.1608028411865234, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": -0.033175647258758545, |
|
"logits/rejected": -0.056212086230516434, |
|
"logps/chosen": -424.09918212890625, |
|
"logps/rejected": -388.13372802734375, |
|
"loss": 0.5523, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8043220639228821, |
|
"rewards/margins": 0.5199757814407349, |
|
"rewards/rejected": -1.3242979049682617, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.411653435283158e-08, |
|
"logits/chosen": -0.045365557074546814, |
|
"logits/rejected": -0.025770818814635277, |
|
"logps/chosen": -379.8509826660156, |
|
"logps/rejected": -381.9056091308594, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8896015286445618, |
|
"rewards/margins": 0.4383910596370697, |
|
"rewards/rejected": -1.3279926776885986, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.04583517959367e-08, |
|
"logits/chosen": -0.045900799334049225, |
|
"logits/rejected": -0.05127967521548271, |
|
"logps/chosen": -372.5838623046875, |
|
"logps/rejected": -371.11956787109375, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7395545840263367, |
|
"rewards/margins": 0.5564114451408386, |
|
"rewards/rejected": -1.2959661483764648, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.7006375255985984e-08, |
|
"logits/chosen": -0.05672993138432503, |
|
"logits/rejected": -0.0464431568980217, |
|
"logps/chosen": -315.34393310546875, |
|
"logps/rejected": -327.28131103515625, |
|
"loss": 0.5784, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7688011527061462, |
|
"rewards/margins": 0.4246784746646881, |
|
"rewards/rejected": -1.1934795379638672, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3760892972027328e-08, |
|
"logits/chosen": -0.030247915536165237, |
|
"logits/rejected": 0.009180205874145031, |
|
"logps/chosen": -366.7953186035156, |
|
"logps/rejected": -355.39910888671875, |
|
"loss": 0.5729, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.7872218489646912, |
|
"rewards/margins": 0.47359123826026917, |
|
"rewards/rejected": -1.2608129978179932, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.072217594089765e-08, |
|
"logits/chosen": 0.01734892651438713, |
|
"logits/rejected": -0.01647915318608284, |
|
"logps/chosen": -321.6292419433594, |
|
"logps/rejected": -336.16650390625, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7641597390174866, |
|
"rewards/margins": 0.34002798795700073, |
|
"rewards/rejected": -1.1041877269744873, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.789047789459375e-08, |
|
"logits/chosen": -0.058907002210617065, |
|
"logits/rejected": -0.03177405148744583, |
|
"logps/chosen": -361.2591857910156, |
|
"logps/rejected": -376.9844665527344, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8447058796882629, |
|
"rewards/margins": 0.43825072050094604, |
|
"rewards/rejected": -1.282956600189209, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5266035279088708e-08, |
|
"logits/chosen": -0.05076723173260689, |
|
"logits/rejected": -0.03689832612872124, |
|
"logps/chosen": -401.41070556640625, |
|
"logps/rejected": -366.7544860839844, |
|
"loss": 0.5769, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8378704786300659, |
|
"rewards/margins": 0.45128026604652405, |
|
"rewards/rejected": -1.2891508340835571, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_logits/chosen": -0.0561523512005806, |
|
"eval_logits/rejected": -0.022463036701083183, |
|
"eval_logps/chosen": -360.40625, |
|
"eval_logps/rejected": -371.9745178222656, |
|
"eval_loss": 0.5780424475669861, |
|
"eval_rewards/accuracies": 0.6315000057220459, |
|
"eval_rewards/chosen": -0.8285941481590271, |
|
"eval_rewards/margins": 0.45299187302589417, |
|
"eval_rewards/rejected": -1.2815860509872437, |
|
"eval_runtime": 544.8127, |
|
"eval_samples_per_second": 3.671, |
|
"eval_steps_per_second": 0.918, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2849067234584623e-08, |
|
"logits/chosen": -0.06095626950263977, |
|
"logits/rejected": -0.026778647676110268, |
|
"logps/chosen": -335.8456115722656, |
|
"logps/rejected": -357.4090881347656, |
|
"loss": 0.5608, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7960391640663147, |
|
"rewards/margins": 0.452970415353775, |
|
"rewards/rejected": -1.2490094900131226, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0639775577218625e-08, |
|
"logits/chosen": -0.028050154447555542, |
|
"logits/rejected": -0.012364340014755726, |
|
"logps/chosen": -363.0377197265625, |
|
"logps/rejected": -385.4869384765625, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.7599583268165588, |
|
"rewards/margins": 0.5159587860107422, |
|
"rewards/rejected": -1.2759170532226562, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.638344782207486e-09, |
|
"logits/chosen": -0.06382229179143906, |
|
"logits/rejected": -0.024947451427578926, |
|
"logps/chosen": -392.31329345703125, |
|
"logps/rejected": -396.88507080078125, |
|
"loss": 0.6442, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.9264042973518372, |
|
"rewards/margins": 0.30078691244125366, |
|
"rewards/rejected": -1.2271912097930908, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.84494196844715e-09, |
|
"logits/chosen": -0.022581690922379494, |
|
"logits/rejected": 0.011578726582229137, |
|
"logps/chosen": -341.0782775878906, |
|
"logps/rejected": -361.1309509277344, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7737487554550171, |
|
"rewards/margins": 0.4638180136680603, |
|
"rewards/rejected": -1.2375667095184326, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.259716884556121e-09, |
|
"logits/chosen": -0.06770457327365875, |
|
"logits/rejected": -0.046060215681791306, |
|
"logps/chosen": -390.3734130859375, |
|
"logps/rejected": -387.06060791015625, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8117982745170593, |
|
"rewards/margins": 0.5823758840560913, |
|
"rewards/rejected": -1.3941742181777954, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.882801896372967e-09, |
|
"logits/chosen": -0.09617499262094498, |
|
"logits/rejected": -0.03756389021873474, |
|
"logps/chosen": -361.5705871582031, |
|
"logps/rejected": -379.3876037597656, |
|
"loss": 0.5924, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.812038242816925, |
|
"rewards/margins": 0.4387938976287842, |
|
"rewards/rejected": -1.2508320808410645, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7143119759026614e-09, |
|
"logits/chosen": -0.05360639840364456, |
|
"logits/rejected": 0.002357491757720709, |
|
"logps/chosen": -346.79510498046875, |
|
"logps/rejected": -385.0536804199219, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7745226621627808, |
|
"rewards/margins": 0.40841707587242126, |
|
"rewards/rejected": -1.1829397678375244, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.754344691717591e-09, |
|
"logits/chosen": -0.05662069469690323, |
|
"logits/rejected": -0.04745618626475334, |
|
"logps/chosen": -336.276611328125, |
|
"logps/rejected": -331.7563171386719, |
|
"loss": 0.5534, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.7269638776779175, |
|
"rewards/margins": 0.4797068238258362, |
|
"rewards/rejected": -1.206670880317688, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0029802008096335e-09, |
|
"logits/chosen": 0.008154665119946003, |
|
"logits/rejected": -0.02306550182402134, |
|
"logps/chosen": -347.02252197265625, |
|
"logps/rejected": -350.6134948730469, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8176683187484741, |
|
"rewards/margins": 0.4948461055755615, |
|
"rewards/rejected": -1.3125144243240356, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.602812418974534e-10, |
|
"logits/chosen": -0.06735100597143173, |
|
"logits/rejected": -0.036688387393951416, |
|
"logps/chosen": -344.30023193359375, |
|
"logps/rejected": -356.15130615234375, |
|
"loss": 0.635, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.9314397573471069, |
|
"rewards/margins": 0.3096088171005249, |
|
"rewards/rejected": -1.2410485744476318, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -0.05727638304233551, |
|
"eval_logits/rejected": -0.02366885170340538, |
|
"eval_logps/chosen": -360.228759765625, |
|
"eval_logps/rejected": -371.7966613769531, |
|
"eval_loss": 0.5779924392700195, |
|
"eval_rewards/accuracies": 0.6299999952316284, |
|
"eval_rewards/chosen": -0.8268191814422607, |
|
"eval_rewards/margins": 0.4529884457588196, |
|
"eval_rewards/rejected": -1.2798075675964355, |
|
"eval_runtime": 544.7616, |
|
"eval_samples_per_second": 3.671, |
|
"eval_steps_per_second": 0.918, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.2629313018819312e-10, |
|
"logits/chosen": -0.05566060543060303, |
|
"logits/rejected": -0.01925462670624256, |
|
"logps/chosen": -380.08441162109375, |
|
"logps/rejected": -407.0417175292969, |
|
"loss": 0.5533, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7933879494667053, |
|
"rewards/margins": 0.494006872177124, |
|
"rewards/rejected": -1.2873947620391846, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0437535929996855e-12, |
|
"logits/chosen": -0.04778105020523071, |
|
"logits/rejected": -0.05873732641339302, |
|
"logps/chosen": -362.91461181640625, |
|
"logps/rejected": -384.0444641113281, |
|
"loss": 0.5796, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.8017057180404663, |
|
"rewards/margins": 0.4591229557991028, |
|
"rewards/rejected": -1.2608287334442139, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5976003442126336, |
|
"train_runtime": 56244.0781, |
|
"train_samples_per_second": 1.087, |
|
"train_steps_per_second": 0.068 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|