{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 8335, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.995203836930456e-09, "logits/chosen": -2.424614667892456, "logits/rejected": -1.9891018867492676, "logps/chosen": -441.5737609863281, "logps/rejected": -473.3967590332031, "loss": 0.1361, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.995203836930456e-08, "logits/chosen": -2.110199213027954, "logits/rejected": -1.765876054763794, "logps/chosen": -209.27218627929688, "logps/rejected": -153.5750274658203, "loss": 0.2066, "rewards/accuracies": 0.3888888955116272, "rewards/chosen": 0.0005430497694760561, "rewards/margins": 0.0006039439467713237, "rewards/rejected": -6.089422822697088e-05, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.1990407673860913e-07, "logits/chosen": -1.9729121923446655, "logits/rejected": -1.6711788177490234, "logps/chosen": -187.25914001464844, "logps/rejected": -146.9638671875, "loss": 0.1876, "rewards/accuracies": 0.375, "rewards/chosen": -0.00047091051237657666, "rewards/margins": -0.0006188965635374188, "rewards/rejected": 0.00014798599295318127, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.7985611510791368e-07, "logits/chosen": -2.093867063522339, "logits/rejected": -1.7798885107040405, "logps/chosen": -271.8372802734375, "logps/rejected": -197.7427978515625, "loss": 0.161, "rewards/accuracies": 0.5, "rewards/chosen": 0.0006424393504858017, "rewards/margins": 0.0006549443351104856, "rewards/rejected": -1.2505089216574561e-05, "step": 30 }, { "epoch": 0.0, "learning_rate": 2.3980815347721825e-07, "logits/chosen": -1.8111674785614014, "logits/rejected": -1.651614785194397, "logps/chosen": -180.64151000976562, "logps/rejected": -205.8025360107422, "loss": 0.1737, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0005901036784052849, "rewards/margins": 0.0005529513582587242, "rewards/rejected": 3.7152261938899755e-05, "step": 40 }, { "epoch": 0.01, "learning_rate": 2.997601918465228e-07, "logits/chosen": -2.04856538772583, "logits/rejected": -1.7901275157928467, "logps/chosen": -215.7578582763672, "logps/rejected": -220.8831024169922, "loss": 0.2306, "rewards/accuracies": 0.5, "rewards/chosen": -0.00023345758381765336, "rewards/margins": 8.568236808059737e-05, "rewards/rejected": -0.00031913991551846266, "step": 50 }, { "epoch": 0.01, "learning_rate": 3.5971223021582736e-07, "logits/chosen": -2.021206855773926, "logits/rejected": -1.532591462135315, "logps/chosen": -217.2874298095703, "logps/rejected": -155.38461303710938, "loss": 0.1551, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0003670873702503741, "rewards/margins": 0.004133955575525761, "rewards/rejected": -0.004501043353229761, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.1966426858513196e-07, "logits/chosen": -2.06149959564209, "logits/rejected": -1.6334540843963623, "logps/chosen": -219.23593139648438, "logps/rejected": -171.34017944335938, "loss": 0.1949, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0024479026906192303, "rewards/margins": 0.001299393828958273, "rewards/rejected": -0.0037472962867468596, "step": 70 }, { "epoch": 0.01, "learning_rate": 4.796163069544365e-07, "logits/chosen": -2.0656137466430664, "logits/rejected": -1.6331923007965088, "logps/chosen": -287.973876953125, "logps/rejected": -253.8162384033203, "loss": 0.1885, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.006006647367030382, "rewards/margins": 0.0021041277796030045, "rewards/rejected": -0.0081107746809721, "step": 80 }, { "epoch": 0.01, "learning_rate": 5.39568345323741e-07, "logits/chosen": -2.0038671493530273, "logits/rejected": -1.5671374797821045, "logps/chosen": -227.6986083984375, "logps/rejected": -173.5839385986328, "loss": 0.1738, "rewards/accuracies": 0.625, "rewards/chosen": -0.004840956535190344, "rewards/margins": 0.010292068123817444, "rewards/rejected": -0.015133025124669075, "step": 90 }, { "epoch": 0.01, "learning_rate": 5.995203836930456e-07, "logits/chosen": -1.8383821249008179, "logits/rejected": -1.9401063919067383, "logps/chosen": -162.48382568359375, "logps/rejected": -229.74935913085938, "loss": 0.1897, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.013102886267006397, "rewards/margins": 0.002711429027840495, "rewards/rejected": -0.015814315527677536, "step": 100 }, { "epoch": 0.01, "learning_rate": 6.594724220623502e-07, "logits/chosen": -2.0046496391296387, "logits/rejected": -1.519207239151001, "logps/chosen": -167.69444274902344, "logps/rejected": -131.64898681640625, "loss": 0.2673, "rewards/accuracies": 0.5, "rewards/chosen": -0.02011437714099884, "rewards/margins": 0.024061836302280426, "rewards/rejected": -0.04417620971798897, "step": 110 }, { "epoch": 0.01, "learning_rate": 7.194244604316547e-07, "logits/chosen": -2.0810534954071045, "logits/rejected": -1.8244158029556274, "logps/chosen": -236.47250366210938, "logps/rejected": -229.98471069335938, "loss": 0.2358, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.011348506435751915, "rewards/margins": 0.03258121386170387, "rewards/rejected": -0.04392971843481064, "step": 120 }, { "epoch": 0.02, "learning_rate": 7.793764988009593e-07, "logits/chosen": -1.749682068824768, "logits/rejected": -1.4685245752334595, "logps/chosen": -212.50454711914062, "logps/rejected": -210.17977905273438, "loss": 0.2316, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.017982326447963715, "rewards/margins": 0.043081801384687424, "rewards/rejected": -0.06106413155794144, "step": 130 }, { "epoch": 0.02, "learning_rate": 8.393285371702639e-07, "logits/chosen": -1.8063485622406006, "logits/rejected": -1.7413132190704346, "logps/chosen": -159.96786499023438, "logps/rejected": -218.73233032226562, "loss": 0.1663, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.042347490787506104, "rewards/margins": 0.051009368151426315, "rewards/rejected": -0.09335686266422272, "step": 140 }, { "epoch": 0.02, "learning_rate": 8.992805755395684e-07, "logits/chosen": -1.974597692489624, "logits/rejected": -1.5300289392471313, "logps/chosen": -221.36166381835938, "logps/rejected": -209.5833282470703, "loss": 0.1912, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08563482016324997, "rewards/margins": 0.06949006021022797, "rewards/rejected": -0.15512490272521973, "step": 150 }, { "epoch": 0.02, "learning_rate": 9.59232613908873e-07, "logits/chosen": -1.9377422332763672, "logits/rejected": -1.4812471866607666, "logps/chosen": -214.61581420898438, "logps/rejected": -158.63467407226562, "loss": 0.19, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19673360884189606, "rewards/margins": 0.11003688722848892, "rewards/rejected": -0.3067705035209656, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.0191846522781776e-06, "logits/chosen": -1.8835103511810303, "logits/rejected": -1.6908140182495117, "logps/chosen": -232.00540161132812, "logps/rejected": -224.3048095703125, "loss": 0.1984, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.32456761598587036, "rewards/margins": 0.03511672466993332, "rewards/rejected": -0.35968437790870667, "step": 170 }, { "epoch": 0.02, "learning_rate": 1.079136690647482e-06, "logits/chosen": -2.0397212505340576, "logits/rejected": -1.7811028957366943, "logps/chosen": -209.57711791992188, "logps/rejected": -219.6051483154297, "loss": 0.1525, "rewards/accuracies": 0.5, "rewards/chosen": -0.20149996876716614, "rewards/margins": 0.10627492517232895, "rewards/rejected": -0.3077749013900757, "step": 180 }, { "epoch": 0.02, "learning_rate": 1.1390887290167866e-06, "logits/chosen": -2.0456180572509766, "logits/rejected": -1.645538091659546, "logps/chosen": -298.3092346191406, "logps/rejected": -260.2908020019531, "loss": 0.0944, "rewards/accuracies": 0.75, "rewards/chosen": -0.1598762422800064, "rewards/margins": 0.14658963680267334, "rewards/rejected": -0.30646592378616333, "step": 190 }, { "epoch": 0.02, "learning_rate": 1.1990407673860912e-06, "logits/chosen": -2.1075217723846436, "logits/rejected": -1.8570976257324219, "logps/chosen": -279.629638671875, "logps/rejected": -273.2984924316406, "loss": 0.171, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.15068146586418152, "rewards/margins": 0.04055650904774666, "rewards/rejected": -0.19123797118663788, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.2589928057553958e-06, "logits/chosen": -2.0482380390167236, "logits/rejected": -1.786058783531189, "logps/chosen": -206.5548858642578, "logps/rejected": -211.97860717773438, "loss": 0.1609, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10160304605960846, "rewards/margins": 0.0669463574886322, "rewards/rejected": -0.16854938864707947, "step": 210 }, { "epoch": 0.03, "learning_rate": 1.3189448441247004e-06, "logits/chosen": -2.084348678588867, "logits/rejected": -1.6052637100219727, "logps/chosen": -251.0631866455078, "logps/rejected": -206.6727752685547, "loss": 0.1085, "rewards/accuracies": 0.625, "rewards/chosen": -0.13653235137462616, "rewards/margins": 0.06996998935937881, "rewards/rejected": -0.20650234818458557, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.378896882494005e-06, "logits/chosen": -2.140784502029419, "logits/rejected": -1.4730150699615479, "logps/chosen": -233.042236328125, "logps/rejected": -207.8023681640625, "loss": 0.1887, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13673333823680878, "rewards/margins": 0.11993058770895004, "rewards/rejected": -0.25666388869285583, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.4388489208633094e-06, "logits/chosen": -1.9343608617782593, "logits/rejected": -1.6981518268585205, "logps/chosen": -254.0943145751953, "logps/rejected": -275.8316650390625, "loss": 0.1273, "rewards/accuracies": 0.5, "rewards/chosen": -0.13066630065441132, "rewards/margins": 0.07596530020236969, "rewards/rejected": -0.2066315859556198, "step": 240 }, { "epoch": 0.03, "learning_rate": 1.4988009592326142e-06, "logits/chosen": -1.952183485031128, "logits/rejected": -1.8142400979995728, "logps/chosen": -236.6318359375, "logps/rejected": -268.00537109375, "loss": 0.17, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22585809230804443, "rewards/margins": 0.0762966051697731, "rewards/rejected": -0.30215469002723694, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.5587529976019186e-06, "logits/chosen": -1.9224618673324585, "logits/rejected": -1.6340528726577759, "logps/chosen": -196.9944305419922, "logps/rejected": -205.39712524414062, "loss": 0.2002, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2947540879249573, "rewards/margins": 0.09202824532985687, "rewards/rejected": -0.38678231835365295, "step": 260 }, { "epoch": 0.03, "learning_rate": 1.618705035971223e-06, "logits/chosen": -1.6605432033538818, "logits/rejected": -1.3576246500015259, "logps/chosen": -190.0495147705078, "logps/rejected": -203.37741088867188, "loss": 0.1713, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2563242018222809, "rewards/margins": 0.13959848880767822, "rewards/rejected": -0.3959227204322815, "step": 270 }, { "epoch": 0.03, "learning_rate": 1.6786570743405278e-06, "logits/chosen": -2.081996440887451, "logits/rejected": -1.8335282802581787, "logps/chosen": -206.75241088867188, "logps/rejected": -198.82595825195312, "loss": 0.1588, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2610263228416443, "rewards/margins": 0.1122390478849411, "rewards/rejected": -0.3732653856277466, "step": 280 }, { "epoch": 0.03, "learning_rate": 1.7386091127098322e-06, "logits/chosen": -2.127209186553955, "logits/rejected": -1.662936806678772, "logps/chosen": -278.27166748046875, "logps/rejected": -271.78399658203125, "loss": 0.1299, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3486752510070801, "rewards/margins": 0.12807399034500122, "rewards/rejected": -0.4767492413520813, "step": 290 }, { "epoch": 0.04, "learning_rate": 1.7985611510791368e-06, "logits/chosen": -1.844478964805603, "logits/rejected": -1.6861326694488525, "logps/chosen": -323.74169921875, "logps/rejected": -350.01019287109375, "loss": 0.154, "rewards/accuracies": 0.625, "rewards/chosen": -0.4493889808654785, "rewards/margins": 0.10103818029165268, "rewards/rejected": -0.5504271388053894, "step": 300 }, { "epoch": 0.04, "learning_rate": 1.8585131894484414e-06, "logits/chosen": -2.01519513130188, "logits/rejected": -1.7900644540786743, "logps/chosen": -268.0411071777344, "logps/rejected": -255.30453491210938, "loss": 0.1899, "rewards/accuracies": 0.75, "rewards/chosen": -0.4563392698764801, "rewards/margins": 0.09228341281414032, "rewards/rejected": -0.5486227869987488, "step": 310 }, { "epoch": 0.04, "learning_rate": 1.918465227817746e-06, "logits/chosen": -1.924393653869629, "logits/rejected": -1.5130094289779663, "logps/chosen": -186.55593872070312, "logps/rejected": -182.58775329589844, "loss": 0.166, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4110238552093506, "rewards/margins": 0.16674764454364777, "rewards/rejected": -0.5777715444564819, "step": 320 }, { "epoch": 0.04, "learning_rate": 1.9784172661870504e-06, "logits/chosen": -2.1037185192108154, "logits/rejected": -1.9657186269760132, "logps/chosen": -218.725830078125, "logps/rejected": -252.91024780273438, "loss": 0.1719, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35161924362182617, "rewards/margins": 0.0837181806564331, "rewards/rejected": -0.4353373944759369, "step": 330 }, { "epoch": 0.04, "learning_rate": 2.0383693045563552e-06, "logits/chosen": -2.0712966918945312, "logits/rejected": -1.9187800884246826, "logps/chosen": -201.36724853515625, "logps/rejected": -223.52041625976562, "loss": 0.1685, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2922331988811493, "rewards/margins": 0.11911450326442719, "rewards/rejected": -0.41134771704673767, "step": 340 }, { "epoch": 0.04, "learning_rate": 2.0983213429256596e-06, "logits/chosen": -1.9400737285614014, "logits/rejected": -1.7396290302276611, "logps/chosen": -189.12892150878906, "logps/rejected": -177.9490509033203, "loss": 0.2454, "rewards/accuracies": 0.5, "rewards/chosen": -0.25958988070487976, "rewards/margins": 0.03190717101097107, "rewards/rejected": -0.29149705171585083, "step": 350 }, { "epoch": 0.04, "learning_rate": 2.158273381294964e-06, "logits/chosen": -2.010946035385132, "logits/rejected": -1.7258754968643188, "logps/chosen": -230.20706176757812, "logps/rejected": -224.4259033203125, "loss": 0.1737, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.28419947624206543, "rewards/margins": 0.1424468755722046, "rewards/rejected": -0.4266463816165924, "step": 360 }, { "epoch": 0.04, "learning_rate": 2.218225419664269e-06, "logits/chosen": -1.7381842136383057, "logits/rejected": -1.4838992357254028, "logps/chosen": -234.2359619140625, "logps/rejected": -248.5423583984375, "loss": 0.0967, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3407961428165436, "rewards/margins": 0.10874161869287491, "rewards/rejected": -0.4495377540588379, "step": 370 }, { "epoch": 0.05, "learning_rate": 2.2781774580335732e-06, "logits/chosen": -2.022749423980713, "logits/rejected": -1.4032447338104248, "logps/chosen": -256.8328552246094, "logps/rejected": -192.03329467773438, "loss": 0.1595, "rewards/accuracies": 0.75, "rewards/chosen": -0.2618308663368225, "rewards/margins": 0.14256241917610168, "rewards/rejected": -0.4043932855129242, "step": 380 }, { "epoch": 0.05, "learning_rate": 2.3381294964028776e-06, "logits/chosen": -1.7514533996582031, "logits/rejected": -1.6247230768203735, "logps/chosen": -214.0389862060547, "logps/rejected": -226.91531372070312, "loss": 0.1191, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2733401656150818, "rewards/margins": 0.11437875032424927, "rewards/rejected": -0.38771897554397583, "step": 390 }, { "epoch": 0.05, "learning_rate": 2.3980815347721824e-06, "logits/chosen": -1.9063535928726196, "logits/rejected": -1.5726300477981567, "logps/chosen": -222.97158813476562, "logps/rejected": -203.55575561523438, "loss": 0.15, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2823329567909241, "rewards/margins": 0.119574174284935, "rewards/rejected": -0.40190714597702026, "step": 400 }, { "epoch": 0.05, "learning_rate": 2.458033573141487e-06, "logits/chosen": -1.9088506698608398, "logits/rejected": -1.5609016418457031, "logps/chosen": -262.8750915527344, "logps/rejected": -279.5753479003906, "loss": 0.1665, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3804900348186493, "rewards/margins": 0.17719073593616486, "rewards/rejected": -0.5576807856559753, "step": 410 }, { "epoch": 0.05, "learning_rate": 2.5179856115107916e-06, "logits/chosen": -1.9670276641845703, "logits/rejected": -1.5786240100860596, "logps/chosen": -247.9984588623047, "logps/rejected": -261.22015380859375, "loss": 0.103, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.306973397731781, "rewards/margins": 0.17263731360435486, "rewards/rejected": -0.4796106815338135, "step": 420 }, { "epoch": 0.05, "learning_rate": 2.577937649880096e-06, "logits/chosen": -1.9811060428619385, "logits/rejected": -1.8827531337738037, "logps/chosen": -216.9333953857422, "logps/rejected": -262.21697998046875, "loss": 0.2221, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.36211085319519043, "rewards/margins": 0.09959669411182404, "rewards/rejected": -0.46170753240585327, "step": 430 }, { "epoch": 0.05, "learning_rate": 2.637889688249401e-06, "logits/chosen": -2.250277042388916, "logits/rejected": -1.69893479347229, "logps/chosen": -390.80206298828125, "logps/rejected": -284.90594482421875, "loss": 0.1114, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.28247594833374023, "rewards/margins": 0.12760603427886963, "rewards/rejected": -0.4100819528102875, "step": 440 }, { "epoch": 0.05, "learning_rate": 2.6978417266187052e-06, "logits/chosen": -1.8655027151107788, "logits/rejected": -1.5759456157684326, "logps/chosen": -273.0975036621094, "logps/rejected": -323.8568420410156, "loss": 0.1357, "rewards/accuracies": 0.625, "rewards/chosen": -0.33328741788864136, "rewards/margins": 0.1260485053062439, "rewards/rejected": -0.45933595299720764, "step": 450 }, { "epoch": 0.06, "learning_rate": 2.75779376498801e-06, "logits/chosen": -2.09033203125, "logits/rejected": -1.8335535526275635, "logps/chosen": -276.64398193359375, "logps/rejected": -283.84088134765625, "loss": 0.1603, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2383461743593216, "rewards/margins": 0.10929499566555023, "rewards/rejected": -0.3476411700248718, "step": 460 }, { "epoch": 0.06, "learning_rate": 2.8177458033573145e-06, "logits/chosen": -1.972602128982544, "logits/rejected": -1.670013427734375, "logps/chosen": -201.99130249023438, "logps/rejected": -196.4750518798828, "loss": 0.1939, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2819952368736267, "rewards/margins": 0.0845649391412735, "rewards/rejected": -0.3665602207183838, "step": 470 }, { "epoch": 0.06, "learning_rate": 2.877697841726619e-06, "logits/chosen": -1.984262228012085, "logits/rejected": -1.9005893468856812, "logps/chosen": -280.5291442871094, "logps/rejected": -247.97720336914062, "loss": 0.1729, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.35804125666618347, "rewards/margins": 0.015171018429100513, "rewards/rejected": -0.3732122778892517, "step": 480 }, { "epoch": 0.06, "learning_rate": 2.9376498800959237e-06, "logits/chosen": -2.09686541557312, "logits/rejected": -1.6708438396453857, "logps/chosen": -238.1042022705078, "logps/rejected": -228.37350463867188, "loss": 0.1234, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3339739739894867, "rewards/margins": 0.13829635083675385, "rewards/rejected": -0.47227030992507935, "step": 490 }, { "epoch": 0.06, "learning_rate": 2.9976019184652285e-06, "logits/chosen": -1.8362640142440796, "logits/rejected": -1.5808006525039673, "logps/chosen": -247.06204223632812, "logps/rejected": -234.0608673095703, "loss": 0.1674, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24948985874652863, "rewards/margins": 0.061165668070316315, "rewards/rejected": -0.31065553426742554, "step": 500 }, { "epoch": 0.06, "learning_rate": 3.0575539568345324e-06, "logits/chosen": -2.108161449432373, "logits/rejected": -1.9195600748062134, "logps/chosen": -204.02523803710938, "logps/rejected": -211.36361694335938, "loss": 0.1593, "rewards/accuracies": 0.625, "rewards/chosen": -0.23356959223747253, "rewards/margins": 0.07123871147632599, "rewards/rejected": -0.3048083186149597, "step": 510 }, { "epoch": 0.06, "learning_rate": 3.1175059952038373e-06, "logits/chosen": -1.8966169357299805, "logits/rejected": -1.4281069040298462, "logps/chosen": -257.47454833984375, "logps/rejected": -211.0913543701172, "loss": 0.1349, "rewards/accuracies": 0.625, "rewards/chosen": -0.2831900119781494, "rewards/margins": 0.1322624385356903, "rewards/rejected": -0.41545242071151733, "step": 520 }, { "epoch": 0.06, "learning_rate": 3.177458033573142e-06, "logits/chosen": -2.059906482696533, "logits/rejected": -1.6643825769424438, "logps/chosen": -228.86172485351562, "logps/rejected": -192.63961791992188, "loss": 0.1881, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3060818314552307, "rewards/margins": 0.1667357236146927, "rewards/rejected": -0.4728175103664398, "step": 530 }, { "epoch": 0.06, "learning_rate": 3.237410071942446e-06, "logits/chosen": -1.9420620203018188, "logits/rejected": -1.6259702444076538, "logps/chosen": -239.92581176757812, "logps/rejected": -244.3850555419922, "loss": 0.1508, "rewards/accuracies": 0.625, "rewards/chosen": -0.26758164167404175, "rewards/margins": 0.09906923025846481, "rewards/rejected": -0.36665090918540955, "step": 540 }, { "epoch": 0.07, "learning_rate": 3.297362110311751e-06, "logits/chosen": -2.0125441551208496, "logits/rejected": -1.77133047580719, "logps/chosen": -234.72738647460938, "logps/rejected": -229.4871368408203, "loss": 0.0946, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.36053597927093506, "rewards/margins": 0.1191805824637413, "rewards/rejected": -0.47971653938293457, "step": 550 }, { "epoch": 0.07, "learning_rate": 3.3573141486810557e-06, "logits/chosen": -1.9631010293960571, "logits/rejected": -1.7263110876083374, "logps/chosen": -264.1136169433594, "logps/rejected": -245.3027801513672, "loss": 0.1203, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.26941028237342834, "rewards/margins": 0.09951646625995636, "rewards/rejected": -0.3689267337322235, "step": 560 }, { "epoch": 0.07, "learning_rate": 3.4172661870503596e-06, "logits/chosen": -2.090639114379883, "logits/rejected": -1.617297887802124, "logps/chosen": -253.80575561523438, "logps/rejected": -249.5729217529297, "loss": 0.1359, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16714581847190857, "rewards/margins": 0.15788118541240692, "rewards/rejected": -0.3250270485877991, "step": 570 }, { "epoch": 0.07, "learning_rate": 3.4772182254196645e-06, "logits/chosen": -2.1856539249420166, "logits/rejected": -1.682244896888733, "logps/chosen": -291.01373291015625, "logps/rejected": -219.6407470703125, "loss": 0.2091, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12654975056648254, "rewards/margins": 0.10359915345907211, "rewards/rejected": -0.23014888167381287, "step": 580 }, { "epoch": 0.07, "learning_rate": 3.5371702637889693e-06, "logits/chosen": -1.8060592412948608, "logits/rejected": -1.5262947082519531, "logps/chosen": -242.6085968017578, "logps/rejected": -213.41592407226562, "loss": 0.1047, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.306922972202301, "rewards/margins": 0.15780650079250336, "rewards/rejected": -0.4647294580936432, "step": 590 }, { "epoch": 0.07, "learning_rate": 3.5971223021582737e-06, "logits/chosen": -1.937294602394104, "logits/rejected": -1.5673654079437256, "logps/chosen": -255.3214874267578, "logps/rejected": -233.734619140625, "loss": 0.1647, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5621557831764221, "rewards/margins": 0.1001255139708519, "rewards/rejected": -0.6622812747955322, "step": 600 }, { "epoch": 0.07, "learning_rate": 3.657074340527578e-06, "logits/chosen": -1.9981091022491455, "logits/rejected": -1.8456417322158813, "logps/chosen": -284.6530456542969, "logps/rejected": -266.3650207519531, "loss": 0.1632, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5589288473129272, "rewards/margins": 0.10421963781118393, "rewards/rejected": -0.6631485223770142, "step": 610 }, { "epoch": 0.07, "learning_rate": 3.717026378896883e-06, "logits/chosen": -1.7741851806640625, "logits/rejected": -1.5398646593093872, "logps/chosen": -253.56478881835938, "logps/rejected": -286.1531066894531, "loss": 0.1523, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6353442072868347, "rewards/margins": 0.179083913564682, "rewards/rejected": -0.8144281506538391, "step": 620 }, { "epoch": 0.08, "learning_rate": 3.7769784172661873e-06, "logits/chosen": -1.7653591632843018, "logits/rejected": -1.5256303548812866, "logps/chosen": -309.58636474609375, "logps/rejected": -303.3143615722656, "loss": 0.0848, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5480080842971802, "rewards/margins": 0.16620375216007233, "rewards/rejected": -0.7142117023468018, "step": 630 }, { "epoch": 0.08, "learning_rate": 3.836930455635492e-06, "logits/chosen": -1.8177156448364258, "logits/rejected": -1.576643705368042, "logps/chosen": -297.94183349609375, "logps/rejected": -282.4010314941406, "loss": 0.1422, "rewards/accuracies": 0.625, "rewards/chosen": -0.5166171193122864, "rewards/margins": 0.1333894580602646, "rewards/rejected": -0.6500065922737122, "step": 640 }, { "epoch": 0.08, "learning_rate": 3.896882494004797e-06, "logits/chosen": -1.9186363220214844, "logits/rejected": -1.5935009717941284, "logps/chosen": -284.55377197265625, "logps/rejected": -241.8046875, "loss": 0.1217, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.34077686071395874, "rewards/margins": 0.04739413410425186, "rewards/rejected": -0.38817098736763, "step": 650 }, { "epoch": 0.08, "learning_rate": 3.956834532374101e-06, "logits/chosen": -1.8101074695587158, "logits/rejected": -1.6274917125701904, "logps/chosen": -198.5729522705078, "logps/rejected": -253.2472686767578, "loss": 0.1605, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3132355511188507, "rewards/margins": 0.13122674822807312, "rewards/rejected": -0.44446223974227905, "step": 660 }, { "epoch": 0.08, "learning_rate": 4.016786570743406e-06, "logits/chosen": -1.9853845834732056, "logits/rejected": -1.614189863204956, "logps/chosen": -191.61289978027344, "logps/rejected": -172.0723114013672, "loss": 0.0808, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.32061904668807983, "rewards/margins": 0.17392602562904358, "rewards/rejected": -0.4945450723171234, "step": 670 }, { "epoch": 0.08, "learning_rate": 4.0767386091127105e-06, "logits/chosen": -1.895282506942749, "logits/rejected": -1.4025895595550537, "logps/chosen": -287.64459228515625, "logps/rejected": -237.231201171875, "loss": 0.1305, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34143179655075073, "rewards/margins": 0.1731572449207306, "rewards/rejected": -0.5145890116691589, "step": 680 }, { "epoch": 0.08, "learning_rate": 4.1366906474820145e-06, "logits/chosen": -1.9859917163848877, "logits/rejected": -1.811034917831421, "logps/chosen": -267.02532958984375, "logps/rejected": -248.8046417236328, "loss": 0.1599, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4106379449367523, "rewards/margins": 0.09379793703556061, "rewards/rejected": -0.5044358968734741, "step": 690 }, { "epoch": 0.08, "learning_rate": 4.196642685851319e-06, "logits/chosen": -2.0239920616149902, "logits/rejected": -1.7091875076293945, "logps/chosen": -163.7296600341797, "logps/rejected": -176.8492431640625, "loss": 0.1985, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.22491566836833954, "rewards/margins": 0.10180320590734482, "rewards/rejected": -0.32671886682510376, "step": 700 }, { "epoch": 0.09, "learning_rate": 4.256594724220624e-06, "logits/chosen": -2.042534828186035, "logits/rejected": -1.7031242847442627, "logps/chosen": -243.00247192382812, "logps/rejected": -238.3159637451172, "loss": 0.1317, "rewards/accuracies": 0.75, "rewards/chosen": -0.22081628441810608, "rewards/margins": 0.13557776808738708, "rewards/rejected": -0.3563940227031708, "step": 710 }, { "epoch": 0.09, "learning_rate": 4.316546762589928e-06, "logits/chosen": -2.0568203926086426, "logits/rejected": -1.7259998321533203, "logps/chosen": -263.7857666015625, "logps/rejected": -256.0942077636719, "loss": 0.0914, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3193433880805969, "rewards/margins": 0.14199210703372955, "rewards/rejected": -0.4613354802131653, "step": 720 }, { "epoch": 0.09, "learning_rate": 4.376498800959233e-06, "logits/chosen": -1.8908824920654297, "logits/rejected": -1.599169135093689, "logps/chosen": -250.081787109375, "logps/rejected": -221.8983917236328, "loss": 0.176, "rewards/accuracies": 0.625, "rewards/chosen": -0.42117080092430115, "rewards/margins": 0.1439433991909027, "rewards/rejected": -0.5651142001152039, "step": 730 }, { "epoch": 0.09, "learning_rate": 4.436450839328538e-06, "logits/chosen": -1.81271231174469, "logits/rejected": -1.4758561849594116, "logps/chosen": -237.4412841796875, "logps/rejected": -207.1601104736328, "loss": 0.189, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4822634756565094, "rewards/margins": 0.16576936841011047, "rewards/rejected": -0.6480327844619751, "step": 740 }, { "epoch": 0.09, "learning_rate": 4.496402877697842e-06, "logits/chosen": -2.2231035232543945, "logits/rejected": -1.774987816810608, "logps/chosen": -291.08026123046875, "logps/rejected": -247.5023193359375, "loss": 0.09, "rewards/accuracies": 0.625, "rewards/chosen": -0.4788135886192322, "rewards/margins": 0.17467689514160156, "rewards/rejected": -0.6534904837608337, "step": 750 }, { "epoch": 0.09, "learning_rate": 4.5563549160671465e-06, "logits/chosen": -1.9474821090698242, "logits/rejected": -1.523559808731079, "logps/chosen": -256.2854919433594, "logps/rejected": -199.56689453125, "loss": 0.2096, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.525553286075592, "rewards/margins": 0.10277509689331055, "rewards/rejected": -0.6283284425735474, "step": 760 }, { "epoch": 0.09, "learning_rate": 4.616306954436451e-06, "logits/chosen": -1.9608790874481201, "logits/rejected": -1.600482702255249, "logps/chosen": -234.0125274658203, "logps/rejected": -219.60494995117188, "loss": 0.1606, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.560684084892273, "rewards/margins": 0.11568351089954376, "rewards/rejected": -0.6763675808906555, "step": 770 }, { "epoch": 0.09, "learning_rate": 4.676258992805755e-06, "logits/chosen": -1.9117473363876343, "logits/rejected": -1.6412442922592163, "logps/chosen": -225.1289520263672, "logps/rejected": -219.20339965820312, "loss": 0.1631, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4210142195224762, "rewards/margins": 0.12721626460552216, "rewards/rejected": -0.5482303500175476, "step": 780 }, { "epoch": 0.09, "learning_rate": 4.73621103117506e-06, "logits/chosen": -1.8688926696777344, "logits/rejected": -1.7802883386611938, "logps/chosen": -220.6570281982422, "logps/rejected": -236.92758178710938, "loss": 0.1411, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.46808117628097534, "rewards/margins": 0.0750129297375679, "rewards/rejected": -0.5430941581726074, "step": 790 }, { "epoch": 0.1, "learning_rate": 4.796163069544365e-06, "logits/chosen": -2.0156643390655518, "logits/rejected": -1.6202924251556396, "logps/chosen": -296.9110412597656, "logps/rejected": -225.12802124023438, "loss": 0.1625, "rewards/accuracies": 0.625, "rewards/chosen": -0.5204383730888367, "rewards/margins": 0.12295063585042953, "rewards/rejected": -0.6433890461921692, "step": 800 }, { "epoch": 0.1, "learning_rate": 4.856115107913669e-06, "logits/chosen": -1.9585834741592407, "logits/rejected": -1.6052249670028687, "logps/chosen": -224.037353515625, "logps/rejected": -237.3111572265625, "loss": 0.1651, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4183143675327301, "rewards/margins": 0.10342450439929962, "rewards/rejected": -0.5217388868331909, "step": 810 }, { "epoch": 0.1, "learning_rate": 4.916067146282974e-06, "logits/chosen": -2.0472521781921387, "logits/rejected": -1.5536268949508667, "logps/chosen": -236.7208251953125, "logps/rejected": -195.6331787109375, "loss": 0.137, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.36916661262512207, "rewards/margins": 0.11299363523721695, "rewards/rejected": -0.4821602702140808, "step": 820 }, { "epoch": 0.1, "learning_rate": 4.9760191846522785e-06, "logits/chosen": -1.9616940021514893, "logits/rejected": -1.7898718118667603, "logps/chosen": -208.87741088867188, "logps/rejected": -228.8385467529297, "loss": 0.1618, "rewards/accuracies": 0.75, "rewards/chosen": -0.3800353407859802, "rewards/margins": 0.12817123532295227, "rewards/rejected": -0.5082066059112549, "step": 830 }, { "epoch": 0.1, "learning_rate": 4.9999921064257284e-06, "logits/chosen": -1.8127410411834717, "logits/rejected": -1.3970979452133179, "logps/chosen": -278.39019775390625, "logps/rejected": -293.3469543457031, "loss": 0.1529, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4711076319217682, "rewards/margins": 0.15813672542572021, "rewards/rejected": -0.629244327545166, "step": 840 }, { "epoch": 0.1, "learning_rate": 4.9999438680968e-06, "logits/chosen": -1.7962703704833984, "logits/rejected": -1.365013837814331, "logps/chosen": -249.14828491210938, "logps/rejected": -232.2996063232422, "loss": 0.1558, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.6188076734542847, "rewards/margins": 0.18656638264656067, "rewards/rejected": -0.8053741455078125, "step": 850 }, { "epoch": 0.1, "learning_rate": 4.999851777603122e-06, "logits/chosen": -1.9362220764160156, "logits/rejected": -1.697091817855835, "logps/chosen": -292.0437927246094, "logps/rejected": -294.3341369628906, "loss": 0.1133, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5470017194747925, "rewards/margins": 0.12919080257415771, "rewards/rejected": -0.6761925220489502, "step": 860 }, { "epoch": 0.1, "learning_rate": 4.999715836560074e-06, "logits/chosen": -1.907859206199646, "logits/rejected": -1.5174684524536133, "logps/chosen": -218.5150909423828, "logps/rejected": -220.261474609375, "loss": 0.2033, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.3629061281681061, "rewards/margins": 0.12141053378582001, "rewards/rejected": -0.4843166768550873, "step": 870 }, { "epoch": 0.11, "learning_rate": 4.999536047352236e-06, "logits/chosen": -1.909115195274353, "logits/rejected": -1.6440328359603882, "logps/chosen": -206.3199005126953, "logps/rejected": -194.60552978515625, "loss": 0.1926, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3590332567691803, "rewards/margins": 0.04666576534509659, "rewards/rejected": -0.4056990146636963, "step": 880 }, { "epoch": 0.11, "learning_rate": 4.999312413133335e-06, "logits/chosen": -2.02655029296875, "logits/rejected": -1.555143117904663, "logps/chosen": -290.0566711425781, "logps/rejected": -258.43951416015625, "loss": 0.1552, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21334341168403625, "rewards/margins": 0.13854533433914185, "rewards/rejected": -0.3518887460231781, "step": 890 }, { "epoch": 0.11, "learning_rate": 4.999044937826198e-06, "logits/chosen": -1.7870111465454102, "logits/rejected": -1.3078781366348267, "logps/chosen": -232.5644073486328, "logps/rejected": -228.1163330078125, "loss": 0.167, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22880849242210388, "rewards/margins": 0.1350013017654419, "rewards/rejected": -0.36380982398986816, "step": 900 }, { "epoch": 0.11, "learning_rate": 4.998733626122679e-06, "logits/chosen": -1.9320144653320312, "logits/rejected": -1.7529146671295166, "logps/chosen": -230.58480834960938, "logps/rejected": -215.9065399169922, "loss": 0.1188, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2430393248796463, "rewards/margins": 0.10922437906265259, "rewards/rejected": -0.3522637188434601, "step": 910 }, { "epoch": 0.11, "learning_rate": 4.998378483483577e-06, "logits/chosen": -2.0543458461761475, "logits/rejected": -1.576468586921692, "logps/chosen": -215.76681518554688, "logps/rejected": -143.02210998535156, "loss": 0.1665, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19551251828670502, "rewards/margins": 0.15126529335975647, "rewards/rejected": -0.3467778265476227, "step": 920 }, { "epoch": 0.11, "learning_rate": 4.997979516138542e-06, "logits/chosen": -1.7736568450927734, "logits/rejected": -1.427062749862671, "logps/chosen": -193.51675415039062, "logps/rejected": -190.2722625732422, "loss": 0.1579, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2125972956418991, "rewards/margins": 0.12206624448299408, "rewards/rejected": -0.3346635401248932, "step": 930 }, { "epoch": 0.11, "learning_rate": 4.997536731085962e-06, "logits/chosen": -2.076658248901367, "logits/rejected": -1.6957495212554932, "logps/chosen": -275.86004638671875, "logps/rejected": -257.83856201171875, "loss": 0.1156, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15670546889305115, "rewards/margins": 0.1267530769109726, "rewards/rejected": -0.28345853090286255, "step": 940 }, { "epoch": 0.11, "learning_rate": 4.997050136092847e-06, "logits/chosen": -1.9965837001800537, "logits/rejected": -1.5267733335494995, "logps/chosen": -234.33804321289062, "logps/rejected": -168.39871215820312, "loss": 0.1527, "rewards/accuracies": 0.625, "rewards/chosen": -0.16364577412605286, "rewards/margins": 0.10411280393600464, "rewards/rejected": -0.2677585780620575, "step": 950 }, { "epoch": 0.12, "learning_rate": 4.996519739694684e-06, "logits/chosen": -1.9764916896820068, "logits/rejected": -1.8170015811920166, "logps/chosen": -261.2804260253906, "logps/rejected": -244.72708129882812, "loss": 0.1467, "rewards/accuracies": 0.5, "rewards/chosen": -0.2282881736755371, "rewards/margins": 0.0751405730843544, "rewards/rejected": -0.3034287393093109, "step": 960 }, { "epoch": 0.12, "learning_rate": 4.995945551195296e-06, "logits/chosen": -1.8942501544952393, "logits/rejected": -1.576650857925415, "logps/chosen": -216.11184692382812, "logps/rejected": -192.7135467529297, "loss": 0.1492, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2776258587837219, "rewards/margins": 0.18120935559272766, "rewards/rejected": -0.4588352143764496, "step": 970 }, { "epoch": 0.12, "learning_rate": 4.995327580666672e-06, "logits/chosen": -2.1704368591308594, "logits/rejected": -1.4965741634368896, "logps/chosen": -241.30697631835938, "logps/rejected": -190.32650756835938, "loss": 0.1123, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.27652624249458313, "rewards/margins": 0.24555762112140656, "rewards/rejected": -0.5220838785171509, "step": 980 }, { "epoch": 0.12, "learning_rate": 4.994665838948792e-06, "logits/chosen": -1.9886589050292969, "logits/rejected": -1.8707554340362549, "logps/chosen": -235.10440063476562, "logps/rejected": -289.9771423339844, "loss": 0.1256, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1920485645532608, "rewards/margins": 0.11377612501382828, "rewards/rejected": -0.3058246970176697, "step": 990 }, { "epoch": 0.12, "learning_rate": 4.993960337649441e-06, "logits/chosen": -2.0039737224578857, "logits/rejected": -1.454641342163086, "logps/chosen": -253.69454956054688, "logps/rejected": -198.70730590820312, "loss": 0.2516, "rewards/accuracies": 0.625, "rewards/chosen": -0.21102575957775116, "rewards/margins": 0.06709831953048706, "rewards/rejected": -0.2781240940093994, "step": 1000 }, { "epoch": 0.12, "learning_rate": 4.993211089144e-06, "logits/chosen": -1.9574140310287476, "logits/rejected": -1.4666115045547485, "logps/chosen": -264.51251220703125, "logps/rejected": -221.65234375, "loss": 0.1448, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2643020749092102, "rewards/margins": 0.15589205920696259, "rewards/rejected": -0.420194149017334, "step": 1010 }, { "epoch": 0.12, "learning_rate": 4.992418106575232e-06, "logits/chosen": -2.2091064453125, "logits/rejected": -1.704564094543457, "logps/chosen": -330.94915771484375, "logps/rejected": -254.61880493164062, "loss": 0.14, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.32784610986709595, "rewards/margins": 0.08099902421236038, "rewards/rejected": -0.40884512662887573, "step": 1020 }, { "epoch": 0.12, "learning_rate": 4.9915814038530505e-06, "logits/chosen": -2.091163396835327, "logits/rejected": -1.8074891567230225, "logps/chosen": -215.7232208251953, "logps/rejected": -192.0024871826172, "loss": 0.1666, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.18186353147029877, "rewards/margins": 0.08887914568185806, "rewards/rejected": -0.2707426846027374, "step": 1030 }, { "epoch": 0.12, "learning_rate": 4.990700995654274e-06, "logits/chosen": -2.0555949211120605, "logits/rejected": -1.72856867313385, "logps/chosen": -213.6540069580078, "logps/rejected": -205.2044677734375, "loss": 0.1204, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35042351484298706, "rewards/margins": 0.1402900069952011, "rewards/rejected": -0.49071353673934937, "step": 1040 }, { "epoch": 0.13, "learning_rate": 4.9897768974223726e-06, "logits/chosen": -2.166123867034912, "logits/rejected": -1.7946255207061768, "logps/chosen": -231.53933715820312, "logps/rejected": -211.31204223632812, "loss": 0.178, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20556874573230743, "rewards/margins": 0.16699795424938202, "rewards/rejected": -0.37256669998168945, "step": 1050 }, { "epoch": 0.13, "learning_rate": 4.9888091253671925e-06, "logits/chosen": -2.0272960662841797, "logits/rejected": -1.4488000869750977, "logps/chosen": -219.529052734375, "logps/rejected": -171.55601501464844, "loss": 0.1258, "rewards/accuracies": 0.625, "rewards/chosen": -0.1403190791606903, "rewards/margins": 0.1539257913827896, "rewards/rejected": -0.2942448556423187, "step": 1060 }, { "epoch": 0.13, "learning_rate": 4.9877976964646755e-06, "logits/chosen": -2.0916085243225098, "logits/rejected": -1.8679349422454834, "logps/chosen": -252.82199096679688, "logps/rejected": -225.4739532470703, "loss": 0.1693, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.19687017798423767, "rewards/margins": 0.07593067735433578, "rewards/rejected": -0.27280086278915405, "step": 1070 }, { "epoch": 0.13, "learning_rate": 4.986742628456559e-06, "logits/chosen": -2.1024928092956543, "logits/rejected": -1.534501075744629, "logps/chosen": -257.09710693359375, "logps/rejected": -182.4271240234375, "loss": 0.179, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15705084800720215, "rewards/margins": 0.11441371589899063, "rewards/rejected": -0.27146458625793457, "step": 1080 }, { "epoch": 0.13, "learning_rate": 4.985643939850063e-06, "logits/chosen": -2.1568455696105957, "logits/rejected": -1.6811933517456055, "logps/chosen": -275.54144287109375, "logps/rejected": -227.31417846679688, "loss": 0.1206, "rewards/accuracies": 0.625, "rewards/chosen": -0.2653849124908447, "rewards/margins": 0.15564067661762238, "rewards/rejected": -0.4210255742073059, "step": 1090 }, { "epoch": 0.13, "learning_rate": 4.984501649917573e-06, "logits/chosen": -1.9597032070159912, "logits/rejected": -1.5438346862792969, "logps/chosen": -223.2861785888672, "logps/rejected": -213.6127166748047, "loss": 0.1441, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.42509156465530396, "rewards/margins": 0.1514272391796112, "rewards/rejected": -0.5765187740325928, "step": 1100 }, { "epoch": 0.13, "learning_rate": 4.98331577869629e-06, "logits/chosen": -1.9955313205718994, "logits/rejected": -1.7289683818817139, "logps/chosen": -316.2930908203125, "logps/rejected": -297.17388916015625, "loss": 0.1012, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4474318027496338, "rewards/margins": 0.11068395525217056, "rewards/rejected": -0.5581157207489014, "step": 1110 }, { "epoch": 0.13, "learning_rate": 4.982086346987891e-06, "logits/chosen": -1.8174870014190674, "logits/rejected": -1.5796586275100708, "logps/chosen": -252.7262725830078, "logps/rejected": -244.24661254882812, "loss": 0.1905, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3619054853916168, "rewards/margins": 0.08687237650156021, "rewards/rejected": -0.44877785444259644, "step": 1120 }, { "epoch": 0.14, "learning_rate": 4.980813376358157e-06, "logits/chosen": -1.8165006637573242, "logits/rejected": -1.5728943347930908, "logps/chosen": -238.3287353515625, "logps/rejected": -264.8955993652344, "loss": 0.1116, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4419601857662201, "rewards/margins": 0.14847253262996674, "rewards/rejected": -0.5904327630996704, "step": 1130 }, { "epoch": 0.14, "learning_rate": 4.9794968891365955e-06, "logits/chosen": -1.9940223693847656, "logits/rejected": -1.602085828781128, "logps/chosen": -283.71234130859375, "logps/rejected": -260.02947998046875, "loss": 0.1771, "rewards/accuracies": 0.625, "rewards/chosen": -0.42881909012794495, "rewards/margins": 0.11037082970142365, "rewards/rejected": -0.5391899347305298, "step": 1140 }, { "epoch": 0.14, "learning_rate": 4.978136908416052e-06, "logits/chosen": -2.128349781036377, "logits/rejected": -1.6948553323745728, "logps/chosen": -193.53390502929688, "logps/rejected": -213.319091796875, "loss": 0.1714, "rewards/accuracies": 0.625, "rewards/chosen": -0.41444897651672363, "rewards/margins": 0.15515312552452087, "rewards/rejected": -0.5696021318435669, "step": 1150 }, { "epoch": 0.14, "learning_rate": 4.976733458052301e-06, "logits/chosen": -2.0403997898101807, "logits/rejected": -1.6035503149032593, "logps/chosen": -197.5802001953125, "logps/rejected": -190.8645782470703, "loss": 0.1026, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.46744871139526367, "rewards/margins": 0.2037152796983719, "rewards/rejected": -0.6711639165878296, "step": 1160 }, { "epoch": 0.14, "learning_rate": 4.975286562663629e-06, "logits/chosen": -2.0656538009643555, "logits/rejected": -1.832098364830017, "logps/chosen": -282.55438232421875, "logps/rejected": -244.44912719726562, "loss": 0.1259, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4940463900566101, "rewards/margins": 0.11086853593587875, "rewards/rejected": -0.6049149036407471, "step": 1170 }, { "epoch": 0.14, "learning_rate": 4.9737962476304045e-06, "logits/chosen": -1.9830493927001953, "logits/rejected": -1.647684097290039, "logps/chosen": -220.96340942382812, "logps/rejected": -208.0186767578125, "loss": 0.1464, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.48578500747680664, "rewards/margins": 0.15359191596508026, "rewards/rejected": -0.6393769383430481, "step": 1180 }, { "epoch": 0.14, "learning_rate": 4.972262539094633e-06, "logits/chosen": -1.978539228439331, "logits/rejected": -1.682488203048706, "logps/chosen": -226.36874389648438, "logps/rejected": -201.7385711669922, "loss": 0.1508, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.38268616795539856, "rewards/margins": 0.08130475878715515, "rewards/rejected": -0.46399086713790894, "step": 1190 }, { "epoch": 0.14, "learning_rate": 4.970685463959489e-06, "logits/chosen": -2.1193437576293945, "logits/rejected": -1.6852171421051025, "logps/chosen": -204.7485809326172, "logps/rejected": -179.21803283691406, "loss": 0.1222, "rewards/accuracies": 0.625, "rewards/chosen": -0.29113954305648804, "rewards/margins": 0.18358632922172546, "rewards/rejected": -0.4747259020805359, "step": 1200 }, { "epoch": 0.15, "learning_rate": 4.969065049888861e-06, "logits/chosen": -2.1825406551361084, "logits/rejected": -1.6184136867523193, "logps/chosen": -186.78878784179688, "logps/rejected": -203.55813598632812, "loss": 0.1486, "rewards/accuracies": 0.625, "rewards/chosen": -0.20323626697063446, "rewards/margins": 0.19247011840343475, "rewards/rejected": -0.3957063853740692, "step": 1210 }, { "epoch": 0.15, "learning_rate": 4.9674013253068535e-06, "logits/chosen": -2.1128785610198975, "logits/rejected": -1.8958683013916016, "logps/chosen": -235.69161987304688, "logps/rejected": -235.4266357421875, "loss": 0.1277, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22614555060863495, "rewards/margins": 0.1264582872390747, "rewards/rejected": -0.35260388255119324, "step": 1220 }, { "epoch": 0.15, "learning_rate": 4.96569431939729e-06, "logits/chosen": -1.8693885803222656, "logits/rejected": -1.6573035717010498, "logps/chosen": -216.2715301513672, "logps/rejected": -202.7554473876953, "loss": 0.1266, "rewards/accuracies": 0.75, "rewards/chosen": -0.3473548889160156, "rewards/margins": 0.13557687401771545, "rewards/rejected": -0.4829317033290863, "step": 1230 }, { "epoch": 0.15, "learning_rate": 4.963944062103205e-06, "logits/chosen": -2.0532517433166504, "logits/rejected": -1.7747167348861694, "logps/chosen": -238.31423950195312, "logps/rejected": -231.4252166748047, "loss": 0.163, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2758443355560303, "rewards/margins": 0.12897524237632751, "rewards/rejected": -0.4048195779323578, "step": 1240 }, { "epoch": 0.15, "learning_rate": 4.9621505841263155e-06, "logits/chosen": -1.9278017282485962, "logits/rejected": -1.6317808628082275, "logps/chosen": -209.70388793945312, "logps/rejected": -204.15481567382812, "loss": 0.1274, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.28794723749160767, "rewards/margins": 0.08738715946674347, "rewards/rejected": -0.37533441185951233, "step": 1250 }, { "epoch": 0.15, "learning_rate": 4.960313916926486e-06, "logits/chosen": -1.9663488864898682, "logits/rejected": -1.8491098880767822, "logps/chosen": -225.07937622070312, "logps/rejected": -231.40744018554688, "loss": 0.1182, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31935936212539673, "rewards/margins": 0.10864345729351044, "rewards/rejected": -0.42800283432006836, "step": 1260 }, { "epoch": 0.15, "learning_rate": 4.958434092721172e-06, "logits/chosen": -1.9907243251800537, "logits/rejected": -1.6675913333892822, "logps/chosen": -199.87515258789062, "logps/rejected": -212.6792755126953, "loss": 0.1512, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2891536056995392, "rewards/margins": 0.11914797872304916, "rewards/rejected": -0.40830159187316895, "step": 1270 }, { "epoch": 0.15, "learning_rate": 4.956511144484858e-06, "logits/chosen": -2.0224695205688477, "logits/rejected": -1.5479836463928223, "logps/chosen": -303.0483703613281, "logps/rejected": -241.37060546875, "loss": 0.1655, "rewards/accuracies": 0.625, "rewards/chosen": -0.4082934856414795, "rewards/margins": 0.11875990778207779, "rewards/rejected": -0.5270534157752991, "step": 1280 }, { "epoch": 0.15, "learning_rate": 4.954545105948479e-06, "logits/chosen": -2.2034153938293457, "logits/rejected": -2.0540757179260254, "logps/chosen": -282.18048095703125, "logps/rejected": -286.74310302734375, "loss": 0.163, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4291737973690033, "rewards/margins": 0.12008972465991974, "rewards/rejected": -0.5492635369300842, "step": 1290 }, { "epoch": 0.16, "learning_rate": 4.952536011598828e-06, "logits/chosen": -1.9675910472869873, "logits/rejected": -1.8413200378417969, "logps/chosen": -218.7751922607422, "logps/rejected": -267.6961669921875, "loss": 0.2403, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3774875998497009, "rewards/margins": 0.08021329343318939, "rewards/rejected": -0.45770081877708435, "step": 1300 }, { "epoch": 0.16, "learning_rate": 4.950483896677949e-06, "logits/chosen": -1.9533389806747437, "logits/rejected": -1.5656068325042725, "logps/chosen": -268.5809020996094, "logps/rejected": -251.406494140625, "loss": 0.0979, "rewards/accuracies": 0.75, "rewards/chosen": -0.43018245697021484, "rewards/margins": 0.2314719408750534, "rewards/rejected": -0.6616543531417847, "step": 1310 }, { "epoch": 0.16, "learning_rate": 4.948388797182525e-06, "logits/chosen": -1.9137376546859741, "logits/rejected": -1.8603311777114868, "logps/chosen": -179.66416931152344, "logps/rejected": -240.1300506591797, "loss": 0.2446, "rewards/accuracies": 0.625, "rewards/chosen": -0.3417154848575592, "rewards/margins": 0.15184545516967773, "rewards/rejected": -0.4935609698295593, "step": 1320 }, { "epoch": 0.16, "learning_rate": 4.9462507498632404e-06, "logits/chosen": -1.8757511377334595, "logits/rejected": -1.556334376335144, "logps/chosen": -170.3055419921875, "logps/rejected": -155.99923706054688, "loss": 0.1384, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09021402895450592, "rewards/margins": 0.061974525451660156, "rewards/rejected": -0.15218856930732727, "step": 1330 }, { "epoch": 0.16, "learning_rate": 4.944069792224138e-06, "logits/chosen": -2.04624080657959, "logits/rejected": -1.6785959005355835, "logps/chosen": -259.4488525390625, "logps/rejected": -208.564697265625, "loss": 0.1482, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11900661140680313, "rewards/margins": 0.09059344977140427, "rewards/rejected": -0.2096000462770462, "step": 1340 }, { "epoch": 0.16, "learning_rate": 4.941845962521961e-06, "logits/chosen": -2.206084728240967, "logits/rejected": -1.8071863651275635, "logps/chosen": -174.04827880859375, "logps/rejected": -162.29518127441406, "loss": 0.1481, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22567129135131836, "rewards/margins": 0.115481436252594, "rewards/rejected": -0.34115272760391235, "step": 1350 }, { "epoch": 0.16, "learning_rate": 4.939579299765485e-06, "logits/chosen": -2.1437458992004395, "logits/rejected": -1.828401803970337, "logps/chosen": -189.3341522216797, "logps/rejected": -232.9872283935547, "loss": 0.1491, "rewards/accuracies": 0.5, "rewards/chosen": -0.3480433225631714, "rewards/margins": 0.12997707724571228, "rewards/rejected": -0.47802042961120605, "step": 1360 }, { "epoch": 0.16, "learning_rate": 4.937269843714831e-06, "logits/chosen": -1.8172667026519775, "logits/rejected": -1.5740686655044556, "logps/chosen": -230.97781372070312, "logps/rejected": -243.69906616210938, "loss": 0.1532, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3387555181980133, "rewards/margins": 0.12509162724018097, "rewards/rejected": -0.46384716033935547, "step": 1370 }, { "epoch": 0.17, "learning_rate": 4.934917634880766e-06, "logits/chosen": -1.8426252603530884, "logits/rejected": -1.6131852865219116, "logps/chosen": -208.56918334960938, "logps/rejected": -232.096435546875, "loss": 0.1422, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3319644033908844, "rewards/margins": 0.11488902568817139, "rewards/rejected": -0.4468534588813782, "step": 1380 }, { "epoch": 0.17, "learning_rate": 4.932522714523996e-06, "logits/chosen": -1.8893773555755615, "logits/rejected": -1.7410329580307007, "logps/chosen": -207.05514526367188, "logps/rejected": -207.72817993164062, "loss": 0.1441, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.40233850479125977, "rewards/margins": 0.09210322797298431, "rewards/rejected": -0.4944417476654053, "step": 1390 }, { "epoch": 0.17, "learning_rate": 4.930085124654443e-06, "logits/chosen": -2.0386033058166504, "logits/rejected": -1.3505053520202637, "logps/chosen": -369.05743408203125, "logps/rejected": -262.7327575683594, "loss": 0.144, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38073664903640747, "rewards/margins": 0.19237622618675232, "rewards/rejected": -0.5731129050254822, "step": 1400 }, { "epoch": 0.17, "learning_rate": 4.927604908030503e-06, "logits/chosen": -1.893441915512085, "logits/rejected": -1.7213819026947021, "logps/chosen": -203.71377563476562, "logps/rejected": -231.38339233398438, "loss": 0.2063, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2586003541946411, "rewards/margins": 0.1567048281431198, "rewards/rejected": -0.4153051972389221, "step": 1410 }, { "epoch": 0.17, "learning_rate": 4.9250821081583e-06, "logits/chosen": -1.9134151935577393, "logits/rejected": -1.5941439867019653, "logps/chosen": -238.3423309326172, "logps/rejected": -239.90673828125, "loss": 0.1188, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3424052298069, "rewards/margins": 0.14903725683689117, "rewards/rejected": -0.49144238233566284, "step": 1420 }, { "epoch": 0.17, "learning_rate": 4.922516769290921e-06, "logits/chosen": -2.1062850952148438, "logits/rejected": -1.5940475463867188, "logps/chosen": -285.4846496582031, "logps/rejected": -267.6663818359375, "loss": 0.0838, "rewards/accuracies": 0.75, "rewards/chosen": -0.41442838311195374, "rewards/margins": 0.16315698623657227, "rewards/rejected": -0.5775853395462036, "step": 1430 }, { "epoch": 0.17, "learning_rate": 4.919908936427643e-06, "logits/chosen": -1.9641939401626587, "logits/rejected": -1.6408805847167969, "logps/chosen": -233.9027862548828, "logps/rejected": -241.10244750976562, "loss": 0.1349, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.31380248069763184, "rewards/margins": 0.1510525941848755, "rewards/rejected": -0.4648551344871521, "step": 1440 }, { "epoch": 0.17, "learning_rate": 4.917258655313137e-06, "logits/chosen": -1.9187742471694946, "logits/rejected": -1.7553646564483643, "logps/chosen": -183.12078857421875, "logps/rejected": -215.263916015625, "loss": 0.1463, "rewards/accuracies": 0.625, "rewards/chosen": -0.45299237966537476, "rewards/margins": 0.14117896556854248, "rewards/rejected": -0.5941713452339172, "step": 1450 }, { "epoch": 0.18, "learning_rate": 4.914565972436677e-06, "logits/chosen": -1.9224255084991455, "logits/rejected": -1.568176031112671, "logps/chosen": -243.716552734375, "logps/rejected": -229.509765625, "loss": 0.1259, "rewards/accuracies": 0.75, "rewards/chosen": -0.32048431038856506, "rewards/margins": 0.1995118111371994, "rewards/rejected": -0.5199961066246033, "step": 1460 }, { "epoch": 0.18, "learning_rate": 4.911830935031308e-06, "logits/chosen": -1.7421767711639404, "logits/rejected": -1.6233383417129517, "logps/chosen": -248.0158233642578, "logps/rejected": -251.9866485595703, "loss": 0.1802, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.37991154193878174, "rewards/margins": 0.10848450660705566, "rewards/rejected": -0.4883960783481598, "step": 1470 }, { "epoch": 0.18, "learning_rate": 4.909053591073034e-06, "logits/chosen": -1.8475421667099, "logits/rejected": -1.5178005695343018, "logps/chosen": -234.1063995361328, "logps/rejected": -198.0278778076172, "loss": 0.188, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.21273323893547058, "rewards/margins": 0.10500024259090424, "rewards/rejected": -0.3177334666252136, "step": 1480 }, { "epoch": 0.18, "learning_rate": 4.906233989279967e-06, "logits/chosen": -2.128617525100708, "logits/rejected": -1.6844680309295654, "logps/chosen": -254.1748504638672, "logps/rejected": -218.5316162109375, "loss": 0.1283, "rewards/accuracies": 0.625, "rewards/chosen": -0.23462820053100586, "rewards/margins": 0.07426507025957108, "rewards/rejected": -0.30889326333999634, "step": 1490 }, { "epoch": 0.18, "learning_rate": 4.903372179111473e-06, "logits/chosen": -1.8551340103149414, "logits/rejected": -1.7969564199447632, "logps/chosen": -245.1498565673828, "logps/rejected": -237.3717498779297, "loss": 0.1349, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.26474660634994507, "rewards/margins": 0.06484004855155945, "rewards/rejected": -0.3295866847038269, "step": 1500 }, { "epoch": 0.18, "learning_rate": 4.900468210767309e-06, "logits/chosen": -1.875862717628479, "logits/rejected": -1.5309476852416992, "logps/chosen": -220.53408813476562, "logps/rejected": -184.7772674560547, "loss": 0.2182, "rewards/accuracies": 0.625, "rewards/chosen": -0.20155379176139832, "rewards/margins": 0.08043310791254044, "rewards/rejected": -0.28198689222335815, "step": 1510 }, { "epoch": 0.18, "learning_rate": 4.897522135186737e-06, "logits/chosen": -2.022017478942871, "logits/rejected": -1.7306649684906006, "logps/chosen": -266.8653869628906, "logps/rejected": -258.17962646484375, "loss": 0.1517, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2780263423919678, "rewards/margins": 0.12058179080486298, "rewards/rejected": -0.39860814809799194, "step": 1520 }, { "epoch": 0.18, "learning_rate": 4.894534004047635e-06, "logits/chosen": -2.042154312133789, "logits/rejected": -1.7138588428497314, "logps/chosen": -322.60791015625, "logps/rejected": -300.8125915527344, "loss": 0.0724, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.37096601724624634, "rewards/margins": 0.13116590678691864, "rewards/rejected": -0.502131998538971, "step": 1530 }, { "epoch": 0.18, "learning_rate": 4.891503869765586e-06, "logits/chosen": -2.0083236694335938, "logits/rejected": -1.7603801488876343, "logps/chosen": -266.07330322265625, "logps/rejected": -251.0727081298828, "loss": 0.0935, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.27762866020202637, "rewards/margins": 0.21909542381763458, "rewards/rejected": -0.49672412872314453, "step": 1540 }, { "epoch": 0.19, "learning_rate": 4.888431785492964e-06, "logits/chosen": -2.025075912475586, "logits/rejected": -1.55344820022583, "logps/chosen": -198.4198455810547, "logps/rejected": -193.17062377929688, "loss": 0.1424, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22466003894805908, "rewards/margins": 0.1589314192533493, "rewards/rejected": -0.3835914731025696, "step": 1550 }, { "epoch": 0.19, "learning_rate": 4.8853178051179965e-06, "logits/chosen": -2.004646062850952, "logits/rejected": -1.5767240524291992, "logps/chosen": -275.49566650390625, "logps/rejected": -211.9617462158203, "loss": 0.1153, "rewards/accuracies": 0.625, "rewards/chosen": -0.24958455562591553, "rewards/margins": 0.12301850318908691, "rewards/rejected": -0.37260305881500244, "step": 1560 }, { "epoch": 0.19, "learning_rate": 4.882161983263822e-06, "logits/chosen": -1.9383262395858765, "logits/rejected": -1.6864608526229858, "logps/chosen": -215.1851348876953, "logps/rejected": -219.91763305664062, "loss": 0.1282, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.21753136813640594, "rewards/margins": 0.07913483679294586, "rewards/rejected": -0.2966662049293518, "step": 1570 }, { "epoch": 0.19, "learning_rate": 4.8789643752875315e-06, "logits/chosen": -2.1681385040283203, "logits/rejected": -1.5037636756896973, "logps/chosen": -296.7191467285156, "logps/rejected": -210.6960906982422, "loss": 0.0908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13743911683559418, "rewards/margins": 0.14550727605819702, "rewards/rejected": -0.2829464077949524, "step": 1580 }, { "epoch": 0.19, "learning_rate": 4.875725037279197e-06, "logits/chosen": -2.100879669189453, "logits/rejected": -1.6887743473052979, "logps/chosen": -275.7843322753906, "logps/rejected": -254.24166870117188, "loss": 0.106, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2630919814109802, "rewards/margins": 0.1653970181941986, "rewards/rejected": -0.4284890294075012, "step": 1590 }, { "epoch": 0.19, "learning_rate": 4.8724440260608885e-06, "logits/chosen": -2.100240707397461, "logits/rejected": -1.6715034246444702, "logps/chosen": -215.9812774658203, "logps/rejected": -220.94790649414062, "loss": 0.1162, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.312126100063324, "rewards/margins": 0.20332340896129608, "rewards/rejected": -0.515449583530426, "step": 1600 }, { "epoch": 0.19, "learning_rate": 4.8691213991856755e-06, "logits/chosen": -2.143995523452759, "logits/rejected": -1.8109419345855713, "logps/chosen": -219.9774932861328, "logps/rejected": -207.9420623779297, "loss": 0.1514, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3066982924938202, "rewards/margins": 0.12182126939296722, "rewards/rejected": -0.4285196363925934, "step": 1610 }, { "epoch": 0.19, "learning_rate": 4.8657572149366195e-06, "logits/chosen": -2.0417563915252686, "logits/rejected": -1.8656389713287354, "logps/chosen": -240.6365203857422, "logps/rejected": -234.8029022216797, "loss": 0.1367, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2173691689968109, "rewards/margins": 0.11171890795230865, "rewards/rejected": -0.32908809185028076, "step": 1620 }, { "epoch": 0.2, "learning_rate": 4.8623515323257496e-06, "logits/chosen": -1.8849719762802124, "logits/rejected": -1.764593482017517, "logps/chosen": -237.77490234375, "logps/rejected": -266.98089599609375, "loss": 0.138, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3375057876110077, "rewards/margins": 0.1129181832075119, "rewards/rejected": -0.4504240155220032, "step": 1630 }, { "epoch": 0.2, "learning_rate": 4.85890441109303e-06, "logits/chosen": -1.9908783435821533, "logits/rejected": -1.6857595443725586, "logps/chosen": -254.00650024414062, "logps/rejected": -248.4944610595703, "loss": 0.1031, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35663676261901855, "rewards/margins": 0.18786312639713287, "rewards/rejected": -0.544499933719635, "step": 1640 }, { "epoch": 0.2, "learning_rate": 4.855415911705308e-06, "logits/chosen": -2.0321478843688965, "logits/rejected": -1.9031927585601807, "logps/chosen": -243.13436889648438, "logps/rejected": -255.0236053466797, "loss": 0.1124, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.29369717836380005, "rewards/margins": 0.11365096271038055, "rewards/rejected": -0.4073481559753418, "step": 1650 }, { "epoch": 0.2, "learning_rate": 4.851886095355259e-06, "logits/chosen": -2.0734264850616455, "logits/rejected": -1.5513179302215576, "logps/chosen": -311.870849609375, "logps/rejected": -246.34963989257812, "loss": 0.3146, "rewards/accuracies": 0.625, "rewards/chosen": -0.44044867157936096, "rewards/margins": 0.06915672868490219, "rewards/rejected": -0.509605348110199, "step": 1660 }, { "epoch": 0.2, "learning_rate": 4.848315023960308e-06, "logits/chosen": -2.0195250511169434, "logits/rejected": -1.5690950155258179, "logps/chosen": -245.0861053466797, "logps/rejected": -176.3268280029297, "loss": 0.1332, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.32303136587142944, "rewards/margins": 0.14561942219734192, "rewards/rejected": -0.46865081787109375, "step": 1670 }, { "epoch": 0.2, "learning_rate": 4.844702760161546e-06, "logits/chosen": -1.9474140405654907, "logits/rejected": -1.498877763748169, "logps/chosen": -214.76220703125, "logps/rejected": -202.29042053222656, "loss": 0.1529, "rewards/accuracies": 0.625, "rewards/chosen": -0.34490758180618286, "rewards/margins": 0.16737958788871765, "rewards/rejected": -0.5122871398925781, "step": 1680 }, { "epoch": 0.2, "learning_rate": 4.841049367322631e-06, "logits/chosen": -1.878689169883728, "logits/rejected": -1.5493450164794922, "logps/chosen": -263.93896484375, "logps/rejected": -252.0777587890625, "loss": 0.112, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.32466429471969604, "rewards/margins": 0.15016327798366547, "rewards/rejected": -0.4748276174068451, "step": 1690 }, { "epoch": 0.2, "learning_rate": 4.837354909528675e-06, "logits/chosen": -1.8449478149414062, "logits/rejected": -1.8139785528182983, "logps/chosen": -187.8353271484375, "logps/rejected": -203.24066162109375, "loss": 0.2003, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4078170359134674, "rewards/margins": 0.08712447434663773, "rewards/rejected": -0.49494147300720215, "step": 1700 }, { "epoch": 0.21, "learning_rate": 4.833619451585122e-06, "logits/chosen": -1.8499475717544556, "logits/rejected": -1.498587727546692, "logps/chosen": -243.45242309570312, "logps/rejected": -231.98251342773438, "loss": 0.1135, "rewards/accuracies": 0.75, "rewards/chosen": -0.3529844284057617, "rewards/margins": 0.16045571863651276, "rewards/rejected": -0.5134401321411133, "step": 1710 }, { "epoch": 0.21, "learning_rate": 4.829843059016611e-06, "logits/chosen": -1.81149423122406, "logits/rejected": -1.4992173910140991, "logps/chosen": -184.52310180664062, "logps/rejected": -201.0103759765625, "loss": 0.1903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.35175323486328125, "rewards/margins": 0.14889448881149292, "rewards/rejected": -0.5006477236747742, "step": 1720 }, { "epoch": 0.21, "learning_rate": 4.826025798065823e-06, "logits/chosen": -1.8906818628311157, "logits/rejected": -1.7450675964355469, "logps/chosen": -254.50241088867188, "logps/rejected": -233.2667999267578, "loss": 0.195, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4035833775997162, "rewards/margins": 0.10915178060531616, "rewards/rejected": -0.5127351880073547, "step": 1730 }, { "epoch": 0.21, "learning_rate": 4.8221677356923255e-06, "logits/chosen": -1.822003722190857, "logits/rejected": -1.6503994464874268, "logps/chosen": -194.3645782470703, "logps/rejected": -234.00198364257812, "loss": 0.19, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.42598286271095276, "rewards/margins": 0.17088128626346588, "rewards/rejected": -0.5968641638755798, "step": 1740 }, { "epoch": 0.21, "learning_rate": 4.8182689395713925e-06, "logits/chosen": -1.911811113357544, "logits/rejected": -1.4322118759155273, "logps/chosen": -241.7019500732422, "logps/rejected": -241.4373016357422, "loss": 0.1291, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4872862696647644, "rewards/margins": 0.21816511452198029, "rewards/rejected": -0.7054513692855835, "step": 1750 }, { "epoch": 0.21, "learning_rate": 4.814329478092818e-06, "logits/chosen": -2.0462019443511963, "logits/rejected": -1.638604760169983, "logps/chosen": -279.9619445800781, "logps/rejected": -274.87408447265625, "loss": 0.1234, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4822389483451843, "rewards/margins": 0.10693428665399551, "rewards/rejected": -0.5891731977462769, "step": 1760 }, { "epoch": 0.21, "learning_rate": 4.810349420359722e-06, "logits/chosen": -1.8592302799224854, "logits/rejected": -1.4212658405303955, "logps/chosen": -241.1697998046875, "logps/rejected": -256.21026611328125, "loss": 0.1746, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5237879753112793, "rewards/margins": 0.18233174085617065, "rewards/rejected": -0.70611971616745, "step": 1770 }, { "epoch": 0.21, "learning_rate": 4.806328836187328e-06, "logits/chosen": -1.9457238912582397, "logits/rejected": -1.5514074563980103, "logps/chosen": -258.5703430175781, "logps/rejected": -231.8943634033203, "loss": 0.1483, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.43616265058517456, "rewards/margins": 0.18835784494876862, "rewards/rejected": -0.624520480632782, "step": 1780 }, { "epoch": 0.21, "learning_rate": 4.802267796101749e-06, "logits/chosen": -1.8042892217636108, "logits/rejected": -1.4870389699935913, "logps/chosen": -296.75115966796875, "logps/rejected": -246.88882446289062, "loss": 0.1334, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.48000073432922363, "rewards/margins": 0.14861378073692322, "rewards/rejected": -0.6286145448684692, "step": 1790 }, { "epoch": 0.22, "learning_rate": 4.798166371338745e-06, "logits/chosen": -1.9880012273788452, "logits/rejected": -1.7663252353668213, "logps/chosen": -252.8795623779297, "logps/rejected": -283.4180603027344, "loss": 0.1518, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3115619122982025, "rewards/margins": 0.16061726212501526, "rewards/rejected": -0.4721791744232178, "step": 1800 }, { "epoch": 0.22, "learning_rate": 4.79402463384247e-06, "logits/chosen": -1.9161767959594727, "logits/rejected": -1.753260850906372, "logps/chosen": -244.0667266845703, "logps/rejected": -268.25164794921875, "loss": 0.1697, "rewards/accuracies": 0.625, "rewards/chosen": -0.299142062664032, "rewards/margins": 0.11554409563541412, "rewards/rejected": -0.4146861433982849, "step": 1810 }, { "epoch": 0.22, "learning_rate": 4.78984265626422e-06, "logits/chosen": -1.8210163116455078, "logits/rejected": -1.640275001525879, "logps/chosen": -195.47975158691406, "logps/rejected": -204.36692810058594, "loss": 0.1215, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2043401002883911, "rewards/margins": 0.11669757217168808, "rewards/rejected": -0.3210376501083374, "step": 1820 }, { "epoch": 0.22, "learning_rate": 4.785620511961148e-06, "logits/chosen": -2.0741043090820312, "logits/rejected": -1.788116455078125, "logps/chosen": -263.5960388183594, "logps/rejected": -245.22525024414062, "loss": 0.1444, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22155031561851501, "rewards/margins": 0.10210974514484406, "rewards/rejected": -0.3236600458621979, "step": 1830 }, { "epoch": 0.22, "learning_rate": 4.781358274994985e-06, "logits/chosen": -2.1329751014709473, "logits/rejected": -1.7640917301177979, "logps/chosen": -227.3170928955078, "logps/rejected": -210.49746704101562, "loss": 0.1666, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2090965211391449, "rewards/margins": 0.1362505704164505, "rewards/rejected": -0.3453471064567566, "step": 1840 }, { "epoch": 0.22, "learning_rate": 4.777056020130737e-06, "logits/chosen": -2.188413143157959, "logits/rejected": -1.6932777166366577, "logps/chosen": -314.6272277832031, "logps/rejected": -289.4335632324219, "loss": 0.1244, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3082244098186493, "rewards/margins": 0.21150562167167664, "rewards/rejected": -0.5197300314903259, "step": 1850 }, { "epoch": 0.22, "learning_rate": 4.772713822835374e-06, "logits/chosen": -1.8019129037857056, "logits/rejected": -1.4269254207611084, "logps/chosen": -215.6371307373047, "logps/rejected": -216.8037567138672, "loss": 0.1824, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3367902636528015, "rewards/margins": 0.19317738711833954, "rewards/rejected": -0.5299676656723022, "step": 1860 }, { "epoch": 0.22, "learning_rate": 4.768331759276506e-06, "logits/chosen": -2.0352442264556885, "logits/rejected": -1.8071515560150146, "logps/chosen": -285.3484802246094, "logps/rejected": -280.1053161621094, "loss": 0.084, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29302871227264404, "rewards/margins": 0.1241946592926979, "rewards/rejected": -0.41722336411476135, "step": 1870 }, { "epoch": 0.23, "learning_rate": 4.763909906321048e-06, "logits/chosen": -1.9953176975250244, "logits/rejected": -1.617118239402771, "logps/chosen": -248.2249755859375, "logps/rejected": -225.35562133789062, "loss": 0.1598, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24114413559436798, "rewards/margins": 0.1202860102057457, "rewards/rejected": -0.36143016815185547, "step": 1880 }, { "epoch": 0.23, "learning_rate": 4.759448341533872e-06, "logits/chosen": -1.7579656839370728, "logits/rejected": -1.539206624031067, "logps/chosen": -255.2971649169922, "logps/rejected": -270.1570739746094, "loss": 0.1665, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.401685893535614, "rewards/margins": 0.19057750701904297, "rewards/rejected": -0.5922634601593018, "step": 1890 }, { "epoch": 0.23, "learning_rate": 4.754947143176445e-06, "logits/chosen": -1.880765676498413, "logits/rejected": -1.4084635972976685, "logps/chosen": -198.8912353515625, "logps/rejected": -170.9627227783203, "loss": 0.1344, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.33145859837532043, "rewards/margins": 0.16721078753471375, "rewards/rejected": -0.4986693859100342, "step": 1900 }, { "epoch": 0.23, "learning_rate": 4.750406390205456e-06, "logits/chosen": -1.9753971099853516, "logits/rejected": -1.8784997463226318, "logps/chosen": -285.95343017578125, "logps/rejected": -265.814453125, "loss": 0.1751, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.28710874915122986, "rewards/margins": 0.07001986354589462, "rewards/rejected": -0.3571286201477051, "step": 1910 }, { "epoch": 0.23, "learning_rate": 4.745826162271433e-06, "logits/chosen": -2.2576117515563965, "logits/rejected": -1.8165054321289062, "logps/chosen": -266.011474609375, "logps/rejected": -255.41897583007812, "loss": 0.1212, "rewards/accuracies": 0.75, "rewards/chosen": -0.22302386164665222, "rewards/margins": 0.15649394690990448, "rewards/rejected": -0.3795178532600403, "step": 1920 }, { "epoch": 0.23, "learning_rate": 4.741206539717343e-06, "logits/chosen": -2.0917410850524902, "logits/rejected": -1.5043323040008545, "logps/chosen": -253.68002319335938, "logps/rejected": -229.77005004882812, "loss": 0.1366, "rewards/accuracies": 0.75, "rewards/chosen": -0.18172211945056915, "rewards/margins": 0.23254887759685516, "rewards/rejected": -0.41427096724510193, "step": 1930 }, { "epoch": 0.23, "learning_rate": 4.736547603577185e-06, "logits/chosen": -1.7454341650009155, "logits/rejected": -1.6577666997909546, "logps/chosen": -199.49789428710938, "logps/rejected": -199.80279541015625, "loss": 0.1921, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3379337191581726, "rewards/margins": 0.12971019744873047, "rewards/rejected": -0.4676439166069031, "step": 1940 }, { "epoch": 0.23, "learning_rate": 4.731849435574568e-06, "logits/chosen": -2.069859504699707, "logits/rejected": -1.7830657958984375, "logps/chosen": -232.3899688720703, "logps/rejected": -232.227294921875, "loss": 0.1621, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2579636573791504, "rewards/margins": 0.18101055920124054, "rewards/rejected": -0.4389742314815521, "step": 1950 }, { "epoch": 0.24, "learning_rate": 4.727112118121279e-06, "logits/chosen": -2.024989366531372, "logits/rejected": -1.809133768081665, "logps/chosen": -225.9645538330078, "logps/rejected": -218.0653533935547, "loss": 0.1256, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27211037278175354, "rewards/margins": 0.13195666670799255, "rewards/rejected": -0.4040670394897461, "step": 1960 }, { "epoch": 0.24, "learning_rate": 4.722335734315833e-06, "logits/chosen": -1.990189790725708, "logits/rejected": -1.5594546794891357, "logps/chosen": -290.8906555175781, "logps/rejected": -227.53616333007812, "loss": 0.0908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2791885733604431, "rewards/margins": 0.1558622419834137, "rewards/rejected": -0.4350507855415344, "step": 1970 }, { "epoch": 0.24, "learning_rate": 4.7175203679420175e-06, "logits/chosen": -1.9072424173355103, "logits/rejected": -1.51438307762146, "logps/chosen": -216.40524291992188, "logps/rejected": -228.6339874267578, "loss": 0.1292, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.37562626600265503, "rewards/margins": 0.20719614624977112, "rewards/rejected": -0.582822322845459, "step": 1980 }, { "epoch": 0.24, "learning_rate": 4.712666103467428e-06, "logits/chosen": -2.0311849117279053, "logits/rejected": -1.8723666667938232, "logps/chosen": -233.4354705810547, "logps/rejected": -218.10659790039062, "loss": 0.155, "rewards/accuracies": 0.625, "rewards/chosen": -0.2636135220527649, "rewards/margins": 0.1285993456840515, "rewards/rejected": -0.3922128677368164, "step": 1990 }, { "epoch": 0.24, "learning_rate": 4.707773026041975e-06, "logits/chosen": -2.1100738048553467, "logits/rejected": -1.8569968938827515, "logps/chosen": -268.63885498046875, "logps/rejected": -232.58578491210938, "loss": 0.1733, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27281227707862854, "rewards/margins": 0.13495466113090515, "rewards/rejected": -0.4077669084072113, "step": 2000 }, { "epoch": 0.24, "learning_rate": 4.702841221496403e-06, "logits/chosen": -1.991676688194275, "logits/rejected": -1.5846397876739502, "logps/chosen": -257.11932373046875, "logps/rejected": -221.46017456054688, "loss": 0.1592, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24664053320884705, "rewards/margins": 0.13256794214248657, "rewards/rejected": -0.3792084753513336, "step": 2010 }, { "epoch": 0.24, "learning_rate": 4.697870776340776e-06, "logits/chosen": -2.2750840187072754, "logits/rejected": -1.7563819885253906, "logps/chosen": -235.4178009033203, "logps/rejected": -190.757080078125, "loss": 0.1656, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1487046480178833, "rewards/margins": 0.1518375426530838, "rewards/rejected": -0.3005422055721283, "step": 2020 }, { "epoch": 0.24, "learning_rate": 4.692861777762963e-06, "logits/chosen": -2.0295958518981934, "logits/rejected": -1.5150290727615356, "logps/chosen": -232.8603973388672, "logps/rejected": -198.23338317871094, "loss": 0.119, "rewards/accuracies": 0.75, "rewards/chosen": -0.18991556763648987, "rewards/margins": 0.1996304988861084, "rewards/rejected": -0.3895460069179535, "step": 2030 }, { "epoch": 0.24, "learning_rate": 4.68781431362711e-06, "logits/chosen": -2.090059518814087, "logits/rejected": -1.5975253582000732, "logps/chosen": -268.8729553222656, "logps/rejected": -242.9998779296875, "loss": 0.1739, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.427692174911499, "rewards/margins": 0.1998465359210968, "rewards/rejected": -0.6275386810302734, "step": 2040 }, { "epoch": 0.25, "learning_rate": 4.6827284724720955e-06, "logits/chosen": -2.05842924118042, "logits/rejected": -1.7655296325683594, "logps/chosen": -258.84442138671875, "logps/rejected": -226.68350219726562, "loss": 0.1977, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3383828103542328, "rewards/margins": 0.13462142646312714, "rewards/rejected": -0.4730042517185211, "step": 2050 }, { "epoch": 0.25, "learning_rate": 4.677604343509981e-06, "logits/chosen": -2.037433385848999, "logits/rejected": -1.5807982683181763, "logps/chosen": -238.64389038085938, "logps/rejected": -213.24490356445312, "loss": 0.1261, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2423369586467743, "rewards/margins": 0.174465611577034, "rewards/rejected": -0.4168025553226471, "step": 2060 }, { "epoch": 0.25, "learning_rate": 4.672442016624444e-06, "logits/chosen": -2.1892218589782715, "logits/rejected": -1.862447738647461, "logps/chosen": -290.03985595703125, "logps/rejected": -240.31771850585938, "loss": 0.1574, "rewards/accuracies": 0.625, "rewards/chosen": -0.2680838704109192, "rewards/margins": 0.11730837821960449, "rewards/rejected": -0.3853922486305237, "step": 2070 }, { "epoch": 0.25, "learning_rate": 4.6672415823692e-06, "logits/chosen": -1.8660366535186768, "logits/rejected": -1.5226314067840576, "logps/chosen": -304.72833251953125, "logps/rejected": -276.5460205078125, "loss": 0.1431, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.32084205746650696, "rewards/margins": 0.15598034858703613, "rewards/rejected": -0.4768224358558655, "step": 2080 }, { "epoch": 0.25, "learning_rate": 4.662003131966418e-06, "logits/chosen": -2.167304277420044, "logits/rejected": -1.6622650623321533, "logps/chosen": -235.3962860107422, "logps/rejected": -217.3386688232422, "loss": 0.1545, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2755299210548401, "rewards/margins": 0.10942339897155762, "rewards/rejected": -0.3849533200263977, "step": 2090 }, { "epoch": 0.25, "learning_rate": 4.6567267573051176e-06, "logits/chosen": -1.8638086318969727, "logits/rejected": -1.7130645513534546, "logps/chosen": -219.14736938476562, "logps/rejected": -230.8884735107422, "loss": 0.1861, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.282665878534317, "rewards/margins": 0.05729461461305618, "rewards/rejected": -0.339960515499115, "step": 2100 }, { "epoch": 0.25, "learning_rate": 4.651412550939556e-06, "logits/chosen": -2.023266553878784, "logits/rejected": -1.448335886001587, "logps/chosen": -222.55819702148438, "logps/rejected": -192.7770538330078, "loss": 0.1121, "rewards/accuracies": 0.75, "rewards/chosen": -0.2549906075000763, "rewards/margins": 0.18708500266075134, "rewards/rejected": -0.44207563996315, "step": 2110 }, { "epoch": 0.25, "learning_rate": 4.646060606087608e-06, "logits/chosen": -1.9137452840805054, "logits/rejected": -1.637158751487732, "logps/chosen": -258.3423767089844, "logps/rejected": -228.5346221923828, "loss": 0.1564, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3416040241718292, "rewards/margins": 0.14088527858257294, "rewards/rejected": -0.4824892580509186, "step": 2120 }, { "epoch": 0.26, "learning_rate": 4.640671016629129e-06, "logits/chosen": -1.8286240100860596, "logits/rejected": -1.660211205482483, "logps/chosen": -262.9971008300781, "logps/rejected": -271.8037109375, "loss": 0.1143, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4632445275783539, "rewards/margins": 0.1317237764596939, "rewards/rejected": -0.594968318939209, "step": 2130 }, { "epoch": 0.26, "learning_rate": 4.635243877104307e-06, "logits/chosen": -2.006418466567993, "logits/rejected": -1.716923475265503, "logps/chosen": -250.9329376220703, "logps/rejected": -260.55462646484375, "loss": 0.1458, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.34360507130622864, "rewards/margins": 0.0931440070271492, "rewards/rejected": -0.43674907088279724, "step": 2140 }, { "epoch": 0.26, "learning_rate": 4.629779282712007e-06, "logits/chosen": -1.8346214294433594, "logits/rejected": -1.4906994104385376, "logps/chosen": -262.6698913574219, "logps/rejected": -255.5764923095703, "loss": 0.1224, "rewards/accuracies": 0.75, "rewards/chosen": -0.4487072825431824, "rewards/margins": 0.2172963172197342, "rewards/rejected": -0.6660035848617554, "step": 2150 }, { "epoch": 0.26, "learning_rate": 4.6242773293080965e-06, "logits/chosen": -2.074744462966919, "logits/rejected": -1.5857570171356201, "logps/chosen": -322.7615966796875, "logps/rejected": -300.53790283203125, "loss": 0.076, "rewards/accuracies": 0.75, "rewards/chosen": -0.47309666872024536, "rewards/margins": 0.2397969663143158, "rewards/rejected": -0.712893545627594, "step": 2160 }, { "epoch": 0.26, "learning_rate": 4.618738113403772e-06, "logits/chosen": -1.9601848125457764, "logits/rejected": -1.3724687099456787, "logps/chosen": -329.15814208984375, "logps/rejected": -296.91790771484375, "loss": 0.0874, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4118635058403015, "rewards/margins": 0.20115897059440613, "rewards/rejected": -0.6130224466323853, "step": 2170 }, { "epoch": 0.26, "learning_rate": 4.613161732163857e-06, "logits/chosen": -2.002680540084839, "logits/rejected": -1.7260891199111938, "logps/chosen": -218.44058227539062, "logps/rejected": -206.19589233398438, "loss": 0.1605, "rewards/accuracies": 0.75, "rewards/chosen": -0.4169228672981262, "rewards/margins": 0.15920642018318176, "rewards/rejected": -0.5761292576789856, "step": 2180 }, { "epoch": 0.26, "learning_rate": 4.607548283405103e-06, "logits/chosen": -2.2463881969451904, "logits/rejected": -1.870919942855835, "logps/chosen": -271.3766784667969, "logps/rejected": -237.7291259765625, "loss": 0.1687, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3008974492549896, "rewards/margins": 0.21275082230567932, "rewards/rejected": -0.5136483311653137, "step": 2190 }, { "epoch": 0.26, "learning_rate": 4.601897865594473e-06, "logits/chosen": -2.1344265937805176, "logits/rejected": -1.807756781578064, "logps/chosen": -254.6204833984375, "logps/rejected": -278.7408142089844, "loss": 0.0969, "rewards/accuracies": 0.75, "rewards/chosen": -0.2825480103492737, "rewards/margins": 0.14841003715991974, "rewards/rejected": -0.43095797300338745, "step": 2200 }, { "epoch": 0.27, "learning_rate": 4.596210577847415e-06, "logits/chosen": -1.8466428518295288, "logits/rejected": -1.4773153066635132, "logps/chosen": -221.1357421875, "logps/rejected": -214.00961303710938, "loss": 0.1446, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2894059717655182, "rewards/margins": 0.18356752395629883, "rewards/rejected": -0.4729735255241394, "step": 2210 }, { "epoch": 0.27, "learning_rate": 4.590486519926118e-06, "logits/chosen": -1.8348041772842407, "logits/rejected": -1.76007080078125, "logps/chosen": -247.6072540283203, "logps/rejected": -254.64437866210938, "loss": 0.1073, "rewards/accuracies": 0.625, "rewards/chosen": -0.30001839995384216, "rewards/margins": 0.1468794047832489, "rewards/rejected": -0.44689780473709106, "step": 2220 }, { "epoch": 0.27, "learning_rate": 4.584725792237772e-06, "logits/chosen": -1.8341724872589111, "logits/rejected": -1.4840484857559204, "logps/chosen": -281.08843994140625, "logps/rejected": -287.3544006347656, "loss": 0.1452, "rewards/accuracies": 0.75, "rewards/chosen": -0.3432127833366394, "rewards/margins": 0.16836020350456238, "rewards/rejected": -0.5115729570388794, "step": 2230 }, { "epoch": 0.27, "learning_rate": 4.578928495832795e-06, "logits/chosen": -2.124887466430664, "logits/rejected": -1.5507080554962158, "logps/chosen": -280.98626708984375, "logps/rejected": -227.08627319335938, "loss": 0.1314, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.43157821893692017, "rewards/margins": 0.19229252636432648, "rewards/rejected": -0.6238707304000854, "step": 2240 }, { "epoch": 0.27, "learning_rate": 4.57309473240307e-06, "logits/chosen": -2.0037436485290527, "logits/rejected": -1.5078222751617432, "logps/chosen": -269.2857360839844, "logps/rejected": -206.1649627685547, "loss": 0.1401, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.32349663972854614, "rewards/margins": 0.14150217175483704, "rewards/rejected": -0.4649987816810608, "step": 2250 }, { "epoch": 0.27, "learning_rate": 4.567224604280157e-06, "logits/chosen": -1.7673028707504272, "logits/rejected": -1.6784632205963135, "logps/chosen": -184.74407958984375, "logps/rejected": -243.2403106689453, "loss": 0.1252, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.33205723762512207, "rewards/margins": 0.1660684049129486, "rewards/rejected": -0.49812570214271545, "step": 2260 }, { "epoch": 0.27, "learning_rate": 4.561318214433499e-06, "logits/chosen": -1.9934532642364502, "logits/rejected": -1.8898242712020874, "logps/chosen": -221.0784912109375, "logps/rejected": -242.1062469482422, "loss": 0.1536, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.31267839670181274, "rewards/margins": 0.1026720255613327, "rewards/rejected": -0.41535043716430664, "step": 2270 }, { "epoch": 0.27, "learning_rate": 4.555375666468613e-06, "logits/chosen": -1.9682775735855103, "logits/rejected": -1.6195480823516846, "logps/chosen": -280.294677734375, "logps/rejected": -256.6422119140625, "loss": 0.198, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.31996774673461914, "rewards/margins": 0.1175389513373375, "rewards/rejected": -0.43750667572021484, "step": 2280 }, { "epoch": 0.27, "learning_rate": 4.549397064625275e-06, "logits/chosen": -1.9350669384002686, "logits/rejected": -1.8133732080459595, "logps/chosen": -249.39791870117188, "logps/rejected": -271.91839599609375, "loss": 0.1115, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.44019174575805664, "rewards/margins": 0.09418530017137527, "rewards/rejected": -0.5343769788742065, "step": 2290 }, { "epoch": 0.28, "learning_rate": 4.543382513775696e-06, "logits/chosen": -1.925415277481079, "logits/rejected": -1.594972014427185, "logps/chosen": -220.84228515625, "logps/rejected": -211.1090545654297, "loss": 0.1146, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.29334786534309387, "rewards/margins": 0.1734061986207962, "rewards/rejected": -0.4667540490627289, "step": 2300 }, { "epoch": 0.28, "learning_rate": 4.5373321194226736e-06, "logits/chosen": -1.9605739116668701, "logits/rejected": -1.6391212940216064, "logps/chosen": -259.51397705078125, "logps/rejected": -275.52398681640625, "loss": 0.0944, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35873842239379883, "rewards/margins": 0.1706543266773224, "rewards/rejected": -0.5293928384780884, "step": 2310 }, { "epoch": 0.28, "learning_rate": 4.531245987697747e-06, "logits/chosen": -2.164452075958252, "logits/rejected": -1.8649688959121704, "logps/chosen": -275.1927185058594, "logps/rejected": -261.66571044921875, "loss": 0.1149, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30722159147262573, "rewards/margins": 0.11033214628696442, "rewards/rejected": -0.4175536632537842, "step": 2320 }, { "epoch": 0.28, "learning_rate": 4.525124225359332e-06, "logits/chosen": -2.0339162349700928, "logits/rejected": -1.7141647338867188, "logps/chosen": -264.71923828125, "logps/rejected": -232.3660125732422, "loss": 0.1714, "rewards/accuracies": 0.75, "rewards/chosen": -0.36529964208602905, "rewards/margins": 0.15927986800670624, "rewards/rejected": -0.5245795249938965, "step": 2330 }, { "epoch": 0.28, "learning_rate": 4.518966939790854e-06, "logits/chosen": -2.047182083129883, "logits/rejected": -1.7538772821426392, "logps/chosen": -294.2006530761719, "logps/rejected": -253.27822875976562, "loss": 0.2142, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3689686954021454, "rewards/margins": 0.10386856645345688, "rewards/rejected": -0.47283726930618286, "step": 2340 }, { "epoch": 0.28, "learning_rate": 4.512774238998858e-06, "logits/chosen": -1.9125845432281494, "logits/rejected": -1.6135631799697876, "logps/chosen": -214.25314331054688, "logps/rejected": -214.7703399658203, "loss": 0.1415, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4276903569698334, "rewards/margins": 0.14196541905403137, "rewards/rejected": -0.56965571641922, "step": 2350 }, { "epoch": 0.28, "learning_rate": 4.506546231611116e-06, "logits/chosen": -1.8613827228546143, "logits/rejected": -1.7953588962554932, "logps/chosen": -261.28729248046875, "logps/rejected": -297.7659606933594, "loss": 0.0916, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.47510260343551636, "rewards/margins": 0.18565957248210907, "rewards/rejected": -0.6607621908187866, "step": 2360 }, { "epoch": 0.28, "learning_rate": 4.500283026874724e-06, "logits/chosen": -2.1421940326690674, "logits/rejected": -1.7659165859222412, "logps/chosen": -278.3591613769531, "logps/rejected": -258.3917236328125, "loss": 0.1477, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.37536367774009705, "rewards/margins": 0.14370563626289368, "rewards/rejected": -0.5190693140029907, "step": 2370 }, { "epoch": 0.29, "learning_rate": 4.493984734654184e-06, "logits/chosen": -2.0281529426574707, "logits/rejected": -1.8278974294662476, "logps/chosen": -231.8653106689453, "logps/rejected": -220.0163116455078, "loss": 0.1186, "rewards/accuracies": 0.625, "rewards/chosen": -0.3119848370552063, "rewards/margins": 0.14551366865634918, "rewards/rejected": -0.4574984908103943, "step": 2380 }, { "epoch": 0.29, "learning_rate": 4.487651465429475e-06, "logits/chosen": -2.177546501159668, "logits/rejected": -1.955910086631775, "logps/chosen": -248.14846801757812, "logps/rejected": -242.93801879882812, "loss": 0.2459, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.27779582142829895, "rewards/margins": 0.20447003841400146, "rewards/rejected": -0.4822658598423004, "step": 2390 }, { "epoch": 0.29, "learning_rate": 4.481283330294118e-06, "logits/chosen": -1.8666213750839233, "logits/rejected": -1.3665393590927124, "logps/chosen": -222.7278594970703, "logps/rejected": -199.8502655029297, "loss": 0.1838, "rewards/accuracies": 0.75, "rewards/chosen": -0.35523027181625366, "rewards/margins": 0.17108853161334991, "rewards/rejected": -0.5263187885284424, "step": 2400 }, { "epoch": 0.29, "learning_rate": 4.474880440953224e-06, "logits/chosen": -1.9999549388885498, "logits/rejected": -1.8370367288589478, "logps/chosen": -192.89273071289062, "logps/rejected": -223.0652618408203, "loss": 0.076, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2936464548110962, "rewards/margins": 0.11866404116153717, "rewards/rejected": -0.41231051087379456, "step": 2410 }, { "epoch": 0.29, "learning_rate": 4.468442909721541e-06, "logits/chosen": -1.9979664087295532, "logits/rejected": -1.8018696308135986, "logps/chosen": -215.09585571289062, "logps/rejected": -227.23757934570312, "loss": 0.1393, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.29200148582458496, "rewards/margins": 0.10804203897714615, "rewards/rejected": -0.4000435471534729, "step": 2420 }, { "epoch": 0.29, "learning_rate": 4.4619708495214735e-06, "logits/chosen": -2.1377148628234863, "logits/rejected": -1.6982520818710327, "logps/chosen": -303.42315673828125, "logps/rejected": -226.8529815673828, "loss": 0.1636, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2613959312438965, "rewards/margins": 0.09153415262699127, "rewards/rejected": -0.35293012857437134, "step": 2430 }, { "epoch": 0.29, "learning_rate": 4.455464373881112e-06, "logits/chosen": -1.9143394231796265, "logits/rejected": -1.7412408590316772, "logps/chosen": -237.32070922851562, "logps/rejected": -219.79324340820312, "loss": 0.1836, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.26246917247772217, "rewards/margins": 0.10806657373905182, "rewards/rejected": -0.3705357313156128, "step": 2440 }, { "epoch": 0.29, "learning_rate": 4.4489235969322355e-06, "logits/chosen": -2.121340751647949, "logits/rejected": -1.8871597051620483, "logps/chosen": -190.05088806152344, "logps/rejected": -195.73678588867188, "loss": 0.1619, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3710538446903229, "rewards/margins": 0.08781547844409943, "rewards/rejected": -0.4588693082332611, "step": 2450 }, { "epoch": 0.3, "learning_rate": 4.442348633408312e-06, "logits/chosen": -1.9419047832489014, "logits/rejected": -1.5559477806091309, "logps/chosen": -200.26173400878906, "logps/rejected": -201.53518676757812, "loss": 0.2091, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4545482099056244, "rewards/margins": 0.18632353842258453, "rewards/rejected": -0.6408717632293701, "step": 2460 }, { "epoch": 0.3, "learning_rate": 4.435739598642484e-06, "logits/chosen": -2.016679525375366, "logits/rejected": -1.7562650442123413, "logps/chosen": -288.39404296875, "logps/rejected": -258.4088439941406, "loss": 0.0716, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3196231424808502, "rewards/margins": 0.10171397030353546, "rewards/rejected": -0.4213371276855469, "step": 2470 }, { "epoch": 0.3, "learning_rate": 4.429096608565547e-06, "logits/chosen": -1.806492805480957, "logits/rejected": -1.3850048780441284, "logps/chosen": -249.5767364501953, "logps/rejected": -222.6543426513672, "loss": 0.1036, "rewards/accuracies": 0.75, "rewards/chosen": -0.3670658767223358, "rewards/margins": 0.16034328937530518, "rewards/rejected": -0.5274091958999634, "step": 2480 }, { "epoch": 0.3, "learning_rate": 4.422419779703916e-06, "logits/chosen": -2.2245254516601562, "logits/rejected": -1.865247130393982, "logps/chosen": -208.52560424804688, "logps/rejected": -190.5004425048828, "loss": 0.1605, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3057347238063812, "rewards/margins": 0.11808328330516815, "rewards/rejected": -0.42381802201271057, "step": 2490 }, { "epoch": 0.3, "learning_rate": 4.415709229177579e-06, "logits/chosen": -2.0980026721954346, "logits/rejected": -1.7750365734100342, "logps/chosen": -243.53494262695312, "logps/rejected": -276.8687438964844, "loss": 0.1665, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.253294974565506, "rewards/margins": 0.14929592609405518, "rewards/rejected": -0.40259090065956116, "step": 2500 }, { "epoch": 0.3, "learning_rate": 4.408965074698048e-06, "logits/chosen": -2.0569772720336914, "logits/rejected": -1.7046623229980469, "logps/chosen": -227.9197998046875, "logps/rejected": -218.9676055908203, "loss": 0.1427, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.26669183373451233, "rewards/margins": 0.17199033498764038, "rewards/rejected": -0.4386821687221527, "step": 2510 }, { "epoch": 0.3, "learning_rate": 4.402187434566286e-06, "logits/chosen": -1.852573037147522, "logits/rejected": -1.9005470275878906, "logps/chosen": -230.39346313476562, "logps/rejected": -252.2911376953125, "loss": 0.1693, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3414091169834137, "rewards/margins": 0.12495288997888565, "rewards/rejected": -0.46636199951171875, "step": 2520 }, { "epoch": 0.3, "learning_rate": 4.395376427670641e-06, "logits/chosen": -1.8688786029815674, "logits/rejected": -1.7985941171646118, "logps/chosen": -279.96453857421875, "logps/rejected": -322.30328369140625, "loss": 0.0915, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4272969365119934, "rewards/margins": 0.16690507531166077, "rewards/rejected": -0.5942019820213318, "step": 2530 }, { "epoch": 0.3, "learning_rate": 4.388532173484754e-06, "logits/chosen": -2.0615181922912598, "logits/rejected": -1.5270709991455078, "logps/chosen": -265.37591552734375, "logps/rejected": -240.03640747070312, "loss": 0.1222, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.28391411900520325, "rewards/margins": 0.16852129995822906, "rewards/rejected": -0.4524354040622711, "step": 2540 }, { "epoch": 0.31, "learning_rate": 4.381654792065464e-06, "logits/chosen": -2.1687228679656982, "logits/rejected": -1.6613953113555908, "logps/chosen": -320.2466735839844, "logps/rejected": -226.2439727783203, "loss": 0.1973, "rewards/accuracies": 0.625, "rewards/chosen": -0.3444002866744995, "rewards/margins": 0.19264493882656097, "rewards/rejected": -0.5370452404022217, "step": 2550 }, { "epoch": 0.31, "learning_rate": 4.374744404050706e-06, "logits/chosen": -2.1842730045318604, "logits/rejected": -1.622300386428833, "logps/chosen": -257.1001892089844, "logps/rejected": -265.92913818359375, "loss": 0.1575, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.26375168561935425, "rewards/margins": 0.22742655873298645, "rewards/rejected": -0.4911782145500183, "step": 2560 }, { "epoch": 0.31, "learning_rate": 4.367801130657391e-06, "logits/chosen": -2.060206890106201, "logits/rejected": -1.610399603843689, "logps/chosen": -314.66949462890625, "logps/rejected": -269.8716125488281, "loss": 0.1212, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.37169474363327026, "rewards/margins": 0.1514434516429901, "rewards/rejected": -0.523138165473938, "step": 2570 }, { "epoch": 0.31, "learning_rate": 4.3608250936792816e-06, "logits/chosen": -2.1835896968841553, "logits/rejected": -1.7747691869735718, "logps/chosen": -266.17095947265625, "logps/rejected": -240.68453979492188, "loss": 0.1395, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.31101933121681213, "rewards/margins": 0.16181661188602448, "rewards/rejected": -0.4728359580039978, "step": 2580 }, { "epoch": 0.31, "learning_rate": 4.353816415484853e-06, "logits/chosen": -2.2123303413391113, "logits/rejected": -1.7858177423477173, "logps/chosen": -268.2467956542969, "logps/rejected": -238.09500122070312, "loss": 0.1023, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.394400417804718, "rewards/margins": 0.14999434351921082, "rewards/rejected": -0.5443947911262512, "step": 2590 }, { "epoch": 0.31, "learning_rate": 4.346775219015152e-06, "logits/chosen": -2.0210156440734863, "logits/rejected": -1.6547054052352905, "logps/chosen": -292.4082946777344, "logps/rejected": -289.0941467285156, "loss": 0.1894, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4957256317138672, "rewards/margins": 0.1184227466583252, "rewards/rejected": -0.6141483783721924, "step": 2600 }, { "epoch": 0.31, "learning_rate": 4.339701627781633e-06, "logits/chosen": -1.906998634338379, "logits/rejected": -1.7141332626342773, "logps/chosen": -258.7335205078125, "logps/rejected": -251.07754516601562, "loss": 0.0969, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3772934079170227, "rewards/margins": 0.09553851187229156, "rewards/rejected": -0.47283196449279785, "step": 2610 }, { "epoch": 0.31, "learning_rate": 4.332595765863998e-06, "logits/chosen": -1.8866933584213257, "logits/rejected": -1.5196045637130737, "logps/chosen": -179.44906616210938, "logps/rejected": -181.3992156982422, "loss": 0.0869, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.376947820186615, "rewards/margins": 0.14659300446510315, "rewards/rejected": -0.5235407948493958, "step": 2620 }, { "epoch": 0.32, "learning_rate": 4.325457757908016e-06, "logits/chosen": -1.9919402599334717, "logits/rejected": -1.5705921649932861, "logps/chosen": -260.714111328125, "logps/rejected": -241.1851043701172, "loss": 0.1377, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.39705362915992737, "rewards/margins": 0.15993081033229828, "rewards/rejected": -0.5569844841957092, "step": 2630 }, { "epoch": 0.32, "learning_rate": 4.3182877291233395e-06, "logits/chosen": -1.9707670211791992, "logits/rejected": -1.495273232460022, "logps/chosen": -213.5478515625, "logps/rejected": -201.37350463867188, "loss": 0.1589, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3921014368534088, "rewards/margins": 0.1725640594959259, "rewards/rejected": -0.5646654367446899, "step": 2640 }, { "epoch": 0.32, "learning_rate": 4.311085805281306e-06, "logits/chosen": -1.9549287557601929, "logits/rejected": -1.630378007888794, "logps/chosen": -322.6145935058594, "logps/rejected": -297.1085205078125, "loss": 0.1446, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.41820430755615234, "rewards/margins": 0.1227729320526123, "rewards/rejected": -0.5409771800041199, "step": 2650 }, { "epoch": 0.32, "learning_rate": 4.303852112712731e-06, "logits/chosen": -2.088381767272949, "logits/rejected": -1.612980604171753, "logps/chosen": -291.43255615234375, "logps/rejected": -240.37240600585938, "loss": 0.0888, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3984852731227875, "rewards/margins": 0.1815904676914215, "rewards/rejected": -0.580075740814209, "step": 2660 }, { "epoch": 0.32, "learning_rate": 4.2965867783056965e-06, "logits/chosen": -2.1672182083129883, "logits/rejected": -1.5168484449386597, "logps/chosen": -244.7864990234375, "logps/rejected": -220.2649688720703, "loss": 0.0939, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24389946460723877, "rewards/margins": 0.23934423923492432, "rewards/rejected": -0.4832437038421631, "step": 2670 }, { "epoch": 0.32, "learning_rate": 4.289289929503319e-06, "logits/chosen": -1.6916240453720093, "logits/rejected": -1.7404859066009521, "logps/chosen": -278.12933349609375, "logps/rejected": -313.3092956542969, "loss": 0.1081, "rewards/accuracies": 0.625, "rewards/chosen": -0.38156622648239136, "rewards/margins": 0.08456975966691971, "rewards/rejected": -0.4661359190940857, "step": 2680 }, { "epoch": 0.32, "learning_rate": 4.28196169430152e-06, "logits/chosen": -2.0158498287200928, "logits/rejected": -1.6960630416870117, "logps/chosen": -214.83908081054688, "logps/rejected": -203.30886840820312, "loss": 0.1377, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23143234848976135, "rewards/margins": 0.13869646191596985, "rewards/rejected": -0.3701288104057312, "step": 2690 }, { "epoch": 0.32, "learning_rate": 4.274602201246775e-06, "logits/chosen": -2.104879856109619, "logits/rejected": -1.873944878578186, "logps/chosen": -237.60867309570312, "logps/rejected": -253.7798309326172, "loss": 0.1325, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.32146531343460083, "rewards/margins": 0.13118143379688263, "rewards/rejected": -0.45264673233032227, "step": 2700 }, { "epoch": 0.33, "learning_rate": 4.267211579433865e-06, "logits/chosen": -2.124311923980713, "logits/rejected": -1.6988563537597656, "logps/chosen": -248.6872100830078, "logps/rejected": -259.9998779296875, "loss": 0.123, "rewards/accuracies": 0.75, "rewards/chosen": -0.2223828136920929, "rewards/margins": 0.22472596168518066, "rewards/rejected": -0.44710874557495117, "step": 2710 }, { "epoch": 0.33, "learning_rate": 4.259789958503606e-06, "logits/chosen": -1.808075189590454, "logits/rejected": -1.4258639812469482, "logps/chosen": -288.0134582519531, "logps/rejected": -270.99285888671875, "loss": 0.1181, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.48417121171951294, "rewards/margins": 0.1431351900100708, "rewards/rejected": -0.627306342124939, "step": 2720 }, { "epoch": 0.33, "learning_rate": 4.252337468640578e-06, "logits/chosen": -1.8779484033584595, "logits/rejected": -1.4368770122528076, "logps/chosen": -182.4998321533203, "logps/rejected": -176.48782348632812, "loss": 0.1357, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3015367388725281, "rewards/margins": 0.1921067237854004, "rewards/rejected": -0.49364346265792847, "step": 2730 }, { "epoch": 0.33, "learning_rate": 4.244854240570844e-06, "logits/chosen": -1.8997386693954468, "logits/rejected": -1.638164758682251, "logps/chosen": -261.68792724609375, "logps/rejected": -275.615478515625, "loss": 0.1552, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.38944321870803833, "rewards/margins": 0.14735980331897736, "rewards/rejected": -0.536803126335144, "step": 2740 }, { "epoch": 0.33, "learning_rate": 4.237340405559648e-06, "logits/chosen": -2.111983060836792, "logits/rejected": -1.8002418279647827, "logps/chosen": -276.4152526855469, "logps/rejected": -253.03970336914062, "loss": 0.1451, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.43595629930496216, "rewards/margins": 0.1535700559616089, "rewards/rejected": -0.589526355266571, "step": 2750 }, { "epoch": 0.33, "learning_rate": 4.229796095409124e-06, "logits/chosen": -1.9869279861450195, "logits/rejected": -1.6609262228012085, "logps/chosen": -243.32666015625, "logps/rejected": -212.46484375, "loss": 0.1604, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.41807931661605835, "rewards/margins": 0.15857204794883728, "rewards/rejected": -0.5766514539718628, "step": 2760 }, { "epoch": 0.33, "learning_rate": 4.222221442455975e-06, "logits/chosen": -1.7951889038085938, "logits/rejected": -1.6626970767974854, "logps/chosen": -270.48785400390625, "logps/rejected": -270.6944274902344, "loss": 0.1321, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5235955119132996, "rewards/margins": 0.13521410524845123, "rewards/rejected": -0.6588095426559448, "step": 2770 }, { "epoch": 0.33, "learning_rate": 4.2146165795691565e-06, "logits/chosen": -2.0419421195983887, "logits/rejected": -1.6905419826507568, "logps/chosen": -274.6120910644531, "logps/rejected": -232.2598876953125, "loss": 0.1655, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5508221387863159, "rewards/margins": 0.0936468318104744, "rewards/rejected": -0.6444690823554993, "step": 2780 }, { "epoch": 0.33, "learning_rate": 4.206981640147543e-06, "logits/chosen": -1.9829498529434204, "logits/rejected": -1.5640804767608643, "logps/chosen": -227.0321044921875, "logps/rejected": -209.35629272460938, "loss": 0.1261, "rewards/accuracies": 0.75, "rewards/chosen": -0.38011568784713745, "rewards/margins": 0.20434486865997314, "rewards/rejected": -0.5844606161117554, "step": 2790 }, { "epoch": 0.34, "learning_rate": 4.199316758117592e-06, "logits/chosen": -1.7994956970214844, "logits/rejected": -1.328776240348816, "logps/chosen": -219.972900390625, "logps/rejected": -209.31918334960938, "loss": 0.1531, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.39323854446411133, "rewards/margins": 0.17654483020305634, "rewards/rejected": -0.5697833895683289, "step": 2800 }, { "epoch": 0.34, "learning_rate": 4.191622067930987e-06, "logits/chosen": -1.9456312656402588, "logits/rejected": -1.5701932907104492, "logps/chosen": -287.2892761230469, "logps/rejected": -293.2528076171875, "loss": 0.0978, "rewards/accuracies": 0.75, "rewards/chosen": -0.5495396852493286, "rewards/margins": 0.13945366442203522, "rewards/rejected": -0.6889933943748474, "step": 2810 }, { "epoch": 0.34, "learning_rate": 4.1838977045622884e-06, "logits/chosen": -2.122058629989624, "logits/rejected": -1.8984161615371704, "logps/chosen": -296.1750793457031, "logps/rejected": -291.369384765625, "loss": 0.1468, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5357397794723511, "rewards/margins": 0.10289420932531357, "rewards/rejected": -0.6386340260505676, "step": 2820 }, { "epoch": 0.34, "learning_rate": 4.1761438035065624e-06, "logits/chosen": -1.9847745895385742, "logits/rejected": -1.5842864513397217, "logps/chosen": -265.0511779785156, "logps/rejected": -265.59912109375, "loss": 0.171, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4800674319267273, "rewards/margins": 0.20678965747356415, "rewards/rejected": -0.6868571639060974, "step": 2830 }, { "epoch": 0.34, "learning_rate": 4.168360500777e-06, "logits/chosen": -1.9825668334960938, "logits/rejected": -1.8264620304107666, "logps/chosen": -270.49163818359375, "logps/rejected": -263.13958740234375, "loss": 0.1304, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6147781014442444, "rewards/margins": 0.15809233486652374, "rewards/rejected": -0.7728704214096069, "step": 2840 }, { "epoch": 0.34, "learning_rate": 4.160547932902536e-06, "logits/chosen": -1.998263955116272, "logits/rejected": -1.4734325408935547, "logps/chosen": -305.92901611328125, "logps/rejected": -268.21624755859375, "loss": 0.1526, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6971688866615295, "rewards/margins": 0.1615949124097824, "rewards/rejected": -0.8587638139724731, "step": 2850 }, { "epoch": 0.34, "learning_rate": 4.152706236925453e-06, "logits/chosen": -1.8893840312957764, "logits/rejected": -1.5303113460540771, "logps/chosen": -264.53741455078125, "logps/rejected": -240.29244995117188, "loss": 0.1553, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7637086510658264, "rewards/margins": 0.09846861660480499, "rewards/rejected": -0.8621772527694702, "step": 2860 }, { "epoch": 0.34, "learning_rate": 4.144835550398977e-06, "logits/chosen": -2.0382955074310303, "logits/rejected": -1.6921682357788086, "logps/chosen": -293.18017578125, "logps/rejected": -257.85516357421875, "loss": 0.1353, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5966418385505676, "rewards/margins": 0.15557792782783508, "rewards/rejected": -0.7522197961807251, "step": 2870 }, { "epoch": 0.35, "learning_rate": 4.136936011384864e-06, "logits/chosen": -1.9725558757781982, "logits/rejected": -1.6349289417266846, "logps/chosen": -282.499267578125, "logps/rejected": -256.969970703125, "loss": 0.1057, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5478484034538269, "rewards/margins": 0.13796645402908325, "rewards/rejected": -0.6858149170875549, "step": 2880 }, { "epoch": 0.35, "learning_rate": 4.129007758450982e-06, "logits/chosen": -1.8872253894805908, "logits/rejected": -1.4205682277679443, "logps/chosen": -262.5359802246094, "logps/rejected": -235.87744140625, "loss": 0.1245, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6125961542129517, "rewards/margins": 0.20152945816516876, "rewards/rejected": -0.8141257166862488, "step": 2890 }, { "epoch": 0.35, "learning_rate": 4.121050930668871e-06, "logits/chosen": -2.047837734222412, "logits/rejected": -1.848854422569275, "logps/chosen": -243.146728515625, "logps/rejected": -233.36685180664062, "loss": 0.1355, "rewards/accuracies": 0.5, "rewards/chosen": -0.4853152334690094, "rewards/margins": 0.10266610234975815, "rewards/rejected": -0.5879813432693481, "step": 2900 }, { "epoch": 0.35, "learning_rate": 4.113065667611319e-06, "logits/chosen": -2.1213645935058594, "logits/rejected": -1.6058502197265625, "logps/chosen": -282.0854187011719, "logps/rejected": -236.03237915039062, "loss": 0.133, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5683524012565613, "rewards/margins": 0.13890430331230164, "rewards/rejected": -0.7072567939758301, "step": 2910 }, { "epoch": 0.35, "learning_rate": 4.105052109349896e-06, "logits/chosen": -1.961520791053772, "logits/rejected": -1.6209933757781982, "logps/chosen": -235.26229858398438, "logps/rejected": -211.75735473632812, "loss": 0.1818, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5107260942459106, "rewards/margins": 0.1078595519065857, "rewards/rejected": -0.6185856461524963, "step": 2920 }, { "epoch": 0.35, "learning_rate": 4.097010396452511e-06, "logits/chosen": -1.7602100372314453, "logits/rejected": -1.5986255407333374, "logps/chosen": -221.6525115966797, "logps/rejected": -236.3260955810547, "loss": 0.099, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5530390739440918, "rewards/margins": 0.15462610125541687, "rewards/rejected": -0.7076650857925415, "step": 2930 }, { "epoch": 0.35, "learning_rate": 4.088940669980936e-06, "logits/chosen": -1.833754301071167, "logits/rejected": -1.4045370817184448, "logps/chosen": -229.57437133789062, "logps/rejected": -231.51205444335938, "loss": 0.1555, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5543310046195984, "rewards/margins": 0.21897678077220917, "rewards/rejected": -0.7733078002929688, "step": 2940 }, { "epoch": 0.35, "learning_rate": 4.080843071488343e-06, "logits/chosen": -1.7528629302978516, "logits/rejected": -1.5004993677139282, "logps/chosen": -324.9387512207031, "logps/rejected": -295.79315185546875, "loss": 0.1262, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6013875007629395, "rewards/margins": 0.061654604971408844, "rewards/rejected": -0.6630421876907349, "step": 2950 }, { "epoch": 0.36, "learning_rate": 4.072717743016807e-06, "logits/chosen": -1.9874687194824219, "logits/rejected": -1.7957178354263306, "logps/chosen": -243.137939453125, "logps/rejected": -279.1502685546875, "loss": 0.1195, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.45743948221206665, "rewards/margins": 0.1679219901561737, "rewards/rejected": -0.6253615021705627, "step": 2960 }, { "epoch": 0.36, "learning_rate": 4.064564827094827e-06, "logits/chosen": -2.1176095008850098, "logits/rejected": -1.8404308557510376, "logps/chosen": -246.0503692626953, "logps/rejected": -247.1431121826172, "loss": 0.1031, "rewards/accuracies": 0.625, "rewards/chosen": -0.41762226819992065, "rewards/margins": 0.1749526411294937, "rewards/rejected": -0.592574954032898, "step": 2970 }, { "epoch": 0.36, "learning_rate": 4.056384466734819e-06, "logits/chosen": -1.7445135116577148, "logits/rejected": -1.2714130878448486, "logps/chosen": -256.58575439453125, "logps/rejected": -227.7979736328125, "loss": 0.1514, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5112208127975464, "rewards/margins": 0.19044998288154602, "rewards/rejected": -0.70167076587677, "step": 2980 }, { "epoch": 0.36, "learning_rate": 4.048176805430608e-06, "logits/chosen": -1.8863022327423096, "logits/rejected": -1.7394742965698242, "logps/chosen": -262.2151794433594, "logps/rejected": -257.8638000488281, "loss": 0.1186, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4821039140224457, "rewards/margins": 0.17492111027240753, "rewards/rejected": -0.6570249795913696, "step": 2990 }, { "epoch": 0.36, "learning_rate": 4.039941987154913e-06, "logits/chosen": -2.089552879333496, "logits/rejected": -1.5143569707870483, "logps/chosen": -255.2841339111328, "logps/rejected": -214.74667358398438, "loss": 0.1349, "rewards/accuracies": 0.75, "rewards/chosen": -0.41421470046043396, "rewards/margins": 0.21286948025226593, "rewards/rejected": -0.6270841360092163, "step": 3000 }, { "epoch": 0.36, "learning_rate": 4.031680156356822e-06, "logits/chosen": -2.152740478515625, "logits/rejected": -1.648754358291626, "logps/chosen": -298.00860595703125, "logps/rejected": -279.27215576171875, "loss": 0.088, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.45244675874710083, "rewards/margins": 0.17495819926261902, "rewards/rejected": -0.6274049282073975, "step": 3010 }, { "epoch": 0.36, "learning_rate": 4.023391457959253e-06, "logits/chosen": -1.9636989831924438, "logits/rejected": -1.5016404390335083, "logps/chosen": -223.6481475830078, "logps/rejected": -208.9552001953125, "loss": 0.1553, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3230467140674591, "rewards/margins": 0.15630824863910675, "rewards/rejected": -0.47935494780540466, "step": 3020 }, { "epoch": 0.36, "learning_rate": 4.015076037356419e-06, "logits/chosen": -1.778830885887146, "logits/rejected": -1.504024624824524, "logps/chosen": -261.44805908203125, "logps/rejected": -237.22036743164062, "loss": 0.2152, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4342494606971741, "rewards/margins": 0.06908075511455536, "rewards/rejected": -0.5033301711082458, "step": 3030 }, { "epoch": 0.36, "learning_rate": 4.006734040411272e-06, "logits/chosen": -1.8755178451538086, "logits/rejected": -1.488073706626892, "logps/chosen": -233.17788696289062, "logps/rejected": -202.04881286621094, "loss": 0.1823, "rewards/accuracies": 0.625, "rewards/chosen": -0.4887026846408844, "rewards/margins": 0.13193130493164062, "rewards/rejected": -0.6206339597702026, "step": 3040 }, { "epoch": 0.37, "learning_rate": 3.998365613452947e-06, "logits/chosen": -1.744222640991211, "logits/rejected": -1.7371858358383179, "logps/chosen": -213.4022979736328, "logps/rejected": -271.8200378417969, "loss": 0.1179, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.43451786041259766, "rewards/margins": 0.1312752515077591, "rewards/rejected": -0.5657930970191956, "step": 3050 }, { "epoch": 0.37, "learning_rate": 3.9899709032741955e-06, "logits/chosen": -2.135042190551758, "logits/rejected": -1.7216142416000366, "logps/chosen": -226.56991577148438, "logps/rejected": -227.9345245361328, "loss": 0.1873, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.442862331867218, "rewards/margins": 0.20581674575805664, "rewards/rejected": -0.6486790776252747, "step": 3060 }, { "epoch": 0.37, "learning_rate": 3.981550057128809e-06, "logits/chosen": -2.0724985599517822, "logits/rejected": -1.5731353759765625, "logps/chosen": -249.2626953125, "logps/rejected": -205.86062622070312, "loss": 0.0977, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3592108488082886, "rewards/margins": 0.1588134914636612, "rewards/rejected": -0.5180243253707886, "step": 3070 }, { "epoch": 0.37, "learning_rate": 3.973103222729037e-06, "logits/chosen": -1.9891624450683594, "logits/rejected": -1.8182249069213867, "logps/chosen": -238.1395263671875, "logps/rejected": -248.2894744873047, "loss": 0.1503, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3745599687099457, "rewards/margins": 0.1474495679140091, "rewards/rejected": -0.522009551525116, "step": 3080 }, { "epoch": 0.37, "learning_rate": 3.964630548242997e-06, "logits/chosen": -1.7449464797973633, "logits/rejected": -1.3936296701431274, "logps/chosen": -234.7018585205078, "logps/rejected": -203.70974731445312, "loss": 0.1525, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3542986512184143, "rewards/margins": 0.15336424112319946, "rewards/rejected": -0.5076628923416138, "step": 3090 }, { "epoch": 0.37, "learning_rate": 3.956132182292071e-06, "logits/chosen": -1.9436609745025635, "logits/rejected": -1.6176378726959229, "logps/chosen": -306.6236572265625, "logps/rejected": -285.08624267578125, "loss": 0.1063, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.48512548208236694, "rewards/margins": 0.1552090346813202, "rewards/rejected": -0.6403344869613647, "step": 3100 }, { "epoch": 0.37, "learning_rate": 3.947608273948305e-06, "logits/chosen": -1.9343887567520142, "logits/rejected": -1.7104957103729248, "logps/chosen": -197.42628479003906, "logps/rejected": -188.55636596679688, "loss": 0.1288, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4073019027709961, "rewards/margins": 0.13703104853630066, "rewards/rejected": -0.5443329811096191, "step": 3110 }, { "epoch": 0.37, "learning_rate": 3.939058972731788e-06, "logits/chosen": -2.057648181915283, "logits/rejected": -1.7952289581298828, "logps/chosen": -184.43569946289062, "logps/rejected": -189.1503143310547, "loss": 0.156, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3685847818851471, "rewards/margins": 0.1822405755519867, "rewards/rejected": -0.5508254170417786, "step": 3120 }, { "epoch": 0.38, "learning_rate": 3.9304844286080356e-06, "logits/chosen": -1.9299640655517578, "logits/rejected": -1.5476219654083252, "logps/chosen": -265.6641540527344, "logps/rejected": -238.1685791015625, "loss": 0.0987, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4248623847961426, "rewards/margins": 0.14193350076675415, "rewards/rejected": -0.566795825958252, "step": 3130 }, { "epoch": 0.38, "learning_rate": 3.921884791985351e-06, "logits/chosen": -2.0945184230804443, "logits/rejected": -1.710710883140564, "logps/chosen": -289.3420715332031, "logps/rejected": -286.7973327636719, "loss": 0.1331, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.41171398758888245, "rewards/margins": 0.18646354973316193, "rewards/rejected": -0.5981774926185608, "step": 3140 }, { "epoch": 0.38, "learning_rate": 3.913260213712195e-06, "logits/chosen": -2.005298614501953, "logits/rejected": -1.6120986938476562, "logps/chosen": -271.31695556640625, "logps/rejected": -271.58599853515625, "loss": 0.1618, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.39514026045799255, "rewards/margins": 0.18233473598957062, "rewards/rejected": -0.5774749517440796, "step": 3150 }, { "epoch": 0.38, "learning_rate": 3.9046108450745365e-06, "logits/chosen": -1.9153554439544678, "logits/rejected": -1.6038618087768555, "logps/chosen": -244.7465057373047, "logps/rejected": -224.35009765625, "loss": 0.1676, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4914848804473877, "rewards/margins": 0.13527749478816986, "rewards/rejected": -0.626762330532074, "step": 3160 }, { "epoch": 0.38, "learning_rate": 3.895936837793195e-06, "logits/chosen": -2.1196136474609375, "logits/rejected": -1.9197361469268799, "logps/chosen": -272.1467590332031, "logps/rejected": -282.51373291015625, "loss": 0.1016, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3920500576496124, "rewards/margins": 0.1278650313615799, "rewards/rejected": -0.5199151039123535, "step": 3170 }, { "epoch": 0.38, "learning_rate": 3.887238344021187e-06, "logits/chosen": -1.9512029886245728, "logits/rejected": -1.5371229648590088, "logps/chosen": -229.00180053710938, "logps/rejected": -227.99154663085938, "loss": 0.1223, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4040389060974121, "rewards/margins": 0.25474390387535095, "rewards/rejected": -0.6587827801704407, "step": 3180 }, { "epoch": 0.38, "learning_rate": 3.878515516341051e-06, "logits/chosen": -1.8965469598770142, "logits/rejected": -1.5892069339752197, "logps/chosen": -312.53717041015625, "logps/rejected": -321.0782165527344, "loss": 0.1367, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38909637928009033, "rewards/margins": 0.18783049285411835, "rewards/rejected": -0.5769269466400146, "step": 3190 }, { "epoch": 0.38, "learning_rate": 3.869768507762174e-06, "logits/chosen": -1.8793041706085205, "logits/rejected": -1.5246042013168335, "logps/chosen": -206.9381866455078, "logps/rejected": -187.6604461669922, "loss": 0.1119, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5590367913246155, "rewards/margins": 0.09890522062778473, "rewards/rejected": -0.6579420566558838, "step": 3200 }, { "epoch": 0.39, "learning_rate": 3.860997471718103e-06, "logits/chosen": -2.1226069927215576, "logits/rejected": -1.4512499570846558, "logps/chosen": -252.26736450195312, "logps/rejected": -193.675048828125, "loss": 0.1548, "rewards/accuracies": 0.75, "rewards/chosen": -0.3612119257450104, "rewards/margins": 0.20598213374614716, "rewards/rejected": -0.5671939849853516, "step": 3210 }, { "epoch": 0.39, "learning_rate": 3.852202562063861e-06, "logits/chosen": -1.9937137365341187, "logits/rejected": -1.6922187805175781, "logps/chosen": -273.3023376464844, "logps/rejected": -247.2145233154297, "loss": 0.1343, "rewards/accuracies": 0.5, "rewards/chosen": -0.48126834630966187, "rewards/margins": 0.08357418328523636, "rewards/rejected": -0.5648424625396729, "step": 3220 }, { "epoch": 0.39, "learning_rate": 3.843383933073243e-06, "logits/chosen": -1.9415899515151978, "logits/rejected": -1.546696424484253, "logps/chosen": -264.92291259765625, "logps/rejected": -254.01150512695312, "loss": 0.1317, "rewards/accuracies": 0.625, "rewards/chosen": -0.4355335235595703, "rewards/margins": 0.15428543090820312, "rewards/rejected": -0.5898188948631287, "step": 3230 }, { "epoch": 0.39, "learning_rate": 3.834541739436111e-06, "logits/chosen": -2.0209739208221436, "logits/rejected": -1.7102893590927124, "logps/chosen": -217.0830078125, "logps/rejected": -216.00875854492188, "loss": 0.1922, "rewards/accuracies": 0.75, "rewards/chosen": -0.3377262353897095, "rewards/margins": 0.18450435996055603, "rewards/rejected": -0.5222306251525879, "step": 3240 }, { "epoch": 0.39, "learning_rate": 3.82567613625568e-06, "logits/chosen": -2.1100401878356934, "logits/rejected": -2.0386178493499756, "logps/chosen": -306.51043701171875, "logps/rejected": -312.2314758300781, "loss": 0.087, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.28856438398361206, "rewards/margins": 0.09022587537765503, "rewards/rejected": -0.3787902593612671, "step": 3250 }, { "epoch": 0.39, "learning_rate": 3.816787279045796e-06, "logits/chosen": -1.8298437595367432, "logits/rejected": -1.4992341995239258, "logps/chosen": -182.92562866210938, "logps/rejected": -192.92718505859375, "loss": 0.1166, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2565813660621643, "rewards/margins": 0.21510104835033417, "rewards/rejected": -0.47168245911598206, "step": 3260 }, { "epoch": 0.39, "learning_rate": 3.807875323728216e-06, "logits/chosen": -2.188213586807251, "logits/rejected": -1.716449499130249, "logps/chosen": -218.6008758544922, "logps/rejected": -216.4799041748047, "loss": 0.1432, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24077515304088593, "rewards/margins": 0.20533184707164764, "rewards/rejected": -0.44610700011253357, "step": 3270 }, { "epoch": 0.39, "learning_rate": 3.7989404266298614e-06, "logits/chosen": -1.775099515914917, "logits/rejected": -1.7529404163360596, "logps/chosen": -209.0091094970703, "logps/rejected": -222.7781982421875, "loss": 0.1195, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.41656428575515747, "rewards/margins": 0.1831093728542328, "rewards/rejected": -0.5996736288070679, "step": 3280 }, { "epoch": 0.39, "learning_rate": 3.7899827444800824e-06, "logits/chosen": -1.975610375404358, "logits/rejected": -1.701148271560669, "logps/chosen": -320.1239929199219, "logps/rejected": -338.82916259765625, "loss": 0.1189, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5537024736404419, "rewards/margins": 0.1302124708890915, "rewards/rejected": -0.6839149594306946, "step": 3290 }, { "epoch": 0.4, "learning_rate": 3.7810024344079087e-06, "logits/chosen": -1.9031383991241455, "logits/rejected": -1.6330820322036743, "logps/chosen": -293.07061767578125, "logps/rejected": -310.0358581542969, "loss": 0.135, "rewards/accuracies": 0.75, "rewards/chosen": -0.7108369469642639, "rewards/margins": 0.24567703902721405, "rewards/rejected": -0.9565140008926392, "step": 3300 }, { "epoch": 0.4, "learning_rate": 3.7719996539392934e-06, "logits/chosen": -1.9635547399520874, "logits/rejected": -1.790226697921753, "logps/chosen": -293.0434875488281, "logps/rejected": -275.4864501953125, "loss": 0.1856, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.7340956926345825, "rewards/margins": 0.12796764075756073, "rewards/rejected": -0.8620632886886597, "step": 3310 }, { "epoch": 0.4, "learning_rate": 3.7629745609943454e-06, "logits/chosen": -1.8187742233276367, "logits/rejected": -1.5776069164276123, "logps/chosen": -245.385498046875, "logps/rejected": -267.95513916015625, "loss": 0.2146, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.7081323266029358, "rewards/margins": 0.12336601316928864, "rewards/rejected": -0.8314983248710632, "step": 3320 }, { "epoch": 0.4, "learning_rate": 3.7539273138845646e-06, "logits/chosen": -1.7952165603637695, "logits/rejected": -1.5672744512557983, "logps/chosen": -287.3976135253906, "logps/rejected": -305.989013671875, "loss": 0.121, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.774767279624939, "rewards/margins": 0.18739402294158936, "rewards/rejected": -0.9621612429618835, "step": 3330 }, { "epoch": 0.4, "learning_rate": 3.744858071310063e-06, "logits/chosen": -1.732142686843872, "logits/rejected": -1.4198137521743774, "logps/chosen": -244.3140869140625, "logps/rejected": -244.147705078125, "loss": 0.1903, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.7413711547851562, "rewards/margins": 0.11797042936086655, "rewards/rejected": -0.8593416213989258, "step": 3340 }, { "epoch": 0.4, "learning_rate": 3.7357669923567796e-06, "logits/chosen": -2.07377290725708, "logits/rejected": -1.57345449924469, "logps/chosen": -321.8138427734375, "logps/rejected": -300.8492736816406, "loss": 0.1081, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.709524929523468, "rewards/margins": 0.2038944661617279, "rewards/rejected": -0.9134193658828735, "step": 3350 }, { "epoch": 0.4, "learning_rate": 3.726654236493693e-06, "logits/chosen": -1.7073522806167603, "logits/rejected": -1.2896873950958252, "logps/chosen": -241.73583984375, "logps/rejected": -225.9768524169922, "loss": 0.1198, "rewards/accuracies": 0.75, "rewards/chosen": -0.7038867473602295, "rewards/margins": 0.18675477802753448, "rewards/rejected": -0.8906415700912476, "step": 3360 }, { "epoch": 0.4, "learning_rate": 3.71751996357002e-06, "logits/chosen": -1.9721448421478271, "logits/rejected": -1.5171587467193604, "logps/chosen": -273.17547607421875, "logps/rejected": -271.9955749511719, "loss": 0.1155, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6923592686653137, "rewards/margins": 0.09898529201745987, "rewards/rejected": -0.7913444638252258, "step": 3370 }, { "epoch": 0.41, "learning_rate": 3.7083643338124148e-06, "logits/chosen": -1.9171966314315796, "logits/rejected": -1.3887364864349365, "logps/chosen": -230.0151824951172, "logps/rejected": -235.19412231445312, "loss": 0.1357, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6308478116989136, "rewards/margins": 0.2189496010541916, "rewards/rejected": -0.8497973680496216, "step": 3380 }, { "epoch": 0.41, "learning_rate": 3.6991875078221566e-06, "logits/chosen": -1.9821815490722656, "logits/rejected": -1.4464524984359741, "logps/chosen": -298.43017578125, "logps/rejected": -268.8391418457031, "loss": 0.1016, "rewards/accuracies": 0.75, "rewards/chosen": -0.6277667284011841, "rewards/margins": 0.22099390625953674, "rewards/rejected": -0.8487606048583984, "step": 3390 }, { "epoch": 0.41, "learning_rate": 3.6899896465723352e-06, "logits/chosen": -1.9020191431045532, "logits/rejected": -1.55315101146698, "logps/chosen": -225.8837890625, "logps/rejected": -174.9435272216797, "loss": 0.1186, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.46708327531814575, "rewards/margins": 0.11991620063781738, "rewards/rejected": -0.5869995355606079, "step": 3400 }, { "epoch": 0.41, "learning_rate": 3.6807709114050224e-06, "logits/chosen": -1.8173465728759766, "logits/rejected": -1.6795600652694702, "logps/chosen": -272.4085388183594, "logps/rejected": -300.34197998046875, "loss": 0.1344, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6119049787521362, "rewards/margins": 0.07957009226083755, "rewards/rejected": -0.6914750337600708, "step": 3410 }, { "epoch": 0.41, "learning_rate": 3.6715314640284465e-06, "logits/chosen": -1.95268976688385, "logits/rejected": -1.4205843210220337, "logps/chosen": -284.33819580078125, "logps/rejected": -281.9459533691406, "loss": 0.1827, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6660246253013611, "rewards/margins": 0.1357964128255844, "rewards/rejected": -0.8018211126327515, "step": 3420 }, { "epoch": 0.41, "learning_rate": 3.6622714665141555e-06, "logits/chosen": -1.760504961013794, "logits/rejected": -1.5992462635040283, "logps/chosen": -255.13363647460938, "logps/rejected": -276.85931396484375, "loss": 0.1626, "rewards/accuracies": 0.75, "rewards/chosen": -0.6074542999267578, "rewards/margins": 0.1671931892633438, "rewards/rejected": -0.7746474742889404, "step": 3430 }, { "epoch": 0.41, "learning_rate": 3.6529910812941688e-06, "logits/chosen": -1.9736906290054321, "logits/rejected": -1.5405575037002563, "logps/chosen": -306.52337646484375, "logps/rejected": -294.29608154296875, "loss": 0.0816, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6215208768844604, "rewards/margins": 0.19987761974334717, "rewards/rejected": -0.8213985562324524, "step": 3440 }, { "epoch": 0.41, "learning_rate": 3.6436904711581358e-06, "logits/chosen": -1.7905645370483398, "logits/rejected": -1.4196887016296387, "logps/chosen": -263.2611389160156, "logps/rejected": -266.86700439453125, "loss": 0.108, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.576808750629425, "rewards/margins": 0.20972958207130432, "rewards/rejected": -0.7865381240844727, "step": 3450 }, { "epoch": 0.42, "learning_rate": 3.6343697992504745e-06, "logits/chosen": -1.8011541366577148, "logits/rejected": -1.528407096862793, "logps/chosen": -260.6270446777344, "logps/rejected": -244.4331817626953, "loss": 0.1388, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6323608160018921, "rewards/margins": 0.14937646687030792, "rewards/rejected": -0.7817373275756836, "step": 3460 }, { "epoch": 0.42, "learning_rate": 3.6250292290675103e-06, "logits/chosen": -1.8209354877471924, "logits/rejected": -1.6716169118881226, "logps/chosen": -279.5582275390625, "logps/rejected": -250.3738250732422, "loss": 0.2005, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.6096035242080688, "rewards/margins": 0.018474172800779343, "rewards/rejected": -0.6280776858329773, "step": 3470 }, { "epoch": 0.42, "learning_rate": 3.6156689244546135e-06, "logits/chosen": -1.9151493310928345, "logits/rejected": -1.635745644569397, "logps/chosen": -304.9268493652344, "logps/rejected": -309.64410400390625, "loss": 0.1445, "rewards/accuracies": 0.625, "rewards/chosen": -0.5671188831329346, "rewards/margins": 0.12486696243286133, "rewards/rejected": -0.6919858455657959, "step": 3480 }, { "epoch": 0.42, "learning_rate": 3.606289049603317e-06, "logits/chosen": -1.9070123434066772, "logits/rejected": -1.632115125656128, "logps/chosen": -217.40640258789062, "logps/rejected": -266.7468566894531, "loss": 0.1873, "rewards/accuracies": 0.625, "rewards/chosen": -0.5615465044975281, "rewards/margins": 0.10746095329523087, "rewards/rejected": -0.6690074801445007, "step": 3490 }, { "epoch": 0.42, "learning_rate": 3.596889769048442e-06, "logits/chosen": -1.9706356525421143, "logits/rejected": -1.8182321786880493, "logps/chosen": -245.6522674560547, "logps/rejected": -260.2083740234375, "loss": 0.1345, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4987486004829407, "rewards/margins": 0.12167295068502426, "rewards/rejected": -0.6204215288162231, "step": 3500 }, { "epoch": 0.42, "learning_rate": 3.587471247665211e-06, "logits/chosen": -1.870273232460022, "logits/rejected": -1.4389684200286865, "logps/chosen": -282.3717346191406, "logps/rejected": -282.10955810546875, "loss": 0.1518, "rewards/accuracies": 0.75, "rewards/chosen": -0.5567461252212524, "rewards/margins": 0.15303365886211395, "rewards/rejected": -0.7097797393798828, "step": 3510 }, { "epoch": 0.42, "learning_rate": 3.578033650666354e-06, "logits/chosen": -1.9102929830551147, "logits/rejected": -1.6725549697875977, "logps/chosen": -269.2619323730469, "logps/rejected": -265.96185302734375, "loss": 0.1345, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5339959859848022, "rewards/margins": 0.11029736697673798, "rewards/rejected": -0.6442933678627014, "step": 3520 }, { "epoch": 0.42, "learning_rate": 3.56857714359921e-06, "logits/chosen": -1.9303925037384033, "logits/rejected": -1.450552225112915, "logps/chosen": -314.16229248046875, "logps/rejected": -267.4084167480469, "loss": 0.1383, "rewards/accuracies": 0.75, "rewards/chosen": -0.5251078605651855, "rewards/margins": 0.17124128341674805, "rewards/rejected": -0.6963491439819336, "step": 3530 }, { "epoch": 0.42, "learning_rate": 3.5591018923428273e-06, "logits/chosen": -1.821260690689087, "logits/rejected": -1.5743091106414795, "logps/chosen": -229.2704620361328, "logps/rejected": -216.47482299804688, "loss": 0.1376, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.45546379685401917, "rewards/margins": 0.09873731434345245, "rewards/rejected": -0.5542011857032776, "step": 3540 }, { "epoch": 0.43, "learning_rate": 3.5496080631050494e-06, "logits/chosen": -1.9756050109863281, "logits/rejected": -1.7553752660751343, "logps/chosen": -242.6219940185547, "logps/rejected": -246.052978515625, "loss": 0.1605, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.44631749391555786, "rewards/margins": 0.12928064167499542, "rewards/rejected": -0.5755981206893921, "step": 3550 }, { "epoch": 0.43, "learning_rate": 3.5400958224196e-06, "logits/chosen": -1.7444331645965576, "logits/rejected": -1.646104097366333, "logps/chosen": -219.2833251953125, "logps/rejected": -236.79434204101562, "loss": 0.1119, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3898042142391205, "rewards/margins": 0.09327594935894012, "rewards/rejected": -0.4830802083015442, "step": 3560 }, { "epoch": 0.43, "learning_rate": 3.5305653371431635e-06, "logits/chosen": -1.8823859691619873, "logits/rejected": -1.5607668161392212, "logps/chosen": -255.4270782470703, "logps/rejected": -250.3773651123047, "loss": 0.122, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.502920925617218, "rewards/margins": 0.15608903765678406, "rewards/rejected": -0.6590099930763245, "step": 3570 }, { "epoch": 0.43, "learning_rate": 3.52101677445246e-06, "logits/chosen": -1.8646084070205688, "logits/rejected": -1.5643223524093628, "logps/chosen": -284.89697265625, "logps/rejected": -273.821044921875, "loss": 0.112, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5394625067710876, "rewards/margins": 0.147272527217865, "rewards/rejected": -0.6867350339889526, "step": 3580 }, { "epoch": 0.43, "learning_rate": 3.5114503018413055e-06, "logits/chosen": -2.060659408569336, "logits/rejected": -1.689171552658081, "logps/chosen": -249.76150512695312, "logps/rejected": -236.51171875, "loss": 0.1114, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4521172046661377, "rewards/margins": 0.12867510318756104, "rewards/rejected": -0.5807923078536987, "step": 3590 }, { "epoch": 0.43, "learning_rate": 3.5018660871176815e-06, "logits/chosen": -2.1247520446777344, "logits/rejected": -1.6458734273910522, "logps/chosen": -317.2967224121094, "logps/rejected": -260.74072265625, "loss": 0.1181, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4817916750907898, "rewards/margins": 0.13223214447498322, "rewards/rejected": -0.6140238046646118, "step": 3600 }, { "epoch": 0.43, "learning_rate": 3.4922642984007888e-06, "logits/chosen": -1.8655788898468018, "logits/rejected": -1.3581578731536865, "logps/chosen": -288.52496337890625, "logps/rejected": -243.9176483154297, "loss": 0.2081, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5236949324607849, "rewards/margins": 0.24149084091186523, "rewards/rejected": -0.7651858329772949, "step": 3610 }, { "epoch": 0.43, "learning_rate": 3.4826451041180963e-06, "logits/chosen": -1.8614925146102905, "logits/rejected": -1.6801944971084595, "logps/chosen": -224.9128875732422, "logps/rejected": -241.3170928955078, "loss": 0.1374, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.49704447388648987, "rewards/margins": 0.1276617795228958, "rewards/rejected": -0.6247062683105469, "step": 3620 }, { "epoch": 0.44, "learning_rate": 3.4730086730023904e-06, "logits/chosen": -1.9381475448608398, "logits/rejected": -1.6607654094696045, "logps/chosen": -270.886474609375, "logps/rejected": -252.22116088867188, "loss": 0.1707, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3630528151988983, "rewards/margins": 0.13112984597682953, "rewards/rejected": -0.49418267607688904, "step": 3630 }, { "epoch": 0.44, "learning_rate": 3.4633551740888122e-06, "logits/chosen": -2.1135964393615723, "logits/rejected": -1.4086754322052002, "logps/chosen": -322.132568359375, "logps/rejected": -269.0862731933594, "loss": 0.0716, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3996971547603607, "rewards/margins": 0.2393627166748047, "rewards/rejected": -0.639059841632843, "step": 3640 }, { "epoch": 0.44, "learning_rate": 3.4536847767118926e-06, "logits/chosen": -1.9193264245986938, "logits/rejected": -1.5788618326187134, "logps/chosen": -240.32687377929688, "logps/rejected": -219.1731414794922, "loss": 0.1606, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4481803774833679, "rewards/margins": 0.13924534618854523, "rewards/rejected": -0.587425708770752, "step": 3650 }, { "epoch": 0.44, "learning_rate": 3.443997650502586e-06, "logits/chosen": -1.7943519353866577, "logits/rejected": -1.4917861223220825, "logps/chosen": -238.60647583007812, "logps/rejected": -199.71517944335938, "loss": 0.1366, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5198334455490112, "rewards/margins": 0.13573278486728668, "rewards/rejected": -0.6555660963058472, "step": 3660 }, { "epoch": 0.44, "learning_rate": 3.434293965385287e-06, "logits/chosen": -1.852033257484436, "logits/rejected": -1.650202989578247, "logps/chosen": -262.7406311035156, "logps/rejected": -232.7073211669922, "loss": 0.1364, "rewards/accuracies": 0.625, "rewards/chosen": -0.40459275245666504, "rewards/margins": 0.13505297899246216, "rewards/rejected": -0.5396457314491272, "step": 3670 }, { "epoch": 0.44, "learning_rate": 3.4245738915748584e-06, "logits/chosen": -2.122192621231079, "logits/rejected": -1.8862508535385132, "logps/chosen": -264.1258544921875, "logps/rejected": -277.42730712890625, "loss": 0.1551, "rewards/accuracies": 0.625, "rewards/chosen": -0.42287102341651917, "rewards/margins": 0.10563336312770844, "rewards/rejected": -0.5285043716430664, "step": 3680 }, { "epoch": 0.44, "learning_rate": 3.4148375995736395e-06, "logits/chosen": -1.9229469299316406, "logits/rejected": -1.524235486984253, "logps/chosen": -297.52252197265625, "logps/rejected": -271.72381591796875, "loss": 0.1194, "rewards/accuracies": 0.75, "rewards/chosen": -0.6332093477249146, "rewards/margins": 0.17999136447906494, "rewards/rejected": -0.8132007718086243, "step": 3690 }, { "epoch": 0.44, "learning_rate": 3.4050852601684563e-06, "logits/chosen": -1.7078931331634521, "logits/rejected": -1.2806559801101685, "logps/chosen": -242.444091796875, "logps/rejected": -239.5855712890625, "loss": 0.1546, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5890045762062073, "rewards/margins": 0.20271596312522888, "rewards/rejected": -0.7917205095291138, "step": 3700 }, { "epoch": 0.45, "learning_rate": 3.3953170444276283e-06, "logits/chosen": -2.0124032497406006, "logits/rejected": -1.6335124969482422, "logps/chosen": -294.7106628417969, "logps/rejected": -276.8992004394531, "loss": 0.094, "rewards/accuracies": 0.75, "rewards/chosen": -0.5468276143074036, "rewards/margins": 0.18210643529891968, "rewards/rejected": -0.728934109210968, "step": 3710 }, { "epoch": 0.45, "learning_rate": 3.385533123697966e-06, "logits/chosen": -1.6806570291519165, "logits/rejected": -1.582833170890808, "logps/chosen": -250.8227081298828, "logps/rejected": -278.7505187988281, "loss": 0.1042, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5766712427139282, "rewards/margins": 0.1681402027606964, "rewards/rejected": -0.744811475276947, "step": 3720 }, { "epoch": 0.45, "learning_rate": 3.375733669601763e-06, "logits/chosen": -1.9780842065811157, "logits/rejected": -1.5905098915100098, "logps/chosen": -307.57269287109375, "logps/rejected": -258.7306213378906, "loss": 0.1604, "rewards/accuracies": 0.625, "rewards/chosen": -0.6550931930541992, "rewards/margins": 0.08839500695466995, "rewards/rejected": -0.7434881925582886, "step": 3730 }, { "epoch": 0.45, "learning_rate": 3.3659188540337884e-06, "logits/chosen": -2.0141289234161377, "logits/rejected": -1.7356878519058228, "logps/chosen": -234.14389038085938, "logps/rejected": -253.2290496826172, "loss": 0.1203, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.424830824136734, "rewards/margins": 0.12162800878286362, "rewards/rejected": -0.5464588403701782, "step": 3740 }, { "epoch": 0.45, "learning_rate": 3.3560888491582736e-06, "logits/chosen": -1.877969741821289, "logits/rejected": -1.6804864406585693, "logps/chosen": -217.326171875, "logps/rejected": -254.08627319335938, "loss": 0.1314, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4663441777229309, "rewards/margins": 0.21716144680976868, "rewards/rejected": -0.683505654335022, "step": 3750 }, { "epoch": 0.45, "learning_rate": 3.3462438274058856e-06, "logits/chosen": -1.7631876468658447, "logits/rejected": -1.5371801853179932, "logps/chosen": -281.08563232421875, "logps/rejected": -305.8494873046875, "loss": 0.1094, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5819532871246338, "rewards/margins": 0.18844255805015564, "rewards/rejected": -0.770395815372467, "step": 3760 }, { "epoch": 0.45, "learning_rate": 3.3363839614707094e-06, "logits/chosen": -1.8546764850616455, "logits/rejected": -1.6666762828826904, "logps/chosen": -333.0240783691406, "logps/rejected": -356.2004699707031, "loss": 0.1544, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5481715798377991, "rewards/margins": 0.1419263780117035, "rewards/rejected": -0.6900979280471802, "step": 3770 }, { "epoch": 0.45, "learning_rate": 3.326509424307214e-06, "logits/chosen": -1.9387702941894531, "logits/rejected": -1.7059547901153564, "logps/chosen": -264.86865234375, "logps/rejected": -257.7136535644531, "loss": 0.1748, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.57854825258255, "rewards/margins": 0.1645454317331314, "rewards/rejected": -0.7430936694145203, "step": 3780 }, { "epoch": 0.45, "learning_rate": 3.3166203891272204e-06, "logits/chosen": -2.0821845531463623, "logits/rejected": -1.6988433599472046, "logps/chosen": -336.27447509765625, "logps/rejected": -306.909912109375, "loss": 0.1274, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.48699769377708435, "rewards/margins": 0.19956621527671814, "rewards/rejected": -0.686564028263092, "step": 3790 }, { "epoch": 0.46, "learning_rate": 3.306717029396863e-06, "logits/chosen": -1.900738000869751, "logits/rejected": -1.6325021982192993, "logps/chosen": -303.42596435546875, "logps/rejected": -268.81109619140625, "loss": 0.156, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5264959335327148, "rewards/margins": 0.08770108968019485, "rewards/rejected": -0.6141969561576843, "step": 3800 }, { "epoch": 0.46, "learning_rate": 3.2967995188335487e-06, "logits/chosen": -2.0487513542175293, "logits/rejected": -1.877976655960083, "logps/chosen": -187.6756134033203, "logps/rejected": -191.01234436035156, "loss": 0.1496, "rewards/accuracies": 0.625, "rewards/chosen": -0.33429086208343506, "rewards/margins": 0.13553500175476074, "rewards/rejected": -0.4698258936405182, "step": 3810 }, { "epoch": 0.46, "learning_rate": 3.2868680314029056e-06, "logits/chosen": -2.096329689025879, "logits/rejected": -1.7965996265411377, "logps/chosen": -288.78558349609375, "logps/rejected": -270.60418701171875, "loss": 0.1086, "rewards/accuracies": 0.625, "rewards/chosen": -0.39450058341026306, "rewards/margins": 0.10876335948705673, "rewards/rejected": -0.5032640099525452, "step": 3820 }, { "epoch": 0.46, "learning_rate": 3.2769227413157346e-06, "logits/chosen": -1.8762671947479248, "logits/rejected": -1.6266272068023682, "logps/chosen": -268.38555908203125, "logps/rejected": -219.3870849609375, "loss": 0.1707, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.29574501514434814, "rewards/margins": 0.18245580792427063, "rewards/rejected": -0.47820085287094116, "step": 3830 }, { "epoch": 0.46, "learning_rate": 3.266963823024951e-06, "logits/chosen": -1.7480850219726562, "logits/rejected": -1.5056158304214478, "logps/chosen": -233.0159149169922, "logps/rejected": -227.66030883789062, "loss": 0.1303, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3971914052963257, "rewards/margins": 0.13690955936908722, "rewards/rejected": -0.5341008901596069, "step": 3840 }, { "epoch": 0.46, "learning_rate": 3.2569914512225294e-06, "logits/chosen": -2.3906702995300293, "logits/rejected": -1.7254350185394287, "logps/chosen": -289.9384460449219, "logps/rejected": -234.61758422851562, "loss": 0.2108, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31785309314727783, "rewards/margins": 0.15459677577018738, "rewards/rejected": -0.47244992852211, "step": 3850 }, { "epoch": 0.46, "learning_rate": 3.2470058008364335e-06, "logits/chosen": -1.940606713294983, "logits/rejected": -1.567697286605835, "logps/chosen": -300.01751708984375, "logps/rejected": -281.2144775390625, "loss": 0.1124, "rewards/accuracies": 0.75, "rewards/chosen": -0.48682618141174316, "rewards/margins": 0.1738821119070053, "rewards/rejected": -0.6607083082199097, "step": 3860 }, { "epoch": 0.46, "learning_rate": 3.2370070470275493e-06, "logits/chosen": -1.9613168239593506, "logits/rejected": -1.6750881671905518, "logps/chosen": -239.41580200195312, "logps/rejected": -267.15350341796875, "loss": 0.1649, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.514406681060791, "rewards/margins": 0.1650439351797104, "rewards/rejected": -0.6794506311416626, "step": 3870 }, { "epoch": 0.47, "learning_rate": 3.226995365186616e-06, "logits/chosen": -1.8612607717514038, "logits/rejected": -1.55705988407135, "logps/chosen": -228.90213012695312, "logps/rejected": -198.01766967773438, "loss": 0.199, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4910704493522644, "rewards/margins": 0.11654232442378998, "rewards/rejected": -0.6076127290725708, "step": 3880 }, { "epoch": 0.47, "learning_rate": 3.216970930931144e-06, "logits/chosen": -2.0845413208007812, "logits/rejected": -1.7625007629394531, "logps/chosen": -227.1043243408203, "logps/rejected": -240.2063751220703, "loss": 0.1225, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4222134053707123, "rewards/margins": 0.11798451095819473, "rewards/rejected": -0.5401979684829712, "step": 3890 }, { "epoch": 0.47, "learning_rate": 3.2069339201023398e-06, "logits/chosen": -2.0071322917938232, "logits/rejected": -1.961846947669983, "logps/chosen": -281.2358703613281, "logps/rejected": -288.28924560546875, "loss": 0.1283, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5697019100189209, "rewards/margins": 0.07494824379682541, "rewards/rejected": -0.6446502208709717, "step": 3900 }, { "epoch": 0.47, "learning_rate": 3.196884508762016e-06, "logits/chosen": -1.7892014980316162, "logits/rejected": -1.560509204864502, "logps/chosen": -263.98577880859375, "logps/rejected": -225.89773559570312, "loss": 0.2099, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5703567266464233, "rewards/margins": 0.1076982244849205, "rewards/rejected": -0.6780549883842468, "step": 3910 }, { "epoch": 0.47, "learning_rate": 3.186822873189508e-06, "logits/chosen": -1.8385652303695679, "logits/rejected": -1.474015712738037, "logps/chosen": -245.1987762451172, "logps/rejected": -248.1842498779297, "loss": 0.1091, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5351457595825195, "rewards/margins": 0.19029943645000458, "rewards/rejected": -0.7254451513290405, "step": 3920 }, { "epoch": 0.47, "learning_rate": 3.1767491898785795e-06, "logits/chosen": -2.093048572540283, "logits/rejected": -1.6442959308624268, "logps/chosen": -260.1977233886719, "logps/rejected": -194.9726104736328, "loss": 0.1783, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4470372200012207, "rewards/margins": 0.11479449272155762, "rewards/rejected": -0.5618317127227783, "step": 3930 }, { "epoch": 0.47, "learning_rate": 3.166663635534325e-06, "logits/chosen": -1.9069023132324219, "logits/rejected": -1.7728145122528076, "logps/chosen": -251.0937957763672, "logps/rejected": -275.21026611328125, "loss": 0.0791, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4740324914455414, "rewards/margins": 0.09531258046627045, "rewards/rejected": -0.5693451166152954, "step": 3940 }, { "epoch": 0.47, "learning_rate": 3.1565663870700735e-06, "logits/chosen": -1.7941697835922241, "logits/rejected": -1.6212133169174194, "logps/chosen": -257.2917785644531, "logps/rejected": -283.5328369140625, "loss": 0.1458, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.6217584609985352, "rewards/margins": 0.12172119319438934, "rewards/rejected": -0.7434796690940857, "step": 3950 }, { "epoch": 0.48, "learning_rate": 3.1464576216042832e-06, "logits/chosen": -1.9793428182601929, "logits/rejected": -1.592930555343628, "logps/chosen": -305.0171813964844, "logps/rejected": -260.90753173828125, "loss": 0.1064, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6241774559020996, "rewards/margins": 0.1830337643623352, "rewards/rejected": -0.8072112202644348, "step": 3960 }, { "epoch": 0.48, "learning_rate": 3.1363375164574343e-06, "logits/chosen": -1.9784101247787476, "logits/rejected": -1.7758643627166748, "logps/chosen": -251.56320190429688, "logps/rejected": -248.0037384033203, "loss": 0.1837, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.556761622428894, "rewards/margins": 0.11395516246557236, "rewards/rejected": -0.6707167625427246, "step": 3970 }, { "epoch": 0.48, "learning_rate": 3.126206249148921e-06, "logits/chosen": -1.7647323608398438, "logits/rejected": -1.493837594985962, "logps/chosen": -300.1401062011719, "logps/rejected": -318.6617431640625, "loss": 0.1218, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6313523054122925, "rewards/margins": 0.19563212990760803, "rewards/rejected": -0.8269845247268677, "step": 3980 }, { "epoch": 0.48, "learning_rate": 3.1160639973939337e-06, "logits/chosen": -2.1163768768310547, "logits/rejected": -1.8096988201141357, "logps/chosen": -311.0527038574219, "logps/rejected": -290.04046630859375, "loss": 0.2148, "rewards/accuracies": 0.5, "rewards/chosen": -0.5415524244308472, "rewards/margins": 0.08402875810861588, "rewards/rejected": -0.6255810856819153, "step": 3990 }, { "epoch": 0.48, "learning_rate": 3.105910939100345e-06, "logits/chosen": -2.17586088180542, "logits/rejected": -1.680841088294983, "logps/chosen": -301.86749267578125, "logps/rejected": -298.6219177246094, "loss": 0.1436, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5832773447036743, "rewards/margins": 0.17088885605335236, "rewards/rejected": -0.7541662454605103, "step": 4000 }, { "epoch": 0.48, "learning_rate": 3.095747252365588e-06, "logits/chosen": -1.8582245111465454, "logits/rejected": -1.5364271402359009, "logps/chosen": -283.0442810058594, "logps/rejected": -277.89788818359375, "loss": 0.1558, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5722718834877014, "rewards/margins": 0.09478892385959625, "rewards/rejected": -0.6670608520507812, "step": 4010 }, { "epoch": 0.48, "learning_rate": 3.0855731154735326e-06, "logits/chosen": -1.6970354318618774, "logits/rejected": -1.434828281402588, "logps/chosen": -237.4835662841797, "logps/rejected": -244.4088134765625, "loss": 0.1824, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6819806694984436, "rewards/margins": 0.17408792674541473, "rewards/rejected": -0.8560686111450195, "step": 4020 }, { "epoch": 0.48, "learning_rate": 3.0753887068913545e-06, "logits/chosen": -1.896554946899414, "logits/rejected": -1.6122684478759766, "logps/chosen": -268.3304748535156, "logps/rejected": -254.9834747314453, "loss": 0.1759, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5617300271987915, "rewards/margins": 0.12652353942394257, "rewards/rejected": -0.6882535219192505, "step": 4030 }, { "epoch": 0.48, "learning_rate": 3.0651942052664117e-06, "logits/chosen": -1.7737739086151123, "logits/rejected": -1.4258487224578857, "logps/chosen": -292.5776672363281, "logps/rejected": -271.95257568359375, "loss": 0.1443, "rewards/accuracies": 0.625, "rewards/chosen": -0.5280343890190125, "rewards/margins": 0.165075421333313, "rewards/rejected": -0.6931098699569702, "step": 4040 }, { "epoch": 0.49, "learning_rate": 3.0549897894231058e-06, "logits/chosen": -1.9711172580718994, "logits/rejected": -1.7488377094268799, "logps/chosen": -313.0440368652344, "logps/rejected": -290.74798583984375, "loss": 0.0978, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5002248883247375, "rewards/margins": 0.10817272961139679, "rewards/rejected": -0.6083976626396179, "step": 4050 }, { "epoch": 0.49, "learning_rate": 3.0447756383597438e-06, "logits/chosen": -1.9547706842422485, "logits/rejected": -1.4798098802566528, "logps/chosen": -224.11904907226562, "logps/rejected": -188.64633178710938, "loss": 0.1618, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5285110473632812, "rewards/margins": 0.14819425344467163, "rewards/rejected": -0.6767052412033081, "step": 4060 }, { "epoch": 0.49, "learning_rate": 3.034551931245404e-06, "logits/chosen": -1.902276635169983, "logits/rejected": -1.4849553108215332, "logps/chosen": -358.1360168457031, "logps/rejected": -280.324951171875, "loss": 0.1335, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5318064093589783, "rewards/margins": 0.16225329041481018, "rewards/rejected": -0.6940596699714661, "step": 4070 }, { "epoch": 0.49, "learning_rate": 3.0243188474167884e-06, "logits/chosen": -1.960026502609253, "logits/rejected": -1.5699583292007446, "logps/chosen": -248.52590942382812, "logps/rejected": -228.6545867919922, "loss": 0.1788, "rewards/accuracies": 0.625, "rewards/chosen": -0.39242681860923767, "rewards/margins": 0.1711881309747696, "rewards/rejected": -0.5636149644851685, "step": 4080 }, { "epoch": 0.49, "learning_rate": 3.014076566375078e-06, "logits/chosen": -2.0379650592803955, "logits/rejected": -1.8301384449005127, "logps/chosen": -295.1788635253906, "logps/rejected": -255.16983032226562, "loss": 0.1771, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.46374598145484924, "rewards/margins": 0.09922705590724945, "rewards/rejected": -0.5629730224609375, "step": 4090 }, { "epoch": 0.49, "learning_rate": 3.003825267782785e-06, "logits/chosen": -2.1784117221832275, "logits/rejected": -1.8092330694198608, "logps/chosen": -222.7117156982422, "logps/rejected": -212.63888549804688, "loss": 0.082, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3608611524105072, "rewards/margins": 0.22283951938152313, "rewards/rejected": -0.5837006568908691, "step": 4100 }, { "epoch": 0.49, "learning_rate": 2.993565131460602e-06, "logits/chosen": -1.8552563190460205, "logits/rejected": -1.5919392108917236, "logps/chosen": -258.01837158203125, "logps/rejected": -256.6094665527344, "loss": 0.1408, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.39110246300697327, "rewards/margins": 0.14050598442554474, "rewards/rejected": -0.5316083431243896, "step": 4110 }, { "epoch": 0.49, "learning_rate": 2.9832963373842434e-06, "logits/chosen": -1.8685518503189087, "logits/rejected": -1.6710237264633179, "logps/chosen": -225.02157592773438, "logps/rejected": -231.95175170898438, "loss": 0.1088, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4149894714355469, "rewards/margins": 0.11046002060174942, "rewards/rejected": -0.5254494547843933, "step": 4120 }, { "epoch": 0.5, "learning_rate": 2.973019065681294e-06, "logits/chosen": -1.994270920753479, "logits/rejected": -1.5915766954421997, "logps/chosen": -237.4359893798828, "logps/rejected": -215.31338500976562, "loss": 0.1742, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4488137364387512, "rewards/margins": 0.1414080411195755, "rewards/rejected": -0.5902218222618103, "step": 4130 }, { "epoch": 0.5, "learning_rate": 2.9627334966280474e-06, "logits/chosen": -2.0599796772003174, "logits/rejected": -1.6930656433105469, "logps/chosen": -272.7281799316406, "logps/rejected": -230.94287109375, "loss": 0.1445, "rewards/accuracies": 0.625, "rewards/chosen": -0.4368261396884918, "rewards/margins": 0.08637617528438568, "rewards/rejected": -0.5232023000717163, "step": 4140 }, { "epoch": 0.5, "learning_rate": 2.952439810646341e-06, "logits/chosen": -1.9677894115447998, "logits/rejected": -1.6096127033233643, "logps/chosen": -250.97988891601562, "logps/rejected": -230.15469360351562, "loss": 0.1327, "rewards/accuracies": 0.625, "rewards/chosen": -0.4640297293663025, "rewards/margins": 0.13882431387901306, "rewards/rejected": -0.6028540134429932, "step": 4150 }, { "epoch": 0.5, "learning_rate": 2.942138188300394e-06, "logits/chosen": -1.9296554327011108, "logits/rejected": -1.5421892404556274, "logps/chosen": -257.2073974609375, "logps/rejected": -258.81756591796875, "loss": 0.1773, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5215272307395935, "rewards/margins": 0.17221274971961975, "rewards/rejected": -0.6937400102615356, "step": 4160 }, { "epoch": 0.5, "learning_rate": 2.931828810293642e-06, "logits/chosen": -2.06691312789917, "logits/rejected": -1.559309720993042, "logps/chosen": -252.13491821289062, "logps/rejected": -233.42626953125, "loss": 0.1603, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.46336793899536133, "rewards/margins": 0.18448057770729065, "rewards/rejected": -0.6478484869003296, "step": 4170 }, { "epoch": 0.5, "learning_rate": 2.92151185746556e-06, "logits/chosen": -1.9915742874145508, "logits/rejected": -1.6672182083129883, "logps/chosen": -277.6402893066406, "logps/rejected": -279.9922790527344, "loss": 0.1693, "rewards/accuracies": 0.625, "rewards/chosen": -0.5202735066413879, "rewards/margins": 0.10429404675960541, "rewards/rejected": -0.6245675683021545, "step": 4180 }, { "epoch": 0.5, "learning_rate": 2.911187510788498e-06, "logits/chosen": -1.9717572927474976, "logits/rejected": -1.7132648229599, "logps/chosen": -282.0147399902344, "logps/rejected": -253.1018524169922, "loss": 0.1024, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.41674551367759705, "rewards/margins": 0.13815635442733765, "rewards/rejected": -0.5549019575119019, "step": 4190 }, { "epoch": 0.5, "learning_rate": 2.9008559513645033e-06, "logits/chosen": -1.9843193292617798, "logits/rejected": -1.7526146173477173, "logps/chosen": -264.8548278808594, "logps/rejected": -241.67941284179688, "loss": 0.1593, "rewards/accuracies": 0.625, "rewards/chosen": -0.43674373626708984, "rewards/margins": 0.1286846250295639, "rewards/rejected": -0.5654283761978149, "step": 4200 }, { "epoch": 0.51, "learning_rate": 2.890517360422144e-06, "logits/chosen": -1.9424211978912354, "logits/rejected": -1.694551706314087, "logps/chosen": -256.5013732910156, "logps/rejected": -248.4475860595703, "loss": 0.1429, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4047975540161133, "rewards/margins": 0.13096585869789124, "rewards/rejected": -0.5357634425163269, "step": 4210 }, { "epoch": 0.51, "learning_rate": 2.880171919313327e-06, "logits/chosen": -1.9947586059570312, "logits/rejected": -1.516392469406128, "logps/chosen": -261.6585388183594, "logps/rejected": -189.2516632080078, "loss": 0.0992, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3641483783721924, "rewards/margins": 0.1547776162624359, "rewards/rejected": -0.5189260244369507, "step": 4220 }, { "epoch": 0.51, "learning_rate": 2.869819809510125e-06, "logits/chosen": -1.9108898639678955, "logits/rejected": -1.6370693445205688, "logps/chosen": -228.65298461914062, "logps/rejected": -231.56832885742188, "loss": 0.1458, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.44458943605422974, "rewards/margins": 0.1396978348493576, "rewards/rejected": -0.5842872858047485, "step": 4230 }, { "epoch": 0.51, "learning_rate": 2.8594612126015825e-06, "logits/chosen": -2.0889339447021484, "logits/rejected": -1.7717602252960205, "logps/chosen": -264.8407897949219, "logps/rejected": -302.21026611328125, "loss": 0.1162, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3832109570503235, "rewards/margins": 0.19960837066173553, "rewards/rejected": -0.5828193426132202, "step": 4240 }, { "epoch": 0.51, "learning_rate": 2.84909631029054e-06, "logits/chosen": -1.8553768396377563, "logits/rejected": -1.621514081954956, "logps/chosen": -263.96429443359375, "logps/rejected": -288.17620849609375, "loss": 0.1611, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6084156036376953, "rewards/margins": 0.16580604016780853, "rewards/rejected": -0.7742215991020203, "step": 4250 }, { "epoch": 0.51, "learning_rate": 2.838725284390441e-06, "logits/chosen": -1.8268849849700928, "logits/rejected": -1.6455726623535156, "logps/chosen": -269.8697204589844, "logps/rejected": -297.85150146484375, "loss": 0.112, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5495954751968384, "rewards/margins": 0.19235818088054657, "rewards/rejected": -0.7419536113739014, "step": 4260 }, { "epoch": 0.51, "learning_rate": 2.828348316822144e-06, "logits/chosen": -1.8502800464630127, "logits/rejected": -1.6670821905136108, "logps/chosen": -209.65640258789062, "logps/rejected": -273.5523986816406, "loss": 0.113, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.46271175146102905, "rewards/margins": 0.1825498640537262, "rewards/rejected": -0.6452616453170776, "step": 4270 }, { "epoch": 0.51, "learning_rate": 2.817965589610733e-06, "logits/chosen": -1.8152210712432861, "logits/rejected": -1.5677975416183472, "logps/chosen": -217.03182983398438, "logps/rejected": -245.7638702392578, "loss": 0.134, "rewards/accuracies": 0.625, "rewards/chosen": -0.5718420743942261, "rewards/margins": 0.1416967660188675, "rewards/rejected": -0.7135388255119324, "step": 4280 }, { "epoch": 0.51, "learning_rate": 2.807577284882324e-06, "logits/chosen": -1.8648059368133545, "logits/rejected": -1.4172070026397705, "logps/chosen": -214.18099975585938, "logps/rejected": -227.8682861328125, "loss": 0.1346, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.47108086943626404, "rewards/margins": 0.21896126866340637, "rewards/rejected": -0.6900421380996704, "step": 4290 }, { "epoch": 0.52, "learning_rate": 2.797183584860867e-06, "logits/chosen": -1.9184010028839111, "logits/rejected": -1.5958585739135742, "logps/chosen": -201.8664093017578, "logps/rejected": -200.3294219970703, "loss": 0.1953, "rewards/accuracies": 0.625, "rewards/chosen": -0.3741530776023865, "rewards/margins": 0.12563326954841614, "rewards/rejected": -0.499786376953125, "step": 4300 }, { "epoch": 0.52, "learning_rate": 2.7867846718649538e-06, "logits/chosen": -1.7121245861053467, "logits/rejected": -1.4752973318099976, "logps/chosen": -242.7071533203125, "logps/rejected": -287.45587158203125, "loss": 0.1076, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39267921447753906, "rewards/margins": 0.21084478497505188, "rewards/rejected": -0.6035240292549133, "step": 4310 }, { "epoch": 0.52, "learning_rate": 2.7763807283046195e-06, "logits/chosen": -2.0703561305999756, "logits/rejected": -1.8529870510101318, "logps/chosen": -213.96029663085938, "logps/rejected": -224.46817016601562, "loss": 0.1417, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.37636810541152954, "rewards/margins": 0.15098202228546143, "rewards/rejected": -0.527350127696991, "step": 4320 }, { "epoch": 0.52, "learning_rate": 2.76597193667814e-06, "logits/chosen": -2.061995029449463, "logits/rejected": -1.6790978908538818, "logps/chosen": -291.52508544921875, "logps/rejected": -278.72088623046875, "loss": 0.1316, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.35219401121139526, "rewards/margins": 0.10760994255542755, "rewards/rejected": -0.45980390906333923, "step": 4330 }, { "epoch": 0.52, "learning_rate": 2.7555584795688328e-06, "logits/chosen": -1.9189672470092773, "logits/rejected": -1.6146653890609741, "logps/chosen": -249.65347290039062, "logps/rejected": -247.68115234375, "loss": 0.1578, "rewards/accuracies": 0.75, "rewards/chosen": -0.36561039090156555, "rewards/margins": 0.15556563436985016, "rewards/rejected": -0.5211759805679321, "step": 4340 }, { "epoch": 0.52, "learning_rate": 2.7451405396418544e-06, "logits/chosen": -1.940999984741211, "logits/rejected": -1.4690072536468506, "logps/chosen": -244.7572021484375, "logps/rejected": -207.64208984375, "loss": 0.1639, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.38119882345199585, "rewards/margins": 0.1026213988661766, "rewards/rejected": -0.48382019996643066, "step": 4350 }, { "epoch": 0.52, "learning_rate": 2.734718299640994e-06, "logits/chosen": -2.1663918495178223, "logits/rejected": -1.8815300464630127, "logps/chosen": -250.9813232421875, "logps/rejected": -251.88296508789062, "loss": 0.1574, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.36953067779541016, "rewards/margins": 0.12235681712627411, "rewards/rejected": -0.4918874204158783, "step": 4360 }, { "epoch": 0.52, "learning_rate": 2.724291942385472e-06, "logits/chosen": -2.146113395690918, "logits/rejected": -1.609834909439087, "logps/chosen": -304.42132568359375, "logps/rejected": -268.470947265625, "loss": 0.0921, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.35676488280296326, "rewards/margins": 0.1696079671382904, "rewards/rejected": -0.5263728499412537, "step": 4370 }, { "epoch": 0.53, "learning_rate": 2.713861650766729e-06, "logits/chosen": -1.9884231090545654, "logits/rejected": -1.5890326499938965, "logps/chosen": -239.7928924560547, "logps/rejected": -229.9420623779297, "loss": 0.1299, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.38828951120376587, "rewards/margins": 0.1986490935087204, "rewards/rejected": -0.5869385600090027, "step": 4380 }, { "epoch": 0.53, "learning_rate": 2.703427607745219e-06, "logits/chosen": -2.1583807468414307, "logits/rejected": -1.7095534801483154, "logps/chosen": -290.33868408203125, "logps/rejected": -269.4275817871094, "loss": 0.1828, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3558133542537689, "rewards/margins": 0.13851602375507355, "rewards/rejected": -0.4943293631076813, "step": 4390 }, { "epoch": 0.53, "learning_rate": 2.6929899963472005e-06, "logits/chosen": -1.947405219078064, "logits/rejected": -1.5273383855819702, "logps/chosen": -235.3554229736328, "logps/rejected": -215.59786987304688, "loss": 0.1119, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35831964015960693, "rewards/margins": 0.1973123699426651, "rewards/rejected": -0.5556319952011108, "step": 4400 }, { "epoch": 0.53, "learning_rate": 2.6825489996615278e-06, "logits/chosen": -1.8226381540298462, "logits/rejected": -1.5650604963302612, "logps/chosen": -223.3660888671875, "logps/rejected": -215.26327514648438, "loss": 0.1294, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4000251293182373, "rewards/margins": 0.13242687284946442, "rewards/rejected": -0.5324519872665405, "step": 4410 }, { "epoch": 0.53, "learning_rate": 2.6721048008364343e-06, "logits/chosen": -1.968601942062378, "logits/rejected": -1.527261734008789, "logps/chosen": -263.6767578125, "logps/rejected": -242.66275024414062, "loss": 0.1512, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.357761949300766, "rewards/margins": 0.21579334139823914, "rewards/rejected": -0.5735553503036499, "step": 4420 }, { "epoch": 0.53, "learning_rate": 2.6616575830763247e-06, "logits/chosen": -2.044994831085205, "logits/rejected": -1.5942163467407227, "logps/chosen": -242.64852905273438, "logps/rejected": -245.306640625, "loss": 0.1581, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3358486294746399, "rewards/margins": 0.13670720160007477, "rewards/rejected": -0.47255581617355347, "step": 4430 }, { "epoch": 0.53, "learning_rate": 2.651207529638561e-06, "logits/chosen": -1.7535009384155273, "logits/rejected": -1.373928189277649, "logps/chosen": -261.17889404296875, "logps/rejected": -223.257080078125, "loss": 0.116, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.29363709688186646, "rewards/margins": 0.1494956910610199, "rewards/rejected": -0.44313281774520874, "step": 4440 }, { "epoch": 0.53, "learning_rate": 2.640754823830242e-06, "logits/chosen": -2.192082405090332, "logits/rejected": -1.9141347408294678, "logps/chosen": -312.79693603515625, "logps/rejected": -237.3318634033203, "loss": 0.1115, "rewards/accuracies": 0.625, "rewards/chosen": -0.32631200551986694, "rewards/margins": 0.0922769159078598, "rewards/rejected": -0.4185889661312103, "step": 4450 }, { "epoch": 0.54, "learning_rate": 2.6302996490049983e-06, "logits/chosen": -2.042506456375122, "logits/rejected": -1.6036508083343506, "logps/chosen": -254.78213500976562, "logps/rejected": -255.03018188476562, "loss": 0.1262, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.35550904273986816, "rewards/margins": 0.10495875775814056, "rewards/rejected": -0.4604678153991699, "step": 4460 }, { "epoch": 0.54, "learning_rate": 2.619842188559765e-06, "logits/chosen": -1.9425427913665771, "logits/rejected": -1.5557024478912354, "logps/chosen": -186.65274047851562, "logps/rejected": -203.18185424804688, "loss": 0.1508, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.35145506262779236, "rewards/margins": 0.2223198413848877, "rewards/rejected": -0.5737749338150024, "step": 4470 }, { "epoch": 0.54, "learning_rate": 2.609382625931575e-06, "logits/chosen": -1.9245996475219727, "logits/rejected": -1.6936094760894775, "logps/chosen": -244.86160278320312, "logps/rejected": -269.80584716796875, "loss": 0.1606, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4344099164009094, "rewards/margins": 0.16771261394023895, "rewards/rejected": -0.6021225452423096, "step": 4480 }, { "epoch": 0.54, "learning_rate": 2.59892114459433e-06, "logits/chosen": -1.9008939266204834, "logits/rejected": -1.969109296798706, "logps/chosen": -238.5630340576172, "logps/rejected": -280.3179931640625, "loss": 0.1689, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.40993937849998474, "rewards/margins": 0.12407805770635605, "rewards/rejected": -0.5340174436569214, "step": 4490 }, { "epoch": 0.54, "learning_rate": 2.588457928055592e-06, "logits/chosen": -1.6659586429595947, "logits/rejected": -1.2960065603256226, "logps/chosen": -255.4331512451172, "logps/rejected": -236.24594116210938, "loss": 0.1008, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39769870042800903, "rewards/margins": 0.24053025245666504, "rewards/rejected": -0.6382290124893188, "step": 4500 }, { "epoch": 0.54, "learning_rate": 2.5779931598533624e-06, "logits/chosen": -1.9211695194244385, "logits/rejected": -1.555855393409729, "logps/chosen": -265.3733215332031, "logps/rejected": -246.8184814453125, "loss": 0.1628, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4115406572818756, "rewards/margins": 0.12964625656604767, "rewards/rejected": -0.5411869287490845, "step": 4510 }, { "epoch": 0.54, "learning_rate": 2.567527023552857e-06, "logits/chosen": -1.9409809112548828, "logits/rejected": -1.6917556524276733, "logps/chosen": -309.97760009765625, "logps/rejected": -264.5470886230469, "loss": 0.0704, "rewards/accuracies": 0.75, "rewards/chosen": -0.4299238324165344, "rewards/margins": 0.14857104420661926, "rewards/rejected": -0.5784948468208313, "step": 4520 }, { "epoch": 0.54, "learning_rate": 2.5570597027432907e-06, "logits/chosen": -1.9963619709014893, "logits/rejected": -1.5232570171356201, "logps/chosen": -248.9495391845703, "logps/rejected": -209.61312866210938, "loss": 0.1608, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4026300311088562, "rewards/margins": 0.1366431713104248, "rewards/rejected": -0.5392731428146362, "step": 4530 }, { "epoch": 0.54, "learning_rate": 2.5465913810346575e-06, "logits/chosen": -1.7939636707305908, "logits/rejected": -1.6163822412490845, "logps/chosen": -263.76019287109375, "logps/rejected": -286.8879699707031, "loss": 0.1451, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4445052146911621, "rewards/margins": 0.14057457447052002, "rewards/rejected": -0.5850798487663269, "step": 4540 }, { "epoch": 0.55, "learning_rate": 2.536122242054507e-06, "logits/chosen": -1.9959796667099, "logits/rejected": -1.377286672592163, "logps/chosen": -249.66006469726562, "logps/rejected": -221.0861358642578, "loss": 0.1297, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3975772261619568, "rewards/margins": 0.19428391754627228, "rewards/rejected": -0.5918611288070679, "step": 4550 }, { "epoch": 0.55, "learning_rate": 2.525652469444727e-06, "logits/chosen": -2.10296368598938, "logits/rejected": -1.6731036901474, "logps/chosen": -211.2078857421875, "logps/rejected": -196.8761444091797, "loss": 0.1252, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3557474613189697, "rewards/margins": 0.14848320186138153, "rewards/rejected": -0.5042306184768677, "step": 4560 }, { "epoch": 0.55, "learning_rate": 2.5151822468583165e-06, "logits/chosen": -1.8910295963287354, "logits/rejected": -1.441156268119812, "logps/chosen": -235.1863250732422, "logps/rejected": -203.9948272705078, "loss": 0.0839, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3970240652561188, "rewards/margins": 0.21173615753650665, "rewards/rejected": -0.6087601780891418, "step": 4570 }, { "epoch": 0.55, "learning_rate": 2.5047117579561703e-06, "logits/chosen": -1.858319878578186, "logits/rejected": -1.6645057201385498, "logps/chosen": -318.26129150390625, "logps/rejected": -315.4615478515625, "loss": 0.1405, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5496889352798462, "rewards/margins": 0.14965248107910156, "rewards/rejected": -0.6993414163589478, "step": 4580 }, { "epoch": 0.55, "learning_rate": 2.494241186403854e-06, "logits/chosen": -2.0156023502349854, "logits/rejected": -1.8666023015975952, "logps/chosen": -204.9974365234375, "logps/rejected": -199.46676635742188, "loss": 0.2009, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.38789471983909607, "rewards/margins": 0.09470699727535248, "rewards/rejected": -0.48260173201560974, "step": 4590 }, { "epoch": 0.55, "learning_rate": 2.4837707158683833e-06, "logits/chosen": -1.733515739440918, "logits/rejected": -1.5892311334609985, "logps/chosen": -248.87283325195312, "logps/rejected": -265.2043762207031, "loss": 0.1352, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5136191248893738, "rewards/margins": 0.11203992366790771, "rewards/rejected": -0.6256589889526367, "step": 4600 }, { "epoch": 0.55, "learning_rate": 2.473300530015e-06, "logits/chosen": -2.1996002197265625, "logits/rejected": -1.857081651687622, "logps/chosen": -279.1620788574219, "logps/rejected": -275.622314453125, "loss": 0.1569, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3480737805366516, "rewards/margins": 0.11938655376434326, "rewards/rejected": -0.4674603343009949, "step": 4610 }, { "epoch": 0.55, "learning_rate": 2.4628308125039557e-06, "logits/chosen": -1.8926032781600952, "logits/rejected": -1.5367896556854248, "logps/chosen": -305.69732666015625, "logps/rejected": -276.2022705078125, "loss": 0.1795, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.41959348320961, "rewards/margins": 0.16269411146640778, "rewards/rejected": -0.582287609577179, "step": 4620 }, { "epoch": 0.56, "learning_rate": 2.452361746987284e-06, "logits/chosen": -1.8755619525909424, "logits/rejected": -1.7180248498916626, "logps/chosen": -267.4087829589844, "logps/rejected": -282.9756774902344, "loss": 0.0926, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5185616612434387, "rewards/margins": 0.11375057697296143, "rewards/rejected": -0.6323122978210449, "step": 4630 }, { "epoch": 0.56, "learning_rate": 2.4418935171055818e-06, "logits/chosen": -1.9167985916137695, "logits/rejected": -1.6408929824829102, "logps/chosen": -202.68295288085938, "logps/rejected": -215.5853729248047, "loss": 0.1187, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.41076311469078064, "rewards/margins": 0.12577161192893982, "rewards/rejected": -0.5365347266197205, "step": 4640 }, { "epoch": 0.56, "learning_rate": 2.43142630648479e-06, "logits/chosen": -1.941982626914978, "logits/rejected": -1.627986192703247, "logps/chosen": -308.0236511230469, "logps/rejected": -356.6058349609375, "loss": 0.0804, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5076587796211243, "rewards/margins": 0.15822356939315796, "rewards/rejected": -0.665882408618927, "step": 4650 }, { "epoch": 0.56, "learning_rate": 2.4209602987329685e-06, "logits/chosen": -1.7499468326568604, "logits/rejected": -1.2955760955810547, "logps/chosen": -229.9163055419922, "logps/rejected": -189.3719024658203, "loss": 0.0907, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.426525354385376, "rewards/margins": 0.18848739564418793, "rewards/rejected": -0.6150127649307251, "step": 4660 }, { "epoch": 0.56, "learning_rate": 2.410495677437076e-06, "logits/chosen": -1.9118763208389282, "logits/rejected": -1.8392162322998047, "logps/chosen": -226.6531219482422, "logps/rejected": -220.9828338623047, "loss": 0.1856, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3598722815513611, "rewards/margins": 0.13947324454784393, "rewards/rejected": -0.49934548139572144, "step": 4670 }, { "epoch": 0.56, "learning_rate": 2.400032626159756e-06, "logits/chosen": -1.9028289318084717, "logits/rejected": -1.7155911922454834, "logps/chosen": -224.9667510986328, "logps/rejected": -226.68728637695312, "loss": 0.1633, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3874308168888092, "rewards/margins": 0.13661661744117737, "rewards/rejected": -0.5240474343299866, "step": 4680 }, { "epoch": 0.56, "learning_rate": 2.3895713284361065e-06, "logits/chosen": -2.0748324394226074, "logits/rejected": -1.5768488645553589, "logps/chosen": -245.1274871826172, "logps/rejected": -222.86328125, "loss": 0.1049, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3151063919067383, "rewards/margins": 0.19956035912036896, "rewards/rejected": -0.514666736125946, "step": 4690 }, { "epoch": 0.56, "learning_rate": 2.3791119677704676e-06, "logits/chosen": -2.194417715072632, "logits/rejected": -1.6311323642730713, "logps/chosen": -287.04107666015625, "logps/rejected": -247.39163208007812, "loss": 0.1247, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31668201088905334, "rewards/margins": 0.18119558691978455, "rewards/rejected": -0.4978775978088379, "step": 4700 }, { "epoch": 0.57, "learning_rate": 2.3686547276332046e-06, "logits/chosen": -2.08101487159729, "logits/rejected": -1.6385080814361572, "logps/chosen": -264.02655029296875, "logps/rejected": -233.8690948486328, "loss": 0.1344, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40405863523483276, "rewards/margins": 0.16924302279949188, "rewards/rejected": -0.5733017325401306, "step": 4710 }, { "epoch": 0.57, "learning_rate": 2.3581997914574807e-06, "logits/chosen": -1.9531478881835938, "logits/rejected": -1.559780240058899, "logps/chosen": -237.64059448242188, "logps/rejected": -223.76565551757812, "loss": 0.1204, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.39436954259872437, "rewards/margins": 0.16357873380184174, "rewards/rejected": -0.5579482913017273, "step": 4720 }, { "epoch": 0.57, "learning_rate": 2.3477473426360463e-06, "logits/chosen": -2.1687614917755127, "logits/rejected": -1.6831060647964478, "logps/chosen": -255.7380828857422, "logps/rejected": -224.1558837890625, "loss": 0.1026, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3829661011695862, "rewards/margins": 0.23224039375782013, "rewards/rejected": -0.6152064800262451, "step": 4730 }, { "epoch": 0.57, "learning_rate": 2.337297564518024e-06, "logits/chosen": -2.1666550636291504, "logits/rejected": -1.8726260662078857, "logps/chosen": -303.0262451171875, "logps/rejected": -272.85894775390625, "loss": 0.1091, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3961569666862488, "rewards/margins": 0.1607007533311844, "rewards/rejected": -0.556857705116272, "step": 4740 }, { "epoch": 0.57, "learning_rate": 2.326850640405684e-06, "logits/chosen": -1.865952730178833, "logits/rejected": -1.3388252258300781, "logps/chosen": -331.45611572265625, "logps/rejected": -297.70074462890625, "loss": 0.0822, "rewards/accuracies": 0.875, "rewards/chosen": -0.35268011689186096, "rewards/margins": 0.2889634668827057, "rewards/rejected": -0.6416435837745667, "step": 4750 }, { "epoch": 0.57, "learning_rate": 2.3164067535512353e-06, "logits/chosen": -1.8777456283569336, "logits/rejected": -1.4981722831726074, "logps/chosen": -285.3711853027344, "logps/rejected": -237.86270141601562, "loss": 0.125, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4790197014808655, "rewards/margins": 0.15934725105762482, "rewards/rejected": -0.6383669972419739, "step": 4760 }, { "epoch": 0.57, "learning_rate": 2.3059660871536123e-06, "logits/chosen": -1.6866099834442139, "logits/rejected": -1.4525415897369385, "logps/chosen": -237.55563354492188, "logps/rejected": -251.5872039794922, "loss": 0.1271, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5034652948379517, "rewards/margins": 0.1651829183101654, "rewards/rejected": -0.6686481833457947, "step": 4770 }, { "epoch": 0.57, "learning_rate": 2.2955288243552543e-06, "logits/chosen": -2.0782809257507324, "logits/rejected": -1.6525154113769531, "logps/chosen": -335.6650085449219, "logps/rejected": -239.81674194335938, "loss": 0.1245, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4862341284751892, "rewards/margins": 0.12166903167963028, "rewards/rejected": -0.6079031229019165, "step": 4780 }, { "epoch": 0.57, "learning_rate": 2.285095148238899e-06, "logits/chosen": -1.9941129684448242, "logits/rejected": -1.7789087295532227, "logps/chosen": -281.5749206542969, "logps/rejected": -266.099609375, "loss": 0.1526, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4684416651725769, "rewards/margins": 0.17475393414497375, "rewards/rejected": -0.643195629119873, "step": 4790 }, { "epoch": 0.58, "learning_rate": 2.2746652418243714e-06, "logits/chosen": -2.0029962062835693, "logits/rejected": -1.7494831085205078, "logps/chosen": -326.5981140136719, "logps/rejected": -310.0174255371094, "loss": 0.0976, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5547425150871277, "rewards/margins": 0.10356296598911285, "rewards/rejected": -0.6583055257797241, "step": 4800 }, { "epoch": 0.58, "learning_rate": 2.2642392880653677e-06, "logits/chosen": -1.9393142461776733, "logits/rejected": -1.916164755821228, "logps/chosen": -261.62750244140625, "logps/rejected": -236.9197235107422, "loss": 0.1388, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.49896836280822754, "rewards/margins": 0.07649464905261993, "rewards/rejected": -0.5754629969596863, "step": 4810 }, { "epoch": 0.58, "learning_rate": 2.25381746984625e-06, "logits/chosen": -1.9654737710952759, "logits/rejected": -1.6573346853256226, "logps/chosen": -262.6412658691406, "logps/rejected": -285.51361083984375, "loss": 0.1336, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.49716243147850037, "rewards/margins": 0.175074964761734, "rewards/rejected": -0.6722373366355896, "step": 4820 }, { "epoch": 0.58, "learning_rate": 2.2433999699788404e-06, "logits/chosen": -2.004723072052002, "logits/rejected": -1.700979471206665, "logps/chosen": -265.5523986816406, "logps/rejected": -228.4165496826172, "loss": 0.124, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5388234257698059, "rewards/margins": 0.11548347771167755, "rewards/rejected": -0.654306948184967, "step": 4830 }, { "epoch": 0.58, "learning_rate": 2.2329869711992093e-06, "logits/chosen": -1.9097673892974854, "logits/rejected": -1.7621214389801025, "logps/chosen": -229.69906616210938, "logps/rejected": -267.36273193359375, "loss": 0.119, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5158060789108276, "rewards/margins": 0.14432090520858765, "rewards/rejected": -0.6601270437240601, "step": 4840 }, { "epoch": 0.58, "learning_rate": 2.2225786561644724e-06, "logits/chosen": -1.7414562702178955, "logits/rejected": -1.63266921043396, "logps/chosen": -258.82373046875, "logps/rejected": -264.71661376953125, "loss": 0.0976, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5453828573226929, "rewards/margins": 0.16923405230045319, "rewards/rejected": -0.7146168351173401, "step": 4850 }, { "epoch": 0.58, "learning_rate": 2.212175207449589e-06, "logits/chosen": -1.9261242151260376, "logits/rejected": -1.4300401210784912, "logps/chosen": -220.4147186279297, "logps/rejected": -217.1300811767578, "loss": 0.1082, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5556666254997253, "rewards/margins": 0.23020467162132263, "rewards/rejected": -0.7858712077140808, "step": 4860 }, { "epoch": 0.58, "learning_rate": 2.2017768075441544e-06, "logits/chosen": -1.9333302974700928, "logits/rejected": -1.7991241216659546, "logps/chosen": -260.7289123535156, "logps/rejected": -277.58941650390625, "loss": 0.089, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6178120374679565, "rewards/margins": 0.15292315185070038, "rewards/rejected": -0.7707351446151733, "step": 4870 }, { "epoch": 0.59, "learning_rate": 2.191383638849201e-06, "logits/chosen": -1.6100937128067017, "logits/rejected": -1.4786584377288818, "logps/chosen": -224.05224609375, "logps/rejected": -254.0164031982422, "loss": 0.1265, "rewards/accuracies": 0.625, "rewards/chosen": -0.5358814597129822, "rewards/margins": 0.21690325438976288, "rewards/rejected": -0.7527847290039062, "step": 4880 }, { "epoch": 0.59, "learning_rate": 2.180995883674003e-06, "logits/chosen": -2.0832412242889404, "logits/rejected": -1.8438653945922852, "logps/chosen": -301.72808837890625, "logps/rejected": -263.3715515136719, "loss": 0.1293, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5527404546737671, "rewards/margins": 0.11939278990030289, "rewards/rejected": -0.6721332669258118, "step": 4890 }, { "epoch": 0.59, "learning_rate": 2.1706137242328708e-06, "logits/chosen": -1.8641271591186523, "logits/rejected": -1.7302074432373047, "logps/chosen": -223.2054443359375, "logps/rejected": -246.4233856201172, "loss": 0.1346, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.5136536359786987, "rewards/margins": 0.13826540112495422, "rewards/rejected": -0.6519190669059753, "step": 4900 }, { "epoch": 0.59, "learning_rate": 2.1602373426419593e-06, "logits/chosen": -2.0203075408935547, "logits/rejected": -1.7125927209854126, "logps/chosen": -247.02804565429688, "logps/rejected": -248.95193481445312, "loss": 0.156, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5652672648429871, "rewards/margins": 0.21123281121253967, "rewards/rejected": -0.7765001058578491, "step": 4910 }, { "epoch": 0.59, "learning_rate": 2.149866920916075e-06, "logits/chosen": -1.9118244647979736, "logits/rejected": -1.6481826305389404, "logps/chosen": -287.49969482421875, "logps/rejected": -266.9239196777344, "loss": 0.098, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.507907509803772, "rewards/margins": 0.15150879323482513, "rewards/rejected": -0.6594163179397583, "step": 4920 }, { "epoch": 0.59, "learning_rate": 2.1395026409654776e-06, "logits/chosen": -2.052753448486328, "logits/rejected": -1.707918405532837, "logps/chosen": -287.81787109375, "logps/rejected": -250.28988647460938, "loss": 0.1311, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5523009896278381, "rewards/margins": 0.10048754513263702, "rewards/rejected": -0.6527885794639587, "step": 4930 }, { "epoch": 0.59, "learning_rate": 2.129144684592694e-06, "logits/chosen": -1.8895385265350342, "logits/rejected": -1.4638690948486328, "logps/chosen": -229.0388946533203, "logps/rejected": -215.0070343017578, "loss": 0.1343, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5384308695793152, "rewards/margins": 0.1393643468618393, "rewards/rejected": -0.6777952909469604, "step": 4940 }, { "epoch": 0.59, "learning_rate": 2.1187932334893282e-06, "logits/chosen": -2.058537721633911, "logits/rejected": -1.811730146408081, "logps/chosen": -238.6881866455078, "logps/rejected": -239.40774536132812, "loss": 0.1401, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5211200714111328, "rewards/margins": 0.11581333726644516, "rewards/rejected": -0.636933445930481, "step": 4950 }, { "epoch": 0.6, "learning_rate": 2.1084484692328726e-06, "logits/chosen": -1.8077147006988525, "logits/rejected": -1.6539599895477295, "logps/chosen": -324.1515808105469, "logps/rejected": -339.98333740234375, "loss": 0.0679, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.667458176612854, "rewards/margins": 0.1506289839744568, "rewards/rejected": -0.8180869817733765, "step": 4960 }, { "epoch": 0.6, "learning_rate": 2.0981105732835227e-06, "logits/chosen": -1.9896256923675537, "logits/rejected": -1.4527660608291626, "logps/chosen": -269.33746337890625, "logps/rejected": -214.31729125976562, "loss": 0.1888, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5238735675811768, "rewards/margins": 0.20568545162677765, "rewards/rejected": -0.729559063911438, "step": 4970 }, { "epoch": 0.6, "learning_rate": 2.087779726980999e-06, "logits/chosen": -2.0337650775909424, "logits/rejected": -1.6298131942749023, "logps/chosen": -297.9162292480469, "logps/rejected": -280.47637939453125, "loss": 0.0769, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5461810827255249, "rewards/margins": 0.18490315973758698, "rewards/rejected": -0.7310842275619507, "step": 4980 }, { "epoch": 0.6, "learning_rate": 2.077456111541359e-06, "logits/chosen": -1.915443778038025, "logits/rejected": -1.500583291053772, "logps/chosen": -290.4164733886719, "logps/rejected": -241.181396484375, "loss": 0.1659, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.49130645394325256, "rewards/margins": 0.18306657671928406, "rewards/rejected": -0.6743730306625366, "step": 4990 }, { "epoch": 0.6, "learning_rate": 2.067139908053821e-06, "logits/chosen": -2.1264405250549316, "logits/rejected": -1.8090966939926147, "logps/chosen": -281.15185546875, "logps/rejected": -267.20489501953125, "loss": 0.1224, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.43849506974220276, "rewards/margins": 0.1732708215713501, "rewards/rejected": -0.6117658019065857, "step": 5000 }, { "epoch": 0.6, "learning_rate": 2.056831297477592e-06, "logits/chosen": -1.8897788524627686, "logits/rejected": -1.8385932445526123, "logps/chosen": -329.4559020996094, "logps/rejected": -293.82940673828125, "loss": 0.136, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.678800106048584, "rewards/margins": 0.08193562924861908, "rewards/rejected": -0.7607358694076538, "step": 5010 }, { "epoch": 0.6, "learning_rate": 2.046530460638687e-06, "logits/chosen": -2.099050760269165, "logits/rejected": -1.724473237991333, "logps/chosen": -318.06732177734375, "logps/rejected": -279.4828796386719, "loss": 0.1347, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5295699834823608, "rewards/margins": 0.18352551758289337, "rewards/rejected": -0.7130955457687378, "step": 5020 }, { "epoch": 0.6, "learning_rate": 2.036237578226761e-06, "logits/chosen": -1.7614809274673462, "logits/rejected": -1.4049193859100342, "logps/chosen": -224.44375610351562, "logps/rejected": -221.57180786132812, "loss": 0.1367, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5156166553497314, "rewards/margins": 0.1933528184890747, "rewards/rejected": -0.7089694738388062, "step": 5030 }, { "epoch": 0.6, "learning_rate": 2.0259528307919385e-06, "logits/chosen": -2.024557590484619, "logits/rejected": -1.4408557415008545, "logps/chosen": -298.25, "logps/rejected": -232.6403045654297, "loss": 0.149, "rewards/accuracies": 0.75, "rewards/chosen": -0.43420344591140747, "rewards/margins": 0.20419923961162567, "rewards/rejected": -0.6384027004241943, "step": 5040 }, { "epoch": 0.61, "learning_rate": 2.015676398741644e-06, "logits/chosen": -1.9110151529312134, "logits/rejected": -1.3820592164993286, "logps/chosen": -328.24456787109375, "logps/rejected": -274.53765869140625, "loss": 0.1629, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5336133241653442, "rewards/margins": 0.20271578431129456, "rewards/rejected": -0.7363291382789612, "step": 5050 }, { "epoch": 0.61, "learning_rate": 2.005408462337443e-06, "logits/chosen": -2.0011394023895264, "logits/rejected": -1.5512298345565796, "logps/chosen": -250.78280639648438, "logps/rejected": -245.1263885498047, "loss": 0.1404, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.49497905373573303, "rewards/margins": 0.21909329295158386, "rewards/rejected": -0.7140722870826721, "step": 5060 }, { "epoch": 0.61, "learning_rate": 1.9951492016918745e-06, "logits/chosen": -1.9097583293914795, "logits/rejected": -1.5266954898834229, "logps/chosen": -195.3928985595703, "logps/rejected": -203.9522247314453, "loss": 0.1487, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5452216863632202, "rewards/margins": 0.17070701718330383, "rewards/rejected": -0.7159286737442017, "step": 5070 }, { "epoch": 0.61, "learning_rate": 1.984898796765294e-06, "logits/chosen": -1.871835470199585, "logits/rejected": -1.4449571371078491, "logps/chosen": -198.09242248535156, "logps/rejected": -197.41738891601562, "loss": 0.1232, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4947785437107086, "rewards/margins": 0.17074665427207947, "rewards/rejected": -0.6655251979827881, "step": 5080 }, { "epoch": 0.61, "learning_rate": 1.974657427362717e-06, "logits/chosen": -1.817120909690857, "logits/rejected": -1.635406494140625, "logps/chosen": -292.73358154296875, "logps/rejected": -272.16900634765625, "loss": 0.1009, "rewards/accuracies": 0.625, "rewards/chosen": -0.5638500452041626, "rewards/margins": 0.10627535730600357, "rewards/rejected": -0.670125424861908, "step": 5090 }, { "epoch": 0.61, "learning_rate": 1.9644252731306653e-06, "logits/chosen": -1.8737514019012451, "logits/rejected": -1.5180460214614868, "logps/chosen": -367.1842956542969, "logps/rejected": -313.0085144042969, "loss": 0.1218, "rewards/accuracies": 0.75, "rewards/chosen": -0.6011655330657959, "rewards/margins": 0.1648952215909958, "rewards/rejected": -0.7660607099533081, "step": 5100 }, { "epoch": 0.61, "learning_rate": 1.954202513554013e-06, "logits/chosen": -1.9836708307266235, "logits/rejected": -1.797654390335083, "logps/chosen": -243.0941162109375, "logps/rejected": -266.05535888671875, "loss": 0.156, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4675242006778717, "rewards/margins": 0.17585307359695435, "rewards/rejected": -0.6433773636817932, "step": 5110 }, { "epoch": 0.61, "learning_rate": 1.943989327952841e-06, "logits/chosen": -2.0395166873931885, "logits/rejected": -1.6357864141464233, "logps/chosen": -350.53411865234375, "logps/rejected": -324.0770568847656, "loss": 0.0882, "rewards/accuracies": 0.75, "rewards/chosen": -0.5562902092933655, "rewards/margins": 0.15983954071998596, "rewards/rejected": -0.7161296606063843, "step": 5120 }, { "epoch": 0.62, "learning_rate": 1.9337858954792917e-06, "logits/chosen": -1.8916152715682983, "logits/rejected": -1.6612498760223389, "logps/chosen": -255.1393280029297, "logps/rejected": -268.5872802734375, "loss": 0.1109, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5940229892730713, "rewards/margins": 0.24639251828193665, "rewards/rejected": -0.8404154777526855, "step": 5130 }, { "epoch": 0.62, "learning_rate": 1.9235923951144246e-06, "logits/chosen": -1.9813220500946045, "logits/rejected": -1.6410853862762451, "logps/chosen": -291.50323486328125, "logps/rejected": -276.9924011230469, "loss": 0.1087, "rewards/accuracies": 0.75, "rewards/chosen": -0.531834602355957, "rewards/margins": 0.15474644303321838, "rewards/rejected": -0.686581015586853, "step": 5140 }, { "epoch": 0.62, "learning_rate": 1.9134090056650764e-06, "logits/chosen": -2.085635185241699, "logits/rejected": -1.7884467840194702, "logps/chosen": -284.26025390625, "logps/rejected": -268.23699951171875, "loss": 0.1903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5530114769935608, "rewards/margins": 0.16405172646045685, "rewards/rejected": -0.7170631885528564, "step": 5150 }, { "epoch": 0.62, "learning_rate": 1.9032359057607272e-06, "logits/chosen": -2.1484217643737793, "logits/rejected": -1.5867314338684082, "logps/chosen": -338.3865661621094, "logps/rejected": -318.114990234375, "loss": 0.0867, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4841574728488922, "rewards/margins": 0.2181321382522583, "rewards/rejected": -0.7022895812988281, "step": 5160 }, { "epoch": 0.62, "learning_rate": 1.8930732738503652e-06, "logits/chosen": -1.991681694984436, "logits/rejected": -1.7101647853851318, "logps/chosen": -253.91015625, "logps/rejected": -210.7447052001953, "loss": 0.1055, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5010813474655151, "rewards/margins": 0.16278687119483948, "rewards/rejected": -0.6638683080673218, "step": 5170 }, { "epoch": 0.62, "learning_rate": 1.8829212881993553e-06, "logits/chosen": -2.1374099254608154, "logits/rejected": -1.8165279626846313, "logps/chosen": -280.1686096191406, "logps/rejected": -253.91220092773438, "loss": 0.0657, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.49169284105300903, "rewards/margins": 0.12237177044153214, "rewards/rejected": -0.6140645742416382, "step": 5180 }, { "epoch": 0.62, "learning_rate": 1.872780126886316e-06, "logits/chosen": -2.1169066429138184, "logits/rejected": -1.6588671207427979, "logps/chosen": -282.9520568847656, "logps/rejected": -245.0965118408203, "loss": 0.1101, "rewards/accuracies": 0.75, "rewards/chosen": -0.4680394232273102, "rewards/margins": 0.19195261597633362, "rewards/rejected": -0.659991979598999, "step": 5190 }, { "epoch": 0.62, "learning_rate": 1.8626499677999915e-06, "logits/chosen": -1.8921172618865967, "logits/rejected": -1.8109171390533447, "logps/chosen": -260.7506408691406, "logps/rejected": -277.9801025390625, "loss": 0.132, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5560612082481384, "rewards/margins": 0.13018205761909485, "rewards/rejected": -0.6862432360649109, "step": 5200 }, { "epoch": 0.63, "learning_rate": 1.8525309886361332e-06, "logits/chosen": -1.9643144607543945, "logits/rejected": -1.4548659324645996, "logps/chosen": -215.2502899169922, "logps/rejected": -212.14553833007812, "loss": 0.1047, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5360875725746155, "rewards/margins": 0.2591664493083954, "rewards/rejected": -0.7952540516853333, "step": 5210 }, { "epoch": 0.63, "learning_rate": 1.8424233668943844e-06, "logits/chosen": -1.8108913898468018, "logits/rejected": -1.6711933612823486, "logps/chosen": -245.8279571533203, "logps/rejected": -260.3638610839844, "loss": 0.1462, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5376258492469788, "rewards/margins": 0.17290274798870087, "rewards/rejected": -0.710528552532196, "step": 5220 }, { "epoch": 0.63, "learning_rate": 1.8323272798751629e-06, "logits/chosen": -1.8469750881195068, "logits/rejected": -1.6417795419692993, "logps/chosen": -267.5218811035156, "logps/rejected": -255.52890014648438, "loss": 0.1275, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6079937219619751, "rewards/margins": 0.1432160884141922, "rewards/rejected": -0.7512098550796509, "step": 5230 }, { "epoch": 0.63, "learning_rate": 1.822242904676552e-06, "logits/chosen": -1.8902513980865479, "logits/rejected": -1.6447397470474243, "logps/chosen": -224.96902465820312, "logps/rejected": -236.59140014648438, "loss": 0.0791, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5813908576965332, "rewards/margins": 0.11333571374416351, "rewards/rejected": -0.6947265863418579, "step": 5240 }, { "epoch": 0.63, "learning_rate": 1.8121704181911989e-06, "logits/chosen": -2.0475192070007324, "logits/rejected": -1.7533676624298096, "logps/chosen": -322.51141357421875, "logps/rejected": -286.36376953125, "loss": 0.1171, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5761255025863647, "rewards/margins": 0.12248637527227402, "rewards/rejected": -0.698611855506897, "step": 5250 }, { "epoch": 0.63, "learning_rate": 1.8021099971032046e-06, "logits/chosen": -1.731256127357483, "logits/rejected": -1.3307876586914062, "logps/chosen": -248.487060546875, "logps/rejected": -219.0608367919922, "loss": 0.1045, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5520157217979431, "rewards/margins": 0.1579941362142563, "rewards/rejected": -0.7100099325180054, "step": 5260 }, { "epoch": 0.63, "learning_rate": 1.7920618178850269e-06, "logits/chosen": -2.0428383350372314, "logits/rejected": -1.7901527881622314, "logps/chosen": -307.78533935546875, "logps/rejected": -285.2147521972656, "loss": 0.1015, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5738735795021057, "rewards/margins": 0.16959087550640106, "rewards/rejected": -0.743464469909668, "step": 5270 }, { "epoch": 0.63, "learning_rate": 1.7820260567943904e-06, "logits/chosen": -1.903365135192871, "logits/rejected": -1.7534162998199463, "logps/chosen": -185.64505004882812, "logps/rejected": -201.76344299316406, "loss": 0.1676, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.574264407157898, "rewards/margins": 0.13275280594825745, "rewards/rejected": -0.7070172429084778, "step": 5280 }, { "epoch": 0.63, "learning_rate": 1.7720028898711852e-06, "logits/chosen": -1.8674421310424805, "logits/rejected": -1.413944959640503, "logps/chosen": -263.2582092285156, "logps/rejected": -235.0648956298828, "loss": 0.137, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.491985946893692, "rewards/margins": 0.17603492736816406, "rewards/rejected": -0.6680207848548889, "step": 5290 }, { "epoch": 0.64, "learning_rate": 1.7619924929343857e-06, "logits/chosen": -1.919923186302185, "logits/rejected": -1.7005188465118408, "logps/chosen": -273.3664855957031, "logps/rejected": -306.89056396484375, "loss": 0.1344, "rewards/accuracies": 0.75, "rewards/chosen": -0.4788680076599121, "rewards/margins": 0.18705452978610992, "rewards/rejected": -0.6659225225448608, "step": 5300 }, { "epoch": 0.64, "learning_rate": 1.7519950415789661e-06, "logits/chosen": -1.7585570812225342, "logits/rejected": -1.5700247287750244, "logps/chosen": -251.32113647460938, "logps/rejected": -301.25079345703125, "loss": 0.1677, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4546022415161133, "rewards/margins": 0.16988904774188995, "rewards/rejected": -0.624491274356842, "step": 5310 }, { "epoch": 0.64, "learning_rate": 1.7420107111728167e-06, "logits/chosen": -1.8963468074798584, "logits/rejected": -1.7362186908721924, "logps/chosen": -206.5269317626953, "logps/rejected": -223.75009155273438, "loss": 0.08, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.39686816930770874, "rewards/margins": 0.15394839644432068, "rewards/rejected": -0.550816535949707, "step": 5320 }, { "epoch": 0.64, "learning_rate": 1.7320396768536695e-06, "logits/chosen": -1.9675251245498657, "logits/rejected": -1.5709375143051147, "logps/chosen": -268.6692199707031, "logps/rejected": -247.16464233398438, "loss": 0.0934, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4870396554470062, "rewards/margins": 0.1673925369977951, "rewards/rejected": -0.6544321775436401, "step": 5330 }, { "epoch": 0.64, "learning_rate": 1.7220821135260301e-06, "logits/chosen": -1.836387038230896, "logits/rejected": -1.3881337642669678, "logps/chosen": -240.44357299804688, "logps/rejected": -221.38931274414062, "loss": 0.0791, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5035630464553833, "rewards/margins": 0.15110139548778534, "rewards/rejected": -0.6546644568443298, "step": 5340 }, { "epoch": 0.64, "learning_rate": 1.7121381958581018e-06, "logits/chosen": -2.0114264488220215, "logits/rejected": -1.6229709386825562, "logps/chosen": -298.6057434082031, "logps/rejected": -229.20565795898438, "loss": 0.1362, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5573118925094604, "rewards/margins": 0.11680523306131363, "rewards/rejected": -0.6741170883178711, "step": 5350 }, { "epoch": 0.64, "learning_rate": 1.7022080982787259e-06, "logits/chosen": -1.8884429931640625, "logits/rejected": -1.5066940784454346, "logps/chosen": -274.16937255859375, "logps/rejected": -251.51052856445312, "loss": 0.1055, "rewards/accuracies": 0.625, "rewards/chosen": -0.5535235404968262, "rewards/margins": 0.13713331520557404, "rewards/rejected": -0.6906567811965942, "step": 5360 }, { "epoch": 0.64, "learning_rate": 1.692291994974326e-06, "logits/chosen": -1.9273033142089844, "logits/rejected": -1.4788744449615479, "logps/chosen": -301.24896240234375, "logps/rejected": -266.068115234375, "loss": 0.1223, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4468786120414734, "rewards/margins": 0.1857236623764038, "rewards/rejected": -0.6326022148132324, "step": 5370 }, { "epoch": 0.65, "learning_rate": 1.682390059885845e-06, "logits/chosen": -1.962938904762268, "logits/rejected": -1.4628360271453857, "logps/chosen": -287.84283447265625, "logps/rejected": -222.7774658203125, "loss": 0.0968, "rewards/accuracies": 0.75, "rewards/chosen": -0.5061804056167603, "rewards/margins": 0.22085240483283997, "rewards/rejected": -0.7270327806472778, "step": 5380 }, { "epoch": 0.65, "learning_rate": 1.6725024667056965e-06, "logits/chosen": -1.8040755987167358, "logits/rejected": -1.4079840183258057, "logps/chosen": -270.97686767578125, "logps/rejected": -205.47982788085938, "loss": 0.1878, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4969252645969391, "rewards/margins": 0.0869857668876648, "rewards/rejected": -0.5839110612869263, "step": 5390 }, { "epoch": 0.65, "learning_rate": 1.6626293888747238e-06, "logits/chosen": -1.9853794574737549, "logits/rejected": -1.4653469324111938, "logps/chosen": -268.05926513671875, "logps/rejected": -262.09197998046875, "loss": 0.1083, "rewards/accuracies": 0.75, "rewards/chosen": -0.47049784660339355, "rewards/margins": 0.1899462640285492, "rewards/rejected": -0.6604441404342651, "step": 5400 }, { "epoch": 0.65, "learning_rate": 1.652770999579148e-06, "logits/chosen": -1.9712364673614502, "logits/rejected": -1.6718246936798096, "logps/chosen": -248.3401336669922, "logps/rejected": -259.27471923828125, "loss": 0.1179, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4843429923057556, "rewards/margins": 0.15328797698020935, "rewards/rejected": -0.6376310586929321, "step": 5410 }, { "epoch": 0.65, "learning_rate": 1.6429274717475358e-06, "logits/chosen": -1.8927046060562134, "logits/rejected": -1.5739778280258179, "logps/chosen": -279.84503173828125, "logps/rejected": -235.696533203125, "loss": 0.0874, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4386516213417053, "rewards/margins": 0.19288429617881775, "rewards/rejected": -0.6315358877182007, "step": 5420 }, { "epoch": 0.65, "learning_rate": 1.6330989780477673e-06, "logits/chosen": -1.8618462085723877, "logits/rejected": -1.5246363878250122, "logps/chosen": -253.5839385986328, "logps/rejected": -251.325927734375, "loss": 0.151, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5161840319633484, "rewards/margins": 0.14887337386608124, "rewards/rejected": -0.665057361125946, "step": 5430 }, { "epoch": 0.65, "learning_rate": 1.6232856908840033e-06, "logits/chosen": -2.285269260406494, "logits/rejected": -1.713772177696228, "logps/chosen": -263.0864562988281, "logps/rejected": -217.45498657226562, "loss": 0.0977, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4418443739414215, "rewards/margins": 0.17126531898975372, "rewards/rejected": -0.6131097078323364, "step": 5440 }, { "epoch": 0.65, "learning_rate": 1.613487782393661e-06, "logits/chosen": -1.9873231649398804, "logits/rejected": -1.5774824619293213, "logps/chosen": -259.41546630859375, "logps/rejected": -270.5675048828125, "loss": 0.122, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4419049322605133, "rewards/margins": 0.1639256477355957, "rewards/rejected": -0.6058306097984314, "step": 5450 }, { "epoch": 0.66, "learning_rate": 1.6037054244444007e-06, "logits/chosen": -1.9209073781967163, "logits/rejected": -1.675719976425171, "logps/chosen": -248.13119506835938, "logps/rejected": -253.267822265625, "loss": 0.1288, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5143054127693176, "rewards/margins": 0.1403733789920807, "rewards/rejected": -0.6546787023544312, "step": 5460 }, { "epoch": 0.66, "learning_rate": 1.593938788631103e-06, "logits/chosen": -1.6965789794921875, "logits/rejected": -1.3882747888565063, "logps/chosen": -236.5738067626953, "logps/rejected": -283.84869384765625, "loss": 0.0971, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4736576974391937, "rewards/margins": 0.199508398771286, "rewards/rejected": -0.6731661558151245, "step": 5470 }, { "epoch": 0.66, "learning_rate": 1.5841880462728626e-06, "logits/chosen": -1.8833509683609009, "logits/rejected": -1.6429067850112915, "logps/chosen": -280.9195861816406, "logps/rejected": -278.397705078125, "loss": 0.1557, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.46772581338882446, "rewards/margins": 0.1507682502269745, "rewards/rejected": -0.6184940934181213, "step": 5480 }, { "epoch": 0.66, "learning_rate": 1.5744533684099861e-06, "logits/chosen": -2.0979132652282715, "logits/rejected": -1.699033498764038, "logps/chosen": -264.59173583984375, "logps/rejected": -252.0912628173828, "loss": 0.164, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4788171648979187, "rewards/margins": 0.12794797122478485, "rewards/rejected": -0.6067651510238647, "step": 5490 }, { "epoch": 0.66, "learning_rate": 1.5647349258009857e-06, "logits/chosen": -1.7671406269073486, "logits/rejected": -1.574204683303833, "logps/chosen": -282.95458984375, "logps/rejected": -298.6538391113281, "loss": 0.0751, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5590416193008423, "rewards/margins": 0.1640872210264206, "rewards/rejected": -0.7231289148330688, "step": 5500 }, { "epoch": 0.66, "learning_rate": 1.555032888919586e-06, "logits/chosen": -1.686753511428833, "logits/rejected": -1.4295735359191895, "logps/chosen": -264.7411804199219, "logps/rejected": -253.6551971435547, "loss": 0.2432, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5408766269683838, "rewards/margins": 0.15903475880622864, "rewards/rejected": -0.69991135597229, "step": 5510 }, { "epoch": 0.66, "learning_rate": 1.5453474279517383e-06, "logits/chosen": -1.805437684059143, "logits/rejected": -1.6262556314468384, "logps/chosen": -240.7580108642578, "logps/rejected": -236.3534393310547, "loss": 0.1229, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.500167727470398, "rewards/margins": 0.12385289371013641, "rewards/rejected": -0.6240206360816956, "step": 5520 }, { "epoch": 0.66, "learning_rate": 1.5356787127926285e-06, "logits/chosen": -1.9110714197158813, "logits/rejected": -1.4245280027389526, "logps/chosen": -316.56072998046875, "logps/rejected": -266.979248046875, "loss": 0.0896, "rewards/accuracies": 0.625, "rewards/chosen": -0.4973506033420563, "rewards/margins": 0.21019454300403595, "rewards/rejected": -0.7075451612472534, "step": 5530 }, { "epoch": 0.66, "learning_rate": 1.526026913043699e-06, "logits/chosen": -1.7721723318099976, "logits/rejected": -1.6717960834503174, "logps/chosen": -231.5484161376953, "logps/rejected": -236.1089324951172, "loss": 0.0822, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5355015397071838, "rewards/margins": 0.13346286118030548, "rewards/rejected": -0.6689643859863281, "step": 5540 }, { "epoch": 0.67, "learning_rate": 1.5163921980096791e-06, "logits/chosen": -1.8417619466781616, "logits/rejected": -1.7490192651748657, "logps/chosen": -259.3507080078125, "logps/rejected": -285.2629699707031, "loss": 0.1187, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5752219557762146, "rewards/margins": 0.11573759466409683, "rewards/rejected": -0.6909595727920532, "step": 5550 }, { "epoch": 0.67, "learning_rate": 1.5067747366956065e-06, "logits/chosen": -2.117729663848877, "logits/rejected": -1.7773869037628174, "logps/chosen": -260.44342041015625, "logps/rejected": -221.59371948242188, "loss": 0.1647, "rewards/accuracies": 0.625, "rewards/chosen": -0.49646344780921936, "rewards/margins": 0.16098496317863464, "rewards/rejected": -0.657448410987854, "step": 5560 }, { "epoch": 0.67, "learning_rate": 1.4971746978038671e-06, "logits/chosen": -1.8527145385742188, "logits/rejected": -1.7836803197860718, "logps/chosen": -257.3758850097656, "logps/rejected": -278.2964782714844, "loss": 0.1172, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4473143517971039, "rewards/margins": 0.14970967173576355, "rewards/rejected": -0.5970240235328674, "step": 5570 }, { "epoch": 0.67, "learning_rate": 1.4875922497312384e-06, "logits/chosen": -1.802384376525879, "logits/rejected": -1.3964884281158447, "logps/chosen": -257.4482421875, "logps/rejected": -256.9828796386719, "loss": 0.0714, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4673423767089844, "rewards/margins": 0.22057469189167023, "rewards/rejected": -0.6879170536994934, "step": 5580 }, { "epoch": 0.67, "learning_rate": 1.4780275605659308e-06, "logits/chosen": -1.9443477392196655, "logits/rejected": -1.470523476600647, "logps/chosen": -216.4662628173828, "logps/rejected": -213.83154296875, "loss": 0.1033, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.47214627265930176, "rewards/margins": 0.24475538730621338, "rewards/rejected": -0.7169016599655151, "step": 5590 }, { "epoch": 0.67, "learning_rate": 1.46848079808464e-06, "logits/chosen": -1.811112642288208, "logits/rejected": -1.5633313655853271, "logps/chosen": -286.9377746582031, "logps/rejected": -256.46368408203125, "loss": 0.1492, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4783618450164795, "rewards/margins": 0.16989843547344208, "rewards/rejected": -0.648260235786438, "step": 5600 }, { "epoch": 0.67, "learning_rate": 1.4589521297496085e-06, "logits/chosen": -1.9072492122650146, "logits/rejected": -1.6674668788909912, "logps/chosen": -287.529052734375, "logps/rejected": -309.4631042480469, "loss": 0.1368, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.46460795402526855, "rewards/margins": 0.12597152590751648, "rewards/rejected": -0.5905795097351074, "step": 5610 }, { "epoch": 0.67, "learning_rate": 1.4494417227056811e-06, "logits/chosen": -1.9489076137542725, "logits/rejected": -1.5660401582717896, "logps/chosen": -236.4983673095703, "logps/rejected": -253.90518188476562, "loss": 0.0872, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.41152167320251465, "rewards/margins": 0.21388018131256104, "rewards/rejected": -0.6254019737243652, "step": 5620 }, { "epoch": 0.68, "learning_rate": 1.4399497437773786e-06, "logits/chosen": -1.9147542715072632, "logits/rejected": -1.4943046569824219, "logps/chosen": -272.55987548828125, "logps/rejected": -264.3128967285156, "loss": 0.1259, "rewards/accuracies": 0.75, "rewards/chosen": -0.5410269498825073, "rewards/margins": 0.19129455089569092, "rewards/rejected": -0.732321560382843, "step": 5630 }, { "epoch": 0.68, "learning_rate": 1.4304763594659694e-06, "logits/chosen": -2.035388946533203, "logits/rejected": -1.5075829029083252, "logps/chosen": -302.56365966796875, "logps/rejected": -257.10601806640625, "loss": 0.1725, "rewards/accuracies": 0.75, "rewards/chosen": -0.49816712737083435, "rewards/margins": 0.18971143662929535, "rewards/rejected": -0.6878786087036133, "step": 5640 }, { "epoch": 0.68, "learning_rate": 1.4210217359465483e-06, "logits/chosen": -2.001213312149048, "logits/rejected": -1.7480404376983643, "logps/chosen": -250.2290496826172, "logps/rejected": -263.33099365234375, "loss": 0.1514, "rewards/accuracies": 0.625, "rewards/chosen": -0.43809619545936584, "rewards/margins": 0.09596999734640121, "rewards/rejected": -0.5340661406517029, "step": 5650 }, { "epoch": 0.68, "learning_rate": 1.4115860390651204e-06, "logits/chosen": -1.8610761165618896, "logits/rejected": -1.242117166519165, "logps/chosen": -272.94488525390625, "logps/rejected": -214.08584594726562, "loss": 0.1465, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5517784953117371, "rewards/margins": 0.17574277520179749, "rewards/rejected": -0.7275213003158569, "step": 5660 }, { "epoch": 0.68, "learning_rate": 1.4021694343356992e-06, "logits/chosen": -2.0516114234924316, "logits/rejected": -1.6413084268569946, "logps/chosen": -243.1460723876953, "logps/rejected": -240.1009063720703, "loss": 0.1033, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4688890874385834, "rewards/margins": 0.14948108792304993, "rewards/rejected": -0.6183701753616333, "step": 5670 }, { "epoch": 0.68, "learning_rate": 1.3927720869373912e-06, "logits/chosen": -1.7400707006454468, "logits/rejected": -1.5164529085159302, "logps/chosen": -283.2667236328125, "logps/rejected": -289.87200927734375, "loss": 0.0951, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.46899938583374023, "rewards/margins": 0.15114405751228333, "rewards/rejected": -0.6201435327529907, "step": 5680 }, { "epoch": 0.68, "learning_rate": 1.383394161711509e-06, "logits/chosen": -1.7821184396743774, "logits/rejected": -1.5143952369689941, "logps/chosen": -243.70639038085938, "logps/rejected": -248.849365234375, "loss": 0.0863, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5013656616210938, "rewards/margins": 0.18036916851997375, "rewards/rejected": -0.6817347407341003, "step": 5690 }, { "epoch": 0.68, "learning_rate": 1.3740358231586752e-06, "logits/chosen": -1.9248117208480835, "logits/rejected": -1.6384576559066772, "logps/chosen": -238.166015625, "logps/rejected": -213.8789825439453, "loss": 0.1689, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5257433652877808, "rewards/margins": 0.1685757339000702, "rewards/rejected": -0.6943190693855286, "step": 5700 }, { "epoch": 0.69, "learning_rate": 1.3646972354359379e-06, "logits/chosen": -2.0671422481536865, "logits/rejected": -1.6382163763046265, "logps/chosen": -248.79116821289062, "logps/rejected": -239.56393432617188, "loss": 0.1352, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5227428674697876, "rewards/margins": 0.1494591385126114, "rewards/rejected": -0.672201931476593, "step": 5710 }, { "epoch": 0.69, "learning_rate": 1.3553785623538873e-06, "logits/chosen": -1.8637508153915405, "logits/rejected": -1.6675211191177368, "logps/chosen": -217.97286987304688, "logps/rejected": -246.8916015625, "loss": 0.1261, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5041235685348511, "rewards/margins": 0.16983681917190552, "rewards/rejected": -0.6739604473114014, "step": 5720 }, { "epoch": 0.69, "learning_rate": 1.346079967373792e-06, "logits/chosen": -1.684739351272583, "logits/rejected": -1.6077110767364502, "logps/chosen": -205.7179718017578, "logps/rejected": -213.598388671875, "loss": 0.0877, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4919508099555969, "rewards/margins": 0.15389500558376312, "rewards/rejected": -0.6458457708358765, "step": 5730 }, { "epoch": 0.69, "learning_rate": 1.3368016136047194e-06, "logits/chosen": -1.7886161804199219, "logits/rejected": -1.5077216625213623, "logps/chosen": -311.54486083984375, "logps/rejected": -265.2232971191406, "loss": 0.1852, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5703347325325012, "rewards/margins": 0.1437651813030243, "rewards/rejected": -0.7140999436378479, "step": 5740 }, { "epoch": 0.69, "learning_rate": 1.3275436638006838e-06, "logits/chosen": -1.9010818004608154, "logits/rejected": -1.6132938861846924, "logps/chosen": -279.0166320800781, "logps/rejected": -293.9725646972656, "loss": 0.1197, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5455999374389648, "rewards/margins": 0.1750425398349762, "rewards/rejected": -0.7206425070762634, "step": 5750 }, { "epoch": 0.69, "learning_rate": 1.3183062803577872e-06, "logits/chosen": -1.9034898281097412, "logits/rejected": -1.5641247034072876, "logps/chosen": -227.96151733398438, "logps/rejected": -215.01339721679688, "loss": 0.1401, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5809696316719055, "rewards/margins": 0.2188103199005127, "rewards/rejected": -0.7997799515724182, "step": 5760 }, { "epoch": 0.69, "learning_rate": 1.3090896253113736e-06, "logits/chosen": -1.8766626119613647, "logits/rejected": -1.700510025024414, "logps/chosen": -258.7127380371094, "logps/rejected": -250.1974334716797, "loss": 0.1517, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5055121183395386, "rewards/margins": 0.1273249089717865, "rewards/rejected": -0.6328369379043579, "step": 5770 }, { "epoch": 0.69, "learning_rate": 1.2998938603331796e-06, "logits/chosen": -1.8572345972061157, "logits/rejected": -1.651341199874878, "logps/chosen": -255.7646026611328, "logps/rejected": -279.30694580078125, "loss": 0.0922, "rewards/accuracies": 0.625, "rewards/chosen": -0.6293990612030029, "rewards/margins": 0.12281368672847748, "rewards/rejected": -0.7522127032279968, "step": 5780 }, { "epoch": 0.69, "learning_rate": 1.2907191467285118e-06, "logits/chosen": -1.9182488918304443, "logits/rejected": -1.6267467737197876, "logps/chosen": -266.6481018066406, "logps/rejected": -413.2779235839844, "loss": 5.1685, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.47287482023239136, "rewards/margins": 0.5273378491401672, "rewards/rejected": -1.0002126693725586, "step": 5790 }, { "epoch": 0.7, "learning_rate": 1.2815656454334013e-06, "logits/chosen": -1.9395920038223267, "logits/rejected": -1.8263431787490845, "logps/chosen": -256.23663330078125, "logps/rejected": -270.293701171875, "loss": 0.1378, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4917454719543457, "rewards/margins": 0.15827788412570953, "rewards/rejected": -0.650023341178894, "step": 5800 }, { "epoch": 0.7, "learning_rate": 1.272433517011793e-06, "logits/chosen": -1.9891746044158936, "logits/rejected": -1.7108840942382812, "logps/chosen": -311.8459167480469, "logps/rejected": -299.76654052734375, "loss": 0.0751, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.48035064339637756, "rewards/margins": 0.1297578364610672, "rewards/rejected": -0.6101084351539612, "step": 5810 }, { "epoch": 0.7, "learning_rate": 1.2633229216527235e-06, "logits/chosen": -1.9504735469818115, "logits/rejected": -1.4860206842422485, "logps/chosen": -230.38272094726562, "logps/rejected": -224.21981811523438, "loss": 0.1323, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4245625138282776, "rewards/margins": 0.2091328203678131, "rewards/rejected": -0.6336953043937683, "step": 5820 }, { "epoch": 0.7, "learning_rate": 1.254234019167514e-06, "logits/chosen": -1.9143810272216797, "logits/rejected": -1.5591933727264404, "logps/chosen": -283.6191711425781, "logps/rejected": -264.5260009765625, "loss": 0.0793, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4440692067146301, "rewards/margins": 0.2215677946805954, "rewards/rejected": -0.6656370759010315, "step": 5830 }, { "epoch": 0.7, "learning_rate": 1.24516696898696e-06, "logits/chosen": -1.9349048137664795, "logits/rejected": -1.555820345878601, "logps/chosen": -267.323486328125, "logps/rejected": -289.51947021484375, "loss": 0.1085, "rewards/accuracies": 0.75, "rewards/chosen": -0.5123074054718018, "rewards/margins": 0.20973484218120575, "rewards/rejected": -0.7220422029495239, "step": 5840 }, { "epoch": 0.7, "learning_rate": 1.2361219301585487e-06, "logits/chosen": -2.0896143913269043, "logits/rejected": -1.63728928565979, "logps/chosen": -277.4181213378906, "logps/rejected": -244.1571044921875, "loss": 0.111, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4161832332611084, "rewards/margins": 0.1889437586069107, "rewards/rejected": -0.6051269769668579, "step": 5850 }, { "epoch": 0.7, "learning_rate": 1.2270990613436522e-06, "logits/chosen": -1.9406229257583618, "logits/rejected": -1.6228440999984741, "logps/chosen": -229.52969360351562, "logps/rejected": -256.2364196777344, "loss": 0.1576, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4235209822654724, "rewards/margins": 0.12438831478357315, "rewards/rejected": -0.5479093194007874, "step": 5860 }, { "epoch": 0.7, "learning_rate": 1.2180985208147571e-06, "logits/chosen": -1.9510200023651123, "logits/rejected": -1.7511215209960938, "logps/chosen": -213.273681640625, "logps/rejected": -237.1942596435547, "loss": 0.0932, "rewards/accuracies": 0.75, "rewards/chosen": -0.39561378955841064, "rewards/margins": 0.13690926134586334, "rewards/rejected": -0.5325230360031128, "step": 5870 }, { "epoch": 0.71, "learning_rate": 1.2091204664526831e-06, "logits/chosen": -2.0162253379821777, "logits/rejected": -1.6138890981674194, "logps/chosen": -287.21746826171875, "logps/rejected": -234.35693359375, "loss": 0.184, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4287000596523285, "rewards/margins": 0.1497940570116043, "rewards/rejected": -0.578494131565094, "step": 5880 }, { "epoch": 0.71, "learning_rate": 1.2001650557438143e-06, "logits/chosen": -2.028672695159912, "logits/rejected": -1.7924978733062744, "logps/chosen": -294.2822265625, "logps/rejected": -278.37652587890625, "loss": 0.1596, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.44275110960006714, "rewards/margins": 0.11968086659908295, "rewards/rejected": -0.5624319911003113, "step": 5890 }, { "epoch": 0.71, "learning_rate": 1.1912324457773336e-06, "logits/chosen": -2.0378835201263428, "logits/rejected": -1.7147912979125977, "logps/chosen": -240.72216796875, "logps/rejected": -268.4493713378906, "loss": 0.0914, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.39333948493003845, "rewards/margins": 0.19647042453289032, "rewards/rejected": -0.5898098945617676, "step": 5900 }, { "epoch": 0.71, "learning_rate": 1.182322793242476e-06, "logits/chosen": -2.0833020210266113, "logits/rejected": -1.8951669931411743, "logps/chosen": -225.5447540283203, "logps/rejected": -256.537109375, "loss": 0.1383, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.431450754404068, "rewards/margins": 0.16372133791446686, "rewards/rejected": -0.5951720476150513, "step": 5910 }, { "epoch": 0.71, "learning_rate": 1.1734362544257686e-06, "logits/chosen": -1.8667224645614624, "logits/rejected": -1.4377329349517822, "logps/chosen": -305.6443786621094, "logps/rejected": -238.24801635742188, "loss": 0.1093, "rewards/accuracies": 0.625, "rewards/chosen": -0.3690487742424011, "rewards/margins": 0.14427319169044495, "rewards/rejected": -0.5133219957351685, "step": 5920 }, { "epoch": 0.71, "learning_rate": 1.1645729852082977e-06, "logits/chosen": -2.2229387760162354, "logits/rejected": -1.7280595302581787, "logps/chosen": -246.1704559326172, "logps/rejected": -238.6482391357422, "loss": 0.1897, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4612872004508972, "rewards/margins": 0.18152639269828796, "rewards/rejected": -0.6428135633468628, "step": 5930 }, { "epoch": 0.71, "learning_rate": 1.1557331410629708e-06, "logits/chosen": -2.273224115371704, "logits/rejected": -1.5956547260284424, "logps/chosen": -271.32366943359375, "logps/rejected": -216.1154327392578, "loss": 0.1183, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.39185529947280884, "rewards/margins": 0.1586187481880188, "rewards/rejected": -0.5504740476608276, "step": 5940 }, { "epoch": 0.71, "learning_rate": 1.1469168770517913e-06, "logits/chosen": -2.2409141063690186, "logits/rejected": -1.7641382217407227, "logps/chosen": -249.9724578857422, "logps/rejected": -213.88729858398438, "loss": 0.123, "rewards/accuracies": 0.75, "rewards/chosen": -0.4436149001121521, "rewards/margins": 0.15342697501182556, "rewards/rejected": -0.5970418453216553, "step": 5950 }, { "epoch": 0.72, "learning_rate": 1.1381243478231336e-06, "logits/chosen": -2.1302855014801025, "logits/rejected": -1.7275043725967407, "logps/chosen": -295.96807861328125, "logps/rejected": -232.9789276123047, "loss": 0.1576, "rewards/accuracies": 0.75, "rewards/chosen": -0.3948201537132263, "rewards/margins": 0.1410309374332428, "rewards/rejected": -0.5358511209487915, "step": 5960 }, { "epoch": 0.72, "learning_rate": 1.1293557076090403e-06, "logits/chosen": -2.055603504180908, "logits/rejected": -1.6448357105255127, "logps/chosen": -270.03485107421875, "logps/rejected": -257.23333740234375, "loss": 0.1255, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3696318566799164, "rewards/margins": 0.16263523697853088, "rewards/rejected": -0.5322670936584473, "step": 5970 }, { "epoch": 0.72, "learning_rate": 1.1206111102225043e-06, "logits/chosen": -2.016026020050049, "logits/rejected": -1.7732445001602173, "logps/chosen": -322.04571533203125, "logps/rejected": -314.3202819824219, "loss": 0.1256, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.43497371673583984, "rewards/margins": 0.15854512155056, "rewards/rejected": -0.593518853187561, "step": 5980 }, { "epoch": 0.72, "learning_rate": 1.1118907090547805e-06, "logits/chosen": -2.1408379077911377, "logits/rejected": -1.6741759777069092, "logps/chosen": -290.5514221191406, "logps/rejected": -262.75262451171875, "loss": 0.099, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.45133018493652344, "rewards/margins": 0.22001805901527405, "rewards/rejected": -0.6713482737541199, "step": 5990 }, { "epoch": 0.72, "learning_rate": 1.1031946570726912e-06, "logits/chosen": -1.9711778163909912, "logits/rejected": -1.855182409286499, "logps/chosen": -278.8828430175781, "logps/rejected": -290.8421325683594, "loss": 0.1164, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5591145157814026, "rewards/margins": 0.11836342513561249, "rewards/rejected": -0.6774778962135315, "step": 6000 }, { "epoch": 0.72, "learning_rate": 1.094523106815944e-06, "logits/chosen": -1.855329155921936, "logits/rejected": -1.5584124326705933, "logps/chosen": -267.0565185546875, "logps/rejected": -273.24310302734375, "loss": 0.1452, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.44338005781173706, "rewards/margins": 0.17573294043540955, "rewards/rejected": -0.6191130876541138, "step": 6010 }, { "epoch": 0.72, "learning_rate": 1.0858762103944511e-06, "logits/chosen": -1.87862229347229, "logits/rejected": -1.6584688425064087, "logps/chosen": -280.62738037109375, "logps/rejected": -262.09942626953125, "loss": 0.125, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5238662362098694, "rewards/margins": 0.09647830575704575, "rewards/rejected": -0.6203445196151733, "step": 6020 }, { "epoch": 0.72, "learning_rate": 1.0772541194856732e-06, "logits/chosen": -2.123035430908203, "logits/rejected": -1.6155083179473877, "logps/chosen": -321.01776123046875, "logps/rejected": -279.3617858886719, "loss": 0.0745, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.42041999101638794, "rewards/margins": 0.16421575844287872, "rewards/rejected": -0.5846357941627502, "step": 6030 }, { "epoch": 0.72, "learning_rate": 1.068656985331943e-06, "logits/chosen": -1.9696871042251587, "logits/rejected": -1.6894041299819946, "logps/chosen": -253.11233520507812, "logps/rejected": -267.6400146484375, "loss": 0.1156, "rewards/accuracies": 0.75, "rewards/chosen": -0.48150572180747986, "rewards/margins": 0.17806780338287354, "rewards/rejected": -0.6595736145973206, "step": 6040 }, { "epoch": 0.73, "learning_rate": 1.060084958737825e-06, "logits/chosen": -1.990724802017212, "logits/rejected": -1.446575403213501, "logps/chosen": -237.4898681640625, "logps/rejected": -233.758056640625, "loss": 0.1, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3963894546031952, "rewards/margins": 0.20723167061805725, "rewards/rejected": -0.6036210656166077, "step": 6050 }, { "epoch": 0.73, "learning_rate": 1.0515381900674643e-06, "logits/chosen": -2.1221401691436768, "logits/rejected": -1.7900186777114868, "logps/chosen": -257.96722412109375, "logps/rejected": -280.8104553222656, "loss": 0.1199, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4584527611732483, "rewards/margins": 0.16997918486595154, "rewards/rejected": -0.6284319162368774, "step": 6060 }, { "epoch": 0.73, "learning_rate": 1.04301682924195e-06, "logits/chosen": -1.9626433849334717, "logits/rejected": -1.4727249145507812, "logps/chosen": -207.9431915283203, "logps/rejected": -195.52149963378906, "loss": 0.1507, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4559662938117981, "rewards/margins": 0.22094134986400604, "rewards/rejected": -0.6769076585769653, "step": 6070 }, { "epoch": 0.73, "learning_rate": 1.034521025736686e-06, "logits/chosen": -1.9479316473007202, "logits/rejected": -1.5621986389160156, "logps/chosen": -225.77102661132812, "logps/rejected": -238.942138671875, "loss": 0.1602, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4776443839073181, "rewards/margins": 0.16333012282848358, "rewards/rejected": -0.6409745216369629, "step": 6080 }, { "epoch": 0.73, "learning_rate": 1.0260509285787694e-06, "logits/chosen": -2.129117250442505, "logits/rejected": -1.501849889755249, "logps/chosen": -253.2616729736328, "logps/rejected": -219.7965545654297, "loss": 0.1035, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4091704785823822, "rewards/margins": 0.18820294737815857, "rewards/rejected": -0.597373366355896, "step": 6090 }, { "epoch": 0.73, "learning_rate": 1.0176066863443726e-06, "logits/chosen": -1.8864481449127197, "logits/rejected": -1.5883742570877075, "logps/chosen": -257.90185546875, "logps/rejected": -220.6878662109375, "loss": 0.1418, "rewards/accuracies": 0.625, "rewards/chosen": -0.45451027154922485, "rewards/margins": 0.12341739982366562, "rewards/rejected": -0.5779277086257935, "step": 6100 }, { "epoch": 0.73, "learning_rate": 1.0091884471561424e-06, "logits/chosen": -1.8764444589614868, "logits/rejected": -1.63616943359375, "logps/chosen": -261.2283020019531, "logps/rejected": -244.85128784179688, "loss": 0.1435, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4946800172328949, "rewards/margins": 0.1375671923160553, "rewards/rejected": -0.632247269153595, "step": 6110 }, { "epoch": 0.73, "learning_rate": 1.0007963586806e-06, "logits/chosen": -1.905747652053833, "logits/rejected": -1.4888708591461182, "logps/chosen": -284.759033203125, "logps/rejected": -269.97320556640625, "loss": 0.1243, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5422388315200806, "rewards/margins": 0.15825456380844116, "rewards/rejected": -0.700493335723877, "step": 6120 }, { "epoch": 0.74, "learning_rate": 9.924305681255484e-07, "logits/chosen": -1.8924305438995361, "logits/rejected": -1.4120725393295288, "logps/chosen": -261.49359130859375, "logps/rejected": -246.7588653564453, "loss": 0.1309, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4799385964870453, "rewards/margins": 0.22691066563129425, "rewards/rejected": -0.7068492770195007, "step": 6130 }, { "epoch": 0.74, "learning_rate": 9.840912222374932e-07, "logits/chosen": -2.065091609954834, "logits/rejected": -1.551511287689209, "logps/chosen": -271.27764892578125, "logps/rejected": -250.86550903320312, "loss": 0.1203, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.465266615152359, "rewards/margins": 0.19768501818180084, "rewards/rejected": -0.6629515886306763, "step": 6140 }, { "epoch": 0.74, "learning_rate": 9.757784672990668e-07, "logits/chosen": -1.8214833736419678, "logits/rejected": -1.3881083726882935, "logps/chosen": -256.01123046875, "logps/rejected": -230.39895629882812, "loss": 0.1763, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5507286787033081, "rewards/margins": 0.19404050707817078, "rewards/rejected": -0.7447691559791565, "step": 6150 }, { "epoch": 0.74, "learning_rate": 9.674924491264632e-07, "logits/chosen": -1.8416248559951782, "logits/rejected": -1.6367276906967163, "logps/chosen": -219.26351928710938, "logps/rejected": -215.86849975585938, "loss": 0.1109, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4397053122520447, "rewards/margins": 0.14263916015625, "rewards/rejected": -0.5823444724082947, "step": 6160 }, { "epoch": 0.74, "learning_rate": 9.59233313066878e-07, "logits/chosen": -2.089197874069214, "logits/rejected": -1.6182596683502197, "logps/chosen": -260.54376220703125, "logps/rejected": -253.81912231445312, "loss": 0.1007, "rewards/accuracies": 0.75, "rewards/chosen": -0.44585880637168884, "rewards/margins": 0.17450013756752014, "rewards/rejected": -0.620358943939209, "step": 6170 }, { "epoch": 0.74, "learning_rate": 9.510012039959632e-07, "logits/chosen": -1.9944250583648682, "logits/rejected": -1.6469684839248657, "logps/chosen": -285.20440673828125, "logps/rejected": -260.31121826171875, "loss": 0.1213, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4487648904323578, "rewards/margins": 0.14048054814338684, "rewards/rejected": -0.5892454981803894, "step": 6180 }, { "epoch": 0.74, "learning_rate": 9.427962663152821e-07, "logits/chosen": -1.9396718740463257, "logits/rejected": -1.632127046585083, "logps/chosen": -305.9094543457031, "logps/rejected": -257.2548828125, "loss": 0.1065, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.46664246916770935, "rewards/margins": 0.15920694172382355, "rewards/rejected": -0.6258494257926941, "step": 6190 }, { "epoch": 0.74, "learning_rate": 9.346186439497778e-07, "logits/chosen": -1.9716598987579346, "logits/rejected": -1.637711524963379, "logps/chosen": -238.27755737304688, "logps/rejected": -227.07540893554688, "loss": 0.1825, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.43801671266555786, "rewards/margins": 0.13967742025852203, "rewards/rejected": -0.5776941180229187, "step": 6200 }, { "epoch": 0.75, "learning_rate": 9.264684803452484e-07, "logits/chosen": -1.9573974609375, "logits/rejected": -1.6610110998153687, "logps/chosen": -292.9803771972656, "logps/rejected": -284.8642578125, "loss": 0.1376, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4125700891017914, "rewards/margins": 0.1055794209241867, "rewards/rejected": -0.5181494951248169, "step": 6210 }, { "epoch": 0.75, "learning_rate": 9.183459184658317e-07, "logits/chosen": -1.8743131160736084, "logits/rejected": -1.5913054943084717, "logps/chosen": -259.7023010253906, "logps/rejected": -258.9094543457031, "loss": 0.105, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.45578956604003906, "rewards/margins": 0.11757218837738037, "rewards/rejected": -0.5733617544174194, "step": 6220 }, { "epoch": 0.75, "learning_rate": 9.102511007914924e-07, "logits/chosen": -1.9219213724136353, "logits/rejected": -1.4633852243423462, "logps/chosen": -210.48403930664062, "logps/rejected": -200.42410278320312, "loss": 0.126, "rewards/accuracies": 0.75, "rewards/chosen": -0.4389539659023285, "rewards/margins": 0.202097088098526, "rewards/rejected": -0.6410510540008545, "step": 6230 }, { "epoch": 0.75, "learning_rate": 9.021841693155343e-07, "logits/chosen": -2.061584234237671, "logits/rejected": -1.6734384298324585, "logps/chosen": -264.5237121582031, "logps/rejected": -252.5099334716797, "loss": 0.1233, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39105328917503357, "rewards/margins": 0.18605628609657288, "rewards/rejected": -0.5771095752716064, "step": 6240 }, { "epoch": 0.75, "learning_rate": 8.94145265542094e-07, "logits/chosen": -2.173652172088623, "logits/rejected": -1.8842623233795166, "logps/chosen": -311.49774169921875, "logps/rejected": -285.50506591796875, "loss": 0.0915, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3695235252380371, "rewards/margins": 0.18801763653755188, "rewards/rejected": -0.5575411915779114, "step": 6250 }, { "epoch": 0.75, "learning_rate": 8.861345304836727e-07, "logits/chosen": -1.937359094619751, "logits/rejected": -1.8544925451278687, "logps/chosen": -278.1236877441406, "logps/rejected": -304.86962890625, "loss": 0.1114, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5378960371017456, "rewards/margins": 0.09365091472864151, "rewards/rejected": -0.6315470933914185, "step": 6260 }, { "epoch": 0.75, "learning_rate": 8.781521046586541e-07, "logits/chosen": -1.9989734888076782, "logits/rejected": -1.5364919900894165, "logps/chosen": -244.04989624023438, "logps/rejected": -233.1022491455078, "loss": 0.136, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4168265461921692, "rewards/margins": 0.18471379578113556, "rewards/rejected": -0.6015402674674988, "step": 6270 }, { "epoch": 0.75, "learning_rate": 8.701981280888444e-07, "logits/chosen": -1.8424322605133057, "logits/rejected": -1.6274335384368896, "logps/chosen": -247.665771484375, "logps/rejected": -262.58160400390625, "loss": 0.1512, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3954276442527771, "rewards/margins": 0.15929386019706726, "rewards/rejected": -0.554721474647522, "step": 6280 }, { "epoch": 0.75, "learning_rate": 8.622727402970097e-07, "logits/chosen": -1.7672450542449951, "logits/rejected": -1.7314002513885498, "logps/chosen": -255.74935913085938, "logps/rejected": -306.3473205566406, "loss": 0.0826, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.47890645265579224, "rewards/margins": 0.13192898035049438, "rewards/rejected": -0.6108353734016418, "step": 6290 }, { "epoch": 0.76, "learning_rate": 8.543760803044393e-07, "logits/chosen": -1.9199352264404297, "logits/rejected": -1.465319275856018, "logps/chosen": -249.533447265625, "logps/rejected": -250.07919311523438, "loss": 0.1434, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4657590389251709, "rewards/margins": 0.20879296958446503, "rewards/rejected": -0.6745520830154419, "step": 6300 }, { "epoch": 0.76, "learning_rate": 8.465082866284951e-07, "logits/chosen": -2.023472547531128, "logits/rejected": -1.5475150346755981, "logps/chosen": -259.3255310058594, "logps/rejected": -234.91415405273438, "loss": 0.1315, "rewards/accuracies": 0.75, "rewards/chosen": -0.4470517635345459, "rewards/margins": 0.19469048082828522, "rewards/rejected": -0.6417423486709595, "step": 6310 }, { "epoch": 0.76, "learning_rate": 8.386694972801904e-07, "logits/chosen": -1.8993467092514038, "logits/rejected": -1.540050745010376, "logps/chosen": -270.31182861328125, "logps/rejected": -249.36618041992188, "loss": 0.1409, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4989251494407654, "rewards/margins": 0.16831564903259277, "rewards/rejected": -0.6672407984733582, "step": 6320 }, { "epoch": 0.76, "learning_rate": 8.308598497617648e-07, "logits/chosen": -1.906795859336853, "logits/rejected": -1.6299690008163452, "logps/chosen": -175.07479858398438, "logps/rejected": -191.7357635498047, "loss": 0.0714, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4141884446144104, "rewards/margins": 0.1404605209827423, "rewards/rejected": -0.5546489953994751, "step": 6330 }, { "epoch": 0.76, "learning_rate": 8.230794810642753e-07, "logits/chosen": -1.9722293615341187, "logits/rejected": -1.5651670694351196, "logps/chosen": -290.1768798828125, "logps/rejected": -253.2939910888672, "loss": 0.1167, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4297245442867279, "rewards/margins": 0.1225515827536583, "rewards/rejected": -0.5522761344909668, "step": 6340 }, { "epoch": 0.76, "learning_rate": 8.153285276651876e-07, "logits/chosen": -2.1099610328674316, "logits/rejected": -1.8138777017593384, "logps/chosen": -228.72891235351562, "logps/rejected": -261.8756103515625, "loss": 0.0897, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.36999136209487915, "rewards/margins": 0.14173254370689392, "rewards/rejected": -0.5117239356040955, "step": 6350 }, { "epoch": 0.76, "learning_rate": 8.076071255259918e-07, "logits/chosen": -1.9893842935562134, "logits/rejected": -1.562839150428772, "logps/chosen": -248.32656860351562, "logps/rejected": -230.19082641601562, "loss": 0.1012, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4931401312351227, "rewards/margins": 0.16243189573287964, "rewards/rejected": -0.6555719971656799, "step": 6360 }, { "epoch": 0.76, "learning_rate": 7.999154100898063e-07, "logits/chosen": -1.8441355228424072, "logits/rejected": -1.6876119375228882, "logps/chosen": -210.8905792236328, "logps/rejected": -267.10882568359375, "loss": 0.0825, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.513385534286499, "rewards/margins": 0.17421071231365204, "rewards/rejected": -0.6875962018966675, "step": 6370 }, { "epoch": 0.77, "learning_rate": 7.922535162790095e-07, "logits/chosen": -2.086892604827881, "logits/rejected": -1.8276185989379883, "logps/chosen": -234.5299072265625, "logps/rejected": -250.52059936523438, "loss": 0.1293, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4303308129310608, "rewards/margins": 0.16687455773353577, "rewards/rejected": -0.5972053408622742, "step": 6380 }, { "epoch": 0.77, "learning_rate": 7.846215784928721e-07, "logits/chosen": -2.0581459999084473, "logits/rejected": -1.7542043924331665, "logps/chosen": -229.604736328125, "logps/rejected": -245.43185424804688, "loss": 0.1118, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.43111807107925415, "rewards/margins": 0.1881767064332962, "rewards/rejected": -0.6192947626113892, "step": 6390 }, { "epoch": 0.77, "learning_rate": 7.770197306051968e-07, "logits/chosen": -2.1675782203674316, "logits/rejected": -1.4943665266036987, "logps/chosen": -267.9649963378906, "logps/rejected": -236.1041259765625, "loss": 0.1182, "rewards/accuracies": 0.875, "rewards/chosen": -0.42259639501571655, "rewards/margins": 0.24217908084392548, "rewards/rejected": -0.664775550365448, "step": 6400 }, { "epoch": 0.77, "learning_rate": 7.694481059619705e-07, "logits/chosen": -1.932381272315979, "logits/rejected": -1.6388800144195557, "logps/chosen": -246.7991943359375, "logps/rejected": -249.91943359375, "loss": 0.1273, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4872209429740906, "rewards/margins": 0.18630096316337585, "rewards/rejected": -0.6735219955444336, "step": 6410 }, { "epoch": 0.77, "learning_rate": 7.619068373790306e-07, "logits/chosen": -2.089247226715088, "logits/rejected": -1.614682912826538, "logps/chosen": -282.35125732421875, "logps/rejected": -271.75067138671875, "loss": 0.129, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.44997042417526245, "rewards/margins": 0.1696648895740509, "rewards/rejected": -0.6196353435516357, "step": 6420 }, { "epoch": 0.77, "learning_rate": 7.543960571397257e-07, "logits/chosen": -2.0346500873565674, "logits/rejected": -1.7321970462799072, "logps/chosen": -237.32608032226562, "logps/rejected": -245.86557006835938, "loss": 0.0754, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.46555739641189575, "rewards/margins": 0.16662751138210297, "rewards/rejected": -0.6321848630905151, "step": 6430 }, { "epoch": 0.77, "learning_rate": 7.469158969926038e-07, "logits/chosen": -2.0735549926757812, "logits/rejected": -1.6884253025054932, "logps/chosen": -263.26898193359375, "logps/rejected": -264.8492126464844, "loss": 0.074, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4788509011268616, "rewards/margins": 0.14806295931339264, "rewards/rejected": -0.6269139051437378, "step": 6440 }, { "epoch": 0.77, "learning_rate": 7.39466488149097e-07, "logits/chosen": -2.0731894969940186, "logits/rejected": -1.5555330514907837, "logps/chosen": -243.86892700195312, "logps/rejected": -223.0145721435547, "loss": 0.1347, "rewards/accuracies": 0.75, "rewards/chosen": -0.41140609979629517, "rewards/margins": 0.2104901522397995, "rewards/rejected": -0.6218962073326111, "step": 6450 }, { "epoch": 0.78, "learning_rate": 7.320479612812218e-07, "logits/chosen": -2.017112970352173, "logits/rejected": -1.5632555484771729, "logps/chosen": -207.18191528320312, "logps/rejected": -199.11024475097656, "loss": 0.0694, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3916897475719452, "rewards/margins": 0.14873406291007996, "rewards/rejected": -0.5404238700866699, "step": 6460 }, { "epoch": 0.78, "learning_rate": 7.246604465192825e-07, "logits/chosen": -1.9492496252059937, "logits/rejected": -1.3995827436447144, "logps/chosen": -259.1231994628906, "logps/rejected": -208.67135620117188, "loss": 0.124, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.49213799834251404, "rewards/margins": 0.2363675832748413, "rewards/rejected": -0.7285054922103882, "step": 6470 }, { "epoch": 0.78, "learning_rate": 7.173040734495973e-07, "logits/chosen": -1.8647918701171875, "logits/rejected": -1.5196516513824463, "logps/chosen": -291.6728210449219, "logps/rejected": -321.27923583984375, "loss": 0.134, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.504711925983429, "rewards/margins": 0.14177486300468445, "rewards/rejected": -0.646486759185791, "step": 6480 }, { "epoch": 0.78, "learning_rate": 7.099789711122149e-07, "logits/chosen": -2.0234179496765137, "logits/rejected": -1.6390674114227295, "logps/chosen": -296.9944763183594, "logps/rejected": -274.94488525390625, "loss": 0.1205, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4782020151615143, "rewards/margins": 0.15751081705093384, "rewards/rejected": -0.6357128024101257, "step": 6490 }, { "epoch": 0.78, "learning_rate": 7.02685267998659e-07, "logits/chosen": -1.8039512634277344, "logits/rejected": -1.610396146774292, "logps/chosen": -217.3019256591797, "logps/rejected": -230.8792724609375, "loss": 0.1698, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.42607298493385315, "rewards/margins": 0.10820464789867401, "rewards/rejected": -0.534277617931366, "step": 6500 }, { "epoch": 0.78, "learning_rate": 6.954230920496702e-07, "logits/chosen": -1.9449794292449951, "logits/rejected": -1.672254204750061, "logps/chosen": -209.4949188232422, "logps/rejected": -234.17465209960938, "loss": 0.0954, "rewards/accuracies": 0.75, "rewards/chosen": -0.5420662760734558, "rewards/margins": 0.2034389078617096, "rewards/rejected": -0.7455052733421326, "step": 6510 }, { "epoch": 0.78, "learning_rate": 6.881925706529641e-07, "logits/chosen": -2.1921558380126953, "logits/rejected": -1.6133739948272705, "logps/chosen": -253.46450805664062, "logps/rejected": -225.8391571044922, "loss": 0.0943, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.47693949937820435, "rewards/margins": 0.17005819082260132, "rewards/rejected": -0.6469976305961609, "step": 6520 }, { "epoch": 0.78, "learning_rate": 6.809938306409925e-07, "logits/chosen": -1.8478351831436157, "logits/rejected": -1.6184707880020142, "logps/chosen": -254.103515625, "logps/rejected": -244.87991333007812, "loss": 0.0911, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.474212646484375, "rewards/margins": 0.16678480803966522, "rewards/rejected": -0.6409973502159119, "step": 6530 }, { "epoch": 0.78, "learning_rate": 6.738269982887266e-07, "logits/chosen": -2.0551493167877197, "logits/rejected": -1.6460390090942383, "logps/chosen": -320.0547790527344, "logps/rejected": -268.46875, "loss": 0.1129, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4602360725402832, "rewards/margins": 0.18921080231666565, "rewards/rejected": -0.6494468450546265, "step": 6540 }, { "epoch": 0.79, "learning_rate": 6.66692199311432e-07, "logits/chosen": -2.0328869819641113, "logits/rejected": -1.675079584121704, "logps/chosen": -289.8661193847656, "logps/rejected": -240.75991821289062, "loss": 0.1209, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.42221537232398987, "rewards/margins": 0.1419004648923874, "rewards/rejected": -0.5641158819198608, "step": 6550 }, { "epoch": 0.79, "learning_rate": 6.595895588624717e-07, "logits/chosen": -2.185662269592285, "logits/rejected": -1.7335224151611328, "logps/chosen": -257.2197570800781, "logps/rejected": -243.74807739257812, "loss": 0.1239, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.36936455965042114, "rewards/margins": 0.1940496563911438, "rewards/rejected": -0.5634142756462097, "step": 6560 }, { "epoch": 0.79, "learning_rate": 6.525192015311069e-07, "logits/chosen": -2.069929599761963, "logits/rejected": -1.7172218561172485, "logps/chosen": -269.12603759765625, "logps/rejected": -264.0863952636719, "loss": 0.085, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.456916481256485, "rewards/margins": 0.16084381937980652, "rewards/rejected": -0.6177603006362915, "step": 6570 }, { "epoch": 0.79, "learning_rate": 6.454812513403127e-07, "logits/chosen": -2.1930034160614014, "logits/rejected": -1.7271572351455688, "logps/chosen": -229.4690399169922, "logps/rejected": -206.50888061523438, "loss": 0.105, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4114112854003906, "rewards/margins": 0.1248263344168663, "rewards/rejected": -0.5362376570701599, "step": 6580 }, { "epoch": 0.79, "learning_rate": 6.384758317445991e-07, "logits/chosen": -1.9955850839614868, "logits/rejected": -1.3841392993927002, "logps/chosen": -287.55853271484375, "logps/rejected": -227.1190948486328, "loss": 0.1237, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.39280954003334045, "rewards/margins": 0.25686582922935486, "rewards/rejected": -0.6496754288673401, "step": 6590 }, { "epoch": 0.79, "learning_rate": 6.31503065627854e-07, "logits/chosen": -1.863050103187561, "logits/rejected": -1.605564832687378, "logps/chosen": -265.7858581542969, "logps/rejected": -288.51629638671875, "loss": 0.1061, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.48526960611343384, "rewards/margins": 0.19922366738319397, "rewards/rejected": -0.684493362903595, "step": 6600 }, { "epoch": 0.79, "learning_rate": 6.245630753011767e-07, "logits/chosen": -2.018514633178711, "logits/rejected": -1.6095269918441772, "logps/chosen": -293.40185546875, "logps/rejected": -261.62066650390625, "loss": 0.1041, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.39809244871139526, "rewards/margins": 0.2280418872833252, "rewards/rejected": -0.6261343955993652, "step": 6610 }, { "epoch": 0.79, "learning_rate": 6.176559825007408e-07, "logits/chosen": -2.115142345428467, "logits/rejected": -1.8222726583480835, "logps/chosen": -286.07147216796875, "logps/rejected": -268.15667724609375, "loss": 0.0929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4131897985935211, "rewards/margins": 0.17083369195461273, "rewards/rejected": -0.5840234756469727, "step": 6620 }, { "epoch": 0.8, "learning_rate": 6.107819083856559e-07, "logits/chosen": -2.1033377647399902, "logits/rejected": -1.6299479007720947, "logps/chosen": -292.8658142089844, "logps/rejected": -253.60104370117188, "loss": 0.1869, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4031642973423004, "rewards/margins": 0.14742298424243927, "rewards/rejected": -0.5505872964859009, "step": 6630 }, { "epoch": 0.8, "learning_rate": 6.039409735358418e-07, "logits/chosen": -1.938940405845642, "logits/rejected": -1.6774669885635376, "logps/chosen": -264.48992919921875, "logps/rejected": -243.3132781982422, "loss": 0.0917, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4583476483821869, "rewards/margins": 0.227961927652359, "rewards/rejected": -0.6863095164299011, "step": 6640 }, { "epoch": 0.8, "learning_rate": 5.971332979499112e-07, "logits/chosen": -1.9997894763946533, "logits/rejected": -1.6866439580917358, "logps/chosen": -228.4779815673828, "logps/rejected": -202.27304077148438, "loss": 0.0927, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4338861405849457, "rewards/margins": 0.186149001121521, "rewards/rejected": -0.6200351715087891, "step": 6650 }, { "epoch": 0.8, "learning_rate": 5.903590010430732e-07, "logits/chosen": -1.9610633850097656, "logits/rejected": -1.4865853786468506, "logps/chosen": -247.3853302001953, "logps/rejected": -217.017333984375, "loss": 0.1559, "rewards/accuracies": 0.625, "rewards/chosen": -0.4397171139717102, "rewards/margins": 0.16036757826805115, "rewards/rejected": -0.600084662437439, "step": 6660 }, { "epoch": 0.8, "learning_rate": 5.836182016450273e-07, "logits/chosen": -1.881838083267212, "logits/rejected": -1.6120822429656982, "logps/chosen": -277.1618957519531, "logps/rejected": -234.37094116210938, "loss": 0.1946, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4851130545139313, "rewards/margins": 0.14165827631950378, "rewards/rejected": -0.6267713308334351, "step": 6670 }, { "epoch": 0.8, "learning_rate": 5.769110179978874e-07, "logits/chosen": -2.084548234939575, "logits/rejected": -1.870194435119629, "logps/chosen": -213.6473388671875, "logps/rejected": -241.52255249023438, "loss": 0.1835, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.41156840324401855, "rewards/margins": 0.1030372828245163, "rewards/rejected": -0.514605700969696, "step": 6680 }, { "epoch": 0.8, "learning_rate": 5.702375677541037e-07, "logits/chosen": -1.8728317022323608, "logits/rejected": -1.7525784969329834, "logps/chosen": -246.52294921875, "logps/rejected": -250.93368530273438, "loss": 0.0981, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4282185435295105, "rewards/margins": 0.15069648623466492, "rewards/rejected": -0.578914999961853, "step": 6690 }, { "epoch": 0.8, "learning_rate": 5.635979679744006e-07, "logits/chosen": -1.756136178970337, "logits/rejected": -1.4906980991363525, "logps/chosen": -233.78359985351562, "logps/rejected": -230.1853485107422, "loss": 0.0827, "rewards/accuracies": 0.625, "rewards/chosen": -0.4131496548652649, "rewards/margins": 0.11998526751995087, "rewards/rejected": -0.533134937286377, "step": 6700 }, { "epoch": 0.81, "learning_rate": 5.569923351257223e-07, "logits/chosen": -1.9852508306503296, "logits/rejected": -1.7096633911132812, "logps/chosen": -241.21292114257812, "logps/rejected": -279.4994201660156, "loss": 0.1367, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.38888391852378845, "rewards/margins": 0.18084433674812317, "rewards/rejected": -0.5697282552719116, "step": 6710 }, { "epoch": 0.81, "learning_rate": 5.504207850791912e-07, "logits/chosen": -1.9846687316894531, "logits/rejected": -1.4948934316635132, "logps/chosen": -277.4078674316406, "logps/rejected": -225.47042846679688, "loss": 0.1489, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4744594097137451, "rewards/margins": 0.17791931331157684, "rewards/rejected": -0.6523788571357727, "step": 6720 }, { "epoch": 0.81, "learning_rate": 5.438834331080725e-07, "logits/chosen": -2.0223140716552734, "logits/rejected": -1.8239043951034546, "logps/chosen": -246.28005981445312, "logps/rejected": -258.4651794433594, "loss": 0.145, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.43237772583961487, "rewards/margins": 0.14295579493045807, "rewards/rejected": -0.5753334760665894, "step": 6730 }, { "epoch": 0.81, "learning_rate": 5.373803938857558e-07, "logits/chosen": -1.9347299337387085, "logits/rejected": -1.681318998336792, "logps/chosen": -266.93133544921875, "logps/rejected": -230.0156707763672, "loss": 0.2025, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5939790606498718, "rewards/margins": 0.11487730592489243, "rewards/rejected": -0.708856463432312, "step": 6740 }, { "epoch": 0.81, "learning_rate": 5.309117814837409e-07, "logits/chosen": -2.084141731262207, "logits/rejected": -1.590496301651001, "logps/chosen": -245.8157501220703, "logps/rejected": -212.6662139892578, "loss": 0.1119, "rewards/accuracies": 0.625, "rewards/chosen": -0.33361050486564636, "rewards/margins": 0.17130649089813232, "rewards/rejected": -0.5049170255661011, "step": 6750 }, { "epoch": 0.81, "learning_rate": 5.244777093696385e-07, "logits/chosen": -2.146206855773926, "logits/rejected": -1.6665055751800537, "logps/chosen": -225.45962524414062, "logps/rejected": -228.45425415039062, "loss": 0.0773, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4145995080471039, "rewards/margins": 0.2185206413269043, "rewards/rejected": -0.6331201791763306, "step": 6760 }, { "epoch": 0.81, "learning_rate": 5.180782904051787e-07, "logits/chosen": -1.8693435192108154, "logits/rejected": -1.7655032873153687, "logps/chosen": -248.69705200195312, "logps/rejected": -256.6230773925781, "loss": 0.1171, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.44969773292541504, "rewards/margins": 0.13754042983055115, "rewards/rejected": -0.5872381925582886, "step": 6770 }, { "epoch": 0.81, "learning_rate": 5.117136368442322e-07, "logits/chosen": -1.9687871932983398, "logits/rejected": -1.5913883447647095, "logps/chosen": -216.0896759033203, "logps/rejected": -212.8240203857422, "loss": 0.1145, "rewards/accuracies": 0.625, "rewards/chosen": -0.47843655943870544, "rewards/margins": 0.14962342381477356, "rewards/rejected": -0.628059983253479, "step": 6780 }, { "epoch": 0.81, "learning_rate": 5.053838603308403e-07, "logits/chosen": -2.2426624298095703, "logits/rejected": -1.8825994729995728, "logps/chosen": -305.18011474609375, "logps/rejected": -311.623046875, "loss": 0.1362, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4912681579589844, "rewards/margins": 0.1363973617553711, "rewards/rejected": -0.6276654601097107, "step": 6790 }, { "epoch": 0.82, "learning_rate": 4.99089071897256e-07, "logits/chosen": -1.974311113357544, "logits/rejected": -1.5755977630615234, "logps/chosen": -247.4668731689453, "logps/rejected": -246.3483123779297, "loss": 0.1313, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4030587077140808, "rewards/margins": 0.2043962925672531, "rewards/rejected": -0.6074550151824951, "step": 6800 }, { "epoch": 0.82, "learning_rate": 4.92829381961999e-07, "logits/chosen": -1.7325855493545532, "logits/rejected": -1.5258713960647583, "logps/chosen": -243.26107788085938, "logps/rejected": -243.32833862304688, "loss": 0.0918, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.47821131348609924, "rewards/margins": 0.18177253007888794, "rewards/rejected": -0.6599838733673096, "step": 6810 }, { "epoch": 0.82, "learning_rate": 4.866049003279163e-07, "logits/chosen": -1.9763206243515015, "logits/rejected": -1.603864073753357, "logps/chosen": -245.575439453125, "logps/rejected": -217.13046264648438, "loss": 0.1529, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4021136164665222, "rewards/margins": 0.12271346896886826, "rewards/rejected": -0.5248271226882935, "step": 6820 }, { "epoch": 0.82, "learning_rate": 4.80415736180257e-07, "logits/chosen": -1.7185128927230835, "logits/rejected": -1.5524407625198364, "logps/chosen": -206.218017578125, "logps/rejected": -224.42819213867188, "loss": 0.1003, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.502429187297821, "rewards/margins": 0.15189214050769806, "rewards/rejected": -0.6543213129043579, "step": 6830 }, { "epoch": 0.82, "learning_rate": 4.7426199808475735e-07, "logits/chosen": -1.9034755229949951, "logits/rejected": -1.6419847011566162, "logps/chosen": -266.4277648925781, "logps/rejected": -252.0305633544922, "loss": 0.1589, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.46889528632164, "rewards/margins": 0.16565118730068207, "rewards/rejected": -0.6345464587211609, "step": 6840 }, { "epoch": 0.82, "learning_rate": 4.6814379398573613e-07, "logits/chosen": -1.9863262176513672, "logits/rejected": -1.6904399394989014, "logps/chosen": -288.7919921875, "logps/rejected": -317.627197265625, "loss": 0.1441, "rewards/accuracies": 0.75, "rewards/chosen": -0.4666666090488434, "rewards/margins": 0.15177717804908752, "rewards/rejected": -0.6184438467025757, "step": 6850 }, { "epoch": 0.82, "learning_rate": 4.6206123120419944e-07, "logits/chosen": -1.7895174026489258, "logits/rejected": -1.514021635055542, "logps/chosen": -262.0445251464844, "logps/rejected": -283.6744384765625, "loss": 0.1085, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.49015116691589355, "rewards/margins": 0.18592293560504913, "rewards/rejected": -0.6760741472244263, "step": 6860 }, { "epoch": 0.82, "learning_rate": 4.5601441643596145e-07, "logits/chosen": -1.9775257110595703, "logits/rejected": -1.5629953145980835, "logps/chosen": -260.10284423828125, "logps/rejected": -243.96127319335938, "loss": 0.126, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4571099877357483, "rewards/margins": 0.18302378058433533, "rewards/rejected": -0.6401337385177612, "step": 6870 }, { "epoch": 0.83, "learning_rate": 4.500034557497709e-07, "logits/chosen": -1.863673448562622, "logits/rejected": -1.4204473495483398, "logps/chosen": -292.68621826171875, "logps/rejected": -247.7226104736328, "loss": 0.1288, "rewards/accuracies": 0.75, "rewards/chosen": -0.5655902624130249, "rewards/margins": 0.15709365904331207, "rewards/rejected": -0.7226839661598206, "step": 6880 }, { "epoch": 0.83, "learning_rate": 4.4402845458545037e-07, "logits/chosen": -1.9163280725479126, "logits/rejected": -1.6178480386734009, "logps/chosen": -260.6136169433594, "logps/rejected": -262.7823181152344, "loss": 0.0979, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5299848318099976, "rewards/margins": 0.15895573794841766, "rewards/rejected": -0.6889406442642212, "step": 6890 }, { "epoch": 0.83, "learning_rate": 4.380895177520475e-07, "logits/chosen": -2.119663953781128, "logits/rejected": -1.4375776052474976, "logps/chosen": -320.13934326171875, "logps/rejected": -282.3840026855469, "loss": 0.1025, "rewards/accuracies": 0.75, "rewards/chosen": -0.48634710907936096, "rewards/margins": 0.1542474776506424, "rewards/rejected": -0.6405946612358093, "step": 6900 }, { "epoch": 0.83, "learning_rate": 4.3218674942599655e-07, "logits/chosen": -1.989381194114685, "logits/rejected": -1.6404857635498047, "logps/chosen": -266.8316955566406, "logps/rejected": -250.7163848876953, "loss": 0.1453, "rewards/accuracies": 0.75, "rewards/chosen": -0.3762025237083435, "rewards/margins": 0.18678084015846252, "rewards/rejected": -0.5629833936691284, "step": 6910 }, { "epoch": 0.83, "learning_rate": 4.263202531492877e-07, "logits/chosen": -1.9917621612548828, "logits/rejected": -1.7141485214233398, "logps/chosen": -256.47882080078125, "logps/rejected": -231.6102752685547, "loss": 0.1329, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5046517252922058, "rewards/margins": 0.1110520213842392, "rewards/rejected": -0.6157038807868958, "step": 6920 }, { "epoch": 0.83, "learning_rate": 4.204901318276586e-07, "logits/chosen": -2.0761396884918213, "logits/rejected": -1.6558525562286377, "logps/chosen": -301.8915710449219, "logps/rejected": -294.1426086425781, "loss": 0.4635, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.49290648102760315, "rewards/margins": 0.2808048725128174, "rewards/rejected": -0.7737113237380981, "step": 6930 }, { "epoch": 0.83, "learning_rate": 4.146964877287804e-07, "logits/chosen": -2.0482866764068604, "logits/rejected": -1.5608055591583252, "logps/chosen": -353.6920166015625, "logps/rejected": -292.26177978515625, "loss": 0.1426, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.44764071702957153, "rewards/margins": 0.165061354637146, "rewards/rejected": -0.6127020716667175, "step": 6940 }, { "epoch": 0.83, "learning_rate": 4.089394224804691e-07, "logits/chosen": -2.0461716651916504, "logits/rejected": -1.643463373184204, "logps/chosen": -236.15139770507812, "logps/rejected": -208.002685546875, "loss": 0.1336, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4394107460975647, "rewards/margins": 0.15235844254493713, "rewards/rejected": -0.5917690992355347, "step": 6950 }, { "epoch": 0.84, "learning_rate": 4.032190370689018e-07, "logits/chosen": -2.036041021347046, "logits/rejected": -1.5846761465072632, "logps/chosen": -270.18865966796875, "logps/rejected": -221.50357055664062, "loss": 0.1346, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.48520708084106445, "rewards/margins": 0.1285451054573059, "rewards/rejected": -0.6137521862983704, "step": 6960 }, { "epoch": 0.84, "learning_rate": 3.9753543183684573e-07, "logits/chosen": -1.8880681991577148, "logits/rejected": -1.741532564163208, "logps/chosen": -290.96209716796875, "logps/rejected": -323.4405212402344, "loss": 0.1471, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5335294008255005, "rewards/margins": 0.1267957240343094, "rewards/rejected": -0.6603251099586487, "step": 6970 }, { "epoch": 0.84, "learning_rate": 3.9188870648189437e-07, "logits/chosen": -2.0555896759033203, "logits/rejected": -1.7752418518066406, "logps/chosen": -273.6790466308594, "logps/rejected": -279.67437744140625, "loss": 0.1747, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5123944282531738, "rewards/margins": 0.10983245074748993, "rewards/rejected": -0.622226893901825, "step": 6980 }, { "epoch": 0.84, "learning_rate": 3.862789600547268e-07, "logits/chosen": -2.072603702545166, "logits/rejected": -1.5208203792572021, "logps/chosen": -228.24551391601562, "logps/rejected": -198.6292724609375, "loss": 0.1493, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.46002787351608276, "rewards/margins": 0.15952149033546448, "rewards/rejected": -0.6195493936538696, "step": 6990 }, { "epoch": 0.84, "learning_rate": 3.8070629095736e-07, "logits/chosen": -2.0098140239715576, "logits/rejected": -1.8628854751586914, "logps/chosen": -278.3236389160156, "logps/rejected": -290.70135498046875, "loss": 0.1144, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.47310394048690796, "rewards/margins": 0.1412026733160019, "rewards/rejected": -0.6143065690994263, "step": 7000 }, { "epoch": 0.84, "learning_rate": 3.7517079694143145e-07, "logits/chosen": -1.8572998046875, "logits/rejected": -1.6057090759277344, "logps/chosen": -219.03427124023438, "logps/rejected": -238.3125762939453, "loss": 0.1321, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3572883903980255, "rewards/margins": 0.18927468359470367, "rewards/rejected": -0.5465630292892456, "step": 7010 }, { "epoch": 0.84, "learning_rate": 3.696725751064778e-07, "logits/chosen": -1.8692944049835205, "logits/rejected": -1.6396774053573608, "logps/chosen": -249.4277801513672, "logps/rejected": -236.60397338867188, "loss": 0.1562, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.41915005445480347, "rewards/margins": 0.17370714247226715, "rewards/rejected": -0.5928572416305542, "step": 7020 }, { "epoch": 0.84, "learning_rate": 3.6421172189823884e-07, "logits/chosen": -2.1776063442230225, "logits/rejected": -1.8611905574798584, "logps/chosen": -308.03680419921875, "logps/rejected": -264.7606506347656, "loss": 0.1133, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.44754162430763245, "rewards/margins": 0.1368330419063568, "rewards/rejected": -0.5843747854232788, "step": 7030 }, { "epoch": 0.84, "learning_rate": 3.587883331069575e-07, "logits/chosen": -1.870141625404358, "logits/rejected": -1.6269527673721313, "logps/chosen": -300.21435546875, "logps/rejected": -270.22515869140625, "loss": 0.1038, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5208837389945984, "rewards/margins": 0.10919564962387085, "rewards/rejected": -0.630079448223114, "step": 7040 }, { "epoch": 0.85, "learning_rate": 3.5340250386570547e-07, "logits/chosen": -1.9647839069366455, "logits/rejected": -1.6906397342681885, "logps/chosen": -289.46002197265625, "logps/rejected": -272.2466735839844, "loss": 0.1131, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5417642593383789, "rewards/margins": 0.15753653645515442, "rewards/rejected": -0.6993007063865662, "step": 7050 }, { "epoch": 0.85, "learning_rate": 3.480543286487126e-07, "logits/chosen": -2.036736249923706, "logits/rejected": -1.7443698644638062, "logps/chosen": -250.82235717773438, "logps/rejected": -267.0654602050781, "loss": 0.111, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4719162881374359, "rewards/margins": 0.19385971128940582, "rewards/rejected": -0.6657760143280029, "step": 7060 }, { "epoch": 0.85, "learning_rate": 3.4274390126971035e-07, "logits/chosen": -1.9663559198379517, "logits/rejected": -1.701615571975708, "logps/chosen": -236.70419311523438, "logps/rejected": -215.01290893554688, "loss": 0.2167, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4362650513648987, "rewards/margins": 0.10641946643590927, "rewards/rejected": -0.5426844358444214, "step": 7070 }, { "epoch": 0.85, "learning_rate": 3.374713148802827e-07, "logits/chosen": -2.056093692779541, "logits/rejected": -1.5538482666015625, "logps/chosen": -264.41412353515625, "logps/rejected": -251.5968780517578, "loss": 0.1848, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.46888089179992676, "rewards/margins": 0.1688876450061798, "rewards/rejected": -0.6377686262130737, "step": 7080 }, { "epoch": 0.85, "learning_rate": 3.3223666196823963e-07, "logits/chosen": -2.1422367095947266, "logits/rejected": -1.6432521343231201, "logps/chosen": -332.3525695800781, "logps/rejected": -257.8883361816406, "loss": 0.139, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5275644063949585, "rewards/margins": 0.13949260115623474, "rewards/rejected": -0.6670569181442261, "step": 7090 }, { "epoch": 0.85, "learning_rate": 3.27040034355986e-07, "logits/chosen": -1.8350646495819092, "logits/rejected": -1.7812795639038086, "logps/chosen": -248.4701690673828, "logps/rejected": -259.24176025390625, "loss": 0.1682, "rewards/accuracies": 0.75, "rewards/chosen": -0.523715615272522, "rewards/margins": 0.1881420910358429, "rewards/rejected": -0.7118576765060425, "step": 7100 }, { "epoch": 0.85, "learning_rate": 3.218815231989167e-07, "logits/chosen": -2.013810157775879, "logits/rejected": -1.7800830602645874, "logps/chosen": -269.4825744628906, "logps/rejected": -250.37014770507812, "loss": 0.1172, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.47187668085098267, "rewards/margins": 0.1158447265625, "rewards/rejected": -0.5877213478088379, "step": 7110 }, { "epoch": 0.85, "learning_rate": 3.1676121898381597e-07, "logits/chosen": -1.7077372074127197, "logits/rejected": -1.537479281425476, "logps/chosen": -283.3507385253906, "logps/rejected": -296.3389892578125, "loss": 0.0994, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4422314167022705, "rewards/margins": 0.15675051510334015, "rewards/rejected": -0.5989819765090942, "step": 7120 }, { "epoch": 0.86, "learning_rate": 3.1167921152727096e-07, "logits/chosen": -1.9554319381713867, "logits/rejected": -1.6862990856170654, "logps/chosen": -265.57684326171875, "logps/rejected": -236.5033416748047, "loss": 0.1686, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.41017451882362366, "rewards/margins": 0.1357969343662262, "rewards/rejected": -0.5459714531898499, "step": 7130 }, { "epoch": 0.86, "learning_rate": 3.066355899740925e-07, "logits/chosen": -1.9464342594146729, "logits/rejected": -1.6717723608016968, "logps/chosen": -255.412841796875, "logps/rejected": -277.73638916015625, "loss": 0.0925, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.48311883211135864, "rewards/margins": 0.16044536232948303, "rewards/rejected": -0.6435642242431641, "step": 7140 }, { "epoch": 0.86, "learning_rate": 3.0163044279575865e-07, "logits/chosen": -2.0800702571868896, "logits/rejected": -1.6157634258270264, "logps/chosen": -273.6175231933594, "logps/rejected": -198.8623809814453, "loss": 0.1335, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4554726481437683, "rewards/margins": 0.1640358418226242, "rewards/rejected": -0.6195084452629089, "step": 7150 }, { "epoch": 0.86, "learning_rate": 2.966638577888548e-07, "logits/chosen": -1.9913661479949951, "logits/rejected": -1.7240318059921265, "logps/chosen": -269.86920166015625, "logps/rejected": -281.88836669921875, "loss": 0.1086, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5008378028869629, "rewards/margins": 0.14693915843963623, "rewards/rejected": -0.6477769613265991, "step": 7160 }, { "epoch": 0.86, "learning_rate": 2.917359220735386e-07, "logits/chosen": -1.7666356563568115, "logits/rejected": -1.6807903051376343, "logps/chosen": -212.5333709716797, "logps/rejected": -204.54331970214844, "loss": 0.1653, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5168853998184204, "rewards/margins": 0.08568285405635834, "rewards/rejected": -0.6025682687759399, "step": 7170 }, { "epoch": 0.86, "learning_rate": 2.8684672209201067e-07, "logits/chosen": -1.8947250843048096, "logits/rejected": -1.5195536613464355, "logps/chosen": -257.7271423339844, "logps/rejected": -226.953125, "loss": 0.1413, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.48005929589271545, "rewards/margins": 0.11954379081726074, "rewards/rejected": -0.5996031165122986, "step": 7180 }, { "epoch": 0.86, "learning_rate": 2.819963436069986e-07, "logits/chosen": -2.003467321395874, "logits/rejected": -1.6604106426239014, "logps/chosen": -301.46466064453125, "logps/rejected": -248.64669799804688, "loss": 0.1251, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.47957324981689453, "rewards/margins": 0.09554027020931244, "rewards/rejected": -0.5751134753227234, "step": 7190 }, { "epoch": 0.86, "learning_rate": 2.771848717002498e-07, "logits/chosen": -1.7738151550292969, "logits/rejected": -1.6896775960922241, "logps/chosen": -236.6964569091797, "logps/rejected": -270.410888671875, "loss": 0.1098, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5385271310806274, "rewards/margins": 0.1548021137714386, "rewards/rejected": -0.6933292746543884, "step": 7200 }, { "epoch": 0.87, "learning_rate": 2.724123907710444e-07, "logits/chosen": -1.7751373052597046, "logits/rejected": -1.565288782119751, "logps/chosen": -195.97508239746094, "logps/rejected": -213.2833251953125, "loss": 0.1312, "rewards/accuracies": 0.75, "rewards/chosen": -0.466492235660553, "rewards/margins": 0.1685236245393753, "rewards/rejected": -0.6350158452987671, "step": 7210 }, { "epoch": 0.87, "learning_rate": 2.6767898453470886e-07, "logits/chosen": -2.038952350616455, "logits/rejected": -1.636182427406311, "logps/chosen": -237.19473266601562, "logps/rejected": -223.7322235107422, "loss": 0.1162, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.383628785610199, "rewards/margins": 0.1849764585494995, "rewards/rejected": -0.5686052441596985, "step": 7220 }, { "epoch": 0.87, "learning_rate": 2.629847360211518e-07, "logits/chosen": -1.9980814456939697, "logits/rejected": -1.6197658777236938, "logps/chosen": -247.9432830810547, "logps/rejected": -251.46630859375, "loss": 0.0962, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4834599494934082, "rewards/margins": 0.15811513364315033, "rewards/rejected": -0.6415750980377197, "step": 7230 }, { "epoch": 0.87, "learning_rate": 2.5832972757340565e-07, "logits/chosen": -2.032080888748169, "logits/rejected": -1.8052698373794556, "logps/chosen": -242.31698608398438, "logps/rejected": -257.6308898925781, "loss": 0.142, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.44113120436668396, "rewards/margins": 0.11361245810985565, "rewards/rejected": -0.5547436475753784, "step": 7240 }, { "epoch": 0.87, "learning_rate": 2.53714040846183e-07, "logits/chosen": -1.9440562725067139, "logits/rejected": -1.5986840724945068, "logps/chosen": -284.2532958984375, "logps/rejected": -236.9690704345703, "loss": 0.1171, "rewards/accuracies": 0.75, "rewards/chosen": -0.3770473003387451, "rewards/margins": 0.23315231502056122, "rewards/rejected": -0.6101996302604675, "step": 7250 }, { "epoch": 0.87, "learning_rate": 2.491377568044434e-07, "logits/chosen": -2.0843589305877686, "logits/rejected": -1.6826503276824951, "logps/chosen": -314.3594665527344, "logps/rejected": -272.8559875488281, "loss": 0.0734, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5402888059616089, "rewards/margins": 0.14350660145282745, "rewards/rejected": -0.6837953925132751, "step": 7260 }, { "epoch": 0.87, "learning_rate": 2.4460095572197476e-07, "logits/chosen": -2.0028257369995117, "logits/rejected": -1.6907081604003906, "logps/chosen": -238.27001953125, "logps/rejected": -250.3758544921875, "loss": 0.1027, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4421817660331726, "rewards/margins": 0.1625339388847351, "rewards/rejected": -0.6047157049179077, "step": 7270 }, { "epoch": 0.87, "learning_rate": 2.401037171799819e-07, "logits/chosen": -1.9249871969223022, "logits/rejected": -1.5126729011535645, "logps/chosen": -267.6981506347656, "logps/rejected": -230.08407592773438, "loss": 0.1672, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3991939127445221, "rewards/margins": 0.1651068925857544, "rewards/rejected": -0.5643008351325989, "step": 7280 }, { "epoch": 0.87, "learning_rate": 2.3564612006569482e-07, "logits/chosen": -2.02858304977417, "logits/rejected": -1.7860488891601562, "logps/chosen": -264.84002685546875, "logps/rejected": -264.8438415527344, "loss": 0.1607, "rewards/accuracies": 0.625, "rewards/chosen": -0.45592936873435974, "rewards/margins": 0.1130853146314621, "rewards/rejected": -0.5690146684646606, "step": 7290 }, { "epoch": 0.88, "learning_rate": 2.3122824257098275e-07, "logits/chosen": -1.7619152069091797, "logits/rejected": -1.4626259803771973, "logps/chosen": -230.26889038085938, "logps/rejected": -224.6363525390625, "loss": 0.0667, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4507187008857727, "rewards/margins": 0.16845285892486572, "rewards/rejected": -0.6191716194152832, "step": 7300 }, { "epoch": 0.88, "learning_rate": 2.2685016219098187e-07, "logits/chosen": -2.1385879516601562, "logits/rejected": -1.5403960943222046, "logps/chosen": -266.4300231933594, "logps/rejected": -216.80722045898438, "loss": 0.1237, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39975208044052124, "rewards/margins": 0.22581128776073456, "rewards/rejected": -0.625563383102417, "step": 7310 }, { "epoch": 0.88, "learning_rate": 2.2251195572273758e-07, "logits/chosen": -2.1694719791412354, "logits/rejected": -1.607452154159546, "logps/chosen": -341.0927734375, "logps/rejected": -272.94464111328125, "loss": 0.0914, "rewards/accuracies": 0.75, "rewards/chosen": -0.4034740924835205, "rewards/margins": 0.19167360663414001, "rewards/rejected": -0.5951477289199829, "step": 7320 }, { "epoch": 0.88, "learning_rate": 2.18213699263857e-07, "logits/chosen": -1.9197025299072266, "logits/rejected": -1.6683180332183838, "logps/chosen": -291.236572265625, "logps/rejected": -316.86737060546875, "loss": 0.0876, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.47617173194885254, "rewards/margins": 0.2178315371274948, "rewards/rejected": -0.6940032839775085, "step": 7330 }, { "epoch": 0.88, "learning_rate": 2.1395546821117192e-07, "logits/chosen": -1.8608232736587524, "logits/rejected": -1.577675461769104, "logps/chosen": -279.8750915527344, "logps/rejected": -252.7456817626953, "loss": 0.1519, "rewards/accuracies": 0.625, "rewards/chosen": -0.4677848815917969, "rewards/margins": 0.1569843590259552, "rewards/rejected": -0.6247692108154297, "step": 7340 }, { "epoch": 0.88, "learning_rate": 2.097373372594197e-07, "logits/chosen": -2.017251968383789, "logits/rejected": -1.649173378944397, "logps/chosen": -284.208984375, "logps/rejected": -264.468505859375, "loss": 0.1668, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.47412624955177307, "rewards/margins": 0.12772853672504425, "rewards/rejected": -0.6018548011779785, "step": 7350 }, { "epoch": 0.88, "learning_rate": 2.0555938039993145e-07, "logits/chosen": -2.207703113555908, "logits/rejected": -1.7608064413070679, "logps/chosen": -317.61566162109375, "logps/rejected": -273.2945861816406, "loss": 0.1079, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.3838236629962921, "rewards/margins": 0.15046364068984985, "rewards/rejected": -0.5342873334884644, "step": 7360 }, { "epoch": 0.88, "learning_rate": 2.0142167091933368e-07, "logits/chosen": -1.8200185298919678, "logits/rejected": -1.7389323711395264, "logps/chosen": -233.045654296875, "logps/rejected": -263.55535888671875, "loss": 0.1277, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4523780941963196, "rewards/margins": 0.1272934377193451, "rewards/rejected": -0.5796715617179871, "step": 7370 }, { "epoch": 0.89, "learning_rate": 1.973242813982626e-07, "logits/chosen": -1.7422631978988647, "logits/rejected": -1.4749271869659424, "logps/chosen": -223.44418334960938, "logps/rejected": -217.0834503173828, "loss": 0.1286, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.490098237991333, "rewards/margins": 0.15991918742656708, "rewards/rejected": -0.6500174403190613, "step": 7380 }, { "epoch": 0.89, "learning_rate": 1.932672837100924e-07, "logits/chosen": -2.1760799884796143, "logits/rejected": -1.4984136819839478, "logps/chosen": -262.04400634765625, "logps/rejected": -240.68911743164062, "loss": 0.1252, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.49469202756881714, "rewards/margins": 0.19546754658222198, "rewards/rejected": -0.6901595592498779, "step": 7390 }, { "epoch": 0.89, "learning_rate": 1.8925074901967406e-07, "logits/chosen": -2.032710313796997, "logits/rejected": -1.4763513803482056, "logps/chosen": -284.44317626953125, "logps/rejected": -255.60302734375, "loss": 0.0695, "rewards/accuracies": 0.75, "rewards/chosen": -0.4748278558254242, "rewards/margins": 0.16811513900756836, "rewards/rejected": -0.6429430246353149, "step": 7400 }, { "epoch": 0.89, "learning_rate": 1.8527474778208458e-07, "logits/chosen": -1.841803789138794, "logits/rejected": -1.7014901638031006, "logps/chosen": -180.16915893554688, "logps/rejected": -192.54147338867188, "loss": 0.1502, "rewards/accuracies": 0.625, "rewards/chosen": -0.42819100618362427, "rewards/margins": 0.07826290279626846, "rewards/rejected": -0.5064539313316345, "step": 7410 }, { "epoch": 0.89, "learning_rate": 1.813393497413951e-07, "logits/chosen": -1.9294426441192627, "logits/rejected": -1.611358880996704, "logps/chosen": -286.4609680175781, "logps/rejected": -255.0117645263672, "loss": 0.0994, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4472865164279938, "rewards/margins": 0.13087594509124756, "rewards/rejected": -0.5781623721122742, "step": 7420 }, { "epoch": 0.89, "learning_rate": 1.7744462392944472e-07, "logits/chosen": -2.1310625076293945, "logits/rejected": -1.6776885986328125, "logps/chosen": -288.6611328125, "logps/rejected": -262.71453857421875, "loss": 0.1262, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.47246164083480835, "rewards/margins": 0.1253913938999176, "rewards/rejected": -0.5978530049324036, "step": 7430 }, { "epoch": 0.89, "learning_rate": 1.7359063866463048e-07, "logits/chosen": -2.0324885845184326, "logits/rejected": -1.6409807205200195, "logps/chosen": -239.01052856445312, "logps/rejected": -202.0733642578125, "loss": 0.1716, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3990083932876587, "rewards/margins": 0.19792206585407257, "rewards/rejected": -0.5969304442405701, "step": 7440 }, { "epoch": 0.89, "learning_rate": 1.6977746155070946e-07, "logits/chosen": -1.877323865890503, "logits/rejected": -1.8759233951568604, "logps/chosen": -244.84945678710938, "logps/rejected": -282.86456298828125, "loss": 0.1464, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4934001564979553, "rewards/margins": 0.12719208002090454, "rewards/rejected": -0.6205921769142151, "step": 7450 }, { "epoch": 0.9, "learning_rate": 1.6600515947561207e-07, "logits/chosen": -2.076573133468628, "logits/rejected": -1.5323445796966553, "logps/chosen": -282.5911560058594, "logps/rejected": -220.7271728515625, "loss": 0.1678, "rewards/accuracies": 0.75, "rewards/chosen": -0.4402221739292145, "rewards/margins": 0.16551920771598816, "rewards/rejected": -0.6057413220405579, "step": 7460 }, { "epoch": 0.9, "learning_rate": 1.6227379861026738e-07, "logits/chosen": -2.02341365814209, "logits/rejected": -1.6644586324691772, "logps/chosen": -255.2810821533203, "logps/rejected": -251.2646484375, "loss": 0.151, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4583802819252014, "rewards/margins": 0.15048515796661377, "rewards/rejected": -0.60886549949646, "step": 7470 }, { "epoch": 0.9, "learning_rate": 1.5858344440744745e-07, "logits/chosen": -2.074061632156372, "logits/rejected": -1.6793878078460693, "logps/chosen": -275.97467041015625, "logps/rejected": -275.2462463378906, "loss": 0.128, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4571126103401184, "rewards/margins": 0.14766040444374084, "rewards/rejected": -0.6047729253768921, "step": 7480 }, { "epoch": 0.9, "learning_rate": 1.5493416160061254e-07, "logits/chosen": -2.1556544303894043, "logits/rejected": -1.7006380558013916, "logps/chosen": -289.0265197753906, "logps/rejected": -254.0576171875, "loss": 0.179, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4663164019584656, "rewards/margins": 0.13743355870246887, "rewards/rejected": -0.6037499904632568, "step": 7490 }, { "epoch": 0.9, "learning_rate": 1.5132601420278086e-07, "logits/chosen": -1.9497236013412476, "logits/rejected": -1.6325147151947021, "logps/chosen": -267.0816955566406, "logps/rejected": -226.15576171875, "loss": 0.0762, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.47710466384887695, "rewards/margins": 0.14392545819282532, "rewards/rejected": -0.6210300326347351, "step": 7500 }, { "epoch": 0.9, "learning_rate": 1.4775906550540287e-07, "logits/chosen": -1.7985035181045532, "logits/rejected": -1.484886884689331, "logps/chosen": -217.06076049804688, "logps/rejected": -206.60055541992188, "loss": 0.1064, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.443693071603775, "rewards/margins": 0.13600948452949524, "rewards/rejected": -0.5797025561332703, "step": 7510 }, { "epoch": 0.9, "learning_rate": 1.4423337807725286e-07, "logits/chosen": -1.957148790359497, "logits/rejected": -1.8867321014404297, "logps/chosen": -207.6150360107422, "logps/rejected": -233.46658325195312, "loss": 0.0856, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4386165142059326, "rewards/margins": 0.1362169086933136, "rewards/rejected": -0.5748334527015686, "step": 7520 }, { "epoch": 0.9, "learning_rate": 1.4074901376332855e-07, "logits/chosen": -1.9299287796020508, "logits/rejected": -1.9098894596099854, "logps/chosen": -274.15850830078125, "logps/rejected": -299.91265869140625, "loss": 0.0883, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.47814321517944336, "rewards/margins": 0.13489031791687012, "rewards/rejected": -0.6130335927009583, "step": 7530 }, { "epoch": 0.9, "learning_rate": 1.3730603368377088e-07, "logits/chosen": -1.915826439857483, "logits/rejected": -1.7198493480682373, "logps/chosen": -296.31182861328125, "logps/rejected": -286.13592529296875, "loss": 0.0992, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4671846926212311, "rewards/margins": 0.1536525934934616, "rewards/rejected": -0.6208373308181763, "step": 7540 }, { "epoch": 0.91, "learning_rate": 1.3390449823278666e-07, "logits/chosen": -1.976479172706604, "logits/rejected": -1.7112070322036743, "logps/chosen": -309.96136474609375, "logps/rejected": -279.93597412109375, "loss": 0.2023, "rewards/accuracies": 0.625, "rewards/chosen": -0.4522860646247864, "rewards/margins": 0.08242306113243103, "rewards/rejected": -0.5347092151641846, "step": 7550 }, { "epoch": 0.91, "learning_rate": 1.3054446707759323e-07, "logits/chosen": -2.088073968887329, "logits/rejected": -1.6631252765655518, "logps/chosen": -269.19268798828125, "logps/rejected": -219.1171112060547, "loss": 0.0938, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4191514551639557, "rewards/margins": 0.1780991554260254, "rewards/rejected": -0.5972505807876587, "step": 7560 }, { "epoch": 0.91, "learning_rate": 1.2722599915736962e-07, "logits/chosen": -1.9217841625213623, "logits/rejected": -1.4964293241500854, "logps/chosen": -215.7314910888672, "logps/rejected": -203.1638641357422, "loss": 0.161, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4369204640388489, "rewards/margins": 0.19439806044101715, "rewards/rejected": -0.6313184499740601, "step": 7570 }, { "epoch": 0.91, "learning_rate": 1.2394915268222423e-07, "logits/chosen": -1.7211532592773438, "logits/rejected": -1.4849998950958252, "logps/chosen": -253.0435028076172, "logps/rejected": -240.583740234375, "loss": 0.1213, "rewards/accuracies": 0.625, "rewards/chosen": -0.45572876930236816, "rewards/margins": 0.12647785246372223, "rewards/rejected": -0.5822066068649292, "step": 7580 }, { "epoch": 0.91, "learning_rate": 1.2071398513217118e-07, "logits/chosen": -1.8722648620605469, "logits/rejected": -1.4593003988265991, "logps/chosen": -299.0888366699219, "logps/rejected": -245.06686401367188, "loss": 0.1898, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.45327168703079224, "rewards/margins": 0.16469185054302216, "rewards/rejected": -0.6179635524749756, "step": 7590 }, { "epoch": 0.91, "learning_rate": 1.1752055325612605e-07, "logits/chosen": -2.0962700843811035, "logits/rejected": -1.614005446434021, "logps/chosen": -284.02618408203125, "logps/rejected": -245.89785766601562, "loss": 0.1521, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3982982933521271, "rewards/margins": 0.14497140049934387, "rewards/rejected": -0.5432697534561157, "step": 7600 }, { "epoch": 0.91, "learning_rate": 1.143689130709058e-07, "logits/chosen": -1.713621735572815, "logits/rejected": -1.4907618761062622, "logps/chosen": -288.0763854980469, "logps/rejected": -314.97869873046875, "loss": 0.067, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4883494973182678, "rewards/margins": 0.1738799810409546, "rewards/rejected": -0.6622294187545776, "step": 7610 }, { "epoch": 0.91, "learning_rate": 1.1125911986025001e-07, "logits/chosen": -1.7698646783828735, "logits/rejected": -1.6841932535171509, "logps/chosen": -350.5254211425781, "logps/rejected": -300.12103271484375, "loss": 0.1248, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5203927755355835, "rewards/margins": 0.08340970426797867, "rewards/rejected": -0.603802502155304, "step": 7620 }, { "epoch": 0.92, "learning_rate": 1.0819122817384897e-07, "logits/chosen": -1.9861576557159424, "logits/rejected": -1.5860307216644287, "logps/chosen": -254.307861328125, "logps/rejected": -279.30615234375, "loss": 0.1191, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5575435757637024, "rewards/margins": 0.1310313642024994, "rewards/rejected": -0.6885749101638794, "step": 7630 }, { "epoch": 0.92, "learning_rate": 1.0516529182638819e-07, "logits/chosen": -1.825350046157837, "logits/rejected": -1.5633935928344727, "logps/chosen": -293.57757568359375, "logps/rejected": -316.1748962402344, "loss": 0.1375, "rewards/accuracies": 0.75, "rewards/chosen": -0.5138299465179443, "rewards/margins": 0.13412019610404968, "rewards/rejected": -0.6479502320289612, "step": 7640 }, { "epoch": 0.92, "learning_rate": 1.0218136389660211e-07, "logits/chosen": -1.9528892040252686, "logits/rejected": -1.6495583057403564, "logps/chosen": -300.08294677734375, "logps/rejected": -279.8624572753906, "loss": 0.1515, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.441044420003891, "rewards/margins": 0.08407802134752274, "rewards/rejected": -0.5251224040985107, "step": 7650 }, { "epoch": 0.92, "learning_rate": 9.923949672634714e-08, "logits/chosen": -1.7535518407821655, "logits/rejected": -1.5248991250991821, "logps/chosen": -286.5433654785156, "logps/rejected": -296.0802001953125, "loss": 0.148, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.519503116607666, "rewards/margins": 0.18411189317703247, "rewards/rejected": -0.7036150097846985, "step": 7660 }, { "epoch": 0.92, "learning_rate": 9.633974191967794e-08, "logits/chosen": -1.9656978845596313, "logits/rejected": -1.546311616897583, "logps/chosen": -251.5748748779297, "logps/rejected": -249.95162963867188, "loss": 0.1335, "rewards/accuracies": 0.75, "rewards/chosen": -0.3971438705921173, "rewards/margins": 0.2471657246351242, "rewards/rejected": -0.6443095207214355, "step": 7670 }, { "epoch": 0.92, "learning_rate": 9.348215034194752e-08, "logits/chosen": -1.918678879737854, "logits/rejected": -1.3947335481643677, "logps/chosen": -295.56622314453125, "logps/rejected": -266.44805908203125, "loss": 0.103, "rewards/accuracies": 0.625, "rewards/chosen": -0.43143653869628906, "rewards/margins": 0.18379929661750793, "rewards/rejected": -0.6152359247207642, "step": 7680 }, { "epoch": 0.92, "learning_rate": 9.066677211891195e-08, "logits/chosen": -1.9028263092041016, "logits/rejected": -1.4752349853515625, "logps/chosen": -219.3140869140625, "logps/rejected": -224.39013671875, "loss": 0.1199, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.47012224793434143, "rewards/margins": 0.2023288756608963, "rewards/rejected": -0.6724511384963989, "step": 7690 }, { "epoch": 0.92, "learning_rate": 8.789365663585208e-08, "logits/chosen": -2.116147518157959, "logits/rejected": -1.8864881992340088, "logps/chosen": -283.5893249511719, "logps/rejected": -234.08224487304688, "loss": 0.1225, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4223853051662445, "rewards/margins": 0.1264767050743103, "rewards/rejected": -0.5488620400428772, "step": 7700 }, { "epoch": 0.93, "learning_rate": 8.516285253670597e-08, "logits/chosen": -1.9596683979034424, "logits/rejected": -1.6173160076141357, "logps/chosen": -230.27188110351562, "logps/rejected": -205.488525390625, "loss": 0.1671, "rewards/accuracies": 0.75, "rewards/chosen": -0.5056071281433105, "rewards/margins": 0.17759716510772705, "rewards/rejected": -0.6832043528556824, "step": 7710 }, { "epoch": 0.93, "learning_rate": 8.247440772321924e-08, "logits/chosen": -1.8863664865493774, "logits/rejected": -1.7962907552719116, "logps/chosen": -265.54937744140625, "logps/rejected": -279.07379150390625, "loss": 0.1479, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5093497037887573, "rewards/margins": 0.14122812449932098, "rewards/rejected": -0.6505778431892395, "step": 7720 }, { "epoch": 0.93, "learning_rate": 7.982836935409938e-08, "logits/chosen": -1.9433815479278564, "logits/rejected": -1.6725490093231201, "logps/chosen": -274.528564453125, "logps/rejected": -256.54766845703125, "loss": 0.1345, "rewards/accuracies": 0.75, "rewards/chosen": -0.5039865970611572, "rewards/margins": 0.16849592328071594, "rewards/rejected": -0.6724825501441956, "step": 7730 }, { "epoch": 0.93, "learning_rate": 7.722478384419335e-08, "logits/chosen": -1.8644914627075195, "logits/rejected": -1.4965304136276245, "logps/chosen": -278.33746337890625, "logps/rejected": -243.71817016601562, "loss": 0.1328, "rewards/accuracies": 0.625, "rewards/chosen": -0.49178391695022583, "rewards/margins": 0.12286220490932465, "rewards/rejected": -0.6146460771560669, "step": 7740 }, { "epoch": 0.93, "learning_rate": 7.466369686367075e-08, "logits/chosen": -2.0346133708953857, "logits/rejected": -1.6774375438690186, "logps/chosen": -293.4043273925781, "logps/rejected": -247.53579711914062, "loss": 0.1475, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.46793246269226074, "rewards/margins": 0.09048617631196976, "rewards/rejected": -0.5584186315536499, "step": 7750 }, { "epoch": 0.93, "learning_rate": 7.21451533372236e-08, "logits/chosen": -1.9626423120498657, "logits/rejected": -1.580739974975586, "logps/chosen": -258.93060302734375, "logps/rejected": -241.1245574951172, "loss": 0.1536, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.47752898931503296, "rewards/margins": 0.1563757061958313, "rewards/rejected": -0.6339046955108643, "step": 7760 }, { "epoch": 0.93, "learning_rate": 6.966919744327783e-08, "logits/chosen": -2.0937347412109375, "logits/rejected": -1.5341440439224243, "logps/chosen": -341.6336669921875, "logps/rejected": -260.72589111328125, "loss": 0.1603, "rewards/accuracies": 0.625, "rewards/chosen": -0.42314642667770386, "rewards/margins": 0.19263425469398499, "rewards/rejected": -0.6157806515693665, "step": 7770 }, { "epoch": 0.93, "learning_rate": 6.723587261321912e-08, "logits/chosen": -1.9547611474990845, "logits/rejected": -1.642380714416504, "logps/chosen": -297.21649169921875, "logps/rejected": -273.52459716796875, "loss": 0.1617, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4811842441558838, "rewards/margins": 0.11549937725067139, "rewards/rejected": -0.5966835618019104, "step": 7780 }, { "epoch": 0.93, "learning_rate": 6.484522153063056e-08, "logits/chosen": -1.9168577194213867, "logits/rejected": -1.5155632495880127, "logps/chosen": -225.7365264892578, "logps/rejected": -204.46615600585938, "loss": 0.1319, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4766554832458496, "rewards/margins": 0.1555982381105423, "rewards/rejected": -0.6322537064552307, "step": 7790 }, { "epoch": 0.94, "learning_rate": 6.249728613054313e-08, "logits/chosen": -1.7597625255584717, "logits/rejected": -1.5485173463821411, "logps/chosen": -264.35321044921875, "logps/rejected": -273.6183166503906, "loss": 0.1479, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.48368602991104126, "rewards/margins": 0.07144041359424591, "rewards/rejected": -0.5551263689994812, "step": 7800 }, { "epoch": 0.94, "learning_rate": 6.01921075987022e-08, "logits/chosen": -1.9097301959991455, "logits/rejected": -1.186971664428711, "logps/chosen": -246.1496124267578, "logps/rejected": -194.6399688720703, "loss": 0.1488, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4605187773704529, "rewards/margins": 0.23739011585712433, "rewards/rejected": -0.697908878326416, "step": 7810 }, { "epoch": 0.94, "learning_rate": 5.7929726370843096e-08, "logits/chosen": -2.1597225666046143, "logits/rejected": -1.8278968334197998, "logps/chosen": -254.0515594482422, "logps/rejected": -271.48236083984375, "loss": 0.1766, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4439367651939392, "rewards/margins": 0.2024269998073578, "rewards/rejected": -0.6463637351989746, "step": 7820 }, { "epoch": 0.94, "learning_rate": 5.5710182131981927e-08, "logits/chosen": -1.99951171875, "logits/rejected": -1.7067577838897705, "logps/chosen": -238.32467651367188, "logps/rejected": -256.7763671875, "loss": 0.1304, "rewards/accuracies": 0.625, "rewards/chosen": -0.39582788944244385, "rewards/margins": 0.21840229630470276, "rewards/rejected": -0.614230215549469, "step": 7830 }, { "epoch": 0.94, "learning_rate": 5.3533513815721694e-08, "logits/chosen": -2.118350028991699, "logits/rejected": -1.6194887161254883, "logps/chosen": -272.92449951171875, "logps/rejected": -227.6458282470703, "loss": 0.0858, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.43067464232444763, "rewards/margins": 0.21112823486328125, "rewards/rejected": -0.6418029069900513, "step": 7840 }, { "epoch": 0.94, "learning_rate": 5.1399759603565916e-08, "logits/chosen": -2.176056385040283, "logits/rejected": -1.72799551486969, "logps/chosen": -259.24176025390625, "logps/rejected": -220.6403350830078, "loss": 0.1577, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.41245096921920776, "rewards/margins": 0.16166391968727112, "rewards/rejected": -0.5741148591041565, "step": 7850 }, { "epoch": 0.94, "learning_rate": 4.930895692425192e-08, "logits/chosen": -2.08243989944458, "logits/rejected": -1.6198593378067017, "logps/chosen": -313.93475341796875, "logps/rejected": -302.1040344238281, "loss": 0.0815, "rewards/accuracies": 0.75, "rewards/chosen": -0.48940593004226685, "rewards/margins": 0.1875011920928955, "rewards/rejected": -0.6769071817398071, "step": 7860 }, { "epoch": 0.94, "learning_rate": 4.726114245309249e-08, "logits/chosen": -2.1306631565093994, "logits/rejected": -1.8443044424057007, "logps/chosen": -260.3841552734375, "logps/rejected": -240.7478790283203, "loss": 0.084, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4020455777645111, "rewards/margins": 0.11921534687280655, "rewards/rejected": -0.5212609171867371, "step": 7870 }, { "epoch": 0.95, "learning_rate": 4.5256352111333334e-08, "logits/chosen": -2.197538137435913, "logits/rejected": -1.9828275442123413, "logps/chosen": -265.1056213378906, "logps/rejected": -240.36581420898438, "loss": 0.1341, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.41930776834487915, "rewards/margins": 0.10385145992040634, "rewards/rejected": -0.5231592059135437, "step": 7880 }, { "epoch": 0.95, "learning_rate": 4.32946210655219e-08, "logits/chosen": -1.879839539527893, "logits/rejected": -1.6344287395477295, "logps/chosen": -289.47540283203125, "logps/rejected": -303.8387756347656, "loss": 0.0844, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4271796643733978, "rewards/margins": 0.12638108432292938, "rewards/rejected": -0.553560733795166, "step": 7890 }, { "epoch": 0.95, "learning_rate": 4.137598372689289e-08, "logits/chosen": -1.9711477756500244, "logits/rejected": -1.640442132949829, "logps/chosen": -289.0892639160156, "logps/rejected": -256.19390869140625, "loss": 0.1124, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.5559543967247009, "rewards/margins": 0.06960447132587433, "rewards/rejected": -0.6255587935447693, "step": 7900 }, { "epoch": 0.95, "learning_rate": 3.950047375076177e-08, "logits/chosen": -1.9874846935272217, "logits/rejected": -1.746363639831543, "logps/chosen": -253.0285186767578, "logps/rejected": -267.914306640625, "loss": 0.151, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4185236096382141, "rewards/margins": 0.170128732919693, "rewards/rejected": -0.5886522531509399, "step": 7910 }, { "epoch": 0.95, "learning_rate": 3.7668124035936395e-08, "logits/chosen": -1.8806092739105225, "logits/rejected": -1.7465565204620361, "logps/chosen": -281.9869384765625, "logps/rejected": -293.6903381347656, "loss": 0.1098, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.48278847336769104, "rewards/margins": 0.16966548562049866, "rewards/rejected": -0.6524539589881897, "step": 7920 }, { "epoch": 0.95, "learning_rate": 3.587896672413882e-08, "logits/chosen": -1.954755187034607, "logits/rejected": -1.6159816980361938, "logps/chosen": -330.97698974609375, "logps/rejected": -257.1192932128906, "loss": 0.1392, "rewards/accuracies": 0.625, "rewards/chosen": -0.4463822841644287, "rewards/margins": 0.12165029346942902, "rewards/rejected": -0.5680325031280518, "step": 7930 }, { "epoch": 0.95, "learning_rate": 3.413303319944244e-08, "logits/chosen": -1.8800256252288818, "logits/rejected": -1.666426420211792, "logps/chosen": -255.54055786132812, "logps/rejected": -272.89056396484375, "loss": 0.1096, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5043514370918274, "rewards/margins": 0.13485528528690338, "rewards/rejected": -0.6392067670822144, "step": 7940 }, { "epoch": 0.95, "learning_rate": 3.243035408772077e-08, "logits/chosen": -1.8116118907928467, "logits/rejected": -1.465587854385376, "logps/chosen": -263.7826232910156, "logps/rejected": -215.6946563720703, "loss": 0.1744, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.47774848341941833, "rewards/margins": 0.16809609532356262, "rewards/rejected": -0.645844578742981, "step": 7950 }, { "epoch": 0.96, "learning_rate": 3.077095925611007e-08, "logits/chosen": -1.8181393146514893, "logits/rejected": -1.7243306636810303, "logps/chosen": -255.312255859375, "logps/rejected": -261.9159851074219, "loss": 0.1284, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.49375930428504944, "rewards/margins": 0.1126602292060852, "rewards/rejected": -0.6064194440841675, "step": 7960 }, { "epoch": 0.96, "learning_rate": 2.915487781248616e-08, "logits/chosen": -2.0126254558563232, "logits/rejected": -1.7640451192855835, "logps/chosen": -258.76007080078125, "logps/rejected": -300.79937744140625, "loss": 0.0698, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.43722277879714966, "rewards/margins": 0.21826669573783875, "rewards/rejected": -0.6554895043373108, "step": 7970 }, { "epoch": 0.96, "learning_rate": 2.7582138104953748e-08, "logits/chosen": -1.9624278545379639, "logits/rejected": -1.6752490997314453, "logps/chosen": -219.3885955810547, "logps/rejected": -218.8914794921875, "loss": 0.1453, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4231085181236267, "rewards/margins": 0.1884673833847046, "rewards/rejected": -0.6115759015083313, "step": 7980 }, { "epoch": 0.96, "learning_rate": 2.6052767721348184e-08, "logits/chosen": -2.087956666946411, "logits/rejected": -1.7118041515350342, "logps/chosen": -260.5646667480469, "logps/rejected": -232.9169921875, "loss": 0.1097, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.46867817640304565, "rewards/margins": 0.12946224212646484, "rewards/rejected": -0.5981403589248657, "step": 7990 }, { "epoch": 0.96, "learning_rate": 2.4566793488752795e-08, "logits/chosen": -1.9538524150848389, "logits/rejected": -1.9380991458892822, "logps/chosen": -240.32583618164062, "logps/rejected": -280.9888000488281, "loss": 0.1325, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5332067608833313, "rewards/margins": 0.08598224818706512, "rewards/rejected": -0.6191889643669128, "step": 8000 }, { "epoch": 0.96, "learning_rate": 2.3124241473027333e-08, "logits/chosen": -1.9650394916534424, "logits/rejected": -1.666481375694275, "logps/chosen": -229.7511444091797, "logps/rejected": -257.80889892578125, "loss": 0.1788, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.44220954179763794, "rewards/margins": 0.14231547713279724, "rewards/rejected": -0.5845250487327576, "step": 8010 }, { "epoch": 0.96, "learning_rate": 2.1725136978351934e-08, "logits/chosen": -2.1228089332580566, "logits/rejected": -1.5950334072113037, "logps/chosen": -295.29400634765625, "logps/rejected": -244.79330444335938, "loss": 0.1059, "rewards/accuracies": 0.625, "rewards/chosen": -0.36449581384658813, "rewards/margins": 0.2231093943119049, "rewards/rejected": -0.5876051783561707, "step": 8020 }, { "epoch": 0.96, "learning_rate": 2.036950454678166e-08, "logits/chosen": -2.112056016921997, "logits/rejected": -1.595947504043579, "logps/chosen": -281.60601806640625, "logps/rejected": -243.0486297607422, "loss": 0.1566, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3312150835990906, "rewards/margins": 0.19925157725811005, "rewards/rejected": -0.530466616153717, "step": 8030 }, { "epoch": 0.96, "learning_rate": 1.9057367957817096e-08, "logits/chosen": -1.9058849811553955, "logits/rejected": -1.4319086074829102, "logps/chosen": -244.7132568359375, "logps/rejected": -223.8097686767578, "loss": 0.09, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38957375288009644, "rewards/margins": 0.19439074397087097, "rewards/rejected": -0.583964467048645, "step": 8040 }, { "epoch": 0.97, "learning_rate": 1.778875022798693e-08, "logits/chosen": -1.5648739337921143, "logits/rejected": -1.51466965675354, "logps/chosen": -189.83966064453125, "logps/rejected": -232.4983367919922, "loss": 0.1424, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5467423796653748, "rewards/margins": 0.11828476190567017, "rewards/rejected": -0.6650272011756897, "step": 8050 }, { "epoch": 0.97, "learning_rate": 1.6563673610444363e-08, "logits/chosen": -2.011043071746826, "logits/rejected": -1.761691689491272, "logps/chosen": -261.29498291015625, "logps/rejected": -261.8739929199219, "loss": 0.0942, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5379740595817566, "rewards/margins": 0.1529974639415741, "rewards/rejected": -0.6909714937210083, "step": 8060 }, { "epoch": 0.97, "learning_rate": 1.5382159594576616e-08, "logits/chosen": -1.802074670791626, "logits/rejected": -1.4913840293884277, "logps/chosen": -231.8621368408203, "logps/rejected": -263.0806884765625, "loss": 0.1387, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5238322019577026, "rewards/margins": 0.1918632835149765, "rewards/rejected": -0.7156955003738403, "step": 8070 }, { "epoch": 0.97, "learning_rate": 1.424422890562771e-08, "logits/chosen": -2.1378629207611084, "logits/rejected": -1.9434757232666016, "logps/chosen": -232.4508514404297, "logps/rejected": -239.05911254882812, "loss": 0.1501, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.42561620473861694, "rewards/margins": 0.15531761944293976, "rewards/rejected": -0.5809338688850403, "step": 8080 }, { "epoch": 0.97, "learning_rate": 1.3149901504335706e-08, "logits/chosen": -1.9773696660995483, "logits/rejected": -1.4441204071044922, "logps/chosen": -237.9873504638672, "logps/rejected": -219.8182373046875, "loss": 0.1467, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.43333473801612854, "rewards/margins": 0.14528748393058777, "rewards/rejected": -0.5786222219467163, "step": 8090 }, { "epoch": 0.97, "learning_rate": 1.2099196586581596e-08, "logits/chosen": -1.9610668420791626, "logits/rejected": -1.8130521774291992, "logps/chosen": -229.5089569091797, "logps/rejected": -231.4257354736328, "loss": 0.1316, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.501410722732544, "rewards/margins": 0.13243083655834198, "rewards/rejected": -0.6338415741920471, "step": 8100 }, { "epoch": 0.97, "learning_rate": 1.1092132583053472e-08, "logits/chosen": -1.984513521194458, "logits/rejected": -1.6125694513320923, "logps/chosen": -297.69390869140625, "logps/rejected": -272.7558898925781, "loss": 0.0944, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4865299165248871, "rewards/margins": 0.19219791889190674, "rewards/rejected": -0.6787278056144714, "step": 8110 }, { "epoch": 0.97, "learning_rate": 1.0128727158922603e-08, "logits/chosen": -1.8715741634368896, "logits/rejected": -1.7751662731170654, "logps/chosen": -242.9435577392578, "logps/rejected": -240.58901977539062, "loss": 0.1389, "rewards/accuracies": 0.625, "rewards/chosen": -0.4711694121360779, "rewards/margins": 0.08951371163129807, "rewards/rejected": -0.5606831312179565, "step": 8120 }, { "epoch": 0.98, "learning_rate": 9.20899721353341e-09, "logits/chosen": -2.031369924545288, "logits/rejected": -1.8035614490509033, "logps/chosen": -258.4107971191406, "logps/rejected": -323.7535705566406, "loss": 0.1405, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4729674458503723, "rewards/margins": 0.15468671917915344, "rewards/rejected": -0.6276541948318481, "step": 8130 }, { "epoch": 0.98, "learning_rate": 8.332958880108155e-09, "logits/chosen": -2.0815842151641846, "logits/rejected": -1.6333885192871094, "logps/chosen": -278.9797668457031, "logps/rejected": -250.55886840820312, "loss": 0.1568, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3721701502799988, "rewards/margins": 0.18021699786186218, "rewards/rejected": -0.5523871183395386, "step": 8140 }, { "epoch": 0.98, "learning_rate": 7.500627525462711e-09, "logits/chosen": -2.008918285369873, "logits/rejected": -1.5942274332046509, "logps/chosen": -286.1038513183594, "logps/rejected": -229.10812377929688, "loss": 0.1275, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4887969493865967, "rewards/margins": 0.1278802454471588, "rewards/rejected": -0.6166771650314331, "step": 8150 }, { "epoch": 0.98, "learning_rate": 6.712017749737343e-09, "logits/chosen": -2.0941195487976074, "logits/rejected": -1.3795114755630493, "logps/chosen": -307.4806823730469, "logps/rejected": -249.28445434570312, "loss": 0.1405, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4613245129585266, "rewards/margins": 0.1985275000333786, "rewards/rejected": -0.6598520278930664, "step": 8160 }, { "epoch": 0.98, "learning_rate": 5.96714338614135e-09, "logits/chosen": -2.215099334716797, "logits/rejected": -1.6631901264190674, "logps/chosen": -375.0947265625, "logps/rejected": -313.10205078125, "loss": 0.1627, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.39094269275665283, "rewards/margins": 0.1836312711238861, "rewards/rejected": -0.5745739936828613, "step": 8170 }, { "epoch": 0.98, "learning_rate": 5.266017500709098e-09, "logits/chosen": -2.128441095352173, "logits/rejected": -1.8798637390136719, "logps/chosen": -252.12783813476562, "logps/rejected": -264.87506103515625, "loss": 0.1501, "rewards/accuracies": 0.5, "rewards/chosen": -0.4046745300292969, "rewards/margins": 0.11516742408275604, "rewards/rejected": -0.5198420286178589, "step": 8180 }, { "epoch": 0.98, "learning_rate": 4.608652392072144e-09, "logits/chosen": -2.095583438873291, "logits/rejected": -1.7360079288482666, "logps/chosen": -267.0264587402344, "logps/rejected": -242.8651885986328, "loss": 0.1365, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4499017596244812, "rewards/margins": 0.15220007300376892, "rewards/rejected": -0.6021018028259277, "step": 8190 }, { "epoch": 0.98, "learning_rate": 3.995059591242467e-09, "logits/chosen": -2.0483319759368896, "logits/rejected": -1.7100093364715576, "logps/chosen": -344.25360107421875, "logps/rejected": -320.6583251953125, "loss": 0.113, "rewards/accuracies": 0.75, "rewards/chosen": -0.4792884886264801, "rewards/margins": 0.13656087219715118, "rewards/rejected": -0.6158494353294373, "step": 8200 }, { "epoch": 0.99, "learning_rate": 3.4252498614106843e-09, "logits/chosen": -1.996120810508728, "logits/rejected": -1.5770938396453857, "logps/chosen": -301.490478515625, "logps/rejected": -247.4650421142578, "loss": 0.1302, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.49081936478614807, "rewards/margins": 0.14099135994911194, "rewards/rejected": -0.6318107843399048, "step": 8210 }, { "epoch": 0.99, "learning_rate": 2.8992331977570343e-09, "logits/chosen": -2.1005160808563232, "logits/rejected": -1.9152495861053467, "logps/chosen": -269.256591796875, "logps/rejected": -259.864501953125, "loss": 0.1759, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4604567885398865, "rewards/margins": 0.09413363039493561, "rewards/rejected": -0.5545904636383057, "step": 8220 }, { "epoch": 0.99, "learning_rate": 2.4170188272770736e-09, "logits/chosen": -2.0771050453186035, "logits/rejected": -1.7211967706680298, "logps/chosen": -322.2476501464844, "logps/rejected": -295.37115478515625, "loss": 0.1513, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.380420446395874, "rewards/margins": 0.18110953271389008, "rewards/rejected": -0.5615299940109253, "step": 8230 }, { "epoch": 0.99, "learning_rate": 1.9786152086181955e-09, "logits/chosen": -2.000828266143799, "logits/rejected": -1.55103600025177, "logps/chosen": -267.5838928222656, "logps/rejected": -248.3422393798828, "loss": 0.1141, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4607185423374176, "rewards/margins": 0.19947698712348938, "rewards/rejected": -0.660195529460907, "step": 8240 }, { "epoch": 0.99, "learning_rate": 1.5840300319316937e-09, "logits/chosen": -1.786924123764038, "logits/rejected": -1.3054568767547607, "logps/chosen": -270.92205810546875, "logps/rejected": -260.0880126953125, "loss": 0.1442, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.541989266872406, "rewards/margins": 0.18230721354484558, "rewards/rejected": -0.7242964506149292, "step": 8250 }, { "epoch": 0.99, "learning_rate": 1.23327021873898e-09, "logits/chosen": -2.0959393978118896, "logits/rejected": -1.521923303604126, "logps/chosen": -264.62353515625, "logps/rejected": -225.8861846923828, "loss": 0.1201, "rewards/accuracies": 0.625, "rewards/chosen": -0.37499696016311646, "rewards/margins": 0.20748789608478546, "rewards/rejected": -0.5824848413467407, "step": 8260 }, { "epoch": 0.99, "learning_rate": 9.263419218089042e-10, "logits/chosen": -1.8493993282318115, "logits/rejected": -1.476485013961792, "logps/chosen": -253.7661895751953, "logps/rejected": -263.26422119140625, "loss": 0.1452, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4299536645412445, "rewards/margins": 0.20787473022937775, "rewards/rejected": -0.6378284692764282, "step": 8270 }, { "epoch": 0.99, "learning_rate": 6.632505250506183e-10, "logits/chosen": -2.12581205368042, "logits/rejected": -1.7464358806610107, "logps/chosen": -305.6679382324219, "logps/rejected": -250.2874298095703, "loss": 0.1467, "rewards/accuracies": 0.625, "rewards/chosen": -0.44572052359580994, "rewards/margins": 0.15005187690258026, "rewards/rejected": -0.595772385597229, "step": 8280 }, { "epoch": 0.99, "learning_rate": 4.440006434183741e-10, "logits/chosen": -1.8823049068450928, "logits/rejected": -1.5557067394256592, "logps/chosen": -312.54608154296875, "logps/rejected": -316.3292541503906, "loss": 0.1481, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5499744415283203, "rewards/margins": 0.11392368376255035, "rewards/rejected": -0.663898229598999, "step": 8290 }, { "epoch": 1.0, "learning_rate": 2.6859612283186567e-10, "logits/chosen": -2.018319845199585, "logits/rejected": -1.6919094324111938, "logps/chosen": -225.6822052001953, "logps/rejected": -243.1230926513672, "loss": 0.1243, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4279976785182953, "rewards/margins": 0.1937953531742096, "rewards/rejected": -0.6217929720878601, "step": 8300 }, { "epoch": 1.0, "learning_rate": 1.370400401065619e-10, "logits/chosen": -1.9666255712509155, "logits/rejected": -1.7945621013641357, "logps/chosen": -199.1581573486328, "logps/rejected": -203.54124450683594, "loss": 0.1544, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5227402448654175, "rewards/margins": 0.12387014925479889, "rewards/rejected": -0.6466103792190552, "step": 8310 }, { "epoch": 1.0, "learning_rate": 4.933470290263698e-11, "logits/chosen": -2.083996295928955, "logits/rejected": -1.7333097457885742, "logps/chosen": -277.2462463378906, "logps/rejected": -264.87225341796875, "loss": 0.155, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4118651747703552, "rewards/margins": 0.1753673553466797, "rewards/rejected": -0.5872325301170349, "step": 8320 }, { "epoch": 1.0, "learning_rate": 5.481649681671197e-12, "logits/chosen": -1.8581920862197876, "logits/rejected": -1.672580361366272, "logps/chosen": -255.0128173828125, "logps/rejected": -287.12408447265625, "loss": 0.1253, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5130528211593628, "rewards/margins": 0.1296093761920929, "rewards/rejected": -0.6426622867584229, "step": 8330 }, { "epoch": 1.0, "step": 8335, "total_flos": 0.0, "train_loss": 0.14340321629899808, "train_runtime": 34860.3009, "train_samples_per_second": 0.956, "train_steps_per_second": 0.239 } ], "logging_steps": 10, "max_steps": 8335, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }