{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 7000, "global_step": 16770, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.981514609421586e-09, "logits/chosen": -3.0218403339385986, "logits/rejected": -2.940047025680542, "logps/chosen": -73.02317810058594, "logps/rejected": -48.23734664916992, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.9815146094215865e-08, "logits/chosen": -2.9619266986846924, "logits/rejected": -2.9563283920288086, "logps/chosen": -62.74790954589844, "logps/rejected": -44.95586395263672, "loss": 0.693, "rewards/accuracies": 0.5, "rewards/chosen": -0.00012704107211902738, "rewards/margins": 0.0005221219034865499, "rewards/rejected": -0.0006491629173979163, "step": 10 }, { "epoch": 0.0, "learning_rate": 5.963029218843173e-08, "logits/chosen": -2.960465908050537, "logits/rejected": -2.9539878368377686, "logps/chosen": -75.64433288574219, "logps/rejected": -44.491546630859375, "loss": 0.6932, "rewards/accuracies": 0.375, "rewards/chosen": -0.00027207276434637606, "rewards/margins": -0.00038699532160535455, "rewards/rejected": 0.00011492250632727519, "step": 20 }, { "epoch": 0.0, "learning_rate": 8.94454382826476e-08, "logits/chosen": -2.9388415813446045, "logits/rejected": -2.9425132274627686, "logps/chosen": -69.98499298095703, "logps/rejected": -44.00871276855469, "loss": 0.693, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.000582454726099968, "rewards/margins": 0.00027978423167951405, "rewards/rejected": 0.0003026704944204539, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.1926058437686346e-07, "logits/chosen": -2.9837682247161865, "logits/rejected": -2.947817325592041, "logps/chosen": -69.46333312988281, "logps/rejected": -42.51030731201172, "loss": 0.6931, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.00010345459304517135, "rewards/margins": 5.823614264954813e-05, "rewards/rejected": 4.521848677541129e-05, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.490757304710793e-07, "logits/chosen": -2.930995225906372, "logits/rejected": -2.9113659858703613, "logps/chosen": -68.5018310546875, "logps/rejected": -44.30644989013672, "loss": 0.6932, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0005911254556849599, "rewards/margins": -2.3979193429113366e-05, "rewards/rejected": -0.0005671462859027088, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.788908765652952e-07, "logits/chosen": -3.00565505027771, "logits/rejected": -2.988889217376709, "logps/chosen": -70.67076110839844, "logps/rejected": -45.866981506347656, "loss": 0.6931, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.00041712570236995816, "rewards/margins": 7.655953231733292e-05, "rewards/rejected": 0.00034056618460454047, "step": 60 }, { "epoch": 0.0, "learning_rate": 2.0870602265951104e-07, "logits/chosen": -2.985520124435425, "logits/rejected": -2.947092056274414, "logps/chosen": -72.18915557861328, "logps/rejected": -45.6812858581543, "loss": 0.6927, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 7.899569027358666e-05, "rewards/margins": 0.0009106778306886554, "rewards/rejected": -0.0008316821185871959, "step": 70 }, { "epoch": 0.0, "learning_rate": 2.385211687537269e-07, "logits/chosen": -2.9711382389068604, "logits/rejected": -2.93875789642334, "logps/chosen": -71.97798156738281, "logps/rejected": -44.88883972167969, "loss": 0.6923, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0008017882937565446, "rewards/margins": 0.0018558672163635492, "rewards/rejected": -0.0010540790390223265, "step": 80 }, { "epoch": 0.01, "learning_rate": 2.6833631484794277e-07, "logits/chosen": -2.9703755378723145, "logits/rejected": -2.9743106365203857, "logps/chosen": -75.0093994140625, "logps/rejected": -44.32701873779297, "loss": 0.6921, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.0006831464124843478, "rewards/margins": 0.0019017761806026101, "rewards/rejected": -0.0012186297681182623, "step": 90 }, { "epoch": 0.01, "learning_rate": 2.981514609421586e-07, "logits/chosen": -2.9895684719085693, "logits/rejected": -3.004122734069824, "logps/chosen": -71.56358337402344, "logps/rejected": -45.33041000366211, "loss": 0.6915, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.0017809504643082619, "rewards/margins": 0.003538265125826001, "rewards/rejected": -0.0017573146615177393, "step": 100 }, { "epoch": 0.01, "learning_rate": 3.2796660703637447e-07, "logits/chosen": -2.963571071624756, "logits/rejected": -2.928189516067505, "logps/chosen": -69.69255065917969, "logps/rejected": -44.475135803222656, "loss": 0.6909, "rewards/accuracies": 1.0, "rewards/chosen": 0.0015576332807540894, "rewards/margins": 0.0045606764033436775, "rewards/rejected": -0.003003043122589588, "step": 110 }, { "epoch": 0.01, "learning_rate": 3.577817531305904e-07, "logits/chosen": -2.974503993988037, "logits/rejected": -2.9577651023864746, "logps/chosen": -67.06887817382812, "logps/rejected": -45.446311950683594, "loss": 0.6905, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.0023120190016925335, "rewards/margins": 0.005582844372838736, "rewards/rejected": -0.003270825371146202, "step": 120 }, { "epoch": 0.01, "learning_rate": 3.8759689922480623e-07, "logits/chosen": -2.995654344558716, "logits/rejected": -2.973827600479126, "logps/chosen": -64.65982055664062, "logps/rejected": -44.023311614990234, "loss": 0.6897, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.0025220434181392193, "rewards/margins": 0.006847357843071222, "rewards/rejected": -0.004325315356254578, "step": 130 }, { "epoch": 0.01, "learning_rate": 4.174120453190221e-07, "logits/chosen": -2.967301845550537, "logits/rejected": -2.9388742446899414, "logps/chosen": -71.3984603881836, "logps/rejected": -44.561988830566406, "loss": 0.6883, "rewards/accuracies": 1.0, "rewards/chosen": 0.0037331501953303814, "rewards/margins": 0.009782666340470314, "rewards/rejected": -0.006049515679478645, "step": 140 }, { "epoch": 0.01, "learning_rate": 4.47227191413238e-07, "logits/chosen": -2.9889566898345947, "logits/rejected": -2.9502789974212646, "logps/chosen": -73.5901870727539, "logps/rejected": -44.54457473754883, "loss": 0.6871, "rewards/accuracies": 1.0, "rewards/chosen": 0.005121590569615364, "rewards/margins": 0.012893019244074821, "rewards/rejected": -0.007771429605782032, "step": 150 }, { "epoch": 0.01, "learning_rate": 4.770423375074538e-07, "logits/chosen": -2.997464895248413, "logits/rejected": -2.973012924194336, "logps/chosen": -67.8456802368164, "logps/rejected": -44.57398223876953, "loss": 0.6861, "rewards/accuracies": 1.0, "rewards/chosen": 0.005220805760473013, "rewards/margins": 0.014461624436080456, "rewards/rejected": -0.00924081914126873, "step": 160 }, { "epoch": 0.01, "learning_rate": 5.068574836016696e-07, "logits/chosen": -2.954655885696411, "logits/rejected": -2.938756227493286, "logps/chosen": -74.64299011230469, "logps/rejected": -44.95725631713867, "loss": 0.6845, "rewards/accuracies": 1.0, "rewards/chosen": 0.006698357407003641, "rewards/margins": 0.0172797292470932, "rewards/rejected": -0.010581372305750847, "step": 170 }, { "epoch": 0.01, "learning_rate": 5.366726296958855e-07, "logits/chosen": -2.9657092094421387, "logits/rejected": -2.942619800567627, "logps/chosen": -72.10955810546875, "logps/rejected": -45.92462921142578, "loss": 0.6827, "rewards/accuracies": 1.0, "rewards/chosen": 0.007762356661260128, "rewards/margins": 0.02173658087849617, "rewards/rejected": -0.013974225148558617, "step": 180 }, { "epoch": 0.01, "learning_rate": 5.664877757901014e-07, "logits/chosen": -3.0228991508483887, "logits/rejected": -2.971086025238037, "logps/chosen": -76.38833618164062, "logps/rejected": -47.514808654785156, "loss": 0.6804, "rewards/accuracies": 1.0, "rewards/chosen": 0.010731477290391922, "rewards/margins": 0.027403127402067184, "rewards/rejected": -0.01667165383696556, "step": 190 }, { "epoch": 0.01, "learning_rate": 5.963029218843172e-07, "logits/chosen": -2.9411473274230957, "logits/rejected": -2.9228625297546387, "logps/chosen": -70.54524230957031, "logps/rejected": -45.080833435058594, "loss": 0.6783, "rewards/accuracies": 1.0, "rewards/chosen": 0.01168130338191986, "rewards/margins": 0.03029986284673214, "rewards/rejected": -0.018618561327457428, "step": 200 }, { "epoch": 0.01, "learning_rate": 6.26118067978533e-07, "logits/chosen": -2.9853718280792236, "logits/rejected": -2.964855670928955, "logps/chosen": -65.7616195678711, "logps/rejected": -45.29457473754883, "loss": 0.6759, "rewards/accuracies": 1.0, "rewards/chosen": 0.012055915780365467, "rewards/margins": 0.03174392506480217, "rewards/rejected": -0.019688012078404427, "step": 210 }, { "epoch": 0.01, "learning_rate": 6.559332140727489e-07, "logits/chosen": -2.9891045093536377, "logits/rejected": -2.9741458892822266, "logps/chosen": -76.55693054199219, "logps/rejected": -47.39777755737305, "loss": 0.6716, "rewards/accuracies": 1.0, "rewards/chosen": 0.018879849463701248, "rewards/margins": 0.045966826379299164, "rewards/rejected": -0.027086973190307617, "step": 220 }, { "epoch": 0.01, "learning_rate": 6.857483601669648e-07, "logits/chosen": -2.9521539211273193, "logits/rejected": -2.9273805618286133, "logps/chosen": -73.84449005126953, "logps/rejected": -47.50820541381836, "loss": 0.6706, "rewards/accuracies": 1.0, "rewards/chosen": 0.018496818840503693, "rewards/margins": 0.04870253801345825, "rewards/rejected": -0.030205722898244858, "step": 230 }, { "epoch": 0.01, "learning_rate": 7.155635062611808e-07, "logits/chosen": -2.990572690963745, "logits/rejected": -2.996241569519043, "logps/chosen": -69.92765808105469, "logps/rejected": -47.41942596435547, "loss": 0.666, "rewards/accuracies": 1.0, "rewards/chosen": 0.01791529916226864, "rewards/margins": 0.055198751389980316, "rewards/rejected": -0.037283457815647125, "step": 240 }, { "epoch": 0.01, "learning_rate": 7.453786523553966e-07, "logits/chosen": -2.9767189025878906, "logits/rejected": -2.9706878662109375, "logps/chosen": -70.4488525390625, "logps/rejected": -49.38618469238281, "loss": 0.6655, "rewards/accuracies": 1.0, "rewards/chosen": 0.01426013559103012, "rewards/margins": 0.055192362517118454, "rewards/rejected": -0.04093223437666893, "step": 250 }, { "epoch": 0.02, "learning_rate": 7.751937984496125e-07, "logits/chosen": -2.9992268085479736, "logits/rejected": -2.9801385402679443, "logps/chosen": -71.62583923339844, "logps/rejected": -48.7337646484375, "loss": 0.6615, "rewards/accuracies": 1.0, "rewards/chosen": 0.022342320531606674, "rewards/margins": 0.062458496540784836, "rewards/rejected": -0.04011617973446846, "step": 260 }, { "epoch": 0.02, "learning_rate": 8.050089445438284e-07, "logits/chosen": -2.9760518074035645, "logits/rejected": -2.9756836891174316, "logps/chosen": -69.02963256835938, "logps/rejected": -47.99614715576172, "loss": 0.6541, "rewards/accuracies": 1.0, "rewards/chosen": 0.03307611495256424, "rewards/margins": 0.07970432192087173, "rewards/rejected": -0.04662821814417839, "step": 270 }, { "epoch": 0.02, "learning_rate": 8.348240906380442e-07, "logits/chosen": -2.972804307937622, "logits/rejected": -2.973416328430176, "logps/chosen": -73.17513275146484, "logps/rejected": -48.799110412597656, "loss": 0.6536, "rewards/accuracies": 1.0, "rewards/chosen": 0.03613675758242607, "rewards/margins": 0.0900457501411438, "rewards/rejected": -0.05390900373458862, "step": 280 }, { "epoch": 0.02, "learning_rate": 8.646392367322601e-07, "logits/chosen": -2.988776683807373, "logits/rejected": -2.9511101245880127, "logps/chosen": -60.7444953918457, "logps/rejected": -50.1580696105957, "loss": 0.6511, "rewards/accuracies": 1.0, "rewards/chosen": 0.02103568986058235, "rewards/margins": 0.07972956448793411, "rewards/rejected": -0.05869387462735176, "step": 290 }, { "epoch": 0.02, "learning_rate": 8.94454382826476e-07, "logits/chosen": -2.9847099781036377, "logits/rejected": -2.9715585708618164, "logps/chosen": -71.0445327758789, "logps/rejected": -50.474178314208984, "loss": 0.6438, "rewards/accuracies": 1.0, "rewards/chosen": 0.04130948334932327, "rewards/margins": 0.1042775884270668, "rewards/rejected": -0.06296811997890472, "step": 300 }, { "epoch": 0.02, "learning_rate": 9.242695289206919e-07, "logits/chosen": -2.991835117340088, "logits/rejected": -2.986812114715576, "logps/chosen": -67.70176696777344, "logps/rejected": -51.52722930908203, "loss": 0.637, "rewards/accuracies": 1.0, "rewards/chosen": 0.04277125000953674, "rewards/margins": 0.11644142866134644, "rewards/rejected": -0.0736701637506485, "step": 310 }, { "epoch": 0.02, "learning_rate": 9.540846750149077e-07, "logits/chosen": -2.982632875442505, "logits/rejected": -2.9579172134399414, "logps/chosen": -61.857505798339844, "logps/rejected": -51.969200134277344, "loss": 0.6312, "rewards/accuracies": 1.0, "rewards/chosen": 0.04146171361207962, "rewards/margins": 0.11927430331707001, "rewards/rejected": -0.07781258970499039, "step": 320 }, { "epoch": 0.02, "learning_rate": 9.838998211091236e-07, "logits/chosen": -3.017160415649414, "logits/rejected": -2.989964008331299, "logps/chosen": -65.59489440917969, "logps/rejected": -55.42946243286133, "loss": 0.6189, "rewards/accuracies": 1.0, "rewards/chosen": 0.05441339686512947, "rewards/margins": 0.1520194113254547, "rewards/rejected": -0.09760601818561554, "step": 330 }, { "epoch": 0.02, "learning_rate": 1.0137149672033393e-06, "logits/chosen": -2.9803266525268555, "logits/rejected": -2.9845376014709473, "logps/chosen": -65.06605529785156, "logps/rejected": -54.811424255371094, "loss": 0.6096, "rewards/accuracies": 1.0, "rewards/chosen": 0.07225533574819565, "rewards/margins": 0.18390336632728577, "rewards/rejected": -0.11164804548025131, "step": 340 }, { "epoch": 0.02, "learning_rate": 1.0435301132975552e-06, "logits/chosen": -2.964911937713623, "logits/rejected": -2.96053147315979, "logps/chosen": -65.23785400390625, "logps/rejected": -57.24370574951172, "loss": 0.6011, "rewards/accuracies": 1.0, "rewards/chosen": 0.059714823961257935, "rewards/margins": 0.18882234394550323, "rewards/rejected": -0.1291075348854065, "step": 350 }, { "epoch": 0.02, "learning_rate": 1.073345259391771e-06, "logits/chosen": -2.974783420562744, "logits/rejected": -2.96232271194458, "logps/chosen": -64.95095825195312, "logps/rejected": -60.1099853515625, "loss": 0.5879, "rewards/accuracies": 1.0, "rewards/chosen": 0.0736270323395729, "rewards/margins": 0.21903876960277557, "rewards/rejected": -0.14541175961494446, "step": 360 }, { "epoch": 0.02, "learning_rate": 1.103160405485987e-06, "logits/chosen": -2.9943270683288574, "logits/rejected": -2.9654793739318848, "logps/chosen": -59.869659423828125, "logps/rejected": -60.060264587402344, "loss": 0.5776, "rewards/accuracies": 1.0, "rewards/chosen": 0.10907473415136337, "rewards/margins": 0.25231292843818665, "rewards/rejected": -0.14323820173740387, "step": 370 }, { "epoch": 0.02, "learning_rate": 1.1329755515802029e-06, "logits/chosen": -2.983459949493408, "logits/rejected": -2.9790661334991455, "logps/chosen": -68.365966796875, "logps/rejected": -62.22774124145508, "loss": 0.5663, "rewards/accuracies": 1.0, "rewards/chosen": 0.10884684324264526, "rewards/margins": 0.27483612298965454, "rewards/rejected": -0.1659892499446869, "step": 380 }, { "epoch": 0.02, "learning_rate": 1.1627906976744188e-06, "logits/chosen": -2.971078872680664, "logits/rejected": -2.9647915363311768, "logps/chosen": -54.592071533203125, "logps/rejected": -65.0318832397461, "loss": 0.5462, "rewards/accuracies": 1.0, "rewards/chosen": 0.11747223138809204, "rewards/margins": 0.3192733824253082, "rewards/rejected": -0.2018011510372162, "step": 390 }, { "epoch": 0.02, "learning_rate": 1.1926058437686345e-06, "logits/chosen": -2.9642443656921387, "logits/rejected": -2.9650585651397705, "logps/chosen": -56.22089385986328, "logps/rejected": -65.07490539550781, "loss": 0.5367, "rewards/accuracies": 1.0, "rewards/chosen": 0.14380133152008057, "rewards/margins": 0.3455009460449219, "rewards/rejected": -0.2016996145248413, "step": 400 }, { "epoch": 0.02, "learning_rate": 1.2224209898628504e-06, "logits/chosen": -2.9602420330047607, "logits/rejected": -2.984884262084961, "logps/chosen": -54.9572639465332, "logps/rejected": -67.0857162475586, "loss": 0.5246, "rewards/accuracies": 1.0, "rewards/chosen": 0.1513342559337616, "rewards/margins": 0.3829588294029236, "rewards/rejected": -0.2316245287656784, "step": 410 }, { "epoch": 0.03, "learning_rate": 1.252236135957066e-06, "logits/chosen": -2.984032154083252, "logits/rejected": -2.960697650909424, "logps/chosen": -53.50678253173828, "logps/rejected": -70.7410659790039, "loss": 0.5036, "rewards/accuracies": 1.0, "rewards/chosen": 0.17459207773208618, "rewards/margins": 0.4419061541557312, "rewards/rejected": -0.26731401681900024, "step": 420 }, { "epoch": 0.03, "learning_rate": 1.282051282051282e-06, "logits/chosen": -3.006791591644287, "logits/rejected": -2.9790358543395996, "logps/chosen": -50.71213912963867, "logps/rejected": -75.37886047363281, "loss": 0.4926, "rewards/accuracies": 1.0, "rewards/chosen": 0.14115287363529205, "rewards/margins": 0.438680499792099, "rewards/rejected": -0.29752764105796814, "step": 430 }, { "epoch": 0.03, "learning_rate": 1.3118664281454979e-06, "logits/chosen": -2.9824748039245605, "logits/rejected": -2.9848923683166504, "logps/chosen": -51.81050491333008, "logps/rejected": -74.18791961669922, "loss": 0.4829, "rewards/accuracies": 1.0, "rewards/chosen": 0.19691374897956848, "rewards/margins": 0.4909387230873108, "rewards/rejected": -0.2940249443054199, "step": 440 }, { "epoch": 0.03, "learning_rate": 1.3416815742397138e-06, "logits/chosen": -2.954195737838745, "logits/rejected": -2.957263946533203, "logps/chosen": -48.250526428222656, "logps/rejected": -73.97734069824219, "loss": 0.4813, "rewards/accuracies": 1.0, "rewards/chosen": 0.17701885104179382, "rewards/margins": 0.4652382731437683, "rewards/rejected": -0.2882193922996521, "step": 450 }, { "epoch": 0.03, "learning_rate": 1.3714967203339297e-06, "logits/chosen": -3.00996732711792, "logits/rejected": -3.002124309539795, "logps/chosen": -46.57711410522461, "logps/rejected": -80.85877990722656, "loss": 0.4508, "rewards/accuracies": 1.0, "rewards/chosen": 0.20726975798606873, "rewards/margins": 0.5677331686019897, "rewards/rejected": -0.36046338081359863, "step": 460 }, { "epoch": 0.03, "learning_rate": 1.4013118664281456e-06, "logits/chosen": -2.954026937484741, "logits/rejected": -2.932624340057373, "logps/chosen": -46.166603088378906, "logps/rejected": -75.62085723876953, "loss": 0.4484, "rewards/accuracies": 1.0, "rewards/chosen": 0.1909331977367401, "rewards/margins": 0.5219781398773193, "rewards/rejected": -0.3310449421405792, "step": 470 }, { "epoch": 0.03, "learning_rate": 1.4311270125223615e-06, "logits/chosen": -2.980708360671997, "logits/rejected": -2.976071834564209, "logps/chosen": -47.06877899169922, "logps/rejected": -85.34896850585938, "loss": 0.4285, "rewards/accuracies": 1.0, "rewards/chosen": 0.2043842375278473, "rewards/margins": 0.6243103742599487, "rewards/rejected": -0.41992610692977905, "step": 480 }, { "epoch": 0.03, "learning_rate": 1.4609421586165772e-06, "logits/chosen": -2.9911887645721436, "logits/rejected": -2.943816661834717, "logps/chosen": -46.52109146118164, "logps/rejected": -87.4489517211914, "loss": 0.4143, "rewards/accuracies": 1.0, "rewards/chosen": 0.23063774406909943, "rewards/margins": 0.674059271812439, "rewards/rejected": -0.4434216022491455, "step": 490 }, { "epoch": 0.03, "learning_rate": 1.490757304710793e-06, "logits/chosen": -2.9820945262908936, "logits/rejected": -2.9838337898254395, "logps/chosen": -54.989166259765625, "logps/rejected": -90.59204864501953, "loss": 0.3981, "rewards/accuracies": 1.0, "rewards/chosen": 0.23931631445884705, "rewards/margins": 0.7045159339904785, "rewards/rejected": -0.4651995599269867, "step": 500 }, { "epoch": 0.03, "learning_rate": 1.520572450805009e-06, "logits/chosen": -2.9805257320404053, "logits/rejected": -2.9528656005859375, "logps/chosen": -46.284706115722656, "logps/rejected": -95.25877380371094, "loss": 0.3966, "rewards/accuracies": 1.0, "rewards/chosen": 0.2438308745622635, "rewards/margins": 0.7423723936080933, "rewards/rejected": -0.49854153394699097, "step": 510 }, { "epoch": 0.03, "learning_rate": 1.550387596899225e-06, "logits/chosen": -2.9418282508850098, "logits/rejected": -2.9414196014404297, "logps/chosen": -48.37353515625, "logps/rejected": -91.3024673461914, "loss": 0.396, "rewards/accuracies": 1.0, "rewards/chosen": 0.2332138568162918, "rewards/margins": 0.7105187773704529, "rewards/rejected": -0.4773048758506775, "step": 520 }, { "epoch": 0.03, "learning_rate": 1.5802027429934408e-06, "logits/chosen": -2.9739184379577637, "logits/rejected": -2.97147798538208, "logps/chosen": -47.54608154296875, "logps/rejected": -96.32991027832031, "loss": 0.382, "rewards/accuracies": 1.0, "rewards/chosen": 0.23890534043312073, "rewards/margins": 0.7606258392333984, "rewards/rejected": -0.5217204093933105, "step": 530 }, { "epoch": 0.03, "learning_rate": 1.6100178890876567e-06, "logits/chosen": -2.980813503265381, "logits/rejected": -2.963414430618286, "logps/chosen": -50.37831115722656, "logps/rejected": -96.20535278320312, "loss": 0.3834, "rewards/accuracies": 1.0, "rewards/chosen": 0.24395489692687988, "rewards/margins": 0.7731548547744751, "rewards/rejected": -0.5291999578475952, "step": 540 }, { "epoch": 0.03, "learning_rate": 1.6398330351818726e-06, "logits/chosen": -2.966637134552002, "logits/rejected": -2.932892322540283, "logps/chosen": -44.41714096069336, "logps/rejected": -100.84266662597656, "loss": 0.3651, "rewards/accuracies": 1.0, "rewards/chosen": 0.25574031472206116, "rewards/margins": 0.8291279077529907, "rewards/rejected": -0.5733876824378967, "step": 550 }, { "epoch": 0.03, "learning_rate": 1.6696481812760883e-06, "logits/chosen": -2.9682445526123047, "logits/rejected": -2.9829764366149902, "logps/chosen": -43.2005500793457, "logps/rejected": -104.4155044555664, "loss": 0.361, "rewards/accuracies": 1.0, "rewards/chosen": 0.24430926144123077, "rewards/margins": 0.8398653268814087, "rewards/rejected": -0.5955560803413391, "step": 560 }, { "epoch": 0.03, "learning_rate": 1.6994633273703042e-06, "logits/chosen": -3.0001132488250732, "logits/rejected": -2.978228807449341, "logps/chosen": -47.23787307739258, "logps/rejected": -99.9131088256836, "loss": 0.3547, "rewards/accuracies": 1.0, "rewards/chosen": 0.28121238946914673, "rewards/margins": 0.8462437391281128, "rewards/rejected": -0.5650314092636108, "step": 570 }, { "epoch": 0.03, "learning_rate": 1.7292784734645201e-06, "logits/chosen": -2.9574625492095947, "logits/rejected": -2.928567409515381, "logps/chosen": -39.49331283569336, "logps/rejected": -107.9442138671875, "loss": 0.3386, "rewards/accuracies": 1.0, "rewards/chosen": 0.2691844701766968, "rewards/margins": 0.9074603915214539, "rewards/rejected": -0.6382759809494019, "step": 580 }, { "epoch": 0.04, "learning_rate": 1.759093619558736e-06, "logits/chosen": -2.9369075298309326, "logits/rejected": -2.9088616371154785, "logps/chosen": -42.70224380493164, "logps/rejected": -110.30489349365234, "loss": 0.3383, "rewards/accuracies": 1.0, "rewards/chosen": 0.27101626992225647, "rewards/margins": 0.9230680465698242, "rewards/rejected": -0.6520518064498901, "step": 590 }, { "epoch": 0.04, "learning_rate": 1.788908765652952e-06, "logits/chosen": -2.930457592010498, "logits/rejected": -2.91943097114563, "logps/chosen": -43.339229583740234, "logps/rejected": -107.48307800292969, "loss": 0.3407, "rewards/accuracies": 1.0, "rewards/chosen": 0.25904375314712524, "rewards/margins": 0.8748016357421875, "rewards/rejected": -0.615757942199707, "step": 600 }, { "epoch": 0.04, "learning_rate": 1.8187239117471678e-06, "logits/chosen": -2.9587793350219727, "logits/rejected": -2.9344980716705322, "logps/chosen": -39.99372482299805, "logps/rejected": -109.69963073730469, "loss": 0.3289, "rewards/accuracies": 1.0, "rewards/chosen": 0.2784316837787628, "rewards/margins": 0.9442272186279297, "rewards/rejected": -0.6657954454421997, "step": 610 }, { "epoch": 0.04, "learning_rate": 1.8485390578413837e-06, "logits/chosen": -2.971811532974243, "logits/rejected": -2.9104809761047363, "logps/chosen": -38.652320861816406, "logps/rejected": -113.38652038574219, "loss": 0.3233, "rewards/accuracies": 1.0, "rewards/chosen": 0.2652958333492279, "rewards/margins": 0.9442809820175171, "rewards/rejected": -0.6789851188659668, "step": 620 }, { "epoch": 0.04, "learning_rate": 1.8783542039355994e-06, "logits/chosen": -2.9191136360168457, "logits/rejected": -2.8827223777770996, "logps/chosen": -42.28081512451172, "logps/rejected": -119.88639831542969, "loss": 0.306, "rewards/accuracies": 1.0, "rewards/chosen": 0.29589518904685974, "rewards/margins": 1.0485247373580933, "rewards/rejected": -0.7526295781135559, "step": 630 }, { "epoch": 0.04, "learning_rate": 1.9081693500298153e-06, "logits/chosen": -2.957550048828125, "logits/rejected": -2.9400954246520996, "logps/chosen": -34.615272521972656, "logps/rejected": -121.06828308105469, "loss": 0.303, "rewards/accuracies": 1.0, "rewards/chosen": 0.2980164587497711, "rewards/margins": 1.0627772808074951, "rewards/rejected": -0.7647607326507568, "step": 640 }, { "epoch": 0.04, "learning_rate": 1.9379844961240315e-06, "logits/chosen": -2.9378323554992676, "logits/rejected": -2.9265735149383545, "logps/chosen": -42.065208435058594, "logps/rejected": -117.59049987792969, "loss": 0.3034, "rewards/accuracies": 1.0, "rewards/chosen": 0.27747949957847595, "rewards/margins": 1.001206636428833, "rewards/rejected": -0.7237271666526794, "step": 650 }, { "epoch": 0.04, "learning_rate": 1.967799642218247e-06, "logits/chosen": -2.9484689235687256, "logits/rejected": -2.9170165061950684, "logps/chosen": -38.046356201171875, "logps/rejected": -125.5303955078125, "loss": 0.2924, "rewards/accuracies": 1.0, "rewards/chosen": 0.3038211464881897, "rewards/margins": 1.1033309698104858, "rewards/rejected": -0.7995098233222961, "step": 660 }, { "epoch": 0.04, "learning_rate": 1.997614788312463e-06, "logits/chosen": -2.9291698932647705, "logits/rejected": -2.913149356842041, "logps/chosen": -40.59954071044922, "logps/rejected": -127.72261047363281, "loss": 0.28, "rewards/accuracies": 1.0, "rewards/chosen": 0.29420167207717896, "rewards/margins": 1.1142350435256958, "rewards/rejected": -0.8200333714485168, "step": 670 }, { "epoch": 0.04, "learning_rate": 2.0274299344066785e-06, "logits/chosen": -2.97379732131958, "logits/rejected": -2.90913724899292, "logps/chosen": -41.133975982666016, "logps/rejected": -131.36251831054688, "loss": 0.2704, "rewards/accuracies": 1.0, "rewards/chosen": 0.2960251271724701, "rewards/margins": 1.1637284755706787, "rewards/rejected": -0.8677034378051758, "step": 680 }, { "epoch": 0.04, "learning_rate": 2.0572450805008946e-06, "logits/chosen": -2.957440137863159, "logits/rejected": -2.902618885040283, "logps/chosen": -38.768409729003906, "logps/rejected": -135.829833984375, "loss": 0.2545, "rewards/accuracies": 1.0, "rewards/chosen": 0.32904133200645447, "rewards/margins": 1.2436773777008057, "rewards/rejected": -0.9146361351013184, "step": 690 }, { "epoch": 0.04, "learning_rate": 2.0870602265951103e-06, "logits/chosen": -2.9167263507843018, "logits/rejected": -2.890842914581299, "logps/chosen": -44.03474807739258, "logps/rejected": -139.43023681640625, "loss": 0.2503, "rewards/accuracies": 1.0, "rewards/chosen": 0.30538904666900635, "rewards/margins": 1.2593690156936646, "rewards/rejected": -0.9539799690246582, "step": 700 }, { "epoch": 0.04, "learning_rate": 2.1168753726893265e-06, "logits/chosen": -2.924889326095581, "logits/rejected": -2.8740432262420654, "logps/chosen": -40.46274185180664, "logps/rejected": -145.13734436035156, "loss": 0.2428, "rewards/accuracies": 1.0, "rewards/chosen": 0.2993396520614624, "rewards/margins": 1.3159592151641846, "rewards/rejected": -1.0166196823120117, "step": 710 }, { "epoch": 0.04, "learning_rate": 2.146690518783542e-06, "logits/chosen": -2.9009227752685547, "logits/rejected": -2.9008853435516357, "logps/chosen": -37.61160659790039, "logps/rejected": -144.91842651367188, "loss": 0.239, "rewards/accuracies": 1.0, "rewards/chosen": 0.2984068989753723, "rewards/margins": 1.3076552152633667, "rewards/rejected": -1.00924813747406, "step": 720 }, { "epoch": 0.04, "learning_rate": 2.176505664877758e-06, "logits/chosen": -2.9028007984161377, "logits/rejected": -2.8786234855651855, "logps/chosen": -38.436622619628906, "logps/rejected": -151.5777130126953, "loss": 0.2284, "rewards/accuracies": 1.0, "rewards/chosen": 0.3371207118034363, "rewards/margins": 1.4050734043121338, "rewards/rejected": -1.0679528713226318, "step": 730 }, { "epoch": 0.04, "learning_rate": 2.206320810971974e-06, "logits/chosen": -2.916623592376709, "logits/rejected": -2.8668630123138428, "logps/chosen": -39.584632873535156, "logps/rejected": -150.6464080810547, "loss": 0.2279, "rewards/accuracies": 1.0, "rewards/chosen": 0.30783018469810486, "rewards/margins": 1.3758673667907715, "rewards/rejected": -1.0680371522903442, "step": 740 }, { "epoch": 0.04, "learning_rate": 2.2361359570661897e-06, "logits/chosen": -2.887272357940674, "logits/rejected": -2.8534915447235107, "logps/chosen": -36.246482849121094, "logps/rejected": -153.67608642578125, "loss": 0.2179, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153519034385681, "rewards/margins": 1.4092376232147217, "rewards/rejected": -1.0938857793807983, "step": 750 }, { "epoch": 0.05, "learning_rate": 2.2659511031604058e-06, "logits/chosen": -2.8925223350524902, "logits/rejected": -2.884477138519287, "logps/chosen": -36.67290115356445, "logps/rejected": -161.21798706054688, "loss": 0.2039, "rewards/accuracies": 1.0, "rewards/chosen": 0.3240107595920563, "rewards/margins": 1.5021789073944092, "rewards/rejected": -1.1781680583953857, "step": 760 }, { "epoch": 0.05, "learning_rate": 2.2957662492546215e-06, "logits/chosen": -2.88120698928833, "logits/rejected": -2.802581310272217, "logps/chosen": -40.013450622558594, "logps/rejected": -159.5142822265625, "loss": 0.2114, "rewards/accuracies": 1.0, "rewards/chosen": 0.33028337359428406, "rewards/margins": 1.4851129055023193, "rewards/rejected": -1.154829502105713, "step": 770 }, { "epoch": 0.05, "learning_rate": 2.3255813953488376e-06, "logits/chosen": -2.8996691703796387, "logits/rejected": -2.859790325164795, "logps/chosen": -39.816078186035156, "logps/rejected": -165.71630859375, "loss": 0.2079, "rewards/accuracies": 1.0, "rewards/chosen": 0.29694247245788574, "rewards/margins": 1.5048564672470093, "rewards/rejected": -1.207914113998413, "step": 780 }, { "epoch": 0.05, "learning_rate": 2.3553965414430533e-06, "logits/chosen": -2.9014010429382324, "logits/rejected": -2.8186144828796387, "logps/chosen": -36.43000793457031, "logps/rejected": -166.2866973876953, "loss": 0.1945, "rewards/accuracies": 1.0, "rewards/chosen": 0.31615301966667175, "rewards/margins": 1.5481258630752563, "rewards/rejected": -1.231972575187683, "step": 790 }, { "epoch": 0.05, "learning_rate": 2.385211687537269e-06, "logits/chosen": -2.9162821769714355, "logits/rejected": -2.829958915710449, "logps/chosen": -41.460723876953125, "logps/rejected": -172.53224182128906, "loss": 0.1916, "rewards/accuracies": 1.0, "rewards/chosen": 0.32863444089889526, "rewards/margins": 1.5999221801757812, "rewards/rejected": -1.2712876796722412, "step": 800 }, { "epoch": 0.05, "learning_rate": 2.415026833631485e-06, "logits/chosen": -2.9372239112854004, "logits/rejected": -2.848865032196045, "logps/chosen": -39.65186309814453, "logps/rejected": -167.17933654785156, "loss": 0.1949, "rewards/accuracies": 1.0, "rewards/chosen": 0.31408971548080444, "rewards/margins": 1.5402828454971313, "rewards/rejected": -1.2261930704116821, "step": 810 }, { "epoch": 0.05, "learning_rate": 2.4448419797257008e-06, "logits/chosen": -2.9475319385528564, "logits/rejected": -2.8369367122650146, "logps/chosen": -42.22636032104492, "logps/rejected": -178.23306274414062, "loss": 0.1866, "rewards/accuracies": 1.0, "rewards/chosen": 0.31575629115104675, "rewards/margins": 1.648012399673462, "rewards/rejected": -1.3322560787200928, "step": 820 }, { "epoch": 0.05, "learning_rate": 2.474657125819917e-06, "logits/chosen": -2.9107608795166016, "logits/rejected": -2.8390917778015137, "logps/chosen": -39.65138244628906, "logps/rejected": -177.58013916015625, "loss": 0.1773, "rewards/accuracies": 1.0, "rewards/chosen": 0.312017023563385, "rewards/margins": 1.658429503440857, "rewards/rejected": -1.3464124202728271, "step": 830 }, { "epoch": 0.05, "learning_rate": 2.504472271914132e-06, "logits/chosen": -2.923290729522705, "logits/rejected": -2.829624652862549, "logps/chosen": -36.920780181884766, "logps/rejected": -183.22515869140625, "loss": 0.172, "rewards/accuracies": 1.0, "rewards/chosen": 0.3202868103981018, "rewards/margins": 1.7096493244171143, "rewards/rejected": -1.3893625736236572, "step": 840 }, { "epoch": 0.05, "learning_rate": 2.5342874180083483e-06, "logits/chosen": -2.9353766441345215, "logits/rejected": -2.8816449642181396, "logps/chosen": -44.964141845703125, "logps/rejected": -185.05551147460938, "loss": 0.1768, "rewards/accuracies": 1.0, "rewards/chosen": 0.2919327914714813, "rewards/margins": 1.7024953365325928, "rewards/rejected": -1.4105623960494995, "step": 850 }, { "epoch": 0.05, "learning_rate": 2.564102564102564e-06, "logits/chosen": -2.879239797592163, "logits/rejected": -2.8295459747314453, "logps/chosen": -40.0316162109375, "logps/rejected": -181.7423553466797, "loss": 0.1721, "rewards/accuracies": 1.0, "rewards/chosen": 0.31078919768333435, "rewards/margins": 1.6821924448013306, "rewards/rejected": -1.3714032173156738, "step": 860 }, { "epoch": 0.05, "learning_rate": 2.59391771019678e-06, "logits/chosen": -2.9260551929473877, "logits/rejected": -2.8518142700195312, "logps/chosen": -34.84546661376953, "logps/rejected": -186.55203247070312, "loss": 0.1601, "rewards/accuracies": 1.0, "rewards/chosen": 0.3088172972202301, "rewards/margins": 1.7361927032470703, "rewards/rejected": -1.4273754358291626, "step": 870 }, { "epoch": 0.05, "learning_rate": 2.6237328562909958e-06, "logits/chosen": -2.9526896476745605, "logits/rejected": -2.864396333694458, "logps/chosen": -46.93625259399414, "logps/rejected": -195.2786407470703, "loss": 0.1569, "rewards/accuracies": 1.0, "rewards/chosen": 0.3179161548614502, "rewards/margins": 1.8251888751983643, "rewards/rejected": -1.507272481918335, "step": 880 }, { "epoch": 0.05, "learning_rate": 2.653548002385212e-06, "logits/chosen": -2.946425199508667, "logits/rejected": -2.8477015495300293, "logps/chosen": -44.20731735229492, "logps/rejected": -191.15806579589844, "loss": 0.1558, "rewards/accuracies": 1.0, "rewards/chosen": 0.2844986021518707, "rewards/margins": 1.7495861053466797, "rewards/rejected": -1.465087652206421, "step": 890 }, { "epoch": 0.05, "learning_rate": 2.6833631484794276e-06, "logits/chosen": -2.908036231994629, "logits/rejected": -2.848543643951416, "logps/chosen": -42.08452606201172, "logps/rejected": -193.06002807617188, "loss": 0.1695, "rewards/accuracies": 1.0, "rewards/chosen": 0.27534404397010803, "rewards/margins": 1.7566055059432983, "rewards/rejected": -1.4812614917755127, "step": 900 }, { "epoch": 0.05, "learning_rate": 2.7131782945736433e-06, "logits/chosen": -2.9223923683166504, "logits/rejected": -2.842595100402832, "logps/chosen": -45.47929763793945, "logps/rejected": -196.42083740234375, "loss": 0.1652, "rewards/accuracies": 1.0, "rewards/chosen": 0.2668255567550659, "rewards/margins": 1.7994037866592407, "rewards/rejected": -1.5325781106948853, "step": 910 }, { "epoch": 0.05, "learning_rate": 2.7429934406678594e-06, "logits/chosen": -2.9216389656066895, "logits/rejected": -2.8908047676086426, "logps/chosen": -45.96767807006836, "logps/rejected": -206.0484161376953, "loss": 0.1433, "rewards/accuracies": 1.0, "rewards/chosen": 0.2884276807308197, "rewards/margins": 1.9038997888565063, "rewards/rejected": -1.6154720783233643, "step": 920 }, { "epoch": 0.06, "learning_rate": 2.772808586762075e-06, "logits/chosen": -2.8979265689849854, "logits/rejected": -2.8431503772735596, "logps/chosen": -49.884559631347656, "logps/rejected": -198.9449005126953, "loss": 0.1568, "rewards/accuracies": 1.0, "rewards/chosen": 0.25000572204589844, "rewards/margins": 1.7875381708145142, "rewards/rejected": -1.5375325679779053, "step": 930 }, { "epoch": 0.06, "learning_rate": 2.802623732856291e-06, "logits/chosen": -2.906466245651245, "logits/rejected": -2.8588998317718506, "logps/chosen": -48.54482650756836, "logps/rejected": -213.2858123779297, "loss": 0.137, "rewards/accuracies": 1.0, "rewards/chosen": 0.22776472568511963, "rewards/margins": 1.9271682500839233, "rewards/rejected": -1.6994035243988037, "step": 940 }, { "epoch": 0.06, "learning_rate": 2.832438878950507e-06, "logits/chosen": -2.932741641998291, "logits/rejected": -2.8750081062316895, "logps/chosen": -43.689491271972656, "logps/rejected": -205.803955078125, "loss": 0.1513, "rewards/accuracies": 1.0, "rewards/chosen": 0.19723446667194366, "rewards/margins": 1.818921446800232, "rewards/rejected": -1.6216869354248047, "step": 950 }, { "epoch": 0.06, "learning_rate": 2.862254025044723e-06, "logits/chosen": -2.927215099334717, "logits/rejected": -2.8479530811309814, "logps/chosen": -48.602638244628906, "logps/rejected": -216.87960815429688, "loss": 0.1434, "rewards/accuracies": 1.0, "rewards/chosen": 0.22594816982746124, "rewards/margins": 1.9465906620025635, "rewards/rejected": -1.7206424474716187, "step": 960 }, { "epoch": 0.06, "learning_rate": 2.8920691711389387e-06, "logits/chosen": -2.9289841651916504, "logits/rejected": -2.8292651176452637, "logps/chosen": -46.494232177734375, "logps/rejected": -209.7342987060547, "loss": 0.1407, "rewards/accuracies": 1.0, "rewards/chosen": 0.20923173427581787, "rewards/margins": 1.8711140155792236, "rewards/rejected": -1.6618821620941162, "step": 970 }, { "epoch": 0.06, "learning_rate": 2.9218843172331544e-06, "logits/chosen": -2.9083921909332275, "logits/rejected": -2.9028637409210205, "logps/chosen": -54.48478317260742, "logps/rejected": -215.77587890625, "loss": 0.1418, "rewards/accuracies": 1.0, "rewards/chosen": 0.18314385414123535, "rewards/margins": 1.8907610177993774, "rewards/rejected": -1.7076170444488525, "step": 980 }, { "epoch": 0.06, "learning_rate": 2.9516994633273705e-06, "logits/chosen": -2.917245864868164, "logits/rejected": -2.8610386848449707, "logps/chosen": -52.58135223388672, "logps/rejected": -220.6952362060547, "loss": 0.1329, "rewards/accuracies": 1.0, "rewards/chosen": 0.1814514845609665, "rewards/margins": 1.937570333480835, "rewards/rejected": -1.7561187744140625, "step": 990 }, { "epoch": 0.06, "learning_rate": 2.981514609421586e-06, "logits/chosen": -2.9567294120788574, "logits/rejected": -2.8885066509246826, "logps/chosen": -51.50627517700195, "logps/rejected": -227.5182647705078, "loss": 0.1303, "rewards/accuracies": 1.0, "rewards/chosen": 0.1922745406627655, "rewards/margins": 2.0097155570983887, "rewards/rejected": -1.8174407482147217, "step": 1000 }, { "epoch": 0.06, "learning_rate": 3.0113297555158023e-06, "logits/chosen": -2.934863567352295, "logits/rejected": -2.8902747631073, "logps/chosen": -50.67198181152344, "logps/rejected": -222.57131958007812, "loss": 0.1366, "rewards/accuracies": 1.0, "rewards/chosen": 0.18493175506591797, "rewards/margins": 1.980546236038208, "rewards/rejected": -1.7956146001815796, "step": 1010 }, { "epoch": 0.06, "learning_rate": 3.041144901610018e-06, "logits/chosen": -2.9291841983795166, "logits/rejected": -2.8428688049316406, "logps/chosen": -49.201480865478516, "logps/rejected": -232.1763458251953, "loss": 0.1267, "rewards/accuracies": 1.0, "rewards/chosen": 0.20666006207466125, "rewards/margins": 2.089517593383789, "rewards/rejected": -1.882857322692871, "step": 1020 }, { "epoch": 0.06, "learning_rate": 3.070960047704234e-06, "logits/chosen": -2.949662446975708, "logits/rejected": -2.8584136962890625, "logps/chosen": -50.243751525878906, "logps/rejected": -232.68533325195312, "loss": 0.1221, "rewards/accuracies": 1.0, "rewards/chosen": 0.16892682015895844, "rewards/margins": 2.0424437522888184, "rewards/rejected": -1.8735166788101196, "step": 1030 }, { "epoch": 0.06, "learning_rate": 3.10077519379845e-06, "logits/chosen": -2.9093775749206543, "logits/rejected": -2.820742607116699, "logps/chosen": -53.60634231567383, "logps/rejected": -228.4220733642578, "loss": 0.1272, "rewards/accuracies": 1.0, "rewards/chosen": 0.1871906816959381, "rewards/margins": 2.0358026027679443, "rewards/rejected": -1.848611831665039, "step": 1040 }, { "epoch": 0.06, "learning_rate": 3.1305903398926655e-06, "logits/chosen": -2.9516549110412598, "logits/rejected": -2.8534727096557617, "logps/chosen": -52.65508270263672, "logps/rejected": -245.27963256835938, "loss": 0.1129, "rewards/accuracies": 1.0, "rewards/chosen": 0.19569073617458344, "rewards/margins": 2.213348627090454, "rewards/rejected": -2.01765775680542, "step": 1050 }, { "epoch": 0.06, "learning_rate": 3.1604054859868816e-06, "logits/chosen": -2.925849199295044, "logits/rejected": -2.821476936340332, "logps/chosen": -55.7864990234375, "logps/rejected": -251.8877716064453, "loss": 0.1093, "rewards/accuracies": 1.0, "rewards/chosen": 0.13442271947860718, "rewards/margins": 2.21010684967041, "rewards/rejected": -2.075684070587158, "step": 1060 }, { "epoch": 0.06, "learning_rate": 3.1902206320810973e-06, "logits/chosen": -2.918884754180908, "logits/rejected": -2.8218636512756348, "logps/chosen": -53.832305908203125, "logps/rejected": -248.07412719726562, "loss": 0.1133, "rewards/accuracies": 1.0, "rewards/chosen": 0.16307690739631653, "rewards/margins": 2.2009968757629395, "rewards/rejected": -2.0379199981689453, "step": 1070 }, { "epoch": 0.06, "learning_rate": 3.2200357781753134e-06, "logits/chosen": -2.913417100906372, "logits/rejected": -2.8137714862823486, "logps/chosen": -61.895362854003906, "logps/rejected": -257.60272216796875, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": 0.12998700141906738, "rewards/margins": 2.2668581008911133, "rewards/rejected": -2.136870861053467, "step": 1080 }, { "epoch": 0.06, "learning_rate": 3.249850924269529e-06, "logits/chosen": -2.9006271362304688, "logits/rejected": -2.8202404975891113, "logps/chosen": -62.01024627685547, "logps/rejected": -253.77981567382812, "loss": 0.1087, "rewards/accuracies": 1.0, "rewards/chosen": 0.12107900530099869, "rewards/margins": 2.221971035003662, "rewards/rejected": -2.1008923053741455, "step": 1090 }, { "epoch": 0.07, "learning_rate": 3.2796660703637452e-06, "logits/chosen": -2.9391419887542725, "logits/rejected": -2.8563625812530518, "logps/chosen": -60.92161178588867, "logps/rejected": -277.9583435058594, "loss": 0.1016, "rewards/accuracies": 1.0, "rewards/chosen": 0.07013644278049469, "rewards/margins": 2.408625841140747, "rewards/rejected": -2.338489055633545, "step": 1100 }, { "epoch": 0.07, "learning_rate": 3.309481216457961e-06, "logits/chosen": -2.936723232269287, "logits/rejected": -2.809131145477295, "logps/chosen": -63.90967559814453, "logps/rejected": -287.70257568359375, "loss": 0.0921, "rewards/accuracies": 1.0, "rewards/chosen": 0.07646025717258453, "rewards/margins": 2.511096477508545, "rewards/rejected": -2.434635877609253, "step": 1110 }, { "epoch": 0.07, "learning_rate": 3.3392963625521766e-06, "logits/chosen": -2.854280948638916, "logits/rejected": -2.7536861896514893, "logps/chosen": -69.69160461425781, "logps/rejected": -300.3216247558594, "loss": 0.0907, "rewards/accuracies": 1.0, "rewards/chosen": 0.005916008725762367, "rewards/margins": 2.5596814155578613, "rewards/rejected": -2.553765296936035, "step": 1120 }, { "epoch": 0.07, "learning_rate": 3.3691115086463927e-06, "logits/chosen": -2.911325693130493, "logits/rejected": -2.7586543560028076, "logps/chosen": -82.3992919921875, "logps/rejected": -371.47528076171875, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": -0.1462583988904953, "rewards/margins": 3.1211297512054443, "rewards/rejected": -3.267388105392456, "step": 1130 }, { "epoch": 0.07, "learning_rate": 3.3989266547406084e-06, "logits/chosen": -2.8932929039001465, "logits/rejected": -2.788323402404785, "logps/chosen": -89.82049560546875, "logps/rejected": -368.25177001953125, "loss": 0.0729, "rewards/accuracies": 1.0, "rewards/chosen": -0.23859450221061707, "rewards/margins": 3.0021443367004395, "rewards/rejected": -3.240739107131958, "step": 1140 }, { "epoch": 0.07, "learning_rate": 3.4287418008348246e-06, "logits/chosen": -2.913886308670044, "logits/rejected": -2.7686142921447754, "logps/chosen": -118.69229888916016, "logps/rejected": -467.5948181152344, "loss": 0.0503, "rewards/accuracies": 1.0, "rewards/chosen": -0.45577478408813477, "rewards/margins": 3.7719509601593018, "rewards/rejected": -4.227725505828857, "step": 1150 }, { "epoch": 0.07, "learning_rate": 3.4585569469290402e-06, "logits/chosen": -2.889310836791992, "logits/rejected": -2.7985191345214844, "logps/chosen": -74.08802795410156, "logps/rejected": -435.2679138183594, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": -0.025621002539992332, "rewards/margins": 3.877232313156128, "rewards/rejected": -3.902853012084961, "step": 1160 }, { "epoch": 0.07, "learning_rate": 3.4883720930232564e-06, "logits/chosen": -2.858800172805786, "logits/rejected": -2.7574596405029297, "logps/chosen": -122.84834289550781, "logps/rejected": -501.29638671875, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": -0.49279889464378357, "rewards/margins": 4.067281246185303, "rewards/rejected": -4.560080051422119, "step": 1170 }, { "epoch": 0.07, "learning_rate": 3.518187239117472e-06, "logits/chosen": -2.86140513420105, "logits/rejected": -2.772339105606079, "logps/chosen": -101.2002944946289, "logps/rejected": -492.4452209472656, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": -0.2555873990058899, "rewards/margins": 4.228099346160889, "rewards/rejected": -4.483687400817871, "step": 1180 }, { "epoch": 0.07, "learning_rate": 3.5480023852116878e-06, "logits/chosen": -2.907498359680176, "logits/rejected": -2.7997279167175293, "logps/chosen": -126.2753677368164, "logps/rejected": -543.5437622070312, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -0.5438970923423767, "rewards/margins": 4.444826126098633, "rewards/rejected": -4.988723278045654, "step": 1190 }, { "epoch": 0.07, "learning_rate": 3.577817531305904e-06, "logits/chosen": -2.8707385063171387, "logits/rejected": -2.7878029346466064, "logps/chosen": -102.17366027832031, "logps/rejected": -607.9171142578125, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -0.3222612738609314, "rewards/margins": 5.321918964385986, "rewards/rejected": -5.644179344177246, "step": 1200 }, { "epoch": 0.07, "learning_rate": 3.6076326774001196e-06, "logits/chosen": -2.9233720302581787, "logits/rejected": -2.814185380935669, "logps/chosen": -80.7733383178711, "logps/rejected": -581.2581176757812, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -0.05069941282272339, "rewards/margins": 5.321071147918701, "rewards/rejected": -5.371769905090332, "step": 1210 }, { "epoch": 0.07, "learning_rate": 3.6374478234943357e-06, "logits/chosen": -2.8979592323303223, "logits/rejected": -2.8126442432403564, "logps/chosen": -99.09781646728516, "logps/rejected": -586.7628173828125, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": -0.25345659255981445, "rewards/margins": 5.178110122680664, "rewards/rejected": -5.431567192077637, "step": 1220 }, { "epoch": 0.07, "learning_rate": 3.6672629695885514e-06, "logits/chosen": -2.8876588344573975, "logits/rejected": -2.7811882495880127, "logps/chosen": -105.880615234375, "logps/rejected": -553.8966064453125, "loss": 0.0667, "rewards/accuracies": 1.0, "rewards/chosen": -0.3590214252471924, "rewards/margins": 4.745104789733887, "rewards/rejected": -5.1041259765625, "step": 1230 }, { "epoch": 0.07, "learning_rate": 3.6970781156827675e-06, "logits/chosen": -2.906069040298462, "logits/rejected": -2.804664134979248, "logps/chosen": -103.67093658447266, "logps/rejected": -587.0574340820312, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": -0.3086903691291809, "rewards/margins": 5.122992515563965, "rewards/rejected": -5.431683540344238, "step": 1240 }, { "epoch": 0.07, "learning_rate": 3.726893261776983e-06, "logits/chosen": -2.8762924671173096, "logits/rejected": -2.797161817550659, "logps/chosen": -99.7688980102539, "logps/rejected": -596.4459228515625, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -0.33934131264686584, "rewards/margins": 5.174071788787842, "rewards/rejected": -5.513413906097412, "step": 1250 }, { "epoch": 0.08, "learning_rate": 3.756708407871199e-06, "logits/chosen": -2.942321538925171, "logits/rejected": -2.8218114376068115, "logps/chosen": -107.68839263916016, "logps/rejected": -625.3644409179688, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -0.3565952777862549, "rewards/margins": 5.4525322914123535, "rewards/rejected": -5.809127330780029, "step": 1260 }, { "epoch": 0.08, "learning_rate": 3.786523553965415e-06, "logits/chosen": -2.911815881729126, "logits/rejected": -2.770745038986206, "logps/chosen": -85.6732177734375, "logps/rejected": -657.2732543945312, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -0.1061444878578186, "rewards/margins": 6.0360918045043945, "rewards/rejected": -6.14223575592041, "step": 1270 }, { "epoch": 0.08, "learning_rate": 3.816338700059631e-06, "logits/chosen": -2.886545181274414, "logits/rejected": -2.734433650970459, "logps/chosen": -115.2284927368164, "logps/rejected": -633.8414306640625, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -0.40496665239334106, "rewards/margins": 5.501077651977539, "rewards/rejected": -5.906044006347656, "step": 1280 }, { "epoch": 0.08, "learning_rate": 3.846153846153847e-06, "logits/chosen": -2.8823819160461426, "logits/rejected": -2.7652571201324463, "logps/chosen": -128.81338500976562, "logps/rejected": -752.5650634765625, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -0.5743337869644165, "rewards/margins": 6.505009651184082, "rewards/rejected": -7.079343318939209, "step": 1290 }, { "epoch": 0.08, "learning_rate": 3.875968992248063e-06, "logits/chosen": -2.888608932495117, "logits/rejected": -2.8110814094543457, "logps/chosen": -114.83067321777344, "logps/rejected": -707.441650390625, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -0.4642557203769684, "rewards/margins": 6.154486179351807, "rewards/rejected": -6.6187424659729, "step": 1300 }, { "epoch": 0.08, "learning_rate": 3.905784138342278e-06, "logits/chosen": -2.928072452545166, "logits/rejected": -2.810481548309326, "logps/chosen": -145.2091522216797, "logps/rejected": -704.4346923828125, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": -0.6881845593452454, "rewards/margins": 5.919460773468018, "rewards/rejected": -6.607645511627197, "step": 1310 }, { "epoch": 0.08, "learning_rate": 3.935599284436494e-06, "logits/chosen": -2.868002414703369, "logits/rejected": -2.765359401702881, "logps/chosen": -131.2050323486328, "logps/rejected": -707.6498413085938, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -0.6056081056594849, "rewards/margins": 6.0184478759765625, "rewards/rejected": -6.6240553855896, "step": 1320 }, { "epoch": 0.08, "learning_rate": 3.96541443053071e-06, "logits/chosen": -2.885503053665161, "logits/rejected": -2.8132071495056152, "logps/chosen": -112.09709167480469, "logps/rejected": -741.6683349609375, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -0.36491528153419495, "rewards/margins": 6.602360725402832, "rewards/rejected": -6.967276096343994, "step": 1330 }, { "epoch": 0.08, "learning_rate": 3.995229576624926e-06, "logits/chosen": -2.9085755348205566, "logits/rejected": -2.835230827331543, "logps/chosen": -108.37870025634766, "logps/rejected": -705.7604370117188, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -0.4209515154361725, "rewards/margins": 6.193148136138916, "rewards/rejected": -6.614099025726318, "step": 1340 }, { "epoch": 0.08, "learning_rate": 4.025044722719142e-06, "logits/chosen": -2.905709743499756, "logits/rejected": -2.8284640312194824, "logps/chosen": -91.01869201660156, "logps/rejected": -756.5321044921875, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -0.20456485450267792, "rewards/margins": 6.914083957672119, "rewards/rejected": -7.118648529052734, "step": 1350 }, { "epoch": 0.08, "learning_rate": 4.054859868813357e-06, "logits/chosen": -2.9096837043762207, "logits/rejected": -2.811187267303467, "logps/chosen": -93.60760498046875, "logps/rejected": -772.1401977539062, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -0.17053982615470886, "rewards/margins": 7.10394287109375, "rewards/rejected": -7.274481296539307, "step": 1360 }, { "epoch": 0.08, "learning_rate": 4.084675014907573e-06, "logits/chosen": -2.907258987426758, "logits/rejected": -2.794480323791504, "logps/chosen": -85.34244537353516, "logps/rejected": -773.3317260742188, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -0.20540836453437805, "rewards/margins": 7.082415580749512, "rewards/rejected": -7.2878241539001465, "step": 1370 }, { "epoch": 0.08, "learning_rate": 4.114490161001789e-06, "logits/chosen": -2.8900818824768066, "logits/rejected": -2.8205857276916504, "logps/chosen": -112.50643157958984, "logps/rejected": -770.963623046875, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -0.44011083245277405, "rewards/margins": 6.821206569671631, "rewards/rejected": -7.261316776275635, "step": 1380 }, { "epoch": 0.08, "learning_rate": 4.1443053070960046e-06, "logits/chosen": -2.8994557857513428, "logits/rejected": -2.8128392696380615, "logps/chosen": -119.0394515991211, "logps/rejected": -781.0758666992188, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -0.48783254623413086, "rewards/margins": 6.872071743011475, "rewards/rejected": -7.359903812408447, "step": 1390 }, { "epoch": 0.08, "learning_rate": 4.174120453190221e-06, "logits/chosen": -2.9006454944610596, "logits/rejected": -2.8404648303985596, "logps/chosen": -85.8052978515625, "logps/rejected": -690.7034912109375, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -0.17774322628974915, "rewards/margins": 6.288405895233154, "rewards/rejected": -6.466148376464844, "step": 1400 }, { "epoch": 0.08, "learning_rate": 4.203935599284437e-06, "logits/chosen": -2.9320194721221924, "logits/rejected": -2.8243796825408936, "logps/chosen": -136.13308715820312, "logps/rejected": -770.6847534179688, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -0.697273850440979, "rewards/margins": 6.55682373046875, "rewards/rejected": -7.254096984863281, "step": 1410 }, { "epoch": 0.08, "learning_rate": 4.233750745378653e-06, "logits/chosen": -2.917206287384033, "logits/rejected": -2.795807123184204, "logps/chosen": -149.92236328125, "logps/rejected": -819.3771362304688, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": -0.7758058309555054, "rewards/margins": 6.977842807769775, "rewards/rejected": -7.753647804260254, "step": 1420 }, { "epoch": 0.09, "learning_rate": 4.263565891472868e-06, "logits/chosen": -2.908297538757324, "logits/rejected": -2.8062705993652344, "logps/chosen": -100.38211059570312, "logps/rejected": -749.2584838867188, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -0.32293376326560974, "rewards/margins": 6.71783971786499, "rewards/rejected": -7.040773868560791, "step": 1430 }, { "epoch": 0.09, "learning_rate": 4.293381037567084e-06, "logits/chosen": -2.919139862060547, "logits/rejected": -2.817054271697998, "logps/chosen": -138.50921630859375, "logps/rejected": -812.5184326171875, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.6169862151145935, "rewards/margins": 7.054887294769287, "rewards/rejected": -7.671874046325684, "step": 1440 }, { "epoch": 0.09, "learning_rate": 4.3231961836613e-06, "logits/chosen": -2.8928654193878174, "logits/rejected": -2.81543231010437, "logps/chosen": -135.01760864257812, "logps/rejected": -755.3966064453125, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -0.5882952809333801, "rewards/margins": 6.516493320465088, "rewards/rejected": -7.104788303375244, "step": 1450 }, { "epoch": 0.09, "learning_rate": 4.353011329755516e-06, "logits/chosen": -2.9313805103302, "logits/rejected": -2.8082115650177, "logps/chosen": -94.20889282226562, "logps/rejected": -718.9976806640625, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -0.2442726343870163, "rewards/margins": 6.521415710449219, "rewards/rejected": -6.765688896179199, "step": 1460 }, { "epoch": 0.09, "learning_rate": 4.382826475849732e-06, "logits/chosen": -2.950016736984253, "logits/rejected": -2.8385097980499268, "logps/chosen": -95.37654113769531, "logps/rejected": -746.2007446289062, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -0.21195659041404724, "rewards/margins": 6.812326908111572, "rewards/rejected": -7.024283409118652, "step": 1470 }, { "epoch": 0.09, "learning_rate": 4.412641621943948e-06, "logits/chosen": -2.909172773361206, "logits/rejected": -2.8274030685424805, "logps/chosen": -115.06422424316406, "logps/rejected": -707.6343994140625, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -0.4683963358402252, "rewards/margins": 6.17194128036499, "rewards/rejected": -6.640337944030762, "step": 1480 }, { "epoch": 0.09, "learning_rate": 4.442456768038164e-06, "logits/chosen": -2.9105584621429443, "logits/rejected": -2.8290228843688965, "logps/chosen": -100.8877944946289, "logps/rejected": -761.8911743164062, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -0.29536038637161255, "rewards/margins": 6.875528812408447, "rewards/rejected": -7.170888423919678, "step": 1490 }, { "epoch": 0.09, "learning_rate": 4.472271914132379e-06, "logits/chosen": -2.936131000518799, "logits/rejected": -2.860999822616577, "logps/chosen": -126.1532211303711, "logps/rejected": -755.7017822265625, "loss": 0.045, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5298954844474792, "rewards/margins": 6.5799736976623535, "rewards/rejected": -7.109869480133057, "step": 1500 }, { "epoch": 0.09, "learning_rate": 4.502087060226595e-06, "logits/chosen": -2.923811435699463, "logits/rejected": -2.8051819801330566, "logps/chosen": -126.59808349609375, "logps/rejected": -834.5745239257812, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -0.5441330075263977, "rewards/margins": 7.3602800369262695, "rewards/rejected": -7.904412269592285, "step": 1510 }, { "epoch": 0.09, "learning_rate": 4.5319022063208115e-06, "logits/chosen": -2.9029436111450195, "logits/rejected": -2.793008804321289, "logps/chosen": -110.28570556640625, "logps/rejected": -774.4368896484375, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -0.3923500180244446, "rewards/margins": 6.911857604980469, "rewards/rejected": -7.304207801818848, "step": 1520 }, { "epoch": 0.09, "learning_rate": 4.561717352415027e-06, "logits/chosen": -2.934405565261841, "logits/rejected": -2.836610794067383, "logps/chosen": -102.74562072753906, "logps/rejected": -755.12646484375, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -0.28946709632873535, "rewards/margins": 6.826741695404053, "rewards/rejected": -7.116208553314209, "step": 1530 }, { "epoch": 0.09, "learning_rate": 4.591532498509243e-06, "logits/chosen": -2.924065351486206, "logits/rejected": -2.847355365753174, "logps/chosen": -92.72177124023438, "logps/rejected": -743.5125122070312, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -0.21991023421287537, "rewards/margins": 6.7882537841796875, "rewards/rejected": -7.008164882659912, "step": 1540 }, { "epoch": 0.09, "learning_rate": 4.621347644603459e-06, "logits/chosen": -2.929433822631836, "logits/rejected": -2.852585554122925, "logps/chosen": -81.76838684082031, "logps/rejected": -776.5368041992188, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -0.1782989203929901, "rewards/margins": 7.1535186767578125, "rewards/rejected": -7.33181619644165, "step": 1550 }, { "epoch": 0.09, "learning_rate": 4.651162790697675e-06, "logits/chosen": -2.8872294425964355, "logits/rejected": -2.804154634475708, "logps/chosen": -106.18524169921875, "logps/rejected": -862.4391479492188, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -0.40572699904441833, "rewards/margins": 7.777975559234619, "rewards/rejected": -8.18370246887207, "step": 1560 }, { "epoch": 0.09, "learning_rate": 4.68097793679189e-06, "logits/chosen": -2.916672945022583, "logits/rejected": -2.8257951736450195, "logps/chosen": -125.31349182128906, "logps/rejected": -861.0426635742188, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.524355947971344, "rewards/margins": 7.647345542907715, "rewards/rejected": -8.171703338623047, "step": 1570 }, { "epoch": 0.09, "learning_rate": 4.7107930828861065e-06, "logits/chosen": -2.9099411964416504, "logits/rejected": -2.829416036605835, "logps/chosen": -82.06556701660156, "logps/rejected": -796.6558227539062, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.12387046962976456, "rewards/margins": 7.402307033538818, "rewards/rejected": -7.526177406311035, "step": 1580 }, { "epoch": 0.09, "learning_rate": 4.740608228980323e-06, "logits/chosen": -2.9172427654266357, "logits/rejected": -2.79081392288208, "logps/chosen": -98.95271301269531, "logps/rejected": -797.7516479492188, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -0.25947898626327515, "rewards/margins": 7.2698259353637695, "rewards/rejected": -7.529304504394531, "step": 1590 }, { "epoch": 0.1, "learning_rate": 4.770423375074538e-06, "logits/chosen": -2.9234654903411865, "logits/rejected": -2.826357126235962, "logps/chosen": -69.31069946289062, "logps/rejected": -835.509765625, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -0.00625277915969491, "rewards/margins": 7.899542808532715, "rewards/rejected": -7.905796051025391, "step": 1600 }, { "epoch": 0.1, "learning_rate": 4.800238521168754e-06, "logits/chosen": -2.9441840648651123, "logits/rejected": -2.8595235347747803, "logps/chosen": -77.70034790039062, "logps/rejected": -752.5096435546875, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.003330943640321493, "rewards/margins": 7.0750555992126465, "rewards/rejected": -7.078387260437012, "step": 1610 }, { "epoch": 0.1, "learning_rate": 4.83005366726297e-06, "logits/chosen": -2.9434502124786377, "logits/rejected": -2.797577381134033, "logps/chosen": -85.08970642089844, "logps/rejected": -765.79345703125, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -0.15180622041225433, "rewards/margins": 7.069903373718262, "rewards/rejected": -7.22170877456665, "step": 1620 }, { "epoch": 0.1, "learning_rate": 4.859868813357186e-06, "logits/chosen": -2.904468297958374, "logits/rejected": -2.7911434173583984, "logps/chosen": -98.65773010253906, "logps/rejected": -755.7101440429688, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": -0.2950167953968048, "rewards/margins": 6.821331977844238, "rewards/rejected": -7.1163482666015625, "step": 1630 }, { "epoch": 0.1, "learning_rate": 4.8896839594514015e-06, "logits/chosen": -2.9521899223327637, "logits/rejected": -2.8706722259521484, "logps/chosen": -75.16636657714844, "logps/rejected": -748.5827026367188, "loss": 0.0594, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.06359346210956573, "rewards/margins": 6.986341953277588, "rewards/rejected": -7.049934387207031, "step": 1640 }, { "epoch": 0.1, "learning_rate": 4.919499105545618e-06, "logits/chosen": -2.9369125366210938, "logits/rejected": -2.833885431289673, "logps/chosen": -92.69554901123047, "logps/rejected": -756.6004638671875, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -0.22647139430046082, "rewards/margins": 6.897830009460449, "rewards/rejected": -7.124301910400391, "step": 1650 }, { "epoch": 0.1, "learning_rate": 4.949314251639834e-06, "logits/chosen": -2.9366354942321777, "logits/rejected": -2.8257880210876465, "logps/chosen": -78.64491271972656, "logps/rejected": -820.7420043945312, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -0.06685201078653336, "rewards/margins": 7.706122398376465, "rewards/rejected": -7.772973537445068, "step": 1660 }, { "epoch": 0.1, "learning_rate": 4.979129397734049e-06, "logits/chosen": -2.905447483062744, "logits/rejected": -2.829411745071411, "logps/chosen": -105.19095611572266, "logps/rejected": -778.275146484375, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -0.3830556273460388, "rewards/margins": 6.958500862121582, "rewards/rejected": -7.341555595397949, "step": 1670 }, { "epoch": 0.1, "learning_rate": 4.99999951258251e-06, "logits/chosen": -2.930213451385498, "logits/rejected": -2.881126880645752, "logps/chosen": -93.16279602050781, "logps/rejected": -832.3576049804688, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -0.22098806500434875, "rewards/margins": 7.65737771987915, "rewards/rejected": -7.878365993499756, "step": 1680 }, { "epoch": 0.1, "learning_rate": 4.9999908473879605e-06, "logits/chosen": -2.923370599746704, "logits/rejected": -2.83423113822937, "logps/chosen": -87.92120361328125, "logps/rejected": -878.4290161132812, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.18133951723575592, "rewards/margins": 8.165082931518555, "rewards/rejected": -8.346423149108887, "step": 1690 }, { "epoch": 0.1, "learning_rate": 4.999971350736829e-06, "logits/chosen": -2.905275583267212, "logits/rejected": -2.8191068172454834, "logps/chosen": -128.41648864746094, "logps/rejected": -831.4952392578125, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.5878888368606567, "rewards/margins": 7.279034614562988, "rewards/rejected": -7.866921901702881, "step": 1700 }, { "epoch": 0.1, "learning_rate": 4.999941022713586e-06, "logits/chosen": -2.909315586090088, "logits/rejected": -2.8059866428375244, "logps/chosen": -91.888671875, "logps/rejected": -915.4306640625, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -0.2655971348285675, "rewards/margins": 8.438061714172363, "rewards/rejected": -8.703659057617188, "step": 1710 }, { "epoch": 0.1, "learning_rate": 4.999899863449631e-06, "logits/chosen": -2.9266390800476074, "logits/rejected": -2.819002866744995, "logps/chosen": -105.3461685180664, "logps/rejected": -842.1634521484375, "loss": 0.0392, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.33604034781455994, "rewards/margins": 7.634359836578369, "rewards/rejected": -7.970399379730225, "step": 1720 }, { "epoch": 0.1, "learning_rate": 4.999847873123291e-06, "logits/chosen": -2.945405960083008, "logits/rejected": -2.839404582977295, "logps/chosen": -77.4031753540039, "logps/rejected": -859.71923828125, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -0.07228974997997284, "rewards/margins": 8.0891752243042, "rewards/rejected": -8.16146469116211, "step": 1730 }, { "epoch": 0.1, "learning_rate": 4.999785051959819e-06, "logits/chosen": -2.927288770675659, "logits/rejected": -2.8343400955200195, "logps/chosen": -80.38436126708984, "logps/rejected": -812.8990478515625, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -0.05866802856326103, "rewards/margins": 7.646407127380371, "rewards/rejected": -7.705076694488525, "step": 1740 }, { "epoch": 0.1, "learning_rate": 4.999711400231393e-06, "logits/chosen": -2.949894666671753, "logits/rejected": -2.851357936859131, "logps/chosen": -72.51522064208984, "logps/rejected": -967.3624267578125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.01461850292980671, "rewards/margins": 9.207165718078613, "rewards/rejected": -9.221784591674805, "step": 1750 }, { "epoch": 0.1, "learning_rate": 4.999626918257117e-06, "logits/chosen": -2.9162895679473877, "logits/rejected": -2.824445962905884, "logps/chosen": -91.44669342041016, "logps/rejected": -881.5900268554688, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -0.25392740964889526, "rewards/margins": 8.122617721557617, "rewards/rejected": -8.376545906066895, "step": 1760 }, { "epoch": 0.11, "learning_rate": 4.999531606403018e-06, "logits/chosen": -2.8978888988494873, "logits/rejected": -2.8125948905944824, "logps/chosen": -91.75733947753906, "logps/rejected": -820.8654174804688, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -0.2465338408946991, "rewards/margins": 7.528445243835449, "rewards/rejected": -7.774979591369629, "step": 1770 }, { "epoch": 0.11, "learning_rate": 4.999425465082043e-06, "logits/chosen": -2.893117904663086, "logits/rejected": -2.79646897315979, "logps/chosen": -111.27781677246094, "logps/rejected": -860.5802612304688, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -0.3445555567741394, "rewards/margins": 7.822633266448975, "rewards/rejected": -8.16718864440918, "step": 1780 }, { "epoch": 0.11, "learning_rate": 4.99930849475406e-06, "logits/chosen": -2.9206490516662598, "logits/rejected": -2.8516833782196045, "logps/chosen": -78.2650146484375, "logps/rejected": -941.5294799804688, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.08792293071746826, "rewards/margins": 8.884671211242676, "rewards/rejected": -8.972593307495117, "step": 1790 }, { "epoch": 0.11, "learning_rate": 4.999180695925856e-06, "logits/chosen": -2.953122138977051, "logits/rejected": -2.8829965591430664, "logps/chosen": -85.0573501586914, "logps/rejected": -814.1541137695312, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -0.16442376375198364, "rewards/margins": 7.528973579406738, "rewards/rejected": -7.693397521972656, "step": 1800 }, { "epoch": 0.11, "learning_rate": 4.999042069151129e-06, "logits/chosen": -2.916384220123291, "logits/rejected": -2.8653922080993652, "logps/chosen": -69.55900573730469, "logps/rejected": -865.80322265625, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -0.04656394198536873, "rewards/margins": 8.165156364440918, "rewards/rejected": -8.21172046661377, "step": 1810 }, { "epoch": 0.11, "learning_rate": 4.998892615030496e-06, "logits/chosen": -2.9309935569763184, "logits/rejected": -2.868135929107666, "logps/chosen": -79.2657241821289, "logps/rejected": -772.4255981445312, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -0.10171637684106827, "rewards/margins": 7.189272403717041, "rewards/rejected": -7.290989875793457, "step": 1820 }, { "epoch": 0.11, "learning_rate": 4.99873233421148e-06, "logits/chosen": -2.9083075523376465, "logits/rejected": -2.7826335430145264, "logps/chosen": -97.53350830078125, "logps/rejected": -898.1552734375, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.2526090145111084, "rewards/margins": 8.289029121398926, "rewards/rejected": -8.541638374328613, "step": 1830 }, { "epoch": 0.11, "learning_rate": 4.9985612273885145e-06, "logits/chosen": -2.937445640563965, "logits/rejected": -2.850409984588623, "logps/chosen": -88.12530517578125, "logps/rejected": -842.4501953125, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -0.15504731237888336, "rewards/margins": 7.8274126052856445, "rewards/rejected": -7.98245906829834, "step": 1840 }, { "epoch": 0.11, "learning_rate": 4.998379295302936e-06, "logits/chosen": -2.898979663848877, "logits/rejected": -2.8027191162109375, "logps/chosen": -75.83100891113281, "logps/rejected": -893.0339965820312, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -0.09177707880735397, "rewards/margins": 8.395233154296875, "rewards/rejected": -8.48701000213623, "step": 1850 }, { "epoch": 0.11, "learning_rate": 4.9981865387429825e-06, "logits/chosen": -2.9434962272644043, "logits/rejected": -2.8376216888427734, "logps/chosen": -94.00227355957031, "logps/rejected": -738.0641479492188, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -0.24040238559246063, "rewards/margins": 6.678671360015869, "rewards/rejected": -6.91907262802124, "step": 1860 }, { "epoch": 0.11, "learning_rate": 4.997982958543792e-06, "logits/chosen": -2.910454750061035, "logits/rejected": -2.7979977130889893, "logps/chosen": -90.28697204589844, "logps/rejected": -1013.6444091796875, "loss": 0.044, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.17328914999961853, "rewards/margins": 9.517983436584473, "rewards/rejected": -9.691271781921387, "step": 1870 }, { "epoch": 0.11, "learning_rate": 4.9977685555873955e-06, "logits/chosen": -2.94758939743042, "logits/rejected": -2.834099054336548, "logps/chosen": -104.4254379272461, "logps/rejected": -839.0380859375, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -0.3154955804347992, "rewards/margins": 7.631292819976807, "rewards/rejected": -7.946788787841797, "step": 1880 }, { "epoch": 0.11, "learning_rate": 4.997543330802716e-06, "logits/chosen": -2.9423186779022217, "logits/rejected": -2.8269755840301514, "logps/chosen": -143.88519287109375, "logps/rejected": -954.8709106445312, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7201865911483765, "rewards/margins": 8.379110336303711, "rewards/rejected": -9.099296569824219, "step": 1890 }, { "epoch": 0.11, "learning_rate": 4.997307285165559e-06, "logits/chosen": -2.900526285171509, "logits/rejected": -2.7858831882476807, "logps/chosen": -165.40724182128906, "logps/rejected": -843.07958984375, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -0.9333111047744751, "rewards/margins": 7.0505547523498535, "rewards/rejected": -7.983866214752197, "step": 1900 }, { "epoch": 0.11, "learning_rate": 4.997060419698618e-06, "logits/chosen": -2.90938663482666, "logits/rejected": -2.806380033493042, "logps/chosen": -125.01261138916016, "logps/rejected": -848.7537841796875, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -0.5573270916938782, "rewards/margins": 7.487240791320801, "rewards/rejected": -8.044568061828613, "step": 1910 }, { "epoch": 0.11, "learning_rate": 4.996802735471461e-06, "logits/chosen": -2.92683744430542, "logits/rejected": -2.8211593627929688, "logps/chosen": -86.11729431152344, "logps/rejected": -835.4078369140625, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -0.16640424728393555, "rewards/margins": 7.743669033050537, "rewards/rejected": -7.910073757171631, "step": 1920 }, { "epoch": 0.12, "learning_rate": 4.996534233600531e-06, "logits/chosen": -2.905492067337036, "logits/rejected": -2.7942118644714355, "logps/chosen": -81.23568725585938, "logps/rejected": -751.8267822265625, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -0.09233834594488144, "rewards/margins": 6.9861955642700195, "rewards/rejected": -7.078534126281738, "step": 1930 }, { "epoch": 0.12, "learning_rate": 4.996254915249138e-06, "logits/chosen": -2.937582015991211, "logits/rejected": -2.819331407546997, "logps/chosen": -108.2693862915039, "logps/rejected": -898.1185302734375, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.40084370970726013, "rewards/margins": 8.131311416625977, "rewards/rejected": -8.53215503692627, "step": 1940 }, { "epoch": 0.12, "learning_rate": 4.995964781627457e-06, "logits/chosen": -2.937994956970215, "logits/rejected": -2.84669828414917, "logps/chosen": -94.46736907958984, "logps/rejected": -935.9000854492188, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -0.2815585732460022, "rewards/margins": 8.637256622314453, "rewards/rejected": -8.918814659118652, "step": 1950 }, { "epoch": 0.12, "learning_rate": 4.99566383399252e-06, "logits/chosen": -2.9156644344329834, "logits/rejected": -2.817598819732666, "logps/chosen": -91.17527770996094, "logps/rejected": -878.8995361328125, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.198879212141037, "rewards/margins": 8.136736869812012, "rewards/rejected": -8.335616111755371, "step": 1960 }, { "epoch": 0.12, "learning_rate": 4.995352073648213e-06, "logits/chosen": -2.9092535972595215, "logits/rejected": -2.8243935108184814, "logps/chosen": -107.5519027709961, "logps/rejected": -924.94189453125, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.3393505811691284, "rewards/margins": 8.469963073730469, "rewards/rejected": -8.80931282043457, "step": 1970 }, { "epoch": 0.12, "learning_rate": 4.9950295019452665e-06, "logits/chosen": -2.92537260055542, "logits/rejected": -2.8429553508758545, "logps/chosen": -77.56587219238281, "logps/rejected": -926.5347900390625, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -0.09293019771575928, "rewards/margins": 8.726125717163086, "rewards/rejected": -8.819055557250977, "step": 1980 }, { "epoch": 0.12, "learning_rate": 4.9946961202812566e-06, "logits/chosen": -2.9121599197387695, "logits/rejected": -2.8202285766601562, "logps/chosen": -101.99006652832031, "logps/rejected": -943.1390380859375, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.2530006468296051, "rewards/margins": 8.725936889648438, "rewards/rejected": -8.978937149047852, "step": 1990 }, { "epoch": 0.12, "learning_rate": 4.99435193010059e-06, "logits/chosen": -2.902696371078491, "logits/rejected": -2.8028347492218018, "logps/chosen": -80.13379669189453, "logps/rejected": -770.0050048828125, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -0.1043882742524147, "rewards/margins": 7.15741491317749, "rewards/rejected": -7.26180362701416, "step": 2000 }, { "epoch": 0.12, "learning_rate": 4.993996932894507e-06, "logits/chosen": -2.9226884841918945, "logits/rejected": -2.803157091140747, "logps/chosen": -73.43228912353516, "logps/rejected": -900.3966064453125, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.007940527983009815, "rewards/margins": 8.555150032043457, "rewards/rejected": -8.563089370727539, "step": 2010 }, { "epoch": 0.12, "learning_rate": 4.993631130201066e-06, "logits/chosen": -2.9101755619049072, "logits/rejected": -2.819037914276123, "logps/chosen": -75.83551025390625, "logps/rejected": -907.328125, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": -0.052049994468688965, "rewards/margins": 8.569490432739258, "rewards/rejected": -8.621540069580078, "step": 2020 }, { "epoch": 0.12, "learning_rate": 4.993254523605144e-06, "logits/chosen": -2.9077773094177246, "logits/rejected": -2.8096206188201904, "logps/chosen": -71.08650970458984, "logps/rejected": -894.9310302734375, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -0.03352269157767296, "rewards/margins": 8.482320785522461, "rewards/rejected": -8.515843391418457, "step": 2030 }, { "epoch": 0.12, "learning_rate": 4.9928671147384255e-06, "logits/chosen": -2.8848633766174316, "logits/rejected": -2.776020050048828, "logps/chosen": -70.45372009277344, "logps/rejected": -960.6707763671875, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.011269062757492065, "rewards/margins": 9.143277168273926, "rewards/rejected": -9.154546737670898, "step": 2040 }, { "epoch": 0.12, "learning_rate": 4.992468905279398e-06, "logits/chosen": -2.867745876312256, "logits/rejected": -2.7999446392059326, "logps/chosen": -75.90371704101562, "logps/rejected": -925.2644653320312, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": -0.05030970647931099, "rewards/margins": 8.771891593933105, "rewards/rejected": -8.822200775146484, "step": 2050 }, { "epoch": 0.12, "learning_rate": 4.992059896953343e-06, "logits/chosen": -2.921201229095459, "logits/rejected": -2.8354499340057373, "logps/chosen": -77.17427062988281, "logps/rejected": -842.29296875, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": -0.09570114314556122, "rewards/margins": 7.881175994873047, "rewards/rejected": -7.9768781661987305, "step": 2060 }, { "epoch": 0.12, "learning_rate": 4.99164009153233e-06, "logits/chosen": -2.9181408882141113, "logits/rejected": -2.828521966934204, "logps/chosen": -105.7486801147461, "logps/rejected": -875.3050537109375, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.35077470541000366, "rewards/margins": 7.954843997955322, "rewards/rejected": -8.305618286132812, "step": 2070 }, { "epoch": 0.12, "learning_rate": 4.991209490835207e-06, "logits/chosen": -2.9404006004333496, "logits/rejected": -2.83046817779541, "logps/chosen": -126.86375427246094, "logps/rejected": -844.8680419921875, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.5857782959938049, "rewards/margins": 7.417706489562988, "rewards/rejected": -8.003484725952148, "step": 2080 }, { "epoch": 0.12, "learning_rate": 4.990768096727594e-06, "logits/chosen": -2.9306182861328125, "logits/rejected": -2.814241409301758, "logps/chosen": -103.2199478149414, "logps/rejected": -901.07421875, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -0.2994753420352936, "rewards/margins": 8.261815071105957, "rewards/rejected": -8.561290740966797, "step": 2090 }, { "epoch": 0.13, "learning_rate": 4.990315911121874e-06, "logits/chosen": -2.9294815063476562, "logits/rejected": -2.8447818756103516, "logps/chosen": -73.66078186035156, "logps/rejected": -934.5150146484375, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -0.07724453508853912, "rewards/margins": 8.815179824829102, "rewards/rejected": -8.892423629760742, "step": 2100 }, { "epoch": 0.13, "learning_rate": 4.989852935977187e-06, "logits/chosen": -2.903233766555786, "logits/rejected": -2.8111109733581543, "logps/chosen": -71.41477966308594, "logps/rejected": -884.0208740234375, "loss": 0.0334, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.02149464190006256, "rewards/margins": 8.372645378112793, "rewards/rejected": -8.394139289855957, "step": 2110 }, { "epoch": 0.13, "learning_rate": 4.989379173299416e-06, "logits/chosen": -2.946033477783203, "logits/rejected": -2.8650143146514893, "logps/chosen": -65.48561096191406, "logps/rejected": -891.6853637695312, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": 0.0060962652787566185, "rewards/margins": 8.4716215133667, "rewards/rejected": -8.465524673461914, "step": 2120 }, { "epoch": 0.13, "learning_rate": 4.988894625141186e-06, "logits/chosen": -2.9310858249664307, "logits/rejected": -2.846900701522827, "logps/chosen": -96.44914245605469, "logps/rejected": -933.12548828125, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -0.24209725856781006, "rewards/margins": 8.64737319946289, "rewards/rejected": -8.889471054077148, "step": 2130 }, { "epoch": 0.13, "learning_rate": 4.98839929360185e-06, "logits/chosen": -2.91212797164917, "logits/rejected": -2.789159059524536, "logps/chosen": -123.30867004394531, "logps/rejected": -930.87060546875, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -0.49023550748825073, "rewards/margins": 8.366610527038574, "rewards/rejected": -8.856844902038574, "step": 2140 }, { "epoch": 0.13, "learning_rate": 4.9878931808274796e-06, "logits/chosen": -2.914276599884033, "logits/rejected": -2.8237998485565186, "logps/chosen": -82.78339385986328, "logps/rejected": -874.6910400390625, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -0.2022639811038971, "rewards/margins": 8.109360694885254, "rewards/rejected": -8.311624526977539, "step": 2150 }, { "epoch": 0.13, "learning_rate": 4.9873762890108596e-06, "logits/chosen": -2.940214157104492, "logits/rejected": -2.8720109462738037, "logps/chosen": -75.28640747070312, "logps/rejected": -923.8096923828125, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -0.03417595475912094, "rewards/margins": 8.761570930480957, "rewards/rejected": -8.795747756958008, "step": 2160 }, { "epoch": 0.13, "learning_rate": 4.986848620391473e-06, "logits/chosen": -2.900097608566284, "logits/rejected": -2.8351101875305176, "logps/chosen": -127.07694244384766, "logps/rejected": -902.7859497070312, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -0.5734145045280457, "rewards/margins": 8.012839317321777, "rewards/rejected": -8.586254119873047, "step": 2170 }, { "epoch": 0.13, "learning_rate": 4.986310177255498e-06, "logits/chosen": -2.905867099761963, "logits/rejected": -2.8195083141326904, "logps/chosen": -134.7004852294922, "logps/rejected": -868.62939453125, "loss": 0.0614, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6421254277229309, "rewards/margins": 7.5909247398376465, "rewards/rejected": -8.233050346374512, "step": 2180 }, { "epoch": 0.13, "learning_rate": 4.985760961935791e-06, "logits/chosen": -2.9291980266571045, "logits/rejected": -2.864793539047241, "logps/chosen": -71.02049255371094, "logps/rejected": -910.2662963867188, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": 0.005708605982363224, "rewards/margins": 8.673110008239746, "rewards/rejected": -8.667402267456055, "step": 2190 }, { "epoch": 0.13, "learning_rate": 4.985200976811882e-06, "logits/chosen": -2.917705774307251, "logits/rejected": -2.8427376747131348, "logps/chosen": -63.59644317626953, "logps/rejected": -901.7972412109375, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": 0.050329696387052536, "rewards/margins": 8.622421264648438, "rewards/rejected": -8.572092056274414, "step": 2200 }, { "epoch": 0.13, "learning_rate": 4.9846302243099624e-06, "logits/chosen": -2.913543224334717, "logits/rejected": -2.846938371658325, "logps/chosen": -79.2912826538086, "logps/rejected": -919.1541748046875, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -0.08877657353878021, "rewards/margins": 8.645597457885742, "rewards/rejected": -8.734375, "step": 2210 }, { "epoch": 0.13, "learning_rate": 4.984048706902872e-06, "logits/chosen": -2.9400038719177246, "logits/rejected": -2.815504789352417, "logps/chosen": -88.97303771972656, "logps/rejected": -924.6090698242188, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -0.2140566110610962, "rewards/margins": 8.580434799194336, "rewards/rejected": -8.794490814208984, "step": 2220 }, { "epoch": 0.13, "learning_rate": 4.9834564271100925e-06, "logits/chosen": -2.940690755844116, "logits/rejected": -2.86393666267395, "logps/chosen": -78.72700500488281, "logps/rejected": -940.3984375, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -0.11535857617855072, "rewards/margins": 8.834736824035645, "rewards/rejected": -8.950096130371094, "step": 2230 }, { "epoch": 0.13, "learning_rate": 4.982853387497737e-06, "logits/chosen": -2.916646957397461, "logits/rejected": -2.8344645500183105, "logps/chosen": -83.87245178222656, "logps/rejected": -892.7063598632812, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -0.13665243983268738, "rewards/margins": 8.350369453430176, "rewards/rejected": -8.487020492553711, "step": 2240 }, { "epoch": 0.13, "learning_rate": 4.98223959067853e-06, "logits/chosen": -2.8973255157470703, "logits/rejected": -2.8041720390319824, "logps/chosen": -108.44938659667969, "logps/rejected": -1021.1028442382812, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -0.3960796594619751, "rewards/margins": 9.363378524780273, "rewards/rejected": -9.759458541870117, "step": 2250 }, { "epoch": 0.13, "learning_rate": 4.9816150393118105e-06, "logits/chosen": -2.9236443042755127, "logits/rejected": -2.8413333892822266, "logps/chosen": -95.8695297241211, "logps/rejected": -860.6937255859375, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -0.2797923982143402, "rewards/margins": 7.885159969329834, "rewards/rejected": -8.164952278137207, "step": 2260 }, { "epoch": 0.14, "learning_rate": 4.980979736103506e-06, "logits/chosen": -2.913989543914795, "logits/rejected": -2.8314120769500732, "logps/chosen": -82.67513275146484, "logps/rejected": -938.8826293945312, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -0.10101115703582764, "rewards/margins": 8.846435546875, "rewards/rejected": -8.947446823120117, "step": 2270 }, { "epoch": 0.14, "learning_rate": 4.980333683806132e-06, "logits/chosen": -2.941417932510376, "logits/rejected": -2.81229829788208, "logps/chosen": -83.7132568359375, "logps/rejected": -839.8123168945312, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.10666797310113907, "rewards/margins": 7.8471856117248535, "rewards/rejected": -7.953853607177734, "step": 2280 }, { "epoch": 0.14, "learning_rate": 4.979676885218772e-06, "logits/chosen": -2.8838696479797363, "logits/rejected": -2.7944722175598145, "logps/chosen": -75.11915588378906, "logps/rejected": -887.5614013671875, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021694537717849016, "rewards/margins": 8.430620193481445, "rewards/rejected": -8.432788848876953, "step": 2290 }, { "epoch": 0.14, "learning_rate": 4.979009343187073e-06, "logits/chosen": -2.9060213565826416, "logits/rejected": -2.8375351428985596, "logps/chosen": -71.66847229003906, "logps/rejected": -880.8853759765625, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.01039136666804552, "rewards/margins": 8.36555290222168, "rewards/rejected": -8.375945091247559, "step": 2300 }, { "epoch": 0.14, "learning_rate": 4.9783310606032245e-06, "logits/chosen": -2.9355666637420654, "logits/rejected": -2.856356143951416, "logps/chosen": -70.37708282470703, "logps/rejected": -908.4384765625, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -0.026617299765348434, "rewards/margins": 8.613717079162598, "rewards/rejected": -8.640335083007812, "step": 2310 }, { "epoch": 0.14, "learning_rate": 4.977642040405954e-06, "logits/chosen": -2.8900671005249023, "logits/rejected": -2.8329005241394043, "logps/chosen": -61.27196502685547, "logps/rejected": -914.7413330078125, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": 0.07489734143018723, "rewards/margins": 8.778996467590332, "rewards/rejected": -8.70409870147705, "step": 2320 }, { "epoch": 0.14, "learning_rate": 4.976942285580507e-06, "logits/chosen": -2.9066126346588135, "logits/rejected": -2.8480751514434814, "logps/chosen": -77.35345458984375, "logps/rejected": -772.4773559570312, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -0.07964875549077988, "rewards/margins": 7.202715873718262, "rewards/rejected": -7.282364845275879, "step": 2330 }, { "epoch": 0.14, "learning_rate": 4.976231799158643e-06, "logits/chosen": -2.9451980590820312, "logits/rejected": -2.8087496757507324, "logps/chosen": -102.09986114501953, "logps/rejected": -941.6900634765625, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -0.34645453095436096, "rewards/margins": 8.632307052612305, "rewards/rejected": -8.978763580322266, "step": 2340 }, { "epoch": 0.14, "learning_rate": 4.975510584218614e-06, "logits/chosen": -2.9150779247283936, "logits/rejected": -2.8100497722625732, "logps/chosen": -127.3231430053711, "logps/rejected": -917.0535888671875, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -0.602421224117279, "rewards/margins": 8.135290145874023, "rewards/rejected": -8.737710952758789, "step": 2350 }, { "epoch": 0.14, "learning_rate": 4.974778643885153e-06, "logits/chosen": -2.89973783493042, "logits/rejected": -2.818477153778076, "logps/chosen": -113.25349426269531, "logps/rejected": -923.7726440429688, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -0.4272824823856354, "rewards/margins": 8.35981559753418, "rewards/rejected": -8.787099838256836, "step": 2360 }, { "epoch": 0.14, "learning_rate": 4.974035981329465e-06, "logits/chosen": -2.9361205101013184, "logits/rejected": -2.8323092460632324, "logps/chosen": -103.0582275390625, "logps/rejected": -849.3351440429688, "loss": 0.0491, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.29984036087989807, "rewards/margins": 7.7639617919921875, "rewards/rejected": -8.063802719116211, "step": 2370 }, { "epoch": 0.14, "learning_rate": 4.973282599769207e-06, "logits/chosen": -2.898646354675293, "logits/rejected": -2.789358615875244, "logps/chosen": -100.55979919433594, "logps/rejected": -942.27734375, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.33725738525390625, "rewards/margins": 8.635374069213867, "rewards/rejected": -8.972631454467773, "step": 2380 }, { "epoch": 0.14, "learning_rate": 4.972518502468482e-06, "logits/chosen": -2.89788556098938, "logits/rejected": -2.824398994445801, "logps/chosen": -119.30379486083984, "logps/rejected": -849.9752197265625, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -0.519378662109375, "rewards/margins": 7.53745174407959, "rewards/rejected": -8.056829452514648, "step": 2390 }, { "epoch": 0.14, "learning_rate": 4.971743692737814e-06, "logits/chosen": -2.9084324836730957, "logits/rejected": -2.786465883255005, "logps/chosen": -135.71572875976562, "logps/rejected": -896.41455078125, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -0.6771860122680664, "rewards/margins": 7.848405361175537, "rewards/rejected": -8.525590896606445, "step": 2400 }, { "epoch": 0.14, "learning_rate": 4.970958173934144e-06, "logits/chosen": -2.9394097328186035, "logits/rejected": -2.847304582595825, "logps/chosen": -116.1565933227539, "logps/rejected": -964.9874267578125, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.42276591062545776, "rewards/margins": 8.783390045166016, "rewards/rejected": -9.206155776977539, "step": 2410 }, { "epoch": 0.14, "learning_rate": 4.970161949460808e-06, "logits/chosen": -2.910912036895752, "logits/rejected": -2.8202598094940186, "logps/chosen": -129.80703735351562, "logps/rejected": -878.2278442382812, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -0.5632044076919556, "rewards/margins": 7.766985893249512, "rewards/rejected": -8.33018970489502, "step": 2420 }, { "epoch": 0.14, "learning_rate": 4.969355022767529e-06, "logits/chosen": -2.9213080406188965, "logits/rejected": -2.821371555328369, "logps/chosen": -147.2045440673828, "logps/rejected": -948.5740356445312, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -0.7376371622085571, "rewards/margins": 8.297497749328613, "rewards/rejected": -9.035135269165039, "step": 2430 }, { "epoch": 0.15, "learning_rate": 4.968537397350395e-06, "logits/chosen": -2.93369460105896, "logits/rejected": -2.8000893592834473, "logps/chosen": -118.7147216796875, "logps/rejected": -1059.5216064453125, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -0.4695959687232971, "rewards/margins": 9.663223266601562, "rewards/rejected": -10.132821083068848, "step": 2440 }, { "epoch": 0.15, "learning_rate": 4.967709076751848e-06, "logits/chosen": -2.9022932052612305, "logits/rejected": -2.7864482402801514, "logps/chosen": -106.00236511230469, "logps/rejected": -884.3004150390625, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": -0.33720219135284424, "rewards/margins": 8.058537483215332, "rewards/rejected": -8.395739555358887, "step": 2450 }, { "epoch": 0.15, "learning_rate": 4.96687006456067e-06, "logits/chosen": -2.9117045402526855, "logits/rejected": -2.826190948486328, "logps/chosen": -85.51109313964844, "logps/rejected": -891.7425537109375, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.12658901512622833, "rewards/margins": 8.349087715148926, "rewards/rejected": -8.475676536560059, "step": 2460 }, { "epoch": 0.15, "learning_rate": 4.966020364411964e-06, "logits/chosen": -2.931962013244629, "logits/rejected": -2.825965404510498, "logps/chosen": -99.91218566894531, "logps/rejected": -877.6570434570312, "loss": 0.0288, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.24606366455554962, "rewards/margins": 8.083056449890137, "rewards/rejected": -8.329119682312012, "step": 2470 }, { "epoch": 0.15, "learning_rate": 4.965159979987139e-06, "logits/chosen": -2.9141736030578613, "logits/rejected": -2.8199572563171387, "logps/chosen": -110.23834228515625, "logps/rejected": -953.1521606445312, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -0.3438529968261719, "rewards/margins": 8.732954025268555, "rewards/rejected": -9.076807975769043, "step": 2480 }, { "epoch": 0.15, "learning_rate": 4.964288915013895e-06, "logits/chosen": -2.9574790000915527, "logits/rejected": -2.8234541416168213, "logps/chosen": -80.96260070800781, "logps/rejected": -978.5983276367188, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -0.10227058082818985, "rewards/margins": 9.238229751586914, "rewards/rejected": -9.340500831604004, "step": 2490 }, { "epoch": 0.15, "learning_rate": 4.963407173266208e-06, "logits/chosen": -2.9136593341827393, "logits/rejected": -2.807762861251831, "logps/chosen": -77.60491943359375, "logps/rejected": -871.3599853515625, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": -0.09069880098104477, "rewards/margins": 8.173863410949707, "rewards/rejected": -8.264561653137207, "step": 2500 }, { "epoch": 0.15, "learning_rate": 4.962514758564309e-06, "logits/chosen": -2.8861048221588135, "logits/rejected": -2.8033528327941895, "logps/chosen": -61.968505859375, "logps/rejected": -1041.1016845703125, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 0.05064171552658081, "rewards/margins": 10.013525009155273, "rewards/rejected": -9.962882041931152, "step": 2510 }, { "epoch": 0.15, "learning_rate": 4.961611674774674e-06, "logits/chosen": -2.926732301712036, "logits/rejected": -2.827977418899536, "logps/chosen": -71.05899047851562, "logps/rejected": -848.5643310546875, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -0.04070550575852394, "rewards/margins": 8.001574516296387, "rewards/rejected": -8.042280197143555, "step": 2520 }, { "epoch": 0.15, "learning_rate": 4.960697925810003e-06, "logits/chosen": -2.9085631370544434, "logits/rejected": -2.8304319381713867, "logps/chosen": -92.6507797241211, "logps/rejected": -1018.9605712890625, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -0.21855959296226501, "rewards/margins": 9.519502639770508, "rewards/rejected": -9.738062858581543, "step": 2530 }, { "epoch": 0.15, "learning_rate": 4.9597735156292024e-06, "logits/chosen": -2.9259884357452393, "logits/rejected": -2.8063361644744873, "logps/chosen": -86.19991302490234, "logps/rejected": -892.0320434570312, "loss": 0.0308, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.11056070029735565, "rewards/margins": 8.37292766571045, "rewards/rejected": -8.483488082885742, "step": 2540 }, { "epoch": 0.15, "learning_rate": 4.9588384482373695e-06, "logits/chosen": -2.884809970855713, "logits/rejected": -2.8330957889556885, "logps/chosen": -98.22537231445312, "logps/rejected": -938.7869873046875, "loss": 0.0597, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2814822196960449, "rewards/margins": 8.667036056518555, "rewards/rejected": -8.948518753051758, "step": 2550 }, { "epoch": 0.15, "learning_rate": 4.957892727685778e-06, "logits/chosen": -2.9501452445983887, "logits/rejected": -2.847529649734497, "logps/chosen": -107.39640808105469, "logps/rejected": -986.8916015625, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -0.34068137407302856, "rewards/margins": 9.089051246643066, "rewards/rejected": -9.429732322692871, "step": 2560 }, { "epoch": 0.15, "learning_rate": 4.956936358071853e-06, "logits/chosen": -2.9261088371276855, "logits/rejected": -2.828829765319824, "logps/chosen": -119.02828216552734, "logps/rejected": -933.5569458007812, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -0.4049844741821289, "rewards/margins": 8.486495018005371, "rewards/rejected": -8.8914794921875, "step": 2570 }, { "epoch": 0.15, "learning_rate": 4.955969343539162e-06, "logits/chosen": -2.906745433807373, "logits/rejected": -2.770998001098633, "logps/chosen": -119.28253173828125, "logps/rejected": -986.7303466796875, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -0.4944824278354645, "rewards/margins": 8.923219680786133, "rewards/rejected": -9.417702674865723, "step": 2580 }, { "epoch": 0.15, "learning_rate": 4.954991688277391e-06, "logits/chosen": -2.867947816848755, "logits/rejected": -2.789060115814209, "logps/chosen": -156.2045135498047, "logps/rejected": -965.4269409179688, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -0.8836523294448853, "rewards/margins": 8.333108901977539, "rewards/rejected": -9.216760635375977, "step": 2590 }, { "epoch": 0.16, "learning_rate": 4.954003396522325e-06, "logits/chosen": -2.94377064704895, "logits/rejected": -2.8526902198791504, "logps/chosen": -158.0526885986328, "logps/rejected": -1045.19482421875, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -0.9019582867622375, "rewards/margins": 9.109042167663574, "rewards/rejected": -10.011000633239746, "step": 2600 }, { "epoch": 0.16, "learning_rate": 4.953004472555838e-06, "logits/chosen": -2.913330316543579, "logits/rejected": -2.7569892406463623, "logps/chosen": -148.7244415283203, "logps/rejected": -857.0618896484375, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -0.7785552144050598, "rewards/margins": 7.3511457443237305, "rewards/rejected": -8.129701614379883, "step": 2610 }, { "epoch": 0.16, "learning_rate": 4.951994920705865e-06, "logits/chosen": -2.9549460411071777, "logits/rejected": -2.8786098957061768, "logps/chosen": -102.9132308959961, "logps/rejected": -871.0667724609375, "loss": 0.0901, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.32913029193878174, "rewards/margins": 7.944624423980713, "rewards/rejected": -8.273754119873047, "step": 2620 }, { "epoch": 0.16, "learning_rate": 4.95097474534639e-06, "logits/chosen": -2.906001567840576, "logits/rejected": -2.8609766960144043, "logps/chosen": -108.26625061035156, "logps/rejected": -946.1947021484375, "loss": 0.0381, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.328972190618515, "rewards/margins": 8.691851615905762, "rewards/rejected": -9.020824432373047, "step": 2630 }, { "epoch": 0.16, "learning_rate": 4.949943950897422e-06, "logits/chosen": -2.9310507774353027, "logits/rejected": -2.8409712314605713, "logps/chosen": -76.93956756591797, "logps/rejected": -872.7999877929688, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -0.0723833218216896, "rewards/margins": 8.212732315063477, "rewards/rejected": -8.285116195678711, "step": 2640 }, { "epoch": 0.16, "learning_rate": 4.94890254182498e-06, "logits/chosen": -2.9161858558654785, "logits/rejected": -2.83249568939209, "logps/chosen": -74.18345642089844, "logps/rejected": -844.9074096679688, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.012019271962344646, "rewards/margins": 7.98058557510376, "rewards/rejected": -7.992604732513428, "step": 2650 }, { "epoch": 0.16, "learning_rate": 4.947850522641072e-06, "logits/chosen": -2.9110209941864014, "logits/rejected": -2.8343100547790527, "logps/chosen": -81.40888214111328, "logps/rejected": -758.9796142578125, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -0.1388753354549408, "rewards/margins": 7.00775671005249, "rewards/rejected": -7.146633148193359, "step": 2660 }, { "epoch": 0.16, "learning_rate": 4.946787897903674e-06, "logits/chosen": -2.9253344535827637, "logits/rejected": -2.802432060241699, "logps/chosen": -81.86743927001953, "logps/rejected": -1033.0467529296875, "loss": 0.0869, "rewards/accuracies": 1.0, "rewards/chosen": -0.10440780967473984, "rewards/margins": 9.79059886932373, "rewards/rejected": -9.89500617980957, "step": 2670 }, { "epoch": 0.16, "learning_rate": 4.945714672216713e-06, "logits/chosen": -2.910594940185547, "logits/rejected": -2.8370227813720703, "logps/chosen": -86.73094940185547, "logps/rejected": -1026.197021484375, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -0.15769681334495544, "rewards/margins": 9.66103744506836, "rewards/rejected": -9.818734169006348, "step": 2680 }, { "epoch": 0.16, "learning_rate": 4.944630850230045e-06, "logits/chosen": -2.907191038131714, "logits/rejected": -2.8154234886169434, "logps/chosen": -81.18495178222656, "logps/rejected": -901.1973876953125, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.06847550719976425, "rewards/margins": 8.506799697875977, "rewards/rejected": -8.575275421142578, "step": 2690 }, { "epoch": 0.16, "learning_rate": 4.9435364366394334e-06, "logits/chosen": -2.9387497901916504, "logits/rejected": -2.823193311691284, "logps/chosen": -71.61036682128906, "logps/rejected": -825.7556762695312, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -0.0704922154545784, "rewards/margins": 7.746884346008301, "rewards/rejected": -7.817376613616943, "step": 2700 }, { "epoch": 0.16, "learning_rate": 4.942431436186536e-06, "logits/chosen": -2.8955795764923096, "logits/rejected": -2.7983651161193848, "logps/chosen": -69.2351303100586, "logps/rejected": -830.044921875, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -0.015279693529009819, "rewards/margins": 7.844470024108887, "rewards/rejected": -7.859750270843506, "step": 2710 }, { "epoch": 0.16, "learning_rate": 4.941315853658873e-06, "logits/chosen": -2.912855863571167, "logits/rejected": -2.789764881134033, "logps/chosen": -95.71670532226562, "logps/rejected": -865.7041015625, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -0.17794740200042725, "rewards/margins": 8.018354415893555, "rewards/rejected": -8.196301460266113, "step": 2720 }, { "epoch": 0.16, "learning_rate": 4.940189693889819e-06, "logits/chosen": -2.9068188667297363, "logits/rejected": -2.7605984210968018, "logps/chosen": -98.43501281738281, "logps/rejected": -935.4601440429688, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -0.2977240979671478, "rewards/margins": 8.620194435119629, "rewards/rejected": -8.917917251586914, "step": 2730 }, { "epoch": 0.16, "learning_rate": 4.939052961758569e-06, "logits/chosen": -2.9329752922058105, "logits/rejected": -2.8329977989196777, "logps/chosen": -83.51493072509766, "logps/rejected": -970.2652587890625, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.14018702507019043, "rewards/margins": 9.116633415222168, "rewards/rejected": -9.256821632385254, "step": 2740 }, { "epoch": 0.16, "learning_rate": 4.937905662190129e-06, "logits/chosen": -2.926013231277466, "logits/rejected": -2.8018393516540527, "logps/chosen": -83.0693359375, "logps/rejected": -947.9988403320312, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -0.1644803136587143, "rewards/margins": 8.875802040100098, "rewards/rejected": -9.040281295776367, "step": 2750 }, { "epoch": 0.16, "learning_rate": 4.936747800155285e-06, "logits/chosen": -2.8951776027679443, "logits/rejected": -2.825871467590332, "logps/chosen": -69.45985412597656, "logps/rejected": -886.7191162109375, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -0.02263670228421688, "rewards/margins": 8.406549453735352, "rewards/rejected": -8.42918586730957, "step": 2760 }, { "epoch": 0.17, "learning_rate": 4.935579380670592e-06, "logits/chosen": -2.9374606609344482, "logits/rejected": -2.8428306579589844, "logps/chosen": -102.8312759399414, "logps/rejected": -990.3069458007812, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -0.28860825300216675, "rewards/margins": 9.165548324584961, "rewards/rejected": -9.454156875610352, "step": 2770 }, { "epoch": 0.17, "learning_rate": 4.934400408798339e-06, "logits/chosen": -2.8919525146484375, "logits/rejected": -2.782038927078247, "logps/chosen": -70.26911926269531, "logps/rejected": -905.73681640625, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": 0.0039605796337127686, "rewards/margins": 8.629324913024902, "rewards/rejected": -8.625364303588867, "step": 2780 }, { "epoch": 0.17, "learning_rate": 4.93321088964654e-06, "logits/chosen": -2.9169554710388184, "logits/rejected": -2.800493001937866, "logps/chosen": -84.66515350341797, "logps/rejected": -965.3494262695312, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.13011623919010162, "rewards/margins": 9.08696174621582, "rewards/rejected": -9.217077255249023, "step": 2790 }, { "epoch": 0.17, "learning_rate": 4.932010828368903e-06, "logits/chosen": -2.9324235916137695, "logits/rejected": -2.8542137145996094, "logps/chosen": -124.497314453125, "logps/rejected": -964.9666748046875, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/chosen": -0.5493017435073853, "rewards/margins": 8.65716552734375, "rewards/rejected": -9.206467628479004, "step": 2800 }, { "epoch": 0.17, "learning_rate": 4.930800230164812e-06, "logits/chosen": -2.9189116954803467, "logits/rejected": -2.7923741340637207, "logps/chosen": -115.26444244384766, "logps/rejected": -1061.429443359375, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -0.45345592498779297, "rewards/margins": 9.718252182006836, "rewards/rejected": -10.171709060668945, "step": 2810 }, { "epoch": 0.17, "learning_rate": 4.929579100279302e-06, "logits/chosen": -2.934934616088867, "logits/rejected": -2.8368613719940186, "logps/chosen": -96.55180358886719, "logps/rejected": -878.8713989257812, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -0.2639729678630829, "rewards/margins": 8.087080001831055, "rewards/rejected": -8.351053237915039, "step": 2820 }, { "epoch": 0.17, "learning_rate": 4.92834744400304e-06, "logits/chosen": -2.9105095863342285, "logits/rejected": -2.821148633956909, "logps/chosen": -80.38851165771484, "logps/rejected": -1007.0836181640625, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -0.09356964379549026, "rewards/margins": 9.5266752243042, "rewards/rejected": -9.620244026184082, "step": 2830 }, { "epoch": 0.17, "learning_rate": 4.927105266672296e-06, "logits/chosen": -2.926135540008545, "logits/rejected": -2.81009840965271, "logps/chosen": -103.74556732177734, "logps/rejected": -996.8015747070312, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -0.27629369497299194, "rewards/margins": 9.234261512756348, "rewards/rejected": -9.510554313659668, "step": 2840 }, { "epoch": 0.17, "learning_rate": 4.925852573668928e-06, "logits/chosen": -2.9445695877075195, "logits/rejected": -2.856632947921753, "logps/chosen": -106.05985260009766, "logps/rejected": -1072.9588623046875, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -0.33258286118507385, "rewards/margins": 9.954703330993652, "rewards/rejected": -10.287286758422852, "step": 2850 }, { "epoch": 0.17, "learning_rate": 4.924589370420351e-06, "logits/chosen": -2.953538179397583, "logits/rejected": -2.8282103538513184, "logps/chosen": -128.39675903320312, "logps/rejected": -931.0433349609375, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -0.5655667781829834, "rewards/margins": 8.30016040802002, "rewards/rejected": -8.865727424621582, "step": 2860 }, { "epoch": 0.17, "learning_rate": 4.923315662399517e-06, "logits/chosen": -2.881783962249756, "logits/rejected": -2.797868251800537, "logps/chosen": -95.38026428222656, "logps/rejected": -894.2131958007812, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -0.19933325052261353, "rewards/margins": 8.294595718383789, "rewards/rejected": -8.493928909301758, "step": 2870 }, { "epoch": 0.17, "learning_rate": 4.9220314551248915e-06, "logits/chosen": -2.89984393119812, "logits/rejected": -2.722132921218872, "logps/chosen": -66.76728820800781, "logps/rejected": -843.0881958007812, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 0.03717636317014694, "rewards/margins": 8.017894744873047, "rewards/rejected": -7.980717658996582, "step": 2880 }, { "epoch": 0.17, "learning_rate": 4.920736754160429e-06, "logits/chosen": -2.9310741424560547, "logits/rejected": -2.846287965774536, "logps/chosen": -93.0264663696289, "logps/rejected": -982.4656982421875, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -0.21156001091003418, "rewards/margins": 9.166433334350586, "rewards/rejected": -9.3779935836792, "step": 2890 }, { "epoch": 0.17, "learning_rate": 4.91943156511555e-06, "logits/chosen": -2.929076910018921, "logits/rejected": -2.810227155685425, "logps/chosen": -99.96405029296875, "logps/rejected": -1053.358154296875, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.340766042470932, "rewards/margins": 9.75223445892334, "rewards/rejected": -10.093001365661621, "step": 2900 }, { "epoch": 0.17, "learning_rate": 4.918115893645113e-06, "logits/chosen": -2.9340429306030273, "logits/rejected": -2.8081812858581543, "logps/chosen": -75.57527160644531, "logps/rejected": -1050.577392578125, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -0.0290999673306942, "rewards/margins": 10.024075508117676, "rewards/rejected": -10.053176879882812, "step": 2910 }, { "epoch": 0.17, "learning_rate": 4.916789745449396e-06, "logits/chosen": -2.903440475463867, "logits/rejected": -2.7989277839660645, "logps/chosen": -66.62117767333984, "logps/rejected": -937.4807739257812, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": 0.022950012236833572, "rewards/margins": 8.961909294128418, "rewards/rejected": -8.938959121704102, "step": 2920 }, { "epoch": 0.17, "learning_rate": 4.915453126274065e-06, "logits/chosen": -2.9188408851623535, "logits/rejected": -2.78098726272583, "logps/chosen": -90.40374755859375, "logps/rejected": -899.36083984375, "loss": 0.0391, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.17839650809764862, "rewards/margins": 8.379777908325195, "rewards/rejected": -8.558175086975098, "step": 2930 }, { "epoch": 0.18, "learning_rate": 4.914106041910155e-06, "logits/chosen": -2.8941490650177, "logits/rejected": -2.763282060623169, "logps/chosen": -92.59281158447266, "logps/rejected": -1018.7340087890625, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.16338813304901123, "rewards/margins": 9.582254409790039, "rewards/rejected": -9.745641708374023, "step": 2940 }, { "epoch": 0.18, "learning_rate": 4.9127484981940425e-06, "logits/chosen": -2.9010350704193115, "logits/rejected": -2.8127832412719727, "logps/chosen": -91.32048034667969, "logps/rejected": -1050.362548828125, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -0.20067770779132843, "rewards/margins": 9.852140426635742, "rewards/rejected": -10.052818298339844, "step": 2950 }, { "epoch": 0.18, "learning_rate": 4.911380501007417e-06, "logits/chosen": -2.925996780395508, "logits/rejected": -2.7819266319274902, "logps/chosen": -196.75430297851562, "logps/rejected": -961.32177734375, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -1.246008038520813, "rewards/margins": 7.936345100402832, "rewards/rejected": -9.182353019714355, "step": 2960 }, { "epoch": 0.18, "learning_rate": 4.910002056277263e-06, "logits/chosen": -2.8940131664276123, "logits/rejected": -2.776315927505493, "logps/chosen": -91.83622741699219, "logps/rejected": -1009.2171630859375, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -0.16851451992988586, "rewards/margins": 9.462518692016602, "rewards/rejected": -9.631032943725586, "step": 2970 }, { "epoch": 0.18, "learning_rate": 4.908613169975828e-06, "logits/chosen": -2.887906551361084, "logits/rejected": -2.793120861053467, "logps/chosen": -76.0972900390625, "logps/rejected": -970.8505859375, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.05901686102151871, "rewards/margins": 9.19333267211914, "rewards/rejected": -9.252348899841309, "step": 2980 }, { "epoch": 0.18, "learning_rate": 4.9072138481205985e-06, "logits/chosen": -2.8974432945251465, "logits/rejected": -2.806164264678955, "logps/chosen": -100.28203582763672, "logps/rejected": -1064.16748046875, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -0.3262271285057068, "rewards/margins": 9.866116523742676, "rewards/rejected": -10.192342758178711, "step": 2990 }, { "epoch": 0.18, "learning_rate": 4.905804096774274e-06, "logits/chosen": -2.895698070526123, "logits/rejected": -2.798494815826416, "logps/chosen": -88.13387298583984, "logps/rejected": -995.4547119140625, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -0.1971902847290039, "rewards/margins": 9.32446575164795, "rewards/rejected": -9.521655082702637, "step": 3000 }, { "epoch": 0.18, "learning_rate": 4.90438392204474e-06, "logits/chosen": -2.9238946437835693, "logits/rejected": -2.8205199241638184, "logps/chosen": -73.76292419433594, "logps/rejected": -858.8571166992188, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": 0.014222566969692707, "rewards/margins": 8.153276443481445, "rewards/rejected": -8.139055252075195, "step": 3010 }, { "epoch": 0.18, "learning_rate": 4.902953330085045e-06, "logits/chosen": -2.9124464988708496, "logits/rejected": -2.8200974464416504, "logps/chosen": -57.89391326904297, "logps/rejected": -861.4302978515625, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": 0.08842764049768448, "rewards/margins": 8.274726867675781, "rewards/rejected": -8.186299324035645, "step": 3020 }, { "epoch": 0.18, "learning_rate": 4.901512327093369e-06, "logits/chosen": -2.930318593978882, "logits/rejected": -2.812051296234131, "logps/chosen": -69.50514221191406, "logps/rejected": -974.1229248046875, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": 0.03143477067351341, "rewards/margins": 9.326597213745117, "rewards/rejected": -9.295161247253418, "step": 3030 }, { "epoch": 0.18, "learning_rate": 4.900060919313001e-06, "logits/chosen": -2.9161267280578613, "logits/rejected": -2.815610885620117, "logps/chosen": -82.34928894042969, "logps/rejected": -998.6640625, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -0.1185515746474266, "rewards/margins": 9.436986923217773, "rewards/rejected": -9.555538177490234, "step": 3040 }, { "epoch": 0.18, "learning_rate": 4.8985991130323055e-06, "logits/chosen": -2.878901958465576, "logits/rejected": -2.8079466819763184, "logps/chosen": -78.50292205810547, "logps/rejected": -855.5111083984375, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -0.1105574369430542, "rewards/margins": 7.991842746734619, "rewards/rejected": -8.102399826049805, "step": 3050 }, { "epoch": 0.18, "learning_rate": 4.8971269145847036e-06, "logits/chosen": -2.8982720375061035, "logits/rejected": -2.787421226501465, "logps/chosen": -82.7979507446289, "logps/rejected": -998.7786865234375, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.16418203711509705, "rewards/margins": 9.379179954528809, "rewards/rejected": -9.54336166381836, "step": 3060 }, { "epoch": 0.18, "learning_rate": 4.895644330348639e-06, "logits/chosen": -2.9356489181518555, "logits/rejected": -2.790642261505127, "logps/chosen": -107.53193664550781, "logps/rejected": -1074.806884765625, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.43008285760879517, "rewards/margins": 9.864014625549316, "rewards/rejected": -10.294095993041992, "step": 3070 }, { "epoch": 0.18, "learning_rate": 4.8941513667475545e-06, "logits/chosen": -2.9424381256103516, "logits/rejected": -2.8311257362365723, "logps/chosen": -68.97103881835938, "logps/rejected": -1078.7869873046875, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.0492127500474453, "rewards/margins": 10.294766426086426, "rewards/rejected": -10.34398078918457, "step": 3080 }, { "epoch": 0.18, "learning_rate": 4.892648030249863e-06, "logits/chosen": -2.918834686279297, "logits/rejected": -2.8089890480041504, "logps/chosen": -92.81131744384766, "logps/rejected": -974.1917724609375, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -0.16505205631256104, "rewards/margins": 9.129356384277344, "rewards/rejected": -9.29440689086914, "step": 3090 }, { "epoch": 0.18, "learning_rate": 4.891134327368919e-06, "logits/chosen": -2.926624298095703, "logits/rejected": -2.830854892730713, "logps/chosen": -90.18801879882812, "logps/rejected": -985.90283203125, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -0.16908162832260132, "rewards/margins": 9.234790802001953, "rewards/rejected": -9.4038724899292, "step": 3100 }, { "epoch": 0.19, "learning_rate": 4.889610264662984e-06, "logits/chosen": -2.9370040893554688, "logits/rejected": -2.7925350666046143, "logps/chosen": -71.12479400634766, "logps/rejected": -1097.640869140625, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.02991882897913456, "rewards/margins": 10.50147533416748, "rewards/rejected": -10.531394004821777, "step": 3110 }, { "epoch": 0.19, "learning_rate": 4.888075848735216e-06, "logits/chosen": -2.9268550872802734, "logits/rejected": -2.834078311920166, "logps/chosen": -125.74955749511719, "logps/rejected": -987.5769653320312, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": -0.5263983607292175, "rewards/margins": 8.890470504760742, "rewards/rejected": -9.416869163513184, "step": 3120 }, { "epoch": 0.19, "learning_rate": 4.8865310862336185e-06, "logits/chosen": -2.9167327880859375, "logits/rejected": -2.827451229095459, "logps/chosen": -97.91780090332031, "logps/rejected": -1008.7288208007812, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.23241129517555237, "rewards/margins": 9.407768249511719, "rewards/rejected": -9.640179634094238, "step": 3130 }, { "epoch": 0.19, "learning_rate": 4.88497598385103e-06, "logits/chosen": -2.861631393432617, "logits/rejected": -2.7636709213256836, "logps/chosen": -90.74644470214844, "logps/rejected": -1004.3260498046875, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.24270197749137878, "rewards/margins": 9.349315643310547, "rewards/rejected": -9.592016220092773, "step": 3140 }, { "epoch": 0.19, "learning_rate": 4.883410548325083e-06, "logits/chosen": -2.9055678844451904, "logits/rejected": -2.804964303970337, "logps/chosen": -89.72052764892578, "logps/rejected": -1050.116455078125, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -0.1484868973493576, "rewards/margins": 9.905959129333496, "rewards/rejected": -10.054445266723633, "step": 3150 }, { "epoch": 0.19, "learning_rate": 4.881834786438183e-06, "logits/chosen": -2.8878417015075684, "logits/rejected": -2.828029155731201, "logps/chosen": -64.44745635986328, "logps/rejected": -916.2906494140625, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": 0.08098606765270233, "rewards/margins": 8.80667781829834, "rewards/rejected": -8.725691795349121, "step": 3160 }, { "epoch": 0.19, "learning_rate": 4.880248705017472e-06, "logits/chosen": -2.929187774658203, "logits/rejected": -2.8213164806365967, "logps/chosen": -75.32865905761719, "logps/rejected": -940.2420043945312, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -0.10205451399087906, "rewards/margins": 8.863107681274414, "rewards/rejected": -8.96516227722168, "step": 3170 }, { "epoch": 0.19, "learning_rate": 4.878652310934804e-06, "logits/chosen": -2.9131598472595215, "logits/rejected": -2.7929470539093018, "logps/chosen": -99.04710388183594, "logps/rejected": -1051.5531005859375, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.302415132522583, "rewards/margins": 9.769061088562012, "rewards/rejected": -10.071475982666016, "step": 3180 }, { "epoch": 0.19, "learning_rate": 4.877045611106715e-06, "logits/chosen": -2.9377360343933105, "logits/rejected": -2.8415417671203613, "logps/chosen": -113.6156005859375, "logps/rejected": -1036.766845703125, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.3979889452457428, "rewards/margins": 9.514283180236816, "rewards/rejected": -9.912271499633789, "step": 3190 }, { "epoch": 0.19, "learning_rate": 4.8754286124943885e-06, "logits/chosen": -2.9039368629455566, "logits/rejected": -2.813891887664795, "logps/chosen": -100.67926788330078, "logps/rejected": -997.3171997070312, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.2190081626176834, "rewards/margins": 9.310359954833984, "rewards/rejected": -9.52936840057373, "step": 3200 }, { "epoch": 0.19, "learning_rate": 4.873801322103632e-06, "logits/chosen": -2.900237798690796, "logits/rejected": -2.7764649391174316, "logps/chosen": -73.33778381347656, "logps/rejected": -1124.5582275390625, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.0386403426527977, "rewards/margins": 10.754034996032715, "rewards/rejected": -10.792677879333496, "step": 3210 }, { "epoch": 0.19, "learning_rate": 4.872163746984839e-06, "logits/chosen": -2.866896152496338, "logits/rejected": -2.789792537689209, "logps/chosen": -77.7817611694336, "logps/rejected": -1108.5367431640625, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -0.11586730182170868, "rewards/margins": 10.520917892456055, "rewards/rejected": -10.636785507202148, "step": 3220 }, { "epoch": 0.19, "learning_rate": 4.8705158942329676e-06, "logits/chosen": -2.9006447792053223, "logits/rejected": -2.8281712532043457, "logps/chosen": -97.91754150390625, "logps/rejected": -934.1824340820312, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -0.2949380874633789, "rewards/margins": 8.605655670166016, "rewards/rejected": -8.900593757629395, "step": 3230 }, { "epoch": 0.19, "learning_rate": 4.8688577709875015e-06, "logits/chosen": -2.896557569503784, "logits/rejected": -2.8031506538391113, "logps/chosen": -85.1817398071289, "logps/rejected": -871.4775390625, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -0.1452803909778595, "rewards/margins": 8.143285751342773, "rewards/rejected": -8.288567543029785, "step": 3240 }, { "epoch": 0.19, "learning_rate": 4.8671893844324215e-06, "logits/chosen": -2.8887970447540283, "logits/rejected": -2.7693824768066406, "logps/chosen": -91.21710968017578, "logps/rejected": -1113.0513916015625, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.19352278113365173, "rewards/margins": 10.491316795349121, "rewards/rejected": -10.684839248657227, "step": 3250 }, { "epoch": 0.19, "learning_rate": 4.865510741796178e-06, "logits/chosen": -2.9023499488830566, "logits/rejected": -2.8091979026794434, "logps/chosen": -126.04353332519531, "logps/rejected": -1067.5782470703125, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.5405257940292358, "rewards/margins": 9.677751541137695, "rewards/rejected": -10.218276023864746, "step": 3260 }, { "epoch": 0.19, "learning_rate": 4.863821850351655e-06, "logits/chosen": -2.8556227684020996, "logits/rejected": -2.7804763317108154, "logps/chosen": -105.49711608886719, "logps/rejected": -1011.6388549804688, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -0.37187138199806213, "rewards/margins": 9.298276901245117, "rewards/rejected": -9.670148849487305, "step": 3270 }, { "epoch": 0.2, "learning_rate": 4.862122717416142e-06, "logits/chosen": -2.8979685306549072, "logits/rejected": -2.756927013397217, "logps/chosen": -97.33106994628906, "logps/rejected": -1057.734375, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.22641482949256897, "rewards/margins": 9.916643142700195, "rewards/rejected": -10.143056869506836, "step": 3280 }, { "epoch": 0.2, "learning_rate": 4.860413350351299e-06, "logits/chosen": -2.9265663623809814, "logits/rejected": -2.8173117637634277, "logps/chosen": -117.81965637207031, "logps/rejected": -1057.960205078125, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -0.5091457962989807, "rewards/margins": 9.628435134887695, "rewards/rejected": -10.137581825256348, "step": 3290 }, { "epoch": 0.2, "learning_rate": 4.8586937565631265e-06, "logits/chosen": -2.8908934593200684, "logits/rejected": -2.7778306007385254, "logps/chosen": -102.55086517333984, "logps/rejected": -1013.2619018554688, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -0.29052549600601196, "rewards/margins": 9.392009735107422, "rewards/rejected": -9.682535171508789, "step": 3300 }, { "epoch": 0.2, "learning_rate": 4.856963943501935e-06, "logits/chosen": -2.896718740463257, "logits/rejected": -2.7803893089294434, "logps/chosen": -121.4454116821289, "logps/rejected": -1098.3824462890625, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -0.5048161745071411, "rewards/margins": 10.032444953918457, "rewards/rejected": -10.537260055541992, "step": 3310 }, { "epoch": 0.2, "learning_rate": 4.85522391866231e-06, "logits/chosen": -2.920180559158325, "logits/rejected": -2.8442211151123047, "logps/chosen": -118.40858459472656, "logps/rejected": -1077.216552734375, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -0.511841893196106, "rewards/margins": 9.819600105285645, "rewards/rejected": -10.331441879272461, "step": 3320 }, { "epoch": 0.2, "learning_rate": 4.85347368958308e-06, "logits/chosen": -2.9260549545288086, "logits/rejected": -2.8104348182678223, "logps/chosen": -81.95069885253906, "logps/rejected": -992.25732421875, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.09835229814052582, "rewards/margins": 9.383533477783203, "rewards/rejected": -9.48188591003418, "step": 3330 }, { "epoch": 0.2, "learning_rate": 4.8517132638472845e-06, "logits/chosen": -2.900827407836914, "logits/rejected": -2.8186659812927246, "logps/chosen": -82.92295837402344, "logps/rejected": -930.4632568359375, "loss": 0.0366, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.1473492532968521, "rewards/margins": 8.729012489318848, "rewards/rejected": -8.876360893249512, "step": 3340 }, { "epoch": 0.2, "learning_rate": 4.849942649082143e-06, "logits/chosen": -2.878818988800049, "logits/rejected": -2.775176525115967, "logps/chosen": -75.95471954345703, "logps/rejected": -954.2730712890625, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -0.07086614519357681, "rewards/margins": 9.017777442932129, "rewards/rejected": -9.088644027709961, "step": 3350 }, { "epoch": 0.2, "learning_rate": 4.848161852959016e-06, "logits/chosen": -2.920950412750244, "logits/rejected": -2.8400120735168457, "logps/chosen": -66.54155731201172, "logps/rejected": -1020.5328979492188, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": 0.023877177387475967, "rewards/margins": 9.787919998168945, "rewards/rejected": -9.764042854309082, "step": 3360 }, { "epoch": 0.2, "learning_rate": 4.84637088319338e-06, "logits/chosen": -2.933952808380127, "logits/rejected": -2.819856882095337, "logps/chosen": -65.50806427001953, "logps/rejected": -953.2479248046875, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 0.0579400435090065, "rewards/margins": 9.150853157043457, "rewards/rejected": -9.092912673950195, "step": 3370 }, { "epoch": 0.2, "learning_rate": 4.844569747544788e-06, "logits/chosen": -2.9174880981445312, "logits/rejected": -2.8065009117126465, "logps/chosen": -70.78478240966797, "logps/rejected": -1098.2421875, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.018338222056627274, "rewards/margins": 10.509620666503906, "rewards/rejected": -10.527958869934082, "step": 3380 }, { "epoch": 0.2, "learning_rate": 4.842758453816836e-06, "logits/chosen": -2.9255166053771973, "logits/rejected": -2.8217504024505615, "logps/chosen": -79.82255554199219, "logps/rejected": -989.0732421875, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.11151568591594696, "rewards/margins": 9.333974838256836, "rewards/rejected": -9.445490837097168, "step": 3390 }, { "epoch": 0.2, "learning_rate": 4.840937009857134e-06, "logits/chosen": -2.9049763679504395, "logits/rejected": -2.763141393661499, "logps/chosen": -92.43160247802734, "logps/rejected": -1064.4652099609375, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.23361711204051971, "rewards/margins": 9.972365379333496, "rewards/rejected": -10.20598316192627, "step": 3400 }, { "epoch": 0.2, "learning_rate": 4.839105423557266e-06, "logits/chosen": -2.8952553272247314, "logits/rejected": -2.7982900142669678, "logps/chosen": -104.31312561035156, "logps/rejected": -1009.9908447265625, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -0.3694721460342407, "rewards/margins": 9.28325080871582, "rewards/rejected": -9.65272331237793, "step": 3410 }, { "epoch": 0.2, "learning_rate": 4.8372637028527615e-06, "logits/chosen": -2.9060323238372803, "logits/rejected": -2.815075635910034, "logps/chosen": -87.04627990722656, "logps/rejected": -1043.029052734375, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -0.11950896680355072, "rewards/margins": 9.856620788574219, "rewards/rejected": -9.976129531860352, "step": 3420 }, { "epoch": 0.2, "learning_rate": 4.835411855723056e-06, "logits/chosen": -2.9083447456359863, "logits/rejected": -2.8016884326934814, "logps/chosen": -99.11207580566406, "logps/rejected": -882.466796875, "loss": 0.0532, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.32335492968559265, "rewards/margins": 8.073678970336914, "rewards/rejected": -8.39703369140625, "step": 3430 }, { "epoch": 0.21, "learning_rate": 4.83354989019146e-06, "logits/chosen": -2.8986876010894775, "logits/rejected": -2.777966260910034, "logps/chosen": -98.28465270996094, "logps/rejected": -982.7640380859375, "loss": 0.0527, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2675360441207886, "rewards/margins": 9.127363204956055, "rewards/rejected": -9.394899368286133, "step": 3440 }, { "epoch": 0.21, "learning_rate": 4.831677814325122e-06, "logits/chosen": -2.9343008995056152, "logits/rejected": -2.796949863433838, "logps/chosen": -135.0299530029297, "logps/rejected": -1021.5451049804688, "loss": 0.1024, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6532796621322632, "rewards/margins": 9.130407333374023, "rewards/rejected": -9.783686637878418, "step": 3450 }, { "epoch": 0.21, "learning_rate": 4.8297956362349955e-06, "logits/chosen": -2.911616802215576, "logits/rejected": -2.7802021503448486, "logps/chosen": -190.67584228515625, "logps/rejected": -1014.8893432617188, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -1.2101691961288452, "rewards/margins": 8.506454467773438, "rewards/rejected": -9.716622352600098, "step": 3460 }, { "epoch": 0.21, "learning_rate": 4.8279033640758026e-06, "logits/chosen": -2.909989833831787, "logits/rejected": -2.8176238536834717, "logps/chosen": -105.54266357421875, "logps/rejected": -941.88037109375, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -0.35250407457351685, "rewards/margins": 8.61872386932373, "rewards/rejected": -8.971227645874023, "step": 3470 }, { "epoch": 0.21, "learning_rate": 4.826001006045997e-06, "logits/chosen": -2.9218358993530273, "logits/rejected": -2.858625888824463, "logps/chosen": -77.50524139404297, "logps/rejected": -921.4065551757812, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -0.07038109004497528, "rewards/margins": 8.711301803588867, "rewards/rejected": -8.781682968139648, "step": 3480 }, { "epoch": 0.21, "learning_rate": 4.824088570387735e-06, "logits/chosen": -2.9038586616516113, "logits/rejected": -2.821878433227539, "logps/chosen": -126.85333251953125, "logps/rejected": -932.4591674804688, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -0.5707005858421326, "rewards/margins": 8.309720993041992, "rewards/rejected": -8.880422592163086, "step": 3490 }, { "epoch": 0.21, "learning_rate": 4.822166065386832e-06, "logits/chosen": -2.919581174850464, "logits/rejected": -2.833155632019043, "logps/chosen": -100.0995864868164, "logps/rejected": -1006.1409912109375, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -0.31064051389694214, "rewards/margins": 9.319631576538086, "rewards/rejected": -9.63027286529541, "step": 3500 }, { "epoch": 0.21, "learning_rate": 4.820233499372728e-06, "logits/chosen": -2.8938403129577637, "logits/rejected": -2.7597873210906982, "logps/chosen": -150.28883361816406, "logps/rejected": -1096.629638671875, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -0.7687881588935852, "rewards/margins": 9.742793083190918, "rewards/rejected": -10.511579513549805, "step": 3510 }, { "epoch": 0.21, "learning_rate": 4.8182908807184585e-06, "logits/chosen": -2.9124953746795654, "logits/rejected": -2.7926323413848877, "logps/chosen": -143.35337829589844, "logps/rejected": -1126.3023681640625, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.6865848302841187, "rewards/margins": 10.126398086547852, "rewards/rejected": -10.812982559204102, "step": 3520 }, { "epoch": 0.21, "learning_rate": 4.816338217840607e-06, "logits/chosen": -2.9210238456726074, "logits/rejected": -2.838343620300293, "logps/chosen": -76.27590942382812, "logps/rejected": -886.6156005859375, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -0.10457025468349457, "rewards/margins": 8.311971664428711, "rewards/rejected": -8.416543006896973, "step": 3530 }, { "epoch": 0.21, "learning_rate": 4.814375519199281e-06, "logits/chosen": -2.91465163230896, "logits/rejected": -2.770555257797241, "logps/chosen": -84.92073822021484, "logps/rejected": -1105.976318359375, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.18526974320411682, "rewards/margins": 10.429521560668945, "rewards/rejected": -10.614790916442871, "step": 3540 }, { "epoch": 0.21, "learning_rate": 4.812402793298063e-06, "logits/chosen": -2.902050733566284, "logits/rejected": -2.800664186477661, "logps/chosen": -97.14655303955078, "logps/rejected": -958.6209716796875, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -0.2618294358253479, "rewards/margins": 8.886330604553223, "rewards/rejected": -9.148159980773926, "step": 3550 }, { "epoch": 0.21, "learning_rate": 4.810420048683985e-06, "logits/chosen": -2.9267497062683105, "logits/rejected": -2.8296542167663574, "logps/chosen": -98.052001953125, "logps/rejected": -1111.1044921875, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -0.30213773250579834, "rewards/margins": 10.367947578430176, "rewards/rejected": -10.670086860656738, "step": 3560 }, { "epoch": 0.21, "learning_rate": 4.808427293947481e-06, "logits/chosen": -2.9082841873168945, "logits/rejected": -2.8234333992004395, "logps/chosen": -93.5290756225586, "logps/rejected": -1032.8382568359375, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -0.24832916259765625, "rewards/margins": 9.64242172241211, "rewards/rejected": -9.89074993133545, "step": 3570 }, { "epoch": 0.21, "learning_rate": 4.806424537722359e-06, "logits/chosen": -2.9553589820861816, "logits/rejected": -2.793161630630493, "logps/chosen": -65.16587829589844, "logps/rejected": -999.2928466796875, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.013816917315125465, "rewards/margins": 9.536550521850586, "rewards/rejected": -9.55036735534668, "step": 3580 }, { "epoch": 0.21, "learning_rate": 4.804411788685755e-06, "logits/chosen": -2.9300644397735596, "logits/rejected": -2.81620192527771, "logps/chosen": -67.64566040039062, "logps/rejected": -1021.4599609375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 0.005144655704498291, "rewards/margins": 9.7776460647583, "rewards/rejected": -9.772500991821289, "step": 3590 }, { "epoch": 0.21, "learning_rate": 4.802389055558105e-06, "logits/chosen": -2.901089906692505, "logits/rejected": -2.7751426696777344, "logps/chosen": -81.9259033203125, "logps/rejected": -1056.336181640625, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -0.1204097643494606, "rewards/margins": 9.990921974182129, "rewards/rejected": -10.11133098602295, "step": 3600 }, { "epoch": 0.22, "learning_rate": 4.8003563471030974e-06, "logits/chosen": -2.882105588912964, "logits/rejected": -2.8083081245422363, "logps/chosen": -99.86497497558594, "logps/rejected": -962.1526489257812, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": -0.2984747290611267, "rewards/margins": 8.889835357666016, "rewards/rejected": -9.188310623168945, "step": 3610 }, { "epoch": 0.22, "learning_rate": 4.7983136721276435e-06, "logits/chosen": -2.927910566329956, "logits/rejected": -2.8260843753814697, "logps/chosen": -81.40794372558594, "logps/rejected": -1044.443603515625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.0779222697019577, "rewards/margins": 9.905133247375488, "rewards/rejected": -9.983054161071777, "step": 3620 }, { "epoch": 0.22, "learning_rate": 4.796261039481833e-06, "logits/chosen": -2.9126973152160645, "logits/rejected": -2.7857346534729004, "logps/chosen": -100.78450012207031, "logps/rejected": -987.5748291015625, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.2860509753227234, "rewards/margins": 9.147804260253906, "rewards/rejected": -9.433855056762695, "step": 3630 }, { "epoch": 0.22, "learning_rate": 4.7941984580589e-06, "logits/chosen": -2.916168689727783, "logits/rejected": -2.810474157333374, "logps/chosen": -153.5299835205078, "logps/rejected": -962.2683715820312, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -0.86139315366745, "rewards/margins": 8.309381484985352, "rewards/rejected": -9.170774459838867, "step": 3640 }, { "epoch": 0.22, "learning_rate": 4.7921259367951804e-06, "logits/chosen": -2.8897111415863037, "logits/rejected": -2.8101799488067627, "logps/chosen": -132.01263427734375, "logps/rejected": -1016.0870971679688, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -0.602690577507019, "rewards/margins": 9.103035926818848, "rewards/rejected": -9.70572566986084, "step": 3650 }, { "epoch": 0.22, "learning_rate": 4.790043484670077e-06, "logits/chosen": -2.914196014404297, "logits/rejected": -2.8064537048339844, "logps/chosen": -145.84518432617188, "logps/rejected": -1150.814208984375, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.8015564680099487, "rewards/margins": 10.246492385864258, "rewards/rejected": -11.048049926757812, "step": 3660 }, { "epoch": 0.22, "learning_rate": 4.787951110706019e-06, "logits/chosen": -2.907914161682129, "logits/rejected": -2.8049521446228027, "logps/chosen": -99.94340515136719, "logps/rejected": -1061.523681640625, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -0.2665363848209381, "rewards/margins": 9.910881042480469, "rewards/rejected": -10.177417755126953, "step": 3670 }, { "epoch": 0.22, "learning_rate": 4.785848823968424e-06, "logits/chosen": -2.941972017288208, "logits/rejected": -2.819143533706665, "logps/chosen": -92.7989501953125, "logps/rejected": -1125.849365234375, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -0.18092337250709534, "rewards/margins": 10.625335693359375, "rewards/rejected": -10.806259155273438, "step": 3680 }, { "epoch": 0.22, "learning_rate": 4.783736633565654e-06, "logits/chosen": -2.919088840484619, "logits/rejected": -2.8095946311950684, "logps/chosen": -89.71275329589844, "logps/rejected": -1106.349853515625, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.18761476874351501, "rewards/margins": 10.43260669708252, "rewards/rejected": -10.620222091674805, "step": 3690 }, { "epoch": 0.22, "learning_rate": 4.781614548648983e-06, "logits/chosen": -2.8606014251708984, "logits/rejected": -2.735283613204956, "logps/chosen": -87.48583984375, "logps/rejected": -973.7822265625, "loss": 0.0802, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.17278487980365753, "rewards/margins": 9.119610786437988, "rewards/rejected": -9.292396545410156, "step": 3700 }, { "epoch": 0.22, "learning_rate": 4.779482578412553e-06, "logits/chosen": -2.9025392532348633, "logits/rejected": -2.803652286529541, "logps/chosen": -83.37914276123047, "logps/rejected": -1047.5526123046875, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -0.14480479061603546, "rewards/margins": 9.883936882019043, "rewards/rejected": -10.028741836547852, "step": 3710 }, { "epoch": 0.22, "learning_rate": 4.7773407320933345e-06, "logits/chosen": -2.918300151824951, "logits/rejected": -2.7781999111175537, "logps/chosen": -108.9967041015625, "logps/rejected": -1034.0489501953125, "loss": 0.0522, "rewards/accuracies": 1.0, "rewards/chosen": -0.4233720898628235, "rewards/margins": 9.476350784301758, "rewards/rejected": -9.8997220993042, "step": 3720 }, { "epoch": 0.22, "learning_rate": 4.775189018971088e-06, "logits/chosen": -2.9265215396881104, "logits/rejected": -2.813169002532959, "logps/chosen": -95.07650756835938, "logps/rejected": -1022.4942626953125, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -0.2539520263671875, "rewards/margins": 9.519453048706055, "rewards/rejected": -9.773405075073242, "step": 3730 }, { "epoch": 0.22, "learning_rate": 4.773027448368323e-06, "logits/chosen": -2.8871707916259766, "logits/rejected": -2.789888381958008, "logps/chosen": -96.50606536865234, "logps/rejected": -993.6654052734375, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -0.265226274728775, "rewards/margins": 9.223313331604004, "rewards/rejected": -9.488539695739746, "step": 3740 }, { "epoch": 0.22, "learning_rate": 4.770856029650257e-06, "logits/chosen": -2.8960018157958984, "logits/rejected": -2.804370403289795, "logps/chosen": -92.79429626464844, "logps/rejected": -995.08056640625, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": -0.248972088098526, "rewards/margins": 9.265229225158691, "rewards/rejected": -9.514201164245605, "step": 3750 }, { "epoch": 0.22, "learning_rate": 4.768674772224775e-06, "logits/chosen": -2.9093520641326904, "logits/rejected": -2.789581537246704, "logps/chosen": -87.56285095214844, "logps/rejected": -1031.72119140625, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.1927683800458908, "rewards/margins": 9.660600662231445, "rewards/rejected": -9.85336971282959, "step": 3760 }, { "epoch": 0.22, "learning_rate": 4.766483685542389e-06, "logits/chosen": -2.9082770347595215, "logits/rejected": -2.8302905559539795, "logps/chosen": -65.1877212524414, "logps/rejected": -918.7218017578125, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": 0.01386135071516037, "rewards/margins": 8.758512496948242, "rewards/rejected": -8.744649887084961, "step": 3770 }, { "epoch": 0.23, "learning_rate": 4.764282779096199e-06, "logits/chosen": -2.894007444381714, "logits/rejected": -2.8101418018341064, "logps/chosen": -72.62544250488281, "logps/rejected": -973.9033203125, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -0.019312819465994835, "rewards/margins": 9.27327823638916, "rewards/rejected": -9.292590141296387, "step": 3780 }, { "epoch": 0.23, "learning_rate": 4.762072062421849e-06, "logits/chosen": -2.902066230773926, "logits/rejected": -2.796579122543335, "logps/chosen": -68.55062103271484, "logps/rejected": -981.8231201171875, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -0.021280916407704353, "rewards/margins": 9.347227096557617, "rewards/rejected": -9.368507385253906, "step": 3790 }, { "epoch": 0.23, "learning_rate": 4.759851545097486e-06, "logits/chosen": -2.927185535430908, "logits/rejected": -2.8071682453155518, "logps/chosen": -94.28609466552734, "logps/rejected": -1121.700439453125, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.21990816295146942, "rewards/margins": 10.542993545532227, "rewards/rejected": -10.762903213500977, "step": 3800 }, { "epoch": 0.23, "learning_rate": 4.75762123674372e-06, "logits/chosen": -2.905829429626465, "logits/rejected": -2.7959067821502686, "logps/chosen": -113.93766784667969, "logps/rejected": -1116.533203125, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.45689186453819275, "rewards/margins": 10.273436546325684, "rewards/rejected": -10.730328559875488, "step": 3810 }, { "epoch": 0.23, "learning_rate": 4.755381147023582e-06, "logits/chosen": -2.88506817817688, "logits/rejected": -2.785269260406494, "logps/chosen": -77.04222106933594, "logps/rejected": -931.8571166992188, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -0.10545346885919571, "rewards/margins": 8.777097702026367, "rewards/rejected": -8.882551193237305, "step": 3820 }, { "epoch": 0.23, "learning_rate": 4.7531312856424814e-06, "logits/chosen": -2.935608148574829, "logits/rejected": -2.8553714752197266, "logps/chosen": -71.35791015625, "logps/rejected": -902.74267578125, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -0.005191388539969921, "rewards/margins": 8.57547378540039, "rewards/rejected": -8.58066463470459, "step": 3830 }, { "epoch": 0.23, "learning_rate": 4.750871662348164e-06, "logits/chosen": -2.9029181003570557, "logits/rejected": -2.8173446655273438, "logps/chosen": -75.56688690185547, "logps/rejected": -913.6727294921875, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -0.02877405285835266, "rewards/margins": 8.667641639709473, "rewards/rejected": -8.696414947509766, "step": 3840 }, { "epoch": 0.23, "learning_rate": 4.748602286930671e-06, "logits/chosen": -2.8777687549591064, "logits/rejected": -2.769540309906006, "logps/chosen": -68.40373992919922, "logps/rejected": -1035.921875, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": 0.001151949167251587, "rewards/margins": 9.908658981323242, "rewards/rejected": -9.907508850097656, "step": 3850 }, { "epoch": 0.23, "learning_rate": 4.746323169222295e-06, "logits/chosen": -2.884075880050659, "logits/rejected": -2.7866289615631104, "logps/chosen": -74.61378479003906, "logps/rejected": -900.3932495117188, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": 0.044218212366104126, "rewards/margins": 8.607454299926758, "rewards/rejected": -8.563236236572266, "step": 3860 }, { "epoch": 0.23, "learning_rate": 4.744034319097536e-06, "logits/chosen": -2.9046847820281982, "logits/rejected": -2.810368776321411, "logps/chosen": -75.18858337402344, "logps/rejected": -1058.2366943359375, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -0.014871361665427685, "rewards/margins": 10.108458518981934, "rewards/rejected": -10.123331069946289, "step": 3870 }, { "epoch": 0.23, "learning_rate": 4.741735746473063e-06, "logits/chosen": -2.9184045791625977, "logits/rejected": -2.795245885848999, "logps/chosen": -69.85333251953125, "logps/rejected": -994.2190551757812, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -0.050799496471881866, "rewards/margins": 9.447065353393555, "rewards/rejected": -9.497864723205566, "step": 3880 }, { "epoch": 0.23, "learning_rate": 4.739427461307671e-06, "logits/chosen": -2.8918087482452393, "logits/rejected": -2.7953124046325684, "logps/chosen": -78.41696166992188, "logps/rejected": -944.1104736328125, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.052492789924144745, "rewards/margins": 8.951055526733398, "rewards/rejected": -9.003546714782715, "step": 3890 }, { "epoch": 0.23, "learning_rate": 4.73710947360223e-06, "logits/chosen": -2.8873825073242188, "logits/rejected": -2.7540669441223145, "logps/chosen": -96.78419494628906, "logps/rejected": -975.72412109375, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -0.23091724514961243, "rewards/margins": 9.077892303466797, "rewards/rejected": -9.308808326721191, "step": 3900 }, { "epoch": 0.23, "learning_rate": 4.734781793399651e-06, "logits/chosen": -2.9157841205596924, "logits/rejected": -2.7792770862579346, "logps/chosen": -85.96147155761719, "logps/rejected": -1089.6395263671875, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.13481441140174866, "rewards/margins": 10.323954582214355, "rewards/rejected": -10.458769798278809, "step": 3910 }, { "epoch": 0.23, "learning_rate": 4.732444430784838e-06, "logits/chosen": -2.902320623397827, "logits/rejected": -2.7530972957611084, "logps/chosen": -84.19400787353516, "logps/rejected": -1082.6468505859375, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.07995015382766724, "rewards/margins": 10.297528266906738, "rewards/rejected": -10.37747859954834, "step": 3920 }, { "epoch": 0.23, "learning_rate": 4.730097395884645e-06, "logits/chosen": -2.935899496078491, "logits/rejected": -2.7937304973602295, "logps/chosen": -78.46994018554688, "logps/rejected": -961.6571044921875, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -0.08862430602312088, "rewards/margins": 9.097587585449219, "rewards/rejected": -9.186212539672852, "step": 3930 }, { "epoch": 0.23, "learning_rate": 4.727740698867831e-06, "logits/chosen": -2.900052309036255, "logits/rejected": -2.8231678009033203, "logps/chosen": -87.9262924194336, "logps/rejected": -1028.0103759765625, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.1742534339427948, "rewards/margins": 9.663190841674805, "rewards/rejected": -9.837444305419922, "step": 3940 }, { "epoch": 0.24, "learning_rate": 4.725374349945019e-06, "logits/chosen": -2.9103264808654785, "logits/rejected": -2.8139142990112305, "logps/chosen": -107.99403381347656, "logps/rejected": -1099.698974609375, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -0.37500011920928955, "rewards/margins": 10.16627311706543, "rewards/rejected": -10.541272163391113, "step": 3950 }, { "epoch": 0.24, "learning_rate": 4.7229983593686465e-06, "logits/chosen": -2.899981737136841, "logits/rejected": -2.7954936027526855, "logps/chosen": -82.14442443847656, "logps/rejected": -1063.176025390625, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -0.11459366232156754, "rewards/margins": 10.062212944030762, "rewards/rejected": -10.176806449890137, "step": 3960 }, { "epoch": 0.24, "learning_rate": 4.72061273743293e-06, "logits/chosen": -2.9029393196105957, "logits/rejected": -2.8007359504699707, "logps/chosen": -93.69132995605469, "logps/rejected": -1082.078125, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -0.24225322902202606, "rewards/margins": 10.138811111450195, "rewards/rejected": -10.381063461303711, "step": 3970 }, { "epoch": 0.24, "learning_rate": 4.718217494473809e-06, "logits/chosen": -2.8980863094329834, "logits/rejected": -2.782282590866089, "logps/chosen": -76.29203033447266, "logps/rejected": -1009.2958984375, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.09314502775669098, "rewards/margins": 9.557165145874023, "rewards/rejected": -9.650311470031738, "step": 3980 }, { "epoch": 0.24, "learning_rate": 4.715812640868911e-06, "logits/chosen": -2.941286087036133, "logits/rejected": -2.8018040657043457, "logps/chosen": -83.56819915771484, "logps/rejected": -1039.623779296875, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -0.02263762429356575, "rewards/margins": 9.941400527954102, "rewards/rejected": -9.964037895202637, "step": 3990 }, { "epoch": 0.24, "learning_rate": 4.7133981870375e-06, "logits/chosen": -2.9263908863067627, "logits/rejected": -2.8119800090789795, "logps/chosen": -94.2664794921875, "logps/rejected": -1165.2017822265625, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.19283881783485413, "rewards/margins": 11.018218040466309, "rewards/rejected": -11.21105670928955, "step": 4000 }, { "epoch": 0.24, "learning_rate": 4.710974143440435e-06, "logits/chosen": -2.916626453399658, "logits/rejected": -2.825366258621216, "logps/chosen": -79.94502258300781, "logps/rejected": -1028.0797119140625, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -0.13081735372543335, "rewards/margins": 9.698927879333496, "rewards/rejected": -9.829744338989258, "step": 4010 }, { "epoch": 0.24, "learning_rate": 4.708540520580125e-06, "logits/chosen": -2.9402849674224854, "logits/rejected": -2.8051180839538574, "logps/chosen": -74.67461395263672, "logps/rejected": -1090.4091796875, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.009149352088570595, "rewards/margins": 10.44430923461914, "rewards/rejected": -10.453458786010742, "step": 4020 }, { "epoch": 0.24, "learning_rate": 4.70609732900048e-06, "logits/chosen": -2.905668020248413, "logits/rejected": -2.803889751434326, "logps/chosen": -69.18238830566406, "logps/rejected": -1125.2408447265625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 0.05240710452198982, "rewards/margins": 10.851551055908203, "rewards/rejected": -10.799144744873047, "step": 4030 }, { "epoch": 0.24, "learning_rate": 4.703644579286867e-06, "logits/chosen": -2.8862783908843994, "logits/rejected": -2.765496253967285, "logps/chosen": -69.36138916015625, "logps/rejected": -1107.096435546875, "loss": 0.0877, "rewards/accuracies": 1.0, "rewards/chosen": 0.03343920782208443, "rewards/margins": 10.67336654663086, "rewards/rejected": -10.63992691040039, "step": 4040 }, { "epoch": 0.24, "learning_rate": 4.701182282066068e-06, "logits/chosen": -2.9192187786102295, "logits/rejected": -2.7758922576904297, "logps/chosen": -77.3496322631836, "logps/rejected": -1172.750732421875, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.03041331097483635, "rewards/margins": 11.249832153320312, "rewards/rejected": -11.28024673461914, "step": 4050 }, { "epoch": 0.24, "learning_rate": 4.698710448006226e-06, "logits/chosen": -2.9110758304595947, "logits/rejected": -2.805079698562622, "logps/chosen": -68.88075256347656, "logps/rejected": -1095.01123046875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.07891975343227386, "rewards/margins": 10.428773880004883, "rewards/rejected": -10.50769329071045, "step": 4060 }, { "epoch": 0.24, "learning_rate": 4.696229087816808e-06, "logits/chosen": -2.905869245529175, "logits/rejected": -2.8004088401794434, "logps/chosen": -71.49755859375, "logps/rejected": -1070.1978759765625, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -0.01815054751932621, "rewards/margins": 10.243074417114258, "rewards/rejected": -10.261224746704102, "step": 4070 }, { "epoch": 0.24, "learning_rate": 4.693738212248549e-06, "logits/chosen": -2.896515369415283, "logits/rejected": -2.7848098278045654, "logps/chosen": -103.5752182006836, "logps/rejected": -1010.2786865234375, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -0.33002567291259766, "rewards/margins": 9.334737777709961, "rewards/rejected": -9.664763450622559, "step": 4080 }, { "epoch": 0.24, "learning_rate": 4.6912378320934134e-06, "logits/chosen": -2.8731987476348877, "logits/rejected": -2.7809576988220215, "logps/chosen": -70.00006866455078, "logps/rejected": -992.94287109375, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": 0.08050527423620224, "rewards/margins": 9.564136505126953, "rewards/rejected": -9.483631134033203, "step": 4090 }, { "epoch": 0.24, "learning_rate": 4.688727958184545e-06, "logits/chosen": -2.913094997406006, "logits/rejected": -2.818051815032959, "logps/chosen": -98.42679595947266, "logps/rejected": -1067.169677734375, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -0.2496531903743744, "rewards/margins": 9.977968215942383, "rewards/rejected": -10.227621078491211, "step": 4100 }, { "epoch": 0.25, "learning_rate": 4.68620860139622e-06, "logits/chosen": -2.8749492168426514, "logits/rejected": -2.7749624252319336, "logps/chosen": -110.8221206665039, "logps/rejected": -1056.250244140625, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -0.3982095718383789, "rewards/margins": 9.721628189086914, "rewards/rejected": -10.119839668273926, "step": 4110 }, { "epoch": 0.25, "learning_rate": 4.683679772643799e-06, "logits/chosen": -2.8999485969543457, "logits/rejected": -2.801888942718506, "logps/chosen": -92.64431762695312, "logps/rejected": -1134.45068359375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.26071274280548096, "rewards/margins": 10.631994247436523, "rewards/rejected": -10.892707824707031, "step": 4120 }, { "epoch": 0.25, "learning_rate": 4.681141482883682e-06, "logits/chosen": -2.877768039703369, "logits/rejected": -2.8172197341918945, "logps/chosen": -109.5040283203125, "logps/rejected": -985.7999877929688, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -0.3617154061794281, "rewards/margins": 9.049224853515625, "rewards/rejected": -9.410940170288086, "step": 4130 }, { "epoch": 0.25, "learning_rate": 4.6785937431132596e-06, "logits/chosen": -2.884094715118408, "logits/rejected": -2.7508468627929688, "logps/chosen": -99.2677993774414, "logps/rejected": -1108.4849853515625, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -0.23048047721385956, "rewards/margins": 10.412751197814941, "rewards/rejected": -10.643231391906738, "step": 4140 }, { "epoch": 0.25, "learning_rate": 4.676036564370865e-06, "logits/chosen": -2.9182515144348145, "logits/rejected": -2.7855281829833984, "logps/chosen": -86.19815063476562, "logps/rejected": -973.0714111328125, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -0.17939230799674988, "rewards/margins": 9.110641479492188, "rewards/rejected": -9.290034294128418, "step": 4150 }, { "epoch": 0.25, "learning_rate": 4.6734699577357265e-06, "logits/chosen": -2.952885627746582, "logits/rejected": -2.8558971881866455, "logps/chosen": -72.60917663574219, "logps/rejected": -912.0119018554688, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.023732393980026245, "rewards/margins": 8.656572341918945, "rewards/rejected": -8.680304527282715, "step": 4160 }, { "epoch": 0.25, "learning_rate": 4.670893934327921e-06, "logits/chosen": -2.945774555206299, "logits/rejected": -2.8346006870269775, "logps/chosen": -59.6494140625, "logps/rejected": -1054.052734375, "loss": 0.066, "rewards/accuracies": 1.0, "rewards/chosen": 0.0770690068602562, "rewards/margins": 10.181549072265625, "rewards/rejected": -10.104479789733887, "step": 4170 }, { "epoch": 0.25, "learning_rate": 4.668308505308323e-06, "logits/chosen": -2.905186176300049, "logits/rejected": -2.8176016807556152, "logps/chosen": -66.75899505615234, "logps/rejected": -979.2154541015625, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": 0.03944163769483566, "rewards/margins": 9.391366958618164, "rewards/rejected": -9.351924896240234, "step": 4180 }, { "epoch": 0.25, "learning_rate": 4.6657136818785596e-06, "logits/chosen": -2.9032211303710938, "logits/rejected": -2.816678524017334, "logps/chosen": -74.63514709472656, "logps/rejected": -1158.8536376953125, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.07231833785772324, "rewards/margins": 11.082597732543945, "rewards/rejected": -11.154916763305664, "step": 4190 }, { "epoch": 0.25, "learning_rate": 4.663109475280958e-06, "logits/chosen": -2.9040133953094482, "logits/rejected": -2.81174373626709, "logps/chosen": -141.2930908203125, "logps/rejected": -1220.878662109375, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.7061454653739929, "rewards/margins": 11.046578407287598, "rewards/rejected": -11.752723693847656, "step": 4200 }, { "epoch": 0.25, "learning_rate": 4.660495896798499e-06, "logits/chosen": -2.9432120323181152, "logits/rejected": -2.774690866470337, "logps/chosen": -109.40970611572266, "logps/rejected": -1126.1468505859375, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -0.3487492501735687, "rewards/margins": 10.462849617004395, "rewards/rejected": -10.811599731445312, "step": 4210 }, { "epoch": 0.25, "learning_rate": 4.65787295775477e-06, "logits/chosen": -2.9040253162384033, "logits/rejected": -2.774690866470337, "logps/chosen": -72.62080383300781, "logps/rejected": -966.7214965820312, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -0.015141752548515797, "rewards/margins": 9.208559036254883, "rewards/rejected": -9.223701477050781, "step": 4220 }, { "epoch": 0.25, "learning_rate": 4.655240669513913e-06, "logits/chosen": -2.9094412326812744, "logits/rejected": -2.8058242797851562, "logps/chosen": -71.72166442871094, "logps/rejected": -1042.1463623046875, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -0.030352765694260597, "rewards/margins": 9.953377723693848, "rewards/rejected": -9.98373031616211, "step": 4230 }, { "epoch": 0.25, "learning_rate": 4.652599043480574e-06, "logits/chosen": -2.9107279777526855, "logits/rejected": -2.8319244384765625, "logps/chosen": -76.95477294921875, "logps/rejected": -1065.914306640625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -0.08376533538103104, "rewards/margins": 10.118742942810059, "rewards/rejected": -10.202507972717285, "step": 4240 }, { "epoch": 0.25, "learning_rate": 4.64994809109986e-06, "logits/chosen": -2.9013326168060303, "logits/rejected": -2.7833971977233887, "logps/chosen": -106.92848205566406, "logps/rejected": -995.4361572265625, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -0.3284170627593994, "rewards/margins": 9.189836502075195, "rewards/rejected": -9.5182523727417, "step": 4250 }, { "epoch": 0.25, "learning_rate": 4.647287823857283e-06, "logits/chosen": -2.8964715003967285, "logits/rejected": -2.762953281402588, "logps/chosen": -159.28265380859375, "logps/rejected": -1047.5933837890625, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.9117465019226074, "rewards/margins": 9.121123313903809, "rewards/rejected": -10.032869338989258, "step": 4260 }, { "epoch": 0.25, "learning_rate": 4.644618253278712e-06, "logits/chosen": -2.9185850620269775, "logits/rejected": -2.808415412902832, "logps/chosen": -161.60824584960938, "logps/rejected": -1039.034912109375, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -0.9266613721847534, "rewards/margins": 9.022591590881348, "rewards/rejected": -9.949252128601074, "step": 4270 }, { "epoch": 0.26, "learning_rate": 4.6419393909303254e-06, "logits/chosen": -2.904066562652588, "logits/rejected": -2.8094186782836914, "logps/chosen": -145.71018981933594, "logps/rejected": -1020.9894409179688, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -0.714751124382019, "rewards/margins": 9.04936695098877, "rewards/rejected": -9.764118194580078, "step": 4280 }, { "epoch": 0.26, "learning_rate": 4.639251248418558e-06, "logits/chosen": -2.9164223670959473, "logits/rejected": -2.785963773727417, "logps/chosen": -110.82879638671875, "logps/rejected": -1146.018798828125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.40823906660079956, "rewards/margins": 10.603636741638184, "rewards/rejected": -11.011876106262207, "step": 4290 }, { "epoch": 0.26, "learning_rate": 4.636553837390051e-06, "logits/chosen": -2.9003405570983887, "logits/rejected": -2.7966482639312744, "logps/chosen": -87.22411346435547, "logps/rejected": -1113.9827880859375, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -0.16813921928405762, "rewards/margins": 10.537897109985352, "rewards/rejected": -10.706036567687988, "step": 4300 }, { "epoch": 0.26, "learning_rate": 4.6338471695316046e-06, "logits/chosen": -2.8989522457122803, "logits/rejected": -2.8207175731658936, "logps/chosen": -108.576416015625, "logps/rejected": -1153.6016845703125, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.39755091071128845, "rewards/margins": 10.700644493103027, "rewards/rejected": -11.098196029663086, "step": 4310 }, { "epoch": 0.26, "learning_rate": 4.631131256570124e-06, "logits/chosen": -2.9059391021728516, "logits/rejected": -2.791686534881592, "logps/chosen": -125.39930725097656, "logps/rejected": -1061.9052734375, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -0.539800763130188, "rewards/margins": 9.600065231323242, "rewards/rejected": -10.139867782592773, "step": 4320 }, { "epoch": 0.26, "learning_rate": 4.628406110272568e-06, "logits/chosen": -2.9412436485290527, "logits/rejected": -2.79535174369812, "logps/chosen": -104.64253997802734, "logps/rejected": -1198.5625, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.35039615631103516, "rewards/margins": 11.188610076904297, "rewards/rejected": -11.539005279541016, "step": 4330 }, { "epoch": 0.26, "learning_rate": 4.625671742445903e-06, "logits/chosen": -2.8668785095214844, "logits/rejected": -2.7742056846618652, "logps/chosen": -92.81754302978516, "logps/rejected": -1065.806884765625, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": -0.245576411485672, "rewards/margins": 9.967387199401855, "rewards/rejected": -10.212964057922363, "step": 4340 }, { "epoch": 0.26, "learning_rate": 4.622928164937046e-06, "logits/chosen": -2.91459059715271, "logits/rejected": -2.792318820953369, "logps/chosen": -108.16807556152344, "logps/rejected": -1020.1964721679688, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -0.38700878620147705, "rewards/margins": 9.374357223510742, "rewards/rejected": -9.76136589050293, "step": 4350 }, { "epoch": 0.26, "learning_rate": 4.620175389632817e-06, "logits/chosen": -2.8872015476226807, "logits/rejected": -2.811877727508545, "logps/chosen": -129.07662963867188, "logps/rejected": -1071.6632080078125, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": -0.6013280749320984, "rewards/margins": 9.681346893310547, "rewards/rejected": -10.282674789428711, "step": 4360 }, { "epoch": 0.26, "learning_rate": 4.617413428459887e-06, "logits/chosen": -2.8861045837402344, "logits/rejected": -2.7798550128936768, "logps/chosen": -140.4359893798828, "logps/rejected": -1040.20068359375, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -0.7209984064102173, "rewards/margins": 9.245370864868164, "rewards/rejected": -9.96636962890625, "step": 4370 }, { "epoch": 0.26, "learning_rate": 4.614642293384724e-06, "logits/chosen": -2.883229970932007, "logits/rejected": -2.77640700340271, "logps/chosen": -126.4269027709961, "logps/rejected": -1000.5406494140625, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -0.5862837433815002, "rewards/margins": 8.971501350402832, "rewards/rejected": -9.557785034179688, "step": 4380 }, { "epoch": 0.26, "learning_rate": 4.611861996413542e-06, "logits/chosen": -2.905961275100708, "logits/rejected": -2.8052828311920166, "logps/chosen": -66.661865234375, "logps/rejected": -999.3909301757812, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": 0.053945302963256836, "rewards/margins": 9.615189552307129, "rewards/rejected": -9.561243057250977, "step": 4390 }, { "epoch": 0.26, "learning_rate": 4.609072549592255e-06, "logits/chosen": -2.8728692531585693, "logits/rejected": -2.7769827842712402, "logps/chosen": -78.50016021728516, "logps/rejected": -1102.4761962890625, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.02066270262002945, "rewards/margins": 10.549295425415039, "rewards/rejected": -10.569957733154297, "step": 4400 }, { "epoch": 0.26, "learning_rate": 4.6062739650064135e-06, "logits/chosen": -2.893392562866211, "logits/rejected": -2.8116512298583984, "logps/chosen": -91.8978271484375, "logps/rejected": -963.6676025390625, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -0.20641469955444336, "rewards/margins": 8.978131294250488, "rewards/rejected": -9.18454647064209, "step": 4410 }, { "epoch": 0.26, "learning_rate": 4.603466254781162e-06, "logits/chosen": -2.9127235412597656, "logits/rejected": -2.815603017807007, "logps/chosen": -96.66557312011719, "logps/rejected": -1068.8868408203125, "loss": 0.0326, "rewards/accuracies": 1.0, "rewards/chosen": -0.30028194189071655, "rewards/margins": 9.935462951660156, "rewards/rejected": -10.23574447631836, "step": 4420 }, { "epoch": 0.26, "learning_rate": 4.600649431081181e-06, "logits/chosen": -2.911548137664795, "logits/rejected": -2.821232318878174, "logps/chosen": -161.19851684570312, "logps/rejected": -1012.6959228515625, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": -0.877530574798584, "rewards/margins": 8.805020332336426, "rewards/rejected": -9.682550430297852, "step": 4430 }, { "epoch": 0.26, "learning_rate": 4.597823506110637e-06, "logits/chosen": -2.866288661956787, "logits/rejected": -2.797546863555908, "logps/chosen": -199.53262329101562, "logps/rejected": -1092.073974609375, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -1.308280348777771, "rewards/margins": 9.171706199645996, "rewards/rejected": -10.479988098144531, "step": 4440 }, { "epoch": 0.27, "learning_rate": 4.594988492113128e-06, "logits/chosen": -2.92360782623291, "logits/rejected": -2.8020482063293457, "logps/chosen": -95.24528503417969, "logps/rejected": -1000.21337890625, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -0.2674865126609802, "rewards/margins": 9.31503677368164, "rewards/rejected": -9.582524299621582, "step": 4450 }, { "epoch": 0.27, "learning_rate": 4.592144401371632e-06, "logits/chosen": -2.913630247116089, "logits/rejected": -2.8257508277893066, "logps/chosen": -146.58274841308594, "logps/rejected": -1209.158447265625, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.7044652104377747, "rewards/margins": 10.943525314331055, "rewards/rejected": -11.647990226745605, "step": 4460 }, { "epoch": 0.27, "learning_rate": 4.5892912462084515e-06, "logits/chosen": -2.9358959197998047, "logits/rejected": -2.7742631435394287, "logps/chosen": -114.9584732055664, "logps/rejected": -1048.086669921875, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -0.4246769845485687, "rewards/margins": 9.612180709838867, "rewards/rejected": -10.036857604980469, "step": 4470 }, { "epoch": 0.27, "learning_rate": 4.586429038985163e-06, "logits/chosen": -2.9011335372924805, "logits/rejected": -2.7730495929718018, "logps/chosen": -70.93653869628906, "logps/rejected": -1152.823486328125, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -0.011950431391596794, "rewards/margins": 11.079075813293457, "rewards/rejected": -11.091026306152344, "step": 4480 }, { "epoch": 0.27, "learning_rate": 4.583557792102559e-06, "logits/chosen": -2.9267096519470215, "logits/rejected": -2.8298516273498535, "logps/chosen": -76.23814392089844, "logps/rejected": -1001.984375, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -0.0638723373413086, "rewards/margins": 9.523317337036133, "rewards/rejected": -9.587187767028809, "step": 4490 }, { "epoch": 0.27, "learning_rate": 4.580677518000604e-06, "logits/chosen": -2.9086670875549316, "logits/rejected": -2.801614761352539, "logps/chosen": -71.93829345703125, "logps/rejected": -1079.363525390625, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -0.009500850923359394, "rewards/margins": 10.335481643676758, "rewards/rejected": -10.344982147216797, "step": 4500 }, { "epoch": 0.27, "learning_rate": 4.577788229158364e-06, "logits/chosen": -2.9079577922821045, "logits/rejected": -2.7817137241363525, "logps/chosen": -76.71377563476562, "logps/rejected": -949.2999267578125, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.06510486453771591, "rewards/margins": 8.988740921020508, "rewards/rejected": -9.053844451904297, "step": 4510 }, { "epoch": 0.27, "learning_rate": 4.574889938093971e-06, "logits/chosen": -2.893267869949341, "logits/rejected": -2.802339553833008, "logps/chosen": -94.14060974121094, "logps/rejected": -1111.646484375, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.20732493698596954, "rewards/margins": 10.476593017578125, "rewards/rejected": -10.683917999267578, "step": 4520 }, { "epoch": 0.27, "learning_rate": 4.571982657364555e-06, "logits/chosen": -2.8677310943603516, "logits/rejected": -2.7809898853302, "logps/chosen": -119.81768798828125, "logps/rejected": -1099.111083984375, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -0.4681376814842224, "rewards/margins": 10.077434539794922, "rewards/rejected": -10.545572280883789, "step": 4530 }, { "epoch": 0.27, "learning_rate": 4.569066399566196e-06, "logits/chosen": -2.9095661640167236, "logits/rejected": -2.819188356399536, "logps/chosen": -108.3870849609375, "logps/rejected": -1139.8427734375, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.41503772139549255, "rewards/margins": 10.548416137695312, "rewards/rejected": -10.96345329284668, "step": 4540 }, { "epoch": 0.27, "learning_rate": 4.566141177333871e-06, "logits/chosen": -2.9172446727752686, "logits/rejected": -2.810838222503662, "logps/chosen": -116.5077896118164, "logps/rejected": -1068.036376953125, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -0.5008957982063293, "rewards/margins": 9.735939025878906, "rewards/rejected": -10.236834526062012, "step": 4550 }, { "epoch": 0.27, "learning_rate": 4.563207003341389e-06, "logits/chosen": -2.8966095447540283, "logits/rejected": -2.8229918479919434, "logps/chosen": -99.80728912353516, "logps/rejected": -1024.94921875, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.22795113921165466, "rewards/margins": 9.57709789276123, "rewards/rejected": -9.805047988891602, "step": 4560 }, { "epoch": 0.27, "learning_rate": 4.56026389030135e-06, "logits/chosen": -2.876563310623169, "logits/rejected": -2.802300214767456, "logps/chosen": -84.9843978881836, "logps/rejected": -940.0216064453125, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.12979432940483093, "rewards/margins": 8.832286834716797, "rewards/rejected": -8.962080955505371, "step": 4570 }, { "epoch": 0.27, "learning_rate": 4.557311850965081e-06, "logits/chosen": -2.9038519859313965, "logits/rejected": -2.808375835418701, "logps/chosen": -66.52571105957031, "logps/rejected": -1105.466064453125, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": 0.018833911046385765, "rewards/margins": 10.632471084594727, "rewards/rejected": -10.613636016845703, "step": 4580 }, { "epoch": 0.27, "learning_rate": 4.554350898122585e-06, "logits/chosen": -2.9017598628997803, "logits/rejected": -2.825206995010376, "logps/chosen": -82.07447814941406, "logps/rejected": -937.4646606445312, "loss": 0.0758, "rewards/accuracies": 1.0, "rewards/chosen": -0.14081144332885742, "rewards/margins": 8.786211013793945, "rewards/rejected": -8.927021980285645, "step": 4590 }, { "epoch": 0.27, "learning_rate": 4.551381044602478e-06, "logits/chosen": -2.9548227787017822, "logits/rejected": -2.8534159660339355, "logps/chosen": -71.24727630615234, "logps/rejected": -1036.587158203125, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -0.04382842406630516, "rewards/margins": 9.880915641784668, "rewards/rejected": -9.924745559692383, "step": 4600 }, { "epoch": 0.27, "learning_rate": 4.548402303271946e-06, "logits/chosen": -2.8801655769348145, "logits/rejected": -2.8027002811431885, "logps/chosen": -61.337425231933594, "logps/rejected": -1094.833251953125, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": 0.056496210396289825, "rewards/margins": 10.569719314575195, "rewards/rejected": -10.513221740722656, "step": 4610 }, { "epoch": 0.28, "learning_rate": 4.5454146870366775e-06, "logits/chosen": -2.8988003730773926, "logits/rejected": -2.8232078552246094, "logps/chosen": -84.91638946533203, "logps/rejected": -1046.2430419921875, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -0.11448167264461517, "rewards/margins": 9.906743049621582, "rewards/rejected": -10.021224021911621, "step": 4620 }, { "epoch": 0.28, "learning_rate": 4.542418208840816e-06, "logits/chosen": -2.9094996452331543, "logits/rejected": -2.83848237991333, "logps/chosen": -69.73999786376953, "logps/rejected": -972.4375, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -0.01323959231376648, "rewards/margins": 9.26408576965332, "rewards/rejected": -9.277325630187988, "step": 4630 }, { "epoch": 0.28, "learning_rate": 4.539412881666896e-06, "logits/chosen": -2.9168286323547363, "logits/rejected": -2.82263445854187, "logps/chosen": -75.34038543701172, "logps/rejected": -1006.7355346679688, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.03974676877260208, "rewards/margins": 9.594426155090332, "rewards/rejected": -9.634172439575195, "step": 4640 }, { "epoch": 0.28, "learning_rate": 4.536398718535795e-06, "logits/chosen": -2.91206955909729, "logits/rejected": -2.83095383644104, "logps/chosen": -81.1929702758789, "logps/rejected": -1025.0313720703125, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -0.09227331727743149, "rewards/margins": 9.729327201843262, "rewards/rejected": -9.821599960327148, "step": 4650 }, { "epoch": 0.28, "learning_rate": 4.5333757325066715e-06, "logits/chosen": -2.8676838874816895, "logits/rejected": -2.777498722076416, "logps/chosen": -81.93919372558594, "logps/rejected": -1140.127197265625, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.10093430429697037, "rewards/margins": 10.844438552856445, "rewards/rejected": -10.945371627807617, "step": 4660 }, { "epoch": 0.28, "learning_rate": 4.5303439366769095e-06, "logits/chosen": -2.906921863555908, "logits/rejected": -2.7776081562042236, "logps/chosen": -93.50048065185547, "logps/rejected": -1020.9957275390625, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -0.2694311738014221, "rewards/margins": 9.503682136535645, "rewards/rejected": -9.773112297058105, "step": 4670 }, { "epoch": 0.28, "learning_rate": 4.527303344182065e-06, "logits/chosen": -2.859835147857666, "logits/rejected": -2.7575488090515137, "logps/chosen": -106.78855895996094, "logps/rejected": -1145.6884765625, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -0.3507247865200043, "rewards/margins": 10.670522689819336, "rewards/rejected": -11.021248817443848, "step": 4680 }, { "epoch": 0.28, "learning_rate": 4.524253968195802e-06, "logits/chosen": -2.9059767723083496, "logits/rejected": -2.810584783554077, "logps/chosen": -101.52924346923828, "logps/rejected": -1136.054931640625, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.3442736864089966, "rewards/margins": 10.585061073303223, "rewards/rejected": -10.92933464050293, "step": 4690 }, { "epoch": 0.28, "learning_rate": 4.521195821929843e-06, "logits/chosen": -2.9197306632995605, "logits/rejected": -2.7863106727600098, "logps/chosen": -105.24568176269531, "logps/rejected": -976.8414306640625, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -0.3304779827594757, "rewards/margins": 8.985427856445312, "rewards/rejected": -9.31590461730957, "step": 4700 }, { "epoch": 0.28, "learning_rate": 4.5181289186339085e-06, "logits/chosen": -2.893448829650879, "logits/rejected": -2.7539124488830566, "logps/chosen": -100.33922576904297, "logps/rejected": -984.7218017578125, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -0.2708638310432434, "rewards/margins": 9.130024909973145, "rewards/rejected": -9.40088939666748, "step": 4710 }, { "epoch": 0.28, "learning_rate": 4.51505327159566e-06, "logits/chosen": -2.937783718109131, "logits/rejected": -2.80378794670105, "logps/chosen": -76.32362365722656, "logps/rejected": -997.26123046875, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.07866490632295609, "rewards/margins": 9.43741226196289, "rewards/rejected": -9.516077041625977, "step": 4720 }, { "epoch": 0.28, "learning_rate": 4.511968894140639e-06, "logits/chosen": -2.9309630393981934, "logits/rejected": -2.826199531555176, "logps/chosen": -89.91520690917969, "logps/rejected": -880.4615478515625, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": -0.13478204607963562, "rewards/margins": 8.2269926071167, "rewards/rejected": -8.361775398254395, "step": 4730 }, { "epoch": 0.28, "learning_rate": 4.508875799632215e-06, "logits/chosen": -2.9290318489074707, "logits/rejected": -2.8303656578063965, "logps/chosen": -131.4691619873047, "logps/rejected": -1089.2655029296875, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -0.5839880108833313, "rewards/margins": 9.863065719604492, "rewards/rejected": -10.447053909301758, "step": 4740 }, { "epoch": 0.28, "learning_rate": 4.505774001471527e-06, "logits/chosen": -2.9127578735351562, "logits/rejected": -2.7932186126708984, "logps/chosen": -78.004150390625, "logps/rejected": -1080.9903564453125, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.10248641669750214, "rewards/margins": 10.269338607788086, "rewards/rejected": -10.371824264526367, "step": 4750 }, { "epoch": 0.28, "learning_rate": 4.502663513097419e-06, "logits/chosen": -2.926621198654175, "logits/rejected": -2.77805757522583, "logps/chosen": -92.62043762207031, "logps/rejected": -1071.267578125, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -0.1928451955318451, "rewards/margins": 10.071054458618164, "rewards/rejected": -10.263900756835938, "step": 4760 }, { "epoch": 0.28, "learning_rate": 4.499544347986388e-06, "logits/chosen": -2.8830108642578125, "logits/rejected": -2.7515289783477783, "logps/chosen": -89.59968566894531, "logps/rejected": -1003.7515869140625, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.2422805279493332, "rewards/margins": 9.363607406616211, "rewards/rejected": -9.605888366699219, "step": 4770 }, { "epoch": 0.29, "learning_rate": 4.4964165196525255e-06, "logits/chosen": -2.941171407699585, "logits/rejected": -2.8077425956726074, "logps/chosen": -96.42909240722656, "logps/rejected": -1170.781005859375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.2246961146593094, "rewards/margins": 11.036178588867188, "rewards/rejected": -11.260873794555664, "step": 4780 }, { "epoch": 0.29, "learning_rate": 4.493280041647454e-06, "logits/chosen": -2.9356400966644287, "logits/rejected": -2.780233144760132, "logps/chosen": -119.87623596191406, "logps/rejected": -1087.643310546875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.46486926078796387, "rewards/margins": 9.971296310424805, "rewards/rejected": -10.436163902282715, "step": 4790 }, { "epoch": 0.29, "learning_rate": 4.490134927560276e-06, "logits/chosen": -2.92179799079895, "logits/rejected": -2.7985856533050537, "logps/chosen": -123.69156646728516, "logps/rejected": -1119.8121337890625, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -0.5838445425033569, "rewards/margins": 10.174220085144043, "rewards/rejected": -10.758064270019531, "step": 4800 }, { "epoch": 0.29, "learning_rate": 4.486981191017505e-06, "logits/chosen": -2.928393840789795, "logits/rejected": -2.760741710662842, "logps/chosen": -92.12145233154297, "logps/rejected": -1152.6842041015625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -0.23580631613731384, "rewards/margins": 10.84332275390625, "rewards/rejected": -11.07912826538086, "step": 4810 }, { "epoch": 0.29, "learning_rate": 4.4838188456830175e-06, "logits/chosen": -2.8960201740264893, "logits/rejected": -2.796502113342285, "logps/chosen": -83.48484802246094, "logps/rejected": -1064.636962890625, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.12085038423538208, "rewards/margins": 10.091634750366211, "rewards/rejected": -10.212484359741211, "step": 4820 }, { "epoch": 0.29, "learning_rate": 4.480647905257985e-06, "logits/chosen": -2.928720474243164, "logits/rejected": -2.818305015563965, "logps/chosen": -96.21577453613281, "logps/rejected": -1090.26708984375, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -0.2710864543914795, "rewards/margins": 10.182416915893555, "rewards/rejected": -10.45350456237793, "step": 4830 }, { "epoch": 0.29, "learning_rate": 4.47746838348082e-06, "logits/chosen": -2.9394912719726562, "logits/rejected": -2.8059401512145996, "logps/chosen": -75.38832092285156, "logps/rejected": -980.80712890625, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -0.05348791554570198, "rewards/margins": 9.331521987915039, "rewards/rejected": -9.385010719299316, "step": 4840 }, { "epoch": 0.29, "learning_rate": 4.474280294127112e-06, "logits/chosen": -2.9221832752227783, "logits/rejected": -2.852569580078125, "logps/chosen": -73.40013122558594, "logps/rejected": -1074.966064453125, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -0.08040627092123032, "rewards/margins": 10.232905387878418, "rewards/rejected": -10.313311576843262, "step": 4850 }, { "epoch": 0.29, "learning_rate": 4.471083651009574e-06, "logits/chosen": -2.9261255264282227, "logits/rejected": -2.8120765686035156, "logps/chosen": -87.65428161621094, "logps/rejected": -1101.6876220703125, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.2045680582523346, "rewards/margins": 10.38014030456543, "rewards/rejected": -10.584708213806152, "step": 4860 }, { "epoch": 0.29, "learning_rate": 4.4678784679779766e-06, "logits/chosen": -2.9147346019744873, "logits/rejected": -2.8132572174072266, "logps/chosen": -73.7406997680664, "logps/rejected": -1141.0323486328125, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": 0.0162602998316288, "rewards/margins": 10.989324569702148, "rewards/rejected": -10.973065376281738, "step": 4870 }, { "epoch": 0.29, "learning_rate": 4.464664758919092e-06, "logits/chosen": -2.9283194541931152, "logits/rejected": -2.81685733795166, "logps/chosen": -95.83625793457031, "logps/rejected": -1124.4720458984375, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -0.23186016082763672, "rewards/margins": 10.574603080749512, "rewards/rejected": -10.806463241577148, "step": 4880 }, { "epoch": 0.29, "learning_rate": 4.461442537756629e-06, "logits/chosen": -2.8623392581939697, "logits/rejected": -2.753228187561035, "logps/chosen": -108.88375091552734, "logps/rejected": -1113.374267578125, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.36459964513778687, "rewards/margins": 10.332502365112305, "rewards/rejected": -10.697102546691895, "step": 4890 }, { "epoch": 0.29, "learning_rate": 4.458211818451179e-06, "logits/chosen": -2.9641549587249756, "logits/rejected": -2.8256726264953613, "logps/chosen": -75.55189514160156, "logps/rejected": -1139.4482421875, "loss": 0.0313, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.1000233069062233, "rewards/margins": 10.85367488861084, "rewards/rejected": -10.953699111938477, "step": 4900 }, { "epoch": 0.29, "learning_rate": 4.454972615000153e-06, "logits/chosen": -2.8726203441619873, "logits/rejected": -2.7775096893310547, "logps/chosen": -76.64668273925781, "logps/rejected": -922.6935424804688, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -0.05730264633893967, "rewards/margins": 8.723938941955566, "rewards/rejected": -8.781240463256836, "step": 4910 }, { "epoch": 0.29, "learning_rate": 4.451724941437718e-06, "logits/chosen": -2.9053893089294434, "logits/rejected": -2.7921721935272217, "logps/chosen": -79.14024353027344, "logps/rejected": -1114.6865234375, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": -0.008538919501006603, "rewards/margins": 10.700807571411133, "rewards/rejected": -10.709346771240234, "step": 4920 }, { "epoch": 0.29, "learning_rate": 4.448468811834739e-06, "logits/chosen": -2.932300329208374, "logits/rejected": -2.854891538619995, "logps/chosen": -78.27799987792969, "logps/rejected": -904.4276123046875, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -0.11643964052200317, "rewards/margins": 8.488099098205566, "rewards/rejected": -8.604537963867188, "step": 4930 }, { "epoch": 0.29, "learning_rate": 4.445204240298718e-06, "logits/chosen": -2.9165663719177246, "logits/rejected": -2.803201913833618, "logps/chosen": -72.69478607177734, "logps/rejected": -1076.4212646484375, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.03252802789211273, "rewards/margins": 10.271202087402344, "rewards/rejected": -10.303731918334961, "step": 4940 }, { "epoch": 0.3, "learning_rate": 4.441931240973735e-06, "logits/chosen": -2.94026780128479, "logits/rejected": -2.8018202781677246, "logps/chosen": -95.87989807128906, "logps/rejected": -1074.091064453125, "loss": 0.0373, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2970934808254242, "rewards/margins": 10.007808685302734, "rewards/rejected": -10.304903030395508, "step": 4950 }, { "epoch": 0.3, "learning_rate": 4.43864982804038e-06, "logits/chosen": -2.9383206367492676, "logits/rejected": -2.796290636062622, "logps/chosen": -94.48403930664062, "logps/rejected": -1010.3338623046875, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -0.20962488651275635, "rewards/margins": 9.447382926940918, "rewards/rejected": -9.657008171081543, "step": 4960 }, { "epoch": 0.3, "learning_rate": 4.435360015715697e-06, "logits/chosen": -2.9326417446136475, "logits/rejected": -2.766787052154541, "logps/chosen": -96.70450592041016, "logps/rejected": -1143.965576171875, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -0.23923973739147186, "rewards/margins": 10.757135391235352, "rewards/rejected": -10.996376037597656, "step": 4970 }, { "epoch": 0.3, "learning_rate": 4.4320618182531244e-06, "logits/chosen": -2.8783507347106934, "logits/rejected": -2.7696948051452637, "logps/chosen": -96.92609405517578, "logps/rejected": -1114.8580322265625, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.23641660809516907, "rewards/margins": 10.473978996276855, "rewards/rejected": -10.710395812988281, "step": 4980 }, { "epoch": 0.3, "learning_rate": 4.428755249942425e-06, "logits/chosen": -2.94284725189209, "logits/rejected": -2.805267810821533, "logps/chosen": -88.04710388183594, "logps/rejected": -1128.584716796875, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -0.15025809407234192, "rewards/margins": 10.699899673461914, "rewards/rejected": -10.850159645080566, "step": 4990 }, { "epoch": 0.3, "learning_rate": 4.4254403251096345e-06, "logits/chosen": -2.9280588626861572, "logits/rejected": -2.808825969696045, "logps/chosen": -106.11286926269531, "logps/rejected": -1052.760009765625, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -0.37431150674819946, "rewards/margins": 9.705513000488281, "rewards/rejected": -10.079824447631836, "step": 5000 }, { "epoch": 0.3, "learning_rate": 4.422117058116989e-06, "logits/chosen": -2.8958401679992676, "logits/rejected": -2.794448137283325, "logps/chosen": -108.98091125488281, "logps/rejected": -1148.6414794921875, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -0.35968703031539917, "rewards/margins": 10.67334270477295, "rewards/rejected": -11.033029556274414, "step": 5010 }, { "epoch": 0.3, "learning_rate": 4.418785463362871e-06, "logits/chosen": -2.918119430541992, "logits/rejected": -2.799116849899292, "logps/chosen": -100.15885162353516, "logps/rejected": -1211.322998046875, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.2800785005092621, "rewards/margins": 11.365874290466309, "rewards/rejected": -11.645952224731445, "step": 5020 }, { "epoch": 0.3, "learning_rate": 4.415445555281742e-06, "logits/chosen": -2.90566086769104, "logits/rejected": -2.805044174194336, "logps/chosen": -115.4151611328125, "logps/rejected": -1186.9598388671875, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.4487641751766205, "rewards/margins": 10.98048210144043, "rewards/rejected": -11.429245948791504, "step": 5030 }, { "epoch": 0.3, "learning_rate": 4.412097348344084e-06, "logits/chosen": -2.890604257583618, "logits/rejected": -2.7863199710845947, "logps/chosen": -94.54801940917969, "logps/rejected": -1265.533447265625, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.20007574558258057, "rewards/margins": 12.013483047485352, "rewards/rejected": -12.213560104370117, "step": 5040 }, { "epoch": 0.3, "learning_rate": 4.408740857056332e-06, "logits/chosen": -2.9250786304473877, "logits/rejected": -2.796694040298462, "logps/chosen": -113.1000747680664, "logps/rejected": -1204.5736083984375, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.441622793674469, "rewards/margins": 11.159921646118164, "rewards/rejected": -11.601545333862305, "step": 5050 }, { "epoch": 0.3, "learning_rate": 4.405376095960816e-06, "logits/chosen": -2.9202957153320312, "logits/rejected": -2.794678211212158, "logps/chosen": -114.0595703125, "logps/rejected": -1091.915283203125, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4511253237724304, "rewards/margins": 10.023406028747559, "rewards/rejected": -10.474531173706055, "step": 5060 }, { "epoch": 0.3, "learning_rate": 4.402003079635695e-06, "logits/chosen": -2.926386594772339, "logits/rejected": -2.801600694656372, "logps/chosen": -120.9647216796875, "logps/rejected": -1005.9732666015625, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -0.5526293516159058, "rewards/margins": 9.075660705566406, "rewards/rejected": -9.628290176391602, "step": 5070 }, { "epoch": 0.3, "learning_rate": 4.398621822694894e-06, "logits/chosen": -2.906467914581299, "logits/rejected": -2.8236374855041504, "logps/chosen": -71.70893859863281, "logps/rejected": -1108.146728515625, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": 0.005425202660262585, "rewards/margins": 10.629765510559082, "rewards/rejected": -10.62433910369873, "step": 5080 }, { "epoch": 0.3, "learning_rate": 4.3952323397880426e-06, "logits/chosen": -2.89420747756958, "logits/rejected": -2.778454303741455, "logps/chosen": -69.36300659179688, "logps/rejected": -906.44921875, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": 0.009314288385212421, "rewards/margins": 8.618794441223145, "rewards/rejected": -8.609479904174805, "step": 5090 }, { "epoch": 0.3, "learning_rate": 4.391834645600408e-06, "logits/chosen": -2.9491798877716064, "logits/rejected": -2.8280227184295654, "logps/chosen": -76.1402587890625, "logps/rejected": -1026.66015625, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -0.0630360096693039, "rewards/margins": 9.7501859664917, "rewards/rejected": -9.813223838806152, "step": 5100 }, { "epoch": 0.3, "learning_rate": 4.388428754852835e-06, "logits/chosen": -2.878732681274414, "logits/rejected": -2.78424072265625, "logps/chosen": -80.48857116699219, "logps/rejected": -972.7552490234375, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -0.134246364235878, "rewards/margins": 9.154767990112305, "rewards/rejected": -9.28901481628418, "step": 5110 }, { "epoch": 0.31, "learning_rate": 4.385014682301682e-06, "logits/chosen": -2.9347071647644043, "logits/rejected": -2.7972300052642822, "logps/chosen": -91.8586654663086, "logps/rejected": -1073.089111328125, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.22346599400043488, "rewards/margins": 10.061551094055176, "rewards/rejected": -10.285016059875488, "step": 5120 }, { "epoch": 0.31, "learning_rate": 4.381592442738753e-06, "logits/chosen": -2.8769049644470215, "logits/rejected": -2.7817323207855225, "logps/chosen": -66.82271575927734, "logps/rejected": -978.7667236328125, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": 0.01835566759109497, "rewards/margins": 9.366061210632324, "rewards/rejected": -9.347704887390137, "step": 5130 }, { "epoch": 0.31, "learning_rate": 4.3781620509912395e-06, "logits/chosen": -2.8907856941223145, "logits/rejected": -2.7915472984313965, "logps/chosen": -84.63075256347656, "logps/rejected": -1095.581298828125, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -0.10715818405151367, "rewards/margins": 10.430553436279297, "rewards/rejected": -10.537710189819336, "step": 5140 }, { "epoch": 0.31, "learning_rate": 4.374723521921651e-06, "logits/chosen": -2.895655632019043, "logits/rejected": -2.7703962326049805, "logps/chosen": -101.97102355957031, "logps/rejected": -1050.6082763671875, "loss": 0.061, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.26692596077919006, "rewards/margins": 9.794083595275879, "rewards/rejected": -10.061009407043457, "step": 5150 }, { "epoch": 0.31, "learning_rate": 4.3712768704277535e-06, "logits/chosen": -2.924736738204956, "logits/rejected": -2.762291431427002, "logps/chosen": -107.10150146484375, "logps/rejected": -1177.926513671875, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -0.35556110739707947, "rewards/margins": 10.972970962524414, "rewards/rejected": -11.328532218933105, "step": 5160 }, { "epoch": 0.31, "learning_rate": 4.367822111442504e-06, "logits/chosen": -2.9397132396698, "logits/rejected": -2.830716848373413, "logps/chosen": -87.31817626953125, "logps/rejected": -1015.3263549804688, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -0.1307411640882492, "rewards/margins": 9.583714485168457, "rewards/rejected": -9.714456558227539, "step": 5170 }, { "epoch": 0.31, "learning_rate": 4.364359259933985e-06, "logits/chosen": -2.914748430252075, "logits/rejected": -2.792332649230957, "logps/chosen": -109.8978271484375, "logps/rejected": -1092.9752197265625, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -0.36138713359832764, "rewards/margins": 10.136848449707031, "rewards/rejected": -10.498235702514648, "step": 5180 }, { "epoch": 0.31, "learning_rate": 4.3608883309053425e-06, "logits/chosen": -2.935044527053833, "logits/rejected": -2.8275039196014404, "logps/chosen": -137.51113891601562, "logps/rejected": -978.7269287109375, "loss": 0.0477, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6427118182182312, "rewards/margins": 8.703424453735352, "rewards/rejected": -9.346137046813965, "step": 5190 }, { "epoch": 0.31, "learning_rate": 4.35740933939472e-06, "logits/chosen": -2.8802666664123535, "logits/rejected": -2.801466464996338, "logps/chosen": -112.32859802246094, "logps/rejected": -1065.3653564453125, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.3624281585216522, "rewards/margins": 9.844383239746094, "rewards/rejected": -10.206811904907227, "step": 5200 }, { "epoch": 0.31, "learning_rate": 4.353922300475189e-06, "logits/chosen": -2.9154207706451416, "logits/rejected": -2.7947864532470703, "logps/chosen": -90.10201263427734, "logps/rejected": -1063.709228515625, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -0.21142025291919708, "rewards/margins": 9.986998558044434, "rewards/rejected": -10.198419570922852, "step": 5210 }, { "epoch": 0.31, "learning_rate": 4.350427229254689e-06, "logits/chosen": -2.8895621299743652, "logits/rejected": -2.805819034576416, "logps/chosen": -73.00257873535156, "logps/rejected": -948.2406005859375, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -0.03478895127773285, "rewards/margins": 9.013351440429688, "rewards/rejected": -9.048139572143555, "step": 5220 }, { "epoch": 0.31, "learning_rate": 4.346924140875961e-06, "logits/chosen": -2.8794350624084473, "logits/rejected": -2.7719314098358154, "logps/chosen": -75.07901763916016, "logps/rejected": -1007.3990478515625, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -0.03966792672872543, "rewards/margins": 9.588266372680664, "rewards/rejected": -9.627934455871582, "step": 5230 }, { "epoch": 0.31, "learning_rate": 4.34341305051648e-06, "logits/chosen": -2.85591459274292, "logits/rejected": -2.734689235687256, "logps/chosen": -86.8490219116211, "logps/rejected": -1034.8328857421875, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.16776810586452484, "rewards/margins": 9.731412887573242, "rewards/rejected": -9.899181365966797, "step": 5240 }, { "epoch": 0.31, "learning_rate": 4.339893973388392e-06, "logits/chosen": -2.9150335788726807, "logits/rejected": -2.8110556602478027, "logps/chosen": -116.9923324584961, "logps/rejected": -999.41455078125, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -0.5206400752067566, "rewards/margins": 9.03512954711914, "rewards/rejected": -9.555768966674805, "step": 5250 }, { "epoch": 0.31, "learning_rate": 4.3363669247384446e-06, "logits/chosen": -2.937760591506958, "logits/rejected": -2.842745304107666, "logps/chosen": -85.13516998291016, "logps/rejected": -1062.99609375, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -0.1540161669254303, "rewards/margins": 10.041516304016113, "rewards/rejected": -10.195531845092773, "step": 5260 }, { "epoch": 0.31, "learning_rate": 4.332831919847922e-06, "logits/chosen": -2.9191360473632812, "logits/rejected": -2.8050339221954346, "logps/chosen": -113.01680755615234, "logps/rejected": -957.55078125, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -0.4381740987300873, "rewards/margins": 8.692211151123047, "rewards/rejected": -9.130385398864746, "step": 5270 }, { "epoch": 0.31, "learning_rate": 4.329288974032583e-06, "logits/chosen": -2.905866861343384, "logits/rejected": -2.773118495941162, "logps/chosen": -123.447509765625, "logps/rejected": -1007.7584228515625, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": -0.45142611861228943, "rewards/margins": 9.184259414672852, "rewards/rejected": -9.635684967041016, "step": 5280 }, { "epoch": 0.32, "learning_rate": 4.325738102642589e-06, "logits/chosen": -2.9166271686553955, "logits/rejected": -2.7881298065185547, "logps/chosen": -87.07463836669922, "logps/rejected": -1023.7344970703125, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": -0.15132933855056763, "rewards/margins": 9.644368171691895, "rewards/rejected": -9.795698165893555, "step": 5290 }, { "epoch": 0.32, "learning_rate": 4.322179321062439e-06, "logits/chosen": -2.900089979171753, "logits/rejected": -2.793597459793091, "logps/chosen": -97.57390594482422, "logps/rejected": -1073.473876953125, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -0.3010684847831726, "rewards/margins": 9.992565155029297, "rewards/rejected": -10.293633460998535, "step": 5300 }, { "epoch": 0.32, "learning_rate": 4.318612644710906e-06, "logits/chosen": -2.8975777626037598, "logits/rejected": -2.829590082168579, "logps/chosen": -86.01814270019531, "logps/rejected": -1105.52685546875, "loss": 0.0324, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.16788221895694733, "rewards/margins": 10.444056510925293, "rewards/rejected": -10.6119384765625, "step": 5310 }, { "epoch": 0.32, "learning_rate": 4.315038089040965e-06, "logits/chosen": -2.9081172943115234, "logits/rejected": -2.7894349098205566, "logps/chosen": -61.70881271362305, "logps/rejected": -1040.346435546875, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": 0.043680790811777115, "rewards/margins": 10.004947662353516, "rewards/rejected": -9.961265563964844, "step": 5320 }, { "epoch": 0.32, "learning_rate": 4.311455669539732e-06, "logits/chosen": -2.905578136444092, "logits/rejected": -2.81683087348938, "logps/chosen": -65.98013305664062, "logps/rejected": -1031.008056640625, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": 0.051032789051532745, "rewards/margins": 9.92083740234375, "rewards/rejected": -9.869805335998535, "step": 5330 }, { "epoch": 0.32, "learning_rate": 4.307865401728392e-06, "logits/chosen": -2.9275906085968018, "logits/rejected": -2.8506433963775635, "logps/chosen": -69.98945617675781, "logps/rejected": -1111.661376953125, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": 0.03507096320390701, "rewards/margins": 10.716146469116211, "rewards/rejected": -10.681076049804688, "step": 5340 }, { "epoch": 0.32, "learning_rate": 4.3042673011621334e-06, "logits/chosen": -2.8653833866119385, "logits/rejected": -2.756660223007202, "logps/chosen": -66.45137786865234, "logps/rejected": -1089.083984375, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": 0.019074004143476486, "rewards/margins": 10.478767395019531, "rewards/rejected": -10.459692001342773, "step": 5350 }, { "epoch": 0.32, "learning_rate": 4.300661383430081e-06, "logits/chosen": -2.9260733127593994, "logits/rejected": -2.8223023414611816, "logps/chosen": -72.69377136230469, "logps/rejected": -1047.390869140625, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -0.07252846658229828, "rewards/margins": 9.955301284790039, "rewards/rejected": -10.027830123901367, "step": 5360 }, { "epoch": 0.32, "learning_rate": 4.2970476641552304e-06, "logits/chosen": -2.912391424179077, "logits/rejected": -2.8059020042419434, "logps/chosen": -68.8610610961914, "logps/rejected": -1084.7510986328125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": 0.030656863003969193, "rewards/margins": 10.447047233581543, "rewards/rejected": -10.416390419006348, "step": 5370 }, { "epoch": 0.32, "learning_rate": 4.293426158994375e-06, "logits/chosen": -2.9317848682403564, "logits/rejected": -2.7879457473754883, "logps/chosen": -73.31602478027344, "logps/rejected": -1076.7596435546875, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.09305614233016968, "rewards/margins": 10.234904289245605, "rewards/rejected": -10.327960014343262, "step": 5380 }, { "epoch": 0.32, "learning_rate": 4.289796883638042e-06, "logits/chosen": -2.9145305156707764, "logits/rejected": -2.869666337966919, "logps/chosen": -79.21398162841797, "logps/rejected": -976.82373046875, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -0.059015024453401566, "rewards/margins": 9.27155876159668, "rewards/rejected": -9.330573081970215, "step": 5390 }, { "epoch": 0.32, "learning_rate": 4.2861598538104255e-06, "logits/chosen": -2.918776035308838, "logits/rejected": -2.799042224884033, "logps/chosen": -92.83975219726562, "logps/rejected": -1122.894775390625, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.2426188886165619, "rewards/margins": 10.524441719055176, "rewards/rejected": -10.767061233520508, "step": 5400 }, { "epoch": 0.32, "learning_rate": 4.282515085269315e-06, "logits/chosen": -2.941538095474243, "logits/rejected": -2.8051202297210693, "logps/chosen": -98.79366302490234, "logps/rejected": -1020.1435546875, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": -0.257463276386261, "rewards/margins": 9.506620407104492, "rewards/rejected": -9.76408576965332, "step": 5410 }, { "epoch": 0.32, "learning_rate": 4.278862593806029e-06, "logits/chosen": -2.9192214012145996, "logits/rejected": -2.7784857749938965, "logps/chosen": -83.96290588378906, "logps/rejected": -964.5505981445312, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -0.1542671024799347, "rewards/margins": 9.048551559448242, "rewards/rejected": -9.202820777893066, "step": 5420 }, { "epoch": 0.32, "learning_rate": 4.275202395245346e-06, "logits/chosen": -2.905764102935791, "logits/rejected": -2.805637836456299, "logps/chosen": -103.6803970336914, "logps/rejected": -1055.0147705078125, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.293278306722641, "rewards/margins": 9.819478988647461, "rewards/rejected": -10.11275863647461, "step": 5430 }, { "epoch": 0.32, "learning_rate": 4.271534505445438e-06, "logits/chosen": -2.935650110244751, "logits/rejected": -2.779548406600952, "logps/chosen": -82.7082290649414, "logps/rejected": -1142.6175537109375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.0751660019159317, "rewards/margins": 10.8822021484375, "rewards/rejected": -10.957368850708008, "step": 5440 }, { "epoch": 0.32, "learning_rate": 4.267858940297799e-06, "logits/chosen": -2.8821425437927246, "logits/rejected": -2.7712297439575195, "logps/chosen": -89.7991943359375, "logps/rejected": -1161.3721923828125, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.2234000861644745, "rewards/margins": 10.930200576782227, "rewards/rejected": -11.153600692749023, "step": 5450 }, { "epoch": 0.33, "learning_rate": 4.264175715727176e-06, "logits/chosen": -2.9116759300231934, "logits/rejected": -2.7574896812438965, "logps/chosen": -92.06423950195312, "logps/rejected": -1130.909912109375, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.22101660072803497, "rewards/margins": 10.647990226745605, "rewards/rejected": -10.869007110595703, "step": 5460 }, { "epoch": 0.33, "learning_rate": 4.2604848476915015e-06, "logits/chosen": -2.886748790740967, "logits/rejected": -2.790830135345459, "logps/chosen": -67.21646881103516, "logps/rejected": -1200.1285400390625, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 0.005697926972061396, "rewards/margins": 11.564594268798828, "rewards/rejected": -11.5588960647583, "step": 5470 }, { "epoch": 0.33, "learning_rate": 4.256786352181827e-06, "logits/chosen": -2.9011101722717285, "logits/rejected": -2.834777593612671, "logps/chosen": -81.32843017578125, "logps/rejected": -1057.913818359375, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.06897179037332535, "rewards/margins": 10.05571460723877, "rewards/rejected": -10.124686241149902, "step": 5480 }, { "epoch": 0.33, "learning_rate": 4.253080245222246e-06, "logits/chosen": -2.9348692893981934, "logits/rejected": -2.856180429458618, "logps/chosen": -72.4715576171875, "logps/rejected": -1090.723388671875, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": 0.025209635496139526, "rewards/margins": 10.495555877685547, "rewards/rejected": -10.470346450805664, "step": 5490 }, { "epoch": 0.33, "learning_rate": 4.249366542869835e-06, "logits/chosen": -2.928931474685669, "logits/rejected": -2.8061206340789795, "logps/chosen": -99.90522766113281, "logps/rejected": -1051.652099609375, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.270371675491333, "rewards/margins": 9.804542541503906, "rewards/rejected": -10.074914932250977, "step": 5500 }, { "epoch": 0.33, "learning_rate": 4.245645261214572e-06, "logits/chosen": -2.9076218605041504, "logits/rejected": -2.793264389038086, "logps/chosen": -91.87276458740234, "logps/rejected": -1218.15234375, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -0.16891315579414368, "rewards/margins": 11.567235946655273, "rewards/rejected": -11.736149787902832, "step": 5510 }, { "epoch": 0.33, "learning_rate": 4.24191641637928e-06, "logits/chosen": -2.9241397380828857, "logits/rejected": -2.8189492225646973, "logps/chosen": -87.92935943603516, "logps/rejected": -1165.9312744140625, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -0.17771950364112854, "rewards/margins": 11.028741836547852, "rewards/rejected": -11.206462860107422, "step": 5520 }, { "epoch": 0.33, "learning_rate": 4.238180024519543e-06, "logits/chosen": -2.9220926761627197, "logits/rejected": -2.812472343444824, "logps/chosen": -78.54875183105469, "logps/rejected": -1018.8239135742188, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -0.08486328274011612, "rewards/margins": 9.656705856323242, "rewards/rejected": -9.741570472717285, "step": 5530 }, { "epoch": 0.33, "learning_rate": 4.234436101823648e-06, "logits/chosen": -2.919038772583008, "logits/rejected": -2.8116660118103027, "logps/chosen": -66.53028869628906, "logps/rejected": -1173.835693359375, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": 0.007306198589503765, "rewards/margins": 11.290348052978516, "rewards/rejected": -11.283041000366211, "step": 5540 }, { "epoch": 0.33, "learning_rate": 4.230684664512509e-06, "logits/chosen": -2.932173013687134, "logits/rejected": -2.8377366065979004, "logps/chosen": -66.55361938476562, "logps/rejected": -989.4749145507812, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": 0.05417945235967636, "rewards/margins": 9.51244831085205, "rewards/rejected": -9.458269119262695, "step": 5550 }, { "epoch": 0.33, "learning_rate": 4.226925728839598e-06, "logits/chosen": -2.9452261924743652, "logits/rejected": -2.8142213821411133, "logps/chosen": -99.51521301269531, "logps/rejected": -1074.3839111328125, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.2224973440170288, "rewards/margins": 10.062762260437012, "rewards/rejected": -10.285259246826172, "step": 5560 }, { "epoch": 0.33, "learning_rate": 4.223159311090874e-06, "logits/chosen": -2.8924169540405273, "logits/rejected": -2.756124496459961, "logps/chosen": -96.50779724121094, "logps/rejected": -1127.698486328125, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -0.23888087272644043, "rewards/margins": 10.595687866210938, "rewards/rejected": -10.834568977355957, "step": 5570 }, { "epoch": 0.33, "learning_rate": 4.2193854275847115e-06, "logits/chosen": -2.9521889686584473, "logits/rejected": -2.8155055046081543, "logps/chosen": -83.12867736816406, "logps/rejected": -1134.474853515625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -0.16818618774414062, "rewards/margins": 10.723310470581055, "rewards/rejected": -10.891496658325195, "step": 5580 }, { "epoch": 0.33, "learning_rate": 4.215604094671835e-06, "logits/chosen": -2.935363292694092, "logits/rejected": -2.823221206665039, "logps/chosen": -113.07855224609375, "logps/rejected": -1123.1505126953125, "loss": 0.0279, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.42306798696517944, "rewards/margins": 10.36509895324707, "rewards/rejected": -10.788165092468262, "step": 5590 }, { "epoch": 0.33, "learning_rate": 4.211815328735239e-06, "logits/chosen": -2.902005434036255, "logits/rejected": -2.788292169570923, "logps/chosen": -89.6695327758789, "logps/rejected": -971.9357299804688, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.18500728905200958, "rewards/margins": 9.09084415435791, "rewards/rejected": -9.27585220336914, "step": 5600 }, { "epoch": 0.33, "learning_rate": 4.208019146190127e-06, "logits/chosen": -2.8818888664245605, "logits/rejected": -2.8172061443328857, "logps/chosen": -80.78018951416016, "logps/rejected": -1142.979248046875, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.10595504939556122, "rewards/margins": 10.88661003112793, "rewards/rejected": -10.992565155029297, "step": 5610 }, { "epoch": 0.34, "learning_rate": 4.204215563483833e-06, "logits/chosen": -2.9177119731903076, "logits/rejected": -2.8300135135650635, "logps/chosen": -100.86442565917969, "logps/rejected": -871.18798828125, "loss": 0.083, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.36082345247268677, "rewards/margins": 7.9124860763549805, "rewards/rejected": -8.273309707641602, "step": 5620 }, { "epoch": 0.34, "learning_rate": 4.200404597095754e-06, "logits/chosen": -2.903635025024414, "logits/rejected": -2.780912399291992, "logps/chosen": -128.10079956054688, "logps/rejected": -951.4157104492188, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -0.5435784459114075, "rewards/margins": 8.542450904846191, "rewards/rejected": -9.086029052734375, "step": 5630 }, { "epoch": 0.34, "learning_rate": 4.196586263537277e-06, "logits/chosen": -2.8814871311187744, "logits/rejected": -2.780014753341675, "logps/chosen": -85.3988037109375, "logps/rejected": -1053.209716796875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.10892568528652191, "rewards/margins": 9.975194931030273, "rewards/rejected": -10.084120750427246, "step": 5640 }, { "epoch": 0.34, "learning_rate": 4.192760579351708e-06, "logits/chosen": -2.925739049911499, "logits/rejected": -2.7880098819732666, "logps/chosen": -78.75343322753906, "logps/rejected": -1094.351806640625, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -0.09332330524921417, "rewards/margins": 10.394277572631836, "rewards/rejected": -10.487602233886719, "step": 5650 }, { "epoch": 0.34, "learning_rate": 4.188927561114201e-06, "logits/chosen": -2.9139280319213867, "logits/rejected": -2.7899022102355957, "logps/chosen": -76.35122680664062, "logps/rejected": -1034.1761474609375, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.05266835168004036, "rewards/margins": 9.847970962524414, "rewards/rejected": -9.900638580322266, "step": 5660 }, { "epoch": 0.34, "learning_rate": 4.185087225431686e-06, "logits/chosen": -2.919797420501709, "logits/rejected": -2.8353352546691895, "logps/chosen": -85.43548583984375, "logps/rejected": -1090.5357666015625, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -0.17850326001644135, "rewards/margins": 10.278223991394043, "rewards/rejected": -10.456727981567383, "step": 5670 }, { "epoch": 0.34, "learning_rate": 4.181239588942793e-06, "logits/chosen": -2.9361441135406494, "logits/rejected": -2.7996506690979004, "logps/chosen": -80.62799072265625, "logps/rejected": -969.0096435546875, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -0.027362119406461716, "rewards/margins": 9.228143692016602, "rewards/rejected": -9.255505561828613, "step": 5680 }, { "epoch": 0.34, "learning_rate": 4.177384668317788e-06, "logits/chosen": -2.930062770843506, "logits/rejected": -2.8270697593688965, "logps/chosen": -75.63851928710938, "logps/rejected": -941.7664184570312, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": -0.07435659319162369, "rewards/margins": 8.918901443481445, "rewards/rejected": -8.993257522583008, "step": 5690 }, { "epoch": 0.34, "learning_rate": 4.173522480258494e-06, "logits/chosen": -2.8649230003356934, "logits/rejected": -2.785822868347168, "logps/chosen": -87.01467895507812, "logps/rejected": -977.3831176757812, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -0.14952056109905243, "rewards/margins": 9.178632736206055, "rewards/rejected": -9.328152656555176, "step": 5700 }, { "epoch": 0.34, "learning_rate": 4.1696530414982225e-06, "logits/chosen": -2.9223949909210205, "logits/rejected": -2.8064534664154053, "logps/chosen": -88.6563491821289, "logps/rejected": -1160.025390625, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.13313785195350647, "rewards/margins": 11.01543140411377, "rewards/rejected": -11.148569107055664, "step": 5710 }, { "epoch": 0.34, "learning_rate": 4.165776368801695e-06, "logits/chosen": -2.9090263843536377, "logits/rejected": -2.803642988204956, "logps/chosen": -86.6533203125, "logps/rejected": -1075.635498046875, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.1997295320034027, "rewards/margins": 10.107152938842773, "rewards/rejected": -10.306882858276367, "step": 5720 }, { "epoch": 0.34, "learning_rate": 4.16189247896498e-06, "logits/chosen": -2.9081945419311523, "logits/rejected": -2.8132612705230713, "logps/chosen": -105.81819915771484, "logps/rejected": -1061.7763671875, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -0.34442442655563354, "rewards/margins": 9.838922500610352, "rewards/rejected": -10.183345794677734, "step": 5730 }, { "epoch": 0.34, "learning_rate": 4.1580013888154126e-06, "logits/chosen": -2.899080276489258, "logits/rejected": -2.7419142723083496, "logps/chosen": -93.13218688964844, "logps/rejected": -1127.587646484375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.23785266280174255, "rewards/margins": 10.599491119384766, "rewards/rejected": -10.837343215942383, "step": 5740 }, { "epoch": 0.34, "learning_rate": 4.154103115211523e-06, "logits/chosen": -2.890242099761963, "logits/rejected": -2.759457588195801, "logps/chosen": -108.1460952758789, "logps/rejected": -1022.3095703125, "loss": 0.0596, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.4143275320529938, "rewards/margins": 9.379884719848633, "rewards/rejected": -9.794212341308594, "step": 5750 }, { "epoch": 0.34, "learning_rate": 4.150197675042966e-06, "logits/chosen": -2.9226062297821045, "logits/rejected": -2.8194291591644287, "logps/chosen": -89.87496185302734, "logps/rejected": -1136.595458984375, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.21833351254463196, "rewards/margins": 10.690470695495605, "rewards/rejected": -10.908803939819336, "step": 5760 }, { "epoch": 0.34, "learning_rate": 4.146285085230447e-06, "logits/chosen": -2.8766026496887207, "logits/rejected": -2.7943215370178223, "logps/chosen": -93.8062973022461, "logps/rejected": -1165.0740966796875, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.20195484161376953, "rewards/margins": 11.018232345581055, "rewards/rejected": -11.220187187194824, "step": 5770 }, { "epoch": 0.34, "learning_rate": 4.1423653627256445e-06, "logits/chosen": -2.925827741622925, "logits/rejected": -2.7985169887542725, "logps/chosen": -98.32868957519531, "logps/rejected": -1049.890869140625, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": -0.3201757073402405, "rewards/margins": 9.73574447631836, "rewards/rejected": -10.055920600891113, "step": 5780 }, { "epoch": 0.35, "learning_rate": 4.138438524511145e-06, "logits/chosen": -2.916382074356079, "logits/rejected": -2.805530071258545, "logps/chosen": -84.35920715332031, "logps/rejected": -1072.8790283203125, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.15996138751506805, "rewards/margins": 10.127336502075195, "rewards/rejected": -10.287299156188965, "step": 5790 }, { "epoch": 0.35, "learning_rate": 4.134504587600359e-06, "logits/chosen": -2.891566276550293, "logits/rejected": -2.7598204612731934, "logps/chosen": -85.30905151367188, "logps/rejected": -1081.630615234375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.13826137781143188, "rewards/margins": 10.221885681152344, "rewards/rejected": -10.360146522521973, "step": 5800 }, { "epoch": 0.35, "learning_rate": 4.130563569037458e-06, "logits/chosen": -2.9300613403320312, "logits/rejected": -2.80314302444458, "logps/chosen": -81.52914428710938, "logps/rejected": -1069.8724365234375, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -0.04921416938304901, "rewards/margins": 10.200726509094238, "rewards/rejected": -10.249940872192383, "step": 5810 }, { "epoch": 0.35, "learning_rate": 4.126615485897292e-06, "logits/chosen": -2.885714054107666, "logits/rejected": -2.8170981407165527, "logps/chosen": -81.49369812011719, "logps/rejected": -1060.9339599609375, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.13677053153514862, "rewards/margins": 10.032251358032227, "rewards/rejected": -10.169021606445312, "step": 5820 }, { "epoch": 0.35, "learning_rate": 4.12266035528532e-06, "logits/chosen": -2.8987393379211426, "logits/rejected": -2.8095765113830566, "logps/chosen": -73.6042709350586, "logps/rejected": -1113.3331298828125, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -0.019057368859648705, "rewards/margins": 10.670315742492676, "rewards/rejected": -10.689372062683105, "step": 5830 }, { "epoch": 0.35, "learning_rate": 4.118698194337536e-06, "logits/chosen": -2.916578531265259, "logits/rejected": -2.8023016452789307, "logps/chosen": -72.79944610595703, "logps/rejected": -992.8082275390625, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -0.029437948018312454, "rewards/margins": 9.446737289428711, "rewards/rejected": -9.476176261901855, "step": 5840 }, { "epoch": 0.35, "learning_rate": 4.114729020220392e-06, "logits/chosen": -2.9200024604797363, "logits/rejected": -2.7916064262390137, "logps/chosen": -107.86048889160156, "logps/rejected": -931.02001953125, "loss": 0.0937, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.3610355257987976, "rewards/margins": 8.515572547912598, "rewards/rejected": -8.876606941223145, "step": 5850 }, { "epoch": 0.35, "learning_rate": 4.110752850130724e-06, "logits/chosen": -2.93218994140625, "logits/rejected": -2.77553129196167, "logps/chosen": -101.95326232910156, "logps/rejected": -1144.560302734375, "loss": 0.0442, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2615772783756256, "rewards/margins": 10.757929801940918, "rewards/rejected": -11.019506454467773, "step": 5860 }, { "epoch": 0.35, "learning_rate": 4.106769701295683e-06, "logits/chosen": -2.9479477405548096, "logits/rejected": -2.83124041557312, "logps/chosen": -90.23303985595703, "logps/rejected": -1024.789306640625, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -0.21872690320014954, "rewards/margins": 9.591408729553223, "rewards/rejected": -9.810136795043945, "step": 5870 }, { "epoch": 0.35, "learning_rate": 4.102779590972652e-06, "logits/chosen": -2.897775173187256, "logits/rejected": -2.7773444652557373, "logps/chosen": -97.25646209716797, "logps/rejected": -1119.082275390625, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -0.21951651573181152, "rewards/margins": 10.537130355834961, "rewards/rejected": -10.756647109985352, "step": 5880 }, { "epoch": 0.35, "learning_rate": 4.098782536449179e-06, "logits/chosen": -2.8982994556427, "logits/rejected": -2.7915964126586914, "logps/chosen": -121.70416259765625, "logps/rejected": -912.2566528320312, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -0.5237027406692505, "rewards/margins": 8.154032707214355, "rewards/rejected": -8.677735328674316, "step": 5890 }, { "epoch": 0.35, "learning_rate": 4.094778555042893e-06, "logits/chosen": -2.890817165374756, "logits/rejected": -2.7857794761657715, "logps/chosen": -106.4247055053711, "logps/rejected": -1102.311279296875, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -0.3361923396587372, "rewards/margins": 10.237695693969727, "rewards/rejected": -10.573888778686523, "step": 5900 }, { "epoch": 0.35, "learning_rate": 4.090767664101442e-06, "logits/chosen": -2.8680167198181152, "logits/rejected": -2.7525858879089355, "logps/chosen": -126.20454406738281, "logps/rejected": -1081.9871826171875, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.5417619347572327, "rewards/margins": 9.826483726501465, "rewards/rejected": -10.368245124816895, "step": 5910 }, { "epoch": 0.35, "learning_rate": 4.086749881002403e-06, "logits/chosen": -2.9129996299743652, "logits/rejected": -2.765171766281128, "logps/chosen": -223.8513641357422, "logps/rejected": -1133.186767578125, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -1.553107738494873, "rewards/margins": 9.355761528015137, "rewards/rejected": -10.908870697021484, "step": 5920 }, { "epoch": 0.35, "learning_rate": 4.0827252231532185e-06, "logits/chosen": -2.9506583213806152, "logits/rejected": -2.7986555099487305, "logps/chosen": -230.62399291992188, "logps/rejected": -1140.5233154296875, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -1.6065990924835205, "rewards/margins": 9.366796493530273, "rewards/rejected": -10.973396301269531, "step": 5930 }, { "epoch": 0.35, "learning_rate": 4.078693707991115e-06, "logits/chosen": -2.9007019996643066, "logits/rejected": -2.7728428840637207, "logps/chosen": -212.84423828125, "logps/rejected": -1170.5079345703125, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -1.4260632991790771, "rewards/margins": 9.83821964263916, "rewards/rejected": -11.2642822265625, "step": 5940 }, { "epoch": 0.35, "learning_rate": 4.0746553529830274e-06, "logits/chosen": -2.94193959236145, "logits/rejected": -2.7876224517822266, "logps/chosen": -188.4014434814453, "logps/rejected": -1101.51708984375, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -1.1470121145248413, "rewards/margins": 9.426497459411621, "rewards/rejected": -10.573509216308594, "step": 5950 }, { "epoch": 0.36, "learning_rate": 4.070610175625528e-06, "logits/chosen": -2.944218397140503, "logits/rejected": -2.8169147968292236, "logps/chosen": -146.0027313232422, "logps/rejected": -1183.237548828125, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.7262595891952515, "rewards/margins": 10.65246868133545, "rewards/rejected": -11.378727912902832, "step": 5960 }, { "epoch": 0.36, "learning_rate": 4.066558193444746e-06, "logits/chosen": -2.890836715698242, "logits/rejected": -2.8003313541412354, "logps/chosen": -108.06514739990234, "logps/rejected": -1042.223388671875, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -0.4303262233734131, "rewards/margins": 9.557116508483887, "rewards/rejected": -9.987442970275879, "step": 5970 }, { "epoch": 0.36, "learning_rate": 4.0624994239962935e-06, "logits/chosen": -2.887263298034668, "logits/rejected": -2.778682231903076, "logps/chosen": -101.46379089355469, "logps/rejected": -1073.271728515625, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.193510502576828, "rewards/margins": 10.095321655273438, "rewards/rejected": -10.288833618164062, "step": 5980 }, { "epoch": 0.36, "learning_rate": 4.058433884865188e-06, "logits/chosen": -2.9255692958831787, "logits/rejected": -2.8124046325683594, "logps/chosen": -88.64639282226562, "logps/rejected": -1201.575927734375, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.1960635483264923, "rewards/margins": 11.367776870727539, "rewards/rejected": -11.563840866088867, "step": 5990 }, { "epoch": 0.36, "learning_rate": 4.0543615936657785e-06, "logits/chosen": -2.878767967224121, "logits/rejected": -2.718470573425293, "logps/chosen": -99.90760040283203, "logps/rejected": -1055.6903076171875, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.2017739713191986, "rewards/margins": 9.91982364654541, "rewards/rejected": -10.121597290039062, "step": 6000 }, { "epoch": 0.36, "learning_rate": 4.050282568041668e-06, "logits/chosen": -2.9185707569122314, "logits/rejected": -2.7540478706359863, "logps/chosen": -92.38533782958984, "logps/rejected": -1091.3779296875, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.20589880645275116, "rewards/margins": 10.27662181854248, "rewards/rejected": -10.482521057128906, "step": 6010 }, { "epoch": 0.36, "learning_rate": 4.046196825665638e-06, "logits/chosen": -2.887535810470581, "logits/rejected": -2.793731689453125, "logps/chosen": -86.35167694091797, "logps/rejected": -1050.620361328125, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.2140572965145111, "rewards/margins": 9.851717948913574, "rewards/rejected": -10.065774917602539, "step": 6020 }, { "epoch": 0.36, "learning_rate": 4.042104384239568e-06, "logits/chosen": -2.9212846755981445, "logits/rejected": -2.7784135341644287, "logps/chosen": -98.52204895019531, "logps/rejected": -1123.0556640625, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.2850589156150818, "rewards/margins": 10.500296592712402, "rewards/rejected": -10.785355567932129, "step": 6030 }, { "epoch": 0.36, "learning_rate": 4.038005261494364e-06, "logits/chosen": -2.941490650177002, "logits/rejected": -2.8238582611083984, "logps/chosen": -94.63851928710938, "logps/rejected": -1045.890869140625, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.25241321325302124, "rewards/margins": 9.768071174621582, "rewards/rejected": -10.020485877990723, "step": 6040 }, { "epoch": 0.36, "learning_rate": 4.033899475189877e-06, "logits/chosen": -2.9170010089874268, "logits/rejected": -2.7927098274230957, "logps/chosen": -84.85941314697266, "logps/rejected": -1106.8258056640625, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.16339339315891266, "rewards/margins": 10.454751014709473, "rewards/rejected": -10.618144989013672, "step": 6050 }, { "epoch": 0.36, "learning_rate": 4.029787043114835e-06, "logits/chosen": -2.912188768386841, "logits/rejected": -2.7875545024871826, "logps/chosen": -84.82963562011719, "logps/rejected": -983.8064575195312, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.1228787899017334, "rewards/margins": 9.286172866821289, "rewards/rejected": -9.409051895141602, "step": 6060 }, { "epoch": 0.36, "learning_rate": 4.025667983086753e-06, "logits/chosen": -2.919534683227539, "logits/rejected": -2.8064064979553223, "logps/chosen": -100.1720199584961, "logps/rejected": -1088.806396484375, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.3036212623119354, "rewards/margins": 10.14475154876709, "rewards/rejected": -10.448372840881348, "step": 6070 }, { "epoch": 0.36, "learning_rate": 4.021542312951862e-06, "logits/chosen": -2.9426026344299316, "logits/rejected": -2.8380138874053955, "logps/chosen": -74.31979370117188, "logps/rejected": -1067.9876708984375, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": 0.029753098264336586, "rewards/margins": 10.277753829956055, "rewards/rejected": -10.248002052307129, "step": 6080 }, { "epoch": 0.36, "learning_rate": 4.017410050585038e-06, "logits/chosen": -2.901204824447632, "logits/rejected": -2.795081615447998, "logps/chosen": -72.08843231201172, "logps/rejected": -1173.5740966796875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.049670178443193436, "rewards/margins": 11.246772766113281, "rewards/rejected": -11.296442031860352, "step": 6090 }, { "epoch": 0.36, "learning_rate": 4.013271213889712e-06, "logits/chosen": -2.9069836139678955, "logits/rejected": -2.7805657386779785, "logps/chosen": -75.16117858886719, "logps/rejected": -1062.703125, "loss": 0.0278, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.014051372185349464, "rewards/margins": 10.184760093688965, "rewards/rejected": -10.198812484741211, "step": 6100 }, { "epoch": 0.36, "learning_rate": 4.009125820797802e-06, "logits/chosen": -2.929468870162964, "logits/rejected": -2.810861110687256, "logps/chosen": -80.66822814941406, "logps/rejected": -1155.5316162109375, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.08166153728961945, "rewards/margins": 11.017230033874512, "rewards/rejected": -11.098891258239746, "step": 6110 }, { "epoch": 0.36, "learning_rate": 4.0049738892696345e-06, "logits/chosen": -2.8765709400177, "logits/rejected": -2.7566983699798584, "logps/chosen": -86.41004180908203, "logps/rejected": -1207.1488037109375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.1773633509874344, "rewards/margins": 11.451537132263184, "rewards/rejected": -11.628900527954102, "step": 6120 }, { "epoch": 0.37, "learning_rate": 4.000815437293858e-06, "logits/chosen": -2.8686881065368652, "logits/rejected": -2.7436630725860596, "logps/chosen": -107.51835632324219, "logps/rejected": -1247.323486328125, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.32581138610839844, "rewards/margins": 11.695051193237305, "rewards/rejected": -12.020861625671387, "step": 6130 }, { "epoch": 0.37, "learning_rate": 3.996650482887377e-06, "logits/chosen": -2.9623053073883057, "logits/rejected": -2.809072971343994, "logps/chosen": -154.9795684814453, "logps/rejected": -1202.623291015625, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -0.8094560503959656, "rewards/margins": 10.774508476257324, "rewards/rejected": -11.583965301513672, "step": 6140 }, { "epoch": 0.37, "learning_rate": 3.992479044095267e-06, "logits/chosen": -2.91528058052063, "logits/rejected": -2.7518434524536133, "logps/chosen": -139.6954345703125, "logps/rejected": -1149.312744140625, "loss": 0.0259, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.7149208784103394, "rewards/margins": 10.353741645812988, "rewards/rejected": -11.068662643432617, "step": 6150 }, { "epoch": 0.37, "learning_rate": 3.988301138990697e-06, "logits/chosen": -2.910979986190796, "logits/rejected": -2.789787769317627, "logps/chosen": -173.71800231933594, "logps/rejected": -1156.4022216796875, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -1.1050055027008057, "rewards/margins": 10.007904052734375, "rewards/rejected": -11.112909317016602, "step": 6160 }, { "epoch": 0.37, "learning_rate": 3.984116785674852e-06, "logits/chosen": -2.9205832481384277, "logits/rejected": -2.758943557739258, "logps/chosen": -134.47640991210938, "logps/rejected": -1116.061767578125, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -0.6752219200134277, "rewards/margins": 10.03821849822998, "rewards/rejected": -10.713440895080566, "step": 6170 }, { "epoch": 0.37, "learning_rate": 3.979926002276856e-06, "logits/chosen": -2.9152002334594727, "logits/rejected": -2.8041398525238037, "logps/chosen": -86.02631378173828, "logps/rejected": -1053.052001953125, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.11347518116235733, "rewards/margins": 9.975103378295898, "rewards/rejected": -10.088579177856445, "step": 6180 }, { "epoch": 0.37, "learning_rate": 3.97572880695369e-06, "logits/chosen": -2.932382583618164, "logits/rejected": -2.7951180934906006, "logps/chosen": -83.01848602294922, "logps/rejected": -1027.914794921875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.13890673220157623, "rewards/margins": 9.708002090454102, "rewards/rejected": -9.846907615661621, "step": 6190 }, { "epoch": 0.37, "learning_rate": 3.971525217890117e-06, "logits/chosen": -2.891970634460449, "logits/rejected": -2.774087429046631, "logps/chosen": -92.13037109375, "logps/rejected": -1031.8250732421875, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -0.19881696999073029, "rewards/margins": 9.660085678100586, "rewards/rejected": -9.858903884887695, "step": 6200 }, { "epoch": 0.37, "learning_rate": 3.967315253298599e-06, "logits/chosen": -2.9005608558654785, "logits/rejected": -2.8113651275634766, "logps/chosen": -98.0318603515625, "logps/rejected": -1191.575439453125, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -0.2886895537376404, "rewards/margins": 11.196630477905273, "rewards/rejected": -11.485318183898926, "step": 6210 }, { "epoch": 0.37, "learning_rate": 3.963098931419223e-06, "logits/chosen": -2.942368984222412, "logits/rejected": -2.766812801361084, "logps/chosen": -92.01741027832031, "logps/rejected": -1096.832275390625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -0.22160692512989044, "rewards/margins": 10.303544044494629, "rewards/rejected": -10.525152206420898, "step": 6220 }, { "epoch": 0.37, "learning_rate": 3.958876270519619e-06, "logits/chosen": -2.871826648712158, "logits/rejected": -2.7864551544189453, "logps/chosen": -87.02898406982422, "logps/rejected": -1130.223876953125, "loss": 0.0586, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.1553284376859665, "rewards/margins": 10.7144193649292, "rewards/rejected": -10.869749069213867, "step": 6230 }, { "epoch": 0.37, "learning_rate": 3.9546472888948825e-06, "logits/chosen": -2.9435770511627197, "logits/rejected": -2.837279796600342, "logps/chosen": -80.69771575927734, "logps/rejected": -1076.6014404296875, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.11250308901071548, "rewards/margins": 10.206145286560059, "rewards/rejected": -10.318647384643555, "step": 6240 }, { "epoch": 0.37, "learning_rate": 3.950412004867491e-06, "logits/chosen": -2.897233486175537, "logits/rejected": -2.7567076683044434, "logps/chosen": -81.30437469482422, "logps/rejected": -1116.92822265625, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -0.0483371838927269, "rewards/margins": 10.688684463500977, "rewards/rejected": -10.737021446228027, "step": 6250 }, { "epoch": 0.37, "learning_rate": 3.94617043678723e-06, "logits/chosen": -2.8798232078552246, "logits/rejected": -2.764303684234619, "logps/chosen": -81.736328125, "logps/rejected": -973.8621826171875, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -0.11787674576044083, "rewards/margins": 9.178884506225586, "rewards/rejected": -9.296760559082031, "step": 6260 }, { "epoch": 0.37, "learning_rate": 3.941922603031113e-06, "logits/chosen": -2.9101200103759766, "logits/rejected": -2.793123722076416, "logps/chosen": -83.25614929199219, "logps/rejected": -1046.255126953125, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.1816023290157318, "rewards/margins": 9.83031940460205, "rewards/rejected": -10.011922836303711, "step": 6270 }, { "epoch": 0.37, "learning_rate": 3.937668522003295e-06, "logits/chosen": -2.943380832672119, "logits/rejected": -2.812248945236206, "logps/chosen": -96.51483154296875, "logps/rejected": -977.9375, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.2522944509983063, "rewards/margins": 9.086143493652344, "rewards/rejected": -9.338438034057617, "step": 6280 }, { "epoch": 0.38, "learning_rate": 3.933408212135003e-06, "logits/chosen": -2.9029500484466553, "logits/rejected": -2.8118367195129395, "logps/chosen": -81.0311050415039, "logps/rejected": -978.9517822265625, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -0.13293835520744324, "rewards/margins": 9.216623306274414, "rewards/rejected": -9.349563598632812, "step": 6290 }, { "epoch": 0.38, "learning_rate": 3.929141691884448e-06, "logits/chosen": -2.935347080230713, "logits/rejected": -2.792088031768799, "logps/chosen": -80.49495697021484, "logps/rejected": -1181.645751953125, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -0.11081305891275406, "rewards/margins": 11.2561616897583, "rewards/rejected": -11.366973876953125, "step": 6300 }, { "epoch": 0.38, "learning_rate": 3.9248689797367515e-06, "logits/chosen": -2.89543080329895, "logits/rejected": -2.8255739212036133, "logps/chosen": -93.41390228271484, "logps/rejected": -1153.826904296875, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.2257152497768402, "rewards/margins": 10.865880966186523, "rewards/rejected": -11.091597557067871, "step": 6310 }, { "epoch": 0.38, "learning_rate": 3.920590094203856e-06, "logits/chosen": -2.930365800857544, "logits/rejected": -2.8250584602355957, "logps/chosen": -82.77169799804688, "logps/rejected": -1098.818359375, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.08564687520265579, "rewards/margins": 10.462926864624023, "rewards/rejected": -10.54857349395752, "step": 6320 }, { "epoch": 0.38, "learning_rate": 3.916305053824458e-06, "logits/chosen": -2.929466724395752, "logits/rejected": -2.8033576011657715, "logps/chosen": -89.52632141113281, "logps/rejected": -1175.772216796875, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -0.17079557478427887, "rewards/margins": 11.128901481628418, "rewards/rejected": -11.299696922302246, "step": 6330 }, { "epoch": 0.38, "learning_rate": 3.912013877163916e-06, "logits/chosen": -2.9159460067749023, "logits/rejected": -2.781715154647827, "logps/chosen": -75.20976257324219, "logps/rejected": -1089.1168212890625, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.04497598111629486, "rewards/margins": 10.396872520446777, "rewards/rejected": -10.441848754882812, "step": 6340 }, { "epoch": 0.38, "learning_rate": 3.907716582814175e-06, "logits/chosen": -2.913346767425537, "logits/rejected": -2.8136565685272217, "logps/chosen": -82.28569030761719, "logps/rejected": -1120.493896484375, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -0.12956306338310242, "rewards/margins": 10.637325286865234, "rewards/rejected": -10.766888618469238, "step": 6350 }, { "epoch": 0.38, "learning_rate": 3.903413189393687e-06, "logits/chosen": -2.8990001678466797, "logits/rejected": -2.817972421646118, "logps/chosen": -88.62944030761719, "logps/rejected": -1043.7587890625, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -0.19952502846717834, "rewards/margins": 9.794733047485352, "rewards/rejected": -9.994256973266602, "step": 6360 }, { "epoch": 0.38, "learning_rate": 3.899103715547325e-06, "logits/chosen": -2.9326694011688232, "logits/rejected": -2.8338265419006348, "logps/chosen": -74.43017578125, "logps/rejected": -1132.8187255859375, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": -0.034858811646699905, "rewards/margins": 10.850278854370117, "rewards/rejected": -10.885136604309082, "step": 6370 }, { "epoch": 0.38, "learning_rate": 3.894788179946313e-06, "logits/chosen": -2.909954786300659, "logits/rejected": -2.766792058944702, "logps/chosen": -93.53961181640625, "logps/rejected": -1024.513916015625, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.1904674470424652, "rewards/margins": 9.611539840698242, "rewards/rejected": -9.802005767822266, "step": 6380 }, { "epoch": 0.38, "learning_rate": 3.890466601288131e-06, "logits/chosen": -2.9135093688964844, "logits/rejected": -2.800748586654663, "logps/chosen": -89.88471984863281, "logps/rejected": -1001.25927734375, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": -0.2575301229953766, "rewards/margins": 9.310220718383789, "rewards/rejected": -9.56775188446045, "step": 6390 }, { "epoch": 0.38, "learning_rate": 3.886138998296446e-06, "logits/chosen": -2.9123740196228027, "logits/rejected": -2.7747879028320312, "logps/chosen": -78.06915283203125, "logps/rejected": -1117.611328125, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -0.11141111701726913, "rewards/margins": 10.628135681152344, "rewards/rejected": -10.739545822143555, "step": 6400 }, { "epoch": 0.38, "learning_rate": 3.881805389721021e-06, "logits/chosen": -2.9010398387908936, "logits/rejected": -2.7781822681427, "logps/chosen": -101.46094512939453, "logps/rejected": -1099.253173828125, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.2968367338180542, "rewards/margins": 10.258384704589844, "rewards/rejected": -10.555219650268555, "step": 6410 }, { "epoch": 0.38, "learning_rate": 3.877465794337648e-06, "logits/chosen": -2.9146225452423096, "logits/rejected": -2.8289735317230225, "logps/chosen": -91.29545593261719, "logps/rejected": -1072.724853515625, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.19682198762893677, "rewards/margins": 10.085344314575195, "rewards/rejected": -10.282166481018066, "step": 6420 }, { "epoch": 0.38, "learning_rate": 3.873120230948045e-06, "logits/chosen": -2.9187588691711426, "logits/rejected": -2.827144145965576, "logps/chosen": -98.31732177734375, "logps/rejected": -910.05322265625, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -0.32930025458335876, "rewards/margins": 8.338726043701172, "rewards/rejected": -8.668025970458984, "step": 6430 }, { "epoch": 0.38, "learning_rate": 3.868768718379798e-06, "logits/chosen": -2.924968719482422, "logits/rejected": -2.7943320274353027, "logps/chosen": -106.68647766113281, "logps/rejected": -1153.1915283203125, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -0.38858476281166077, "rewards/margins": 10.690592765808105, "rewards/rejected": -11.079178810119629, "step": 6440 }, { "epoch": 0.38, "learning_rate": 3.8644112754862614e-06, "logits/chosen": -2.9585347175598145, "logits/rejected": -2.8262205123901367, "logps/chosen": -130.94100952148438, "logps/rejected": -1087.3109130859375, "loss": 0.0454, "rewards/accuracies": 1.0, "rewards/chosen": -0.5822890996932983, "rewards/margins": 9.82852554321289, "rewards/rejected": -10.410813331604004, "step": 6450 }, { "epoch": 0.39, "learning_rate": 3.860047921146487e-06, "logits/chosen": -2.9241621494293213, "logits/rejected": -2.7909157276153564, "logps/chosen": -87.95986938476562, "logps/rejected": -1058.934814453125, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.1660889834165573, "rewards/margins": 9.99785327911377, "rewards/rejected": -10.163942337036133, "step": 6460 }, { "epoch": 0.39, "learning_rate": 3.855678674265136e-06, "logits/chosen": -2.9131813049316406, "logits/rejected": -2.792825937271118, "logps/chosen": -91.43817901611328, "logps/rejected": -1003.8660888671875, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.2128903865814209, "rewards/margins": 9.38998031616211, "rewards/rejected": -9.602869987487793, "step": 6470 }, { "epoch": 0.39, "learning_rate": 3.851303553772402e-06, "logits/chosen": -2.939784526824951, "logits/rejected": -2.811361789703369, "logps/chosen": -82.83306884765625, "logps/rejected": -1082.6319580078125, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.15579429268836975, "rewards/margins": 10.234354019165039, "rewards/rejected": -10.390148162841797, "step": 6480 }, { "epoch": 0.39, "learning_rate": 3.846922578623924e-06, "logits/chosen": -2.9310402870178223, "logits/rejected": -2.8119266033172607, "logps/chosen": -88.42886352539062, "logps/rejected": -1169.181884765625, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -0.14256305992603302, "rewards/margins": 11.109556198120117, "rewards/rejected": -11.252120018005371, "step": 6490 }, { "epoch": 0.39, "learning_rate": 3.84253576780071e-06, "logits/chosen": -2.923973798751831, "logits/rejected": -2.838120937347412, "logps/chosen": -85.28412628173828, "logps/rejected": -1032.6490478515625, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.1481623500585556, "rewards/margins": 9.737385749816895, "rewards/rejected": -9.88554859161377, "step": 6500 }, { "epoch": 0.39, "learning_rate": 3.83814314030905e-06, "logits/chosen": -2.936239242553711, "logits/rejected": -2.7988486289978027, "logps/chosen": -64.92374420166016, "logps/rejected": -1064.503173828125, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 0.041876696050167084, "rewards/margins": 10.23698902130127, "rewards/rejected": -10.195112228393555, "step": 6510 }, { "epoch": 0.39, "learning_rate": 3.833744715180433e-06, "logits/chosen": -2.9519200325012207, "logits/rejected": -2.837407350540161, "logps/chosen": -68.72590637207031, "logps/rejected": -980.8690185546875, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": 0.005347815807908773, "rewards/margins": 9.388530731201172, "rewards/rejected": -9.383182525634766, "step": 6520 }, { "epoch": 0.39, "learning_rate": 3.829340511471471e-06, "logits/chosen": -2.9176063537597656, "logits/rejected": -2.8307461738586426, "logps/chosen": -62.59233856201172, "logps/rejected": -1033.553955078125, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": 0.101711705327034, "rewards/margins": 10.011478424072266, "rewards/rejected": -9.90976619720459, "step": 6530 }, { "epoch": 0.39, "learning_rate": 3.824930548263811e-06, "logits/chosen": -2.939133882522583, "logits/rejected": -2.832871675491333, "logps/chosen": -62.86103439331055, "logps/rejected": -1058.087890625, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": 0.080507293343544, "rewards/margins": 10.208907127380371, "rewards/rejected": -10.128398895263672, "step": 6540 }, { "epoch": 0.39, "learning_rate": 3.82051484466405e-06, "logits/chosen": -2.9525794982910156, "logits/rejected": -2.791292428970337, "logps/chosen": -67.79877471923828, "logps/rejected": -1080.8424072265625, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 0.04454438015818596, "rewards/margins": 10.391596794128418, "rewards/rejected": -10.347051620483398, "step": 6550 }, { "epoch": 0.39, "learning_rate": 3.816093419803663e-06, "logits/chosen": -2.9264256954193115, "logits/rejected": -2.790048122406006, "logps/chosen": -72.99713134765625, "logps/rejected": -988.3656005859375, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.0011682920157909393, "rewards/margins": 9.443254470825195, "rewards/rejected": -9.444421768188477, "step": 6560 }, { "epoch": 0.39, "learning_rate": 3.811666292838905e-06, "logits/chosen": -2.914684534072876, "logits/rejected": -2.783233165740967, "logps/chosen": -96.10786437988281, "logps/rejected": -1063.142333984375, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -0.21540692448616028, "rewards/margins": 9.973735809326172, "rewards/rejected": -10.189143180847168, "step": 6570 }, { "epoch": 0.39, "learning_rate": 3.8072334829507414e-06, "logits/chosen": -2.905433177947998, "logits/rejected": -2.7853519916534424, "logps/chosen": -136.0424041748047, "logps/rejected": -1073.2169189453125, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -0.6832762956619263, "rewards/margins": 9.608771324157715, "rewards/rejected": -10.292046546936035, "step": 6580 }, { "epoch": 0.39, "learning_rate": 3.802795009344757e-06, "logits/chosen": -2.904794216156006, "logits/rejected": -2.7682700157165527, "logps/chosen": -110.67903900146484, "logps/rejected": -1079.4990234375, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.40114468336105347, "rewards/margins": 9.956185340881348, "rewards/rejected": -10.357330322265625, "step": 6590 }, { "epoch": 0.39, "learning_rate": 3.798350891251076e-06, "logits/chosen": -2.895101547241211, "logits/rejected": -2.7678451538085938, "logps/chosen": -105.27542877197266, "logps/rejected": -1182.995361328125, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -0.37807735800743103, "rewards/margins": 11.017293930053711, "rewards/rejected": -11.39537239074707, "step": 6600 }, { "epoch": 0.39, "learning_rate": 3.7939011479242784e-06, "logits/chosen": -2.9222826957702637, "logits/rejected": -2.766177177429199, "logps/chosen": -98.7292709350586, "logps/rejected": -1169.2509765625, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.29226866364479065, "rewards/margins": 10.956949234008789, "rewards/rejected": -11.249216079711914, "step": 6610 }, { "epoch": 0.39, "learning_rate": 3.7894457986433143e-06, "logits/chosen": -2.8875489234924316, "logits/rejected": -2.7267744541168213, "logps/chosen": -113.87646484375, "logps/rejected": -1042.7359619140625, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.46203166246414185, "rewards/margins": 9.51615047454834, "rewards/rejected": -9.978182792663574, "step": 6620 }, { "epoch": 0.4, "learning_rate": 3.7849848627114248e-06, "logits/chosen": -2.9139058589935303, "logits/rejected": -2.7674803733825684, "logps/chosen": -99.23222351074219, "logps/rejected": -1149.800537109375, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -0.29400184750556946, "rewards/margins": 10.766439437866211, "rewards/rejected": -11.060441970825195, "step": 6630 }, { "epoch": 0.4, "learning_rate": 3.7805183594560525e-06, "logits/chosen": -2.9445641040802, "logits/rejected": -2.8059093952178955, "logps/chosen": -124.96044921875, "logps/rejected": -1100.9664306640625, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5462000966072083, "rewards/margins": 10.025306701660156, "rewards/rejected": -10.571507453918457, "step": 6640 }, { "epoch": 0.4, "learning_rate": 3.7760463082287647e-06, "logits/chosen": -2.9038233757019043, "logits/rejected": -2.7581303119659424, "logps/chosen": -120.37159729003906, "logps/rejected": -1200.5157470703125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.5187109708786011, "rewards/margins": 11.042951583862305, "rewards/rejected": -11.561662673950195, "step": 6650 }, { "epoch": 0.4, "learning_rate": 3.7715687284051618e-06, "logits/chosen": -2.9060845375061035, "logits/rejected": -2.783130645751953, "logps/chosen": -122.11412048339844, "logps/rejected": -1204.497802734375, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.5129600763320923, "rewards/margins": 11.088891983032227, "rewards/rejected": -11.601851463317871, "step": 6660 }, { "epoch": 0.4, "learning_rate": 3.7670856393848e-06, "logits/chosen": -2.9159538745880127, "logits/rejected": -2.760282516479492, "logps/chosen": -95.04017639160156, "logps/rejected": -1020.05712890625, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.23771195113658905, "rewards/margins": 9.51791763305664, "rewards/rejected": -9.755629539489746, "step": 6670 }, { "epoch": 0.4, "learning_rate": 3.7625970605911038e-06, "logits/chosen": -2.887974262237549, "logits/rejected": -2.751300811767578, "logps/chosen": -88.84022521972656, "logps/rejected": -1258.835693359375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.20529845356941223, "rewards/margins": 11.951395034790039, "rewards/rejected": -12.156692504882812, "step": 6680 }, { "epoch": 0.4, "learning_rate": 3.7581030114712837e-06, "logits/chosen": -2.9165754318237305, "logits/rejected": -2.782853364944458, "logps/chosen": -113.78248596191406, "logps/rejected": -1018.2673950195312, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.41609930992126465, "rewards/margins": 9.314772605895996, "rewards/rejected": -9.730871200561523, "step": 6690 }, { "epoch": 0.4, "learning_rate": 3.75360351149625e-06, "logits/chosen": -2.9435460567474365, "logits/rejected": -2.8024682998657227, "logps/chosen": -77.84777069091797, "logps/rejected": -1078.976318359375, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.07346780598163605, "rewards/margins": 10.270526885986328, "rewards/rejected": -10.343994140625, "step": 6700 }, { "epoch": 0.4, "learning_rate": 3.7490985801605303e-06, "logits/chosen": -2.914401054382324, "logits/rejected": -2.7792012691497803, "logps/chosen": -78.41474914550781, "logps/rejected": -1130.6934814453125, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.06996846944093704, "rewards/margins": 10.809919357299805, "rewards/rejected": -10.879887580871582, "step": 6710 }, { "epoch": 0.4, "learning_rate": 3.744588236982181e-06, "logits/chosen": -2.926753044128418, "logits/rejected": -2.788318157196045, "logps/chosen": -87.95268249511719, "logps/rejected": -1118.17138671875, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.18708589673042297, "rewards/margins": 10.554030418395996, "rewards/rejected": -10.741117477416992, "step": 6720 }, { "epoch": 0.4, "learning_rate": 3.7400725015027107e-06, "logits/chosen": -2.903153896331787, "logits/rejected": -2.8029251098632812, "logps/chosen": -119.28309631347656, "logps/rejected": -1237.8499755859375, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.4689444899559021, "rewards/margins": 11.4512300491333, "rewards/rejected": -11.920174598693848, "step": 6730 }, { "epoch": 0.4, "learning_rate": 3.7355513932869862e-06, "logits/chosen": -2.9454221725463867, "logits/rejected": -2.820990562438965, "logps/chosen": -83.53551483154297, "logps/rejected": -1159.2552490234375, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.10593627393245697, "rewards/margins": 11.049755096435547, "rewards/rejected": -11.155692100524902, "step": 6740 }, { "epoch": 0.4, "learning_rate": 3.7310249319231552e-06, "logits/chosen": -2.9192848205566406, "logits/rejected": -2.7833282947540283, "logps/chosen": -79.68898010253906, "logps/rejected": -1128.011962890625, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.08301255851984024, "rewards/margins": 10.753610610961914, "rewards/rejected": -10.836623191833496, "step": 6750 }, { "epoch": 0.4, "learning_rate": 3.726493137022557e-06, "logits/chosen": -2.9085731506347656, "logits/rejected": -2.812312602996826, "logps/chosen": -85.72715759277344, "logps/rejected": -1085.9427490234375, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -0.12284183502197266, "rewards/margins": 10.29987907409668, "rewards/rejected": -10.422721862792969, "step": 6760 }, { "epoch": 0.4, "learning_rate": 3.7219560282196397e-06, "logits/chosen": -2.935290813446045, "logits/rejected": -2.7731118202209473, "logps/chosen": -119.27406311035156, "logps/rejected": -1101.92333984375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4734339714050293, "rewards/margins": 10.102243423461914, "rewards/rejected": -10.575677871704102, "step": 6770 }, { "epoch": 0.4, "learning_rate": 3.7174136251718735e-06, "logits/chosen": -2.9045538902282715, "logits/rejected": -2.7873570919036865, "logps/chosen": -138.05226135253906, "logps/rejected": -1124.597412109375, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.6557701230049133, "rewards/margins": 10.145862579345703, "rewards/rejected": -10.80163288116455, "step": 6780 }, { "epoch": 0.4, "learning_rate": 3.712865947559667e-06, "logits/chosen": -2.9085047245025635, "logits/rejected": -2.768371820449829, "logps/chosen": -101.27290344238281, "logps/rejected": -1119.4437255859375, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": -0.24350428581237793, "rewards/margins": 10.501436233520508, "rewards/rejected": -10.744939804077148, "step": 6790 }, { "epoch": 0.41, "learning_rate": 3.7083130150862835e-06, "logits/chosen": -2.9560627937316895, "logits/rejected": -2.828529119491577, "logps/chosen": -76.32826232910156, "logps/rejected": -1146.9981689453125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.03673182427883148, "rewards/margins": 10.98664379119873, "rewards/rejected": -11.02337646484375, "step": 6800 }, { "epoch": 0.41, "learning_rate": 3.7037548474777484e-06, "logits/chosen": -2.9539592266082764, "logits/rejected": -2.823580265045166, "logps/chosen": -78.04996490478516, "logps/rejected": -1050.046875, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -0.025190208107233047, "rewards/margins": 10.027109146118164, "rewards/rejected": -10.052297592163086, "step": 6810 }, { "epoch": 0.41, "learning_rate": 3.6991914644827732e-06, "logits/chosen": -2.93625807762146, "logits/rejected": -2.7904951572418213, "logps/chosen": -73.10401916503906, "logps/rejected": -1024.831298828125, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.03253782540559769, "rewards/margins": 9.767110824584961, "rewards/rejected": -9.79964828491211, "step": 6820 }, { "epoch": 0.41, "learning_rate": 3.6946228858726642e-06, "logits/chosen": -2.9074456691741943, "logits/rejected": -2.7821357250213623, "logps/chosen": -101.6294174194336, "logps/rejected": -1086.5552978515625, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -0.32821211218833923, "rewards/margins": 10.076467514038086, "rewards/rejected": -10.404680252075195, "step": 6830 }, { "epoch": 0.41, "learning_rate": 3.690049131441238e-06, "logits/chosen": -2.9105629920959473, "logits/rejected": -2.8157317638397217, "logps/chosen": -96.59138488769531, "logps/rejected": -1192.170654296875, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.2676796317100525, "rewards/margins": 11.207104682922363, "rewards/rejected": -11.474782943725586, "step": 6840 }, { "epoch": 0.41, "learning_rate": 3.6854702210047353e-06, "logits/chosen": -2.9104385375976562, "logits/rejected": -2.787836790084839, "logps/chosen": -94.45555877685547, "logps/rejected": -956.9180908203125, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -0.24695566296577454, "rewards/margins": 8.900993347167969, "rewards/rejected": -9.14794921875, "step": 6850 }, { "epoch": 0.41, "learning_rate": 3.6808861744017386e-06, "logits/chosen": -2.904888391494751, "logits/rejected": -2.7817986011505127, "logps/chosen": -86.03636169433594, "logps/rejected": -1182.254638671875, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.14749349653720856, "rewards/margins": 11.233137130737305, "rewards/rejected": -11.380630493164062, "step": 6860 }, { "epoch": 0.41, "learning_rate": 3.6762970114930796e-06, "logits/chosen": -2.9094462394714355, "logits/rejected": -2.8107056617736816, "logps/chosen": -103.11491394042969, "logps/rejected": -1092.7376708984375, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.32972320914268494, "rewards/margins": 10.160021781921387, "rewards/rejected": -10.489745140075684, "step": 6870 }, { "epoch": 0.41, "learning_rate": 3.6717027521617593e-06, "logits/chosen": -2.9403204917907715, "logits/rejected": -2.775780200958252, "logps/chosen": -113.1695785522461, "logps/rejected": -1159.4503173828125, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -0.3750307261943817, "rewards/margins": 10.771791458129883, "rewards/rejected": -11.146821975708008, "step": 6880 }, { "epoch": 0.41, "learning_rate": 3.6671034163128594e-06, "logits/chosen": -2.9391627311706543, "logits/rejected": -2.794499635696411, "logps/chosen": -90.55206298828125, "logps/rejected": -1018.2598876953125, "loss": 0.1077, "rewards/accuracies": 1.0, "rewards/chosen": -0.21661922335624695, "rewards/margins": 9.516180992126465, "rewards/rejected": -9.732800483703613, "step": 6890 }, { "epoch": 0.41, "learning_rate": 3.662499023873454e-06, "logits/chosen": -2.8947079181671143, "logits/rejected": -2.8041396141052246, "logps/chosen": -95.18833923339844, "logps/rejected": -1077.7174072265625, "loss": 0.0326, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2365306168794632, "rewards/margins": 10.094181060791016, "rewards/rejected": -10.330713272094727, "step": 6900 }, { "epoch": 0.41, "learning_rate": 3.657889594792528e-06, "logits/chosen": -2.9660799503326416, "logits/rejected": -2.8214352130889893, "logps/chosen": -86.84017181396484, "logps/rejected": -900.7272338867188, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.12992525100708008, "rewards/margins": 8.428224563598633, "rewards/rejected": -8.558150291442871, "step": 6910 }, { "epoch": 0.41, "learning_rate": 3.653275149040887e-06, "logits/chosen": -2.9090256690979004, "logits/rejected": -2.809492588043213, "logps/chosen": -99.82324981689453, "logps/rejected": -949.4283447265625, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.24338094890117645, "rewards/margins": 8.810364723205566, "rewards/rejected": -9.05374526977539, "step": 6920 }, { "epoch": 0.41, "learning_rate": 3.6486557066110694e-06, "logits/chosen": -2.9437060356140137, "logits/rejected": -2.8253989219665527, "logps/chosen": -78.02464294433594, "logps/rejected": -1014.4766845703125, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.07109050452709198, "rewards/margins": 9.623019218444824, "rewards/rejected": -9.694109916687012, "step": 6930 }, { "epoch": 0.41, "learning_rate": 3.644031287517267e-06, "logits/chosen": -2.915351152420044, "logits/rejected": -2.765439748764038, "logps/chosen": -106.92597961425781, "logps/rejected": -1165.0740966796875, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.3702332675457001, "rewards/margins": 10.828544616699219, "rewards/rejected": -11.198777198791504, "step": 6940 }, { "epoch": 0.41, "learning_rate": 3.639401911795232e-06, "logits/chosen": -2.9155404567718506, "logits/rejected": -2.7839725017547607, "logps/chosen": -99.48594665527344, "logps/rejected": -1071.2060546875, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -0.2706945538520813, "rewards/margins": 9.996676445007324, "rewards/rejected": -10.267372131347656, "step": 6950 }, { "epoch": 0.42, "learning_rate": 3.6347675995021874e-06, "logits/chosen": -2.9346346855163574, "logits/rejected": -2.7792932987213135, "logps/chosen": -100.21730041503906, "logps/rejected": -1037.0322265625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.20675675570964813, "rewards/margins": 9.714632987976074, "rewards/rejected": -9.92138957977295, "step": 6960 }, { "epoch": 0.42, "learning_rate": 3.6301283707167495e-06, "logits/chosen": -2.933328151702881, "logits/rejected": -2.767930269241333, "logps/chosen": -101.89979553222656, "logps/rejected": -1134.9237060546875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.22748295962810516, "rewards/margins": 10.678056716918945, "rewards/rejected": -10.905540466308594, "step": 6970 }, { "epoch": 0.42, "learning_rate": 3.6254842455388347e-06, "logits/chosen": -2.892735004425049, "logits/rejected": -2.7738585472106934, "logps/chosen": -103.06649017333984, "logps/rejected": -1066.89453125, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -0.3658735156059265, "rewards/margins": 9.865544319152832, "rewards/rejected": -10.23141860961914, "step": 6980 }, { "epoch": 0.42, "learning_rate": 3.6208352440895704e-06, "logits/chosen": -2.9056954383850098, "logits/rejected": -2.7622809410095215, "logps/chosen": -122.41854095458984, "logps/rejected": -997.4375, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.4713757634162903, "rewards/margins": 9.070648193359375, "rewards/rejected": -9.542022705078125, "step": 6990 }, { "epoch": 0.42, "learning_rate": 3.6161813865112155e-06, "logits/chosen": -2.904318332672119, "logits/rejected": -2.7899816036224365, "logps/chosen": -101.73571014404297, "logps/rejected": -986.7930908203125, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -0.2600402235984802, "rewards/margins": 9.165243148803711, "rewards/rejected": -9.425283432006836, "step": 7000 }, { "epoch": 0.42, "eval_logits/chosen": -2.8776113986968994, "eval_logits/rejected": -2.844184160232544, "eval_logps/chosen": -175.428955078125, "eval_logps/rejected": -904.81982421875, "eval_loss": 0.01644059643149376, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.0835449695587158, "eval_rewards/margins": 7.5091705322265625, "eval_rewards/rejected": -8.592716217041016, "eval_runtime": 4.2684, "eval_samples_per_second": 1.171, "eval_steps_per_second": 0.234, "step": 7000 }, { "epoch": 0.42, "learning_rate": 3.611522692967065e-06, "logits/chosen": -2.9096760749816895, "logits/rejected": -2.750338077545166, "logps/chosen": -110.2779312133789, "logps/rejected": -1105.7159423828125, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.3872075378894806, "rewards/margins": 10.244175910949707, "rewards/rejected": -10.631383895874023, "step": 7010 }, { "epoch": 0.42, "learning_rate": 3.6068591836413687e-06, "logits/chosen": -2.9231553077697754, "logits/rejected": -2.7905192375183105, "logps/chosen": -92.08689880371094, "logps/rejected": -1078.547119140625, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.2296150177717209, "rewards/margins": 10.1162691116333, "rewards/rejected": -10.3458833694458, "step": 7020 }, { "epoch": 0.42, "learning_rate": 3.602190878739239e-06, "logits/chosen": -2.900203227996826, "logits/rejected": -2.785003662109375, "logps/chosen": -101.54088592529297, "logps/rejected": -1139.35986328125, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.3198123276233673, "rewards/margins": 10.610698699951172, "rewards/rejected": -10.930512428283691, "step": 7030 }, { "epoch": 0.42, "learning_rate": 3.5975177984865673e-06, "logits/chosen": -2.878770589828491, "logits/rejected": -2.751427173614502, "logps/chosen": -99.40553283691406, "logps/rejected": -1185.62158203125, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.32205453515052795, "rewards/margins": 11.092864990234375, "rewards/rejected": -11.41491985321045, "step": 7040 }, { "epoch": 0.42, "learning_rate": 3.592839963129934e-06, "logits/chosen": -2.8878285884857178, "logits/rejected": -2.7685837745666504, "logps/chosen": -78.28253173828125, "logps/rejected": -1149.0386962890625, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -0.09539985656738281, "rewards/margins": 10.969889640808105, "rewards/rejected": -11.065289497375488, "step": 7050 }, { "epoch": 0.42, "learning_rate": 3.588157392936521e-06, "logits/chosen": -2.882582187652588, "logits/rejected": -2.7486889362335205, "logps/chosen": -83.16156005859375, "logps/rejected": -1183.084228515625, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.12763121724128723, "rewards/margins": 11.256253242492676, "rewards/rejected": -11.383883476257324, "step": 7060 }, { "epoch": 0.42, "learning_rate": 3.583470108194026e-06, "logits/chosen": -2.906589984893799, "logits/rejected": -2.790468692779541, "logps/chosen": -89.14583587646484, "logps/rejected": -1030.7427978515625, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -0.22094586491584778, "rewards/margins": 9.643675804138184, "rewards/rejected": -9.864623069763184, "step": 7070 }, { "epoch": 0.42, "learning_rate": 3.5787781292105704e-06, "logits/chosen": -2.920591354370117, "logits/rejected": -2.8202481269836426, "logps/chosen": -116.5506362915039, "logps/rejected": -1035.1424560546875, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -0.49446067214012146, "rewards/margins": 9.41383171081543, "rewards/rejected": -9.908292770385742, "step": 7080 }, { "epoch": 0.42, "learning_rate": 3.5740814763146164e-06, "logits/chosen": -2.874154567718506, "logits/rejected": -2.7651419639587402, "logps/chosen": -117.6974868774414, "logps/rejected": -1153.4556884765625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -0.5130449533462524, "rewards/margins": 10.582649230957031, "rewards/rejected": -11.095693588256836, "step": 7090 }, { "epoch": 0.42, "learning_rate": 3.569380169854875e-06, "logits/chosen": -2.9505703449249268, "logits/rejected": -2.8482229709625244, "logps/chosen": -75.36229705810547, "logps/rejected": -1178.416259765625, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.09957019239664078, "rewards/margins": 11.241961479187012, "rewards/rejected": -11.341530799865723, "step": 7100 }, { "epoch": 0.42, "learning_rate": 3.5646742302002185e-06, "logits/chosen": -2.8947432041168213, "logits/rejected": -2.8020567893981934, "logps/chosen": -77.16735076904297, "logps/rejected": -1093.35693359375, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -0.07255446910858154, "rewards/margins": 10.425664901733398, "rewards/rejected": -10.498220443725586, "step": 7110 }, { "epoch": 0.42, "learning_rate": 3.5599636777395954e-06, "logits/chosen": -2.896845579147339, "logits/rejected": -2.781343936920166, "logps/chosen": -94.4598617553711, "logps/rejected": -1132.225341796875, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.19503115117549896, "rewards/margins": 10.689114570617676, "rewards/rejected": -10.884145736694336, "step": 7120 }, { "epoch": 0.43, "learning_rate": 3.555248532881938e-06, "logits/chosen": -2.8788840770721436, "logits/rejected": -2.7696962356567383, "logps/chosen": -147.68276977539062, "logps/rejected": -1132.974853515625, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -0.7828652262687683, "rewards/margins": 10.097051620483398, "rewards/rejected": -10.879919052124023, "step": 7130 }, { "epoch": 0.43, "learning_rate": 3.5505288160560745e-06, "logits/chosen": -2.9414455890655518, "logits/rejected": -2.820356845855713, "logps/chosen": -119.46009826660156, "logps/rejected": -995.22802734375, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.47757524251937866, "rewards/margins": 9.035507202148438, "rewards/rejected": -9.513082504272461, "step": 7140 }, { "epoch": 0.43, "learning_rate": 3.545804547710645e-06, "logits/chosen": -2.9364776611328125, "logits/rejected": -2.788905620574951, "logps/chosen": -121.44282531738281, "logps/rejected": -1161.408935546875, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -0.5759637951850891, "rewards/margins": 10.596400260925293, "rewards/rejected": -11.17236328125, "step": 7150 }, { "epoch": 0.43, "learning_rate": 3.5410757483140057e-06, "logits/chosen": -2.9435677528381348, "logits/rejected": -2.796893835067749, "logps/chosen": -120.52152252197266, "logps/rejected": -1144.41796875, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -0.5478697419166565, "rewards/margins": 10.453125, "rewards/rejected": -11.000993728637695, "step": 7160 }, { "epoch": 0.43, "learning_rate": 3.5363424383541465e-06, "logits/chosen": -2.9058589935302734, "logits/rejected": -2.7687151432037354, "logps/chosen": -200.76461791992188, "logps/rejected": -1074.436279296875, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -1.3432514667510986, "rewards/margins": 8.966203689575195, "rewards/rejected": -10.309454917907715, "step": 7170 }, { "epoch": 0.43, "learning_rate": 3.5316046383385983e-06, "logits/chosen": -2.912508487701416, "logits/rejected": -2.7569141387939453, "logps/chosen": -184.05824279785156, "logps/rejected": -1251.82958984375, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -1.1382877826690674, "rewards/margins": 10.932048797607422, "rewards/rejected": -12.07033634185791, "step": 7180 }, { "epoch": 0.43, "learning_rate": 3.526862368794347e-06, "logits/chosen": -2.9150776863098145, "logits/rejected": -2.752119541168213, "logps/chosen": -182.68524169921875, "logps/rejected": -1092.622802734375, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -1.1595345735549927, "rewards/margins": 9.330678939819336, "rewards/rejected": -10.490215301513672, "step": 7190 }, { "epoch": 0.43, "learning_rate": 3.522115650267743e-06, "logits/chosen": -2.9281692504882812, "logits/rejected": -2.759819746017456, "logps/chosen": -209.9885711669922, "logps/rejected": -1307.581298828125, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.3615130186080933, "rewards/margins": 11.256918907165527, "rewards/rejected": -12.618432998657227, "step": 7200 }, { "epoch": 0.43, "learning_rate": 3.5173645033244103e-06, "logits/chosen": -2.885080099105835, "logits/rejected": -2.7192654609680176, "logps/chosen": -159.7135009765625, "logps/rejected": -1246.55517578125, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -0.8966726064682007, "rewards/margins": 11.121199607849121, "rewards/rejected": -12.017870903015137, "step": 7210 }, { "epoch": 0.43, "learning_rate": 3.5126089485491627e-06, "logits/chosen": -2.915461540222168, "logits/rejected": -2.760791540145874, "logps/chosen": -126.8998031616211, "logps/rejected": -1186.1251220703125, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6100717782974243, "rewards/margins": 10.823419570922852, "rewards/rejected": -11.433491706848145, "step": 7220 }, { "epoch": 0.43, "learning_rate": 3.5078490065459083e-06, "logits/chosen": -2.8814327716827393, "logits/rejected": -2.754974365234375, "logps/chosen": -118.06925964355469, "logps/rejected": -1088.775390625, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.49047431349754333, "rewards/margins": 9.959485054016113, "rewards/rejected": -10.449958801269531, "step": 7230 }, { "epoch": 0.43, "learning_rate": 3.503084697937565e-06, "logits/chosen": -2.9181699752807617, "logits/rejected": -2.7804200649261475, "logps/chosen": -135.76287841796875, "logps/rejected": -1097.1099853515625, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -0.638733446598053, "rewards/margins": 9.89672565460205, "rewards/rejected": -10.5354585647583, "step": 7240 }, { "epoch": 0.43, "learning_rate": 3.4983160433659702e-06, "logits/chosen": -2.9229915142059326, "logits/rejected": -2.7635908126831055, "logps/chosen": -177.2243194580078, "logps/rejected": -1138.9625244140625, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -1.0620439052581787, "rewards/margins": 9.895639419555664, "rewards/rejected": -10.957681655883789, "step": 7250 }, { "epoch": 0.43, "learning_rate": 3.493543063491788e-06, "logits/chosen": -2.9145591259002686, "logits/rejected": -2.793954372406006, "logps/chosen": -214.75399780273438, "logps/rejected": -1211.7982177734375, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.3817886114120483, "rewards/margins": 10.29120922088623, "rewards/rejected": -11.67299747467041, "step": 7260 }, { "epoch": 0.43, "learning_rate": 3.4887657789944236e-06, "logits/chosen": -2.890446901321411, "logits/rejected": -2.7624447345733643, "logps/chosen": -140.09176635742188, "logps/rejected": -1124.4049072265625, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -0.6334695219993591, "rewards/margins": 10.17092227935791, "rewards/rejected": -10.804390907287598, "step": 7270 }, { "epoch": 0.43, "learning_rate": 3.4839842105719346e-06, "logits/chosen": -2.8583121299743652, "logits/rejected": -2.751098394393921, "logps/chosen": -113.97159576416016, "logps/rejected": -1133.4715576171875, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -0.485659122467041, "rewards/margins": 10.407791137695312, "rewards/rejected": -10.893449783325195, "step": 7280 }, { "epoch": 0.43, "learning_rate": 3.4791983789409358e-06, "logits/chosen": -2.9343976974487305, "logits/rejected": -2.760997772216797, "logps/chosen": -107.3431396484375, "logps/rejected": -1103.991455078125, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -0.37820738554000854, "rewards/margins": 10.233498573303223, "rewards/rejected": -10.611705780029297, "step": 7290 }, { "epoch": 0.44, "learning_rate": 3.474408304836514e-06, "logits/chosen": -2.9446558952331543, "logits/rejected": -2.805596113204956, "logps/chosen": -117.64411926269531, "logps/rejected": -1213.383544921875, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.5462199449539185, "rewards/margins": 11.134984970092773, "rewards/rejected": -11.681203842163086, "step": 7300 }, { "epoch": 0.44, "learning_rate": 3.4696140090121377e-06, "logits/chosen": -2.836948871612549, "logits/rejected": -2.7465267181396484, "logps/chosen": -149.80337524414062, "logps/rejected": -1234.909912109375, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -0.799278199672699, "rewards/margins": 11.106013298034668, "rewards/rejected": -11.905291557312012, "step": 7310 }, { "epoch": 0.44, "learning_rate": 3.4648155122395653e-06, "logits/chosen": -2.9179329872131348, "logits/rejected": -2.801334857940674, "logps/chosen": -131.99435424804688, "logps/rejected": -1187.2066650390625, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -0.6442035436630249, "rewards/margins": 10.789766311645508, "rewards/rejected": -11.433968544006348, "step": 7320 }, { "epoch": 0.44, "learning_rate": 3.460012835308757e-06, "logits/chosen": -2.9187004566192627, "logits/rejected": -2.7918803691864014, "logps/chosen": -121.123046875, "logps/rejected": -1253.275634765625, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.48308229446411133, "rewards/margins": 11.605780601501465, "rewards/rejected": -12.08886432647705, "step": 7330 }, { "epoch": 0.44, "learning_rate": 3.455205999027783e-06, "logits/chosen": -2.89402437210083, "logits/rejected": -2.7125091552734375, "logps/chosen": -166.7138214111328, "logps/rejected": -1202.82568359375, "loss": 0.0269, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.9210103750228882, "rewards/margins": 10.652724266052246, "rewards/rejected": -11.573735237121582, "step": 7340 }, { "epoch": 0.44, "learning_rate": 3.4503950242227356e-06, "logits/chosen": -2.9318723678588867, "logits/rejected": -2.7714152336120605, "logps/chosen": -128.21913146972656, "logps/rejected": -1293.4091796875, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.6019225120544434, "rewards/margins": 11.885282516479492, "rewards/rejected": -12.487207412719727, "step": 7350 }, { "epoch": 0.44, "learning_rate": 3.445579931737637e-06, "logits/chosen": -2.8801767826080322, "logits/rejected": -2.7383370399475098, "logps/chosen": -130.3653106689453, "logps/rejected": -1269.890869140625, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -0.6593203544616699, "rewards/margins": 11.590472221374512, "rewards/rejected": -12.249794006347656, "step": 7360 }, { "epoch": 0.44, "learning_rate": 3.44076074243435e-06, "logits/chosen": -2.911398410797119, "logits/rejected": -2.793224334716797, "logps/chosen": -125.44395446777344, "logps/rejected": -1229.047607421875, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.5073223114013672, "rewards/margins": 11.336065292358398, "rewards/rejected": -11.84338665008545, "step": 7370 }, { "epoch": 0.44, "learning_rate": 3.435937477192486e-06, "logits/chosen": -2.8998169898986816, "logits/rejected": -2.7710812091827393, "logps/chosen": -100.43770599365234, "logps/rejected": -1117.4185791015625, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.33409082889556885, "rewards/margins": 10.401369094848633, "rewards/rejected": -10.735459327697754, "step": 7380 }, { "epoch": 0.44, "learning_rate": 3.431110156909316e-06, "logits/chosen": -2.914207696914673, "logits/rejected": -2.7645771503448486, "logps/chosen": -142.84832763671875, "logps/rejected": -1104.692138671875, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.7382567524909973, "rewards/margins": 9.862664222717285, "rewards/rejected": -10.600919723510742, "step": 7390 }, { "epoch": 0.44, "learning_rate": 3.4262788024996835e-06, "logits/chosen": -2.927865505218506, "logits/rejected": -2.7974324226379395, "logps/chosen": -92.3178482055664, "logps/rejected": -1177.158447265625, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -0.20079433917999268, "rewards/margins": 11.131086349487305, "rewards/rejected": -11.331879615783691, "step": 7400 }, { "epoch": 0.44, "learning_rate": 3.421443434895905e-06, "logits/chosen": -2.906113386154175, "logits/rejected": -2.8002967834472656, "logps/chosen": -92.94039916992188, "logps/rejected": -1144.2978515625, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.19046145677566528, "rewards/margins": 10.805360794067383, "rewards/rejected": -10.995823860168457, "step": 7410 }, { "epoch": 0.44, "learning_rate": 3.4166040750476868e-06, "logits/chosen": -2.8886752128601074, "logits/rejected": -2.7687957286834717, "logps/chosen": -95.22380065917969, "logps/rejected": -1039.466796875, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -0.2493990659713745, "rewards/margins": 9.703400611877441, "rewards/rejected": -9.952799797058105, "step": 7420 }, { "epoch": 0.44, "learning_rate": 3.4117607439220336e-06, "logits/chosen": -2.9080374240875244, "logits/rejected": -2.810459613800049, "logps/chosen": -81.8446044921875, "logps/rejected": -1124.9327392578125, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.12409061193466187, "rewards/margins": 10.675195693969727, "rewards/rejected": -10.799286842346191, "step": 7430 }, { "epoch": 0.44, "learning_rate": 3.406913462503153e-06, "logits/chosen": -2.9194133281707764, "logits/rejected": -2.79185152053833, "logps/chosen": -85.29582214355469, "logps/rejected": -1234.4886474609375, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.11951877921819687, "rewards/margins": 11.769564628601074, "rewards/rejected": -11.889083862304688, "step": 7440 }, { "epoch": 0.44, "learning_rate": 3.40206225179237e-06, "logits/chosen": -2.8995308876037598, "logits/rejected": -2.7470388412475586, "logps/chosen": -84.77726745605469, "logps/rejected": -1044.743896484375, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.17494158446788788, "rewards/margins": 9.827191352844238, "rewards/rejected": -10.002132415771484, "step": 7450 }, { "epoch": 0.44, "learning_rate": 3.397207132808033e-06, "logits/chosen": -2.8914952278137207, "logits/rejected": -2.7760746479034424, "logps/chosen": -117.90235900878906, "logps/rejected": -1162.0018310546875, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": -0.46003374457359314, "rewards/margins": 10.728364944458008, "rewards/rejected": -11.188400268554688, "step": 7460 }, { "epoch": 0.45, "learning_rate": 3.3923481265854226e-06, "logits/chosen": -2.924891948699951, "logits/rejected": -2.7873799800872803, "logps/chosen": -138.79653930664062, "logps/rejected": -1196.8026123046875, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -0.6437373161315918, "rewards/margins": 10.888925552368164, "rewards/rejected": -11.532663345336914, "step": 7470 }, { "epoch": 0.45, "learning_rate": 3.387485254176663e-06, "logits/chosen": -2.909339427947998, "logits/rejected": -2.757549524307251, "logps/chosen": -136.43145751953125, "logps/rejected": -1189.8463134765625, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.6500800251960754, "rewards/margins": 10.8010835647583, "rewards/rejected": -11.451164245605469, "step": 7480 }, { "epoch": 0.45, "learning_rate": 3.382618536650626e-06, "logits/chosen": -2.9054527282714844, "logits/rejected": -2.8070077896118164, "logps/chosen": -149.27891540527344, "logps/rejected": -1211.5892333984375, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -0.7684259414672852, "rewards/margins": 10.909029006958008, "rewards/rejected": -11.677453994750977, "step": 7490 }, { "epoch": 0.45, "learning_rate": 3.377747995092846e-06, "logits/chosen": -2.908186435699463, "logits/rejected": -2.747255802154541, "logps/chosen": -91.23493957519531, "logps/rejected": -1106.392333984375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.24066027998924255, "rewards/margins": 10.382299423217773, "rewards/rejected": -10.622961044311523, "step": 7500 }, { "epoch": 0.45, "learning_rate": 3.3728736506054234e-06, "logits/chosen": -2.862262010574341, "logits/rejected": -2.7661473751068115, "logps/chosen": -92.10249328613281, "logps/rejected": -1219.478271484375, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.196889728307724, "rewards/margins": 11.551292419433594, "rewards/rejected": -11.74818229675293, "step": 7510 }, { "epoch": 0.45, "learning_rate": 3.3679955243069364e-06, "logits/chosen": -2.9217212200164795, "logits/rejected": -2.797945022583008, "logps/chosen": -90.7013931274414, "logps/rejected": -1055.8570556640625, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -0.25004202127456665, "rewards/margins": 9.877058029174805, "rewards/rejected": -10.127099990844727, "step": 7520 }, { "epoch": 0.45, "learning_rate": 3.3631136373323468e-06, "logits/chosen": -2.908334732055664, "logits/rejected": -2.785301685333252, "logps/chosen": -108.50828552246094, "logps/rejected": -1166.8721923828125, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.35020893812179565, "rewards/margins": 10.8536958694458, "rewards/rejected": -11.20390510559082, "step": 7530 }, { "epoch": 0.45, "learning_rate": 3.3582280108329125e-06, "logits/chosen": -2.9232125282287598, "logits/rejected": -2.776710033416748, "logps/chosen": -103.93333435058594, "logps/rejected": -1093.431396484375, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.315208375453949, "rewards/margins": 10.189821243286133, "rewards/rejected": -10.505029678344727, "step": 7540 }, { "epoch": 0.45, "learning_rate": 3.353338665976089e-06, "logits/chosen": -2.903386354446411, "logits/rejected": -2.7691469192504883, "logps/chosen": -126.32470703125, "logps/rejected": -1136.219970703125, "loss": 0.0338, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5213348865509033, "rewards/margins": 10.39504337310791, "rewards/rejected": -10.916378021240234, "step": 7550 }, { "epoch": 0.45, "learning_rate": 3.3484456239454467e-06, "logits/chosen": -2.8897786140441895, "logits/rejected": -2.7951836585998535, "logps/chosen": -190.11044311523438, "logps/rejected": -1097.8270263671875, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -1.2235487699508667, "rewards/margins": 9.306633949279785, "rewards/rejected": -10.530183792114258, "step": 7560 }, { "epoch": 0.45, "learning_rate": 3.3435489059405713e-06, "logits/chosen": -2.9078426361083984, "logits/rejected": -2.799065113067627, "logps/chosen": -112.38739013671875, "logps/rejected": -1027.890869140625, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.4134586453437805, "rewards/margins": 9.435567855834961, "rewards/rejected": -9.849026679992676, "step": 7570 }, { "epoch": 0.45, "learning_rate": 3.3386485331769747e-06, "logits/chosen": -2.8979456424713135, "logits/rejected": -2.775184154510498, "logps/chosen": -78.13394927978516, "logps/rejected": -1113.0523681640625, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.061250101774930954, "rewards/margins": 10.617616653442383, "rewards/rejected": -10.678865432739258, "step": 7580 }, { "epoch": 0.45, "learning_rate": 3.3337445268860065e-06, "logits/chosen": -2.9517388343811035, "logits/rejected": -2.8235983848571777, "logps/chosen": -77.43302154541016, "logps/rejected": -1103.814208984375, "loss": 0.0946, "rewards/accuracies": 1.0, "rewards/chosen": -0.04222659394145012, "rewards/margins": 10.552355766296387, "rewards/rejected": -10.594582557678223, "step": 7590 }, { "epoch": 0.45, "learning_rate": 3.328836908314755e-06, "logits/chosen": -2.926504373550415, "logits/rejected": -2.787956714630127, "logps/chosen": -64.11087799072266, "logps/rejected": -1041.532470703125, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": 0.04303840547800064, "rewards/margins": 10.002165794372559, "rewards/rejected": -9.959126472473145, "step": 7600 }, { "epoch": 0.45, "learning_rate": 3.3239256987259635e-06, "logits/chosen": -2.896299362182617, "logits/rejected": -2.827054500579834, "logps/chosen": -77.9505615234375, "logps/rejected": -966.1187744140625, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.08739982545375824, "rewards/margins": 9.138105392456055, "rewards/rejected": -9.225504875183105, "step": 7610 }, { "epoch": 0.45, "learning_rate": 3.319010919397929e-06, "logits/chosen": -2.921833038330078, "logits/rejected": -2.8029959201812744, "logps/chosen": -132.29855346679688, "logps/rejected": -1028.130615234375, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -0.5989159345626831, "rewards/margins": 9.243756294250488, "rewards/rejected": -9.842672348022461, "step": 7620 }, { "epoch": 0.45, "learning_rate": 3.3140925916244184e-06, "logits/chosen": -2.93902587890625, "logits/rejected": -2.7790448665618896, "logps/chosen": -107.2216567993164, "logps/rejected": -1134.238037109375, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -0.3338576555252075, "rewards/margins": 10.558181762695312, "rewards/rejected": -10.892040252685547, "step": 7630 }, { "epoch": 0.46, "learning_rate": 3.3091707367145707e-06, "logits/chosen": -2.867914915084839, "logits/rejected": -2.764857530593872, "logps/chosen": -105.13523864746094, "logps/rejected": -1192.6383056640625, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -0.3823467791080475, "rewards/margins": 11.101738929748535, "rewards/rejected": -11.484085083007812, "step": 7640 }, { "epoch": 0.46, "learning_rate": 3.304245375992807e-06, "logits/chosen": -2.9051058292388916, "logits/rejected": -2.7789852619171143, "logps/chosen": -179.8166046142578, "logps/rejected": -1130.5843505859375, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -1.168277621269226, "rewards/margins": 9.694743156433105, "rewards/rejected": -10.863021850585938, "step": 7650 }, { "epoch": 0.46, "learning_rate": 3.299316530798738e-06, "logits/chosen": -2.89345121383667, "logits/rejected": -2.7720870971679688, "logps/chosen": -194.74624633789062, "logps/rejected": -1303.444580078125, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -1.2065147161483765, "rewards/margins": 11.379701614379883, "rewards/rejected": -12.586216926574707, "step": 7660 }, { "epoch": 0.46, "learning_rate": 3.2943842224870705e-06, "logits/chosen": -2.884500026702881, "logits/rejected": -2.7564034461975098, "logps/chosen": -132.61166381835938, "logps/rejected": -1193.4124755859375, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -0.5704704523086548, "rewards/margins": 10.92011833190918, "rewards/rejected": -11.490588188171387, "step": 7670 }, { "epoch": 0.46, "learning_rate": 3.2894484724275156e-06, "logits/chosen": -2.914498805999756, "logits/rejected": -2.7918460369110107, "logps/chosen": -97.73077392578125, "logps/rejected": -1090.3912353515625, "loss": 0.048, "rewards/accuracies": 1.0, "rewards/chosen": -0.29716259241104126, "rewards/margins": 10.16445541381836, "rewards/rejected": -10.461617469787598, "step": 7680 }, { "epoch": 0.46, "learning_rate": 3.284509302004699e-06, "logits/chosen": -2.8862226009368896, "logits/rejected": -2.7714197635650635, "logps/chosen": -108.28077697753906, "logps/rejected": -1223.725341796875, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -0.3438900113105774, "rewards/margins": 11.444146156311035, "rewards/rejected": -11.788037300109863, "step": 7690 }, { "epoch": 0.46, "learning_rate": 3.2795667326180604e-06, "logits/chosen": -2.8831708431243896, "logits/rejected": -2.812370777130127, "logps/chosen": -107.17352294921875, "logps/rejected": -1172.658447265625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.38246119022369385, "rewards/margins": 10.899229049682617, "rewards/rejected": -11.28169059753418, "step": 7700 }, { "epoch": 0.46, "learning_rate": 3.2746207856817695e-06, "logits/chosen": -2.8924126625061035, "logits/rejected": -2.80979585647583, "logps/chosen": -142.9319305419922, "logps/rejected": -1171.10693359375, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.7194041609764099, "rewards/margins": 10.549975395202637, "rewards/rejected": -11.269380569458008, "step": 7710 }, { "epoch": 0.46, "learning_rate": 3.2696714826246295e-06, "logits/chosen": -2.893454074859619, "logits/rejected": -2.7888636589050293, "logps/chosen": -119.1712417602539, "logps/rejected": -1150.808837890625, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -0.468590646982193, "rewards/margins": 10.587118148803711, "rewards/rejected": -11.055707931518555, "step": 7720 }, { "epoch": 0.46, "learning_rate": 3.2647188448899813e-06, "logits/chosen": -2.942380428314209, "logits/rejected": -2.8053178787231445, "logps/chosen": -104.38326263427734, "logps/rejected": -1062.9024658203125, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.2865389883518219, "rewards/margins": 9.900952339172363, "rewards/rejected": -10.187492370605469, "step": 7730 }, { "epoch": 0.46, "learning_rate": 3.2597628939356174e-06, "logits/chosen": -2.8925106525421143, "logits/rejected": -2.796748161315918, "logps/chosen": -102.07750701904297, "logps/rejected": -1070.4822998046875, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -0.34492790699005127, "rewards/margins": 9.899534225463867, "rewards/rejected": -10.244461059570312, "step": 7740 }, { "epoch": 0.46, "learning_rate": 3.254803651233683e-06, "logits/chosen": -2.8944544792175293, "logits/rejected": -2.7875208854675293, "logps/chosen": -130.02359008789062, "logps/rejected": -1073.03173828125, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -0.5804868340492249, "rewards/margins": 9.703164100646973, "rewards/rejected": -10.283651351928711, "step": 7750 }, { "epoch": 0.46, "learning_rate": 3.249841138270585e-06, "logits/chosen": -2.924694538116455, "logits/rejected": -2.7893524169921875, "logps/chosen": -89.68095397949219, "logps/rejected": -1207.548828125, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -0.21344156563282013, "rewards/margins": 11.428121566772461, "rewards/rejected": -11.641563415527344, "step": 7760 }, { "epoch": 0.46, "learning_rate": 3.2448753765469e-06, "logits/chosen": -2.8864364624023438, "logits/rejected": -2.811795234680176, "logps/chosen": -89.72996520996094, "logps/rejected": -1198.156494140625, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.20445597171783447, "rewards/margins": 11.331620216369629, "rewards/rejected": -11.536075592041016, "step": 7770 }, { "epoch": 0.46, "learning_rate": 3.23990638757728e-06, "logits/chosen": -2.9429805278778076, "logits/rejected": -2.795192003250122, "logps/chosen": -131.03366088867188, "logps/rejected": -1132.643310546875, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -0.584152340888977, "rewards/margins": 10.295206069946289, "rewards/rejected": -10.879358291625977, "step": 7780 }, { "epoch": 0.46, "learning_rate": 3.2349341928903588e-06, "logits/chosen": -2.907038927078247, "logits/rejected": -2.77135968208313, "logps/chosen": -92.76820373535156, "logps/rejected": -1118.838134765625, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.24288122355937958, "rewards/margins": 10.516764640808105, "rewards/rejected": -10.75964641571045, "step": 7790 }, { "epoch": 0.47, "learning_rate": 3.2299588140286597e-06, "logits/chosen": -2.8822779655456543, "logits/rejected": -2.7765870094299316, "logps/chosen": -121.6458969116211, "logps/rejected": -1159.571533203125, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/chosen": -0.518525242805481, "rewards/margins": 10.626665115356445, "rewards/rejected": -11.145190238952637, "step": 7800 }, { "epoch": 0.47, "learning_rate": 3.2249802725485026e-06, "logits/chosen": -2.8916025161743164, "logits/rejected": -2.7779576778411865, "logps/chosen": -95.29065704345703, "logps/rejected": -948.3624267578125, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -0.25884923338890076, "rewards/margins": 8.782289505004883, "rewards/rejected": -9.0411376953125, "step": 7810 }, { "epoch": 0.47, "learning_rate": 3.2199985900199064e-06, "logits/chosen": -2.906839609146118, "logits/rejected": -2.773341417312622, "logps/chosen": -89.33119201660156, "logps/rejected": -1099.8743896484375, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -0.19501051306724548, "rewards/margins": 10.363716125488281, "rewards/rejected": -10.558727264404297, "step": 7820 }, { "epoch": 0.47, "learning_rate": 3.215013788026504e-06, "logits/chosen": -2.9322872161865234, "logits/rejected": -2.7878661155700684, "logps/chosen": -117.50235748291016, "logps/rejected": -1091.5369873046875, "loss": 0.0222, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.4392385482788086, "rewards/margins": 10.031774520874023, "rewards/rejected": -10.471014022827148, "step": 7830 }, { "epoch": 0.47, "learning_rate": 3.2100258881654387e-06, "logits/chosen": -2.8693737983703613, "logits/rejected": -2.759340524673462, "logps/chosen": -98.27899169921875, "logps/rejected": -973.5848388671875, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.24930746853351593, "rewards/margins": 9.050195693969727, "rewards/rejected": -9.299501419067383, "step": 7840 }, { "epoch": 0.47, "learning_rate": 3.20503491204728e-06, "logits/chosen": -2.8951995372772217, "logits/rejected": -2.7511303424835205, "logps/chosen": -93.40910339355469, "logps/rejected": -1145.65576171875, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.17460860311985016, "rewards/margins": 10.842793464660645, "rewards/rejected": -11.017400741577148, "step": 7850 }, { "epoch": 0.47, "learning_rate": 3.200040881295922e-06, "logits/chosen": -2.8989176750183105, "logits/rejected": -2.766719102859497, "logps/chosen": -92.99183654785156, "logps/rejected": -1149.91748046875, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.2718045711517334, "rewards/margins": 10.780914306640625, "rewards/rejected": -11.052719116210938, "step": 7860 }, { "epoch": 0.47, "learning_rate": 3.1950438175484965e-06, "logits/chosen": -2.9045536518096924, "logits/rejected": -2.7655181884765625, "logps/chosen": -90.64842224121094, "logps/rejected": -1172.5733642578125, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -0.18401357531547546, "rewards/margins": 11.10698127746582, "rewards/rejected": -11.290994644165039, "step": 7870 }, { "epoch": 0.47, "learning_rate": 3.1900437424552726e-06, "logits/chosen": -2.9216113090515137, "logits/rejected": -2.7956647872924805, "logps/chosen": -72.9948501586914, "logps/rejected": -1124.760498046875, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.0612676739692688, "rewards/margins": 10.752792358398438, "rewards/rejected": -10.81406021118164, "step": 7880 }, { "epoch": 0.47, "learning_rate": 3.1850406776795682e-06, "logits/chosen": -2.9127840995788574, "logits/rejected": -2.771578311920166, "logps/chosen": -76.86058807373047, "logps/rejected": -1020.6849365234375, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -0.08427444845438004, "rewards/margins": 9.686493873596191, "rewards/rejected": -9.770768165588379, "step": 7890 }, { "epoch": 0.47, "learning_rate": 3.1800346448976567e-06, "logits/chosen": -2.905949592590332, "logits/rejected": -2.7715139389038086, "logps/chosen": -107.0735092163086, "logps/rejected": -1213.9295654296875, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -0.3573954999446869, "rewards/margins": 11.336355209350586, "rewards/rejected": -11.693750381469727, "step": 7900 }, { "epoch": 0.47, "learning_rate": 3.1750256657986643e-06, "logits/chosen": -2.9175524711608887, "logits/rejected": -2.772958278656006, "logps/chosen": -228.7196807861328, "logps/rejected": -1282.1923828125, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -1.5395708084106445, "rewards/margins": 10.842707633972168, "rewards/rejected": -12.382279396057129, "step": 7910 }, { "epoch": 0.47, "learning_rate": 3.1700137620844897e-06, "logits/chosen": -2.899862289428711, "logits/rejected": -2.769911527633667, "logps/chosen": -236.7340087890625, "logps/rejected": -1178.8040771484375, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -1.7074544429779053, "rewards/margins": 9.635464668273926, "rewards/rejected": -11.342917442321777, "step": 7920 }, { "epoch": 0.47, "learning_rate": 3.164998955469697e-06, "logits/chosen": -2.9402148723602295, "logits/rejected": -2.782379627227783, "logps/chosen": -160.73495483398438, "logps/rejected": -1204.7757568359375, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.9023953676223755, "rewards/margins": 10.697343826293945, "rewards/rejected": -11.599738121032715, "step": 7930 }, { "epoch": 0.47, "learning_rate": 3.1599812676814314e-06, "logits/chosen": -2.9018945693969727, "logits/rejected": -2.7919204235076904, "logps/chosen": -119.3823013305664, "logps/rejected": -1185.866943359375, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.5544542670249939, "rewards/margins": 10.875728607177734, "rewards/rejected": -11.430182456970215, "step": 7940 }, { "epoch": 0.47, "learning_rate": 3.1549607204593185e-06, "logits/chosen": -2.894233226776123, "logits/rejected": -2.7141265869140625, "logps/chosen": -135.49066162109375, "logps/rejected": -1150.4248046875, "loss": 0.048, "rewards/accuracies": 1.0, "rewards/chosen": -0.696251392364502, "rewards/margins": 10.371603012084961, "rewards/rejected": -11.067853927612305, "step": 7950 }, { "epoch": 0.47, "learning_rate": 3.1499373355553746e-06, "logits/chosen": -2.8979289531707764, "logits/rejected": -2.793078899383545, "logps/chosen": -136.1551513671875, "logps/rejected": -1105.3221435546875, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.6886736154556274, "rewards/margins": 9.918999671936035, "rewards/rejected": -10.607671737670898, "step": 7960 }, { "epoch": 0.48, "learning_rate": 3.1449111347339084e-06, "logits/chosen": -2.911642074584961, "logits/rejected": -2.781906843185425, "logps/chosen": -103.6140365600586, "logps/rejected": -1114.9400634765625, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -0.3321247696876526, "rewards/margins": 10.374422073364258, "rewards/rejected": -10.70654582977295, "step": 7970 }, { "epoch": 0.48, "learning_rate": 3.139882139771431e-06, "logits/chosen": -2.9380996227264404, "logits/rejected": -2.7687249183654785, "logps/chosen": -132.02896118164062, "logps/rejected": -1165.959228515625, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.5754782557487488, "rewards/margins": 10.640026092529297, "rewards/rejected": -11.215505599975586, "step": 7980 }, { "epoch": 0.48, "learning_rate": 3.134850372456558e-06, "logits/chosen": -2.89884877204895, "logits/rejected": -2.799994945526123, "logps/chosen": -117.5091323852539, "logps/rejected": -1086.375244140625, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -0.4332999289035797, "rewards/margins": 9.98084831237793, "rewards/rejected": -10.414148330688477, "step": 7990 }, { "epoch": 0.48, "learning_rate": 3.1298158545899167e-06, "logits/chosen": -2.9007787704467773, "logits/rejected": -2.8000261783599854, "logps/chosen": -132.62811279296875, "logps/rejected": -1202.259033203125, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.6295466423034668, "rewards/margins": 10.952387809753418, "rewards/rejected": -11.581934928894043, "step": 8000 }, { "epoch": 0.48, "learning_rate": 3.1247786079840513e-06, "logits/chosen": -2.9133951663970947, "logits/rejected": -2.7975242137908936, "logps/chosen": -154.61904907226562, "logps/rejected": -1088.6337890625, "loss": 0.1259, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.8594759702682495, "rewards/margins": 9.582804679870605, "rewards/rejected": -10.442280769348145, "step": 8010 }, { "epoch": 0.48, "learning_rate": 3.11973865446333e-06, "logits/chosen": -2.917457342147827, "logits/rejected": -2.787106513977051, "logps/chosen": -139.6363983154297, "logps/rejected": -1186.9237060546875, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.7446572780609131, "rewards/margins": 10.687456130981445, "rewards/rejected": -11.432111740112305, "step": 8020 }, { "epoch": 0.48, "learning_rate": 3.1146960158638475e-06, "logits/chosen": -2.876384735107422, "logits/rejected": -2.716489315032959, "logps/chosen": -111.98118591308594, "logps/rejected": -1249.654052734375, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.47215795516967773, "rewards/margins": 11.570295333862305, "rewards/rejected": -12.04245376586914, "step": 8030 }, { "epoch": 0.48, "learning_rate": 3.109650714033331e-06, "logits/chosen": -2.9529457092285156, "logits/rejected": -2.8049941062927246, "logps/chosen": -103.19281005859375, "logps/rejected": -1153.6995849609375, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.3436373174190521, "rewards/margins": 10.744990348815918, "rewards/rejected": -11.088627815246582, "step": 8040 }, { "epoch": 0.48, "learning_rate": 3.10460277083105e-06, "logits/chosen": -2.9112133979797363, "logits/rejected": -2.773751974105835, "logps/chosen": -108.43266296386719, "logps/rejected": -1187.142333984375, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.4154531955718994, "rewards/margins": 11.014824867248535, "rewards/rejected": -11.430275917053223, "step": 8050 }, { "epoch": 0.48, "learning_rate": 3.099552208127713e-06, "logits/chosen": -2.921924114227295, "logits/rejected": -2.791663646697998, "logps/chosen": -103.28163146972656, "logps/rejected": -1102.4859619140625, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -0.34807831048965454, "rewards/margins": 10.234344482421875, "rewards/rejected": -10.582422256469727, "step": 8060 }, { "epoch": 0.48, "learning_rate": 3.0944990478053816e-06, "logits/chosen": -2.8961918354034424, "logits/rejected": -2.774545192718506, "logps/chosen": -134.37338256835938, "logps/rejected": -1182.175048828125, "loss": 0.0196, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6099185347557068, "rewards/margins": 10.75146198272705, "rewards/rejected": -11.361379623413086, "step": 8070 }, { "epoch": 0.48, "learning_rate": 3.089443311757371e-06, "logits/chosen": -2.916269540786743, "logits/rejected": -2.7896814346313477, "logps/chosen": -100.31048583984375, "logps/rejected": -1159.375244140625, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.3261149823665619, "rewards/margins": 10.81511116027832, "rewards/rejected": -11.141225814819336, "step": 8080 }, { "epoch": 0.48, "learning_rate": 3.0843850218881545e-06, "logits/chosen": -2.9256129264831543, "logits/rejected": -2.7803902626037598, "logps/chosen": -99.23483276367188, "logps/rejected": -1147.093505859375, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -0.243240088224411, "rewards/margins": 10.786042213439941, "rewards/rejected": -11.029282569885254, "step": 8090 }, { "epoch": 0.48, "learning_rate": 3.0793242001132725e-06, "logits/chosen": -2.9398560523986816, "logits/rejected": -2.758514881134033, "logps/chosen": -126.73808288574219, "logps/rejected": -1083.206787109375, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -0.5989679098129272, "rewards/margins": 9.79456615447998, "rewards/rejected": -10.393533706665039, "step": 8100 }, { "epoch": 0.48, "learning_rate": 3.074260868359233e-06, "logits/chosen": -2.9151105880737305, "logits/rejected": -2.7593088150024414, "logps/chosen": -84.69123840332031, "logps/rejected": -1154.373291015625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.15928702056407928, "rewards/margins": 10.934359550476074, "rewards/rejected": -11.093647956848145, "step": 8110 }, { "epoch": 0.48, "learning_rate": 3.0691950485634192e-06, "logits/chosen": -2.9352869987487793, "logits/rejected": -2.7683749198913574, "logps/chosen": -101.80007934570312, "logps/rejected": -1129.234130859375, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.35052749514579773, "rewards/margins": 10.501663208007812, "rewards/rejected": -10.852190017700195, "step": 8120 }, { "epoch": 0.48, "learning_rate": 3.0641267626739946e-06, "logits/chosen": -2.946281671524048, "logits/rejected": -2.8257334232330322, "logps/chosen": -93.11467742919922, "logps/rejected": -1135.157470703125, "loss": 0.0287, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.26752060651779175, "rewards/margins": 10.6437406539917, "rewards/rejected": -10.911263465881348, "step": 8130 }, { "epoch": 0.49, "learning_rate": 3.059056032649808e-06, "logits/chosen": -2.876255512237549, "logits/rejected": -2.770972728729248, "logps/chosen": -73.00826263427734, "logps/rejected": -1075.5638427734375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.11690004169940948, "rewards/margins": 10.196069717407227, "rewards/rejected": -10.312968254089355, "step": 8140 }, { "epoch": 0.49, "learning_rate": 3.0539828804602955e-06, "logits/chosen": -2.9464824199676514, "logits/rejected": -2.790735960006714, "logps/chosen": -74.12883758544922, "logps/rejected": -1151.718505859375, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": 0.01673782430589199, "rewards/margins": 11.099535942077637, "rewards/rejected": -11.082798957824707, "step": 8150 }, { "epoch": 0.49, "learning_rate": 3.0489073280853886e-06, "logits/chosen": -2.925861358642578, "logits/rejected": -2.8040521144866943, "logps/chosen": -71.85643005371094, "logps/rejected": -1043.108154296875, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": 0.0238981693983078, "rewards/margins": 10.012449264526367, "rewards/rejected": -9.988550186157227, "step": 8160 }, { "epoch": 0.49, "learning_rate": 3.043829397515419e-06, "logits/chosen": -2.894137144088745, "logits/rejected": -2.7600536346435547, "logps/chosen": -73.14070129394531, "logps/rejected": -1124.641357421875, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": 0.013810524716973305, "rewards/margins": 10.816551208496094, "rewards/rejected": -10.802742004394531, "step": 8170 }, { "epoch": 0.49, "learning_rate": 3.03874911075102e-06, "logits/chosen": -2.919699192047119, "logits/rejected": -2.7573540210723877, "logps/chosen": -72.86320495605469, "logps/rejected": -1086.99951171875, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.016040001064538956, "rewards/margins": 10.398584365844727, "rewards/rejected": -10.414624214172363, "step": 8180 }, { "epoch": 0.49, "learning_rate": 3.0336664898030344e-06, "logits/chosen": -2.922598123550415, "logits/rejected": -2.8029847145080566, "logps/chosen": -79.62333679199219, "logps/rejected": -1090.1444091796875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.06879739463329315, "rewards/margins": 10.383159637451172, "rewards/rejected": -10.451955795288086, "step": 8190 }, { "epoch": 0.49, "learning_rate": 3.0285815566924186e-06, "logits/chosen": -2.932765007019043, "logits/rejected": -2.790095806121826, "logps/chosen": -84.5161361694336, "logps/rejected": -1213.811767578125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.15413479506969452, "rewards/margins": 11.52294635772705, "rewards/rejected": -11.677081108093262, "step": 8200 }, { "epoch": 0.49, "learning_rate": 3.023494333450146e-06, "logits/chosen": -2.878129482269287, "logits/rejected": -2.746980667114258, "logps/chosen": -84.72652435302734, "logps/rejected": -1167.591552734375, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.17867842316627502, "rewards/margins": 11.049379348754883, "rewards/rejected": -11.228056907653809, "step": 8210 }, { "epoch": 0.49, "learning_rate": 3.018404842117112e-06, "logits/chosen": -2.9170081615448, "logits/rejected": -2.777595281600952, "logps/chosen": -87.08171081542969, "logps/rejected": -967.5809326171875, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -0.15085728466510773, "rewards/margins": 9.075668334960938, "rewards/rejected": -9.226526260375977, "step": 8220 }, { "epoch": 0.49, "learning_rate": 3.01331310474404e-06, "logits/chosen": -2.9363760948181152, "logits/rejected": -2.793762683868408, "logps/chosen": -89.48347473144531, "logps/rejected": -1008.2911376953125, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -0.19423620402812958, "rewards/margins": 9.455721855163574, "rewards/rejected": -9.649958610534668, "step": 8230 }, { "epoch": 0.49, "learning_rate": 3.0082191433913825e-06, "logits/chosen": -2.954378366470337, "logits/rejected": -2.7722232341766357, "logps/chosen": -78.72822570800781, "logps/rejected": -1160.857666015625, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.10182075202465057, "rewards/margins": 11.069437980651855, "rewards/rejected": -11.171258926391602, "step": 8240 }, { "epoch": 0.49, "learning_rate": 3.0031229801292293e-06, "logits/chosen": -2.9497978687286377, "logits/rejected": -2.7920262813568115, "logps/chosen": -79.68630981445312, "logps/rejected": -1183.924072265625, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.12509101629257202, "rewards/margins": 11.281144142150879, "rewards/rejected": -11.406235694885254, "step": 8250 }, { "epoch": 0.49, "learning_rate": 2.99802463703721e-06, "logits/chosen": -2.9280426502227783, "logits/rejected": -2.80010986328125, "logps/chosen": -77.04187774658203, "logps/rejected": -1131.11962890625, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -0.041668131947517395, "rewards/margins": 10.820841789245605, "rewards/rejected": -10.86251163482666, "step": 8260 }, { "epoch": 0.49, "learning_rate": 2.9929241362043976e-06, "logits/chosen": -2.9086289405822754, "logits/rejected": -2.7568442821502686, "logps/chosen": -81.07769775390625, "logps/rejected": -1087.364013671875, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.16367505490779877, "rewards/margins": 10.270933151245117, "rewards/rejected": -10.434608459472656, "step": 8270 }, { "epoch": 0.49, "learning_rate": 2.9878214997292155e-06, "logits/chosen": -2.9421534538269043, "logits/rejected": -2.7460548877716064, "logps/chosen": -95.58737182617188, "logps/rejected": -1106.6307373046875, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -0.2242475003004074, "rewards/margins": 10.409322738647461, "rewards/rejected": -10.633569717407227, "step": 8280 }, { "epoch": 0.49, "learning_rate": 2.9827167497193367e-06, "logits/chosen": -2.9335103034973145, "logits/rejected": -2.7946219444274902, "logps/chosen": -140.57815551757812, "logps/rejected": -1278.376220703125, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -0.6617147922515869, "rewards/margins": 11.685511589050293, "rewards/rejected": -12.347227096557617, "step": 8290 }, { "epoch": 0.49, "learning_rate": 2.9776099082915954e-06, "logits/chosen": -2.911815643310547, "logits/rejected": -2.742642402648926, "logps/chosen": -163.64859008789062, "logps/rejected": -1300.9970703125, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.9327988624572754, "rewards/margins": 11.63986873626709, "rewards/rejected": -12.572668075561523, "step": 8300 }, { "epoch": 0.5, "learning_rate": 2.9725009975718845e-06, "logits/chosen": -2.8979454040527344, "logits/rejected": -2.7175180912017822, "logps/chosen": -154.80947875976562, "logps/rejected": -1268.8074951171875, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.8442990183830261, "rewards/margins": 11.391578674316406, "rewards/rejected": -12.235878944396973, "step": 8310 }, { "epoch": 0.5, "learning_rate": 2.9673900396950622e-06, "logits/chosen": -2.931565046310425, "logits/rejected": -2.8107380867004395, "logps/chosen": -162.10623168945312, "logps/rejected": -1088.323486328125, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -0.8948472142219543, "rewards/margins": 9.552961349487305, "rewards/rejected": -10.447807312011719, "step": 8320 }, { "epoch": 0.5, "learning_rate": 2.9622770568048577e-06, "logits/chosen": -2.9129998683929443, "logits/rejected": -2.7176756858825684, "logps/chosen": -160.28585815429688, "logps/rejected": -1199.1192626953125, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -0.8855406641960144, "rewards/margins": 10.659804344177246, "rewards/rejected": -11.5453462600708, "step": 8330 }, { "epoch": 0.5, "learning_rate": 2.9571620710537726e-06, "logits/chosen": -2.9029946327209473, "logits/rejected": -2.7773029804229736, "logps/chosen": -156.53988647460938, "logps/rejected": -1178.3660888671875, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.802294135093689, "rewards/margins": 10.541152000427246, "rewards/rejected": -11.343446731567383, "step": 8340 }, { "epoch": 0.5, "learning_rate": 2.9520451046029862e-06, "logits/chosen": -2.890559196472168, "logits/rejected": -2.759887933731079, "logps/chosen": -154.37278747558594, "logps/rejected": -1227.537353515625, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.8512604832649231, "rewards/margins": 10.97917366027832, "rewards/rejected": -11.83043384552002, "step": 8350 }, { "epoch": 0.5, "learning_rate": 2.9469261796222608e-06, "logits/chosen": -2.8820786476135254, "logits/rejected": -2.721764087677002, "logps/chosen": -151.35205078125, "logps/rejected": -1162.476806640625, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -0.8646419644355774, "rewards/margins": 10.332788467407227, "rewards/rejected": -11.197429656982422, "step": 8360 }, { "epoch": 0.5, "learning_rate": 2.9418053182898428e-06, "logits/chosen": -2.898127317428589, "logits/rejected": -2.729259729385376, "logps/chosen": -139.13018798828125, "logps/rejected": -1169.8662109375, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.684096097946167, "rewards/margins": 10.566595077514648, "rewards/rejected": -11.250690460205078, "step": 8370 }, { "epoch": 0.5, "learning_rate": 2.936682542792367e-06, "logits/chosen": -2.9057202339172363, "logits/rejected": -2.7429583072662354, "logps/chosen": -131.05111694335938, "logps/rejected": -1136.9920654296875, "loss": 0.0244, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.628420352935791, "rewards/margins": 10.316336631774902, "rewards/rejected": -10.944757461547852, "step": 8380 }, { "epoch": 0.5, "learning_rate": 2.9315578753247632e-06, "logits/chosen": -2.9237546920776367, "logits/rejected": -2.7132420539855957, "logps/chosen": -134.4027862548828, "logps/rejected": -1118.904052734375, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.5988486409187317, "rewards/margins": 10.142118453979492, "rewards/rejected": -10.740968704223633, "step": 8390 }, { "epoch": 0.5, "learning_rate": 2.9264313380901586e-06, "logits/chosen": -2.923072338104248, "logits/rejected": -2.7244014739990234, "logps/chosen": -127.77690124511719, "logps/rejected": -1257.849609375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.5060471892356873, "rewards/margins": 11.631449699401855, "rewards/rejected": -12.137496948242188, "step": 8400 }, { "epoch": 0.5, "learning_rate": 2.921302953299781e-06, "logits/chosen": -2.894197940826416, "logits/rejected": -2.6894216537475586, "logps/chosen": -142.1927947998047, "logps/rejected": -1178.7689208984375, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.7495073080062866, "rewards/margins": 10.590059280395508, "rewards/rejected": -11.339567184448242, "step": 8410 }, { "epoch": 0.5, "learning_rate": 2.916172743172861e-06, "logits/chosen": -2.8771791458129883, "logits/rejected": -2.72314453125, "logps/chosen": -180.23348999023438, "logps/rejected": -1090.023681640625, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -1.0421350002288818, "rewards/margins": 9.425169944763184, "rewards/rejected": -10.467306137084961, "step": 8420 }, { "epoch": 0.5, "learning_rate": 2.911040729936542e-06, "logits/chosen": -2.904419183731079, "logits/rejected": -2.737999439239502, "logps/chosen": -140.25180053710938, "logps/rejected": -1127.528076171875, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -0.7147947549819946, "rewards/margins": 10.117956161499023, "rewards/rejected": -10.83275032043457, "step": 8430 }, { "epoch": 0.5, "learning_rate": 2.905906935825774e-06, "logits/chosen": -2.8962762355804443, "logits/rejected": -2.665329694747925, "logps/chosen": -115.6053695678711, "logps/rejected": -1215.293212890625, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -0.4125330448150635, "rewards/margins": 11.294572830200195, "rewards/rejected": -11.70710563659668, "step": 8440 }, { "epoch": 0.5, "learning_rate": 2.900771383083227e-06, "logits/chosen": -2.902400255203247, "logits/rejected": -2.688458204269409, "logps/chosen": -112.76815032958984, "logps/rejected": -1184.0489501953125, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.3807516098022461, "rewards/margins": 11.021198272705078, "rewards/rejected": -11.401948928833008, "step": 8450 }, { "epoch": 0.5, "learning_rate": 2.895634093959189e-06, "logits/chosen": -2.8728690147399902, "logits/rejected": -2.6832470893859863, "logps/chosen": -95.8866958618164, "logps/rejected": -1126.6212158203125, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -0.24849529564380646, "rewards/margins": 10.576570510864258, "rewards/rejected": -10.825065612792969, "step": 8460 }, { "epoch": 0.51, "learning_rate": 2.8904950907114715e-06, "logits/chosen": -2.919908285140991, "logits/rejected": -2.7099380493164062, "logps/chosen": -102.00383758544922, "logps/rejected": -1214.02587890625, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.3236317038536072, "rewards/margins": 11.365498542785645, "rewards/rejected": -11.689130783081055, "step": 8470 }, { "epoch": 0.51, "learning_rate": 2.885354395605311e-06, "logits/chosen": -2.9085373878479004, "logits/rejected": -2.699601411819458, "logps/chosen": -115.09901428222656, "logps/rejected": -1174.053955078125, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.45333442091941833, "rewards/margins": 10.83600902557373, "rewards/rejected": -11.289342880249023, "step": 8480 }, { "epoch": 0.51, "learning_rate": 2.880212030913276e-06, "logits/chosen": -2.88631272315979, "logits/rejected": -2.7022931575775146, "logps/chosen": -119.80062103271484, "logps/rejected": -1064.912841796875, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.5223627090454102, "rewards/margins": 9.709166526794434, "rewards/rejected": -10.231529235839844, "step": 8490 }, { "epoch": 0.51, "learning_rate": 2.875068018915169e-06, "logits/chosen": -2.864041805267334, "logits/rejected": -2.678987741470337, "logps/chosen": -112.8697280883789, "logps/rejected": -1098.531005859375, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -0.4213559627532959, "rewards/margins": 10.123356819152832, "rewards/rejected": -10.54471206665039, "step": 8500 }, { "epoch": 0.51, "learning_rate": 2.8699223818979274e-06, "logits/chosen": -2.861661195755005, "logits/rejected": -2.68904972076416, "logps/chosen": -116.3563232421875, "logps/rejected": -1156.1903076171875, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.4229259490966797, "rewards/margins": 10.690869331359863, "rewards/rejected": -11.11379623413086, "step": 8510 }, { "epoch": 0.51, "learning_rate": 2.8647751421555313e-06, "logits/chosen": -2.881359577178955, "logits/rejected": -2.681013822555542, "logps/chosen": -121.25843811035156, "logps/rejected": -1274.0323486328125, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.4660007059574127, "rewards/margins": 11.827128410339355, "rewards/rejected": -12.293130874633789, "step": 8520 }, { "epoch": 0.51, "learning_rate": 2.859626321988903e-06, "logits/chosen": -2.8887641429901123, "logits/rejected": -2.6403064727783203, "logps/chosen": -135.89137268066406, "logps/rejected": -1120.910400390625, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.6892722249031067, "rewards/margins": 10.080706596374512, "rewards/rejected": -10.769978523254395, "step": 8530 }, { "epoch": 0.51, "learning_rate": 2.8544759437058135e-06, "logits/chosen": -2.8979439735412598, "logits/rejected": -2.6841869354248047, "logps/chosen": -170.7774200439453, "logps/rejected": -1199.5584716796875, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -0.995023250579834, "rewards/margins": 10.565732955932617, "rewards/rejected": -11.56075668334961, "step": 8540 }, { "epoch": 0.51, "learning_rate": 2.8493240296207835e-06, "logits/chosen": -2.843690872192383, "logits/rejected": -2.6437625885009766, "logps/chosen": -223.54745483398438, "logps/rejected": -1286.1190185546875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.5435632467269897, "rewards/margins": 10.872712135314941, "rewards/rejected": -12.416275978088379, "step": 8550 }, { "epoch": 0.51, "learning_rate": 2.844170602054989e-06, "logits/chosen": -2.8815646171569824, "logits/rejected": -2.62292218208313, "logps/chosen": -206.5679473876953, "logps/rejected": -1223.450927734375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.3406312465667725, "rewards/margins": 10.451102256774902, "rewards/rejected": -11.79173469543457, "step": 8560 }, { "epoch": 0.51, "learning_rate": 2.8390156833361616e-06, "logits/chosen": -2.8827974796295166, "logits/rejected": -2.6515748500823975, "logps/chosen": -216.79373168945312, "logps/rejected": -1198.4493408203125, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -1.3943380117416382, "rewards/margins": 10.1595458984375, "rewards/rejected": -11.55388355255127, "step": 8570 }, { "epoch": 0.51, "learning_rate": 2.833859295798495e-06, "logits/chosen": -2.840674877166748, "logits/rejected": -2.6365323066711426, "logps/chosen": -193.5364990234375, "logps/rejected": -1295.1944580078125, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.1920167207717896, "rewards/margins": 11.305582046508789, "rewards/rejected": -12.497599601745605, "step": 8580 }, { "epoch": 0.51, "learning_rate": 2.828701461782546e-06, "logits/chosen": -2.8499903678894043, "logits/rejected": -2.663597583770752, "logps/chosen": -180.19314575195312, "logps/rejected": -1165.320068359375, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -1.1210962533950806, "rewards/margins": 10.096768379211426, "rewards/rejected": -11.217864036560059, "step": 8590 }, { "epoch": 0.51, "learning_rate": 2.8235422036351384e-06, "logits/chosen": -2.842646837234497, "logits/rejected": -2.6701502799987793, "logps/chosen": -167.73040771484375, "logps/rejected": -1202.8580322265625, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -1.0205423831939697, "rewards/margins": 10.567150115966797, "rewards/rejected": -11.587693214416504, "step": 8600 }, { "epoch": 0.51, "learning_rate": 2.818381543709267e-06, "logits/chosen": -2.8757827281951904, "logits/rejected": -2.644541025161743, "logps/chosen": -179.77206420898438, "logps/rejected": -1292.0792236328125, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.0578577518463135, "rewards/margins": 11.416297912597656, "rewards/rejected": -12.474156379699707, "step": 8610 }, { "epoch": 0.51, "learning_rate": 2.813219504363998e-06, "logits/chosen": -2.8929104804992676, "logits/rejected": -2.6589672565460205, "logps/chosen": -186.99432373046875, "logps/rejected": -1229.5103759765625, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -1.116969347000122, "rewards/margins": 10.737485885620117, "rewards/rejected": -11.854455947875977, "step": 8620 }, { "epoch": 0.51, "learning_rate": 2.8080561079643758e-06, "logits/chosen": -2.8735790252685547, "logits/rejected": -2.6584935188293457, "logps/chosen": -177.3016815185547, "logps/rejected": -1353.6968994140625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.1050446033477783, "rewards/margins": 12.000685691833496, "rewards/rejected": -13.105731010437012, "step": 8630 }, { "epoch": 0.52, "learning_rate": 2.802891376881325e-06, "logits/chosen": -2.859884738922119, "logits/rejected": -2.6368844509124756, "logps/chosen": -164.41403198242188, "logps/rejected": -1120.52392578125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.9209758043289185, "rewards/margins": 9.83740520477295, "rewards/rejected": -10.758380889892578, "step": 8640 }, { "epoch": 0.52, "learning_rate": 2.7977253334915495e-06, "logits/chosen": -2.848546028137207, "logits/rejected": -2.6064257621765137, "logps/chosen": -165.62997436523438, "logps/rejected": -1182.0648193359375, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.9194143414497375, "rewards/margins": 10.454706192016602, "rewards/rejected": -11.374120712280273, "step": 8650 }, { "epoch": 0.52, "learning_rate": 2.7925580001774422e-06, "logits/chosen": -2.8367691040039062, "logits/rejected": -2.595975160598755, "logps/chosen": -146.14683532714844, "logps/rejected": -1265.567626953125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.6889903545379639, "rewards/margins": 11.512295722961426, "rewards/rejected": -12.201287269592285, "step": 8660 }, { "epoch": 0.52, "learning_rate": 2.787389399326984e-06, "logits/chosen": -2.8577499389648438, "logits/rejected": -2.6285009384155273, "logps/chosen": -146.45794677734375, "logps/rejected": -1191.8450927734375, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.7575078010559082, "rewards/margins": 10.720059394836426, "rewards/rejected": -11.477567672729492, "step": 8670 }, { "epoch": 0.52, "learning_rate": 2.7822195533336466e-06, "logits/chosen": -2.85831880569458, "logits/rejected": -2.659726142883301, "logps/chosen": -140.2643585205078, "logps/rejected": -1153.3939208984375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.6969577074050903, "rewards/margins": 10.404402732849121, "rewards/rejected": -11.101360321044922, "step": 8680 }, { "epoch": 0.52, "learning_rate": 2.7770484845962976e-06, "logits/chosen": -2.908207416534424, "logits/rejected": -2.667222499847412, "logps/chosen": -142.89402770996094, "logps/rejected": -1020.4970703125, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -0.7316961288452148, "rewards/margins": 9.035357475280762, "rewards/rejected": -9.767054557800293, "step": 8690 }, { "epoch": 0.52, "learning_rate": 2.7718762155191015e-06, "logits/chosen": -2.884187936782837, "logits/rejected": -2.6048455238342285, "logps/chosen": -130.8945770263672, "logps/rejected": -1101.829833984375, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -0.5116895437240601, "rewards/margins": 10.068532943725586, "rewards/rejected": -10.580222129821777, "step": 8700 }, { "epoch": 0.52, "learning_rate": 2.766702768511423e-06, "logits/chosen": -2.8269100189208984, "logits/rejected": -2.639131546020508, "logps/chosen": -109.55877685546875, "logps/rejected": -989.4346923828125, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -0.42994171380996704, "rewards/margins": 9.02569580078125, "rewards/rejected": -9.455636978149414, "step": 8710 }, { "epoch": 0.52, "learning_rate": 2.7615281659877304e-06, "logits/chosen": -2.884887456893921, "logits/rejected": -2.668712854385376, "logps/chosen": -125.5203857421875, "logps/rejected": -1095.218505859375, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -0.5232694745063782, "rewards/margins": 9.987313270568848, "rewards/rejected": -10.510583877563477, "step": 8720 }, { "epoch": 0.52, "learning_rate": 2.7563524303675005e-06, "logits/chosen": -2.804560899734497, "logits/rejected": -2.530867099761963, "logps/chosen": -150.4720001220703, "logps/rejected": -1232.0577392578125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.7528031468391418, "rewards/margins": 11.117652893066406, "rewards/rejected": -11.870455741882324, "step": 8730 }, { "epoch": 0.52, "learning_rate": 2.7511755840751165e-06, "logits/chosen": -2.8314120769500732, "logits/rejected": -2.6416478157043457, "logps/chosen": -129.7180938720703, "logps/rejected": -1180.48974609375, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -0.6153192520141602, "rewards/margins": 10.765087127685547, "rewards/rejected": -11.380406379699707, "step": 8740 }, { "epoch": 0.52, "learning_rate": 2.7459976495397738e-06, "logits/chosen": -2.8279497623443604, "logits/rejected": -2.5734431743621826, "logps/chosen": -159.1131134033203, "logps/rejected": -1189.624267578125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.8376861810684204, "rewards/margins": 10.623645782470703, "rewards/rejected": -11.461332321166992, "step": 8750 }, { "epoch": 0.52, "learning_rate": 2.7408186491953862e-06, "logits/chosen": -2.851015567779541, "logits/rejected": -2.5913257598876953, "logps/chosen": -142.84890747070312, "logps/rejected": -1179.016845703125, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -0.7569302916526794, "rewards/margins": 10.598562240600586, "rewards/rejected": -11.355490684509277, "step": 8760 }, { "epoch": 0.52, "learning_rate": 2.735638605480482e-06, "logits/chosen": -2.8595190048217773, "logits/rejected": -2.582789182662964, "logps/chosen": -144.33029174804688, "logps/rejected": -1175.071044921875, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.7189782857894897, "rewards/margins": 10.575189590454102, "rewards/rejected": -11.294167518615723, "step": 8770 }, { "epoch": 0.52, "learning_rate": 2.730457540838109e-06, "logits/chosen": -2.81011962890625, "logits/rejected": -2.5828940868377686, "logps/chosen": -129.65719604492188, "logps/rejected": -1138.3101806640625, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.6193026900291443, "rewards/margins": 10.32276439666748, "rewards/rejected": -10.942068099975586, "step": 8780 }, { "epoch": 0.52, "learning_rate": 2.725275477715743e-06, "logits/chosen": -2.8384578227996826, "logits/rejected": -2.548628568649292, "logps/chosen": -172.1205596923828, "logps/rejected": -1171.3193359375, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -1.0557301044464111, "rewards/margins": 10.209169387817383, "rewards/rejected": -11.264899253845215, "step": 8790 }, { "epoch": 0.52, "learning_rate": 2.7200924385651805e-06, "logits/chosen": -2.8204004764556885, "logits/rejected": -2.549956798553467, "logps/chosen": -207.0591278076172, "logps/rejected": -1209.3974609375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.4057868719100952, "rewards/margins": 10.251618385314941, "rewards/rejected": -11.657405853271484, "step": 8800 }, { "epoch": 0.53, "learning_rate": 2.7149084458424497e-06, "logits/chosen": -2.8310275077819824, "logits/rejected": -2.6350979804992676, "logps/chosen": -248.27474975585938, "logps/rejected": -1290.673095703125, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.8052504062652588, "rewards/margins": 10.661766052246094, "rewards/rejected": -12.46701717376709, "step": 8810 }, { "epoch": 0.53, "learning_rate": 2.70972352200771e-06, "logits/chosen": -2.8514134883880615, "logits/rejected": -2.5841760635375977, "logps/chosen": -240.83657836914062, "logps/rejected": -1387.0308837890625, "loss": 0.0261, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.6830450296401978, "rewards/margins": 11.736095428466797, "rewards/rejected": -13.419140815734863, "step": 8820 }, { "epoch": 0.53, "learning_rate": 2.7045376895251544e-06, "logits/chosen": -2.824413776397705, "logits/rejected": -2.594223737716675, "logps/chosen": -171.55313110351562, "logps/rejected": -1272.257080078125, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.1000640392303467, "rewards/margins": 11.190762519836426, "rewards/rejected": -12.290824890136719, "step": 8830 }, { "epoch": 0.53, "learning_rate": 2.6993509708629133e-06, "logits/chosen": -2.858731746673584, "logits/rejected": -2.634040117263794, "logps/chosen": -211.41061401367188, "logps/rejected": -1314.533203125, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.388708472251892, "rewards/margins": 11.303516387939453, "rewards/rejected": -12.69222354888916, "step": 8840 }, { "epoch": 0.53, "learning_rate": 2.694163388492957e-06, "logits/chosen": -2.8316709995269775, "logits/rejected": -2.6018905639648438, "logps/chosen": -169.09791564941406, "logps/rejected": -1177.427490234375, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.9727762937545776, "rewards/margins": 10.349662780761719, "rewards/rejected": -11.322439193725586, "step": 8850 }, { "epoch": 0.53, "learning_rate": 2.6889749648909946e-06, "logits/chosen": -2.8448777198791504, "logits/rejected": -2.56866717338562, "logps/chosen": -123.35162353515625, "logps/rejected": -1396.155517578125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.5139948725700378, "rewards/margins": 12.987811088562012, "rewards/rejected": -13.501806259155273, "step": 8860 }, { "epoch": 0.53, "learning_rate": 2.6837857225363837e-06, "logits/chosen": -2.830170154571533, "logits/rejected": -2.59755277633667, "logps/chosen": -116.15406799316406, "logps/rejected": -1179.905517578125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.5133495926856995, "rewards/margins": 10.840788841247559, "rewards/rejected": -11.35413932800293, "step": 8870 }, { "epoch": 0.53, "learning_rate": 2.6785956839120294e-06, "logits/chosen": -2.827650308609009, "logits/rejected": -2.6163816452026367, "logps/chosen": -117.0268783569336, "logps/rejected": -1123.203369140625, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.510259747505188, "rewards/margins": 10.284109115600586, "rewards/rejected": -10.794370651245117, "step": 8880 }, { "epoch": 0.53, "learning_rate": 2.6734048715042824e-06, "logits/chosen": -2.817591905593872, "logits/rejected": -2.5745701789855957, "logps/chosen": -111.74925231933594, "logps/rejected": -1238.2119140625, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.4261833131313324, "rewards/margins": 11.530172348022461, "rewards/rejected": -11.9563570022583, "step": 8890 }, { "epoch": 0.53, "learning_rate": 2.668213307802851e-06, "logits/chosen": -2.806105136871338, "logits/rejected": -2.591003894805908, "logps/chosen": -115.70748138427734, "logps/rejected": -1178.735107421875, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.40064820647239685, "rewards/margins": 10.935684204101562, "rewards/rejected": -11.336334228515625, "step": 8900 }, { "epoch": 0.53, "learning_rate": 2.663021015300695e-06, "logits/chosen": -2.8011221885681152, "logits/rejected": -2.572206497192383, "logps/chosen": -109.93327331542969, "logps/rejected": -1094.0491943359375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.3877597451210022, "rewards/margins": 10.114436149597168, "rewards/rejected": -10.502195358276367, "step": 8910 }, { "epoch": 0.53, "learning_rate": 2.657828016493933e-06, "logits/chosen": -2.80245041847229, "logits/rejected": -2.573734760284424, "logps/chosen": -126.8602066040039, "logps/rejected": -1155.0802001953125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5544692873954773, "rewards/margins": 10.555421829223633, "rewards/rejected": -11.109891891479492, "step": 8920 }, { "epoch": 0.53, "learning_rate": 2.6526343338817445e-06, "logits/chosen": -2.8176636695861816, "logits/rejected": -2.562530517578125, "logps/chosen": -151.31910705566406, "logps/rejected": -1200.107666015625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.7892540693283081, "rewards/margins": 10.772626876831055, "rewards/rejected": -11.561881065368652, "step": 8930 }, { "epoch": 0.53, "learning_rate": 2.647439989966272e-06, "logits/chosen": -2.8049850463867188, "logits/rejected": -2.5129425525665283, "logps/chosen": -114.68141174316406, "logps/rejected": -1238.9661865234375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.47490543127059937, "rewards/margins": 11.465176582336426, "rewards/rejected": -11.940081596374512, "step": 8940 }, { "epoch": 0.53, "learning_rate": 2.6422450072525198e-06, "logits/chosen": -2.823791265487671, "logits/rejected": -2.569718360900879, "logps/chosen": -108.46688079833984, "logps/rejected": -1020.7151489257812, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.41460657119750977, "rewards/margins": 9.34644889831543, "rewards/rejected": -9.761054992675781, "step": 8950 }, { "epoch": 0.53, "learning_rate": 2.6370494082482632e-06, "logits/chosen": -2.809652805328369, "logits/rejected": -2.5150139331817627, "logps/chosen": -119.2606201171875, "logps/rejected": -1105.090576171875, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.4620642066001892, "rewards/margins": 10.13792610168457, "rewards/rejected": -10.599990844726562, "step": 8960 }, { "epoch": 0.53, "learning_rate": 2.6318532154639474e-06, "logits/chosen": -2.7741401195526123, "logits/rejected": -2.5240073204040527, "logps/chosen": -126.09135437011719, "logps/rejected": -1223.4954833984375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.522668182849884, "rewards/margins": 11.278470993041992, "rewards/rejected": -11.801138877868652, "step": 8970 }, { "epoch": 0.54, "learning_rate": 2.626656451412588e-06, "logits/chosen": -2.8281302452087402, "logits/rejected": -2.5706429481506348, "logps/chosen": -118.5013198852539, "logps/rejected": -1252.6693115234375, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.4158239960670471, "rewards/margins": 11.657991409301758, "rewards/rejected": -12.07381534576416, "step": 8980 }, { "epoch": 0.54, "learning_rate": 2.6214591386096782e-06, "logits/chosen": -2.7473807334899902, "logits/rejected": -2.478829860687256, "logps/chosen": -101.12090301513672, "logps/rejected": -1214.964599609375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.31386569142341614, "rewards/margins": 11.400246620178223, "rewards/rejected": -11.714112281799316, "step": 8990 }, { "epoch": 0.54, "learning_rate": 2.6162612995730874e-06, "logits/chosen": -2.8409926891326904, "logits/rejected": -2.567551374435425, "logps/chosen": -114.22438049316406, "logps/rejected": -1286.527099609375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.3678627610206604, "rewards/margins": 12.063667297363281, "rewards/rejected": -12.431530952453613, "step": 9000 }, { "epoch": 0.54, "learning_rate": 2.6110629568229647e-06, "logits/chosen": -2.816166400909424, "logits/rejected": -2.54646635055542, "logps/chosen": -105.74687194824219, "logps/rejected": -1147.75146484375, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.33271974325180054, "rewards/margins": 10.716184616088867, "rewards/rejected": -11.048904418945312, "step": 9010 }, { "epoch": 0.54, "learning_rate": 2.6058641328816425e-06, "logits/chosen": -2.8309741020202637, "logits/rejected": -2.5696823596954346, "logps/chosen": -127.2796859741211, "logps/rejected": -1033.6153564453125, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -0.5708521604537964, "rewards/margins": 9.325971603393555, "rewards/rejected": -9.896822929382324, "step": 9020 }, { "epoch": 0.54, "learning_rate": 2.6006648502735384e-06, "logits/chosen": -2.801985263824463, "logits/rejected": -2.563275098800659, "logps/chosen": -102.72383117675781, "logps/rejected": -1021.3406982421875, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -0.3138514757156372, "rewards/margins": 9.46950626373291, "rewards/rejected": -9.783357620239258, "step": 9030 }, { "epoch": 0.54, "learning_rate": 2.5954651315250543e-06, "logits/chosen": -2.792391538619995, "logits/rejected": -2.4843857288360596, "logps/chosen": -105.3338851928711, "logps/rejected": -1155.8475341796875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.3732788860797882, "rewards/margins": 10.75007438659668, "rewards/rejected": -11.12335205078125, "step": 9040 }, { "epoch": 0.54, "learning_rate": 2.5902649991644855e-06, "logits/chosen": -2.7931556701660156, "logits/rejected": -2.510646104812622, "logps/chosen": -123.06136322021484, "logps/rejected": -1246.3153076171875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.5123059749603271, "rewards/margins": 11.507547378540039, "rewards/rejected": -12.019853591918945, "step": 9050 }, { "epoch": 0.54, "learning_rate": 2.5850644757219177e-06, "logits/chosen": -2.795856475830078, "logits/rejected": -2.4899067878723145, "logps/chosen": -128.98214721679688, "logps/rejected": -1215.587646484375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.5626493692398071, "rewards/margins": 11.163839340209961, "rewards/rejected": -11.726489067077637, "step": 9060 }, { "epoch": 0.54, "learning_rate": 2.5798635837291304e-06, "logits/chosen": -2.774034023284912, "logits/rejected": -2.488168478012085, "logps/chosen": -124.50978088378906, "logps/rejected": -1135.6466064453125, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.5497738718986511, "rewards/margins": 10.363019943237305, "rewards/rejected": -10.91279411315918, "step": 9070 }, { "epoch": 0.54, "learning_rate": 2.5746623457194996e-06, "logits/chosen": -2.7708590030670166, "logits/rejected": -2.4828476905822754, "logps/chosen": -134.91110229492188, "logps/rejected": -1292.5389404296875, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -0.6623948216438293, "rewards/margins": 11.825265884399414, "rewards/rejected": -12.48766040802002, "step": 9080 }, { "epoch": 0.54, "learning_rate": 2.569460784227903e-06, "logits/chosen": -2.7878754138946533, "logits/rejected": -2.476685047149658, "logps/chosen": -168.18325805664062, "logps/rejected": -1254.955810546875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.993513286113739, "rewards/margins": 11.11749267578125, "rewards/rejected": -12.111004829406738, "step": 9090 }, { "epoch": 0.54, "learning_rate": 2.5642589217906164e-06, "logits/chosen": -2.7759673595428467, "logits/rejected": -2.442605972290039, "logps/chosen": -164.1595001220703, "logps/rejected": -1352.0487060546875, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.9402977228164673, "rewards/margins": 12.136119842529297, "rewards/rejected": -13.076417922973633, "step": 9100 }, { "epoch": 0.54, "learning_rate": 2.559056780945223e-06, "logits/chosen": -2.801144599914551, "logits/rejected": -2.4511032104492188, "logps/chosen": -183.38833618164062, "logps/rejected": -1290.93603515625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.1023266315460205, "rewards/margins": 11.362133026123047, "rewards/rejected": -12.464460372924805, "step": 9110 }, { "epoch": 0.54, "learning_rate": 2.5538543842305085e-06, "logits/chosen": -2.7863965034484863, "logits/rejected": -2.425858974456787, "logps/chosen": -187.49612426757812, "logps/rejected": -1302.1209716796875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.1713062524795532, "rewards/margins": 11.404998779296875, "rewards/rejected": -12.576306343078613, "step": 9120 }, { "epoch": 0.54, "learning_rate": 2.5486517541863696e-06, "logits/chosen": -2.7975218296051025, "logits/rejected": -2.4785380363464355, "logps/chosen": -182.64633178710938, "logps/rejected": -1255.8494873046875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.065992832183838, "rewards/margins": 11.067254066467285, "rewards/rejected": -12.133247375488281, "step": 9130 }, { "epoch": 0.55, "learning_rate": 2.5434489133537154e-06, "logits/chosen": -2.802154541015625, "logits/rejected": -2.4505929946899414, "logps/chosen": -189.68191528320312, "logps/rejected": -1365.123291015625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.1531507968902588, "rewards/margins": 12.054038047790527, "rewards/rejected": -13.207188606262207, "step": 9140 }, { "epoch": 0.55, "learning_rate": 2.5382458842743634e-06, "logits/chosen": -2.7340104579925537, "logits/rejected": -2.4547784328460693, "logps/chosen": -179.56491088867188, "logps/rejected": -1334.59765625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.1335508823394775, "rewards/margins": 11.76144790649414, "rewards/rejected": -12.894998550415039, "step": 9150 }, { "epoch": 0.55, "learning_rate": 2.53304268949095e-06, "logits/chosen": -2.7387959957122803, "logits/rejected": -2.469914197921753, "logps/chosen": -182.5727081298828, "logps/rejected": -1299.2421875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.1277105808258057, "rewards/margins": 11.406871795654297, "rewards/rejected": -12.534584045410156, "step": 9160 }, { "epoch": 0.55, "learning_rate": 2.5278393515468312e-06, "logits/chosen": -2.7731475830078125, "logits/rejected": -2.401132583618164, "logps/chosen": -172.27481079101562, "logps/rejected": -1232.1500244140625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.9934558868408203, "rewards/margins": 10.876480102539062, "rewards/rejected": -11.869935989379883, "step": 9170 }, { "epoch": 0.55, "learning_rate": 2.5226358929859793e-06, "logits/chosen": -2.8033432960510254, "logits/rejected": -2.4295883178710938, "logps/chosen": -208.0424041748047, "logps/rejected": -1171.468994140625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.3904781341552734, "rewards/margins": 9.89474868774414, "rewards/rejected": -11.28522777557373, "step": 9180 }, { "epoch": 0.55, "learning_rate": 2.517432336352891e-06, "logits/chosen": -2.7643942832946777, "logits/rejected": -2.405329704284668, "logps/chosen": -196.18893432617188, "logps/rejected": -1373.046875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.2262336015701294, "rewards/margins": 12.06359577178955, "rewards/rejected": -13.289830207824707, "step": 9190 }, { "epoch": 0.55, "learning_rate": 2.5122287041924897e-06, "logits/chosen": -2.773871660232544, "logits/rejected": -2.459120035171509, "logps/chosen": -154.21078491210938, "logps/rejected": -1331.728515625, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.8379950523376465, "rewards/margins": 12.044095993041992, "rewards/rejected": -12.88209056854248, "step": 9200 }, { "epoch": 0.55, "learning_rate": 2.507025019050022e-06, "logits/chosen": -2.7721176147460938, "logits/rejected": -2.4530961513519287, "logps/chosen": -180.89395141601562, "logps/rejected": -1257.1702880859375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1186147928237915, "rewards/margins": 11.023232460021973, "rewards/rejected": -12.141847610473633, "step": 9210 }, { "epoch": 0.55, "learning_rate": 2.5018213034709683e-06, "logits/chosen": -2.7496094703674316, "logits/rejected": -2.391211748123169, "logps/chosen": -146.93063354492188, "logps/rejected": -1182.9371337890625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.7501407265663147, "rewards/margins": 10.64448356628418, "rewards/rejected": -11.394624710083008, "step": 9220 }, { "epoch": 0.55, "learning_rate": 2.496617580000937e-06, "logits/chosen": -2.751349925994873, "logits/rejected": -2.372615098953247, "logps/chosen": -143.75375366210938, "logps/rejected": -1195.037109375, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.7807610034942627, "rewards/margins": 10.714524269104004, "rewards/rejected": -11.495285034179688, "step": 9230 }, { "epoch": 0.55, "learning_rate": 2.491413871185574e-06, "logits/chosen": -2.7620747089385986, "logits/rejected": -2.4743142127990723, "logps/chosen": -162.22793579101562, "logps/rejected": -1202.9722900390625, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9299749135971069, "rewards/margins": 10.657004356384277, "rewards/rejected": -11.586979866027832, "step": 9240 }, { "epoch": 0.55, "learning_rate": 2.486210199570459e-06, "logits/chosen": -2.766876697540283, "logits/rejected": -2.427877902984619, "logps/chosen": -151.55001831054688, "logps/rejected": -1301.142822265625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.8024405241012573, "rewards/margins": 11.776586532592773, "rewards/rejected": -12.57902717590332, "step": 9250 }, { "epoch": 0.55, "learning_rate": 2.4810065877010137e-06, "logits/chosen": -2.75538969039917, "logits/rejected": -2.447815418243408, "logps/chosen": -154.34844970703125, "logps/rejected": -1244.7877197265625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.7802685499191284, "rewards/margins": 11.223494529724121, "rewards/rejected": -12.003763198852539, "step": 9260 }, { "epoch": 0.55, "learning_rate": 2.475803058122397e-06, "logits/chosen": -2.788378953933716, "logits/rejected": -2.407958507537842, "logps/chosen": -122.7944564819336, "logps/rejected": -1211.561279296875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.5630209445953369, "rewards/margins": 11.120402336120605, "rewards/rejected": -11.68342399597168, "step": 9270 }, { "epoch": 0.55, "learning_rate": 2.470599633379415e-06, "logits/chosen": -2.7260079383850098, "logits/rejected": -2.438333511352539, "logps/chosen": -156.7364501953125, "logps/rejected": -1286.427001953125, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.8463515043258667, "rewards/margins": 11.567557334899902, "rewards/rejected": -12.413908958435059, "step": 9280 }, { "epoch": 0.55, "learning_rate": 2.465396336016417e-06, "logits/chosen": -2.7842154502868652, "logits/rejected": -2.4713377952575684, "logps/chosen": -132.51913452148438, "logps/rejected": -1383.843505859375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.5946775674819946, "rewards/margins": 12.794930458068848, "rewards/rejected": -13.389608383178711, "step": 9290 }, { "epoch": 0.55, "learning_rate": 2.460193188577201e-06, "logits/chosen": -2.722109794616699, "logits/rejected": -2.302485704421997, "logps/chosen": -175.0625762939453, "logps/rejected": -1191.669921875, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -1.0287086963653564, "rewards/margins": 10.449769973754883, "rewards/rejected": -11.47847843170166, "step": 9300 }, { "epoch": 0.56, "learning_rate": 2.454990213604917e-06, "logits/chosen": -2.763545274734497, "logits/rejected": -2.3513073921203613, "logps/chosen": -131.9331817626953, "logps/rejected": -1234.7012939453125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.6387866735458374, "rewards/margins": 11.270801544189453, "rewards/rejected": -11.909588813781738, "step": 9310 }, { "epoch": 0.56, "learning_rate": 2.449787433641965e-06, "logits/chosen": -2.747870445251465, "logits/rejected": -2.358887195587158, "logps/chosen": -124.69953918457031, "logps/rejected": -1216.431884765625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.5647678971290588, "rewards/margins": 11.141289710998535, "rewards/rejected": -11.706056594848633, "step": 9320 }, { "epoch": 0.56, "learning_rate": 2.4445848712299027e-06, "logits/chosen": -2.7382333278656006, "logits/rejected": -2.403057813644409, "logps/chosen": -151.37469482421875, "logps/rejected": -1220.283935546875, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.8205707669258118, "rewards/margins": 10.940628051757812, "rewards/rejected": -11.761198043823242, "step": 9330 }, { "epoch": 0.56, "learning_rate": 2.4393825489093438e-06, "logits/chosen": -2.746755599975586, "logits/rejected": -2.336787700653076, "logps/chosen": -138.52200317382812, "logps/rejected": -1274.541748046875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.691260039806366, "rewards/margins": 11.603950500488281, "rewards/rejected": -12.295210838317871, "step": 9340 }, { "epoch": 0.56, "learning_rate": 2.434180489219863e-06, "logits/chosen": -2.764892339706421, "logits/rejected": -2.375894546508789, "logps/chosen": -152.8723907470703, "logps/rejected": -1170.1402587890625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.868782639503479, "rewards/margins": 10.39822006225586, "rewards/rejected": -11.267003059387207, "step": 9350 }, { "epoch": 0.56, "learning_rate": 2.428978714699894e-06, "logits/chosen": -2.722238540649414, "logits/rejected": -2.366903305053711, "logps/chosen": -167.43258666992188, "logps/rejected": -1381.462158203125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.9342926144599915, "rewards/margins": 12.439630508422852, "rewards/rejected": -13.373922348022461, "step": 9360 }, { "epoch": 0.56, "learning_rate": 2.4237772478866403e-06, "logits/chosen": -2.7401957511901855, "logits/rejected": -2.2874555587768555, "logps/chosen": -153.06741333007812, "logps/rejected": -1248.850830078125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.777852475643158, "rewards/margins": 11.264423370361328, "rewards/rejected": -12.042276382446289, "step": 9370 }, { "epoch": 0.56, "learning_rate": 2.4185761113159677e-06, "logits/chosen": -2.7049171924591064, "logits/rejected": -2.3371849060058594, "logps/chosen": -148.55088806152344, "logps/rejected": -1172.7884521484375, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.829466700553894, "rewards/margins": 10.430952072143555, "rewards/rejected": -11.260418891906738, "step": 9380 }, { "epoch": 0.56, "learning_rate": 2.4133753275223114e-06, "logits/chosen": -2.713712692260742, "logits/rejected": -2.361161708831787, "logps/chosen": -174.9651641845703, "logps/rejected": -1227.4547119140625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.0444750785827637, "rewards/margins": 10.787628173828125, "rewards/rejected": -11.832103729248047, "step": 9390 }, { "epoch": 0.56, "learning_rate": 2.4081749190385818e-06, "logits/chosen": -2.7402079105377197, "logits/rejected": -2.446700096130371, "logps/chosen": -144.0089111328125, "logps/rejected": -1308.43115234375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.7618849873542786, "rewards/margins": 11.874956130981445, "rewards/rejected": -12.636838912963867, "step": 9400 }, { "epoch": 0.56, "learning_rate": 2.402974908396059e-06, "logits/chosen": -2.717909336090088, "logits/rejected": -2.3127570152282715, "logps/chosen": -156.04469299316406, "logps/rejected": -1263.321533203125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.8087388873100281, "rewards/margins": 11.377927780151367, "rewards/rejected": -12.186666488647461, "step": 9410 }, { "epoch": 0.56, "learning_rate": 2.397775318124302e-06, "logits/chosen": -2.6970152854919434, "logits/rejected": -2.3667407035827637, "logps/chosen": -150.6900177001953, "logps/rejected": -1348.1600341796875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.8574720621109009, "rewards/margins": 12.17613697052002, "rewards/rejected": -13.033609390258789, "step": 9420 }, { "epoch": 0.56, "learning_rate": 2.3925761707510484e-06, "logits/chosen": -2.7813735008239746, "logits/rejected": -2.4709270000457764, "logps/chosen": -153.06045532226562, "logps/rejected": -1096.801025390625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.8275460004806519, "rewards/margins": 9.691169738769531, "rewards/rejected": -10.518715858459473, "step": 9430 }, { "epoch": 0.56, "learning_rate": 2.387377488802116e-06, "logits/chosen": -2.67199969291687, "logits/rejected": -2.2829158306121826, "logps/chosen": -188.1942138671875, "logps/rejected": -1222.33203125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.120816946029663, "rewards/margins": 10.654254913330078, "rewards/rejected": -11.77507209777832, "step": 9440 }, { "epoch": 0.56, "learning_rate": 2.382179294801305e-06, "logits/chosen": -2.743873357772827, "logits/rejected": -2.3810336589813232, "logps/chosen": -136.88839721679688, "logps/rejected": -1203.7110595703125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.6752172112464905, "rewards/margins": 10.918087005615234, "rewards/rejected": -11.593304634094238, "step": 9450 }, { "epoch": 0.56, "learning_rate": 2.376981611270305e-06, "logits/chosen": -2.6915335655212402, "logits/rejected": -2.3777434825897217, "logps/chosen": -150.03700256347656, "logps/rejected": -1328.369873046875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.7870742678642273, "rewards/margins": 12.051506042480469, "rewards/rejected": -12.838579177856445, "step": 9460 }, { "epoch": 0.56, "learning_rate": 2.3717844607285905e-06, "logits/chosen": -2.7596230506896973, "logits/rejected": -2.3682141304016113, "logps/chosen": -152.27685546875, "logps/rejected": -1317.722900390625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.8494324684143066, "rewards/margins": 11.87131404876709, "rewards/rejected": -12.720746994018555, "step": 9470 }, { "epoch": 0.57, "learning_rate": 2.3665878656933285e-06, "logits/chosen": -2.7440428733825684, "logits/rejected": -2.443173885345459, "logps/chosen": -170.11737060546875, "logps/rejected": -1196.157958984375, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.021935224533081, "rewards/margins": 10.493707656860352, "rewards/rejected": -11.515642166137695, "step": 9480 }, { "epoch": 0.57, "learning_rate": 2.3613918486792777e-06, "logits/chosen": -2.753974437713623, "logits/rejected": -2.3040335178375244, "logps/chosen": -167.3306884765625, "logps/rejected": -1311.3167724609375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.975999653339386, "rewards/margins": 11.682435989379883, "rewards/rejected": -12.65843391418457, "step": 9490 }, { "epoch": 0.57, "learning_rate": 2.3561964321986963e-06, "logits/chosen": -2.740182399749756, "logits/rejected": -2.3473961353302, "logps/chosen": -140.16009521484375, "logps/rejected": -1294.11328125, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.655563473701477, "rewards/margins": 11.846887588500977, "rewards/rejected": -12.50245189666748, "step": 9500 }, { "epoch": 0.57, "learning_rate": 2.351001638761236e-06, "logits/chosen": -2.7467072010040283, "logits/rejected": -2.3630900382995605, "logps/chosen": -169.1416473388672, "logps/rejected": -1334.5823974609375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.0012924671173096, "rewards/margins": 11.913200378417969, "rewards/rejected": -12.9144926071167, "step": 9510 }, { "epoch": 0.57, "learning_rate": 2.34580749087385e-06, "logits/chosen": -2.724733829498291, "logits/rejected": -2.36202335357666, "logps/chosen": -146.73130798339844, "logps/rejected": -1197.087890625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.7576860785484314, "rewards/margins": 10.775758743286133, "rewards/rejected": -11.533442497253418, "step": 9520 }, { "epoch": 0.57, "learning_rate": 2.3406140110406984e-06, "logits/chosen": -2.717862367630005, "logits/rejected": -2.3901379108428955, "logps/chosen": -132.48936462402344, "logps/rejected": -1347.44140625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.5981499552726746, "rewards/margins": 12.440714836120605, "rewards/rejected": -13.038862228393555, "step": 9530 }, { "epoch": 0.57, "learning_rate": 2.3354212217630428e-06, "logits/chosen": -2.768568515777588, "logits/rejected": -2.32222318649292, "logps/chosen": -167.62844848632812, "logps/rejected": -1297.629638671875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.0196685791015625, "rewards/margins": 11.51185417175293, "rewards/rejected": -12.531522750854492, "step": 9540 }, { "epoch": 0.57, "learning_rate": 2.3302291455391525e-06, "logits/chosen": -2.7186317443847656, "logits/rejected": -2.332707166671753, "logps/chosen": -173.63253784179688, "logps/rejected": -1247.8470458984375, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -0.9834288358688354, "rewards/margins": 11.050291061401367, "rewards/rejected": -12.033719062805176, "step": 9550 }, { "epoch": 0.57, "learning_rate": 2.3250378048642117e-06, "logits/chosen": -2.722168207168579, "logits/rejected": -2.329042911529541, "logps/chosen": -136.57289123535156, "logps/rejected": -1243.9241943359375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.6665438413619995, "rewards/margins": 11.333455085754395, "rewards/rejected": -11.999998092651367, "step": 9560 }, { "epoch": 0.57, "learning_rate": 2.3198472222302144e-06, "logits/chosen": -2.715280294418335, "logits/rejected": -2.345092296600342, "logps/chosen": -142.8782196044922, "logps/rejected": -1219.1258544921875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.6935387849807739, "rewards/margins": 11.052824974060059, "rewards/rejected": -11.746365547180176, "step": 9570 }, { "epoch": 0.57, "learning_rate": 2.3146574201258697e-06, "logits/chosen": -2.7101492881774902, "logits/rejected": -2.253700017929077, "logps/chosen": -123.58675384521484, "logps/rejected": -1261.567626953125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.4841700494289398, "rewards/margins": 11.675737380981445, "rewards/rejected": -12.159907341003418, "step": 9580 }, { "epoch": 0.57, "learning_rate": 2.309468421036509e-06, "logits/chosen": -2.712399959564209, "logits/rejected": -2.3561177253723145, "logps/chosen": -137.35348510742188, "logps/rejected": -1089.5350341796875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.7036619782447815, "rewards/margins": 9.754589080810547, "rewards/rejected": -10.458251953125, "step": 9590 }, { "epoch": 0.57, "learning_rate": 2.3042802474439805e-06, "logits/chosen": -2.71528959274292, "logits/rejected": -2.3430187702178955, "logps/chosen": -147.6951446533203, "logps/rejected": -1242.837158203125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.7670568227767944, "rewards/margins": 11.235685348510742, "rewards/rejected": -12.002740859985352, "step": 9600 }, { "epoch": 0.57, "learning_rate": 2.299092921826556e-06, "logits/chosen": -2.7347283363342285, "logits/rejected": -2.389584541320801, "logps/chosen": -143.44552612304688, "logps/rejected": -1186.394775390625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.7385128736495972, "rewards/margins": 10.69105052947998, "rewards/rejected": -11.429563522338867, "step": 9610 }, { "epoch": 0.57, "learning_rate": 2.293906466658837e-06, "logits/chosen": -2.719064235687256, "logits/rejected": -2.3804092407226562, "logps/chosen": -141.65573120117188, "logps/rejected": -1206.084716796875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.6805785894393921, "rewards/margins": 10.930667877197266, "rewards/rejected": -11.611247062683105, "step": 9620 }, { "epoch": 0.57, "learning_rate": 2.288720904411651e-06, "logits/chosen": -2.7025928497314453, "logits/rejected": -2.397294282913208, "logps/chosen": -156.29571533203125, "logps/rejected": -1319.673583984375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.8888632655143738, "rewards/margins": 11.861442565917969, "rewards/rejected": -12.750304222106934, "step": 9630 }, { "epoch": 0.57, "learning_rate": 2.283536257551955e-06, "logits/chosen": -2.6957173347473145, "logits/rejected": -2.3164567947387695, "logps/chosen": -174.2127227783203, "logps/rejected": -1319.6488037109375, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.0500398874282837, "rewards/margins": 11.702432632446289, "rewards/rejected": -12.752473831176758, "step": 9640 }, { "epoch": 0.58, "learning_rate": 2.278352548542744e-06, "logits/chosen": -2.6827242374420166, "logits/rejected": -2.344632625579834, "logps/chosen": -176.06137084960938, "logps/rejected": -1327.793212890625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.0405280590057373, "rewards/margins": 11.787446975708008, "rewards/rejected": -12.827974319458008, "step": 9650 }, { "epoch": 0.58, "learning_rate": 2.2731697998429485e-06, "logits/chosen": -2.7394208908081055, "logits/rejected": -2.39331316947937, "logps/chosen": -162.4534454345703, "logps/rejected": -1293.4583740234375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.9437228441238403, "rewards/margins": 11.549233436584473, "rewards/rejected": -12.492956161499023, "step": 9660 }, { "epoch": 0.58, "learning_rate": 2.267988033907335e-06, "logits/chosen": -2.7272305488586426, "logits/rejected": -2.384800672531128, "logps/chosen": -150.8538360595703, "logps/rejected": -1176.474365234375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.7473594546318054, "rewards/margins": 10.571760177612305, "rewards/rejected": -11.319120407104492, "step": 9670 }, { "epoch": 0.58, "learning_rate": 2.2628072731864186e-06, "logits/chosen": -2.6871254444122314, "logits/rejected": -2.3054261207580566, "logps/chosen": -139.70700073242188, "logps/rejected": -1352.862060546875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.6979607343673706, "rewards/margins": 12.369781494140625, "rewards/rejected": -13.067741394042969, "step": 9680 }, { "epoch": 0.58, "learning_rate": 2.257627540126353e-06, "logits/chosen": -2.7221837043762207, "logits/rejected": -2.3884360790252686, "logps/chosen": -177.17596435546875, "logps/rejected": -1203.695556640625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.0383026599884033, "rewards/margins": 10.544049263000488, "rewards/rejected": -11.582351684570312, "step": 9690 }, { "epoch": 0.58, "learning_rate": 2.2524488571688407e-06, "logits/chosen": -2.740537643432617, "logits/rejected": -2.3334858417510986, "logps/chosen": -147.5407257080078, "logps/rejected": -1223.970458984375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.7409318089485168, "rewards/margins": 11.06391429901123, "rewards/rejected": -11.804845809936523, "step": 9700 }, { "epoch": 0.58, "learning_rate": 2.247271246751039e-06, "logits/chosen": -2.6774489879608154, "logits/rejected": -2.2068538665771484, "logps/chosen": -143.45022583007812, "logps/rejected": -1324.55078125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.7117710113525391, "rewards/margins": 12.084896087646484, "rewards/rejected": -12.796667098999023, "step": 9710 }, { "epoch": 0.58, "learning_rate": 2.242094731305452e-06, "logits/chosen": -2.7038002014160156, "logits/rejected": -2.322172164916992, "logps/chosen": -149.22738647460938, "logps/rejected": -1293.8321533203125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.8422451019287109, "rewards/margins": 11.669540405273438, "rewards/rejected": -12.511785507202148, "step": 9720 }, { "epoch": 0.58, "learning_rate": 2.236919333259844e-06, "logits/chosen": -2.710664987564087, "logits/rejected": -2.3422932624816895, "logps/chosen": -174.58734130859375, "logps/rejected": -1267.180419921875, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -1.0178511142730713, "rewards/margins": 11.215797424316406, "rewards/rejected": -12.233648300170898, "step": 9730 }, { "epoch": 0.58, "learning_rate": 2.231745075037137e-06, "logits/chosen": -2.7135777473449707, "logits/rejected": -2.3462371826171875, "logps/chosen": -158.44654846191406, "logps/rejected": -1228.8057861328125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.8770634531974792, "rewards/margins": 10.965360641479492, "rewards/rejected": -11.842424392700195, "step": 9740 }, { "epoch": 0.58, "learning_rate": 2.2265719790553147e-06, "logits/chosen": -2.71870756149292, "logits/rejected": -2.393075466156006, "logps/chosen": -142.76229858398438, "logps/rejected": -1248.034912109375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7533028721809387, "rewards/margins": 11.273712158203125, "rewards/rejected": -12.027015686035156, "step": 9750 }, { "epoch": 0.58, "learning_rate": 2.221400067727323e-06, "logits/chosen": -2.7411274909973145, "logits/rejected": -2.378117799758911, "logps/chosen": -148.7209014892578, "logps/rejected": -1237.346923828125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.7532230615615845, "rewards/margins": 11.163887977600098, "rewards/rejected": -11.917110443115234, "step": 9760 }, { "epoch": 0.58, "learning_rate": 2.21622936346098e-06, "logits/chosen": -2.7367300987243652, "logits/rejected": -2.3767924308776855, "logps/chosen": -133.9344482421875, "logps/rejected": -1165.0394287109375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.6389613151550293, "rewards/margins": 10.568251609802246, "rewards/rejected": -11.207212448120117, "step": 9770 }, { "epoch": 0.58, "learning_rate": 2.2110598886588693e-06, "logits/chosen": -2.705064296722412, "logits/rejected": -2.3200974464416504, "logps/chosen": -123.133056640625, "logps/rejected": -1162.938720703125, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.488416850566864, "rewards/margins": 10.700271606445312, "rewards/rejected": -11.188688278198242, "step": 9780 }, { "epoch": 0.58, "learning_rate": 2.2058916657182493e-06, "logits/chosen": -2.7131052017211914, "logits/rejected": -2.3258042335510254, "logps/chosen": -119.3879165649414, "logps/rejected": -1254.6146240234375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.5529167056083679, "rewards/margins": 11.558649063110352, "rewards/rejected": -12.111566543579102, "step": 9790 }, { "epoch": 0.58, "learning_rate": 2.2007247170309567e-06, "logits/chosen": -2.676908016204834, "logits/rejected": -2.319890260696411, "logps/chosen": -120.99007415771484, "logps/rejected": -1231.625244140625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.5064847469329834, "rewards/margins": 11.367959022521973, "rewards/rejected": -11.874443054199219, "step": 9800 }, { "epoch": 0.58, "learning_rate": 2.195559064983304e-06, "logits/chosen": -2.7641353607177734, "logits/rejected": -2.2611196041107178, "logps/chosen": -107.12109375, "logps/rejected": -1164.3746337890625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.40743571519851685, "rewards/margins": 10.78770637512207, "rewards/rejected": -11.195141792297363, "step": 9810 }, { "epoch": 0.59, "learning_rate": 2.1903947319559884e-06, "logits/chosen": -2.672227382659912, "logits/rejected": -2.273566484451294, "logps/chosen": -114.71855163574219, "logps/rejected": -1231.4246826171875, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.39840349555015564, "rewards/margins": 11.47371768951416, "rewards/rejected": -11.872122764587402, "step": 9820 }, { "epoch": 0.59, "learning_rate": 2.1852317403239907e-06, "logits/chosen": -2.728119134902954, "logits/rejected": -2.2599117755889893, "logps/chosen": -103.9937973022461, "logps/rejected": -1233.3365478515625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.34447240829467773, "rewards/margins": 11.541015625, "rewards/rejected": -11.885488510131836, "step": 9830 }, { "epoch": 0.59, "learning_rate": 2.180070112456482e-06, "logits/chosen": -2.6759068965911865, "logits/rejected": -2.2568295001983643, "logps/chosen": -117.73927307128906, "logps/rejected": -1246.0224609375, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.4468896985054016, "rewards/margins": 11.570645332336426, "rewards/rejected": -12.017535209655762, "step": 9840 }, { "epoch": 0.59, "learning_rate": 2.174909870716721e-06, "logits/chosen": -2.692049741744995, "logits/rejected": -2.3022232055664062, "logps/chosen": -99.54176330566406, "logps/rejected": -1176.357421875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.2951687276363373, "rewards/margins": 11.026335716247559, "rewards/rejected": -11.321505546569824, "step": 9850 }, { "epoch": 0.59, "learning_rate": 2.169751037461966e-06, "logits/chosen": -2.727674961090088, "logits/rejected": -2.2472915649414062, "logps/chosen": -102.3802490234375, "logps/rejected": -1200.757568359375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.2733840048313141, "rewards/margins": 11.29835033416748, "rewards/rejected": -11.571733474731445, "step": 9860 }, { "epoch": 0.59, "learning_rate": 2.1645936350433692e-06, "logits/chosen": -2.675719738006592, "logits/rejected": -2.3521580696105957, "logps/chosen": -122.25919342041016, "logps/rejected": -1234.517578125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.5146879553794861, "rewards/margins": 11.389273643493652, "rewards/rejected": -11.903961181640625, "step": 9870 }, { "epoch": 0.59, "learning_rate": 2.159437685805883e-06, "logits/chosen": -2.710116386413574, "logits/rejected": -2.3138279914855957, "logps/chosen": -111.29341125488281, "logps/rejected": -1215.350341796875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.3928848206996918, "rewards/margins": 11.326349258422852, "rewards/rejected": -11.71923542022705, "step": 9880 }, { "epoch": 0.59, "learning_rate": 2.154283212088168e-06, "logits/chosen": -2.712231397628784, "logits/rejected": -2.2694108486175537, "logps/chosen": -112.117431640625, "logps/rejected": -1220.748046875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.4200320243835449, "rewards/margins": 11.336292266845703, "rewards/rejected": -11.756322860717773, "step": 9890 }, { "epoch": 0.59, "learning_rate": 2.149130236222487e-06, "logits/chosen": -2.702179431915283, "logits/rejected": -2.3258004188537598, "logps/chosen": -109.50498962402344, "logps/rejected": -1042.2923583984375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.3576398491859436, "rewards/margins": 9.622787475585938, "rewards/rejected": -9.980427742004395, "step": 9900 }, { "epoch": 0.59, "learning_rate": 2.143978780534616e-06, "logits/chosen": -2.697065830230713, "logits/rejected": -2.3736064434051514, "logps/chosen": -118.19590759277344, "logps/rejected": -1157.2821044921875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.4652016758918762, "rewards/margins": 10.67738151550293, "rewards/rejected": -11.142583847045898, "step": 9910 }, { "epoch": 0.59, "learning_rate": 2.138828867343746e-06, "logits/chosen": -2.704310417175293, "logits/rejected": -2.254875421524048, "logps/chosen": -121.0136489868164, "logps/rejected": -1098.206787109375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.4516844153404236, "rewards/margins": 10.086403846740723, "rewards/rejected": -10.538087844848633, "step": 9920 }, { "epoch": 0.59, "learning_rate": 2.1336805189623813e-06, "logits/chosen": -2.703275680541992, "logits/rejected": -2.268481731414795, "logps/chosen": -132.83358764648438, "logps/rejected": -1127.052490234375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.5715658664703369, "rewards/margins": 10.252817153930664, "rewards/rejected": -10.824382781982422, "step": 9930 }, { "epoch": 0.59, "learning_rate": 2.128533757696248e-06, "logits/chosen": -2.7110695838928223, "logits/rejected": -2.2251057624816895, "logps/chosen": -132.5632781982422, "logps/rejected": -1197.8358154296875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.6174434423446655, "rewards/margins": 10.920549392700195, "rewards/rejected": -11.537992477416992, "step": 9940 }, { "epoch": 0.59, "learning_rate": 2.123388605844198e-06, "logits/chosen": -2.7473742961883545, "logits/rejected": -2.340789318084717, "logps/chosen": -113.72737121582031, "logps/rejected": -1233.451416015625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.4353352189064026, "rewards/margins": 11.462714195251465, "rewards/rejected": -11.898050308227539, "step": 9950 }, { "epoch": 0.59, "learning_rate": 2.1182450856981066e-06, "logits/chosen": -2.691542148590088, "logits/rejected": -2.3198623657226562, "logps/chosen": -134.77635192871094, "logps/rejected": -1283.7322998046875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.6292972564697266, "rewards/margins": 11.758000373840332, "rewards/rejected": -12.387296676635742, "step": 9960 }, { "epoch": 0.59, "learning_rate": 2.113103219542782e-06, "logits/chosen": -2.729349136352539, "logits/rejected": -2.3680644035339355, "logps/chosen": -128.93963623046875, "logps/rejected": -1161.668701171875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.5358244180679321, "rewards/margins": 10.645295143127441, "rewards/rejected": -11.181119918823242, "step": 9970 }, { "epoch": 0.6, "learning_rate": 2.107963029655867e-06, "logits/chosen": -2.654165506362915, "logits/rejected": -2.2560324668884277, "logps/chosen": -126.63565826416016, "logps/rejected": -1294.947509765625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.5407634973526001, "rewards/margins": 11.96657657623291, "rewards/rejected": -12.507340431213379, "step": 9980 }, { "epoch": 0.6, "learning_rate": 2.1028245383077392e-06, "logits/chosen": -2.671581983566284, "logits/rejected": -2.2404465675354004, "logps/chosen": -130.98965454101562, "logps/rejected": -1157.130126953125, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.6222785711288452, "rewards/margins": 10.513528823852539, "rewards/rejected": -11.135807991027832, "step": 9990 }, { "epoch": 0.6, "learning_rate": 2.0976877677614183e-06, "logits/chosen": -2.751434803009033, "logits/rejected": -2.2885284423828125, "logps/chosen": -141.47140502929688, "logps/rejected": -1288.7344970703125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.6786977052688599, "rewards/margins": 11.744922637939453, "rewards/rejected": -12.423620223999023, "step": 10000 }, { "epoch": 0.6, "learning_rate": 2.09255274027247e-06, "logits/chosen": -2.715000629425049, "logits/rejected": -2.3316192626953125, "logps/chosen": -143.27122497558594, "logps/rejected": -1246.9996337890625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.6987273097038269, "rewards/margins": 11.320338249206543, "rewards/rejected": -12.019065856933594, "step": 10010 }, { "epoch": 0.6, "learning_rate": 2.087419478088906e-06, "logits/chosen": -2.6851000785827637, "logits/rejected": -2.3289437294006348, "logps/chosen": -131.16859436035156, "logps/rejected": -1238.6689453125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.6639537811279297, "rewards/margins": 11.288864135742188, "rewards/rejected": -11.9528169631958, "step": 10020 }, { "epoch": 0.6, "learning_rate": 2.0822880034510897e-06, "logits/chosen": -2.6702263355255127, "logits/rejected": -2.3204562664031982, "logps/chosen": -151.24459838867188, "logps/rejected": -1240.671142578125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.7986651659011841, "rewards/margins": 11.178454399108887, "rewards/rejected": -11.977119445800781, "step": 10030 }, { "epoch": 0.6, "learning_rate": 2.077158338591641e-06, "logits/chosen": -2.722813367843628, "logits/rejected": -2.3071341514587402, "logps/chosen": -153.7009735107422, "logps/rejected": -1224.3828125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.8373913764953613, "rewards/margins": 10.950312614440918, "rewards/rejected": -11.787703514099121, "step": 10040 }, { "epoch": 0.6, "learning_rate": 2.0720305057353384e-06, "logits/chosen": -2.7021384239196777, "logits/rejected": -2.377882242202759, "logps/chosen": -166.43902587890625, "logps/rejected": -1259.81396484375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.8993788957595825, "rewards/margins": 11.235876083374023, "rewards/rejected": -12.135255813598633, "step": 10050 }, { "epoch": 0.6, "learning_rate": 2.0669045270990216e-06, "logits/chosen": -2.687422752380371, "logits/rejected": -2.267098903656006, "logps/chosen": -169.32257080078125, "logps/rejected": -1229.2327880859375, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -1.0353240966796875, "rewards/margins": 10.812653541564941, "rewards/rejected": -11.847977638244629, "step": 10060 }, { "epoch": 0.6, "learning_rate": 2.0617804248914992e-06, "logits/chosen": -2.7357964515686035, "logits/rejected": -2.3649051189422607, "logps/chosen": -139.17788696289062, "logps/rejected": -1189.037353515625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.7059356570243835, "rewards/margins": 10.748687744140625, "rewards/rejected": -11.454623222351074, "step": 10070 }, { "epoch": 0.6, "learning_rate": 2.056658221313449e-06, "logits/chosen": -2.6646952629089355, "logits/rejected": -2.3168365955352783, "logps/chosen": -150.77810668945312, "logps/rejected": -1313.236083984375, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -0.7881194353103638, "rewards/margins": 11.894584655761719, "rewards/rejected": -12.68270492553711, "step": 10080 }, { "epoch": 0.6, "learning_rate": 2.0515379385573205e-06, "logits/chosen": -2.690781831741333, "logits/rejected": -2.2457027435302734, "logps/chosen": -160.4130401611328, "logps/rejected": -1176.741455078125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.8455179333686829, "rewards/margins": 10.480415344238281, "rewards/rejected": -11.325933456420898, "step": 10090 }, { "epoch": 0.6, "learning_rate": 2.0464195988072454e-06, "logits/chosen": -2.6529228687286377, "logits/rejected": -2.297720432281494, "logps/chosen": -173.38729858398438, "logps/rejected": -1276.4019775390625, "loss": 0.0351, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.0520063638687134, "rewards/margins": 11.274874687194824, "rewards/rejected": -12.326879501342773, "step": 10100 }, { "epoch": 0.6, "learning_rate": 2.041303224238934e-06, "logits/chosen": -2.6802988052368164, "logits/rejected": -2.221172571182251, "logps/chosen": -143.85702514648438, "logps/rejected": -1220.127685546875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.7228442430496216, "rewards/margins": 11.044290542602539, "rewards/rejected": -11.767134666442871, "step": 10110 }, { "epoch": 0.6, "learning_rate": 2.036188837019582e-06, "logits/chosen": -2.6976428031921387, "logits/rejected": -2.2647860050201416, "logps/chosen": -148.67123413085938, "logps/rejected": -1270.944580078125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.786220908164978, "rewards/margins": 11.47563648223877, "rewards/rejected": -12.261857032775879, "step": 10120 }, { "epoch": 0.6, "learning_rate": 2.031076459307777e-06, "logits/chosen": -2.704453468322754, "logits/rejected": -2.2222750186920166, "logps/chosen": -158.06971740722656, "logps/rejected": -1266.3482666015625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.8916547894477844, "rewards/margins": 11.33264446258545, "rewards/rejected": -12.224299430847168, "step": 10130 }, { "epoch": 0.6, "learning_rate": 2.0259661132533983e-06, "logits/chosen": -2.7022762298583984, "logits/rejected": -2.2719624042510986, "logps/chosen": -170.3082733154297, "logps/rejected": -1208.604736328125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.9979127645492554, "rewards/margins": 10.648189544677734, "rewards/rejected": -11.646102905273438, "step": 10140 }, { "epoch": 0.61, "learning_rate": 2.020857820997524e-06, "logits/chosen": -2.7469606399536133, "logits/rejected": -2.2794220447540283, "logps/chosen": -156.23287963867188, "logps/rejected": -1146.036376953125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.8729828596115112, "rewards/margins": 10.147176742553711, "rewards/rejected": -11.020161628723145, "step": 10150 }, { "epoch": 0.61, "learning_rate": 2.015751604672333e-06, "logits/chosen": -2.650033712387085, "logits/rejected": -2.2324442863464355, "logps/chosen": -175.41061401367188, "logps/rejected": -1169.69482421875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.044511079788208, "rewards/margins": 10.20124340057373, "rewards/rejected": -11.245756149291992, "step": 10160 }, { "epoch": 0.61, "learning_rate": 2.010647486401011e-06, "logits/chosen": -2.741267681121826, "logits/rejected": -2.277954578399658, "logps/chosen": -169.172119140625, "logps/rejected": -1255.72998046875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.9312788248062134, "rewards/margins": 11.190518379211426, "rewards/rejected": -12.121795654296875, "step": 10170 }, { "epoch": 0.61, "learning_rate": 2.005545488297652e-06, "logits/chosen": -2.6606040000915527, "logits/rejected": -2.2214267253875732, "logps/chosen": -159.6976776123047, "logps/rejected": -1201.581298828125, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -0.900174617767334, "rewards/margins": 10.674308776855469, "rewards/rejected": -11.574483871459961, "step": 10180 }, { "epoch": 0.61, "learning_rate": 2.0004456324671673e-06, "logits/chosen": -2.6660828590393066, "logits/rejected": -2.1683387756347656, "logps/chosen": -176.6342010498047, "logps/rejected": -1268.905517578125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.0480890274047852, "rewards/margins": 11.187259674072266, "rewards/rejected": -12.235349655151367, "step": 10190 }, { "epoch": 0.61, "learning_rate": 1.9953479410051833e-06, "logits/chosen": -2.725188732147217, "logits/rejected": -2.3086845874786377, "logps/chosen": -190.69775390625, "logps/rejected": -1240.564453125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.1602516174316406, "rewards/margins": 10.803372383117676, "rewards/rejected": -11.963623046875, "step": 10200 }, { "epoch": 0.61, "learning_rate": 1.9902524359979494e-06, "logits/chosen": -2.7197043895721436, "logits/rejected": -2.2445409297943115, "logps/chosen": -166.06114196777344, "logps/rejected": -1292.9495849609375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.9960309863090515, "rewards/margins": 11.476550102233887, "rewards/rejected": -12.472580909729004, "step": 10210 }, { "epoch": 0.61, "learning_rate": 1.985159139522245e-06, "logits/chosen": -2.6791765689849854, "logits/rejected": -2.278733491897583, "logps/chosen": -186.21365356445312, "logps/rejected": -1187.319091796875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.1369743347167969, "rewards/margins": 10.308209419250488, "rewards/rejected": -11.445182800292969, "step": 10220 }, { "epoch": 0.61, "learning_rate": 1.9800680736452773e-06, "logits/chosen": -2.7204699516296387, "logits/rejected": -2.27595853805542, "logps/chosen": -171.42391967773438, "logps/rejected": -1147.065185546875, "loss": 0.0136, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.0061955451965332, "rewards/margins": 10.023384094238281, "rewards/rejected": -11.029580116271973, "step": 10230 }, { "epoch": 0.61, "learning_rate": 1.974979260424591e-06, "logits/chosen": -2.7123920917510986, "logits/rejected": -2.3224048614501953, "logps/chosen": -150.8501434326172, "logps/rejected": -1207.732177734375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7454864978790283, "rewards/margins": 10.883295059204102, "rewards/rejected": -11.62878131866455, "step": 10240 }, { "epoch": 0.61, "learning_rate": 1.969892721907971e-06, "logits/chosen": -2.691455364227295, "logits/rejected": -2.288158655166626, "logps/chosen": -176.45851135253906, "logps/rejected": -1143.230224609375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.0605857372283936, "rewards/margins": 9.927177429199219, "rewards/rejected": -10.987763404846191, "step": 10250 }, { "epoch": 0.61, "learning_rate": 1.9648084801333468e-06, "logits/chosen": -2.6569085121154785, "logits/rejected": -2.267460584640503, "logps/chosen": -157.91433715820312, "logps/rejected": -1155.6077880859375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.9086402058601379, "rewards/margins": 10.202337265014648, "rewards/rejected": -11.110978126525879, "step": 10260 }, { "epoch": 0.61, "learning_rate": 1.9597265571286945e-06, "logits/chosen": -2.6799840927124023, "logits/rejected": -2.2462401390075684, "logps/chosen": -147.64344787597656, "logps/rejected": -1218.9774169921875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8373859524726868, "rewards/margins": 10.907033920288086, "rewards/rejected": -11.74441909790039, "step": 10270 }, { "epoch": 0.61, "learning_rate": 1.9546469749119485e-06, "logits/chosen": -2.729508876800537, "logits/rejected": -2.3001186847686768, "logps/chosen": -151.4470977783203, "logps/rejected": -1142.6334228515625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.7849904298782349, "rewards/margins": 10.195791244506836, "rewards/rejected": -10.980780601501465, "step": 10280 }, { "epoch": 0.61, "learning_rate": 1.9495697554908984e-06, "logits/chosen": -2.678361415863037, "logits/rejected": -2.2465968132019043, "logps/chosen": -144.54483032226562, "logps/rejected": -1181.9140625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.7672642469406128, "rewards/margins": 10.611356735229492, "rewards/rejected": -11.378621101379395, "step": 10290 }, { "epoch": 0.61, "learning_rate": 1.944494920863096e-06, "logits/chosen": -2.686680555343628, "logits/rejected": -2.2656469345092773, "logps/chosen": -169.4293212890625, "logps/rejected": -1171.400634765625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.9755252003669739, "rewards/margins": 10.304737091064453, "rewards/rejected": -11.280261993408203, "step": 10300 }, { "epoch": 0.61, "learning_rate": 1.939422493015764e-06, "logits/chosen": -2.6721138954162598, "logits/rejected": -2.21661639213562, "logps/chosen": -155.87158203125, "logps/rejected": -1284.742919921875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.8563534617424011, "rewards/margins": 11.553642272949219, "rewards/rejected": -12.409995079040527, "step": 10310 }, { "epoch": 0.62, "learning_rate": 1.934352493925695e-06, "logits/chosen": -2.728530168533325, "logits/rejected": -2.3068325519561768, "logps/chosen": -174.71905517578125, "logps/rejected": -1075.154541015625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.0647732019424438, "rewards/margins": 9.243081092834473, "rewards/rejected": -10.307853698730469, "step": 10320 }, { "epoch": 0.62, "learning_rate": 1.929284945559159e-06, "logits/chosen": -2.7263424396514893, "logits/rejected": -2.3152506351470947, "logps/chosen": -187.24583435058594, "logps/rejected": -1229.1773681640625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.0893981456756592, "rewards/margins": 10.766997337341309, "rewards/rejected": -11.85639476776123, "step": 10330 }, { "epoch": 0.62, "learning_rate": 1.9242198698718096e-06, "logits/chosen": -2.7002689838409424, "logits/rejected": -2.285580635070801, "logps/chosen": -169.35635375976562, "logps/rejected": -1162.4388427734375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.9782946705818176, "rewards/margins": 10.206392288208008, "rewards/rejected": -11.184687614440918, "step": 10340 }, { "epoch": 0.62, "learning_rate": 1.919157288808585e-06, "logits/chosen": -2.672635555267334, "logits/rejected": -2.163297653198242, "logps/chosen": -173.37655639648438, "logps/rejected": -1241.8756103515625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.027738332748413, "rewards/margins": 10.940069198608398, "rewards/rejected": -11.967806816101074, "step": 10350 }, { "epoch": 0.62, "learning_rate": 1.914097224303616e-06, "logits/chosen": -2.651663303375244, "logits/rejected": -2.2349636554718018, "logps/chosen": -167.01730346679688, "logps/rejected": -1237.832763671875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.9664157629013062, "rewards/margins": 10.976346969604492, "rewards/rejected": -11.942761421203613, "step": 10360 }, { "epoch": 0.62, "learning_rate": 1.9090396982801317e-06, "logits/chosen": -2.69647479057312, "logits/rejected": -2.3212196826934814, "logps/chosen": -156.8590087890625, "logps/rejected": -1209.521728515625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.9029251337051392, "rewards/margins": 10.743374824523926, "rewards/rejected": -11.646299362182617, "step": 10370 }, { "epoch": 0.62, "learning_rate": 1.9039847326503608e-06, "logits/chosen": -2.709139585494995, "logits/rejected": -2.2755322456359863, "logps/chosen": -181.7281036376953, "logps/rejected": -1297.624755859375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.1287683248519897, "rewards/margins": 11.405282974243164, "rewards/rejected": -12.534050941467285, "step": 10380 }, { "epoch": 0.62, "learning_rate": 1.8989323493154402e-06, "logits/chosen": -2.7091329097747803, "logits/rejected": -2.2651634216308594, "logps/chosen": -162.07962036132812, "logps/rejected": -1172.4117431640625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.9179351925849915, "rewards/margins": 10.364705085754395, "rewards/rejected": -11.282638549804688, "step": 10390 }, { "epoch": 0.62, "learning_rate": 1.893882570165318e-06, "logits/chosen": -2.674309253692627, "logits/rejected": -2.3083274364471436, "logps/chosen": -155.83653259277344, "logps/rejected": -1306.9290771484375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.8740228414535522, "rewards/margins": 11.746663093566895, "rewards/rejected": -12.620685577392578, "step": 10400 }, { "epoch": 0.62, "learning_rate": 1.8888354170786604e-06, "logits/chosen": -2.691688299179077, "logits/rejected": -2.301456928253174, "logps/chosen": -143.4541473388672, "logps/rejected": -1127.671630859375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.7627164125442505, "rewards/margins": 10.070082664489746, "rewards/rejected": -10.83279800415039, "step": 10410 }, { "epoch": 0.62, "learning_rate": 1.8837909119227541e-06, "logits/chosen": -2.655333995819092, "logits/rejected": -2.339489459991455, "logps/chosen": -167.1618194580078, "logps/rejected": -1247.521484375, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.9810325503349304, "rewards/margins": 11.044880867004395, "rewards/rejected": -12.025912284851074, "step": 10420 }, { "epoch": 0.62, "learning_rate": 1.878749076553416e-06, "logits/chosen": -2.6650567054748535, "logits/rejected": -2.192473888397217, "logps/chosen": -159.62437438964844, "logps/rejected": -1261.5546875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.8769809603691101, "rewards/margins": 11.283531188964844, "rewards/rejected": -12.16051197052002, "step": 10430 }, { "epoch": 0.62, "learning_rate": 1.873709932814894e-06, "logits/chosen": -2.648094892501831, "logits/rejected": -2.242197036743164, "logps/chosen": -149.56546020507812, "logps/rejected": -1178.7962646484375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.7502764463424683, "rewards/margins": 10.599845886230469, "rewards/rejected": -11.350122451782227, "step": 10440 }, { "epoch": 0.62, "learning_rate": 1.8686735025397728e-06, "logits/chosen": -2.6840195655822754, "logits/rejected": -2.3303234577178955, "logps/chosen": -149.92015075683594, "logps/rejected": -1316.4888916015625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.7355614304542542, "rewards/margins": 11.985756874084473, "rewards/rejected": -12.721318244934082, "step": 10450 }, { "epoch": 0.62, "learning_rate": 1.8636398075488857e-06, "logits/chosen": -2.6736550331115723, "logits/rejected": -2.2199785709381104, "logps/chosen": -147.96337890625, "logps/rejected": -1128.46630859375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.7754791975021362, "rewards/margins": 10.067721366882324, "rewards/rejected": -10.84320068359375, "step": 10460 }, { "epoch": 0.62, "learning_rate": 1.8586088696512101e-06, "logits/chosen": -2.6745097637176514, "logits/rejected": -2.230201005935669, "logps/chosen": -140.08935546875, "logps/rejected": -1106.438232421875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.6857896447181702, "rewards/margins": 9.941146850585938, "rewards/rejected": -10.626935958862305, "step": 10470 }, { "epoch": 0.62, "learning_rate": 1.85358071064378e-06, "logits/chosen": -2.7406771183013916, "logits/rejected": -2.3733060359954834, "logps/chosen": -137.88369750976562, "logps/rejected": -1111.315673828125, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.6779087781906128, "rewards/margins": 10.002992630004883, "rewards/rejected": -10.680900573730469, "step": 10480 }, { "epoch": 0.63, "learning_rate": 1.8485553523115902e-06, "logits/chosen": -2.6721410751342773, "logits/rejected": -2.337549924850464, "logps/chosen": -133.80642700195312, "logps/rejected": -1201.2855224609375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.638275682926178, "rewards/margins": 10.92553997039795, "rewards/rejected": -11.56381607055664, "step": 10490 }, { "epoch": 0.63, "learning_rate": 1.8435328164275007e-06, "logits/chosen": -2.65844464302063, "logits/rejected": -2.3178467750549316, "logps/chosen": -158.33909606933594, "logps/rejected": -1240.73876953125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.8368174433708191, "rewards/margins": 11.120333671569824, "rewards/rejected": -11.95715045928955, "step": 10500 }, { "epoch": 0.63, "learning_rate": 1.838513124752142e-06, "logits/chosen": -2.704360246658325, "logits/rejected": -2.271937131881714, "logps/chosen": -121.60099029541016, "logps/rejected": -1182.9244384765625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.6088675260543823, "rewards/margins": 10.778694152832031, "rewards/rejected": -11.387561798095703, "step": 10510 }, { "epoch": 0.63, "learning_rate": 1.833496299033824e-06, "logits/chosen": -2.6805663108825684, "logits/rejected": -2.2875351905822754, "logps/chosen": -154.54995727539062, "logps/rejected": -1141.4454345703125, "loss": 0.0209, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.8170779347419739, "rewards/margins": 10.14470100402832, "rewards/rejected": -10.96177864074707, "step": 10520 }, { "epoch": 0.63, "learning_rate": 1.8284823610084375e-06, "logits/chosen": -2.705570697784424, "logits/rejected": -2.286400318145752, "logps/chosen": -174.88827514648438, "logps/rejected": -1088.65234375, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.9820972681045532, "rewards/margins": 9.472587585449219, "rewards/rejected": -10.454684257507324, "step": 10530 }, { "epoch": 0.63, "learning_rate": 1.8234713323993622e-06, "logits/chosen": -2.7189624309539795, "logits/rejected": -2.221888780593872, "logps/chosen": -135.50201416015625, "logps/rejected": -1256.1312255859375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.6958117485046387, "rewards/margins": 11.418486595153809, "rewards/rejected": -12.114297866821289, "step": 10540 }, { "epoch": 0.63, "learning_rate": 1.8184632349173747e-06, "logits/chosen": -2.667470693588257, "logits/rejected": -2.3295605182647705, "logps/chosen": -145.33932495117188, "logps/rejected": -1128.9957275390625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.777393102645874, "rewards/margins": 10.06690502166748, "rewards/rejected": -10.84429931640625, "step": 10550 }, { "epoch": 0.63, "learning_rate": 1.8134580902605491e-06, "logits/chosen": -2.7305068969726562, "logits/rejected": -2.313091278076172, "logps/chosen": -164.78121948242188, "logps/rejected": -1104.535400390625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.9076918363571167, "rewards/margins": 9.698684692382812, "rewards/rejected": -10.606376647949219, "step": 10560 }, { "epoch": 0.63, "learning_rate": 1.8084559201141677e-06, "logits/chosen": -2.6300487518310547, "logits/rejected": -2.2776150703430176, "logps/chosen": -184.3844451904297, "logps/rejected": -1363.12744140625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.1176321506500244, "rewards/margins": 12.056962013244629, "rewards/rejected": -13.174595832824707, "step": 10570 }, { "epoch": 0.63, "learning_rate": 1.803456746150627e-06, "logits/chosen": -2.6253128051757812, "logits/rejected": -2.2257564067840576, "logps/chosen": -136.60572814941406, "logps/rejected": -1179.6331787109375, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": -0.7256103754043579, "rewards/margins": 10.64277172088623, "rewards/rejected": -11.368383407592773, "step": 10580 }, { "epoch": 0.63, "learning_rate": 1.7984605900293395e-06, "logits/chosen": -2.6622276306152344, "logits/rejected": -2.189941167831421, "logps/chosen": -180.76763916015625, "logps/rejected": -1323.7637939453125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.1611995697021484, "rewards/margins": 11.631550788879395, "rewards/rejected": -12.792750358581543, "step": 10590 }, { "epoch": 0.63, "learning_rate": 1.7934674733966426e-06, "logits/chosen": -2.670856475830078, "logits/rejected": -2.275364637374878, "logps/chosen": -181.9188995361328, "logps/rejected": -1288.626708984375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.1387431621551514, "rewards/margins": 11.296748161315918, "rewards/rejected": -12.435491561889648, "step": 10600 }, { "epoch": 0.63, "learning_rate": 1.7884774178857079e-06, "logits/chosen": -2.6612496376037598, "logits/rejected": -2.250316619873047, "logps/chosen": -185.29873657226562, "logps/rejected": -1283.1878662109375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.1791346073150635, "rewards/margins": 11.206694602966309, "rewards/rejected": -12.385828018188477, "step": 10610 }, { "epoch": 0.63, "learning_rate": 1.7834904451164417e-06, "logits/chosen": -2.666290044784546, "logits/rejected": -2.2596793174743652, "logps/chosen": -172.3092498779297, "logps/rejected": -1252.9447021484375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.0615670680999756, "rewards/margins": 11.021515846252441, "rewards/rejected": -12.083083152770996, "step": 10620 }, { "epoch": 0.63, "learning_rate": 1.7785065766953932e-06, "logits/chosen": -2.6500802040100098, "logits/rejected": -2.146433115005493, "logps/chosen": -197.70945739746094, "logps/rejected": -1286.618896484375, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.3150241374969482, "rewards/margins": 11.083675384521484, "rewards/rejected": -12.398698806762695, "step": 10630 }, { "epoch": 0.63, "learning_rate": 1.7735258342156653e-06, "logits/chosen": -2.6253857612609863, "logits/rejected": -2.1825995445251465, "logps/chosen": -210.876708984375, "logps/rejected": -1185.6953125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.4058815240859985, "rewards/margins": 10.007796287536621, "rewards/rejected": -11.413679122924805, "step": 10640 }, { "epoch": 0.64, "learning_rate": 1.768548239256815e-06, "logits/chosen": -2.6982548236846924, "logits/rejected": -2.3529727458953857, "logps/chosen": -237.3026580810547, "logps/rejected": -1256.3804931640625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.6476304531097412, "rewards/margins": 10.454874992370605, "rewards/rejected": -12.102503776550293, "step": 10650 }, { "epoch": 0.64, "learning_rate": 1.7635738133847608e-06, "logits/chosen": -2.713792324066162, "logits/rejected": -2.2466139793395996, "logps/chosen": -239.65487670898438, "logps/rejected": -1288.21142578125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.663100242614746, "rewards/margins": 10.771455764770508, "rewards/rejected": -12.434554100036621, "step": 10660 }, { "epoch": 0.64, "learning_rate": 1.7586025781516958e-06, "logits/chosen": -2.6637954711914062, "logits/rejected": -2.270134687423706, "logps/chosen": -211.2230224609375, "logps/rejected": -1232.7806396484375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.4256975650787354, "rewards/margins": 10.447927474975586, "rewards/rejected": -11.873624801635742, "step": 10670 }, { "epoch": 0.64, "learning_rate": 1.7536345550959844e-06, "logits/chosen": -2.7061476707458496, "logits/rejected": -2.2232162952423096, "logps/chosen": -190.14871215820312, "logps/rejected": -1213.1466064453125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.2435824871063232, "rewards/margins": 10.444583892822266, "rewards/rejected": -11.688166618347168, "step": 10680 }, { "epoch": 0.64, "learning_rate": 1.7486697657420752e-06, "logits/chosen": -2.711820363998413, "logits/rejected": -2.3143553733825684, "logps/chosen": -180.61358642578125, "logps/rejected": -1146.7406005859375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.1420700550079346, "rewards/margins": 9.889798164367676, "rewards/rejected": -11.031867980957031, "step": 10690 }, { "epoch": 0.64, "learning_rate": 1.743708231600409e-06, "logits/chosen": -2.706454038619995, "logits/rejected": -2.3063597679138184, "logps/chosen": -213.4799346923828, "logps/rejected": -1232.907470703125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.4261481761932373, "rewards/margins": 10.453648567199707, "rewards/rejected": -11.879796028137207, "step": 10700 }, { "epoch": 0.64, "learning_rate": 1.7387499741673197e-06, "logits/chosen": -2.655303955078125, "logits/rejected": -2.2451364994049072, "logps/chosen": -204.3408660888672, "logps/rejected": -1256.3521728515625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.3569390773773193, "rewards/margins": 10.759969711303711, "rewards/rejected": -12.116909980773926, "step": 10710 }, { "epoch": 0.64, "learning_rate": 1.7337950149249466e-06, "logits/chosen": -2.6937034130096436, "logits/rejected": -2.2553374767303467, "logps/chosen": -196.3228302001953, "logps/rejected": -1234.77197265625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.2222070693969727, "rewards/margins": 10.689011573791504, "rewards/rejected": -11.911218643188477, "step": 10720 }, { "epoch": 0.64, "learning_rate": 1.7288433753411383e-06, "logits/chosen": -2.7084755897521973, "logits/rejected": -2.300086736679077, "logps/chosen": -176.45362854003906, "logps/rejected": -1240.108642578125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.0833253860473633, "rewards/margins": 10.869207382202148, "rewards/rejected": -11.952531814575195, "step": 10730 }, { "epoch": 0.64, "learning_rate": 1.7238950768693619e-06, "logits/chosen": -2.6832168102264404, "logits/rejected": -2.219399929046631, "logps/chosen": -190.40277099609375, "logps/rejected": -1201.741943359375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.1526172161102295, "rewards/margins": 10.423155784606934, "rewards/rejected": -11.575773239135742, "step": 10740 }, { "epoch": 0.64, "learning_rate": 1.7189501409486061e-06, "logits/chosen": -2.6671907901763916, "logits/rejected": -2.2211320400238037, "logps/chosen": -166.03326416015625, "logps/rejected": -1250.656005859375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.9305809736251831, "rewards/margins": 11.132793426513672, "rewards/rejected": -12.063374519348145, "step": 10750 }, { "epoch": 0.64, "learning_rate": 1.7140085890032951e-06, "logits/chosen": -2.6868786811828613, "logits/rejected": -2.216801643371582, "logps/chosen": -183.50216674804688, "logps/rejected": -1362.376953125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.0438324213027954, "rewards/margins": 12.142550468444824, "rewards/rejected": -13.186384201049805, "step": 10760 }, { "epoch": 0.64, "learning_rate": 1.7090704424431882e-06, "logits/chosen": -2.6949551105499268, "logits/rejected": -2.3042445182800293, "logps/chosen": -188.57101440429688, "logps/rejected": -1268.557861328125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1118698120117188, "rewards/margins": 11.11991024017334, "rewards/rejected": -12.231779098510742, "step": 10770 }, { "epoch": 0.64, "learning_rate": 1.704135722663291e-06, "logits/chosen": -2.6836369037628174, "logits/rejected": -2.2245659828186035, "logps/chosen": -187.42575073242188, "logps/rejected": -1304.1470947265625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.1433792114257812, "rewards/margins": 11.452189445495605, "rewards/rejected": -12.59556770324707, "step": 10780 }, { "epoch": 0.64, "learning_rate": 1.6992044510437644e-06, "logits/chosen": -2.696124315261841, "logits/rejected": -2.2626724243164062, "logps/chosen": -205.2533721923828, "logps/rejected": -1335.229248046875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.3333510160446167, "rewards/margins": 11.585688591003418, "rewards/rejected": -12.919039726257324, "step": 10790 }, { "epoch": 0.64, "learning_rate": 1.6942766489498278e-06, "logits/chosen": -2.7053720951080322, "logits/rejected": -2.2856245040893555, "logps/chosen": -164.22140502929688, "logps/rejected": -1135.5733642578125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.9044491052627563, "rewards/margins": 10.012067794799805, "rewards/rejected": -10.916516304016113, "step": 10800 }, { "epoch": 0.64, "learning_rate": 1.689352337731669e-06, "logits/chosen": -2.74284029006958, "logits/rejected": -2.3319687843322754, "logps/chosen": -194.62136840820312, "logps/rejected": -1335.9298095703125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.22065007686615, "rewards/margins": 11.702608108520508, "rewards/rejected": -12.923257827758789, "step": 10810 }, { "epoch": 0.65, "learning_rate": 1.6844315387243514e-06, "logits/chosen": -2.722900629043579, "logits/rejected": -2.204911708831787, "logps/chosen": -174.6101531982422, "logps/rejected": -1229.6229248046875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.063889741897583, "rewards/margins": 10.79820442199707, "rewards/rejected": -11.862093925476074, "step": 10820 }, { "epoch": 0.65, "learning_rate": 1.6795142732477222e-06, "logits/chosen": -2.628021478652954, "logits/rejected": -2.2600715160369873, "logps/chosen": -178.87042236328125, "logps/rejected": -1090.664306640625, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -1.0961132049560547, "rewards/margins": 9.353715896606445, "rewards/rejected": -10.4498291015625, "step": 10830 }, { "epoch": 0.65, "learning_rate": 1.6746005626063163e-06, "logits/chosen": -2.688551425933838, "logits/rejected": -2.259103775024414, "logps/chosen": -154.6709747314453, "logps/rejected": -1074.783447265625, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.8224309086799622, "rewards/margins": 9.493600845336914, "rewards/rejected": -10.316032409667969, "step": 10840 }, { "epoch": 0.65, "learning_rate": 1.6696904280892716e-06, "logits/chosen": -2.656571626663208, "logits/rejected": -2.209709644317627, "logps/chosen": -168.6212615966797, "logps/rejected": -1336.944091796875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.9901341199874878, "rewards/margins": 11.925986289978027, "rewards/rejected": -12.916119575500488, "step": 10850 }, { "epoch": 0.65, "learning_rate": 1.6647838909702287e-06, "logits/chosen": -2.6694931983947754, "logits/rejected": -2.2432913780212402, "logps/chosen": -177.16159057617188, "logps/rejected": -1245.0076904296875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.1048028469085693, "rewards/margins": 10.907476425170898, "rewards/rejected": -12.012280464172363, "step": 10860 }, { "epoch": 0.65, "learning_rate": 1.6598809725072412e-06, "logits/chosen": -2.7171568870544434, "logits/rejected": -2.241743326187134, "logps/chosen": -158.462158203125, "logps/rejected": -1329.7509765625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.9434038400650024, "rewards/margins": 11.908381462097168, "rewards/rejected": -12.851785659790039, "step": 10870 }, { "epoch": 0.65, "learning_rate": 1.6549816939426888e-06, "logits/chosen": -2.672494411468506, "logits/rejected": -2.3112988471984863, "logps/chosen": -157.9896697998047, "logps/rejected": -1208.8345947265625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.93427574634552, "rewards/margins": 10.699585914611816, "rewards/rejected": -11.633859634399414, "step": 10880 }, { "epoch": 0.65, "learning_rate": 1.6500860765031767e-06, "logits/chosen": -2.662343740463257, "logits/rejected": -2.240168809890747, "logps/chosen": -181.88897705078125, "logps/rejected": -1245.9794921875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.0778279304504395, "rewards/margins": 10.941686630249023, "rewards/rejected": -12.019515037536621, "step": 10890 }, { "epoch": 0.65, "learning_rate": 1.64519414139945e-06, "logits/chosen": -2.6769790649414062, "logits/rejected": -2.2713465690612793, "logps/chosen": -161.05191040039062, "logps/rejected": -1284.2474365234375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.9550189971923828, "rewards/margins": 11.451133728027344, "rewards/rejected": -12.406153678894043, "step": 10900 }, { "epoch": 0.65, "learning_rate": 1.6403059098263003e-06, "logits/chosen": -2.7319157123565674, "logits/rejected": -2.285252094268799, "logps/chosen": -167.67991638183594, "logps/rejected": -1321.8765869140625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9087414741516113, "rewards/margins": 11.856451034545898, "rewards/rejected": -12.765192031860352, "step": 10910 }, { "epoch": 0.65, "learning_rate": 1.6354214029624719e-06, "logits/chosen": -2.6906540393829346, "logits/rejected": -2.2802863121032715, "logps/chosen": -169.17660522460938, "logps/rejected": -1169.43896484375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.9710705876350403, "rewards/margins": 10.287484169006348, "rewards/rejected": -11.25855541229248, "step": 10920 }, { "epoch": 0.65, "learning_rate": 1.6305406419705704e-06, "logits/chosen": -2.670264482498169, "logits/rejected": -2.274122714996338, "logps/chosen": -193.5460205078125, "logps/rejected": -1333.782958984375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.228770136833191, "rewards/margins": 11.671228408813477, "rewards/rejected": -12.899998664855957, "step": 10930 }, { "epoch": 0.65, "learning_rate": 1.6256636479969757e-06, "logits/chosen": -2.7037606239318848, "logits/rejected": -2.2911012172698975, "logps/chosen": -172.2279510498047, "logps/rejected": -1270.383056640625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.0514845848083496, "rewards/margins": 11.207279205322266, "rewards/rejected": -12.258763313293457, "step": 10940 }, { "epoch": 0.65, "learning_rate": 1.6207904421717438e-06, "logits/chosen": -2.677539587020874, "logits/rejected": -2.2273712158203125, "logps/chosen": -157.8165740966797, "logps/rejected": -1257.1446533203125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.8738549947738647, "rewards/margins": 11.240236282348633, "rewards/rejected": -12.114090919494629, "step": 10950 }, { "epoch": 0.65, "learning_rate": 1.6159210456085179e-06, "logits/chosen": -2.677459239959717, "logits/rejected": -2.212089776992798, "logps/chosen": -160.33921813964844, "logps/rejected": -1302.200439453125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.864704966545105, "rewards/margins": 11.718900680541992, "rewards/rejected": -12.583606719970703, "step": 10960 }, { "epoch": 0.65, "learning_rate": 1.6110554794044397e-06, "logits/chosen": -2.66448974609375, "logits/rejected": -2.289799928665161, "logps/chosen": -169.28921508789062, "logps/rejected": -1146.988525390625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.9300249218940735, "rewards/margins": 10.09156608581543, "rewards/rejected": -11.021590232849121, "step": 10970 }, { "epoch": 0.65, "learning_rate": 1.6061937646400526e-06, "logits/chosen": -2.707273006439209, "logits/rejected": -2.308811902999878, "logps/chosen": -152.71749877929688, "logps/rejected": -1228.77587890625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.7697857618331909, "rewards/margins": 11.076682090759277, "rewards/rejected": -11.846467971801758, "step": 10980 }, { "epoch": 0.66, "learning_rate": 1.6013359223792155e-06, "logits/chosen": -2.7099242210388184, "logits/rejected": -2.2905263900756836, "logps/chosen": -173.2109832763672, "logps/rejected": -1207.319580078125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.0260493755340576, "rewards/margins": 10.619953155517578, "rewards/rejected": -11.646002769470215, "step": 10990 }, { "epoch": 0.66, "learning_rate": 1.596481973669009e-06, "logits/chosen": -2.6645987033843994, "logits/rejected": -2.262202501296997, "logps/chosen": -164.31773376464844, "logps/rejected": -1315.1573486328125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.9622052311897278, "rewards/margins": 11.734614372253418, "rewards/rejected": -12.696820259094238, "step": 11000 }, { "epoch": 0.66, "learning_rate": 1.591631939539644e-06, "logits/chosen": -2.686890125274658, "logits/rejected": -2.299267292022705, "logps/chosen": -164.4632568359375, "logps/rejected": -1337.7044677734375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.9094980359077454, "rewards/margins": 12.016288757324219, "rewards/rejected": -12.925786972045898, "step": 11010 }, { "epoch": 0.66, "learning_rate": 1.5867858410043688e-06, "logits/chosen": -2.666682481765747, "logits/rejected": -2.290273666381836, "logps/chosen": -187.35986328125, "logps/rejected": -1242.795654296875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.1542308330535889, "rewards/margins": 10.819709777832031, "rewards/rejected": -11.9739408493042, "step": 11020 }, { "epoch": 0.66, "learning_rate": 1.5819436990593855e-06, "logits/chosen": -2.6902308464050293, "logits/rejected": -2.2475180625915527, "logps/chosen": -210.10440063476562, "logps/rejected": -1348.645263671875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.3616456985473633, "rewards/margins": 11.679061889648438, "rewards/rejected": -13.0407075881958, "step": 11030 }, { "epoch": 0.66, "learning_rate": 1.5771055346837498e-06, "logits/chosen": -2.650068521499634, "logits/rejected": -2.249622106552124, "logps/chosen": -209.57089233398438, "logps/rejected": -1287.388671875, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.3396633863449097, "rewards/margins": 11.081099510192871, "rewards/rejected": -12.42076301574707, "step": 11040 }, { "epoch": 0.66, "learning_rate": 1.5722713688392844e-06, "logits/chosen": -2.6901421546936035, "logits/rejected": -2.28308367729187, "logps/chosen": -155.38592529296875, "logps/rejected": -1393.3990478515625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.8177770376205444, "rewards/margins": 12.665241241455078, "rewards/rejected": -13.48301887512207, "step": 11050 }, { "epoch": 0.66, "learning_rate": 1.5674412224704902e-06, "logits/chosen": -2.6922764778137207, "logits/rejected": -2.2809135913848877, "logps/chosen": -173.3521270751953, "logps/rejected": -1145.6488037109375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.020254373550415, "rewards/margins": 10.00926399230957, "rewards/rejected": -11.029520034790039, "step": 11060 }, { "epoch": 0.66, "learning_rate": 1.5626151165044522e-06, "logits/chosen": -2.7068779468536377, "logits/rejected": -2.1955337524414062, "logps/chosen": -161.06285095214844, "logps/rejected": -1270.3131103515625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.8800745010375977, "rewards/margins": 11.366345405578613, "rewards/rejected": -12.246420860290527, "step": 11070 }, { "epoch": 0.66, "learning_rate": 1.557793071850749e-06, "logits/chosen": -2.6897425651550293, "logits/rejected": -2.2221031188964844, "logps/chosen": -157.92539978027344, "logps/rejected": -1311.1961669921875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.8567537069320679, "rewards/margins": 11.795483589172363, "rewards/rejected": -12.652236938476562, "step": 11080 }, { "epoch": 0.66, "learning_rate": 1.552975109401365e-06, "logits/chosen": -2.7281808853149414, "logits/rejected": -2.3182520866394043, "logps/chosen": -187.7662353515625, "logps/rejected": -1216.343017578125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.1299870014190674, "rewards/margins": 10.582721710205078, "rewards/rejected": -11.712708473205566, "step": 11090 }, { "epoch": 0.66, "learning_rate": 1.5481612500305964e-06, "logits/chosen": -2.686325788497925, "logits/rejected": -2.291994094848633, "logps/chosen": -174.48562622070312, "logps/rejected": -1247.1187744140625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.0441081523895264, "rewards/margins": 10.992681503295898, "rewards/rejected": -12.03679084777832, "step": 11100 }, { "epoch": 0.66, "learning_rate": 1.5433515145949636e-06, "logits/chosen": -2.6949706077575684, "logits/rejected": -2.3253660202026367, "logps/chosen": -173.86875915527344, "logps/rejected": -1259.810546875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.9885636568069458, "rewards/margins": 11.173116683959961, "rewards/rejected": -12.161680221557617, "step": 11110 }, { "epoch": 0.66, "learning_rate": 1.5385459239331173e-06, "logits/chosen": -2.6652445793151855, "logits/rejected": -2.2798759937286377, "logps/chosen": -177.71279907226562, "logps/rejected": -1245.261474609375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.0771000385284424, "rewards/margins": 10.931848526000977, "rewards/rejected": -12.008947372436523, "step": 11120 }, { "epoch": 0.66, "learning_rate": 1.5337444988657546e-06, "logits/chosen": -2.7376301288604736, "logits/rejected": -2.2805728912353516, "logps/chosen": -182.17759704589844, "logps/rejected": -1372.2288818359375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.0855470895767212, "rewards/margins": 12.18674373626709, "rewards/rejected": -13.27229118347168, "step": 11130 }, { "epoch": 0.66, "learning_rate": 1.5289472601955219e-06, "logits/chosen": -2.6978373527526855, "logits/rejected": -2.275664806365967, "logps/chosen": -192.24615478515625, "logps/rejected": -1245.9857177734375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.2008732557296753, "rewards/margins": 10.808465957641602, "rewards/rejected": -12.009337425231934, "step": 11140 }, { "epoch": 0.66, "learning_rate": 1.5241542287069273e-06, "logits/chosen": -2.6892693042755127, "logits/rejected": -2.257542848587036, "logps/chosen": -171.97042846679688, "logps/rejected": -1199.820556640625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.0330320596694946, "rewards/margins": 10.519083023071289, "rewards/rejected": -11.552114486694336, "step": 11150 }, { "epoch": 0.67, "learning_rate": 1.5193654251662531e-06, "logits/chosen": -2.651379346847534, "logits/rejected": -2.211075782775879, "logps/chosen": -202.85946655273438, "logps/rejected": -1235.321533203125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.2954750061035156, "rewards/margins": 10.60512924194336, "rewards/rejected": -11.900605201721191, "step": 11160 }, { "epoch": 0.67, "learning_rate": 1.514580870321462e-06, "logits/chosen": -2.700159788131714, "logits/rejected": -2.2890915870666504, "logps/chosen": -172.19615173339844, "logps/rejected": -1392.2249755859375, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.9928817749023438, "rewards/margins": 12.490656852722168, "rewards/rejected": -13.483538627624512, "step": 11170 }, { "epoch": 0.67, "learning_rate": 1.509800584902108e-06, "logits/chosen": -2.695985794067383, "logits/rejected": -2.2855312824249268, "logps/chosen": -181.10806274414062, "logps/rejected": -1326.548095703125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.0607606172561646, "rewards/margins": 11.766735076904297, "rewards/rejected": -12.827496528625488, "step": 11180 }, { "epoch": 0.67, "learning_rate": 1.5050245896192503e-06, "logits/chosen": -2.6450653076171875, "logits/rejected": -2.217040538787842, "logps/chosen": -215.6058349609375, "logps/rejected": -1201.48681640625, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.4160423278808594, "rewards/margins": 10.149759292602539, "rewards/rejected": -11.565801620483398, "step": 11190 }, { "epoch": 0.67, "learning_rate": 1.5002529051653576e-06, "logits/chosen": -2.626765727996826, "logits/rejected": -2.264268398284912, "logps/chosen": -171.37767028808594, "logps/rejected": -1319.353271484375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.9757068753242493, "rewards/margins": 11.780801773071289, "rewards/rejected": -12.756509780883789, "step": 11200 }, { "epoch": 0.67, "learning_rate": 1.4954855522142225e-06, "logits/chosen": -2.6638448238372803, "logits/rejected": -2.219672918319702, "logps/chosen": -179.73074340820312, "logps/rejected": -1286.843505859375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.1611230373382568, "rewards/margins": 11.27320671081543, "rewards/rejected": -12.434330940246582, "step": 11210 }, { "epoch": 0.67, "learning_rate": 1.4907225514208724e-06, "logits/chosen": -2.7109882831573486, "logits/rejected": -2.2481467723846436, "logps/chosen": -188.31228637695312, "logps/rejected": -1226.2203369140625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.1562902927398682, "rewards/margins": 10.656560897827148, "rewards/rejected": -11.812849044799805, "step": 11220 }, { "epoch": 0.67, "learning_rate": 1.4859639234214774e-06, "logits/chosen": -2.730440855026245, "logits/rejected": -2.3092398643493652, "logps/chosen": -197.57882690429688, "logps/rejected": -1378.6351318359375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.256392478942871, "rewards/margins": 12.091400146484375, "rewards/rejected": -13.347793579101562, "step": 11230 }, { "epoch": 0.67, "learning_rate": 1.48120968883326e-06, "logits/chosen": -2.6735386848449707, "logits/rejected": -2.206005334854126, "logps/chosen": -208.0272216796875, "logps/rejected": -1233.26025390625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.3230682611465454, "rewards/margins": 10.568212509155273, "rewards/rejected": -11.891278266906738, "step": 11240 }, { "epoch": 0.67, "learning_rate": 1.4764598682544124e-06, "logits/chosen": -2.659745216369629, "logits/rejected": -2.2526659965515137, "logps/chosen": -180.5372314453125, "logps/rejected": -1184.196044921875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.1693991422653198, "rewards/margins": 10.236166000366211, "rewards/rejected": -11.405566215515137, "step": 11250 }, { "epoch": 0.67, "learning_rate": 1.4717144822639988e-06, "logits/chosen": -2.6884684562683105, "logits/rejected": -2.194739580154419, "logps/chosen": -201.0059814453125, "logps/rejected": -1283.117431640625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.3106249570846558, "rewards/margins": 11.076963424682617, "rewards/rejected": -12.38758659362793, "step": 11260 }, { "epoch": 0.67, "learning_rate": 1.4669735514218709e-06, "logits/chosen": -2.6820919513702393, "logits/rejected": -2.2434215545654297, "logps/chosen": -207.5338134765625, "logps/rejected": -1305.942138671875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.2980427742004395, "rewards/margins": 11.307580947875977, "rewards/rejected": -12.605623245239258, "step": 11270 }, { "epoch": 0.67, "learning_rate": 1.46223709626858e-06, "logits/chosen": -2.6448042392730713, "logits/rejected": -2.1442885398864746, "logps/chosen": -178.77737426757812, "logps/rejected": -1428.279541015625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.014285922050476, "rewards/margins": 12.814753532409668, "rewards/rejected": -13.829038619995117, "step": 11280 }, { "epoch": 0.67, "learning_rate": 1.457505137325283e-06, "logits/chosen": -2.68040132522583, "logits/rejected": -2.30275297164917, "logps/chosen": -198.19296264648438, "logps/rejected": -1273.473876953125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.2826688289642334, "rewards/margins": 11.02665901184082, "rewards/rejected": -12.309328079223633, "step": 11290 }, { "epoch": 0.67, "learning_rate": 1.452777695093659e-06, "logits/chosen": -2.675504207611084, "logits/rejected": -2.229268789291382, "logps/chosen": -198.2040252685547, "logps/rejected": -1304.1357421875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.2921191453933716, "rewards/margins": 11.297712326049805, "rewards/rejected": -12.589831352233887, "step": 11300 }, { "epoch": 0.67, "learning_rate": 1.448054790055817e-06, "logits/chosen": -2.6589934825897217, "logits/rejected": -2.1987435817718506, "logps/chosen": -233.8262481689453, "logps/rejected": -1269.7509765625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.645715355873108, "rewards/margins": 10.614938735961914, "rewards/rejected": -12.260655403137207, "step": 11310 }, { "epoch": 0.68, "learning_rate": 1.443336442674208e-06, "logits/chosen": -2.6648573875427246, "logits/rejected": -2.232083797454834, "logps/chosen": -190.82818603515625, "logps/rejected": -1339.959716796875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.1801660060882568, "rewards/margins": 11.772756576538086, "rewards/rejected": -12.952923774719238, "step": 11320 }, { "epoch": 0.68, "learning_rate": 1.438622673391537e-06, "logits/chosen": -2.639448881149292, "logits/rejected": -2.1928868293762207, "logps/chosen": -209.60018920898438, "logps/rejected": -1266.0494384765625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.4015882015228271, "rewards/margins": 10.827807426452637, "rewards/rejected": -12.229395866394043, "step": 11330 }, { "epoch": 0.68, "learning_rate": 1.4339135026306738e-06, "logits/chosen": -2.656280994415283, "logits/rejected": -2.2428205013275146, "logps/chosen": -206.0610809326172, "logps/rejected": -1218.919189453125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.3237788677215576, "rewards/margins": 10.418710708618164, "rewards/rejected": -11.7424898147583, "step": 11340 }, { "epoch": 0.68, "learning_rate": 1.4292089507945655e-06, "logits/chosen": -2.6868033409118652, "logits/rejected": -2.2751266956329346, "logps/chosen": -196.91854858398438, "logps/rejected": -1246.6492919921875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.2253191471099854, "rewards/margins": 10.809341430664062, "rewards/rejected": -12.034660339355469, "step": 11350 }, { "epoch": 0.68, "learning_rate": 1.424509038266143e-06, "logits/chosen": -2.6107964515686035, "logits/rejected": -2.265533685684204, "logps/chosen": -184.57809448242188, "logps/rejected": -1329.9722900390625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.164617657661438, "rewards/margins": 11.694311141967773, "rewards/rejected": -12.858929634094238, "step": 11360 }, { "epoch": 0.68, "learning_rate": 1.4198137854082443e-06, "logits/chosen": -2.7021453380584717, "logits/rejected": -2.2715065479278564, "logps/chosen": -210.5146026611328, "logps/rejected": -1284.7017822265625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.3556597232818604, "rewards/margins": 11.051874160766602, "rewards/rejected": -12.407533645629883, "step": 11370 }, { "epoch": 0.68, "learning_rate": 1.4151232125635123e-06, "logits/chosen": -2.7190427780151367, "logits/rejected": -2.270233154296875, "logps/chosen": -188.6830596923828, "logps/rejected": -1210.3553466796875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.1461411714553833, "rewards/margins": 10.506145477294922, "rewards/rejected": -11.652286529541016, "step": 11380 }, { "epoch": 0.68, "learning_rate": 1.4104373400543162e-06, "logits/chosen": -2.725419044494629, "logits/rejected": -2.337550163269043, "logps/chosen": -198.15994262695312, "logps/rejected": -1274.6337890625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.297153115272522, "rewards/margins": 10.995423316955566, "rewards/rejected": -12.29257583618164, "step": 11390 }, { "epoch": 0.68, "learning_rate": 1.405756188182661e-06, "logits/chosen": -2.7237181663513184, "logits/rejected": -2.2582039833068848, "logps/chosen": -203.4135284423828, "logps/rejected": -1403.0826416015625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.3156065940856934, "rewards/margins": 12.275812149047852, "rewards/rejected": -13.59142017364502, "step": 11400 }, { "epoch": 0.68, "learning_rate": 1.4010797772300972e-06, "logits/chosen": -2.6992669105529785, "logits/rejected": -2.2757856845855713, "logps/chosen": -180.81820678710938, "logps/rejected": -1240.244384765625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.1167184114456177, "rewards/margins": 10.83329963684082, "rewards/rejected": -11.950017929077148, "step": 11410 }, { "epoch": 0.68, "learning_rate": 1.396408127457637e-06, "logits/chosen": -2.696956157684326, "logits/rejected": -2.2336435317993164, "logps/chosen": -185.38070678710938, "logps/rejected": -1322.981201171875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.144913673400879, "rewards/margins": 11.636453628540039, "rewards/rejected": -12.781367301940918, "step": 11420 }, { "epoch": 0.68, "learning_rate": 1.3917412591056623e-06, "logits/chosen": -2.71138596534729, "logits/rejected": -2.244676113128662, "logps/chosen": -190.50401306152344, "logps/rejected": -1354.371826171875, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": -1.1792914867401123, "rewards/margins": 11.913422584533691, "rewards/rejected": -13.0927152633667, "step": 11430 }, { "epoch": 0.68, "learning_rate": 1.3870791923938408e-06, "logits/chosen": -2.699059009552002, "logits/rejected": -2.295976161956787, "logps/chosen": -206.33114624023438, "logps/rejected": -1343.3280029296875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.3789557218551636, "rewards/margins": 11.613662719726562, "rewards/rejected": -12.992619514465332, "step": 11440 }, { "epoch": 0.68, "learning_rate": 1.3824219475210337e-06, "logits/chosen": -2.6734960079193115, "logits/rejected": -2.2320971488952637, "logps/chosen": -213.85781860351562, "logps/rejected": -1303.5487060546875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.4375171661376953, "rewards/margins": 11.161863327026367, "rewards/rejected": -12.599379539489746, "step": 11450 }, { "epoch": 0.68, "learning_rate": 1.3777695446652167e-06, "logits/chosen": -2.717869758605957, "logits/rejected": -2.301353931427002, "logps/chosen": -196.81044006347656, "logps/rejected": -1369.522216796875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.2375197410583496, "rewards/margins": 12.009973526000977, "rewards/rejected": -13.2474946975708, "step": 11460 }, { "epoch": 0.68, "learning_rate": 1.3731220039833798e-06, "logits/chosen": -2.6929969787597656, "logits/rejected": -2.3108010292053223, "logps/chosen": -204.46914672851562, "logps/rejected": -1271.9449462890625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.300736665725708, "rewards/margins": 10.98784065246582, "rewards/rejected": -12.288576126098633, "step": 11470 }, { "epoch": 0.68, "learning_rate": 1.3684793456114526e-06, "logits/chosen": -2.7030506134033203, "logits/rejected": -2.258239269256592, "logps/chosen": -202.55868530273438, "logps/rejected": -1337.636962890625, "loss": 0.0226, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.3218635320663452, "rewards/margins": 11.600619316101074, "rewards/rejected": -12.922483444213867, "step": 11480 }, { "epoch": 0.69, "learning_rate": 1.3638415896642093e-06, "logits/chosen": -2.6755852699279785, "logits/rejected": -2.2529449462890625, "logps/chosen": -218.40133666992188, "logps/rejected": -1291.953369140625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.488645315170288, "rewards/margins": 10.97671127319336, "rewards/rejected": -12.465356826782227, "step": 11490 }, { "epoch": 0.69, "learning_rate": 1.359208756235184e-06, "logits/chosen": -2.717275619506836, "logits/rejected": -2.291421413421631, "logps/chosen": -183.0303497314453, "logps/rejected": -1367.19482421875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.109860897064209, "rewards/margins": 12.122903823852539, "rewards/rejected": -13.232765197753906, "step": 11500 }, { "epoch": 0.69, "learning_rate": 1.3545808653965847e-06, "logits/chosen": -2.671726703643799, "logits/rejected": -2.3693349361419678, "logps/chosen": -207.83706665039062, "logps/rejected": -1266.651123046875, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.3972653150558472, "rewards/margins": 10.807889938354492, "rewards/rejected": -12.205156326293945, "step": 11510 }, { "epoch": 0.69, "learning_rate": 1.349957937199204e-06, "logits/chosen": -2.699586868286133, "logits/rejected": -2.343557357788086, "logps/chosen": -197.34967041015625, "logps/rejected": -1186.7872314453125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.2625820636749268, "rewards/margins": 10.151843070983887, "rewards/rejected": -11.414424896240234, "step": 11520 }, { "epoch": 0.69, "learning_rate": 1.3453399916723343e-06, "logits/chosen": -2.6766464710235596, "logits/rejected": -2.144011974334717, "logps/chosen": -180.8981475830078, "logps/rejected": -1183.130126953125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.1016746759414673, "rewards/margins": 10.29432201385498, "rewards/rejected": -11.395998001098633, "step": 11530 }, { "epoch": 0.69, "learning_rate": 1.3407270488236769e-06, "logits/chosen": -2.724325180053711, "logits/rejected": -2.2858035564422607, "logps/chosen": -197.6632843017578, "logps/rejected": -1284.21435546875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.2183350324630737, "rewards/margins": 11.170289993286133, "rewards/rejected": -12.388626098632812, "step": 11540 }, { "epoch": 0.69, "learning_rate": 1.3361191286392644e-06, "logits/chosen": -2.7123208045959473, "logits/rejected": -2.321998119354248, "logps/chosen": -211.4967498779297, "logps/rejected": -1219.271728515625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.4731355905532837, "rewards/margins": 10.281391143798828, "rewards/rejected": -11.754526138305664, "step": 11550 }, { "epoch": 0.69, "learning_rate": 1.3315162510833623e-06, "logits/chosen": -2.707472324371338, "logits/rejected": -2.3292603492736816, "logps/chosen": -189.14869689941406, "logps/rejected": -1366.7987060546875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.2080543041229248, "rewards/margins": 12.017338752746582, "rewards/rejected": -13.225393295288086, "step": 11560 }, { "epoch": 0.69, "learning_rate": 1.3269184360983919e-06, "logits/chosen": -2.6877353191375732, "logits/rejected": -2.286729574203491, "logps/chosen": -219.62985229492188, "logps/rejected": -1284.7747802734375, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.499387502670288, "rewards/margins": 10.899397850036621, "rewards/rejected": -12.398786544799805, "step": 11570 }, { "epoch": 0.69, "learning_rate": 1.3223257036048395e-06, "logits/chosen": -2.7114851474761963, "logits/rejected": -2.2053208351135254, "logps/chosen": -176.20571899414062, "logps/rejected": -1250.053955078125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.9813610911369324, "rewards/margins": 11.068108558654785, "rewards/rejected": -12.049469947814941, "step": 11580 }, { "epoch": 0.69, "learning_rate": 1.3177380735011714e-06, "logits/chosen": -2.6327686309814453, "logits/rejected": -2.1458797454833984, "logps/chosen": -182.25143432617188, "logps/rejected": -1309.0238037109375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.0889462232589722, "rewards/margins": 11.559961318969727, "rewards/rejected": -12.648905754089355, "step": 11590 }, { "epoch": 0.69, "learning_rate": 1.3131555656637459e-06, "logits/chosen": -2.6530749797821045, "logits/rejected": -2.1939406394958496, "logps/chosen": -182.24398803710938, "logps/rejected": -1201.700927734375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.08091139793396, "rewards/margins": 10.48482894897461, "rewards/rejected": -11.565740585327148, "step": 11600 }, { "epoch": 0.69, "learning_rate": 1.3085781999467303e-06, "logits/chosen": -2.6596662998199463, "logits/rejected": -2.253260374069214, "logps/chosen": -164.27633666992188, "logps/rejected": -1256.12744140625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.8979629278182983, "rewards/margins": 11.229085922241211, "rewards/rejected": -12.127047538757324, "step": 11610 }, { "epoch": 0.69, "learning_rate": 1.3040059961820135e-06, "logits/chosen": -2.669900417327881, "logits/rejected": -2.29205584526062, "logps/chosen": -144.73570251464844, "logps/rejected": -1288.7686767578125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.7337912321090698, "rewards/margins": 11.727774620056152, "rewards/rejected": -12.461565971374512, "step": 11620 }, { "epoch": 0.69, "learning_rate": 1.2994389741791152e-06, "logits/chosen": -2.6438167095184326, "logits/rejected": -2.2469024658203125, "logps/chosen": -177.0650634765625, "logps/rejected": -1190.037353515625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.0854206085205078, "rewards/margins": 10.371801376342773, "rewards/rejected": -11.457222938537598, "step": 11630 }, { "epoch": 0.69, "learning_rate": 1.294877153725112e-06, "logits/chosen": -2.6704092025756836, "logits/rejected": -2.285759210586548, "logps/chosen": -194.0321807861328, "logps/rejected": -1307.4847412109375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.2456846237182617, "rewards/margins": 11.391227722167969, "rewards/rejected": -12.636914253234863, "step": 11640 }, { "epoch": 0.69, "learning_rate": 1.2903205545845378e-06, "logits/chosen": -2.669640064239502, "logits/rejected": -2.2860941886901855, "logps/chosen": -172.52822875976562, "logps/rejected": -1235.365966796875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.0323947668075562, "rewards/margins": 10.878218650817871, "rewards/rejected": -11.910614013671875, "step": 11650 }, { "epoch": 0.7, "learning_rate": 1.285769196499308e-06, "logits/chosen": -2.7038121223449707, "logits/rejected": -2.2927186489105225, "logps/chosen": -184.20726013183594, "logps/rejected": -1362.823974609375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.0956871509552002, "rewards/margins": 12.08732795715332, "rewards/rejected": -13.183016777038574, "step": 11660 }, { "epoch": 0.7, "learning_rate": 1.28122309918863e-06, "logits/chosen": -2.6596062183380127, "logits/rejected": -2.2318673133850098, "logps/chosen": -193.51368713378906, "logps/rejected": -1250.417236328125, "loss": 0.0161, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.188525915145874, "rewards/margins": 10.858878135681152, "rewards/rejected": -12.047405242919922, "step": 11670 }, { "epoch": 0.7, "learning_rate": 1.2766822823489175e-06, "logits/chosen": -2.667905330657959, "logits/rejected": -2.2183101177215576, "logps/chosen": -176.84426879882812, "logps/rejected": -1153.181884765625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.0317151546478271, "rewards/margins": 10.05436897277832, "rewards/rejected": -11.086084365844727, "step": 11680 }, { "epoch": 0.7, "learning_rate": 1.2721467656537074e-06, "logits/chosen": -2.7191903591156006, "logits/rejected": -2.2563586235046387, "logps/chosen": -167.41163635253906, "logps/rejected": -1297.9930419921875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.9730628132820129, "rewards/margins": 11.55770492553711, "rewards/rejected": -12.530767440795898, "step": 11690 }, { "epoch": 0.7, "learning_rate": 1.2676165687535719e-06, "logits/chosen": -2.689013957977295, "logits/rejected": -2.3450937271118164, "logps/chosen": -152.34751892089844, "logps/rejected": -1224.363525390625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.807723343372345, "rewards/margins": 10.987044334411621, "rewards/rejected": -11.794767379760742, "step": 11700 }, { "epoch": 0.7, "learning_rate": 1.2630917112760365e-06, "logits/chosen": -2.687070369720459, "logits/rejected": -2.2182226181030273, "logps/chosen": -178.33847045898438, "logps/rejected": -1239.14111328125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.0956488847732544, "rewards/margins": 10.834636688232422, "rewards/rejected": -11.930285453796387, "step": 11710 }, { "epoch": 0.7, "learning_rate": 1.2585722128254896e-06, "logits/chosen": -2.6702404022216797, "logits/rejected": -2.2642807960510254, "logps/chosen": -164.9624481201172, "logps/rejected": -1234.2159423828125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.9125876426696777, "rewards/margins": 10.986547470092773, "rewards/rejected": -11.899134635925293, "step": 11720 }, { "epoch": 0.7, "learning_rate": 1.2540580929831065e-06, "logits/chosen": -2.6474432945251465, "logits/rejected": -2.1999526023864746, "logps/chosen": -158.49305725097656, "logps/rejected": -1199.693115234375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.8346771001815796, "rewards/margins": 10.70768928527832, "rewards/rejected": -11.542366981506348, "step": 11730 }, { "epoch": 0.7, "learning_rate": 1.249549371306753e-06, "logits/chosen": -2.678654193878174, "logits/rejected": -2.2136974334716797, "logps/chosen": -185.2541961669922, "logps/rejected": -1230.862548828125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.113336205482483, "rewards/margins": 10.734987258911133, "rewards/rejected": -11.848322868347168, "step": 11740 }, { "epoch": 0.7, "learning_rate": 1.2450460673309115e-06, "logits/chosen": -2.6717171669006348, "logits/rejected": -2.2952756881713867, "logps/chosen": -163.7808380126953, "logps/rejected": -1220.9493408203125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.9566238522529602, "rewards/margins": 10.802164077758789, "rewards/rejected": -11.758787155151367, "step": 11750 }, { "epoch": 0.7, "learning_rate": 1.2405482005665894e-06, "logits/chosen": -2.658249616622925, "logits/rejected": -2.154463291168213, "logps/chosen": -178.73484802246094, "logps/rejected": -1264.4908447265625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.0500277280807495, "rewards/margins": 11.150541305541992, "rewards/rejected": -12.200569152832031, "step": 11760 }, { "epoch": 0.7, "learning_rate": 1.236055790501238e-06, "logits/chosen": -2.7080893516540527, "logits/rejected": -2.1892635822296143, "logps/chosen": -167.18630981445312, "logps/rejected": -1232.641845703125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.9356141090393066, "rewards/margins": 10.950101852416992, "rewards/rejected": -11.885717391967773, "step": 11770 }, { "epoch": 0.7, "learning_rate": 1.231568856598666e-06, "logits/chosen": -2.7048535346984863, "logits/rejected": -2.2809369564056396, "logps/chosen": -169.42620849609375, "logps/rejected": -1253.554931640625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.000076174736023, "rewards/margins": 11.080299377441406, "rewards/rejected": -12.080375671386719, "step": 11780 }, { "epoch": 0.7, "learning_rate": 1.2270874182989566e-06, "logits/chosen": -2.6488308906555176, "logits/rejected": -2.161964178085327, "logps/chosen": -157.1221923828125, "logps/rejected": -1067.824951171875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.9346389770507812, "rewards/margins": 9.307622909545898, "rewards/rejected": -10.24226188659668, "step": 11790 }, { "epoch": 0.7, "learning_rate": 1.2226114950183836e-06, "logits/chosen": -2.6762447357177734, "logits/rejected": -2.2160847187042236, "logps/chosen": -166.7312469482422, "logps/rejected": -1209.5733642578125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.9776681661605835, "rewards/margins": 10.67459774017334, "rewards/rejected": -11.652264595031738, "step": 11800 }, { "epoch": 0.7, "learning_rate": 1.2181411061493229e-06, "logits/chosen": -2.6886744499206543, "logits/rejected": -2.1918797492980957, "logps/chosen": -164.03114318847656, "logps/rejected": -1207.5887451171875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.9666979908943176, "rewards/margins": 10.65786361694336, "rewards/rejected": -11.624561309814453, "step": 11810 }, { "epoch": 0.7, "learning_rate": 1.213676271060178e-06, "logits/chosen": -2.6682701110839844, "logits/rejected": -2.210137128829956, "logps/chosen": -157.25338745117188, "logps/rejected": -1236.2374267578125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.8700025677680969, "rewards/margins": 11.043835639953613, "rewards/rejected": -11.913838386535645, "step": 11820 }, { "epoch": 0.71, "learning_rate": 1.2092170090952838e-06, "logits/chosen": -2.681671380996704, "logits/rejected": -2.2425122261047363, "logps/chosen": -185.64088439941406, "logps/rejected": -1307.0732421875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.1284072399139404, "rewards/margins": 11.496757507324219, "rewards/rejected": -12.625164031982422, "step": 11830 }, { "epoch": 0.71, "learning_rate": 1.204763339574833e-06, "logits/chosen": -2.6535892486572266, "logits/rejected": -2.2164878845214844, "logps/chosen": -172.20343017578125, "logps/rejected": -1361.5809326171875, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -1.0151000022888184, "rewards/margins": 12.163667678833008, "rewards/rejected": -13.178767204284668, "step": 11840 }, { "epoch": 0.71, "learning_rate": 1.2003152817947878e-06, "logits/chosen": -2.667032480239868, "logits/rejected": -2.200667381286621, "logps/chosen": -163.82644653320312, "logps/rejected": -1169.5721435546875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.92219078540802, "rewards/margins": 10.342397689819336, "rewards/rejected": -11.264589309692383, "step": 11850 }, { "epoch": 0.71, "learning_rate": 1.1958728550267958e-06, "logits/chosen": -2.668520927429199, "logits/rejected": -2.2707881927490234, "logps/chosen": -161.77818298339844, "logps/rejected": -1139.8802490234375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.9035550355911255, "rewards/margins": 10.056806564331055, "rewards/rejected": -10.96036148071289, "step": 11860 }, { "epoch": 0.71, "learning_rate": 1.1914360785181099e-06, "logits/chosen": -2.6910767555236816, "logits/rejected": -2.2122721672058105, "logps/chosen": -174.186767578125, "logps/rejected": -1055.9832763671875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.065211534500122, "rewards/margins": 9.051030158996582, "rewards/rejected": -10.116240501403809, "step": 11870 }, { "epoch": 0.71, "learning_rate": 1.1870049714915e-06, "logits/chosen": -2.709481716156006, "logits/rejected": -2.3278183937072754, "logps/chosen": -181.68829345703125, "logps/rejected": -1392.965087890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0728803873062134, "rewards/margins": 12.423988342285156, "rewards/rejected": -13.496870040893555, "step": 11880 }, { "epoch": 0.71, "learning_rate": 1.182579553145175e-06, "logits/chosen": -2.6649162769317627, "logits/rejected": -2.2277672290802, "logps/chosen": -158.9559326171875, "logps/rejected": -1329.152099609375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.8659127950668335, "rewards/margins": 11.969205856323242, "rewards/rejected": -12.835118293762207, "step": 11890 }, { "epoch": 0.71, "learning_rate": 1.1781598426526935e-06, "logits/chosen": -2.6503052711486816, "logits/rejected": -2.2765793800354004, "logps/chosen": -164.4527130126953, "logps/rejected": -1314.384033203125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.0304515361785889, "rewards/margins": 11.653807640075684, "rewards/rejected": -12.684259414672852, "step": 11900 }, { "epoch": 0.71, "learning_rate": 1.1737458591628898e-06, "logits/chosen": -2.6715242862701416, "logits/rejected": -2.2028393745422363, "logps/chosen": -160.84860229492188, "logps/rejected": -1242.447509765625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.8587220311164856, "rewards/margins": 11.128213882446289, "rewards/rejected": -11.986934661865234, "step": 11910 }, { "epoch": 0.71, "learning_rate": 1.1693376217997795e-06, "logits/chosen": -2.678063154220581, "logits/rejected": -2.161362409591675, "logps/chosen": -193.4080352783203, "logps/rejected": -1244.3980712890625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.2295480966567993, "rewards/margins": 10.776708602905273, "rewards/rejected": -12.006256103515625, "step": 11920 }, { "epoch": 0.71, "learning_rate": 1.164935149662485e-06, "logits/chosen": -2.644008159637451, "logits/rejected": -2.2190909385681152, "logps/chosen": -179.3295440673828, "logps/rejected": -1232.16015625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.1008204221725464, "rewards/margins": 10.773138999938965, "rewards/rejected": -11.873958587646484, "step": 11930 }, { "epoch": 0.71, "learning_rate": 1.1605384618251533e-06, "logits/chosen": -2.699425220489502, "logits/rejected": -2.18827486038208, "logps/chosen": -170.82801818847656, "logps/rejected": -1274.0860595703125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.9689435958862305, "rewards/margins": 11.329154014587402, "rewards/rejected": -12.298096656799316, "step": 11940 }, { "epoch": 0.71, "learning_rate": 1.156147577336865e-06, "logits/chosen": -2.678511142730713, "logits/rejected": -2.2204604148864746, "logps/chosen": -176.87564086914062, "logps/rejected": -1340.9168701171875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.1032963991165161, "rewards/margins": 11.873059272766113, "rewards/rejected": -12.976354598999023, "step": 11950 }, { "epoch": 0.71, "learning_rate": 1.1517625152215603e-06, "logits/chosen": -2.673372983932495, "logits/rejected": -2.216002941131592, "logps/chosen": -192.49183654785156, "logps/rejected": -1290.7626953125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.1558568477630615, "rewards/margins": 11.306575775146484, "rewards/rejected": -12.462432861328125, "step": 11960 }, { "epoch": 0.71, "learning_rate": 1.1473832944779525e-06, "logits/chosen": -2.6785974502563477, "logits/rejected": -2.2316765785217285, "logps/chosen": -190.4306182861328, "logps/rejected": -1287.8900146484375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.2031124830245972, "rewards/margins": 11.224150657653809, "rewards/rejected": -12.427265167236328, "step": 11970 }, { "epoch": 0.71, "learning_rate": 1.1430099340794482e-06, "logits/chosen": -2.65739107131958, "logits/rejected": -2.273519992828369, "logps/chosen": -191.32949829101562, "logps/rejected": -1294.4039306640625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.2415682077407837, "rewards/margins": 11.256373405456543, "rewards/rejected": -12.497941970825195, "step": 11980 }, { "epoch": 0.71, "learning_rate": 1.138642452974059e-06, "logits/chosen": -2.67218017578125, "logits/rejected": -2.2821950912475586, "logps/chosen": -177.0503692626953, "logps/rejected": -1250.2603759765625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.095555067062378, "rewards/margins": 10.965291976928711, "rewards/rejected": -12.060847282409668, "step": 11990 }, { "epoch": 0.72, "learning_rate": 1.1342808700843297e-06, "logits/chosen": -2.6717300415039062, "logits/rejected": -2.182730197906494, "logps/chosen": -184.40335083007812, "logps/rejected": -1261.567138671875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.1220561265945435, "rewards/margins": 11.054956436157227, "rewards/rejected": -12.17701244354248, "step": 12000 }, { "epoch": 0.72, "learning_rate": 1.1299252043072478e-06, "logits/chosen": -2.6794955730438232, "logits/rejected": -2.216703414916992, "logps/chosen": -174.71340942382812, "logps/rejected": -1267.625244140625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.072036862373352, "rewards/margins": 11.160558700561523, "rewards/rejected": -12.232595443725586, "step": 12010 }, { "epoch": 0.72, "learning_rate": 1.1255754745141617e-06, "logits/chosen": -2.6612839698791504, "logits/rejected": -2.2987060546875, "logps/chosen": -193.7748565673828, "logps/rejected": -1137.1163330078125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.2622792720794678, "rewards/margins": 9.6884765625, "rewards/rejected": -10.950756072998047, "step": 12020 }, { "epoch": 0.72, "learning_rate": 1.1212316995507079e-06, "logits/chosen": -2.692817211151123, "logits/rejected": -2.246981143951416, "logps/chosen": -203.880859375, "logps/rejected": -1277.25, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.2494544982910156, "rewards/margins": 11.091383934020996, "rewards/rejected": -12.340837478637695, "step": 12030 }, { "epoch": 0.72, "learning_rate": 1.1168938982367162e-06, "logits/chosen": -2.6536331176757812, "logits/rejected": -2.2482407093048096, "logps/chosen": -173.14845275878906, "logps/rejected": -1227.80517578125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.0356905460357666, "rewards/margins": 10.808980941772461, "rewards/rejected": -11.844671249389648, "step": 12040 }, { "epoch": 0.72, "learning_rate": 1.112562089366139e-06, "logits/chosen": -2.6278417110443115, "logits/rejected": -2.1871559619903564, "logps/chosen": -211.38430786132812, "logps/rejected": -1264.448486328125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.3910199403762817, "rewards/margins": 10.821457862854004, "rewards/rejected": -12.212477684020996, "step": 12050 }, { "epoch": 0.72, "learning_rate": 1.108236291706965e-06, "logits/chosen": -2.645273447036743, "logits/rejected": -2.227074384689331, "logps/chosen": -195.989501953125, "logps/rejected": -1285.885498046875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.260510802268982, "rewards/margins": 11.161626815795898, "rewards/rejected": -12.422136306762695, "step": 12060 }, { "epoch": 0.72, "learning_rate": 1.1039165240011388e-06, "logits/chosen": -2.681525230407715, "logits/rejected": -2.178351640701294, "logps/chosen": -190.31344604492188, "logps/rejected": -1220.031982421875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.1926311254501343, "rewards/margins": 10.562582015991211, "rewards/rejected": -11.755213737487793, "step": 12070 }, { "epoch": 0.72, "learning_rate": 1.0996028049644792e-06, "logits/chosen": -2.662846326828003, "logits/rejected": -2.2142162322998047, "logps/chosen": -214.1508331298828, "logps/rejected": -1111.049072265625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.4760183095932007, "rewards/margins": 9.213679313659668, "rewards/rejected": -10.689699172973633, "step": 12080 }, { "epoch": 0.72, "learning_rate": 1.095295153286599e-06, "logits/chosen": -2.6866848468780518, "logits/rejected": -2.2753748893737793, "logps/chosen": -183.940185546875, "logps/rejected": -1207.918212890625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.1320769786834717, "rewards/margins": 10.512657165527344, "rewards/rejected": -11.644734382629395, "step": 12090 }, { "epoch": 0.72, "learning_rate": 1.090993587630824e-06, "logits/chosen": -2.6410410404205322, "logits/rejected": -2.1447455883026123, "logps/chosen": -217.2635498046875, "logps/rejected": -1309.773681640625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.434149980545044, "rewards/margins": 11.219610214233398, "rewards/rejected": -12.653759956359863, "step": 12100 }, { "epoch": 0.72, "learning_rate": 1.0866981266341084e-06, "logits/chosen": -2.7037551403045654, "logits/rejected": -2.225490093231201, "logps/chosen": -206.97787475585938, "logps/rejected": -1398.537109375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.2295153141021729, "rewards/margins": 12.304937362670898, "rewards/rejected": -13.534452438354492, "step": 12110 }, { "epoch": 0.72, "learning_rate": 1.082408788906964e-06, "logits/chosen": -2.627535581588745, "logits/rejected": -2.1925041675567627, "logps/chosen": -175.8035125732422, "logps/rejected": -1310.95458984375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.082283616065979, "rewards/margins": 11.59314250946045, "rewards/rejected": -12.67542552947998, "step": 12120 }, { "epoch": 0.72, "learning_rate": 1.078125593033366e-06, "logits/chosen": -2.6692299842834473, "logits/rejected": -2.1882576942443848, "logps/chosen": -199.92593383789062, "logps/rejected": -1419.90625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.2494189739227295, "rewards/margins": 12.493829727172852, "rewards/rejected": -13.743250846862793, "step": 12130 }, { "epoch": 0.72, "learning_rate": 1.0738485575706834e-06, "logits/chosen": -2.676517963409424, "logits/rejected": -2.2814550399780273, "logps/chosen": -197.37060546875, "logps/rejected": -1307.5328369140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.236182689666748, "rewards/margins": 11.39842414855957, "rewards/rejected": -12.63460636138916, "step": 12140 }, { "epoch": 0.72, "learning_rate": 1.0695777010495936e-06, "logits/chosen": -2.6884961128234863, "logits/rejected": -2.252476930618286, "logps/chosen": -201.0217742919922, "logps/rejected": -1290.505126953125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.2830235958099365, "rewards/margins": 11.179656028747559, "rewards/rejected": -12.462679862976074, "step": 12150 }, { "epoch": 0.73, "learning_rate": 1.065313041974003e-06, "logits/chosen": -2.639547824859619, "logits/rejected": -2.2105681896209717, "logps/chosen": -181.94541931152344, "logps/rejected": -1448.171142578125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.108457326889038, "rewards/margins": 12.92326545715332, "rewards/rejected": -14.031723022460938, "step": 12160 }, { "epoch": 0.73, "learning_rate": 1.0610545988209671e-06, "logits/chosen": -2.7427210807800293, "logits/rejected": -2.293287992477417, "logps/chosen": -202.73580932617188, "logps/rejected": -1285.069580078125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.3460676670074463, "rewards/margins": 11.057480812072754, "rewards/rejected": -12.403548240661621, "step": 12170 }, { "epoch": 0.73, "learning_rate": 1.0568023900406108e-06, "logits/chosen": -2.691849708557129, "logits/rejected": -2.2438807487487793, "logps/chosen": -186.82894897460938, "logps/rejected": -1246.1009521484375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.0914620161056519, "rewards/margins": 10.9232177734375, "rewards/rejected": -12.014680862426758, "step": 12180 }, { "epoch": 0.73, "learning_rate": 1.0525564340560476e-06, "logits/chosen": -2.6386990547180176, "logits/rejected": -2.1966171264648438, "logps/chosen": -185.1238250732422, "logps/rejected": -1255.1148681640625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.2038465738296509, "rewards/margins": 10.910414695739746, "rewards/rejected": -12.114261627197266, "step": 12190 }, { "epoch": 0.73, "learning_rate": 1.048316749263298e-06, "logits/chosen": -2.6191465854644775, "logits/rejected": -2.2068686485290527, "logps/chosen": -237.9846649169922, "logps/rejected": -1279.3477783203125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.7072252035140991, "rewards/margins": 10.642881393432617, "rewards/rejected": -12.350106239318848, "step": 12200 }, { "epoch": 0.73, "learning_rate": 1.044083354031217e-06, "logits/chosen": -2.6836929321289062, "logits/rejected": -2.289625406265259, "logps/chosen": -184.58509826660156, "logps/rejected": -1135.781982421875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.1118168830871582, "rewards/margins": 9.80505657196045, "rewards/rejected": -10.916872024536133, "step": 12210 }, { "epoch": 0.73, "learning_rate": 1.039856266701404e-06, "logits/chosen": -2.6601803302764893, "logits/rejected": -2.189298152923584, "logps/chosen": -190.9080352783203, "logps/rejected": -1322.838623046875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.1929147243499756, "rewards/margins": 11.594941139221191, "rewards/rejected": -12.78785514831543, "step": 12220 }, { "epoch": 0.73, "learning_rate": 1.035635505588132e-06, "logits/chosen": -2.6691157817840576, "logits/rejected": -2.235931873321533, "logps/chosen": -166.89657592773438, "logps/rejected": -1178.704833984375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.0135897397994995, "rewards/margins": 10.321293830871582, "rewards/rejected": -11.334881782531738, "step": 12230 }, { "epoch": 0.73, "learning_rate": 1.0314210889782642e-06, "logits/chosen": -2.6448354721069336, "logits/rejected": -2.2196128368377686, "logps/chosen": -188.511474609375, "logps/rejected": -1234.0687255859375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.2140707969665527, "rewards/margins": 10.684258460998535, "rewards/rejected": -11.898329734802246, "step": 12240 }, { "epoch": 0.73, "learning_rate": 1.0272130351311758e-06, "logits/chosen": -2.6663403511047363, "logits/rejected": -2.2466037273406982, "logps/chosen": -189.37844848632812, "logps/rejected": -1187.046142578125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.1639484167099, "rewards/margins": 10.26215648651123, "rewards/rejected": -11.426103591918945, "step": 12250 }, { "epoch": 0.73, "learning_rate": 1.0230113622786744e-06, "logits/chosen": -2.6671876907348633, "logits/rejected": -2.3231263160705566, "logps/chosen": -179.62448120117188, "logps/rejected": -1230.8353271484375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.0858711004257202, "rewards/margins": 10.794492721557617, "rewards/rejected": -11.880363464355469, "step": 12260 }, { "epoch": 0.73, "learning_rate": 1.0188160886249219e-06, "logits/chosen": -2.6754186153411865, "logits/rejected": -2.2237744331359863, "logps/chosen": -164.10980224609375, "logps/rejected": -1253.9830322265625, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.9184304475784302, "rewards/margins": 11.18305778503418, "rewards/rejected": -12.101489067077637, "step": 12270 }, { "epoch": 0.73, "learning_rate": 1.0146272323463548e-06, "logits/chosen": -2.6572108268737793, "logits/rejected": -2.210970640182495, "logps/chosen": -185.42538452148438, "logps/rejected": -1337.664306640625, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.1407896280288696, "rewards/margins": 11.784409523010254, "rewards/rejected": -12.925198554992676, "step": 12280 }, { "epoch": 0.73, "learning_rate": 1.0104448115916035e-06, "logits/chosen": -2.671604871749878, "logits/rejected": -2.2632572650909424, "logps/chosen": -143.889892578125, "logps/rejected": -1244.36474609375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.7241929769515991, "rewards/margins": 11.282548904418945, "rewards/rejected": -12.006742477416992, "step": 12290 }, { "epoch": 0.73, "learning_rate": 1.0062688444814208e-06, "logits/chosen": -2.6593387126922607, "logits/rejected": -2.259366512298584, "logps/chosen": -183.2617950439453, "logps/rejected": -1187.4873046875, "loss": 0.0126, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1031699180603027, "rewards/margins": 10.335807800292969, "rewards/rejected": -11.438979148864746, "step": 12300 }, { "epoch": 0.73, "learning_rate": 1.0020993491085936e-06, "logits/chosen": -2.671769618988037, "logits/rejected": -2.2467968463897705, "logps/chosen": -157.1591339111328, "logps/rejected": -1172.5406494140625, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.8475213050842285, "rewards/margins": 10.435922622680664, "rewards/rejected": -11.283443450927734, "step": 12310 }, { "epoch": 0.73, "learning_rate": 9.979363435378717e-07, "logits/chosen": -2.658071994781494, "logits/rejected": -2.277698278427124, "logps/chosen": -155.0120849609375, "logps/rejected": -1244.922119140625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.8463680148124695, "rewards/margins": 11.151788711547852, "rewards/rejected": -11.99815559387207, "step": 12320 }, { "epoch": 0.74, "learning_rate": 9.937798458058864e-07, "logits/chosen": -2.611347198486328, "logits/rejected": -2.1982204914093018, "logps/chosen": -152.5498504638672, "logps/rejected": -1221.0391845703125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.8382118344306946, "rewards/margins": 10.927261352539062, "rewards/rejected": -11.765473365783691, "step": 12330 }, { "epoch": 0.74, "learning_rate": 9.896298739210745e-07, "logits/chosen": -2.6760573387145996, "logits/rejected": -2.262755870819092, "logps/chosen": -161.95468139648438, "logps/rejected": -1308.771240234375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9251921772956848, "rewards/margins": 11.71369743347168, "rewards/rejected": -12.638891220092773, "step": 12340 }, { "epoch": 0.74, "learning_rate": 9.85486445863597e-07, "logits/chosen": -2.6713201999664307, "logits/rejected": -2.2018585205078125, "logps/chosen": -145.06082153320312, "logps/rejected": -1328.052001953125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8253404498100281, "rewards/margins": 12.014080047607422, "rewards/rejected": -12.8394193649292, "step": 12350 }, { "epoch": 0.74, "learning_rate": 9.813495795852646e-07, "logits/chosen": -2.6409125328063965, "logits/rejected": -2.204023838043213, "logps/chosen": -168.88502502441406, "logps/rejected": -1289.7230224609375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.0140244960784912, "rewards/margins": 11.437627792358398, "rewards/rejected": -12.451652526855469, "step": 12360 }, { "epoch": 0.74, "learning_rate": 9.772192930094588e-07, "logits/chosen": -2.637007474899292, "logits/rejected": -2.121833324432373, "logps/chosen": -152.52601623535156, "logps/rejected": -1168.227294921875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.8251526951789856, "rewards/margins": 10.420989990234375, "rewards/rejected": -11.246142387390137, "step": 12370 }, { "epoch": 0.74, "learning_rate": 9.730956040310499e-07, "logits/chosen": -2.691771984100342, "logits/rejected": -2.2512869834899902, "logps/chosen": -174.83450317382812, "logps/rejected": -1134.830078125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.024186611175537, "rewards/margins": 9.885404586791992, "rewards/rejected": -10.909590721130371, "step": 12380 }, { "epoch": 0.74, "learning_rate": 9.689785305163307e-07, "logits/chosen": -2.664811372756958, "logits/rejected": -2.2151479721069336, "logps/chosen": -145.58462524414062, "logps/rejected": -1182.7923583984375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.7275064587593079, "rewards/margins": 10.661162376403809, "rewards/rejected": -11.388669967651367, "step": 12390 }, { "epoch": 0.74, "learning_rate": 9.648680903029245e-07, "logits/chosen": -2.6662790775299072, "logits/rejected": -2.287348985671997, "logps/chosen": -143.27102661132812, "logps/rejected": -1194.017333984375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.7125800251960754, "rewards/margins": 10.792291641235352, "rewards/rejected": -11.50487232208252, "step": 12400 }, { "epoch": 0.74, "learning_rate": 9.607643011997195e-07, "logits/chosen": -2.6860768795013428, "logits/rejected": -2.2764711380004883, "logps/chosen": -173.8489532470703, "logps/rejected": -1296.558837890625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.0201029777526855, "rewards/margins": 11.49165153503418, "rewards/rejected": -12.511754035949707, "step": 12410 }, { "epoch": 0.74, "learning_rate": 9.566671809867864e-07, "logits/chosen": -2.6700212955474854, "logits/rejected": -2.2919955253601074, "logps/chosen": -155.58798217773438, "logps/rejected": -1162.3597412109375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.8391944766044617, "rewards/margins": 10.340049743652344, "rewards/rejected": -11.179243087768555, "step": 12420 }, { "epoch": 0.74, "learning_rate": 9.52576747415302e-07, "logits/chosen": -2.665966749191284, "logits/rejected": -2.326286792755127, "logps/chosen": -150.83148193359375, "logps/rejected": -1322.6256103515625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.7846275568008423, "rewards/margins": 11.986906051635742, "rewards/rejected": -12.77153491973877, "step": 12430 }, { "epoch": 0.74, "learning_rate": 9.484930182074722e-07, "logits/chosen": -2.657564640045166, "logits/rejected": -2.22395920753479, "logps/chosen": -168.1222686767578, "logps/rejected": -1227.8724365234375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.9654632806777954, "rewards/margins": 10.863714218139648, "rewards/rejected": -11.829176902770996, "step": 12440 }, { "epoch": 0.74, "learning_rate": 9.444160110564563e-07, "logits/chosen": -2.677396297454834, "logits/rejected": -2.18302321434021, "logps/chosen": -149.75918579101562, "logps/rejected": -1145.447509765625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.7426698207855225, "rewards/margins": 10.266037940979004, "rewards/rejected": -11.008707046508789, "step": 12450 }, { "epoch": 0.74, "learning_rate": 9.403457436262906e-07, "logits/chosen": -2.654158592224121, "logits/rejected": -2.240130662918091, "logps/chosen": -183.4116668701172, "logps/rejected": -1234.705078125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.1435333490371704, "rewards/margins": 10.754674911499023, "rewards/rejected": -11.898208618164062, "step": 12460 }, { "epoch": 0.74, "learning_rate": 9.362822335518062e-07, "logits/chosen": -2.620704174041748, "logits/rejected": -2.1191720962524414, "logps/chosen": -181.8234100341797, "logps/rejected": -1332.8526611328125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.0477808713912964, "rewards/margins": 11.827988624572754, "rewards/rejected": -12.875768661499023, "step": 12470 }, { "epoch": 0.74, "learning_rate": 9.322254984385651e-07, "logits/chosen": -2.674955368041992, "logits/rejected": -2.2813265323638916, "logps/chosen": -139.9225616455078, "logps/rejected": -1191.1591796875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.7620293498039246, "rewards/margins": 10.708468437194824, "rewards/rejected": -11.470499038696289, "step": 12480 }, { "epoch": 0.74, "learning_rate": 9.281755558627686e-07, "logits/chosen": -2.6602911949157715, "logits/rejected": -2.1803629398345947, "logps/chosen": -158.12271118164062, "logps/rejected": -1334.6392822265625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.8359683752059937, "rewards/margins": 12.070714950561523, "rewards/rejected": -12.906682968139648, "step": 12490 }, { "epoch": 0.75, "learning_rate": 9.241324233711929e-07, "logits/chosen": -2.6210074424743652, "logits/rejected": -2.2206318378448486, "logps/chosen": -152.96688842773438, "logps/rejected": -1248.0006103515625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.8983484506607056, "rewards/margins": 11.138702392578125, "rewards/rejected": -12.037050247192383, "step": 12500 }, { "epoch": 0.75, "learning_rate": 9.200961184811075e-07, "logits/chosen": -2.6159567832946777, "logits/rejected": -2.2066009044647217, "logps/chosen": -181.7709197998047, "logps/rejected": -1248.8392333984375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.0818769931793213, "rewards/margins": 10.952686309814453, "rewards/rejected": -12.034563064575195, "step": 12510 }, { "epoch": 0.75, "learning_rate": 9.160666586802011e-07, "logits/chosen": -2.6807777881622314, "logits/rejected": -2.228256940841675, "logps/chosen": -150.993408203125, "logps/rejected": -1311.835205078125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.825247585773468, "rewards/margins": 11.840875625610352, "rewards/rejected": -12.66612434387207, "step": 12520 }, { "epoch": 0.75, "learning_rate": 9.12044061426505e-07, "logits/chosen": -2.685821771621704, "logits/rejected": -2.308408260345459, "logps/chosen": -152.82186889648438, "logps/rejected": -1244.049560546875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8500364422798157, "rewards/margins": 11.15960693359375, "rewards/rejected": -12.009645462036133, "step": 12530 }, { "epoch": 0.75, "learning_rate": 9.080283441483182e-07, "logits/chosen": -2.6817920207977295, "logits/rejected": -2.2532148361206055, "logps/chosen": -155.13894653320312, "logps/rejected": -1286.9625244140625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.8206734657287598, "rewards/margins": 11.612648963928223, "rewards/rejected": -12.433321952819824, "step": 12540 }, { "epoch": 0.75, "learning_rate": 9.040195242441322e-07, "logits/chosen": -2.658567428588867, "logits/rejected": -2.1805081367492676, "logps/chosen": -150.51686096191406, "logps/rejected": -1141.17724609375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.8117367029190063, "rewards/margins": 10.165749549865723, "rewards/rejected": -10.977485656738281, "step": 12550 }, { "epoch": 0.75, "learning_rate": 9.000176190825513e-07, "logits/chosen": -2.6792659759521484, "logits/rejected": -2.2553915977478027, "logps/chosen": -167.29043579101562, "logps/rejected": -1228.302978515625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.8949813842773438, "rewards/margins": 10.941363334655762, "rewards/rejected": -11.836343765258789, "step": 12560 }, { "epoch": 0.75, "learning_rate": 8.960226460022272e-07, "logits/chosen": -2.695333957672119, "logits/rejected": -2.1948001384735107, "logps/chosen": -174.39013671875, "logps/rejected": -1158.2430419921875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.0396320819854736, "rewards/margins": 10.100898742675781, "rewards/rejected": -11.140531539916992, "step": 12570 }, { "epoch": 0.75, "learning_rate": 8.920346223117721e-07, "logits/chosen": -2.6415774822235107, "logits/rejected": -2.201040506362915, "logps/chosen": -153.525146484375, "logps/rejected": -1195.8531494140625, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.8580840826034546, "rewards/margins": 10.666096687316895, "rewards/rejected": -11.524181365966797, "step": 12580 }, { "epoch": 0.75, "learning_rate": 8.88053565289691e-07, "logits/chosen": -2.685497760772705, "logits/rejected": -2.2530007362365723, "logps/chosen": -166.4014434814453, "logps/rejected": -1194.649169921875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.9929316639900208, "rewards/margins": 10.518499374389648, "rewards/rejected": -11.511429786682129, "step": 12590 }, { "epoch": 0.75, "learning_rate": 8.840794921843085e-07, "logits/chosen": -2.624795913696289, "logits/rejected": -2.2480812072753906, "logps/chosen": -155.05343627929688, "logps/rejected": -1237.7613525390625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.9196068048477173, "rewards/margins": 11.013126373291016, "rewards/rejected": -11.932733535766602, "step": 12600 }, { "epoch": 0.75, "learning_rate": 8.801124202136846e-07, "logits/chosen": -2.701706647872925, "logits/rejected": -2.2729063034057617, "logps/chosen": -159.85902404785156, "logps/rejected": -1218.8260498046875, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.8510640263557434, "rewards/margins": 10.89560604095459, "rewards/rejected": -11.74666976928711, "step": 12610 }, { "epoch": 0.75, "learning_rate": 8.761523665655508e-07, "logits/chosen": -2.652740955352783, "logits/rejected": -2.2721943855285645, "logps/chosen": -161.44705200195312, "logps/rejected": -1173.760498046875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.9070202112197876, "rewards/margins": 10.377522468566895, "rewards/rejected": -11.28454303741455, "step": 12620 }, { "epoch": 0.75, "learning_rate": 8.721993483972294e-07, "logits/chosen": -2.616049289703369, "logits/rejected": -2.278193950653076, "logps/chosen": -147.48715209960938, "logps/rejected": -1177.179931640625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.805441677570343, "rewards/margins": 10.516280174255371, "rewards/rejected": -11.321722984313965, "step": 12630 }, { "epoch": 0.75, "learning_rate": 8.682533828355616e-07, "logits/chosen": -2.681006908416748, "logits/rejected": -2.210326671600342, "logps/chosen": -158.94662475585938, "logps/rejected": -1211.8001708984375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8334416151046753, "rewards/margins": 10.846317291259766, "rewards/rejected": -11.67975902557373, "step": 12640 }, { "epoch": 0.75, "learning_rate": 8.643144869768294e-07, "logits/chosen": -2.708228588104248, "logits/rejected": -2.373368740081787, "logps/chosen": -189.62631225585938, "logps/rejected": -1233.6568603515625, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.1873180866241455, "rewards/margins": 10.70081615447998, "rewards/rejected": -11.88813591003418, "step": 12650 }, { "epoch": 0.75, "learning_rate": 8.6038267788669e-07, "logits/chosen": -2.6643872261047363, "logits/rejected": -2.256521701812744, "logps/chosen": -161.9615936279297, "logps/rejected": -1239.02880859375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.860692024230957, "rewards/margins": 11.08536434173584, "rewards/rejected": -11.94605541229248, "step": 12660 }, { "epoch": 0.76, "learning_rate": 8.56457972600093e-07, "logits/chosen": -2.6932599544525146, "logits/rejected": -2.326667308807373, "logps/chosen": -141.00509643554688, "logps/rejected": -1195.75732421875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.7428602576255798, "rewards/margins": 10.78742790222168, "rewards/rejected": -11.530289649963379, "step": 12670 }, { "epoch": 0.76, "learning_rate": 8.525403881212083e-07, "logits/chosen": -2.688525438308716, "logits/rejected": -2.2554612159729004, "logps/chosen": -153.75303649902344, "logps/rejected": -1194.4525146484375, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.8197674751281738, "rewards/margins": 10.654682159423828, "rewards/rejected": -11.474449157714844, "step": 12680 }, { "epoch": 0.76, "learning_rate": 8.486299414233598e-07, "logits/chosen": -2.683134078979492, "logits/rejected": -2.237874984741211, "logps/chosen": -188.1017608642578, "logps/rejected": -1218.7811279296875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.2083886861801147, "rewards/margins": 10.539608001708984, "rewards/rejected": -11.74799633026123, "step": 12690 }, { "epoch": 0.76, "learning_rate": 8.447266494489408e-07, "logits/chosen": -2.6903889179229736, "logits/rejected": -2.251967668533325, "logps/chosen": -163.27590942382812, "logps/rejected": -1198.499755859375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.9330371618270874, "rewards/margins": 10.607423782348633, "rewards/rejected": -11.540462493896484, "step": 12700 }, { "epoch": 0.76, "learning_rate": 8.408305291093488e-07, "logits/chosen": -2.643890857696533, "logits/rejected": -2.2460498809814453, "logps/chosen": -176.0294647216797, "logps/rejected": -1271.7115478515625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.0676785707473755, "rewards/margins": 11.218488693237305, "rewards/rejected": -12.286166191101074, "step": 12710 }, { "epoch": 0.76, "learning_rate": 8.369415972849087e-07, "logits/chosen": -2.6614792346954346, "logits/rejected": -2.2340197563171387, "logps/chosen": -164.5394744873047, "logps/rejected": -1240.233642578125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.8873366117477417, "rewards/margins": 11.073686599731445, "rewards/rejected": -11.961023330688477, "step": 12720 }, { "epoch": 0.76, "learning_rate": 8.330598708248011e-07, "logits/chosen": -2.685056209564209, "logits/rejected": -2.2430357933044434, "logps/chosen": -182.60873413085938, "logps/rejected": -1230.0767822265625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.116711139678955, "rewards/margins": 10.743387222290039, "rewards/rejected": -11.860098838806152, "step": 12730 }, { "epoch": 0.76, "learning_rate": 8.291853665469887e-07, "logits/chosen": -2.719602584838867, "logits/rejected": -2.321622133255005, "logps/chosen": -181.74244689941406, "logps/rejected": -1205.73193359375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.1487635374069214, "rewards/margins": 10.465904235839844, "rewards/rejected": -11.614667892456055, "step": 12740 }, { "epoch": 0.76, "learning_rate": 8.253181012381409e-07, "logits/chosen": -2.6575121879577637, "logits/rejected": -2.2720494270324707, "logps/chosen": -195.53575134277344, "logps/rejected": -1303.7784423828125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.3223384618759155, "rewards/margins": 11.268315315246582, "rewards/rejected": -12.590653419494629, "step": 12750 }, { "epoch": 0.76, "learning_rate": 8.214580916535683e-07, "logits/chosen": -2.6711089611053467, "logits/rejected": -2.2366392612457275, "logps/chosen": -180.1494598388672, "logps/rejected": -1362.3228759765625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.1187859773635864, "rewards/margins": 12.059918403625488, "rewards/rejected": -13.178703308105469, "step": 12760 }, { "epoch": 0.76, "learning_rate": 8.176053545171403e-07, "logits/chosen": -2.6228363513946533, "logits/rejected": -2.222689628601074, "logps/chosen": -161.51174926757812, "logps/rejected": -1225.2271728515625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.8777238130569458, "rewards/margins": 10.940903663635254, "rewards/rejected": -11.818626403808594, "step": 12770 }, { "epoch": 0.76, "learning_rate": 8.13759906521221e-07, "logits/chosen": -2.6365768909454346, "logits/rejected": -2.195507764816284, "logps/chosen": -191.83201599121094, "logps/rejected": -1180.65625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.217861294746399, "rewards/margins": 10.143416404724121, "rewards/rejected": -11.361279487609863, "step": 12780 }, { "epoch": 0.76, "learning_rate": 8.099217643265928e-07, "logits/chosen": -2.6504197120666504, "logits/rejected": -2.1798901557922363, "logps/chosen": -198.8717498779297, "logps/rejected": -1325.856201171875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.2890946865081787, "rewards/margins": 11.52834701538086, "rewards/rejected": -12.8174409866333, "step": 12790 }, { "epoch": 0.76, "learning_rate": 8.06090944562385e-07, "logits/chosen": -2.6366419792175293, "logits/rejected": -2.1820199489593506, "logps/chosen": -180.94772338867188, "logps/rejected": -1316.943115234375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.1441292762756348, "rewards/margins": 11.587011337280273, "rewards/rejected": -12.731141090393066, "step": 12800 }, { "epoch": 0.76, "learning_rate": 8.022674638259995e-07, "logits/chosen": -2.687102794647217, "logits/rejected": -2.200025796890259, "logps/chosen": -195.7443084716797, "logps/rejected": -1190.1416015625, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -1.2137089967727661, "rewards/margins": 10.235818862915039, "rewards/rejected": -11.449528694152832, "step": 12810 }, { "epoch": 0.76, "learning_rate": 7.984513386830453e-07, "logits/chosen": -2.696150302886963, "logits/rejected": -2.3030591011047363, "logps/chosen": -189.81515502929688, "logps/rejected": -1216.6669921875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.2418229579925537, "rewards/margins": 10.495587348937988, "rewards/rejected": -11.737411499023438, "step": 12820 }, { "epoch": 0.77, "learning_rate": 7.94642585667261e-07, "logits/chosen": -2.7001521587371826, "logits/rejected": -2.1935391426086426, "logps/chosen": -200.95327758789062, "logps/rejected": -1306.6044921875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.2740213871002197, "rewards/margins": 11.338061332702637, "rewards/rejected": -12.612082481384277, "step": 12830 }, { "epoch": 0.77, "learning_rate": 7.908412212804414e-07, "logits/chosen": -2.64448881149292, "logits/rejected": -2.1777243614196777, "logps/chosen": -201.76072692871094, "logps/rejected": -1281.703125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.3328297138214111, "rewards/margins": 11.045312881469727, "rewards/rejected": -12.378142356872559, "step": 12840 }, { "epoch": 0.77, "learning_rate": 7.870472619923755e-07, "logits/chosen": -2.6775288581848145, "logits/rejected": -2.235886335372925, "logps/chosen": -160.6778564453125, "logps/rejected": -1253.2672119140625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.9192790985107422, "rewards/margins": 11.16749382019043, "rewards/rejected": -12.086771965026855, "step": 12850 }, { "epoch": 0.77, "learning_rate": 7.832607242407631e-07, "logits/chosen": -2.650095224380493, "logits/rejected": -2.2364020347595215, "logps/chosen": -183.8063507080078, "logps/rejected": -1209.2640380859375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.0781023502349854, "rewards/margins": 10.567517280578613, "rewards/rejected": -11.64561939239502, "step": 12860 }, { "epoch": 0.77, "learning_rate": 7.794816244311526e-07, "logits/chosen": -2.6251273155212402, "logits/rejected": -2.2273685932159424, "logps/chosen": -173.90574645996094, "logps/rejected": -1335.7841796875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.0581132173538208, "rewards/margins": 11.862656593322754, "rewards/rejected": -12.920770645141602, "step": 12870 }, { "epoch": 0.77, "learning_rate": 7.757099789368663e-07, "logits/chosen": -2.6846981048583984, "logits/rejected": -2.2969648838043213, "logps/chosen": -157.6529998779297, "logps/rejected": -1178.612060546875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8531044125556946, "rewards/margins": 10.486659049987793, "rewards/rejected": -11.339765548706055, "step": 12880 }, { "epoch": 0.77, "learning_rate": 7.7194580409893e-07, "logits/chosen": -2.627392292022705, "logits/rejected": -2.2261767387390137, "logps/chosen": -177.13339233398438, "logps/rejected": -1211.6153564453125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.0407990217208862, "rewards/margins": 10.635047912597656, "rewards/rejected": -11.675847053527832, "step": 12890 }, { "epoch": 0.77, "learning_rate": 7.681891162260016e-07, "logits/chosen": -2.674931764602661, "logits/rejected": -2.2001960277557373, "logps/chosen": -184.78048706054688, "logps/rejected": -1247.32763671875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.0805585384368896, "rewards/margins": 10.953813552856445, "rewards/rejected": -12.034372329711914, "step": 12900 }, { "epoch": 0.77, "learning_rate": 7.644399315943016e-07, "logits/chosen": -2.6839587688446045, "logits/rejected": -2.255333185195923, "logps/chosen": -155.64060974121094, "logps/rejected": -1321.2723388671875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.904283881187439, "rewards/margins": 11.863615989685059, "rewards/rejected": -12.767901420593262, "step": 12910 }, { "epoch": 0.77, "learning_rate": 7.606982664475421e-07, "logits/chosen": -2.714613199234009, "logits/rejected": -2.2923293113708496, "logps/chosen": -214.0587921142578, "logps/rejected": -1296.0406494140625, "loss": 0.0397, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.398858666419983, "rewards/margins": 11.094388008117676, "rewards/rejected": -12.493246078491211, "step": 12920 }, { "epoch": 0.77, "learning_rate": 7.569641369968539e-07, "logits/chosen": -2.6917829513549805, "logits/rejected": -2.2468316555023193, "logps/chosen": -170.21600341796875, "logps/rejected": -1269.312744140625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.9905278086662292, "rewards/margins": 11.267845153808594, "rewards/rejected": -12.258373260498047, "step": 12930 }, { "epoch": 0.77, "learning_rate": 7.532375594207236e-07, "logits/chosen": -2.6901912689208984, "logits/rejected": -2.180147409439087, "logps/chosen": -152.74215698242188, "logps/rejected": -1350.9908447265625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.7753187417984009, "rewards/margins": 12.291214942932129, "rewards/rejected": -13.066534042358398, "step": 12940 }, { "epoch": 0.77, "learning_rate": 7.495185498649132e-07, "logits/chosen": -2.669287919998169, "logits/rejected": -2.2561023235321045, "logps/chosen": -170.34580993652344, "logps/rejected": -1266.3555908203125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.0135958194732666, "rewards/margins": 11.203359603881836, "rewards/rejected": -12.216954231262207, "step": 12950 }, { "epoch": 0.77, "learning_rate": 7.45807124442399e-07, "logits/chosen": -2.6803717613220215, "logits/rejected": -2.2732787132263184, "logps/chosen": -174.05111694335938, "logps/rejected": -1233.6781005859375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.003154993057251, "rewards/margins": 10.904004096984863, "rewards/rejected": -11.907158851623535, "step": 12960 }, { "epoch": 0.77, "learning_rate": 7.421032992332967e-07, "logits/chosen": -2.6921281814575195, "logits/rejected": -2.278663158416748, "logps/chosen": -174.88394165039062, "logps/rejected": -1194.1551513671875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0726388692855835, "rewards/margins": 10.44238567352295, "rewards/rejected": -11.51502513885498, "step": 12970 }, { "epoch": 0.77, "learning_rate": 7.384070902847943e-07, "logits/chosen": -2.683811902999878, "logits/rejected": -2.2557904720306396, "logps/chosen": -167.99618530273438, "logps/rejected": -1221.670654296875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.9177943468093872, "rewards/margins": 10.854341506958008, "rewards/rejected": -11.772135734558105, "step": 12980 }, { "epoch": 0.77, "learning_rate": 7.347185136110808e-07, "logits/chosen": -2.6894168853759766, "logits/rejected": -2.2807250022888184, "logps/chosen": -185.720947265625, "logps/rejected": -1281.350341796875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.1329227685928345, "rewards/margins": 11.245623588562012, "rewards/rejected": -12.378546714782715, "step": 12990 }, { "epoch": 0.78, "learning_rate": 7.31037585193278e-07, "logits/chosen": -2.623938798904419, "logits/rejected": -2.2227213382720947, "logps/chosen": -168.48818969726562, "logps/rejected": -1299.114501953125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.9466837048530579, "rewards/margins": 11.610816955566406, "rewards/rejected": -12.557500839233398, "step": 13000 }, { "epoch": 0.78, "learning_rate": 7.273643209793719e-07, "logits/chosen": -2.6866536140441895, "logits/rejected": -2.2167487144470215, "logps/chosen": -159.50753784179688, "logps/rejected": -1251.9810791015625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.847709059715271, "rewards/margins": 11.217992782592773, "rewards/rejected": -12.065701484680176, "step": 13010 }, { "epoch": 0.78, "learning_rate": 7.236987368841386e-07, "logits/chosen": -2.637845039367676, "logits/rejected": -2.1932952404022217, "logps/chosen": -179.9394073486328, "logps/rejected": -1110.22265625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.1683967113494873, "rewards/margins": 9.51075553894043, "rewards/rejected": -10.679153442382812, "step": 13020 }, { "epoch": 0.78, "learning_rate": 7.200408487890859e-07, "logits/chosen": -2.7048897743225098, "logits/rejected": -2.2351491451263428, "logps/chosen": -165.5768280029297, "logps/rejected": -1252.8206787109375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.9825845956802368, "rewards/margins": 11.100459098815918, "rewards/rejected": -12.083043098449707, "step": 13030 }, { "epoch": 0.78, "learning_rate": 7.163906725423717e-07, "logits/chosen": -2.678225040435791, "logits/rejected": -2.2554025650024414, "logps/chosen": -163.45361328125, "logps/rejected": -1187.052978515625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.9612115621566772, "rewards/margins": 10.474334716796875, "rewards/rejected": -11.435545921325684, "step": 13040 }, { "epoch": 0.78, "learning_rate": 7.127482239587449e-07, "logits/chosen": -2.6812033653259277, "logits/rejected": -2.2906978130340576, "logps/chosen": -183.73699951171875, "logps/rejected": -1277.0086669921875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.16994309425354, "rewards/margins": 11.151533126831055, "rewards/rejected": -12.3214750289917, "step": 13050 }, { "epoch": 0.78, "learning_rate": 7.091135188194729e-07, "logits/chosen": -2.6475512981414795, "logits/rejected": -2.125187397003174, "logps/chosen": -162.56509399414062, "logps/rejected": -1275.4913330078125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.9636369943618774, "rewards/margins": 11.352842330932617, "rewards/rejected": -12.316479682922363, "step": 13060 }, { "epoch": 0.78, "learning_rate": 7.054865728722732e-07, "logits/chosen": -2.695511817932129, "logits/rejected": -2.306565761566162, "logps/chosen": -179.28182983398438, "logps/rejected": -1256.1376953125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.1137334108352661, "rewards/margins": 11.013031959533691, "rewards/rejected": -12.126764297485352, "step": 13070 }, { "epoch": 0.78, "learning_rate": 7.018674018312468e-07, "logits/chosen": -2.700685977935791, "logits/rejected": -2.23781156539917, "logps/chosen": -173.95602416992188, "logps/rejected": -1210.769775390625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.9836994409561157, "rewards/margins": 10.681673049926758, "rewards/rejected": -11.66537094116211, "step": 13080 }, { "epoch": 0.78, "learning_rate": 6.982560213768088e-07, "logits/chosen": -2.6668457984924316, "logits/rejected": -2.3130152225494385, "logps/chosen": -196.2173614501953, "logps/rejected": -1350.684814453125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.272918462753296, "rewards/margins": 11.78793716430664, "rewards/rejected": -13.0608549118042, "step": 13090 }, { "epoch": 0.78, "learning_rate": 6.946524471556212e-07, "logits/chosen": -2.642522096633911, "logits/rejected": -2.2591443061828613, "logps/chosen": -210.7269287109375, "logps/rejected": -1289.4095458984375, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": -1.4042508602142334, "rewards/margins": 11.056662559509277, "rewards/rejected": -12.46091365814209, "step": 13100 }, { "epoch": 0.78, "learning_rate": 6.91056694780522e-07, "logits/chosen": -2.6940157413482666, "logits/rejected": -2.279496192932129, "logps/chosen": -201.9541778564453, "logps/rejected": -1185.190185546875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.2740415334701538, "rewards/margins": 10.136459350585938, "rewards/rejected": -11.410501480102539, "step": 13110 }, { "epoch": 0.78, "learning_rate": 6.874687798304657e-07, "logits/chosen": -2.677957773208618, "logits/rejected": -2.2969970703125, "logps/chosen": -170.20960998535156, "logps/rejected": -1304.2730712890625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.0024642944335938, "rewards/margins": 11.60912799835205, "rewards/rejected": -12.611592292785645, "step": 13120 }, { "epoch": 0.78, "learning_rate": 6.83888717850445e-07, "logits/chosen": -2.708543539047241, "logits/rejected": -2.2202653884887695, "logps/chosen": -172.69029235839844, "logps/rejected": -1262.724365234375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.9483838081359863, "rewards/margins": 11.248501777648926, "rewards/rejected": -12.196885108947754, "step": 13130 }, { "epoch": 0.78, "learning_rate": 6.803165243514315e-07, "logits/chosen": -2.67868971824646, "logits/rejected": -2.2132647037506104, "logps/chosen": -179.4116973876953, "logps/rejected": -1266.4095458984375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.0981104373931885, "rewards/margins": 11.128485679626465, "rewards/rejected": -12.22659683227539, "step": 13140 }, { "epoch": 0.78, "learning_rate": 6.767522148103054e-07, "logits/chosen": -2.6488633155822754, "logits/rejected": -2.2347731590270996, "logps/chosen": -188.84371948242188, "logps/rejected": -1151.799560546875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.1771914958953857, "rewards/margins": 9.915623664855957, "rewards/rejected": -11.092815399169922, "step": 13150 }, { "epoch": 0.78, "learning_rate": 6.731958046697893e-07, "logits/chosen": -2.726508140563965, "logits/rejected": -2.3081257343292236, "logps/chosen": -173.83941650390625, "logps/rejected": -1308.51123046875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.9566122889518738, "rewards/margins": 11.688125610351562, "rewards/rejected": -12.64473819732666, "step": 13160 }, { "epoch": 0.79, "learning_rate": 6.696473093383798e-07, "logits/chosen": -2.6651060581207275, "logits/rejected": -2.238739252090454, "logps/chosen": -191.33001708984375, "logps/rejected": -1191.375244140625, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -1.1960890293121338, "rewards/margins": 10.284440040588379, "rewards/rejected": -11.480527877807617, "step": 13170 }, { "epoch": 0.79, "learning_rate": 6.66106744190283e-07, "logits/chosen": -2.6319127082824707, "logits/rejected": -2.1692593097686768, "logps/chosen": -163.61129760742188, "logps/rejected": -1328.734130859375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.9775162935256958, "rewards/margins": 11.859718322753906, "rewards/rejected": -12.837234497070312, "step": 13180 }, { "epoch": 0.79, "learning_rate": 6.625741245653466e-07, "logits/chosen": -2.6469409465789795, "logits/rejected": -2.295015811920166, "logps/chosen": -170.31234741210938, "logps/rejected": -1135.0689697265625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.028408408164978, "rewards/margins": 9.866082191467285, "rewards/rejected": -10.894491195678711, "step": 13190 }, { "epoch": 0.79, "learning_rate": 6.590494657689909e-07, "logits/chosen": -2.6353957653045654, "logits/rejected": -2.253103733062744, "logps/chosen": -153.44149780273438, "logps/rejected": -1347.284423828125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.8604599833488464, "rewards/margins": 12.169416427612305, "rewards/rejected": -13.029878616333008, "step": 13200 }, { "epoch": 0.79, "learning_rate": 6.5553278307215e-07, "logits/chosen": -2.728541851043701, "logits/rejected": -2.20381498336792, "logps/chosen": -166.08578491210938, "logps/rejected": -1143.349853515625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.952092170715332, "rewards/margins": 10.038162231445312, "rewards/rejected": -10.990252494812012, "step": 13210 }, { "epoch": 0.79, "learning_rate": 6.520240917111961e-07, "logits/chosen": -2.651829242706299, "logits/rejected": -2.2207236289978027, "logps/chosen": -177.0665283203125, "logps/rejected": -1300.932861328125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.0621365308761597, "rewards/margins": 11.499796867370605, "rewards/rejected": -12.561933517456055, "step": 13220 }, { "epoch": 0.79, "learning_rate": 6.485234068878809e-07, "logits/chosen": -2.6543197631835938, "logits/rejected": -2.2154428958892822, "logps/chosen": -163.3125, "logps/rejected": -1217.4219970703125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.9492557644844055, "rewards/margins": 10.781808853149414, "rewards/rejected": -11.731064796447754, "step": 13230 }, { "epoch": 0.79, "learning_rate": 6.450307437692663e-07, "logits/chosen": -2.680445432662964, "logits/rejected": -2.251739025115967, "logps/chosen": -174.94589233398438, "logps/rejected": -1278.713134765625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.0443763732910156, "rewards/margins": 11.288985252380371, "rewards/rejected": -12.333361625671387, "step": 13240 }, { "epoch": 0.79, "learning_rate": 6.415461174876589e-07, "logits/chosen": -2.6188805103302, "logits/rejected": -2.1884093284606934, "logps/chosen": -207.90133666992188, "logps/rejected": -1085.8309326171875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.3835684061050415, "rewards/margins": 9.021151542663574, "rewards/rejected": -10.4047212600708, "step": 13250 }, { "epoch": 0.79, "learning_rate": 6.380695431405453e-07, "logits/chosen": -2.7068963050842285, "logits/rejected": -2.1892426013946533, "logps/chosen": -182.9019317626953, "logps/rejected": -1269.402587890625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.0909157991409302, "rewards/margins": 11.161664962768555, "rewards/rejected": -12.252579689025879, "step": 13260 }, { "epoch": 0.79, "learning_rate": 6.346010357905269e-07, "logits/chosen": -2.653012752532959, "logits/rejected": -2.1631197929382324, "logps/chosen": -165.99468994140625, "logps/rejected": -1274.478515625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.9044996500015259, "rewards/margins": 11.387828826904297, "rewards/rejected": -12.292328834533691, "step": 13270 }, { "epoch": 0.79, "learning_rate": 6.311406104652534e-07, "logits/chosen": -2.6233255863189697, "logits/rejected": -2.1972603797912598, "logps/chosen": -160.0179443359375, "logps/rejected": -1298.3896484375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.9248750805854797, "rewards/margins": 11.606918334960938, "rewards/rejected": -12.531793594360352, "step": 13280 }, { "epoch": 0.79, "learning_rate": 6.276882821573566e-07, "logits/chosen": -2.678894519805908, "logits/rejected": -2.205015182495117, "logps/chosen": -174.76361083984375, "logps/rejected": -1205.548095703125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.9612113833427429, "rewards/margins": 10.646207809448242, "rewards/rejected": -11.607417106628418, "step": 13290 }, { "epoch": 0.79, "learning_rate": 6.242440658243915e-07, "logits/chosen": -2.7096359729766846, "logits/rejected": -2.2825987339019775, "logps/chosen": -188.0618438720703, "logps/rejected": -1294.4810791015625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.1275831460952759, "rewards/margins": 11.366848945617676, "rewards/rejected": -12.494430541992188, "step": 13300 }, { "epoch": 0.79, "learning_rate": 6.208079763887626e-07, "logits/chosen": -2.647400379180908, "logits/rejected": -2.227109432220459, "logps/chosen": -169.544677734375, "logps/rejected": -1213.8255615234375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.9902595281600952, "rewards/margins": 10.696873664855957, "rewards/rejected": -11.6871337890625, "step": 13310 }, { "epoch": 0.79, "learning_rate": 6.173800287376669e-07, "logits/chosen": -2.669236183166504, "logits/rejected": -2.142890453338623, "logps/chosen": -174.83187866210938, "logps/rejected": -1341.41552734375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.0087352991104126, "rewards/margins": 11.962529182434082, "rewards/rejected": -12.97126579284668, "step": 13320 }, { "epoch": 0.79, "learning_rate": 6.139602377230247e-07, "logits/chosen": -2.695554494857788, "logits/rejected": -2.2485549449920654, "logps/chosen": -172.8942108154297, "logps/rejected": -1233.6075439453125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.0706312656402588, "rewards/margins": 10.822290420532227, "rewards/rejected": -11.892923355102539, "step": 13330 }, { "epoch": 0.8, "learning_rate": 6.105486181614176e-07, "logits/chosen": -2.6472957134246826, "logits/rejected": -2.2596423625946045, "logps/chosen": -183.82476806640625, "logps/rejected": -1218.6607666015625, "loss": 0.0232, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1191326379776, "rewards/margins": 10.635268211364746, "rewards/rejected": -11.754400253295898, "step": 13340 }, { "epoch": 0.8, "learning_rate": 6.071451848340235e-07, "logits/chosen": -2.716681480407715, "logits/rejected": -2.3031864166259766, "logps/chosen": -183.01229858398438, "logps/rejected": -1330.8768310546875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.0952812433242798, "rewards/margins": 11.753024101257324, "rewards/rejected": -12.848306655883789, "step": 13350 }, { "epoch": 0.8, "learning_rate": 6.037499524865523e-07, "logits/chosen": -2.679293632507324, "logits/rejected": -2.2975120544433594, "logps/chosen": -196.26878356933594, "logps/rejected": -1164.00634765625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.2833541631698608, "rewards/margins": 9.899324417114258, "rewards/rejected": -11.182679176330566, "step": 13360 }, { "epoch": 0.8, "learning_rate": 6.003629358291832e-07, "logits/chosen": -2.650348663330078, "logits/rejected": -2.17437481880188, "logps/chosen": -179.01318359375, "logps/rejected": -1338.89208984375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.1230435371398926, "rewards/margins": 11.820822715759277, "rewards/rejected": -12.943865776062012, "step": 13370 }, { "epoch": 0.8, "learning_rate": 5.969841495364978e-07, "logits/chosen": -2.706183910369873, "logits/rejected": -2.1938719749450684, "logps/chosen": -171.48179626464844, "logps/rejected": -1221.214111328125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.0038444995880127, "rewards/margins": 10.76405143737793, "rewards/rejected": -11.76789665222168, "step": 13380 }, { "epoch": 0.8, "learning_rate": 5.936136082474228e-07, "logits/chosen": -2.647810459136963, "logits/rejected": -2.2589077949523926, "logps/chosen": -192.23797607421875, "logps/rejected": -1151.644287109375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.2304718494415283, "rewards/margins": 9.834982872009277, "rewards/rejected": -11.065455436706543, "step": 13390 }, { "epoch": 0.8, "learning_rate": 5.902513265651585e-07, "logits/chosen": -2.6624646186828613, "logits/rejected": -2.2934396266937256, "logps/chosen": -199.35960388183594, "logps/rejected": -1224.0863037109375, "loss": 0.0485, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.2951158285140991, "rewards/margins": 10.49374771118164, "rewards/rejected": -11.788863182067871, "step": 13400 }, { "epoch": 0.8, "learning_rate": 5.868973190571214e-07, "logits/chosen": -2.6969449520111084, "logits/rejected": -2.2601099014282227, "logps/chosen": -175.25234985351562, "logps/rejected": -1302.220458984375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.0468733310699463, "rewards/margins": 11.517797470092773, "rewards/rejected": -12.564672470092773, "step": 13410 }, { "epoch": 0.8, "learning_rate": 5.835516002548816e-07, "logits/chosen": -2.676764488220215, "logits/rejected": -2.2897772789001465, "logps/chosen": -166.84677124023438, "logps/rejected": -1289.373779296875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.9588912129402161, "rewards/margins": 11.490133285522461, "rewards/rejected": -12.44902515411377, "step": 13420 }, { "epoch": 0.8, "learning_rate": 5.802141846540932e-07, "logits/chosen": -2.636695384979248, "logits/rejected": -2.1682748794555664, "logps/chosen": -171.29469299316406, "logps/rejected": -1207.435791015625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.0376616716384888, "rewards/margins": 10.577069282531738, "rewards/rejected": -11.614728927612305, "step": 13430 }, { "epoch": 0.8, "learning_rate": 5.768850867144385e-07, "logits/chosen": -2.6255812644958496, "logits/rejected": -2.207350254058838, "logps/chosen": -168.15487670898438, "logps/rejected": -1215.546142578125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.9339563250541687, "rewards/margins": 10.77913761138916, "rewards/rejected": -11.713093757629395, "step": 13440 }, { "epoch": 0.8, "learning_rate": 5.735643208595623e-07, "logits/chosen": -2.6327404975891113, "logits/rejected": -2.2287228107452393, "logps/chosen": -177.02865600585938, "logps/rejected": -1245.131591796875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.0770198106765747, "rewards/margins": 10.917552947998047, "rewards/rejected": -11.994571685791016, "step": 13450 }, { "epoch": 0.8, "learning_rate": 5.702519014770108e-07, "logits/chosen": -2.640324115753174, "logits/rejected": -2.2754924297332764, "logps/chosen": -198.15188598632812, "logps/rejected": -1141.55712890625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.2626521587371826, "rewards/margins": 9.7100191116333, "rewards/rejected": -10.972671508789062, "step": 13460 }, { "epoch": 0.8, "learning_rate": 5.669478429181646e-07, "logits/chosen": -2.681659698486328, "logits/rejected": -2.305046558380127, "logps/chosen": -168.178955078125, "logps/rejected": -1214.56494140625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.9352678060531616, "rewards/margins": 10.766515731811523, "rewards/rejected": -11.701784133911133, "step": 13470 }, { "epoch": 0.8, "learning_rate": 5.636521594981851e-07, "logits/chosen": -2.719573497772217, "logits/rejected": -2.2618885040283203, "logps/chosen": -156.7507781982422, "logps/rejected": -1208.9212646484375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.9281970858573914, "rewards/margins": 10.716633796691895, "rewards/rejected": -11.644829750061035, "step": 13480 }, { "epoch": 0.8, "learning_rate": 5.603648654959454e-07, "logits/chosen": -2.657705307006836, "logits/rejected": -2.1829946041107178, "logps/chosen": -190.27902221679688, "logps/rejected": -1258.4146728515625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.131217360496521, "rewards/margins": 11.015460014343262, "rewards/rejected": -12.146677017211914, "step": 13490 }, { "epoch": 0.81, "learning_rate": 5.570859751539687e-07, "logits/chosen": -2.707965612411499, "logits/rejected": -2.179104804992676, "logps/chosen": -166.4765625, "logps/rejected": -1183.0833740234375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.8997961282730103, "rewards/margins": 10.489396095275879, "rewards/rejected": -11.389192581176758, "step": 13500 }, { "epoch": 0.81, "learning_rate": 5.538155026783726e-07, "logits/chosen": -2.6481680870056152, "logits/rejected": -2.203890323638916, "logps/chosen": -216.88485717773438, "logps/rejected": -1153.209716796875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.4733598232269287, "rewards/margins": 9.611236572265625, "rewards/rejected": -11.084596633911133, "step": 13510 }, { "epoch": 0.81, "learning_rate": 5.505534622387998e-07, "logits/chosen": -2.6645545959472656, "logits/rejected": -2.2244956493377686, "logps/chosen": -176.46615600585938, "logps/rejected": -1325.2359619140625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.9905093908309937, "rewards/margins": 11.800531387329102, "rewards/rejected": -12.791041374206543, "step": 13520 }, { "epoch": 0.81, "learning_rate": 5.472998679683619e-07, "logits/chosen": -2.704134702682495, "logits/rejected": -2.2616806030273438, "logps/chosen": -211.95303344726562, "logps/rejected": -1289.783203125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.371867299079895, "rewards/margins": 11.083040237426758, "rewards/rejected": -12.454907417297363, "step": 13530 }, { "epoch": 0.81, "learning_rate": 5.440547339635769e-07, "logits/chosen": -2.6759040355682373, "logits/rejected": -2.2015576362609863, "logps/chosen": -175.56509399414062, "logps/rejected": -1367.412353515625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.0149471759796143, "rewards/margins": 12.2113037109375, "rewards/rejected": -13.226249694824219, "step": 13540 }, { "epoch": 0.81, "learning_rate": 5.408180742843069e-07, "logits/chosen": -2.673353672027588, "logits/rejected": -2.2181715965270996, "logps/chosen": -175.46861267089844, "logps/rejected": -1311.572509765625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.0530420541763306, "rewards/margins": 11.617881774902344, "rewards/rejected": -12.67092514038086, "step": 13550 }, { "epoch": 0.81, "learning_rate": 5.375899029536996e-07, "logits/chosen": -2.6008431911468506, "logits/rejected": -2.196563243865967, "logps/chosen": -158.3331756591797, "logps/rejected": -1320.8887939453125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.8840807676315308, "rewards/margins": 11.880335807800293, "rewards/rejected": -12.764415740966797, "step": 13560 }, { "epoch": 0.81, "learning_rate": 5.34370233958125e-07, "logits/chosen": -2.7051897048950195, "logits/rejected": -2.218392848968506, "logps/chosen": -176.99301147460938, "logps/rejected": -1282.6273193359375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.0912928581237793, "rewards/margins": 11.286653518676758, "rewards/rejected": -12.377946853637695, "step": 13570 }, { "epoch": 0.81, "learning_rate": 5.311590812471165e-07, "logits/chosen": -2.680466413497925, "logits/rejected": -2.2152719497680664, "logps/chosen": -189.317138671875, "logps/rejected": -1248.241943359375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.2016308307647705, "rewards/margins": 10.841754913330078, "rewards/rejected": -12.043386459350586, "step": 13580 }, { "epoch": 0.81, "learning_rate": 5.279564587333077e-07, "logits/chosen": -2.6547799110412598, "logits/rejected": -2.2106080055236816, "logps/chosen": -176.56942749023438, "logps/rejected": -1225.6854248046875, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1161929368972778, "rewards/margins": 10.699230194091797, "rewards/rejected": -11.815423965454102, "step": 13590 }, { "epoch": 0.81, "learning_rate": 5.247623802923788e-07, "logits/chosen": -2.699678421020508, "logits/rejected": -2.291386842727661, "logps/chosen": -172.39993286132812, "logps/rejected": -1277.2335205078125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.028847336769104, "rewards/margins": 11.290389060974121, "rewards/rejected": -12.319235801696777, "step": 13600 }, { "epoch": 0.81, "learning_rate": 5.215768597629872e-07, "logits/chosen": -2.7503225803375244, "logits/rejected": -2.293412446975708, "logps/chosen": -165.2312469482422, "logps/rejected": -1258.467529296875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.9487820863723755, "rewards/margins": 11.192811965942383, "rewards/rejected": -12.141592025756836, "step": 13610 }, { "epoch": 0.81, "learning_rate": 5.18399910946715e-07, "logits/chosen": -2.6760306358337402, "logits/rejected": -2.1967055797576904, "logps/chosen": -157.71188354492188, "logps/rejected": -1231.4622802734375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.835241973400116, "rewards/margins": 11.033151626586914, "rewards/rejected": -11.86839485168457, "step": 13620 }, { "epoch": 0.81, "learning_rate": 5.152315476080058e-07, "logits/chosen": -2.7032432556152344, "logits/rejected": -2.1931357383728027, "logps/chosen": -162.85061645507812, "logps/rejected": -1246.513916015625, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.9287565350532532, "rewards/margins": 11.081074714660645, "rewards/rejected": -12.009831428527832, "step": 13630 }, { "epoch": 0.81, "learning_rate": 5.12071783474106e-07, "logits/chosen": -2.622471809387207, "logits/rejected": -2.254931688308716, "logps/chosen": -170.14712524414062, "logps/rejected": -1179.711669921875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.0697211027145386, "rewards/margins": 10.290043830871582, "rewards/rejected": -11.359764099121094, "step": 13640 }, { "epoch": 0.81, "learning_rate": 5.089206322350046e-07, "logits/chosen": -2.5998706817626953, "logits/rejected": -2.248945713043213, "logps/chosen": -195.8653106689453, "logps/rejected": -1266.3809814453125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.266817331314087, "rewards/margins": 10.947492599487305, "rewards/rejected": -12.214308738708496, "step": 13650 }, { "epoch": 0.81, "learning_rate": 5.057781075433751e-07, "logits/chosen": -2.671738386154175, "logits/rejected": -2.2132976055145264, "logps/chosen": -172.5011444091797, "logps/rejected": -1306.454833984375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.0415641069412231, "rewards/margins": 11.572668075561523, "rewards/rejected": -12.614233016967773, "step": 13660 }, { "epoch": 0.82, "learning_rate": 5.026442230145157e-07, "logits/chosen": -2.7238144874572754, "logits/rejected": -2.311789035797119, "logps/chosen": -169.64390563964844, "logps/rejected": -1288.0789794921875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.9923800230026245, "rewards/margins": 11.448450088500977, "rewards/rejected": -12.440828323364258, "step": 13670 }, { "epoch": 0.82, "learning_rate": 4.995189922262877e-07, "logits/chosen": -2.6588640213012695, "logits/rejected": -2.228370189666748, "logps/chosen": -159.25311279296875, "logps/rejected": -1209.178466796875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.8602941632270813, "rewards/margins": 10.785550117492676, "rewards/rejected": -11.645845413208008, "step": 13680 }, { "epoch": 0.82, "learning_rate": 4.964024287190644e-07, "logits/chosen": -2.7045180797576904, "logits/rejected": -2.1723971366882324, "logps/chosen": -166.21282958984375, "logps/rejected": -1224.7825927734375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.0211451053619385, "rewards/margins": 10.764714241027832, "rewards/rejected": -11.785860061645508, "step": 13690 }, { "epoch": 0.82, "learning_rate": 4.932945459956617e-07, "logits/chosen": -2.692246675491333, "logits/rejected": -2.257985830307007, "logps/chosen": -166.0492401123047, "logps/rejected": -1306.399169921875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.9635535478591919, "rewards/margins": 11.66749382019043, "rewards/rejected": -12.631048202514648, "step": 13700 }, { "epoch": 0.82, "learning_rate": 4.901953575212884e-07, "logits/chosen": -2.6288509368896484, "logits/rejected": -2.167935609817505, "logps/chosen": -177.33889770507812, "logps/rejected": -1239.9371337890625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.1235032081604004, "rewards/margins": 10.83365249633789, "rewards/rejected": -11.957155227661133, "step": 13710 }, { "epoch": 0.82, "learning_rate": 4.87104876723484e-07, "logits/chosen": -2.680795192718506, "logits/rejected": -2.2613396644592285, "logps/chosen": -181.4055633544922, "logps/rejected": -1192.895263671875, "loss": 0.0353, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1026599407196045, "rewards/margins": 10.384865760803223, "rewards/rejected": -11.487527847290039, "step": 13720 }, { "epoch": 0.82, "learning_rate": 4.840231169920609e-07, "logits/chosen": -2.6946616172790527, "logits/rejected": -2.1802120208740234, "logps/chosen": -183.39422607421875, "logps/rejected": -1199.854248046875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.0639870166778564, "rewards/margins": 10.495994567871094, "rewards/rejected": -11.559983253479004, "step": 13730 }, { "epoch": 0.82, "learning_rate": 4.809500916790466e-07, "logits/chosen": -2.661583185195923, "logits/rejected": -2.217468500137329, "logps/chosen": -163.66004943847656, "logps/rejected": -1271.381591796875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8894637227058411, "rewards/margins": 11.384147644042969, "rewards/rejected": -12.273612022399902, "step": 13740 }, { "epoch": 0.82, "learning_rate": 4.778858140986259e-07, "logits/chosen": -2.6748485565185547, "logits/rejected": -2.1921467781066895, "logps/chosen": -166.37962341308594, "logps/rejected": -1257.1416015625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.9459733963012695, "rewards/margins": 11.175190925598145, "rewards/rejected": -12.12116527557373, "step": 13750 }, { "epoch": 0.82, "learning_rate": 4.748302975270838e-07, "logits/chosen": -2.6516661643981934, "logits/rejected": -2.244601011276245, "logps/chosen": -177.70980834960938, "logps/rejected": -1294.5078125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.0942624807357788, "rewards/margins": 11.416128158569336, "rewards/rejected": -12.510391235351562, "step": 13760 }, { "epoch": 0.82, "learning_rate": 4.71783555202745e-07, "logits/chosen": -2.65417742729187, "logits/rejected": -2.1740970611572266, "logps/chosen": -171.97743225097656, "logps/rejected": -1268.628662109375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.0818229913711548, "rewards/margins": 11.163043022155762, "rewards/rejected": -12.244867324829102, "step": 13770 }, { "epoch": 0.82, "learning_rate": 4.6874560032592333e-07, "logits/chosen": -2.6474406719207764, "logits/rejected": -2.3152217864990234, "logps/chosen": -168.77127075195312, "logps/rejected": -1212.509033203125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.0077855587005615, "rewards/margins": 10.671684265136719, "rewards/rejected": -11.67947006225586, "step": 13780 }, { "epoch": 0.82, "learning_rate": 4.6571644605885565e-07, "logits/chosen": -2.683969736099243, "logits/rejected": -2.2438387870788574, "logps/chosen": -176.6464080810547, "logps/rejected": -1315.1319580078125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.0864895582199097, "rewards/margins": 11.621270179748535, "rewards/rejected": -12.707758903503418, "step": 13790 }, { "epoch": 0.82, "learning_rate": 4.6269610552565153e-07, "logits/chosen": -2.636932849884033, "logits/rejected": -2.1782472133636475, "logps/chosen": -172.1649627685547, "logps/rejected": -1294.2266845703125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.9745909571647644, "rewards/margins": 11.520150184631348, "rewards/rejected": -12.494741439819336, "step": 13800 }, { "epoch": 0.82, "learning_rate": 4.5968459181223416e-07, "logits/chosen": -2.6854310035705566, "logits/rejected": -2.282257556915283, "logps/chosen": -176.67733764648438, "logps/rejected": -1218.510498046875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.0794392824172974, "rewards/margins": 10.672860145568848, "rewards/rejected": -11.752298355102539, "step": 13810 }, { "epoch": 0.82, "learning_rate": 4.566819179662829e-07, "logits/chosen": -2.6704695224761963, "logits/rejected": -2.292807102203369, "logps/chosen": -178.35671997070312, "logps/rejected": -1292.9691162109375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.0838291645050049, "rewards/margins": 11.395403861999512, "rewards/rejected": -12.479230880737305, "step": 13820 }, { "epoch": 0.82, "learning_rate": 4.5368809699717855e-07, "logits/chosen": -2.6712725162506104, "logits/rejected": -2.286290407180786, "logps/chosen": -178.18814086914062, "logps/rejected": -1236.680908203125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.0077872276306152, "rewards/margins": 10.912640571594238, "rewards/rejected": -11.920427322387695, "step": 13830 }, { "epoch": 0.83, "learning_rate": 4.507031418759447e-07, "logits/chosen": -2.704024314880371, "logits/rejected": -2.2878246307373047, "logps/chosen": -180.00888061523438, "logps/rejected": -1279.6773681640625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.077067494392395, "rewards/margins": 11.275760650634766, "rewards/rejected": -12.352828979492188, "step": 13840 }, { "epoch": 0.83, "learning_rate": 4.477270655351942e-07, "logits/chosen": -2.6880481243133545, "logits/rejected": -2.167849063873291, "logps/chosen": -180.31747436523438, "logps/rejected": -1329.6435546875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.1320539712905884, "rewards/margins": 11.714104652404785, "rewards/rejected": -12.846158027648926, "step": 13850 }, { "epoch": 0.83, "learning_rate": 4.447598808690695e-07, "logits/chosen": -2.6901631355285645, "logits/rejected": -2.1718173027038574, "logps/chosen": -163.86790466308594, "logps/rejected": -1238.12158203125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.9505518674850464, "rewards/margins": 10.998294830322266, "rewards/rejected": -11.948846817016602, "step": 13860 }, { "epoch": 0.83, "learning_rate": 4.418016007331924e-07, "logits/chosen": -2.6914234161376953, "logits/rejected": -2.282498836517334, "logps/chosen": -165.16580200195312, "logps/rejected": -1366.4530029296875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.9785376787185669, "rewards/margins": 12.24502182006836, "rewards/rejected": -13.223559379577637, "step": 13870 }, { "epoch": 0.83, "learning_rate": 4.3885223794460114e-07, "logits/chosen": -2.7076926231384277, "logits/rejected": -2.2664225101470947, "logps/chosen": -172.03372192382812, "logps/rejected": -1235.4722900390625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.9551488757133484, "rewards/margins": 10.9685640335083, "rewards/rejected": -11.923711776733398, "step": 13880 }, { "epoch": 0.83, "learning_rate": 4.359118052817013e-07, "logits/chosen": -2.6591410636901855, "logits/rejected": -2.0986130237579346, "logps/chosen": -194.7082061767578, "logps/rejected": -1338.324462890625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.2044440507888794, "rewards/margins": 11.721646308898926, "rewards/rejected": -12.926091194152832, "step": 13890 }, { "epoch": 0.83, "learning_rate": 4.3298031548420716e-07, "logits/chosen": -2.676812171936035, "logits/rejected": -2.207137107849121, "logps/chosen": -170.67684936523438, "logps/rejected": -1282.3267822265625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.07079017162323, "rewards/margins": 11.308879852294922, "rewards/rejected": -12.379671096801758, "step": 13900 }, { "epoch": 0.83, "learning_rate": 4.300577812530868e-07, "logits/chosen": -2.654151678085327, "logits/rejected": -2.201157331466675, "logps/chosen": -180.61279296875, "logps/rejected": -1209.297119140625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.1155108213424683, "rewards/margins": 10.541376113891602, "rewards/rejected": -11.656888961791992, "step": 13910 }, { "epoch": 0.83, "learning_rate": 4.2714421525050734e-07, "logits/chosen": -2.6808247566223145, "logits/rejected": -2.247138500213623, "logps/chosen": -184.03382873535156, "logps/rejected": -1217.38134765625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.0670932531356812, "rewards/margins": 10.660669326782227, "rewards/rejected": -11.727762222290039, "step": 13920 }, { "epoch": 0.83, "learning_rate": 4.242396300997809e-07, "logits/chosen": -2.693331480026245, "logits/rejected": -2.2159297466278076, "logps/chosen": -167.58206176757812, "logps/rejected": -1344.5343017578125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.9405523538589478, "rewards/margins": 12.056676864624023, "rewards/rejected": -12.997228622436523, "step": 13930 }, { "epoch": 0.83, "learning_rate": 4.213440383853093e-07, "logits/chosen": -2.6392228603363037, "logits/rejected": -2.2534401416778564, "logps/chosen": -156.3339385986328, "logps/rejected": -1310.461669921875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.8530643582344055, "rewards/margins": 11.808847427368164, "rewards/rejected": -12.661911010742188, "step": 13940 }, { "epoch": 0.83, "learning_rate": 4.1845745265252673e-07, "logits/chosen": -2.6900010108947754, "logits/rejected": -2.2535605430603027, "logps/chosen": -167.2605743408203, "logps/rejected": -1268.8167724609375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.0186363458633423, "rewards/margins": 11.22164535522461, "rewards/rejected": -12.24028205871582, "step": 13950 }, { "epoch": 0.83, "learning_rate": 4.15579885407853e-07, "logits/chosen": -2.6795239448547363, "logits/rejected": -2.261195659637451, "logps/chosen": -183.81129455566406, "logps/rejected": -1348.705322265625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.1615241765975952, "rewards/margins": 11.869373321533203, "rewards/rejected": -13.03089714050293, "step": 13960 }, { "epoch": 0.83, "learning_rate": 4.1271134911862936e-07, "logits/chosen": -2.6693310737609863, "logits/rejected": -2.237502336502075, "logps/chosen": -198.24862670898438, "logps/rejected": -1274.082275390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2623385190963745, "rewards/margins": 11.021790504455566, "rewards/rejected": -12.284128189086914, "step": 13970 }, { "epoch": 0.83, "learning_rate": 4.0985185621307293e-07, "logits/chosen": -2.6910958290100098, "logits/rejected": -2.262298107147217, "logps/chosen": -183.97500610351562, "logps/rejected": -1292.0716552734375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.1392381191253662, "rewards/margins": 11.339223861694336, "rewards/rejected": -12.478460311889648, "step": 13980 }, { "epoch": 0.83, "learning_rate": 4.0700141908021793e-07, "logits/chosen": -2.6785387992858887, "logits/rejected": -2.215505838394165, "logps/chosen": -199.35546875, "logps/rejected": -1358.932373046875, "loss": 0.067, "rewards/accuracies": 1.0, "rewards/chosen": -1.240281343460083, "rewards/margins": 11.902799606323242, "rewards/rejected": -13.143081665039062, "step": 13990 }, { "epoch": 0.83, "learning_rate": 4.041600500698642e-07, "logits/chosen": -2.6829400062561035, "logits/rejected": -2.223679304122925, "logps/chosen": -188.1009979248047, "logps/rejected": -1227.83740234375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.2205641269683838, "rewards/margins": 10.616205215454102, "rewards/rejected": -11.836769104003906, "step": 14000 }, { "epoch": 0.83, "eval_logits/chosen": -2.6053109169006348, "eval_logits/rejected": -2.383760452270508, "eval_logps/chosen": -264.53240966796875, "eval_logps/rejected": -1094.8602294921875, "eval_loss": 0.002599479164928198, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -1.974579930305481, "eval_rewards/margins": 8.518539428710938, "eval_rewards/rejected": -10.493120193481445, "eval_runtime": 3.9017, "eval_samples_per_second": 1.281, "eval_steps_per_second": 0.256, "step": 14000 }, { "epoch": 0.84, "learning_rate": 4.013277614925229e-07, "logits/chosen": -2.658773899078369, "logits/rejected": -2.1468019485473633, "logps/chosen": -179.64801025390625, "logps/rejected": -1278.55908203125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.0787864923477173, "rewards/margins": 11.25596809387207, "rewards/rejected": -12.33475399017334, "step": 14010 }, { "epoch": 0.84, "learning_rate": 3.985045656193631e-07, "logits/chosen": -2.668735980987549, "logits/rejected": -2.2114713191986084, "logps/chosen": -189.68881225585938, "logps/rejected": -1241.6612548828125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1798040866851807, "rewards/margins": 10.79493522644043, "rewards/rejected": -11.974739074707031, "step": 14020 }, { "epoch": 0.84, "learning_rate": 3.9569047468215967e-07, "logits/chosen": -2.6559648513793945, "logits/rejected": -2.111429214477539, "logps/chosen": -171.84005737304688, "logps/rejected": -1269.4163818359375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.9797341227531433, "rewards/margins": 11.274862289428711, "rewards/rejected": -12.254595756530762, "step": 14030 }, { "epoch": 0.84, "learning_rate": 3.9288550087323687e-07, "logits/chosen": -2.631896495819092, "logits/rejected": -2.1895270347595215, "logps/chosen": -183.16319274902344, "logps/rejected": -1329.515869140625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.1326725482940674, "rewards/margins": 11.718535423278809, "rewards/rejected": -12.85120964050293, "step": 14040 }, { "epoch": 0.84, "learning_rate": 3.900896563454226e-07, "logits/chosen": -2.6776461601257324, "logits/rejected": -2.24357271194458, "logps/chosen": -166.2142333984375, "logps/rejected": -1312.224365234375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.9974905252456665, "rewards/margins": 11.684764862060547, "rewards/rejected": -12.682255744934082, "step": 14050 }, { "epoch": 0.84, "learning_rate": 3.873029532119868e-07, "logits/chosen": -2.646001100540161, "logits/rejected": -2.2312138080596924, "logps/chosen": -154.52357482910156, "logps/rejected": -1280.1116943359375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.8567987680435181, "rewards/margins": 11.488916397094727, "rewards/rejected": -12.345715522766113, "step": 14060 }, { "epoch": 0.84, "learning_rate": 3.845254035465951e-07, "logits/chosen": -2.6889290809631348, "logits/rejected": -2.277940273284912, "logps/chosen": -176.64369201660156, "logps/rejected": -1371.5977783203125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.0301624536514282, "rewards/margins": 12.242680549621582, "rewards/rejected": -13.272842407226562, "step": 14070 }, { "epoch": 0.84, "learning_rate": 3.8175701938325677e-07, "logits/chosen": -2.6686508655548096, "logits/rejected": -2.156785488128662, "logps/chosen": -173.02218627929688, "logps/rejected": -1296.3160400390625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.0174912214279175, "rewards/margins": 11.499128341674805, "rewards/rejected": -12.516620635986328, "step": 14080 }, { "epoch": 0.84, "learning_rate": 3.7899781271626747e-07, "logits/chosen": -2.692993402481079, "logits/rejected": -2.2525722980499268, "logps/chosen": -161.3787841796875, "logps/rejected": -1306.217529296875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.8953760266304016, "rewards/margins": 11.722616195678711, "rewards/rejected": -12.617993354797363, "step": 14090 }, { "epoch": 0.84, "learning_rate": 3.76247795500162e-07, "logits/chosen": -2.6358532905578613, "logits/rejected": -2.2306854724884033, "logps/chosen": -182.1425323486328, "logps/rejected": -1269.7691650390625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.1269450187683105, "rewards/margins": 11.12839412689209, "rewards/rejected": -12.255338668823242, "step": 14100 }, { "epoch": 0.84, "learning_rate": 3.73506979649661e-07, "logits/chosen": -2.64140248298645, "logits/rejected": -2.179159641265869, "logps/chosen": -164.57003784179688, "logps/rejected": -1269.4193115234375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.9803975820541382, "rewards/margins": 11.26661205291748, "rewards/rejected": -12.247010231018066, "step": 14110 }, { "epoch": 0.84, "learning_rate": 3.707753770396197e-07, "logits/chosen": -2.663074254989624, "logits/rejected": -2.2346599102020264, "logps/chosen": -192.89430236816406, "logps/rejected": -1316.9742431640625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.185072422027588, "rewards/margins": 11.541204452514648, "rewards/rejected": -12.726276397705078, "step": 14120 }, { "epoch": 0.84, "learning_rate": 3.6805299950497366e-07, "logits/chosen": -2.6560990810394287, "logits/rejected": -2.23017954826355, "logps/chosen": -189.10353088378906, "logps/rejected": -1307.087890625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.142559289932251, "rewards/margins": 11.477446556091309, "rewards/rejected": -12.620006561279297, "step": 14130 }, { "epoch": 0.84, "learning_rate": 3.653398588406937e-07, "logits/chosen": -2.682931661605835, "logits/rejected": -2.249525547027588, "logps/chosen": -176.5172119140625, "logps/rejected": -1271.5799560546875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.034685730934143, "rewards/margins": 11.222209930419922, "rewards/rejected": -12.256896018981934, "step": 14140 }, { "epoch": 0.84, "learning_rate": 3.626359668017285e-07, "logits/chosen": -2.6658554077148438, "logits/rejected": -2.208019733428955, "logps/chosen": -179.17420959472656, "logps/rejected": -1218.5045166015625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.0749691724777222, "rewards/margins": 10.65962028503418, "rewards/rejected": -11.734590530395508, "step": 14150 }, { "epoch": 0.84, "learning_rate": 3.5994133510295517e-07, "logits/chosen": -2.687696695327759, "logits/rejected": -2.280946731567383, "logps/chosen": -171.23837280273438, "logps/rejected": -1219.3798828125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.9967787861824036, "rewards/margins": 10.754220962524414, "rewards/rejected": -11.751001358032227, "step": 14160 }, { "epoch": 0.84, "learning_rate": 3.572559754191332e-07, "logits/chosen": -2.6567318439483643, "logits/rejected": -2.2087929248809814, "logps/chosen": -171.87026977539062, "logps/rejected": -1285.305908203125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.9473191499710083, "rewards/margins": 11.470184326171875, "rewards/rejected": -12.417502403259277, "step": 14170 }, { "epoch": 0.85, "learning_rate": 3.545798993848465e-07, "logits/chosen": -2.623717784881592, "logits/rejected": -2.213705539703369, "logps/chosen": -174.06163024902344, "logps/rejected": -1281.0960693359375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.045467495918274, "rewards/margins": 11.316000938415527, "rewards/rejected": -12.361467361450195, "step": 14180 }, { "epoch": 0.85, "learning_rate": 3.51913118594458e-07, "logits/chosen": -2.6105799674987793, "logits/rejected": -2.276928424835205, "logps/chosen": -172.11465454101562, "logps/rejected": -1310.3248291015625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.0431947708129883, "rewards/margins": 11.61866283416748, "rewards/rejected": -12.661857604980469, "step": 14190 }, { "epoch": 0.85, "learning_rate": 3.492556446020587e-07, "logits/chosen": -2.6462557315826416, "logits/rejected": -2.229262590408325, "logps/chosen": -180.5137176513672, "logps/rejected": -1249.146240234375, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.0995638370513916, "rewards/margins": 10.961332321166992, "rewards/rejected": -12.060895919799805, "step": 14200 }, { "epoch": 0.85, "learning_rate": 3.466074889214169e-07, "logits/chosen": -2.633293390274048, "logits/rejected": -2.2219607830047607, "logps/chosen": -198.06295776367188, "logps/rejected": -1143.631591796875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.283493161201477, "rewards/margins": 9.719396591186523, "rewards/rejected": -11.002891540527344, "step": 14210 }, { "epoch": 0.85, "learning_rate": 3.4396866302592593e-07, "logits/chosen": -2.7021024227142334, "logits/rejected": -2.3271546363830566, "logps/chosen": -168.0321807861328, "logps/rejected": -1253.287353515625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.9981589317321777, "rewards/margins": 11.080887794494629, "rewards/rejected": -12.079048156738281, "step": 14220 }, { "epoch": 0.85, "learning_rate": 3.413391783485606e-07, "logits/chosen": -2.659151315689087, "logits/rejected": -2.2468862533569336, "logps/chosen": -188.7427978515625, "logps/rejected": -1302.943359375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.18603515625, "rewards/margins": 11.406412124633789, "rewards/rejected": -12.592448234558105, "step": 14230 }, { "epoch": 0.85, "learning_rate": 3.3871904628182267e-07, "logits/chosen": -2.6383728981018066, "logits/rejected": -2.2224040031433105, "logps/chosen": -189.111083984375, "logps/rejected": -1412.593505859375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.1670124530792236, "rewards/margins": 12.524897575378418, "rewards/rejected": -13.691909790039062, "step": 14240 }, { "epoch": 0.85, "learning_rate": 3.361082781776906e-07, "logits/chosen": -2.6593356132507324, "logits/rejected": -2.2361083030700684, "logps/chosen": -156.43466186523438, "logps/rejected": -1166.180908203125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.9144033193588257, "rewards/margins": 10.305667877197266, "rewards/rejected": -11.220071792602539, "step": 14250 }, { "epoch": 0.85, "learning_rate": 3.335068853475762e-07, "logits/chosen": -2.6842901706695557, "logits/rejected": -2.210298538208008, "logps/chosen": -189.82516479492188, "logps/rejected": -1330.842041015625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.173034429550171, "rewards/margins": 11.690396308898926, "rewards/rejected": -12.863430976867676, "step": 14260 }, { "epoch": 0.85, "learning_rate": 3.309148790622688e-07, "logits/chosen": -2.697014331817627, "logits/rejected": -2.1413753032684326, "logps/chosen": -171.2134246826172, "logps/rejected": -1357.882080078125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.0021451711654663, "rewards/margins": 12.136377334594727, "rewards/rejected": -13.138522148132324, "step": 14270 }, { "epoch": 0.85, "learning_rate": 3.2833227055189126e-07, "logits/chosen": -2.6973605155944824, "logits/rejected": -2.2502338886260986, "logps/chosen": -174.47206115722656, "logps/rejected": -1272.278564453125, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.0462062358856201, "rewards/margins": 11.218513488769531, "rewards/rejected": -12.264719009399414, "step": 14280 }, { "epoch": 0.85, "learning_rate": 3.2575907100584976e-07, "logits/chosen": -2.6261942386627197, "logits/rejected": -2.160449981689453, "logps/chosen": -168.2234344482422, "logps/rejected": -1224.8074951171875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.976810097694397, "rewards/margins": 10.81821060180664, "rewards/rejected": -11.79502010345459, "step": 14290 }, { "epoch": 0.85, "learning_rate": 3.2319529157278427e-07, "logits/chosen": -2.6749837398529053, "logits/rejected": -2.167555809020996, "logps/chosen": -156.91146850585938, "logps/rejected": -1224.4534912109375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.8893406987190247, "rewards/margins": 10.914307594299316, "rewards/rejected": -11.803647994995117, "step": 14300 }, { "epoch": 0.85, "learning_rate": 3.2064094336052176e-07, "logits/chosen": -2.660374641418457, "logits/rejected": -2.2785587310791016, "logps/chosen": -180.96017456054688, "logps/rejected": -1205.510009765625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.1168030500411987, "rewards/margins": 10.501714706420898, "rewards/rejected": -11.618518829345703, "step": 14310 }, { "epoch": 0.85, "learning_rate": 3.1809603743602783e-07, "logits/chosen": -2.633387327194214, "logits/rejected": -2.2793474197387695, "logps/chosen": -198.32861328125, "logps/rejected": -1229.17431640625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.2924237251281738, "rewards/margins": 10.56283187866211, "rewards/rejected": -11.855255126953125, "step": 14320 }, { "epoch": 0.85, "learning_rate": 3.1556058482535817e-07, "logits/chosen": -2.722856044769287, "logits/rejected": -2.251875162124634, "logps/chosen": -165.18190002441406, "logps/rejected": -1172.5921630859375, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.8998897671699524, "rewards/margins": 10.391897201538086, "rewards/rejected": -11.291788101196289, "step": 14330 }, { "epoch": 0.86, "learning_rate": 3.1303459651361027e-07, "logits/chosen": -2.690218687057495, "logits/rejected": -2.2556252479553223, "logps/chosen": -183.59963989257812, "logps/rejected": -1241.440185546875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.1058900356292725, "rewards/margins": 10.85296630859375, "rewards/rejected": -11.958856582641602, "step": 14340 }, { "epoch": 0.86, "learning_rate": 3.105180834448776e-07, "logits/chosen": -2.6766045093536377, "logits/rejected": -2.1780879497528076, "logps/chosen": -178.98837280273438, "logps/rejected": -1316.917724609375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.0893800258636475, "rewards/margins": 11.631126403808594, "rewards/rejected": -12.72050666809082, "step": 14350 }, { "epoch": 0.86, "learning_rate": 3.080110565222008e-07, "logits/chosen": -2.658325433731079, "logits/rejected": -2.1948657035827637, "logps/chosen": -170.4897918701172, "logps/rejected": -1229.463623046875, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -0.9832500219345093, "rewards/margins": 10.863972663879395, "rewards/rejected": -11.847222328186035, "step": 14360 }, { "epoch": 0.86, "learning_rate": 3.05513526607521e-07, "logits/chosen": -2.651050090789795, "logits/rejected": -2.208951473236084, "logps/chosen": -170.09188842773438, "logps/rejected": -1252.8548583984375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.9993183016777039, "rewards/margins": 11.10303020477295, "rewards/rejected": -12.102350234985352, "step": 14370 }, { "epoch": 0.86, "learning_rate": 3.0302550452163294e-07, "logits/chosen": -2.65950608253479, "logits/rejected": -2.1992413997650146, "logps/chosen": -183.9871368408203, "logps/rejected": -1185.7957763671875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.0926755666732788, "rewards/margins": 10.318548202514648, "rewards/rejected": -11.411224365234375, "step": 14380 }, { "epoch": 0.86, "learning_rate": 3.0054700104413666e-07, "logits/chosen": -2.667231321334839, "logits/rejected": -2.3050522804260254, "logps/chosen": -178.59092712402344, "logps/rejected": -1189.386962890625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.0339524745941162, "rewards/margins": 10.397581100463867, "rewards/rejected": -11.431532859802246, "step": 14390 }, { "epoch": 0.86, "learning_rate": 2.980780269133937e-07, "logits/chosen": -2.64802622795105, "logits/rejected": -2.2131125926971436, "logps/chosen": -169.2388916015625, "logps/rejected": -1173.3343505859375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.9914522171020508, "rewards/margins": 10.293741226196289, "rewards/rejected": -11.28519344329834, "step": 14400 }, { "epoch": 0.86, "learning_rate": 2.956185928264757e-07, "logits/chosen": -2.6555652618408203, "logits/rejected": -2.193197011947632, "logps/chosen": -163.09716796875, "logps/rejected": -1338.634033203125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.956673800945282, "rewards/margins": 11.981584548950195, "rewards/rejected": -12.938260078430176, "step": 14410 }, { "epoch": 0.86, "learning_rate": 2.9316870943912554e-07, "logits/chosen": -2.6932625770568848, "logits/rejected": -2.255809783935547, "logps/chosen": -193.12936401367188, "logps/rejected": -1331.60791015625, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.1898857355117798, "rewards/margins": 11.672222137451172, "rewards/rejected": -12.86210823059082, "step": 14420 }, { "epoch": 0.86, "learning_rate": 2.9072838736570243e-07, "logits/chosen": -2.6873533725738525, "logits/rejected": -2.2109134197235107, "logps/chosen": -182.5050811767578, "logps/rejected": -1252.6083984375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.1479988098144531, "rewards/margins": 10.920316696166992, "rewards/rejected": -12.068315505981445, "step": 14430 }, { "epoch": 0.86, "learning_rate": 2.8829763717914266e-07, "logits/chosen": -2.671875, "logits/rejected": -2.201972723007202, "logps/chosen": -163.68991088867188, "logps/rejected": -1191.8785400390625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.9103431701660156, "rewards/margins": 10.550494194030762, "rewards/rejected": -11.460836410522461, "step": 14440 }, { "epoch": 0.86, "learning_rate": 2.8587646941091116e-07, "logits/chosen": -2.706421375274658, "logits/rejected": -2.1908249855041504, "logps/chosen": -162.16207885742188, "logps/rejected": -1268.368896484375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.9255984425544739, "rewards/margins": 11.313652038574219, "rewards/rejected": -12.239252090454102, "step": 14450 }, { "epoch": 0.86, "learning_rate": 2.834648945509552e-07, "logits/chosen": -2.6842308044433594, "logits/rejected": -2.2188427448272705, "logps/chosen": -172.319580078125, "logps/rejected": -1205.335693359375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.9818706512451172, "rewards/margins": 10.621633529663086, "rewards/rejected": -11.603503227233887, "step": 14460 }, { "epoch": 0.86, "learning_rate": 2.810629230476611e-07, "logits/chosen": -2.613492250442505, "logits/rejected": -2.1703238487243652, "logps/chosen": -179.2425079345703, "logps/rejected": -1238.242919921875, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -1.0603941679000854, "rewards/margins": 10.867496490478516, "rewards/rejected": -11.927891731262207, "step": 14470 }, { "epoch": 0.86, "learning_rate": 2.786705653078062e-07, "logits/chosen": -2.6873011589050293, "logits/rejected": -2.2895147800445557, "logps/chosen": -157.36329650878906, "logps/rejected": -1272.4246826171875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.9024655222892761, "rewards/margins": 11.387192726135254, "rewards/rejected": -12.289657592773438, "step": 14480 }, { "epoch": 0.86, "learning_rate": 2.76287831696517e-07, "logits/chosen": -2.686208724975586, "logits/rejected": -2.3067870140075684, "logps/chosen": -168.56027221679688, "logps/rejected": -1299.931396484375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.9828259348869324, "rewards/margins": 11.578295707702637, "rewards/rejected": -12.561121940612793, "step": 14490 }, { "epoch": 0.86, "learning_rate": 2.7391473253722017e-07, "logits/chosen": -2.6310219764709473, "logits/rejected": -2.2018754482269287, "logps/chosen": -189.600830078125, "logps/rejected": -1253.774169921875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.195866346359253, "rewards/margins": 10.903592109680176, "rewards/rejected": -12.099458694458008, "step": 14500 }, { "epoch": 0.87, "learning_rate": 2.7155127811160336e-07, "logits/chosen": -2.679464817047119, "logits/rejected": -2.1640472412109375, "logps/chosen": -157.51136779785156, "logps/rejected": -1292.6475830078125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.8621135950088501, "rewards/margins": 11.614855766296387, "rewards/rejected": -12.476968765258789, "step": 14510 }, { "epoch": 0.87, "learning_rate": 2.6919747865956413e-07, "logits/chosen": -2.7105636596679688, "logits/rejected": -2.191953659057617, "logps/chosen": -165.73458862304688, "logps/rejected": -1390.4169921875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.9154362678527832, "rewards/margins": 12.541097640991211, "rewards/rejected": -13.45653247833252, "step": 14520 }, { "epoch": 0.87, "learning_rate": 2.668533443791707e-07, "logits/chosen": -2.6745681762695312, "logits/rejected": -2.2801852226257324, "logps/chosen": -178.88168334960938, "logps/rejected": -1224.2813720703125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.053114414215088, "rewards/margins": 10.73914909362793, "rewards/rejected": -11.792263984680176, "step": 14530 }, { "epoch": 0.87, "learning_rate": 2.645188854266162e-07, "logits/chosen": -2.70839524269104, "logits/rejected": -2.3101611137390137, "logps/chosen": -179.01614379882812, "logps/rejected": -1197.5069580078125, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.0526068210601807, "rewards/margins": 10.463216781616211, "rewards/rejected": -11.515823364257812, "step": 14540 }, { "epoch": 0.87, "learning_rate": 2.621941119161739e-07, "logits/chosen": -2.6644093990325928, "logits/rejected": -2.1778931617736816, "logps/chosen": -160.2152862548828, "logps/rejected": -1215.9842529296875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.9512295722961426, "rewards/margins": 10.764131546020508, "rewards/rejected": -11.715360641479492, "step": 14550 }, { "epoch": 0.87, "learning_rate": 2.598790339201537e-07, "logits/chosen": -2.653076648712158, "logits/rejected": -2.2415120601654053, "logps/chosen": -207.11050415039062, "logps/rejected": -1230.3896484375, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.4329458475112915, "rewards/margins": 10.431455612182617, "rewards/rejected": -11.864401817321777, "step": 14560 }, { "epoch": 0.87, "learning_rate": 2.575736614688595e-07, "logits/chosen": -2.6746716499328613, "logits/rejected": -2.2277660369873047, "logps/chosen": -190.6104736328125, "logps/rejected": -1247.4610595703125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.2100681066513062, "rewards/margins": 10.824273109436035, "rewards/rejected": -12.034341812133789, "step": 14570 }, { "epoch": 0.87, "learning_rate": 2.552780045505446e-07, "logits/chosen": -2.6615734100341797, "logits/rejected": -2.192617893218994, "logps/chosen": -164.65646362304688, "logps/rejected": -1250.136474609375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.9407480359077454, "rewards/margins": 11.12216854095459, "rewards/rejected": -12.062917709350586, "step": 14580 }, { "epoch": 0.87, "learning_rate": 2.529920731113672e-07, "logits/chosen": -2.6694424152374268, "logits/rejected": -2.240365743637085, "logps/chosen": -186.12057495117188, "logps/rejected": -1314.60107421875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.1555207967758179, "rewards/margins": 11.549055099487305, "rewards/rejected": -12.704575538635254, "step": 14590 }, { "epoch": 0.87, "learning_rate": 2.507158770553528e-07, "logits/chosen": -2.6896824836730957, "logits/rejected": -2.152794122695923, "logps/chosen": -206.90377807617188, "logps/rejected": -1183.181884765625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.2904070615768433, "rewards/margins": 10.098420143127441, "rewards/rejected": -11.388827323913574, "step": 14600 }, { "epoch": 0.87, "learning_rate": 2.484494262443429e-07, "logits/chosen": -2.669369697570801, "logits/rejected": -2.1714138984680176, "logps/chosen": -148.35142517089844, "logps/rejected": -1362.5677490234375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.7646754384040833, "rewards/margins": 12.405680656433105, "rewards/rejected": -13.170356750488281, "step": 14610 }, { "epoch": 0.87, "learning_rate": 2.4619273049796e-07, "logits/chosen": -2.6571056842803955, "logits/rejected": -2.2819979190826416, "logps/chosen": -172.59158325195312, "logps/rejected": -1309.4736328125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.0510157346725464, "rewards/margins": 11.601158142089844, "rewards/rejected": -12.652173042297363, "step": 14620 }, { "epoch": 0.87, "learning_rate": 2.439457995935604e-07, "logits/chosen": -2.6655564308166504, "logits/rejected": -2.193443775177002, "logps/chosen": -166.09619140625, "logps/rejected": -1301.968017578125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.976818859577179, "rewards/margins": 11.59904956817627, "rewards/rejected": -12.575868606567383, "step": 14630 }, { "epoch": 0.87, "learning_rate": 2.417086432661939e-07, "logits/chosen": -2.676795482635498, "logits/rejected": -2.2105135917663574, "logps/chosen": -160.29208374023438, "logps/rejected": -1297.194580078125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.863673985004425, "rewards/margins": 11.653975486755371, "rewards/rejected": -12.517648696899414, "step": 14640 }, { "epoch": 0.87, "learning_rate": 2.394812712085598e-07, "logits/chosen": -2.664961338043213, "logits/rejected": -2.32708740234375, "logps/chosen": -160.30398559570312, "logps/rejected": -1327.02734375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9610745310783386, "rewards/margins": 11.851113319396973, "rewards/rejected": -12.812185287475586, "step": 14650 }, { "epoch": 0.87, "learning_rate": 2.3726369307096765e-07, "logits/chosen": -2.665367841720581, "logits/rejected": -2.303982734680176, "logps/chosen": -185.8868865966797, "logps/rejected": -1194.243408203125, "loss": 0.0382, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1416006088256836, "rewards/margins": 10.349721908569336, "rewards/rejected": -11.49132251739502, "step": 14660 }, { "epoch": 0.87, "learning_rate": 2.3505591846129356e-07, "logits/chosen": -2.67551851272583, "logits/rejected": -2.2528491020202637, "logps/chosen": -192.8577423095703, "logps/rejected": -1303.645263671875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.2152811288833618, "rewards/margins": 11.37720012664795, "rewards/rejected": -12.59248161315918, "step": 14670 }, { "epoch": 0.88, "learning_rate": 2.3285795694493686e-07, "logits/chosen": -2.6500518321990967, "logits/rejected": -2.2434792518615723, "logps/chosen": -184.54811096191406, "logps/rejected": -1109.455810546875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.110153079032898, "rewards/margins": 9.554444313049316, "rewards/rejected": -10.66459846496582, "step": 14680 }, { "epoch": 0.88, "learning_rate": 2.3066981804478416e-07, "logits/chosen": -2.622807741165161, "logits/rejected": -2.1578707695007324, "logps/chosen": -168.41107177734375, "logps/rejected": -1361.412841796875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.0306992530822754, "rewards/margins": 12.140836715698242, "rewards/rejected": -13.171536445617676, "step": 14690 }, { "epoch": 0.88, "learning_rate": 2.2849151124116148e-07, "logits/chosen": -2.643070697784424, "logits/rejected": -2.2879068851470947, "logps/chosen": -182.92086791992188, "logps/rejected": -1253.0843505859375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.120378851890564, "rewards/margins": 10.963888168334961, "rewards/rejected": -12.084267616271973, "step": 14700 }, { "epoch": 0.88, "learning_rate": 2.2632304597179827e-07, "logits/chosen": -2.6435441970825195, "logits/rejected": -2.167996644973755, "logps/chosen": -186.99679565429688, "logps/rejected": -1212.7271728515625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1710927486419678, "rewards/margins": 10.504181861877441, "rewards/rejected": -11.675273895263672, "step": 14710 }, { "epoch": 0.88, "learning_rate": 2.2416443163178342e-07, "logits/chosen": -2.6479573249816895, "logits/rejected": -2.1469974517822266, "logps/chosen": -167.46426391601562, "logps/rejected": -1331.291748046875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.9423903226852417, "rewards/margins": 11.911943435668945, "rewards/rejected": -12.854333877563477, "step": 14720 }, { "epoch": 0.88, "learning_rate": 2.2201567757352631e-07, "logits/chosen": -2.6761555671691895, "logits/rejected": -2.2536380290985107, "logps/chosen": -175.158203125, "logps/rejected": -1269.8704833984375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0826818943023682, "rewards/margins": 11.166032791137695, "rewards/rejected": -12.248714447021484, "step": 14730 }, { "epoch": 0.88, "learning_rate": 2.1987679310671582e-07, "logits/chosen": -2.702472686767578, "logits/rejected": -2.3169729709625244, "logps/chosen": -167.0040283203125, "logps/rejected": -1179.12158203125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.938464343547821, "rewards/margins": 10.411504745483398, "rewards/rejected": -11.349969863891602, "step": 14740 }, { "epoch": 0.88, "learning_rate": 2.1774778749827946e-07, "logits/chosen": -2.6422219276428223, "logits/rejected": -2.2057957649230957, "logps/chosen": -184.132080078125, "logps/rejected": -1134.3599853515625, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.1921025514602661, "rewards/margins": 9.710203170776367, "rewards/rejected": -10.902303695678711, "step": 14750 }, { "epoch": 0.88, "learning_rate": 2.1562866997234421e-07, "logits/chosen": -2.6585116386413574, "logits/rejected": -2.2246639728546143, "logps/chosen": -177.4308319091797, "logps/rejected": -1236.3043212890625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.1117875576019287, "rewards/margins": 10.802475929260254, "rewards/rejected": -11.914262771606445, "step": 14760 }, { "epoch": 0.88, "learning_rate": 2.1351944971019362e-07, "logits/chosen": -2.687551498413086, "logits/rejected": -2.2716846466064453, "logps/chosen": -175.306396484375, "logps/rejected": -1306.552978515625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.029445767402649, "rewards/margins": 11.590786933898926, "rewards/rejected": -12.620233535766602, "step": 14770 }, { "epoch": 0.88, "learning_rate": 2.1142013585023464e-07, "logits/chosen": -2.633091688156128, "logits/rejected": -2.1895394325256348, "logps/chosen": -192.74295043945312, "logps/rejected": -1113.7147216796875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.2129895687103271, "rewards/margins": 9.494071006774902, "rewards/rejected": -10.707061767578125, "step": 14780 }, { "epoch": 0.88, "learning_rate": 2.0933073748794996e-07, "logits/chosen": -2.716001272201538, "logits/rejected": -2.353024482727051, "logps/chosen": -208.6348419189453, "logps/rejected": -1199.489013671875, "loss": 0.0202, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.3700079917907715, "rewards/margins": 10.19678020477295, "rewards/rejected": -11.566787719726562, "step": 14790 }, { "epoch": 0.88, "learning_rate": 2.072512636758639e-07, "logits/chosen": -2.6699705123901367, "logits/rejected": -2.196615219116211, "logps/chosen": -187.41168212890625, "logps/rejected": -1271.893798828125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.151033639907837, "rewards/margins": 11.120112419128418, "rewards/rejected": -12.271146774291992, "step": 14800 }, { "epoch": 0.88, "learning_rate": 2.051817234235015e-07, "logits/chosen": -2.6464037895202637, "logits/rejected": -2.190279722213745, "logps/chosen": -162.7576446533203, "logps/rejected": -1345.40673828125, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.9371393322944641, "rewards/margins": 12.077689170837402, "rewards/rejected": -13.0148286819458, "step": 14810 }, { "epoch": 0.88, "learning_rate": 2.0312212569735035e-07, "logits/chosen": -2.620392322540283, "logits/rejected": -2.186061382293701, "logps/chosen": -193.6808624267578, "logps/rejected": -1146.101806640625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.2068160772323608, "rewards/margins": 9.815080642700195, "rewards/rejected": -11.021897315979004, "step": 14820 }, { "epoch": 0.88, "learning_rate": 2.0107247942081963e-07, "logits/chosen": -2.70564603805542, "logits/rejected": -2.2533860206604004, "logps/chosen": -169.4835968017578, "logps/rejected": -1190.15185546875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.9564429521560669, "rewards/margins": 10.50793743133545, "rewards/rejected": -11.464380264282227, "step": 14830 }, { "epoch": 0.88, "learning_rate": 1.990327934742045e-07, "logits/chosen": -2.618466854095459, "logits/rejected": -2.245718002319336, "logps/chosen": -176.75430297851562, "logps/rejected": -1181.525146484375, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.0724035501480103, "rewards/margins": 10.301519393920898, "rewards/rejected": -11.373924255371094, "step": 14840 }, { "epoch": 0.89, "learning_rate": 1.9700307669464515e-07, "logits/chosen": -2.627598762512207, "logits/rejected": -2.1568453311920166, "logps/chosen": -161.8828582763672, "logps/rejected": -1258.74267578125, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.9156071543693542, "rewards/margins": 11.22768497467041, "rewards/rejected": -12.143292427062988, "step": 14850 }, { "epoch": 0.89, "learning_rate": 1.949833378760882e-07, "logits/chosen": -2.6692910194396973, "logits/rejected": -2.1686606407165527, "logps/chosen": -169.31985473632812, "logps/rejected": -1167.5325927734375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.9801681637763977, "rewards/margins": 10.247018814086914, "rewards/rejected": -11.22718620300293, "step": 14860 }, { "epoch": 0.89, "learning_rate": 1.92973585769253e-07, "logits/chosen": -2.695345401763916, "logits/rejected": -2.209428310394287, "logps/chosen": -190.50949096679688, "logps/rejected": -1161.2869873046875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.144087791442871, "rewards/margins": 10.028764724731445, "rewards/rejected": -11.172852516174316, "step": 14870 }, { "epoch": 0.89, "learning_rate": 1.9097382908158713e-07, "logits/chosen": -2.6855318546295166, "logits/rejected": -2.2859859466552734, "logps/chosen": -168.3792724609375, "logps/rejected": -1155.4234619140625, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.996250331401825, "rewards/margins": 10.118196487426758, "rewards/rejected": -11.114445686340332, "step": 14880 }, { "epoch": 0.89, "learning_rate": 1.8898407647723327e-07, "logits/chosen": -2.6655187606811523, "logits/rejected": -2.2823891639709473, "logps/chosen": -170.9694366455078, "logps/rejected": -1249.5794677734375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.0004255771636963, "rewards/margins": 11.049158096313477, "rewards/rejected": -12.049583435058594, "step": 14890 }, { "epoch": 0.89, "learning_rate": 1.8700433657699162e-07, "logits/chosen": -2.673092842102051, "logits/rejected": -2.284445285797119, "logps/chosen": -179.8528289794922, "logps/rejected": -1253.5740966796875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.0807421207427979, "rewards/margins": 11.017679214477539, "rewards/rejected": -12.098422050476074, "step": 14900 }, { "epoch": 0.89, "learning_rate": 1.8503461795827958e-07, "logits/chosen": -2.682305335998535, "logits/rejected": -2.2803447246551514, "logps/chosen": -182.93017578125, "logps/rejected": -1284.439697265625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.1043970584869385, "rewards/margins": 11.29609489440918, "rewards/rejected": -12.400491714477539, "step": 14910 }, { "epoch": 0.89, "learning_rate": 1.8307492915509705e-07, "logits/chosen": -2.635374069213867, "logits/rejected": -2.2397847175598145, "logps/chosen": -180.3602752685547, "logps/rejected": -1274.7506103515625, "loss": 0.0267, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.045652985572815, "rewards/margins": 11.249390602111816, "rewards/rejected": -12.2950439453125, "step": 14920 }, { "epoch": 0.89, "learning_rate": 1.8112527865798896e-07, "logits/chosen": -2.6177165508270264, "logits/rejected": -2.2755227088928223, "logps/chosen": -188.5, "logps/rejected": -1290.842041015625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1717121601104736, "rewards/margins": 11.276228904724121, "rewards/rejected": -12.4479398727417, "step": 14930 }, { "epoch": 0.89, "learning_rate": 1.7918567491400862e-07, "logits/chosen": -2.626162528991699, "logits/rejected": -2.187166213989258, "logps/chosen": -179.59324645996094, "logps/rejected": -1198.760498046875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.0399250984191895, "rewards/margins": 10.51546859741211, "rewards/rejected": -11.55539321899414, "step": 14940 }, { "epoch": 0.89, "learning_rate": 1.7725612632667895e-07, "logits/chosen": -2.627930164337158, "logits/rejected": -2.164045810699463, "logps/chosen": -177.1788787841797, "logps/rejected": -1292.7125244140625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.0708956718444824, "rewards/margins": 11.406911849975586, "rewards/rejected": -12.477807998657227, "step": 14950 }, { "epoch": 0.89, "learning_rate": 1.7533664125596038e-07, "logits/chosen": -2.635820150375366, "logits/rejected": -2.2120137214660645, "logps/chosen": -170.9647979736328, "logps/rejected": -1227.5697021484375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.0084832906723022, "rewards/margins": 10.832781791687012, "rewards/rejected": -11.841263771057129, "step": 14960 }, { "epoch": 0.89, "learning_rate": 1.7342722801821143e-07, "logits/chosen": -2.6447854042053223, "logits/rejected": -2.245664596557617, "logps/chosen": -163.8087158203125, "logps/rejected": -1210.20068359375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.9513195753097534, "rewards/margins": 10.699905395507812, "rewards/rejected": -11.651225090026855, "step": 14970 }, { "epoch": 0.89, "learning_rate": 1.7152789488615124e-07, "logits/chosen": -2.64689302444458, "logits/rejected": -2.2711923122406006, "logps/chosen": -205.9303741455078, "logps/rejected": -1338.5269775390625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.3396284580230713, "rewards/margins": 11.595454216003418, "rewards/rejected": -12.935083389282227, "step": 14980 }, { "epoch": 0.89, "learning_rate": 1.6963865008882975e-07, "logits/chosen": -2.685518741607666, "logits/rejected": -2.210488796234131, "logps/chosen": -182.5147705078125, "logps/rejected": -1331.3974609375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.1285771131515503, "rewards/margins": 11.744684219360352, "rewards/rejected": -12.873260498046875, "step": 14990 }, { "epoch": 0.89, "learning_rate": 1.6775950181158462e-07, "logits/chosen": -2.654169797897339, "logits/rejected": -2.188753128051758, "logps/chosen": -165.80029296875, "logps/rejected": -1171.6915283203125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.9859506487846375, "rewards/margins": 10.283140182495117, "rewards/rejected": -11.26909065246582, "step": 15000 }, { "epoch": 0.9, "learning_rate": 1.6589045819601134e-07, "logits/chosen": -2.603675603866577, "logits/rejected": -2.1540234088897705, "logps/chosen": -183.5207061767578, "logps/rejected": -1190.9810791015625, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -1.1452476978302002, "rewards/margins": 10.333688735961914, "rewards/rejected": -11.478937149047852, "step": 15010 }, { "epoch": 0.9, "learning_rate": 1.640315273399254e-07, "logits/chosen": -2.6677446365356445, "logits/rejected": -2.1996638774871826, "logps/chosen": -174.00079345703125, "logps/rejected": -1314.3968505859375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.0667635202407837, "rewards/margins": 11.641279220581055, "rewards/rejected": -12.708044052124023, "step": 15020 }, { "epoch": 0.9, "learning_rate": 1.621827172973281e-07, "logits/chosen": -2.646925449371338, "logits/rejected": -2.184150218963623, "logps/chosen": -169.05628967285156, "logps/rejected": -1187.2843017578125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.9964550137519836, "rewards/margins": 10.43781852722168, "rewards/rejected": -11.434274673461914, "step": 15030 }, { "epoch": 0.9, "learning_rate": 1.603440360783709e-07, "logits/chosen": -2.6401660442352295, "logits/rejected": -2.189229726791382, "logps/chosen": -182.5482635498047, "logps/rejected": -1237.8343505859375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.1609185934066772, "rewards/margins": 10.766833305358887, "rewards/rejected": -11.927752494812012, "step": 15040 }, { "epoch": 0.9, "learning_rate": 1.5851549164932118e-07, "logits/chosen": -2.67946457862854, "logits/rejected": -2.2476909160614014, "logps/chosen": -173.39321899414062, "logps/rejected": -1295.132080078125, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.021991491317749, "rewards/margins": 11.490584373474121, "rewards/rejected": -12.512575149536133, "step": 15050 }, { "epoch": 0.9, "learning_rate": 1.5669709193252835e-07, "logits/chosen": -2.652005672454834, "logits/rejected": -2.239576816558838, "logps/chosen": -169.68539428710938, "logps/rejected": -1189.311279296875, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.015600323677063, "rewards/margins": 10.432500839233398, "rewards/rejected": -11.448099136352539, "step": 15060 }, { "epoch": 0.9, "learning_rate": 1.5488884480638677e-07, "logits/chosen": -2.6556079387664795, "logits/rejected": -2.2443923950195312, "logps/chosen": -197.99832153320312, "logps/rejected": -1294.5548095703125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.237754225730896, "rewards/margins": 11.259954452514648, "rewards/rejected": -12.497709274291992, "step": 15070 }, { "epoch": 0.9, "learning_rate": 1.5309075810530732e-07, "logits/chosen": -2.699147939682007, "logits/rejected": -2.2865710258483887, "logps/chosen": -154.24337768554688, "logps/rejected": -1166.4036865234375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.848885715007782, "rewards/margins": 10.372926712036133, "rewards/rejected": -11.221811294555664, "step": 15080 }, { "epoch": 0.9, "learning_rate": 1.5130283961967614e-07, "logits/chosen": -2.6388185024261475, "logits/rejected": -2.1456522941589355, "logps/chosen": -147.41668701171875, "logps/rejected": -1251.6011962890625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.782366156578064, "rewards/margins": 11.289183616638184, "rewards/rejected": -12.071551322937012, "step": 15090 }, { "epoch": 0.9, "learning_rate": 1.4952509709582673e-07, "logits/chosen": -2.671745777130127, "logits/rejected": -2.2714924812316895, "logps/chosen": -164.42474365234375, "logps/rejected": -1189.809814453125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.96100252866745, "rewards/margins": 10.500677108764648, "rewards/rejected": -11.46168041229248, "step": 15100 }, { "epoch": 0.9, "learning_rate": 1.4775753823600359e-07, "logits/chosen": -2.6454641819000244, "logits/rejected": -2.2221553325653076, "logps/chosen": -185.87667846679688, "logps/rejected": -1236.68701171875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.135982871055603, "rewards/margins": 10.797276496887207, "rewards/rejected": -11.933259963989258, "step": 15110 }, { "epoch": 0.9, "learning_rate": 1.460001706983294e-07, "logits/chosen": -2.657090187072754, "logits/rejected": -2.199199676513672, "logps/chosen": -168.357177734375, "logps/rejected": -1134.2022705078125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.9889437556266785, "rewards/margins": 9.91118335723877, "rewards/rejected": -10.900126457214355, "step": 15120 }, { "epoch": 0.9, "learning_rate": 1.442530020967725e-07, "logits/chosen": -2.7151927947998047, "logits/rejected": -2.300499200820923, "logps/chosen": -169.01512145996094, "logps/rejected": -1328.2789306640625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.988416850566864, "rewards/margins": 11.847478866577148, "rewards/rejected": -12.835896492004395, "step": 15130 }, { "epoch": 0.9, "learning_rate": 1.4251604000111275e-07, "logits/chosen": -2.636293411254883, "logits/rejected": -2.2017130851745605, "logps/chosen": -160.78887939453125, "logps/rejected": -1261.2135009765625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.9082280993461609, "rewards/margins": 11.26390552520752, "rewards/rejected": -12.172134399414062, "step": 15140 }, { "epoch": 0.9, "learning_rate": 1.4078929193691e-07, "logits/chosen": -2.673943042755127, "logits/rejected": -2.202122211456299, "logps/chosen": -171.90130615234375, "logps/rejected": -1177.3507080078125, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -0.9695454835891724, "rewards/margins": 10.366622924804688, "rewards/rejected": -11.33616828918457, "step": 15150 }, { "epoch": 0.9, "learning_rate": 1.3907276538546898e-07, "logits/chosen": -2.6691880226135254, "logits/rejected": -2.2728888988494873, "logps/chosen": -205.15542602539062, "logps/rejected": -1242.2779541015625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.3748595714569092, "rewards/margins": 10.599506378173828, "rewards/rejected": -11.974365234375, "step": 15160 }, { "epoch": 0.9, "learning_rate": 1.3736646778381159e-07, "logits/chosen": -2.711009979248047, "logits/rejected": -2.2541840076446533, "logps/chosen": -189.1422882080078, "logps/rejected": -1210.8173828125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.2040817737579346, "rewards/margins": 10.449132919311523, "rewards/rejected": -11.653213500976562, "step": 15170 }, { "epoch": 0.91, "learning_rate": 1.3567040652463946e-07, "logits/chosen": -2.668684244155884, "logits/rejected": -2.238323450088501, "logps/chosen": -168.61758422851562, "logps/rejected": -1393.0931396484375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.9762320518493652, "rewards/margins": 12.500678062438965, "rewards/rejected": -13.476910591125488, "step": 15180 }, { "epoch": 0.91, "learning_rate": 1.339845889563049e-07, "logits/chosen": -2.6397979259490967, "logits/rejected": -2.2097837924957275, "logps/chosen": -149.66696166992188, "logps/rejected": -1432.6214599609375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.8102267980575562, "rewards/margins": 13.069025039672852, "rewards/rejected": -13.879251480102539, "step": 15190 }, { "epoch": 0.91, "learning_rate": 1.3230902238277887e-07, "logits/chosen": -2.686516761779785, "logits/rejected": -2.2522130012512207, "logps/chosen": -171.831298828125, "logps/rejected": -1325.211181640625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.0223077535629272, "rewards/margins": 11.766373634338379, "rewards/rejected": -12.788681030273438, "step": 15200 }, { "epoch": 0.91, "learning_rate": 1.3064371406361854e-07, "logits/chosen": -2.6649279594421387, "logits/rejected": -2.2388997077941895, "logps/chosen": -164.57571411132812, "logps/rejected": -1251.55712890625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.9790751338005066, "rewards/margins": 11.08824348449707, "rewards/rejected": -12.0673189163208, "step": 15210 }, { "epoch": 0.91, "learning_rate": 1.2898867121393627e-07, "logits/chosen": -2.571096181869507, "logits/rejected": -2.145815134048462, "logps/chosen": -186.50498962402344, "logps/rejected": -1170.244873046875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.2005667686462402, "rewards/margins": 10.064101219177246, "rewards/rejected": -11.264668464660645, "step": 15220 }, { "epoch": 0.91, "learning_rate": 1.273439010043681e-07, "logits/chosen": -2.6600730419158936, "logits/rejected": -2.209486961364746, "logps/chosen": -160.07379150390625, "logps/rejected": -1236.22216796875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.8968342542648315, "rewards/margins": 11.02524185180664, "rewards/rejected": -11.922076225280762, "step": 15230 }, { "epoch": 0.91, "learning_rate": 1.2570941056104348e-07, "logits/chosen": -2.6403045654296875, "logits/rejected": -2.1852526664733887, "logps/chosen": -171.4044189453125, "logps/rejected": -1165.039306640625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.0091670751571655, "rewards/margins": 10.193971633911133, "rewards/rejected": -11.20313835144043, "step": 15240 }, { "epoch": 0.91, "learning_rate": 1.2408520696555183e-07, "logits/chosen": -2.7019858360290527, "logits/rejected": -2.315781354904175, "logps/chosen": -184.33883666992188, "logps/rejected": -1244.1417236328125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.0852398872375488, "rewards/margins": 10.913727760314941, "rewards/rejected": -11.998968124389648, "step": 15250 }, { "epoch": 0.91, "learning_rate": 1.224712972549172e-07, "logits/chosen": -2.6305789947509766, "logits/rejected": -2.266334056854248, "logps/chosen": -167.66064453125, "logps/rejected": -1269.989501953125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.9983094334602356, "rewards/margins": 11.261682510375977, "rewards/rejected": -12.259991645812988, "step": 15260 }, { "epoch": 0.91, "learning_rate": 1.2086768842156065e-07, "logits/chosen": -2.675300121307373, "logits/rejected": -2.286644697189331, "logps/chosen": -174.8889923095703, "logps/rejected": -1235.01171875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.015540599822998, "rewards/margins": 10.896650314331055, "rewards/rejected": -11.912191390991211, "step": 15270 }, { "epoch": 0.91, "learning_rate": 1.1927438741327652e-07, "logits/chosen": -2.6511597633361816, "logits/rejected": -2.2300453186035156, "logps/chosen": -174.6057586669922, "logps/rejected": -1153.0677490234375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.0353620052337646, "rewards/margins": 10.062275886535645, "rewards/rejected": -11.097638130187988, "step": 15280 }, { "epoch": 0.91, "learning_rate": 1.1769140113319755e-07, "logits/chosen": -2.7141215801239014, "logits/rejected": -2.2491402626037598, "logps/chosen": -169.5233154296875, "logps/rejected": -1164.0906982421875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.9601899981498718, "rewards/margins": 10.25045394897461, "rewards/rejected": -11.210644721984863, "step": 15290 }, { "epoch": 0.91, "learning_rate": 1.1611873643976839e-07, "logits/chosen": -2.6771621704101562, "logits/rejected": -2.2743866443634033, "logps/chosen": -158.3846435546875, "logps/rejected": -1224.721923828125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.8705946207046509, "rewards/margins": 10.940997123718262, "rewards/rejected": -11.811592102050781, "step": 15300 }, { "epoch": 0.91, "learning_rate": 1.145564001467131e-07, "logits/chosen": -2.697965145111084, "logits/rejected": -2.2223258018493652, "logps/chosen": -181.2537384033203, "logps/rejected": -1376.65966796875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.0719373226165771, "rewards/margins": 12.260189056396484, "rewards/rejected": -13.332125663757324, "step": 15310 }, { "epoch": 0.91, "learning_rate": 1.1300439902300814e-07, "logits/chosen": -2.6815478801727295, "logits/rejected": -2.2120895385742188, "logps/chosen": -177.8232879638672, "logps/rejected": -1183.864013671875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.0381433963775635, "rewards/margins": 10.360038757324219, "rewards/rejected": -11.398181915283203, "step": 15320 }, { "epoch": 0.91, "learning_rate": 1.1146273979285138e-07, "logits/chosen": -2.655674695968628, "logits/rejected": -2.249160051345825, "logps/chosen": -201.2614288330078, "logps/rejected": -1120.4149169921875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.3674007654190063, "rewards/margins": 9.404138565063477, "rewards/rejected": -10.771540641784668, "step": 15330 }, { "epoch": 0.91, "learning_rate": 1.0993142913563209e-07, "logits/chosen": -2.6248021125793457, "logits/rejected": -2.0581984519958496, "logps/chosen": -179.18348693847656, "logps/rejected": -1243.94287109375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.133216142654419, "rewards/margins": 10.868388175964355, "rewards/rejected": -12.001604080200195, "step": 15340 }, { "epoch": 0.92, "learning_rate": 1.0841047368590596e-07, "logits/chosen": -2.6652657985687256, "logits/rejected": -2.197152614593506, "logps/chosen": -171.3666229248047, "logps/rejected": -1206.4525146484375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.0100961923599243, "rewards/margins": 10.606683731079102, "rewards/rejected": -11.616779327392578, "step": 15350 }, { "epoch": 0.92, "learning_rate": 1.0689988003336121e-07, "logits/chosen": -2.688142776489258, "logits/rejected": -2.2145469188690186, "logps/chosen": -178.90750122070312, "logps/rejected": -1156.3896484375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.052981972694397, "rewards/margins": 10.077046394348145, "rewards/rejected": -11.130027770996094, "step": 15360 }, { "epoch": 0.92, "learning_rate": 1.0539965472279424e-07, "logits/chosen": -2.648425579071045, "logits/rejected": -2.1206278800964355, "logps/chosen": -165.90655517578125, "logps/rejected": -1135.87646484375, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9682438969612122, "rewards/margins": 9.959437370300293, "rewards/rejected": -10.927680969238281, "step": 15370 }, { "epoch": 0.92, "learning_rate": 1.039098042540787e-07, "logits/chosen": -2.697800397872925, "logits/rejected": -2.2975144386291504, "logps/chosen": -182.51307678222656, "logps/rejected": -1195.71435546875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.0929452180862427, "rewards/margins": 10.417900085449219, "rewards/rejected": -11.510844230651855, "step": 15380 }, { "epoch": 0.92, "learning_rate": 1.0243033508213873e-07, "logits/chosen": -2.644886016845703, "logits/rejected": -2.1226062774658203, "logps/chosen": -163.13980102539062, "logps/rejected": -1188.9097900390625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.9173575639724731, "rewards/margins": 10.52003002166748, "rewards/rejected": -11.43738842010498, "step": 15390 }, { "epoch": 0.92, "learning_rate": 1.0096125361691993e-07, "logits/chosen": -2.666822671890259, "logits/rejected": -2.284858226776123, "logps/chosen": -181.47325134277344, "logps/rejected": -1255.91552734375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.0732786655426025, "rewards/margins": 11.037242889404297, "rewards/rejected": -12.11052131652832, "step": 15400 }, { "epoch": 0.92, "learning_rate": 9.950256622336258e-08, "logits/chosen": -2.6804606914520264, "logits/rejected": -2.273733615875244, "logps/chosen": -165.30763244628906, "logps/rejected": -1225.792236328125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.968244194984436, "rewards/margins": 10.85100269317627, "rewards/rejected": -11.819246292114258, "step": 15410 }, { "epoch": 0.92, "learning_rate": 9.805427922137373e-08, "logits/chosen": -2.66705060005188, "logits/rejected": -2.1891260147094727, "logps/chosen": -173.49383544921875, "logps/rejected": -1332.0062255859375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.030502200126648, "rewards/margins": 11.832608222961426, "rewards/rejected": -12.863110542297363, "step": 15420 }, { "epoch": 0.92, "learning_rate": 9.661639888579877e-08, "logits/chosen": -2.6146352291107178, "logits/rejected": -2.1669552326202393, "logps/chosen": -169.9706268310547, "logps/rejected": -1221.0625, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.1017240285873413, "rewards/margins": 10.672877311706543, "rewards/rejected": -11.7746000289917, "step": 15430 }, { "epoch": 0.92, "learning_rate": 9.51889314463969e-08, "logits/chosen": -2.71271014213562, "logits/rejected": -2.2638492584228516, "logps/chosen": -178.9517059326172, "logps/rejected": -1188.2783203125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.0832500457763672, "rewards/margins": 10.332706451416016, "rewards/rejected": -11.415956497192383, "step": 15440 }, { "epoch": 0.92, "learning_rate": 9.377188308781038e-08, "logits/chosen": -2.6629462242126465, "logits/rejected": -2.190911054611206, "logps/chosen": -184.59182739257812, "logps/rejected": -1255.3714599609375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.1611316204071045, "rewards/margins": 10.942505836486816, "rewards/rejected": -12.1036376953125, "step": 15450 }, { "epoch": 0.92, "learning_rate": 9.236525994954142e-08, "logits/chosen": -2.66231632232666, "logits/rejected": -2.2185728549957275, "logps/chosen": -167.41738891601562, "logps/rejected": -1202.1636962890625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.9807716608047485, "rewards/margins": 10.598533630371094, "rewards/rejected": -11.579305648803711, "step": 15460 }, { "epoch": 0.92, "learning_rate": 9.096906812592315e-08, "logits/chosen": -2.6808948516845703, "logits/rejected": -2.2528421878814697, "logps/chosen": -156.71408081054688, "logps/rejected": -1257.4429931640625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.9337954521179199, "rewards/margins": 11.1892671585083, "rewards/rejected": -12.123063087463379, "step": 15470 }, { "epoch": 0.92, "learning_rate": 8.958331366609424e-08, "logits/chosen": -2.6562304496765137, "logits/rejected": -2.2269248962402344, "logps/chosen": -193.720947265625, "logps/rejected": -1293.5947265625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.2302790880203247, "rewards/margins": 11.262996673583984, "rewards/rejected": -12.49327564239502, "step": 15480 }, { "epoch": 0.92, "learning_rate": 8.820800257397205e-08, "logits/chosen": -2.657881498336792, "logits/rejected": -2.233145236968994, "logps/chosen": -166.1521453857422, "logps/rejected": -1257.880126953125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.9509795308113098, "rewards/margins": 11.179496765136719, "rewards/rejected": -12.130475997924805, "step": 15490 }, { "epoch": 0.92, "learning_rate": 8.684314080822764e-08, "logits/chosen": -2.670079469680786, "logits/rejected": -2.2392404079437256, "logps/chosen": -183.1615447998047, "logps/rejected": -1324.6661376953125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.120317816734314, "rewards/margins": 11.68382453918457, "rewards/rejected": -12.8041410446167, "step": 15500 }, { "epoch": 0.92, "learning_rate": 8.54887342822594e-08, "logits/chosen": -2.6902074813842773, "logits/rejected": -2.2246272563934326, "logps/chosen": -176.72415161132812, "logps/rejected": -1282.885009765625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.0135183334350586, "rewards/margins": 11.368112564086914, "rewards/rejected": -12.381631851196289, "step": 15510 }, { "epoch": 0.93, "learning_rate": 8.414478886416611e-08, "logits/chosen": -2.6867082118988037, "logits/rejected": -2.2736759185791016, "logps/chosen": -159.37124633789062, "logps/rejected": -1182.3377685546875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.9226862788200378, "rewards/margins": 10.444775581359863, "rewards/rejected": -11.367461204528809, "step": 15520 }, { "epoch": 0.93, "learning_rate": 8.281131037672474e-08, "logits/chosen": -2.671300172805786, "logits/rejected": -2.2463297843933105, "logps/chosen": -195.54315185546875, "logps/rejected": -1259.5648193359375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.2373530864715576, "rewards/margins": 10.92322063446045, "rewards/rejected": -12.16057300567627, "step": 15530 }, { "epoch": 0.93, "learning_rate": 8.148830459736106e-08, "logits/chosen": -2.653449058532715, "logits/rejected": -2.24001145362854, "logps/chosen": -167.10552978515625, "logps/rejected": -1317.7305908203125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.9326269030570984, "rewards/margins": 11.81169319152832, "rewards/rejected": -12.7443208694458, "step": 15540 }, { "epoch": 0.93, "learning_rate": 8.017577725812825e-08, "logits/chosen": -2.6509127616882324, "logits/rejected": -2.250549793243408, "logps/chosen": -178.0433349609375, "logps/rejected": -1138.2529296875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.051513910293579, "rewards/margins": 9.897701263427734, "rewards/rejected": -10.949213027954102, "step": 15550 }, { "epoch": 0.93, "learning_rate": 7.887373404568133e-08, "logits/chosen": -2.650705575942993, "logits/rejected": -2.1456823348999023, "logps/chosen": -184.0078887939453, "logps/rejected": -1261.8377685546875, "loss": 0.0292, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1063464879989624, "rewards/margins": 11.06517505645752, "rewards/rejected": -12.17152214050293, "step": 15560 }, { "epoch": 0.93, "learning_rate": 7.758218060124916e-08, "logits/chosen": -2.6347453594207764, "logits/rejected": -2.2446770668029785, "logps/chosen": -216.38247680664062, "logps/rejected": -1281.050048828125, "loss": 0.0514, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.415852427482605, "rewards/margins": 10.954060554504395, "rewards/rejected": -12.369911193847656, "step": 15570 }, { "epoch": 0.93, "learning_rate": 7.630112252061534e-08, "logits/chosen": -2.6698391437530518, "logits/rejected": -2.2567455768585205, "logps/chosen": -181.057373046875, "logps/rejected": -1319.342041015625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.120937705039978, "rewards/margins": 11.62074089050293, "rewards/rejected": -12.741678237915039, "step": 15580 }, { "epoch": 0.93, "learning_rate": 7.503056535408975e-08, "logits/chosen": -2.6975629329681396, "logits/rejected": -2.2926926612854004, "logps/chosen": -180.2107696533203, "logps/rejected": -1263.228515625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.0885566473007202, "rewards/margins": 11.096248626708984, "rewards/rejected": -12.184804916381836, "step": 15590 }, { "epoch": 0.93, "learning_rate": 7.377051460648682e-08, "logits/chosen": -2.6085667610168457, "logits/rejected": -2.229710102081299, "logps/chosen": -170.35891723632812, "logps/rejected": -1316.247314453125, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.012973427772522, "rewards/margins": 11.701685905456543, "rewards/rejected": -12.714659690856934, "step": 15600 }, { "epoch": 0.93, "learning_rate": 7.252097573709982e-08, "logits/chosen": -2.707313060760498, "logits/rejected": -2.2668449878692627, "logps/chosen": -165.257080078125, "logps/rejected": -1292.935546875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.9548360705375671, "rewards/margins": 11.541874885559082, "rewards/rejected": -12.496709823608398, "step": 15610 }, { "epoch": 0.93, "learning_rate": 7.128195415967987e-08, "logits/chosen": -2.6687378883361816, "logits/rejected": -2.3147215843200684, "logps/chosen": -178.81834411621094, "logps/rejected": -1303.671142578125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.096724510192871, "rewards/margins": 11.488851547241211, "rewards/rejected": -12.585575103759766, "step": 15620 }, { "epoch": 0.93, "learning_rate": 7.005345524240926e-08, "logits/chosen": -2.6833956241607666, "logits/rejected": -2.1427695751190186, "logps/chosen": -178.79551696777344, "logps/rejected": -1276.1961669921875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.0346635580062866, "rewards/margins": 11.280159950256348, "rewards/rejected": -12.314825057983398, "step": 15630 }, { "epoch": 0.93, "learning_rate": 6.883548430788062e-08, "logits/chosen": -2.6569366455078125, "logits/rejected": -2.2688584327697754, "logps/chosen": -173.58758544921875, "logps/rejected": -1221.869384765625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.0539261102676392, "rewards/margins": 10.717350959777832, "rewards/rejected": -11.77127742767334, "step": 15640 }, { "epoch": 0.93, "learning_rate": 6.762804663307365e-08, "logits/chosen": -2.6584842205047607, "logits/rejected": -2.195120334625244, "logps/chosen": -184.82872009277344, "logps/rejected": -1309.2528076171875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0888469219207764, "rewards/margins": 11.556825637817383, "rewards/rejected": -12.645673751831055, "step": 15650 }, { "epoch": 0.93, "learning_rate": 6.643114744933038e-08, "logits/chosen": -2.6389527320861816, "logits/rejected": -2.224365711212158, "logps/chosen": -171.68350219726562, "logps/rejected": -1278.6298828125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.0224170684814453, "rewards/margins": 11.32701301574707, "rewards/rejected": -12.3494291305542, "step": 15660 }, { "epoch": 0.93, "learning_rate": 6.524479194233463e-08, "logits/chosen": -2.7615578174591064, "logits/rejected": -2.2572968006134033, "logps/chosen": -185.48269653320312, "logps/rejected": -1336.146240234375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.1167644262313843, "rewards/margins": 11.79128360748291, "rewards/rejected": -12.908047676086426, "step": 15670 }, { "epoch": 0.94, "learning_rate": 6.406898525208843e-08, "logits/chosen": -2.623728036880493, "logits/rejected": -2.1538612842559814, "logps/chosen": -166.7744598388672, "logps/rejected": -1292.7698974609375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.9642624855041504, "rewards/margins": 11.528935432434082, "rewards/rejected": -12.49319839477539, "step": 15680 }, { "epoch": 0.94, "learning_rate": 6.290373247289012e-08, "logits/chosen": -2.649784564971924, "logits/rejected": -2.1965155601501465, "logps/chosen": -180.64471435546875, "logps/rejected": -1309.1988525390625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.1040668487548828, "rewards/margins": 11.546464920043945, "rewards/rejected": -12.650530815124512, "step": 15690 }, { "epoch": 0.94, "learning_rate": 6.174903865331177e-08, "logits/chosen": -2.701651096343994, "logits/rejected": -2.231921672821045, "logps/chosen": -171.8490447998047, "logps/rejected": -1340.428466796875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.992447018623352, "rewards/margins": 11.97745418548584, "rewards/rejected": -12.969900131225586, "step": 15700 }, { "epoch": 0.94, "learning_rate": 6.060490879617853e-08, "logits/chosen": -2.6515955924987793, "logits/rejected": -2.2553226947784424, "logps/chosen": -164.45840454101562, "logps/rejected": -1203.866455078125, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.9104216694831848, "rewards/margins": 10.70018196105957, "rewards/rejected": -11.610605239868164, "step": 15710 }, { "epoch": 0.94, "learning_rate": 5.947134785854597e-08, "logits/chosen": -2.642116069793701, "logits/rejected": -2.2558083534240723, "logps/chosen": -168.17141723632812, "logps/rejected": -1221.61279296875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.9918482899665833, "rewards/margins": 10.772356033325195, "rewards/rejected": -11.764204025268555, "step": 15720 }, { "epoch": 0.94, "learning_rate": 5.8348360751677435e-08, "logits/chosen": -2.644047498703003, "logits/rejected": -2.196828603744507, "logps/chosen": -172.4115447998047, "logps/rejected": -1205.8841552734375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.9714924097061157, "rewards/margins": 10.647869110107422, "rewards/rejected": -11.619361877441406, "step": 15730 }, { "epoch": 0.94, "learning_rate": 5.7235952341026524e-08, "logits/chosen": -2.675264596939087, "logits/rejected": -2.2065622806549072, "logps/chosen": -179.26968383789062, "logps/rejected": -1173.582763671875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.0718376636505127, "rewards/margins": 10.227112770080566, "rewards/rejected": -11.298949241638184, "step": 15740 }, { "epoch": 0.94, "learning_rate": 5.6134127446211275e-08, "logits/chosen": -2.6677443981170654, "logits/rejected": -2.267427444458008, "logps/chosen": -163.35110473632812, "logps/rejected": -1212.1041259765625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.9990060925483704, "rewards/margins": 10.684392929077148, "rewards/rejected": -11.68339729309082, "step": 15750 }, { "epoch": 0.94, "learning_rate": 5.5042890840996676e-08, "logits/chosen": -2.7018380165100098, "logits/rejected": -2.2311275005340576, "logps/chosen": -178.52374267578125, "logps/rejected": -1192.3687744140625, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1306133270263672, "rewards/margins": 10.351810455322266, "rewards/rejected": -11.482422828674316, "step": 15760 }, { "epoch": 0.94, "learning_rate": 5.3962247253273035e-08, "logits/chosen": -2.673051118850708, "logits/rejected": -2.1973841190338135, "logps/chosen": -228.45956420898438, "logps/rejected": -1167.745849609375, "loss": 0.0206, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.564034342765808, "rewards/margins": 9.680739402770996, "rewards/rejected": -11.24477481842041, "step": 15770 }, { "epoch": 0.94, "learning_rate": 5.2892201365035144e-08, "logits/chosen": -2.6698529720306396, "logits/rejected": -2.263176202774048, "logps/chosen": -178.38954162597656, "logps/rejected": -1230.742431640625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.1097861528396606, "rewards/margins": 10.761785507202148, "rewards/rejected": -11.871572494506836, "step": 15780 }, { "epoch": 0.94, "learning_rate": 5.18327578123623e-08, "logits/chosen": -2.6756222248077393, "logits/rejected": -2.244558334350586, "logps/chosen": -159.38980102539062, "logps/rejected": -1191.995361328125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.8802758455276489, "rewards/margins": 10.607393264770508, "rewards/rejected": -11.487669944763184, "step": 15790 }, { "epoch": 0.94, "learning_rate": 5.078392118539777e-08, "logits/chosen": -2.6724026203155518, "logits/rejected": -2.255080461502075, "logps/chosen": -188.29446411132812, "logps/rejected": -1205.693115234375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.1916935443878174, "rewards/margins": 10.421567916870117, "rewards/rejected": -11.613263130187988, "step": 15800 }, { "epoch": 0.94, "learning_rate": 4.974569602832991e-08, "logits/chosen": -2.6580288410186768, "logits/rejected": -2.2031607627868652, "logps/chosen": -172.54359436035156, "logps/rejected": -1260.949951171875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.063873052597046, "rewards/margins": 11.100789070129395, "rewards/rejected": -12.16466236114502, "step": 15810 }, { "epoch": 0.94, "learning_rate": 4.8718086839370794e-08, "logits/chosen": -2.673914670944214, "logits/rejected": -2.2175183296203613, "logps/chosen": -175.0134735107422, "logps/rejected": -1311.15771484375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.0596094131469727, "rewards/margins": 11.605969429016113, "rewards/rejected": -12.66557788848877, "step": 15820 }, { "epoch": 0.94, "learning_rate": 4.7701098070739304e-08, "logits/chosen": -2.6465909481048584, "logits/rejected": -2.2616775035858154, "logps/chosen": -174.7118377685547, "logps/rejected": -1199.425537109375, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.057864785194397, "rewards/margins": 10.501089096069336, "rewards/rejected": -11.558954238891602, "step": 15830 }, { "epoch": 0.94, "learning_rate": 4.66947341286389e-08, "logits/chosen": -2.6740498542785645, "logits/rejected": -2.178858518600464, "logps/chosen": -163.44229125976562, "logps/rejected": -1386.80908203125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.9184477925300598, "rewards/margins": 12.485325813293457, "rewards/rejected": -13.403773307800293, "step": 15840 }, { "epoch": 0.95, "learning_rate": 4.5698999373240404e-08, "logits/chosen": -2.6467292308807373, "logits/rejected": -2.2230963706970215, "logps/chosen": -184.9004364013672, "logps/rejected": -1163.9185791015625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.1377449035644531, "rewards/margins": 10.049426078796387, "rewards/rejected": -11.18717098236084, "step": 15850 }, { "epoch": 0.95, "learning_rate": 4.471389811866289e-08, "logits/chosen": -2.6821835041046143, "logits/rejected": -2.218871593475342, "logps/chosen": -180.2025146484375, "logps/rejected": -1147.4190673828125, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.142691969871521, "rewards/margins": 9.896963119506836, "rewards/rejected": -11.039654731750488, "step": 15860 }, { "epoch": 0.95, "learning_rate": 4.373943463295477e-08, "logits/chosen": -2.6431102752685547, "logits/rejected": -2.2247185707092285, "logps/chosen": -161.41119384765625, "logps/rejected": -1239.714111328125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.9377598762512207, "rewards/margins": 11.023923873901367, "rewards/rejected": -11.961685180664062, "step": 15870 }, { "epoch": 0.95, "learning_rate": 4.277561313807493e-08, "logits/chosen": -2.6661829948425293, "logits/rejected": -2.195704221725464, "logps/chosen": -176.55194091796875, "logps/rejected": -1253.5799560546875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.0512021780014038, "rewards/margins": 11.044206619262695, "rewards/rejected": -12.095407485961914, "step": 15880 }, { "epoch": 0.95, "learning_rate": 4.1822437809874994e-08, "logits/chosen": -2.657374143600464, "logits/rejected": -2.2294418811798096, "logps/chosen": -179.5014190673828, "logps/rejected": -1220.29931640625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.1006819009780884, "rewards/margins": 10.661455154418945, "rewards/rejected": -11.762137413024902, "step": 15890 }, { "epoch": 0.95, "learning_rate": 4.0879912778080956e-08, "logits/chosen": -2.6456456184387207, "logits/rejected": -2.2596583366394043, "logps/chosen": -181.09066772460938, "logps/rejected": -1146.300537109375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1637568473815918, "rewards/margins": 9.87397575378418, "rewards/rejected": -11.037734031677246, "step": 15900 }, { "epoch": 0.95, "learning_rate": 3.994804212627462e-08, "logits/chosen": -2.651865005493164, "logits/rejected": -2.2323803901672363, "logps/chosen": -175.99172973632812, "logps/rejected": -1316.3236083984375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.0354217290878296, "rewards/margins": 11.680215835571289, "rewards/rejected": -12.71563720703125, "step": 15910 }, { "epoch": 0.95, "learning_rate": 3.902682989187889e-08, "logits/chosen": -2.6795008182525635, "logits/rejected": -2.213805675506592, "logps/chosen": -160.47288513183594, "logps/rejected": -1261.33642578125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.8807178735733032, "rewards/margins": 11.292486190795898, "rewards/rejected": -12.17320442199707, "step": 15920 }, { "epoch": 0.95, "learning_rate": 3.8116280066134994e-08, "logits/chosen": -2.6878819465637207, "logits/rejected": -2.2324771881103516, "logps/chosen": -162.35952758789062, "logps/rejected": -1327.2325439453125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.9276419878005981, "rewards/margins": 11.898499488830566, "rewards/rejected": -12.826141357421875, "step": 15930 }, { "epoch": 0.95, "learning_rate": 3.721639659409054e-08, "logits/chosen": -2.6434741020202637, "logits/rejected": -2.2681822776794434, "logps/chosen": -172.68704223632812, "logps/rejected": -1200.7523193359375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.0266098976135254, "rewards/margins": 10.549015998840332, "rewards/rejected": -11.5756254196167, "step": 15940 }, { "epoch": 0.95, "learning_rate": 3.63271833745793e-08, "logits/chosen": -2.6975810527801514, "logits/rejected": -2.238269329071045, "logps/chosen": -161.9984588623047, "logps/rejected": -1281.224853515625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.8777400255203247, "rewards/margins": 11.489442825317383, "rewards/rejected": -12.367182731628418, "step": 15950 }, { "epoch": 0.95, "learning_rate": 3.544864426020478e-08, "logits/chosen": -2.676342487335205, "logits/rejected": -2.294611692428589, "logps/chosen": -176.2101287841797, "logps/rejected": -1235.3106689453125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.0305867195129395, "rewards/margins": 10.878564834594727, "rewards/rejected": -11.909151077270508, "step": 15960 }, { "epoch": 0.95, "learning_rate": 3.4580783057324706e-08, "logits/chosen": -2.6921143531799316, "logits/rejected": -2.222726821899414, "logps/chosen": -186.8076171875, "logps/rejected": -1137.28564453125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.130562663078308, "rewards/margins": 9.798645973205566, "rewards/rejected": -10.929207801818848, "step": 15970 }, { "epoch": 0.95, "learning_rate": 3.3723603526032435e-08, "logits/chosen": -2.639613151550293, "logits/rejected": -2.164780616760254, "logps/chosen": -178.20245361328125, "logps/rejected": -1183.5435791015625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.0918643474578857, "rewards/margins": 10.308358192443848, "rewards/rejected": -11.400223731994629, "step": 15980 }, { "epoch": 0.95, "learning_rate": 3.2877109380143604e-08, "logits/chosen": -2.694697380065918, "logits/rejected": -2.214928150177002, "logps/chosen": -181.5031280517578, "logps/rejected": -1273.8643798828125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.1214652061462402, "rewards/margins": 11.179290771484375, "rewards/rejected": -12.300756454467773, "step": 15990 }, { "epoch": 0.95, "learning_rate": 3.204130428717672e-08, "logits/chosen": -2.70440411567688, "logits/rejected": -2.1867878437042236, "logps/chosen": -180.6644744873047, "logps/rejected": -1208.783203125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.0519477128982544, "rewards/margins": 10.596542358398438, "rewards/rejected": -11.648488998413086, "step": 16000 }, { "epoch": 0.95, "learning_rate": 3.121619186834041e-08, "logits/chosen": -2.6858105659484863, "logits/rejected": -2.246583938598633, "logps/chosen": -205.57125854492188, "logps/rejected": -1266.3936767578125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.3817111253738403, "rewards/margins": 10.831425666809082, "rewards/rejected": -12.213135719299316, "step": 16010 }, { "epoch": 0.96, "learning_rate": 3.040177569851477e-08, "logits/chosen": -2.61903715133667, "logits/rejected": -2.208021879196167, "logps/chosen": -174.45413208007812, "logps/rejected": -1214.1480712890625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.0142533779144287, "rewards/margins": 10.69257926940918, "rewards/rejected": -11.706830978393555, "step": 16020 }, { "epoch": 0.96, "learning_rate": 2.9598059306238658e-08, "logits/chosen": -2.647799015045166, "logits/rejected": -2.1788220405578613, "logps/chosen": -170.56747436523438, "logps/rejected": -1287.8985595703125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.9761293530464172, "rewards/margins": 11.46005916595459, "rewards/rejected": -12.436187744140625, "step": 16030 }, { "epoch": 0.96, "learning_rate": 2.8805046173692176e-08, "logits/chosen": -2.6659646034240723, "logits/rejected": -2.2203304767608643, "logps/chosen": -167.7789764404297, "logps/rejected": -1295.4447021484375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.9377069473266602, "rewards/margins": 11.559220314025879, "rewards/rejected": -12.496925354003906, "step": 16040 }, { "epoch": 0.96, "learning_rate": 2.802273973668279e-08, "logits/chosen": -2.655034303665161, "logits/rejected": -2.268833875656128, "logps/chosen": -166.41989135742188, "logps/rejected": -1366.1336669921875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.9681248664855957, "rewards/margins": 12.258781433105469, "rewards/rejected": -13.226905822753906, "step": 16050 }, { "epoch": 0.96, "learning_rate": 2.725114338463064e-08, "logits/chosen": -2.7416205406188965, "logits/rejected": -2.2912113666534424, "logps/chosen": -166.6707000732422, "logps/rejected": -1312.750732421875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.9457403421401978, "rewards/margins": 11.732978820800781, "rewards/rejected": -12.678718566894531, "step": 16060 }, { "epoch": 0.96, "learning_rate": 2.6490260460552143e-08, "logits/chosen": -2.6333775520324707, "logits/rejected": -2.215641736984253, "logps/chosen": -173.98220825195312, "logps/rejected": -1290.986328125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.9858376383781433, "rewards/margins": 11.479873657226562, "rewards/rejected": -12.46571159362793, "step": 16070 }, { "epoch": 0.96, "learning_rate": 2.5740094261048342e-08, "logits/chosen": -2.629335880279541, "logits/rejected": -2.2283644676208496, "logps/chosen": -198.64735412597656, "logps/rejected": -1200.9288330078125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.3244969844818115, "rewards/margins": 10.241098403930664, "rewards/rejected": -11.565595626831055, "step": 16080 }, { "epoch": 0.96, "learning_rate": 2.5000648036287712e-08, "logits/chosen": -2.690540075302124, "logits/rejected": -2.249478816986084, "logps/chosen": -185.4785919189453, "logps/rejected": -1211.8551025390625, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.1727367639541626, "rewards/margins": 10.489535331726074, "rewards/rejected": -11.662272453308105, "step": 16090 }, { "epoch": 0.96, "learning_rate": 2.4271924989993646e-08, "logits/chosen": -2.6665353775024414, "logits/rejected": -2.1682610511779785, "logps/chosen": -161.706298828125, "logps/rejected": -1247.8104248046875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.8923938870429993, "rewards/margins": 11.139763832092285, "rewards/rejected": -12.032156944274902, "step": 16100 }, { "epoch": 0.96, "learning_rate": 2.3553928279431147e-08, "logits/chosen": -2.7035655975341797, "logits/rejected": -2.241546869277954, "logps/chosen": -179.96670532226562, "logps/rejected": -1372.9996337890625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.0802112817764282, "rewards/margins": 12.1927490234375, "rewards/rejected": -13.27295970916748, "step": 16110 }, { "epoch": 0.96, "learning_rate": 2.284666101539129e-08, "logits/chosen": -2.6543209552764893, "logits/rejected": -2.226243495941162, "logps/chosen": -157.00765991210938, "logps/rejected": -1217.953857421875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.8477692604064941, "rewards/margins": 10.890615463256836, "rewards/rejected": -11.738385200500488, "step": 16120 }, { "epoch": 0.96, "learning_rate": 2.2150126262179273e-08, "logits/chosen": -2.679105758666992, "logits/rejected": -2.259007215499878, "logps/chosen": -170.59732055664062, "logps/rejected": -1319.4207763671875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.0483429431915283, "rewards/margins": 11.701032638549805, "rewards/rejected": -12.74937629699707, "step": 16130 }, { "epoch": 0.96, "learning_rate": 2.1464327037600264e-08, "logits/chosen": -2.667034149169922, "logits/rejected": -2.145745038986206, "logps/chosen": -173.6420440673828, "logps/rejected": -1251.659912109375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.974643349647522, "rewards/margins": 11.09910774230957, "rewards/rejected": -12.073748588562012, "step": 16140 }, { "epoch": 0.96, "learning_rate": 2.0789266312947477e-08, "logits/chosen": -2.6917691230773926, "logits/rejected": -2.3492634296417236, "logps/chosen": -182.48651123046875, "logps/rejected": -1119.5035400390625, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.1133863925933838, "rewards/margins": 9.64062786102295, "rewards/rejected": -10.75401496887207, "step": 16150 }, { "epoch": 0.96, "learning_rate": 2.0124947012987172e-08, "logits/chosen": -2.644659996032715, "logits/rejected": -2.214852809906006, "logps/chosen": -152.89157104492188, "logps/rejected": -1245.90966796875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.8423296213150024, "rewards/margins": 11.1737060546875, "rewards/rejected": -12.016035079956055, "step": 16160 }, { "epoch": 0.96, "learning_rate": 1.947137201594923e-08, "logits/chosen": -2.6840062141418457, "logits/rejected": -2.3002514839172363, "logps/chosen": -184.42825317382812, "logps/rejected": -1231.1353759765625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.095862627029419, "rewards/margins": 10.771964073181152, "rewards/rejected": -11.867825508117676, "step": 16170 }, { "epoch": 0.96, "learning_rate": 1.8828544153510765e-08, "logits/chosen": -2.705512285232544, "logits/rejected": -2.3017754554748535, "logps/chosen": -188.9728240966797, "logps/rejected": -1201.132568359375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.2116485834121704, "rewards/margins": 10.357165336608887, "rewards/rejected": -11.568815231323242, "step": 16180 }, { "epoch": 0.97, "learning_rate": 1.8196466210787245e-08, "logits/chosen": -2.707188129425049, "logits/rejected": -2.274749755859375, "logps/chosen": -163.2353057861328, "logps/rejected": -1249.7904052734375, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -0.9587360620498657, "rewards/margins": 11.098140716552734, "rewards/rejected": -12.056876182556152, "step": 16190 }, { "epoch": 0.97, "learning_rate": 1.7575140926318346e-08, "logits/chosen": -2.6321167945861816, "logits/rejected": -2.2132019996643066, "logps/chosen": -193.10452270507812, "logps/rejected": -1155.3184814453125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.2553688287734985, "rewards/margins": 9.859628677368164, "rewards/rejected": -11.114996910095215, "step": 16200 }, { "epoch": 0.97, "learning_rate": 1.6964570992057394e-08, "logits/chosen": -2.6683545112609863, "logits/rejected": -2.2522847652435303, "logps/chosen": -173.28628540039062, "logps/rejected": -1343.628173828125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.075852632522583, "rewards/margins": 11.91985034942627, "rewards/rejected": -12.995704650878906, "step": 16210 }, { "epoch": 0.97, "learning_rate": 1.6364759053358603e-08, "logits/chosen": -2.6427385807037354, "logits/rejected": -2.2167510986328125, "logps/chosen": -161.57571411132812, "logps/rejected": -1264.093505859375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.9353412389755249, "rewards/margins": 11.257986068725586, "rewards/rejected": -12.193326950073242, "step": 16220 }, { "epoch": 0.97, "learning_rate": 1.5775707708966247e-08, "logits/chosen": -2.650177478790283, "logits/rejected": -2.1728808879852295, "logps/chosen": -172.66888427734375, "logps/rejected": -1184.2003173828125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.0020825862884521, "rewards/margins": 10.396360397338867, "rewards/rejected": -11.398443222045898, "step": 16230 }, { "epoch": 0.97, "learning_rate": 1.5197419511003564e-08, "logits/chosen": -2.6753103733062744, "logits/rejected": -2.2551887035369873, "logps/chosen": -212.41061401367188, "logps/rejected": -1316.4437255859375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.3922874927520752, "rewards/margins": 11.326181411743164, "rewards/rejected": -12.718469619750977, "step": 16240 }, { "epoch": 0.97, "learning_rate": 1.4629896964960533e-08, "logits/chosen": -2.603825807571411, "logits/rejected": -2.1976101398468018, "logps/chosen": -161.50404357910156, "logps/rejected": -1155.7236328125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.9237446784973145, "rewards/margins": 10.20910930633545, "rewards/rejected": -11.132853507995605, "step": 16250 }, { "epoch": 0.97, "learning_rate": 1.4073142529685003e-08, "logits/chosen": -2.6120095252990723, "logits/rejected": -2.1848855018615723, "logps/chosen": -182.35549926757812, "logps/rejected": -1240.063232421875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.036584734916687, "rewards/margins": 10.91883659362793, "rewards/rejected": -11.955423355102539, "step": 16260 }, { "epoch": 0.97, "learning_rate": 1.3527158617370196e-08, "logits/chosen": -2.6554369926452637, "logits/rejected": -2.2158074378967285, "logps/chosen": -169.08436584472656, "logps/rejected": -1240.2252197265625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.0364867448806763, "rewards/margins": 10.917577743530273, "rewards/rejected": -11.954065322875977, "step": 16270 }, { "epoch": 0.97, "learning_rate": 1.2991947593545273e-08, "logits/chosen": -2.6749796867370605, "logits/rejected": -2.2231011390686035, "logps/chosen": -182.36309814453125, "logps/rejected": -1285.9312744140625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.0808775424957275, "rewards/margins": 11.342862129211426, "rewards/rejected": -12.42374038696289, "step": 16280 }, { "epoch": 0.97, "learning_rate": 1.2467511777064789e-08, "logits/chosen": -2.651242971420288, "logits/rejected": -2.2104105949401855, "logps/chosen": -177.27374267578125, "logps/rejected": -1284.770263671875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.9978905916213989, "rewards/margins": 11.40503978729248, "rewards/rejected": -12.402929306030273, "step": 16290 }, { "epoch": 0.97, "learning_rate": 1.1953853440098418e-08, "logits/chosen": -2.6822965145111084, "logits/rejected": -2.2353248596191406, "logps/chosen": -158.13011169433594, "logps/rejected": -1288.239501953125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.840979278087616, "rewards/margins": 11.592016220092773, "rewards/rejected": -12.43299388885498, "step": 16300 }, { "epoch": 0.97, "learning_rate": 1.145097480812124e-08, "logits/chosen": -2.6494178771972656, "logits/rejected": -2.2277884483337402, "logps/chosen": -202.35888671875, "logps/rejected": -1159.5748291015625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.3133056163787842, "rewards/margins": 9.840926170349121, "rewards/rejected": -11.154231071472168, "step": 16310 }, { "epoch": 0.97, "learning_rate": 1.0958878059905143e-08, "logits/chosen": -2.70688533782959, "logits/rejected": -2.195460081100464, "logps/chosen": -164.93997192382812, "logps/rejected": -1241.07080078125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.9415566325187683, "rewards/margins": 11.026819229125977, "rewards/rejected": -11.968377113342285, "step": 16320 }, { "epoch": 0.97, "learning_rate": 1.0477565327507155e-08, "logits/chosen": -2.756645679473877, "logits/rejected": -2.282986879348755, "logps/chosen": -170.934814453125, "logps/rejected": -1251.6358642578125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.0068751573562622, "rewards/margins": 11.066400527954102, "rewards/rejected": -12.073274612426758, "step": 16330 }, { "epoch": 0.97, "learning_rate": 1.0007038696262517e-08, "logits/chosen": -2.7095096111297607, "logits/rejected": -2.1451876163482666, "logps/chosen": -162.7404327392578, "logps/rejected": -1209.6416015625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.9350031614303589, "rewards/margins": 10.727781295776367, "rewards/rejected": -11.662785530090332, "step": 16340 }, { "epoch": 0.97, "learning_rate": 9.547300204773845e-09, "logits/chosen": -2.647977352142334, "logits/rejected": -2.137451171875, "logps/chosen": -186.81507873535156, "logps/rejected": -1232.0718994140625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.140344262123108, "rewards/margins": 10.737279891967773, "rewards/rejected": -11.877625465393066, "step": 16350 }, { "epoch": 0.98, "learning_rate": 9.098351844903653e-09, "logits/chosen": -2.633021593093872, "logits/rejected": -2.2412657737731934, "logps/chosen": -191.61239624023438, "logps/rejected": -1286.171875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.1795886754989624, "rewards/margins": 11.230447769165039, "rewards/rejected": -12.410036087036133, "step": 16360 }, { "epoch": 0.98, "learning_rate": 8.660195561764617e-09, "logits/chosen": -2.6692349910736084, "logits/rejected": -2.1601152420043945, "logps/chosen": -182.53131103515625, "logps/rejected": -1202.576416015625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.1304045915603638, "rewards/margins": 10.447057723999023, "rewards/rejected": -11.577461242675781, "step": 16370 }, { "epoch": 0.98, "learning_rate": 8.232833253712657e-09, "logits/chosen": -2.642995834350586, "logits/rejected": -2.2354342937469482, "logps/chosen": -164.24864196777344, "logps/rejected": -1302.755859375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.9687468409538269, "rewards/margins": 11.626684188842773, "rewards/rejected": -12.595431327819824, "step": 16380 }, { "epoch": 0.98, "learning_rate": 7.816266772336378e-09, "logits/chosen": -2.655147075653076, "logits/rejected": -2.253140687942505, "logps/chosen": -182.57879638671875, "logps/rejected": -1224.9423828125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.1447583436965942, "rewards/margins": 10.65778923034668, "rewards/rejected": -11.802547454833984, "step": 16390 }, { "epoch": 0.98, "learning_rate": 7.410497922451243e-09, "logits/chosen": -2.670452833175659, "logits/rejected": -2.2061877250671387, "logps/chosen": -169.3420867919922, "logps/rejected": -1353.225341796875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9867624044418335, "rewards/margins": 12.098861694335938, "rewards/rejected": -13.085622787475586, "step": 16400 }, { "epoch": 0.98, "learning_rate": 7.015528462091248e-09, "logits/chosen": -2.639734983444214, "logits/rejected": -2.2123537063598633, "logps/chosen": -184.6167449951172, "logps/rejected": -1259.0152587890625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.102648377418518, "rewards/margins": 11.055513381958008, "rewards/rejected": -12.158160209655762, "step": 16410 }, { "epoch": 0.98, "learning_rate": 6.63136010250004e-09, "logits/chosen": -2.6688857078552246, "logits/rejected": -2.2339518070220947, "logps/chosen": -173.97569274902344, "logps/rejected": -1313.511962890625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.0491271018981934, "rewards/margins": 11.649115562438965, "rewards/rejected": -12.6982421875, "step": 16420 }, { "epoch": 0.98, "learning_rate": 6.257994508124532e-09, "logits/chosen": -2.647425413131714, "logits/rejected": -2.2450172901153564, "logps/chosen": -153.29281616210938, "logps/rejected": -1286.2093505859375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.867265522480011, "rewards/margins": 11.552124977111816, "rewards/rejected": -12.419389724731445, "step": 16430 }, { "epoch": 0.98, "learning_rate": 5.895433296608799e-09, "logits/chosen": -2.6959025859832764, "logits/rejected": -2.232936382293701, "logps/chosen": -168.3072509765625, "logps/rejected": -1244.86767578125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.9388321042060852, "rewards/margins": 11.074982643127441, "rewards/rejected": -12.013814926147461, "step": 16440 }, { "epoch": 0.98, "learning_rate": 5.543678038784361e-09, "logits/chosen": -2.6504616737365723, "logits/rejected": -2.2044098377227783, "logps/chosen": -176.8509979248047, "logps/rejected": -1223.298583984375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.0913746356964111, "rewards/margins": 10.703654289245605, "rewards/rejected": -11.795029640197754, "step": 16450 }, { "epoch": 0.98, "learning_rate": 5.202730258665745e-09, "logits/chosen": -2.665109157562256, "logits/rejected": -2.223554849624634, "logps/chosen": -165.9916534423828, "logps/rejected": -1278.921142578125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.9834840893745422, "rewards/margins": 11.362183570861816, "rewards/rejected": -12.345666885375977, "step": 16460 }, { "epoch": 0.98, "learning_rate": 4.872591433442708e-09, "logits/chosen": -2.6509833335876465, "logits/rejected": -2.2273030281066895, "logps/chosen": -160.29881286621094, "logps/rejected": -1179.502685546875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.8968982696533203, "rewards/margins": 10.453070640563965, "rewards/rejected": -11.349969863891602, "step": 16470 }, { "epoch": 0.98, "learning_rate": 4.5532629934744166e-09, "logits/chosen": -2.679215908050537, "logits/rejected": -2.254709005355835, "logps/chosen": -176.91891479492188, "logps/rejected": -1366.59716796875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.0106145143508911, "rewards/margins": 12.200166702270508, "rewards/rejected": -13.210782051086426, "step": 16480 }, { "epoch": 0.98, "learning_rate": 4.244746322282501e-09, "logits/chosen": -2.6910905838012695, "logits/rejected": -2.267216444015503, "logps/chosen": -178.09024047851562, "logps/rejected": -1130.768798828125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.1032586097717285, "rewards/margins": 9.755863189697266, "rewards/rejected": -10.859121322631836, "step": 16490 }, { "epoch": 0.98, "learning_rate": 3.94704275654606e-09, "logits/chosen": -2.6660170555114746, "logits/rejected": -2.231707811355591, "logps/chosen": -167.55545043945312, "logps/rejected": -1288.8323974609375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.9682508707046509, "rewards/margins": 11.475881576538086, "rewards/rejected": -12.444130897521973, "step": 16500 }, { "epoch": 0.98, "learning_rate": 3.6601535860950053e-09, "logits/chosen": -2.6902756690979004, "logits/rejected": -2.18860125541687, "logps/chosen": -172.37490844726562, "logps/rejected": -1385.1578369140625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.9987273216247559, "rewards/margins": 12.404215812683105, "rewards/rejected": -13.402941703796387, "step": 16510 }, { "epoch": 0.99, "learning_rate": 3.3840800539047815e-09, "logits/chosen": -2.6335432529449463, "logits/rejected": -2.2267251014709473, "logps/chosen": -174.3216552734375, "logps/rejected": -1261.455322265625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0557180643081665, "rewards/margins": 11.113521575927734, "rewards/rejected": -12.16923999786377, "step": 16520 }, { "epoch": 0.99, "learning_rate": 3.1188233560913717e-09, "logits/chosen": -2.6585869789123535, "logits/rejected": -2.2412328720092773, "logps/chosen": -193.1375274658203, "logps/rejected": -1241.5718994140625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.2179319858551025, "rewards/margins": 10.76350212097168, "rewards/rejected": -11.981435775756836, "step": 16530 }, { "epoch": 0.99, "learning_rate": 2.8643846419057484e-09, "logits/chosen": -2.672546625137329, "logits/rejected": -2.2495222091674805, "logps/chosen": -192.04794311523438, "logps/rejected": -1263.3785400390625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.1814525127410889, "rewards/margins": 11.017091751098633, "rewards/rejected": -12.1985445022583, "step": 16540 }, { "epoch": 0.99, "learning_rate": 2.6207650137283215e-09, "logits/chosen": -2.708150625228882, "logits/rejected": -2.2877864837646484, "logps/chosen": -173.510009765625, "logps/rejected": -1366.581787109375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.0227696895599365, "rewards/margins": 12.202122688293457, "rewards/rejected": -13.224891662597656, "step": 16550 }, { "epoch": 0.99, "learning_rate": 2.3879655270650504e-09, "logits/chosen": -2.7082462310791016, "logits/rejected": -2.171170473098755, "logps/chosen": -196.70620727539062, "logps/rejected": -1233.5294189453125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.2639291286468506, "rewards/margins": 10.629531860351562, "rewards/rejected": -11.893461227416992, "step": 16560 }, { "epoch": 0.99, "learning_rate": 2.1659871905430064e-09, "logits/chosen": -2.6711885929107666, "logits/rejected": -2.2594029903411865, "logps/chosen": -180.36941528320312, "logps/rejected": -1367.834228515625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.047650694847107, "rewards/margins": 12.184184074401855, "rewards/rejected": -13.231834411621094, "step": 16570 }, { "epoch": 0.99, "learning_rate": 1.954830965905097e-09, "logits/chosen": -2.6764516830444336, "logits/rejected": -2.23189115524292, "logps/chosen": -172.1129913330078, "logps/rejected": -1113.990478515625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.040505051612854, "rewards/margins": 9.66499137878418, "rewards/rejected": -10.705495834350586, "step": 16580 }, { "epoch": 0.99, "learning_rate": 1.7544977680064578e-09, "logits/chosen": -2.652247428894043, "logits/rejected": -2.215662717819214, "logps/chosen": -157.01705932617188, "logps/rejected": -1146.182861328125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.9195022583007812, "rewards/margins": 10.094548225402832, "rewards/rejected": -11.014049530029297, "step": 16590 }, { "epoch": 0.99, "learning_rate": 1.564988464810291e-09, "logits/chosen": -2.6204469203948975, "logits/rejected": -2.241234064102173, "logps/chosen": -156.6399688720703, "logps/rejected": -1244.577392578125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.8605591654777527, "rewards/margins": 11.15047550201416, "rewards/rejected": -12.011034965515137, "step": 16600 }, { "epoch": 0.99, "learning_rate": 1.386303877384254e-09, "logits/chosen": -2.6458616256713867, "logits/rejected": -2.1999783515930176, "logps/chosen": -176.6503448486328, "logps/rejected": -1230.3421630859375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.0806583166122437, "rewards/margins": 10.774874687194824, "rewards/rejected": -11.855533599853516, "step": 16610 }, { "epoch": 0.99, "learning_rate": 1.2184447798971322e-09, "logits/chosen": -2.688946485519409, "logits/rejected": -2.167438507080078, "logps/chosen": -171.19871520996094, "logps/rejected": -1295.857421875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.0013701915740967, "rewards/margins": 11.509811401367188, "rewards/rejected": -12.511181831359863, "step": 16620 }, { "epoch": 0.99, "learning_rate": 1.0614118996146727e-09, "logits/chosen": -2.695730686187744, "logits/rejected": -2.302783489227295, "logps/chosen": -187.93051147460938, "logps/rejected": -1276.522216796875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.1826187372207642, "rewards/margins": 11.138666152954102, "rewards/rejected": -12.321285247802734, "step": 16630 }, { "epoch": 0.99, "learning_rate": 9.152059168976435e-10, "logits/chosen": -2.654323101043701, "logits/rejected": -2.1863253116607666, "logps/chosen": -174.3800811767578, "logps/rejected": -1250.937744140625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.0174219608306885, "rewards/margins": 11.043591499328613, "rewards/rejected": -12.061013221740723, "step": 16640 }, { "epoch": 0.99, "learning_rate": 7.798274651979465e-10, "logits/chosen": -2.637986660003662, "logits/rejected": -2.2469239234924316, "logps/chosen": -196.75131225585938, "logps/rejected": -1252.58251953125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.2284270524978638, "rewards/margins": 10.838926315307617, "rewards/rejected": -12.067353248596191, "step": 16650 }, { "epoch": 0.99, "learning_rate": 6.552771310558426e-10, "logits/chosen": -2.707401990890503, "logits/rejected": -2.188354253768921, "logps/chosen": -187.86233520507812, "logps/rejected": -1123.2301025390625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.1808136701583862, "rewards/margins": 9.608713150024414, "rewards/rejected": -10.789527893066406, "step": 16660 }, { "epoch": 0.99, "learning_rate": 5.415554540977308e-10, "logits/chosen": -2.6379354000091553, "logits/rejected": -2.2245168685913086, "logps/chosen": -176.5787353515625, "logps/rejected": -1283.8917236328125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.0409266948699951, "rewards/margins": 11.354669570922852, "rewards/rejected": -12.395596504211426, "step": 16670 }, { "epoch": 0.99, "learning_rate": 4.386629270342058e-10, "logits/chosen": -2.6777150630950928, "logits/rejected": -2.2911205291748047, "logps/chosen": -179.92527770996094, "logps/rejected": -1208.0726318359375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.1010576486587524, "rewards/margins": 10.538191795349121, "rewards/rejected": -11.639249801635742, "step": 16680 }, { "epoch": 1.0, "learning_rate": 3.465999956575594e-10, "logits/chosen": -2.6711153984069824, "logits/rejected": -2.2284107208251953, "logps/chosen": -178.85572814941406, "logps/rejected": -1175.2252197265625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.0292279720306396, "rewards/margins": 10.2882080078125, "rewards/rejected": -11.317437171936035, "step": 16690 }, { "epoch": 1.0, "learning_rate": 2.653670588390056e-10, "logits/chosen": -2.6496317386627197, "logits/rejected": -2.1748948097229004, "logps/chosen": -176.67767333984375, "logps/rejected": -1217.37353515625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.0714900493621826, "rewards/margins": 10.662920951843262, "rewards/rejected": -11.734411239624023, "step": 16700 }, { "epoch": 1.0, "learning_rate": 1.9496446852840244e-10, "logits/chosen": -2.668013095855713, "logits/rejected": -2.3151652812957764, "logps/chosen": -176.02630615234375, "logps/rejected": -1224.7296142578125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.0651683807373047, "rewards/margins": 10.749885559082031, "rewards/rejected": -11.815053939819336, "step": 16710 }, { "epoch": 1.0, "learning_rate": 1.3539252975175442e-10, "logits/chosen": -2.6090002059936523, "logits/rejected": -2.220052719116211, "logps/chosen": -194.02365112304688, "logps/rejected": -1199.383056640625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.2622654438018799, "rewards/margins": 10.295656204223633, "rewards/rejected": -11.55792236328125, "step": 16720 }, { "epoch": 1.0, "learning_rate": 8.665150061093475e-11, "logits/chosen": -2.6500911712646484, "logits/rejected": -2.043184280395508, "logps/chosen": -198.4783172607422, "logps/rejected": -1317.258056640625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.2058560848236084, "rewards/margins": 11.52690315246582, "rewards/rejected": -12.732759475708008, "step": 16730 }, { "epoch": 1.0, "learning_rate": 4.874159228063224e-11, "logits/chosen": -2.6816859245300293, "logits/rejected": -2.2883806228637695, "logps/chosen": -156.26614379882812, "logps/rejected": -1260.8797607421875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.824610710144043, "rewards/margins": 11.33895206451416, "rewards/rejected": -12.163562774658203, "step": 16740 }, { "epoch": 1.0, "learning_rate": 2.1662969009461632e-11, "logits/chosen": -2.6500821113586426, "logits/rejected": -2.225158452987671, "logps/chosen": -164.89157104492188, "logps/rejected": -1216.4520263671875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.9150495529174805, "rewards/margins": 10.813158988952637, "rewards/rejected": -11.728208541870117, "step": 16750 }, { "epoch": 1.0, "learning_rate": 5.415748118575703e-12, "logits/chosen": -2.6592376232147217, "logits/rejected": -2.247807264328003, "logps/chosen": -177.20169067382812, "logps/rejected": -1228.7794189453125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.057569146156311, "rewards/margins": 10.785604476928711, "rewards/rejected": -11.843174934387207, "step": 16760 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -2.650529623031616, "logits/rejected": -2.262345552444458, "logps/chosen": -170.49472045898438, "logps/rejected": -1145.919189453125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.9720252156257629, "rewards/margins": 10.04547119140625, "rewards/rejected": -11.017497062683105, "step": 16770 }, { "epoch": 1.0, "step": 16770, "total_flos": 0.0, "train_loss": 0.03956493256323719, "train_runtime": 68990.3604, "train_samples_per_second": 1.945, "train_steps_per_second": 0.243 } ], "logging_steps": 10, "max_steps": 16770, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }