{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 5250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.523809523809524e-09, "logits/chosen": -1.4633961915969849, "logits/rejected": -1.2364428043365479, "logps/chosen": -210.33938598632812, "logps/rejected": -419.1065979003906, "loss": 0.2593, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 9.523809523809525e-08, "logits/chosen": -1.610365390777588, "logits/rejected": -1.0621064901351929, "logps/chosen": -482.9412841796875, "logps/rejected": -730.5945434570312, "loss": 0.3055, "rewards/accuracies": 0.5833333134651184, "rewards/chosen": 0.0003477936261333525, "rewards/margins": 0.0005409025470726192, "rewards/rejected": -0.00019310889183543622, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.904761904761905e-07, "logits/chosen": -1.5603498220443726, "logits/rejected": -0.8166106343269348, "logps/chosen": -454.48858642578125, "logps/rejected": -721.1671752929688, "loss": 0.2782, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0004291702643968165, "rewards/margins": 0.0007661186391487718, "rewards/rejected": -0.0011952888453379273, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.8571428571428575e-07, "logits/chosen": -1.3784929513931274, "logits/rejected": -0.9101301431655884, "logps/chosen": -428.0098571777344, "logps/rejected": -662.2744140625, "loss": 0.2718, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.002030368195846677, "rewards/margins": 0.002315156627446413, "rewards/rejected": -0.0043455250561237335, "step": 30 }, { "epoch": 0.01, "learning_rate": 3.80952380952381e-07, "logits/chosen": -1.5284583568572998, "logits/rejected": -1.0132588148117065, "logps/chosen": -545.9862060546875, "logps/rejected": -774.7142333984375, "loss": 0.3389, "rewards/accuracies": 0.875, "rewards/chosen": -0.0067994482815265656, "rewards/margins": 0.003867440391331911, "rewards/rejected": -0.010666887275874615, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.7619047619047623e-07, "logits/chosen": -1.7644160985946655, "logits/rejected": -1.0432060956954956, "logps/chosen": -480.3941345214844, "logps/rejected": -677.6199340820312, "loss": 0.3576, "rewards/accuracies": 0.875, "rewards/chosen": -0.01194092445075512, "rewards/margins": 0.010018697939813137, "rewards/rejected": -0.02195962332189083, "step": 50 }, { "epoch": 0.01, "learning_rate": 5.714285714285715e-07, "logits/chosen": -1.7885560989379883, "logits/rejected": -1.1062058210372925, "logps/chosen": -533.5252685546875, "logps/rejected": -668.8731079101562, "loss": 0.3171, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.014572158455848694, "rewards/margins": 0.017655830830335617, "rewards/rejected": -0.03222799301147461, "step": 60 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-07, "logits/chosen": -1.3738806247711182, "logits/rejected": -1.0891026258468628, "logps/chosen": -424.70703125, "logps/rejected": -692.15234375, "loss": 0.272, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.029187273234128952, "rewards/margins": 0.03771970793604851, "rewards/rejected": -0.06690698117017746, "step": 70 }, { "epoch": 0.02, "learning_rate": 7.61904761904762e-07, "logits/chosen": -1.5506525039672852, "logits/rejected": -0.9549859762191772, "logps/chosen": -512.8480224609375, "logps/rejected": -784.5028076171875, "loss": 0.2737, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05644345283508301, "rewards/margins": 0.06473176181316376, "rewards/rejected": -0.12117521464824677, "step": 80 }, { "epoch": 0.02, "learning_rate": 8.571428571428572e-07, "logits/chosen": -1.665061354637146, "logits/rejected": -0.9693418741226196, "logps/chosen": -559.4435424804688, "logps/rejected": -998.4827880859375, "loss": 0.204, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08695171028375626, "rewards/margins": 0.14592763781547546, "rewards/rejected": -0.23287932574748993, "step": 90 }, { "epoch": 0.02, "learning_rate": 9.523809523809525e-07, "logits/chosen": -1.3899726867675781, "logits/rejected": -0.9542592763900757, "logps/chosen": -516.2752075195312, "logps/rejected": -899.1209106445312, "loss": 0.2199, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13038575649261475, "rewards/margins": 0.1349850296974182, "rewards/rejected": -0.26537078619003296, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.0476190476190478e-06, "logits/chosen": -1.4382736682891846, "logits/rejected": -0.9191503524780273, "logps/chosen": -682.9276123046875, "logps/rejected": -1102.0411376953125, "loss": 0.1968, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19294723868370056, "rewards/margins": 0.21316266059875488, "rewards/rejected": -0.40610989928245544, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.142857142857143e-06, "logits/chosen": -1.459641695022583, "logits/rejected": -0.6542884707450867, "logps/chosen": -672.031005859375, "logps/rejected": -1260.8494873046875, "loss": 0.2996, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19834551215171814, "rewards/margins": 0.28519508242607117, "rewards/rejected": -0.4835405945777893, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.2380952380952382e-06, "logits/chosen": -1.4884998798370361, "logits/rejected": -0.782085120677948, "logps/chosen": -655.8922119140625, "logps/rejected": -1180.880615234375, "loss": 0.2023, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18265406787395477, "rewards/margins": 0.2280837595462799, "rewards/rejected": -0.4107378423213959, "step": 130 }, { "epoch": 0.03, "learning_rate": 1.3333333333333334e-06, "logits/chosen": -1.4412648677825928, "logits/rejected": -0.7835417985916138, "logps/chosen": -515.529541015625, "logps/rejected": -1009.7794189453125, "loss": 0.1867, "rewards/accuracies": 0.875, "rewards/chosen": -0.1095825582742691, "rewards/margins": 0.22792240977287292, "rewards/rejected": -0.3375050127506256, "step": 140 }, { "epoch": 0.03, "learning_rate": 1.4285714285714286e-06, "logits/chosen": -1.4961611032485962, "logits/rejected": -0.973048985004425, "logps/chosen": -600.7317504882812, "logps/rejected": -1086.9722900390625, "loss": 0.2245, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13946318626403809, "rewards/margins": 0.20260484516620636, "rewards/rejected": -0.34206801652908325, "step": 150 }, { "epoch": 0.03, "learning_rate": 1.523809523809524e-06, "logits/chosen": -1.5238187313079834, "logits/rejected": -0.6449756622314453, "logps/chosen": -660.7269287109375, "logps/rejected": -1104.213623046875, "loss": 0.1981, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1448592096567154, "rewards/margins": 0.2078539878129959, "rewards/rejected": -0.3527131676673889, "step": 160 }, { "epoch": 0.03, "learning_rate": 1.6190476190476193e-06, "logits/chosen": -1.4879696369171143, "logits/rejected": -0.8903700113296509, "logps/chosen": -605.8817138671875, "logps/rejected": -1033.109130859375, "loss": 0.1416, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13867175579071045, "rewards/margins": 0.22906196117401123, "rewards/rejected": -0.3677336573600769, "step": 170 }, { "epoch": 0.03, "learning_rate": 1.7142857142857145e-06, "logits/chosen": -1.269089937210083, "logits/rejected": -0.7880300879478455, "logps/chosen": -539.9698486328125, "logps/rejected": -1043.28466796875, "loss": 0.1833, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15886561572551727, "rewards/margins": 0.23221167922019958, "rewards/rejected": -0.39107733964920044, "step": 180 }, { "epoch": 0.04, "learning_rate": 1.8095238095238097e-06, "logits/chosen": -1.2870985269546509, "logits/rejected": -0.899337887763977, "logps/chosen": -629.3480224609375, "logps/rejected": -1022.1195068359375, "loss": 0.2154, "rewards/accuracies": 0.75, "rewards/chosen": -0.18074938654899597, "rewards/margins": 0.2024730145931244, "rewards/rejected": -0.38322240114212036, "step": 190 }, { "epoch": 0.04, "learning_rate": 1.904761904761905e-06, "logits/chosen": -1.2736904621124268, "logits/rejected": -0.7282256484031677, "logps/chosen": -561.5228881835938, "logps/rejected": -974.63134765625, "loss": 0.1857, "rewards/accuracies": 0.75, "rewards/chosen": -0.1457606703042984, "rewards/margins": 0.21344856917858124, "rewards/rejected": -0.35920923948287964, "step": 200 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -1.077405333518982, "logits/rejected": -0.8292808532714844, "logps/chosen": -612.2826538085938, "logps/rejected": -1123.556884765625, "loss": 0.1564, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2081625759601593, "rewards/margins": 0.20980456471443176, "rewards/rejected": -0.4179670810699463, "step": 210 }, { "epoch": 0.04, "learning_rate": 2.0952380952380955e-06, "logits/chosen": -1.2383986711502075, "logits/rejected": -0.6710551977157593, "logps/chosen": -650.8024291992188, "logps/rejected": -1233.49072265625, "loss": 0.1012, "rewards/accuracies": 0.875, "rewards/chosen": -0.2048388421535492, "rewards/margins": 0.3004988729953766, "rewards/rejected": -0.5053377747535706, "step": 220 }, { "epoch": 0.04, "learning_rate": 2.1904761904761908e-06, "logits/chosen": -1.45066499710083, "logits/rejected": -0.748737096786499, "logps/chosen": -693.0684814453125, "logps/rejected": -1181.05908203125, "loss": 0.1614, "rewards/accuracies": 0.875, "rewards/chosen": -0.19217872619628906, "rewards/margins": 0.29584550857543945, "rewards/rejected": -0.4880242347717285, "step": 230 }, { "epoch": 0.05, "learning_rate": 2.285714285714286e-06, "logits/chosen": -1.1749699115753174, "logits/rejected": -0.6850377321243286, "logps/chosen": -560.8719482421875, "logps/rejected": -1131.795166015625, "loss": 0.1276, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14133869111537933, "rewards/margins": 0.3111477792263031, "rewards/rejected": -0.45248645544052124, "step": 240 }, { "epoch": 0.05, "learning_rate": 2.380952380952381e-06, "logits/chosen": -1.455298662185669, "logits/rejected": -0.6744661331176758, "logps/chosen": -573.9270629882812, "logps/rejected": -1027.9237060546875, "loss": 0.1678, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12096985429525375, "rewards/margins": 0.29721394181251526, "rewards/rejected": -0.4181838631629944, "step": 250 }, { "epoch": 0.05, "learning_rate": 2.4761904761904764e-06, "logits/chosen": -1.3899072408676147, "logits/rejected": -0.6559457778930664, "logps/chosen": -595.6860961914062, "logps/rejected": -1140.6729736328125, "loss": 0.1244, "rewards/accuracies": 0.875, "rewards/chosen": -0.14922866225242615, "rewards/margins": 0.3135303258895874, "rewards/rejected": -0.46275901794433594, "step": 260 }, { "epoch": 0.05, "learning_rate": 2.571428571428571e-06, "logits/chosen": -1.0578467845916748, "logits/rejected": -0.8492599725723267, "logps/chosen": -574.3513793945312, "logps/rejected": -1259.55712890625, "loss": 0.1556, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19695577025413513, "rewards/margins": 0.36541420221328735, "rewards/rejected": -0.5623699426651001, "step": 270 }, { "epoch": 0.05, "learning_rate": 2.666666666666667e-06, "logits/chosen": -1.2612149715423584, "logits/rejected": -0.7240465879440308, "logps/chosen": -672.7413330078125, "logps/rejected": -1256.050537109375, "loss": 0.159, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21864071488380432, "rewards/margins": 0.3210473954677582, "rewards/rejected": -0.5396881103515625, "step": 280 }, { "epoch": 0.06, "learning_rate": 2.7619047619047625e-06, "logits/chosen": -1.3020931482315063, "logits/rejected": -0.6435850858688354, "logps/chosen": -653.7093505859375, "logps/rejected": -1208.412841796875, "loss": 0.1052, "rewards/accuracies": 0.875, "rewards/chosen": -0.147475466132164, "rewards/margins": 0.36482852697372437, "rewards/rejected": -0.5123040080070496, "step": 290 }, { "epoch": 0.06, "learning_rate": 2.8571428571428573e-06, "logits/chosen": -1.4918224811553955, "logits/rejected": -0.8085969090461731, "logps/chosen": -713.1387939453125, "logps/rejected": -1272.7677001953125, "loss": 0.1426, "rewards/accuracies": 0.875, "rewards/chosen": -0.17535661160945892, "rewards/margins": 0.3165578246116638, "rewards/rejected": -0.49191442131996155, "step": 300 }, { "epoch": 0.06, "learning_rate": 2.9523809523809525e-06, "logits/chosen": -1.6136243343353271, "logits/rejected": -0.8312125205993652, "logps/chosen": -635.4207763671875, "logps/rejected": -1105.80078125, "loss": 0.1737, "rewards/accuracies": 0.75, "rewards/chosen": -0.13362707197666168, "rewards/margins": 0.2607218623161316, "rewards/rejected": -0.3943489193916321, "step": 310 }, { "epoch": 0.06, "learning_rate": 3.047619047619048e-06, "logits/chosen": -1.1584962606430054, "logits/rejected": -0.5327649712562561, "logps/chosen": -573.6459350585938, "logps/rejected": -1091.638916015625, "loss": 0.1381, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09677333384752274, "rewards/margins": 0.28998833894729614, "rewards/rejected": -0.38676169514656067, "step": 320 }, { "epoch": 0.06, "learning_rate": 3.142857142857143e-06, "logits/chosen": -1.3556053638458252, "logits/rejected": -0.5345439910888672, "logps/chosen": -714.9662475585938, "logps/rejected": -1150.104248046875, "loss": 0.2078, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19412854313850403, "rewards/margins": 0.2673655152320862, "rewards/rejected": -0.4614940583705902, "step": 330 }, { "epoch": 0.06, "learning_rate": 3.2380952380952385e-06, "logits/chosen": -1.4859298467636108, "logits/rejected": -0.6170077323913574, "logps/chosen": -696.609375, "logps/rejected": -1109.577392578125, "loss": 0.2261, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17592501640319824, "rewards/margins": 0.26663655042648315, "rewards/rejected": -0.4425615668296814, "step": 340 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -1.3932907581329346, "logits/rejected": -0.854517936706543, "logps/chosen": -625.2777099609375, "logps/rejected": -1159.3671875, "loss": 0.1656, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1585162878036499, "rewards/margins": 0.2966553568840027, "rewards/rejected": -0.455171674489975, "step": 350 }, { "epoch": 0.07, "learning_rate": 3.428571428571429e-06, "logits/chosen": -1.2704038619995117, "logits/rejected": -0.6886963248252869, "logps/chosen": -729.0342407226562, "logps/rejected": -1316.4833984375, "loss": 0.2799, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24822619557380676, "rewards/margins": 0.36715978384017944, "rewards/rejected": -0.6153860092163086, "step": 360 }, { "epoch": 0.07, "learning_rate": 3.523809523809524e-06, "logits/chosen": -1.4961518049240112, "logits/rejected": -0.6622525453567505, "logps/chosen": -585.5128173828125, "logps/rejected": -1209.227294921875, "loss": 0.0957, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.0684739202260971, "rewards/margins": 0.37941282987594604, "rewards/rejected": -0.44788676500320435, "step": 370 }, { "epoch": 0.07, "learning_rate": 3.6190476190476194e-06, "logits/chosen": -1.5004198551177979, "logits/rejected": -0.9735828638076782, "logps/chosen": -449.74365234375, "logps/rejected": -873.9354248046875, "loss": 0.1561, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.014761904254555702, "rewards/margins": 0.20308324694633484, "rewards/rejected": -0.21784517168998718, "step": 380 }, { "epoch": 0.07, "learning_rate": 3.7142857142857146e-06, "logits/chosen": -1.434891939163208, "logits/rejected": -0.7585717439651489, "logps/chosen": -471.00469970703125, "logps/rejected": -860.2316284179688, "loss": 0.2167, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.017866212874650955, "rewards/margins": 0.19812558591365814, "rewards/rejected": -0.21599180996418, "step": 390 }, { "epoch": 0.08, "learning_rate": 3.80952380952381e-06, "logits/chosen": -1.2273303270339966, "logits/rejected": -0.8588708639144897, "logps/chosen": -421.289794921875, "logps/rejected": -852.4148559570312, "loss": 0.1556, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.05043165758252144, "rewards/margins": 0.22477364540100098, "rewards/rejected": -0.2752053141593933, "step": 400 }, { "epoch": 0.08, "learning_rate": 3.9047619047619055e-06, "logits/chosen": -0.9734954833984375, "logits/rejected": -0.6048663854598999, "logps/chosen": -560.4136962890625, "logps/rejected": -978.0811767578125, "loss": 0.2031, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16438128054141998, "rewards/margins": 0.20696699619293213, "rewards/rejected": -0.3713482618331909, "step": 410 }, { "epoch": 0.08, "learning_rate": 4.000000000000001e-06, "logits/chosen": -1.041243314743042, "logits/rejected": -0.5710188746452332, "logps/chosen": -628.2027587890625, "logps/rejected": -1168.212646484375, "loss": 0.1319, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1534917801618576, "rewards/margins": 0.2832767367362976, "rewards/rejected": -0.4367685317993164, "step": 420 }, { "epoch": 0.08, "learning_rate": 4.095238095238096e-06, "logits/chosen": -1.120323657989502, "logits/rejected": -0.47065305709838867, "logps/chosen": -512.0430908203125, "logps/rejected": -1044.714599609375, "loss": 0.1528, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07303700596094131, "rewards/margins": 0.30483847856521606, "rewards/rejected": -0.37787550687789917, "step": 430 }, { "epoch": 0.08, "learning_rate": 4.190476190476191e-06, "logits/chosen": -1.228567361831665, "logits/rejected": -0.7676167488098145, "logps/chosen": -580.1812133789062, "logps/rejected": -1115.2666015625, "loss": 0.2222, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13131147623062134, "rewards/margins": 0.2785014510154724, "rewards/rejected": -0.40981292724609375, "step": 440 }, { "epoch": 0.09, "learning_rate": 4.2857142857142855e-06, "logits/chosen": -0.9824286699295044, "logits/rejected": -0.5752304792404175, "logps/chosen": -452.2618103027344, "logps/rejected": -1014.6959838867188, "loss": 0.1411, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.041228070855140686, "rewards/margins": 0.29497069120407104, "rewards/rejected": -0.3361987769603729, "step": 450 }, { "epoch": 0.09, "learning_rate": 4.3809523809523815e-06, "logits/chosen": -1.1204311847686768, "logits/rejected": -0.5266382694244385, "logps/chosen": -597.1425170898438, "logps/rejected": -1117.980224609375, "loss": 0.1202, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08962099254131317, "rewards/margins": 0.31395572423934937, "rewards/rejected": -0.40357670187950134, "step": 460 }, { "epoch": 0.09, "learning_rate": 4.476190476190477e-06, "logits/chosen": -1.0846962928771973, "logits/rejected": -0.7576047778129578, "logps/chosen": -517.5731201171875, "logps/rejected": -1103.2222900390625, "loss": 0.1116, "rewards/accuracies": 0.75, "rewards/chosen": -0.12066017091274261, "rewards/margins": 0.30953511595726013, "rewards/rejected": -0.43019533157348633, "step": 470 }, { "epoch": 0.09, "learning_rate": 4.571428571428572e-06, "logits/chosen": -0.9018595814704895, "logits/rejected": -0.5866034626960754, "logps/chosen": -523.2333984375, "logps/rejected": -1036.0869140625, "loss": 0.2058, "rewards/accuracies": 0.75, "rewards/chosen": -0.10268028825521469, "rewards/margins": 0.29270845651626587, "rewards/rejected": -0.3953886926174164, "step": 480 }, { "epoch": 0.09, "learning_rate": 4.666666666666667e-06, "logits/chosen": -1.1721107959747314, "logits/rejected": -0.7808191776275635, "logps/chosen": -493.0081481933594, "logps/rejected": -919.6887817382812, "loss": 0.179, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.070055291056633, "rewards/margins": 0.2476065456867218, "rewards/rejected": -0.3176618218421936, "step": 490 }, { "epoch": 0.1, "learning_rate": 4.761904761904762e-06, "logits/chosen": -1.275310754776001, "logits/rejected": -0.6922630071640015, "logps/chosen": -482.3363342285156, "logps/rejected": -1051.318603515625, "loss": 0.1712, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.0965057760477066, "rewards/margins": 0.31909894943237305, "rewards/rejected": -0.41560474038124084, "step": 500 }, { "epoch": 0.1, "learning_rate": 4.857142857142858e-06, "logits/chosen": -1.1258885860443115, "logits/rejected": -0.585066020488739, "logps/chosen": -702.0685424804688, "logps/rejected": -1306.002197265625, "loss": 0.184, "rewards/accuracies": 0.875, "rewards/chosen": -0.221037819981575, "rewards/margins": 0.3286159634590149, "rewards/rejected": -0.5496538281440735, "step": 510 }, { "epoch": 0.1, "learning_rate": 4.952380952380953e-06, "logits/chosen": -1.0958242416381836, "logits/rejected": -0.6358640193939209, "logps/chosen": -686.0093994140625, "logps/rejected": -1221.550048828125, "loss": 0.1325, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22425253689289093, "rewards/margins": 0.3489426374435425, "rewards/rejected": -0.5731951594352722, "step": 520 }, { "epoch": 0.1, "learning_rate": 4.999986185163754e-06, "logits/chosen": -1.5748833417892456, "logits/rejected": -1.0087988376617432, "logps/chosen": -579.9489135742188, "logps/rejected": -1070.4644775390625, "loss": 0.1338, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13689087331295013, "rewards/margins": 0.3429722189903259, "rewards/rejected": -0.47986307740211487, "step": 530 }, { "epoch": 0.1, "learning_rate": 4.999875667389858e-06, "logits/chosen": -1.2475988864898682, "logits/rejected": -0.8125373721122742, "logps/chosen": -676.51416015625, "logps/rejected": -1296.272216796875, "loss": 0.1325, "rewards/accuracies": 0.875, "rewards/chosen": -0.13830117881298065, "rewards/margins": 0.38971132040023804, "rewards/rejected": -0.5280125141143799, "step": 540 }, { "epoch": 0.1, "learning_rate": 4.999654636727765e-06, "logits/chosen": -1.3340667486190796, "logits/rejected": -0.5722957849502563, "logps/chosen": -618.6773681640625, "logps/rejected": -1288.3988037109375, "loss": 0.1574, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12158086150884628, "rewards/margins": 0.3755950331687927, "rewards/rejected": -0.49717584252357483, "step": 550 }, { "epoch": 0.11, "learning_rate": 4.999323102948655e-06, "logits/chosen": -1.4867702722549438, "logits/rejected": -1.0985515117645264, "logps/chosen": -659.9049682617188, "logps/rejected": -1145.168701171875, "loss": 0.1767, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13046395778656006, "rewards/margins": 0.30620601773262024, "rewards/rejected": -0.4366699755191803, "step": 560 }, { "epoch": 0.11, "learning_rate": 4.998881080708759e-06, "logits/chosen": -1.3138049840927124, "logits/rejected": -0.6846949458122253, "logps/chosen": -661.994873046875, "logps/rejected": -1117.7989501953125, "loss": 0.1708, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16562718152999878, "rewards/margins": 0.31299692392349243, "rewards/rejected": -0.47862404584884644, "step": 570 }, { "epoch": 0.11, "learning_rate": 4.998328589548711e-06, "logits/chosen": -1.5272343158721924, "logits/rejected": -0.7413185238838196, "logps/chosen": -675.2716674804688, "logps/rejected": -1201.1070556640625, "loss": 0.1685, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11696191877126694, "rewards/margins": 0.3152127265930176, "rewards/rejected": -0.4321746826171875, "step": 580 }, { "epoch": 0.11, "learning_rate": 4.997665653892682e-06, "logits/chosen": -1.4794944524765015, "logits/rejected": -0.8358553647994995, "logps/chosen": -514.8074951171875, "logps/rejected": -1089.1304931640625, "loss": 0.1756, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08781744539737701, "rewards/margins": 0.30905675888061523, "rewards/rejected": -0.39687421917915344, "step": 590 }, { "epoch": 0.11, "learning_rate": 4.996892303047306e-06, "logits/chosen": -1.2631568908691406, "logits/rejected": -0.906185507774353, "logps/chosen": -510.026123046875, "logps/rejected": -1112.696533203125, "loss": 0.1216, "rewards/accuracies": 0.875, "rewards/chosen": -0.11950021982192993, "rewards/margins": 0.3633073568344116, "rewards/rejected": -0.48280757665634155, "step": 600 }, { "epoch": 0.12, "learning_rate": 4.996008571200375e-06, "logits/chosen": -1.2829585075378418, "logits/rejected": -0.9288860559463501, "logps/chosen": -630.21826171875, "logps/rejected": -1218.042724609375, "loss": 0.1951, "rewards/accuracies": 0.75, "rewards/chosen": -0.18757669627666473, "rewards/margins": 0.3189699947834015, "rewards/rejected": -0.506546676158905, "step": 610 }, { "epoch": 0.12, "learning_rate": 4.995014497419336e-06, "logits/chosen": -0.9935330152511597, "logits/rejected": -0.6505762338638306, "logps/chosen": -930.5193481445312, "logps/rejected": -1337.595947265625, "loss": 0.2039, "rewards/accuracies": 0.75, "rewards/chosen": -0.4062381386756897, "rewards/margins": 0.24238184094429016, "rewards/rejected": -0.6486199498176575, "step": 620 }, { "epoch": 0.12, "learning_rate": 4.993910125649561e-06, "logits/chosen": -0.7837833762168884, "logits/rejected": -0.4489542543888092, "logps/chosen": -739.3512573242188, "logps/rejected": -1275.1015625, "loss": 0.1772, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2991621792316437, "rewards/margins": 0.33359241485595703, "rewards/rejected": -0.6327545642852783, "step": 630 }, { "epoch": 0.12, "learning_rate": 4.992695504712402e-06, "logits/chosen": -0.8878018260002136, "logits/rejected": -0.47548002004623413, "logps/chosen": -604.0863037109375, "logps/rejected": -1200.1826171875, "loss": 0.1144, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17400696873664856, "rewards/margins": 0.3342629075050354, "rewards/rejected": -0.5082699060440063, "step": 640 }, { "epoch": 0.12, "learning_rate": 4.9913706883030385e-06, "logits/chosen": -1.370296835899353, "logits/rejected": -0.5689765214920044, "logps/chosen": -676.3694458007812, "logps/rejected": -1185.32275390625, "loss": 0.1122, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.09333183616399765, "rewards/margins": 0.3735785484313965, "rewards/rejected": -0.4669104218482971, "step": 650 }, { "epoch": 0.13, "learning_rate": 4.989935734988098e-06, "logits/chosen": -1.261099100112915, "logits/rejected": -0.7942265868186951, "logps/chosen": -581.4351806640625, "logps/rejected": -1077.23291015625, "loss": 0.1647, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07504286617040634, "rewards/margins": 0.31316477060317993, "rewards/rejected": -0.38820770382881165, "step": 660 }, { "epoch": 0.13, "learning_rate": 4.988390708203068e-06, "logits/chosen": -1.5747838020324707, "logits/rejected": -0.9350663423538208, "logps/chosen": -521.41552734375, "logps/rejected": -1092.664794921875, "loss": 0.1598, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0677158385515213, "rewards/margins": 0.3290405869483948, "rewards/rejected": -0.3967564105987549, "step": 670 }, { "epoch": 0.13, "learning_rate": 4.9867356762494955e-06, "logits/chosen": -1.3997423648834229, "logits/rejected": -0.9180939793586731, "logps/chosen": -581.2384033203125, "logps/rejected": -1137.547119140625, "loss": 0.1994, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10941344499588013, "rewards/margins": 0.325897753238678, "rewards/rejected": -0.4353111684322357, "step": 680 }, { "epoch": 0.13, "learning_rate": 4.984970712291963e-06, "logits/chosen": -1.3846169710159302, "logits/rejected": -0.73557049036026, "logps/chosen": -462.7078552246094, "logps/rejected": -976.39501953125, "loss": 0.1678, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.017078759148716927, "rewards/margins": 0.31490957736968994, "rewards/rejected": -0.33198830485343933, "step": 690 }, { "epoch": 0.13, "learning_rate": 4.983095894354858e-06, "logits/chosen": -1.1463801860809326, "logits/rejected": -0.3814142346382141, "logps/chosen": -596.7115478515625, "logps/rejected": -1126.910888671875, "loss": 0.1381, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09377743303775787, "rewards/margins": 0.2981225252151489, "rewards/rejected": -0.391899973154068, "step": 700 }, { "epoch": 0.14, "learning_rate": 4.981111305318918e-06, "logits/chosen": -0.9459941983222961, "logits/rejected": -0.6081192493438721, "logps/chosen": -620.593017578125, "logps/rejected": -1376.078369140625, "loss": 0.102, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.14758248627185822, "rewards/margins": 0.3472079634666443, "rewards/rejected": -0.4947904646396637, "step": 710 }, { "epoch": 0.14, "learning_rate": 4.979017032917576e-06, "logits/chosen": -0.8523277044296265, "logits/rejected": -0.6036144495010376, "logps/chosen": -711.4821166992188, "logps/rejected": -1270.931396484375, "loss": 0.1933, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2176143229007721, "rewards/margins": 0.319147527217865, "rewards/rejected": -0.5367618799209595, "step": 720 }, { "epoch": 0.14, "learning_rate": 4.97681316973307e-06, "logits/chosen": -0.90800541639328, "logits/rejected": -0.4888209402561188, "logps/chosen": -530.8043823242188, "logps/rejected": -1027.162841796875, "loss": 0.1921, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12928815186023712, "rewards/margins": 0.30399924516677856, "rewards/rejected": -0.4332873821258545, "step": 730 }, { "epoch": 0.14, "learning_rate": 4.9744998131923625e-06, "logits/chosen": -1.083520770072937, "logits/rejected": -0.5023576021194458, "logps/chosen": -541.5089111328125, "logps/rejected": -1044.0648193359375, "loss": 0.1732, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10550675541162491, "rewards/margins": 0.2670230269432068, "rewards/rejected": -0.3725297749042511, "step": 740 }, { "epoch": 0.14, "learning_rate": 4.9720770655628216e-06, "logits/chosen": -1.151623249053955, "logits/rejected": -0.49989479780197144, "logps/chosen": -480.1856994628906, "logps/rejected": -1090.775390625, "loss": 0.1708, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07557425647974014, "rewards/margins": 0.3159797191619873, "rewards/rejected": -0.39155399799346924, "step": 750 }, { "epoch": 0.14, "learning_rate": 4.969545033947711e-06, "logits/chosen": -1.1325894594192505, "logits/rejected": -0.6074356436729431, "logps/chosen": -600.750244140625, "logps/rejected": -992.29443359375, "loss": 0.1796, "rewards/accuracies": 0.75, "rewards/chosen": -0.12622541189193726, "rewards/margins": 0.2877815365791321, "rewards/rejected": -0.41400688886642456, "step": 760 }, { "epoch": 0.15, "learning_rate": 4.966903830281449e-06, "logits/chosen": -0.9898525476455688, "logits/rejected": -0.5480491518974304, "logps/chosen": -519.122314453125, "logps/rejected": -1155.3433837890625, "loss": 0.1122, "rewards/accuracies": 0.875, "rewards/chosen": -0.10655902326107025, "rewards/margins": 0.3709509074687958, "rewards/rejected": -0.4775099754333496, "step": 770 }, { "epoch": 0.15, "learning_rate": 4.964153571324658e-06, "logits/chosen": -0.901624321937561, "logits/rejected": -0.5526978373527527, "logps/chosen": -633.5411376953125, "logps/rejected": -1211.4859619140625, "loss": 0.1474, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22335144877433777, "rewards/margins": 0.3132711350917816, "rewards/rejected": -0.5366225242614746, "step": 780 }, { "epoch": 0.15, "learning_rate": 4.96129437865901e-06, "logits/chosen": -1.3616435527801514, "logits/rejected": -0.8551801443099976, "logps/chosen": -603.9468994140625, "logps/rejected": -1205.045654296875, "loss": 0.1193, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19647106528282166, "rewards/margins": 0.36330199241638184, "rewards/rejected": -0.5597730875015259, "step": 790 }, { "epoch": 0.15, "learning_rate": 4.958326378681849e-06, "logits/chosen": -1.1252937316894531, "logits/rejected": -0.683855414390564, "logps/chosen": -614.3113403320312, "logps/rejected": -1332.9471435546875, "loss": 0.1476, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14736303687095642, "rewards/margins": 0.3910350799560547, "rewards/rejected": -0.5383981466293335, "step": 800 }, { "epoch": 0.15, "learning_rate": 4.955249702600598e-06, "logits/chosen": -1.554051399230957, "logits/rejected": -0.8260191082954407, "logps/chosen": -566.3794555664062, "logps/rejected": -972.8746948242188, "loss": 0.1773, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07766494899988174, "rewards/margins": 0.32112446427345276, "rewards/rejected": -0.3987894356250763, "step": 810 }, { "epoch": 0.16, "learning_rate": 4.952064486426965e-06, "logits/chosen": -1.219767451286316, "logits/rejected": -0.7621467113494873, "logps/chosen": -650.5616455078125, "logps/rejected": -1093.0321044921875, "loss": 0.1903, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18349714577198029, "rewards/margins": 0.2664046585559845, "rewards/rejected": -0.449901819229126, "step": 820 }, { "epoch": 0.16, "learning_rate": 4.948770870970929e-06, "logits/chosen": -1.092639446258545, "logits/rejected": -0.5460809469223022, "logps/chosen": -645.5680541992188, "logps/rejected": -1267.3668212890625, "loss": 0.1404, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22210128605365753, "rewards/margins": 0.3323093056678772, "rewards/rejected": -0.5544105768203735, "step": 830 }, { "epoch": 0.16, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -1.531174898147583, "logits/rejected": -0.8413252830505371, "logps/chosen": -685.4244384765625, "logps/rejected": -1203.5826416015625, "loss": 0.1417, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18149597942829132, "rewards/margins": 0.3382376730442047, "rewards/rejected": -0.5197336673736572, "step": 840 }, { "epoch": 0.16, "learning_rate": 4.941859029405354e-06, "logits/chosen": -1.17498779296875, "logits/rejected": -0.7023947238922119, "logps/chosen": -551.9922485351562, "logps/rejected": -1016.5985107421875, "loss": 0.1363, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11300446838140488, "rewards/margins": 0.3152386546134949, "rewards/rejected": -0.42824316024780273, "step": 850 }, { "epoch": 0.16, "learning_rate": 4.938241108850039e-06, "logits/chosen": -1.40805983543396, "logits/rejected": -0.833687961101532, "logps/chosen": -593.3404541015625, "logps/rejected": -1136.892822265625, "loss": 0.091, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13804402947425842, "rewards/margins": 0.36584407091140747, "rewards/rejected": -0.5038881301879883, "step": 860 }, { "epoch": 0.17, "learning_rate": 4.934515400107266e-06, "logits/chosen": -1.2635471820831299, "logits/rejected": -0.8378936648368835, "logps/chosen": -621.8711547851562, "logps/rejected": -1154.795654296875, "loss": 0.1727, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1536412537097931, "rewards/margins": 0.3258283734321594, "rewards/rejected": -0.4794696867465973, "step": 870 }, { "epoch": 0.17, "learning_rate": 4.930682067880759e-06, "logits/chosen": -1.108872890472412, "logits/rejected": -0.930811882019043, "logps/chosen": -514.6136474609375, "logps/rejected": -1029.8231201171875, "loss": 0.1744, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1102759838104248, "rewards/margins": 0.3130547106266022, "rewards/rejected": -0.423330694437027, "step": 880 }, { "epoch": 0.17, "learning_rate": 4.926741281631991e-06, "logits/chosen": -0.9169878959655762, "logits/rejected": -0.6105486750602722, "logps/chosen": -676.228271484375, "logps/rejected": -1249.63525390625, "loss": 0.1593, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.204975888133049, "rewards/margins": 0.3155573010444641, "rewards/rejected": -0.5205332040786743, "step": 890 }, { "epoch": 0.17, "learning_rate": 4.922693215572695e-06, "logits/chosen": -1.1725494861602783, "logits/rejected": -0.7552506923675537, "logps/chosen": -712.9232177734375, "logps/rejected": -1375.6258544921875, "loss": 0.1401, "rewards/accuracies": 0.875, "rewards/chosen": -0.18521100282669067, "rewards/margins": 0.3580084443092346, "rewards/rejected": -0.5432195067405701, "step": 900 }, { "epoch": 0.17, "learning_rate": 4.91853804865716e-06, "logits/chosen": -1.1123251914978027, "logits/rejected": -0.523077130317688, "logps/chosen": -605.0357666015625, "logps/rejected": -1046.534912109375, "loss": 0.1584, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13643239438533783, "rewards/margins": 0.25512298941612244, "rewards/rejected": -0.3915553689002991, "step": 910 }, { "epoch": 0.18, "learning_rate": 4.91427596457432e-06, "logits/chosen": -0.9901615381240845, "logits/rejected": -0.4433814585208893, "logps/chosen": -542.9393310546875, "logps/rejected": -1093.594482421875, "loss": 0.157, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07922369986772537, "rewards/margins": 0.34233754873275757, "rewards/rejected": -0.42156124114990234, "step": 920 }, { "epoch": 0.18, "learning_rate": 4.909907151739634e-06, "logits/chosen": -1.1942120790481567, "logits/rejected": -0.5035274028778076, "logps/chosen": -595.7505493164062, "logps/rejected": -1247.04248046875, "loss": 0.125, "rewards/accuracies": 0.875, "rewards/chosen": -0.13257943093776703, "rewards/margins": 0.36459317803382874, "rewards/rejected": -0.49717265367507935, "step": 930 }, { "epoch": 0.18, "learning_rate": 4.905431803286756e-06, "logits/chosen": -1.2022253274917603, "logits/rejected": -0.7740514278411865, "logps/chosen": -622.0069580078125, "logps/rejected": -1264.624755859375, "loss": 0.1333, "rewards/accuracies": 0.875, "rewards/chosen": -0.11565611511468887, "rewards/margins": 0.3564669191837311, "rewards/rejected": -0.47212305665016174, "step": 940 }, { "epoch": 0.18, "learning_rate": 4.900850117059e-06, "logits/chosen": -1.0613458156585693, "logits/rejected": -0.6823363900184631, "logps/chosen": -438.91656494140625, "logps/rejected": -1148.8316650390625, "loss": 0.1438, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07876328378915787, "rewards/margins": 0.3808586001396179, "rewards/rejected": -0.4596218466758728, "step": 950 }, { "epoch": 0.18, "learning_rate": 4.8961622956005895e-06, "logits/chosen": -1.5277841091156006, "logits/rejected": -0.6931833028793335, "logps/chosen": -555.1168823242188, "logps/rejected": -1010.2923583984375, "loss": 0.138, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09128925204277039, "rewards/margins": 0.3329823315143585, "rewards/rejected": -0.4242715835571289, "step": 960 }, { "epoch": 0.18, "learning_rate": 4.891368546147707e-06, "logits/chosen": -1.187867283821106, "logits/rejected": -0.5561275482177734, "logps/chosen": -648.8507080078125, "logps/rejected": -1295.135986328125, "loss": 0.1343, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16202875971794128, "rewards/margins": 0.4002090394496918, "rewards/rejected": -0.5622377991676331, "step": 970 }, { "epoch": 0.19, "learning_rate": 4.88646908061933e-06, "logits/chosen": -1.326287031173706, "logits/rejected": -0.5947343707084656, "logps/chosen": -633.6121826171875, "logps/rejected": -1276.321533203125, "loss": 0.1076, "rewards/accuracies": 0.875, "rewards/chosen": -0.12177781015634537, "rewards/margins": 0.3921108841896057, "rewards/rejected": -0.5138886570930481, "step": 980 }, { "epoch": 0.19, "learning_rate": 4.881464115607866e-06, "logits/chosen": -1.3830275535583496, "logits/rejected": -0.6146548390388489, "logps/chosen": -553.8236694335938, "logps/rejected": -1102.0936279296875, "loss": 0.1488, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10271717607975006, "rewards/margins": 0.35286661982536316, "rewards/rejected": -0.4555838108062744, "step": 990 }, { "epoch": 0.19, "learning_rate": 4.876353872369573e-06, "logits/chosen": -1.3082172870635986, "logits/rejected": -0.52774578332901, "logps/chosen": -530.1275634765625, "logps/rejected": -1161.6585693359375, "loss": 0.1557, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10600404441356659, "rewards/margins": 0.3763955533504486, "rewards/rejected": -0.4823996424674988, "step": 1000 }, { "epoch": 0.19, "learning_rate": 4.871138576814782e-06, "logits/chosen": -0.9107920527458191, "logits/rejected": -0.7462531924247742, "logps/chosen": -627.5322875976562, "logps/rejected": -1158.464111328125, "loss": 0.1746, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13089929521083832, "rewards/margins": 0.28971680998802185, "rewards/rejected": -0.420616090297699, "step": 1010 }, { "epoch": 0.19, "learning_rate": 4.865818459497911e-06, "logits/chosen": -1.4817858934402466, "logits/rejected": -0.639228105545044, "logps/chosen": -493.29400634765625, "logps/rejected": -1035.482666015625, "loss": 0.0961, "rewards/accuracies": 0.875, "rewards/chosen": -0.05207972973585129, "rewards/margins": 0.331185519695282, "rewards/rejected": -0.3832652270793915, "step": 1020 }, { "epoch": 0.2, "learning_rate": 4.860393755607266e-06, "logits/chosen": -0.8650029897689819, "logits/rejected": -0.4500044286251068, "logps/chosen": -566.3787231445312, "logps/rejected": -1107.3399658203125, "loss": 0.1214, "rewards/accuracies": 0.875, "rewards/chosen": -0.07852591574192047, "rewards/margins": 0.35980334877967834, "rewards/rejected": -0.43832927942276, "step": 1030 }, { "epoch": 0.2, "learning_rate": 4.854864704954654e-06, "logits/chosen": -1.1504755020141602, "logits/rejected": -0.7252748608589172, "logps/chosen": -554.3898315429688, "logps/rejected": -1351.515869140625, "loss": 0.09, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.08045043796300888, "rewards/margins": 0.42690858244895935, "rewards/rejected": -0.5073590874671936, "step": 1040 }, { "epoch": 0.2, "learning_rate": 4.849231551964771e-06, "logits/chosen": -1.4742746353149414, "logits/rejected": -0.7494825124740601, "logps/chosen": -633.5468139648438, "logps/rejected": -1140.65771484375, "loss": 0.1486, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10754774510860443, "rewards/margins": 0.375037282705307, "rewards/rejected": -0.48258504271507263, "step": 1050 }, { "epoch": 0.2, "learning_rate": 4.843494545664407e-06, "logits/chosen": -1.236707329750061, "logits/rejected": -0.7440085411071777, "logps/chosen": -688.7361450195312, "logps/rejected": -1218.9052734375, "loss": 0.1163, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19547973573207855, "rewards/margins": 0.3395041227340698, "rewards/rejected": -0.5349839329719543, "step": 1060 }, { "epoch": 0.2, "learning_rate": 4.837653939671427e-06, "logits/chosen": -0.9117664098739624, "logits/rejected": -0.3286951184272766, "logps/chosen": -787.8372802734375, "logps/rejected": -1330.0345458984375, "loss": 0.1948, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29442816972732544, "rewards/margins": 0.37456613779067993, "rewards/rejected": -0.6689942479133606, "step": 1070 }, { "epoch": 0.21, "learning_rate": 4.8317099921835695e-06, "logits/chosen": -1.3668533563613892, "logits/rejected": -0.28480952978134155, "logps/chosen": -635.2105712890625, "logps/rejected": -1240.2222900390625, "loss": 0.1405, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1587924063205719, "rewards/margins": 0.40898895263671875, "rewards/rejected": -0.5677813291549683, "step": 1080 }, { "epoch": 0.21, "learning_rate": 4.825662965967023e-06, "logits/chosen": -1.2586389780044556, "logits/rejected": -0.8771806955337524, "logps/chosen": -507.41302490234375, "logps/rejected": -1038.387939453125, "loss": 0.1756, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1068439707159996, "rewards/margins": 0.2905372381210327, "rewards/rejected": -0.3973812460899353, "step": 1090 }, { "epoch": 0.21, "learning_rate": 4.819513128344814e-06, "logits/chosen": -1.0466293096542358, "logits/rejected": -0.4343542158603668, "logps/chosen": -615.6522216796875, "logps/rejected": -1209.098388671875, "loss": 0.1468, "rewards/accuracies": 0.75, "rewards/chosen": -0.12263033539056778, "rewards/margins": 0.3429615795612335, "rewards/rejected": -0.4655919075012207, "step": 1100 }, { "epoch": 0.21, "learning_rate": 4.813260751184992e-06, "logits/chosen": -1.3536839485168457, "logits/rejected": -0.5522807240486145, "logps/chosen": -674.6290893554688, "logps/rejected": -1156.7613525390625, "loss": 0.2257, "rewards/accuracies": 0.75, "rewards/chosen": -0.1414644569158554, "rewards/margins": 0.3118587136268616, "rewards/rejected": -0.4533231258392334, "step": 1110 }, { "epoch": 0.21, "learning_rate": 4.806906110888606e-06, "logits/chosen": -1.1581947803497314, "logits/rejected": -0.9168432950973511, "logps/chosen": -564.1990966796875, "logps/rejected": -1230.8319091796875, "loss": 0.0925, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.10862650722265244, "rewards/margins": 0.37330490350723267, "rewards/rejected": -0.4819313883781433, "step": 1120 }, { "epoch": 0.22, "learning_rate": 4.8004494883774885e-06, "logits/chosen": -1.0457051992416382, "logits/rejected": -0.4030301570892334, "logps/chosen": -533.67138671875, "logps/rejected": -958.4835815429688, "loss": 0.1672, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11227695643901825, "rewards/margins": 0.2561245858669281, "rewards/rejected": -0.36840155720710754, "step": 1130 }, { "epoch": 0.22, "learning_rate": 4.793891169081835e-06, "logits/chosen": -0.9240137338638306, "logits/rejected": -0.46791282296180725, "logps/chosen": -517.36279296875, "logps/rejected": -1042.5450439453125, "loss": 0.1592, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12927506864070892, "rewards/margins": 0.3360896408557892, "rewards/rejected": -0.4653647541999817, "step": 1140 }, { "epoch": 0.22, "learning_rate": 4.787231442927587e-06, "logits/chosen": -0.977418065071106, "logits/rejected": -0.4544641077518463, "logps/chosen": -602.7192993164062, "logps/rejected": -1098.3702392578125, "loss": 0.1292, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14185282588005066, "rewards/margins": 0.3293328583240509, "rewards/rejected": -0.47118568420410156, "step": 1150 }, { "epoch": 0.22, "learning_rate": 4.780470604323616e-06, "logits/chosen": -0.915343165397644, "logits/rejected": -0.2857852578163147, "logps/chosen": -579.4141845703125, "logps/rejected": -1180.65869140625, "loss": 0.1545, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1585255116224289, "rewards/margins": 0.36057814955711365, "rewards/rejected": -0.5191037058830261, "step": 1160 }, { "epoch": 0.22, "learning_rate": 4.773608952148706e-06, "logits/chosen": -0.9045808911323547, "logits/rejected": -0.48351603746414185, "logps/chosen": -622.4371337890625, "logps/rejected": -1293.734130859375, "loss": 0.1105, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1697140634059906, "rewards/margins": 0.3570792078971863, "rewards/rejected": -0.5267933011054993, "step": 1170 }, { "epoch": 0.22, "learning_rate": 4.766646789738342e-06, "logits/chosen": -0.8739123344421387, "logits/rejected": -0.22731420397758484, "logps/chosen": -645.2877197265625, "logps/rejected": -1271.317626953125, "loss": 0.1193, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16044828295707703, "rewards/margins": 0.34264761209487915, "rewards/rejected": -0.5030958652496338, "step": 1180 }, { "epoch": 0.23, "learning_rate": 4.759584424871302e-06, "logits/chosen": -1.1294559240341187, "logits/rejected": -0.5988625288009644, "logps/chosen": -581.9887084960938, "logps/rejected": -1244.6416015625, "loss": 0.11, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16848810017108917, "rewards/margins": 0.3956344723701477, "rewards/rejected": -0.5641225576400757, "step": 1190 }, { "epoch": 0.23, "learning_rate": 4.752422169756048e-06, "logits/chosen": -0.8915788531303406, "logits/rejected": -0.22975881397724152, "logps/chosen": -650.3289794921875, "logps/rejected": -1052.154052734375, "loss": 0.1984, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2178494930267334, "rewards/margins": 0.27915602922439575, "rewards/rejected": -0.49700555205345154, "step": 1200 }, { "epoch": 0.23, "learning_rate": 4.745160341016927e-06, "logits/chosen": -1.104791522026062, "logits/rejected": -0.5506068468093872, "logps/chosen": -511.31085205078125, "logps/rejected": -1089.227783203125, "loss": 0.0961, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07785917073488235, "rewards/margins": 0.33755436539649963, "rewards/rejected": -0.4154135286808014, "step": 1210 }, { "epoch": 0.23, "learning_rate": 4.737799259680172e-06, "logits/chosen": -0.9533011317253113, "logits/rejected": -0.6333903074264526, "logps/chosen": -655.6493530273438, "logps/rejected": -1117.072509765625, "loss": 0.2152, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16594456136226654, "rewards/margins": 0.2548503875732422, "rewards/rejected": -0.4207949638366699, "step": 1220 }, { "epoch": 0.23, "learning_rate": 4.730339251159709e-06, "logits/chosen": -1.019805669784546, "logits/rejected": -0.601079523563385, "logps/chosen": -688.417236328125, "logps/rejected": -1230.857177734375, "loss": 0.1371, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16020271182060242, "rewards/margins": 0.3214544653892517, "rewards/rejected": -0.4816571772098541, "step": 1230 }, { "epoch": 0.24, "learning_rate": 4.722780645242775e-06, "logits/chosen": -1.1415643692016602, "logits/rejected": -0.47147518396377563, "logps/chosen": -669.0128784179688, "logps/rejected": -1380.927978515625, "loss": 0.1117, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1965821385383606, "rewards/margins": 0.3425455093383789, "rewards/rejected": -0.5391276478767395, "step": 1240 }, { "epoch": 0.24, "learning_rate": 4.715123776075337e-06, "logits/chosen": -0.6856998205184937, "logits/rejected": -0.32746610045433044, "logps/chosen": -650.1533203125, "logps/rejected": -1139.1361083984375, "loss": 0.1642, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2048042118549347, "rewards/margins": 0.27393996715545654, "rewards/rejected": -0.47874417901039124, "step": 1250 }, { "epoch": 0.24, "learning_rate": 4.707368982147318e-06, "logits/chosen": -0.9481573104858398, "logits/rejected": -0.4462096095085144, "logps/chosen": -723.6339111328125, "logps/rejected": -1460.113037109375, "loss": 0.133, "rewards/accuracies": 0.875, "rewards/chosen": -0.22485177218914032, "rewards/margins": 0.3850511908531189, "rewards/rejected": -0.6099029779434204, "step": 1260 }, { "epoch": 0.24, "learning_rate": 4.699516606277638e-06, "logits/chosen": -0.9349290132522583, "logits/rejected": -0.3267834186553955, "logps/chosen": -799.514404296875, "logps/rejected": -1373.8382568359375, "loss": 0.1546, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2475205957889557, "rewards/margins": 0.33584627509117126, "rewards/rejected": -0.5833669304847717, "step": 1270 }, { "epoch": 0.24, "learning_rate": 4.691566995599056e-06, "logits/chosen": -0.9802951812744141, "logits/rejected": -0.5204849243164062, "logps/chosen": -669.2354736328125, "logps/rejected": -1357.170166015625, "loss": 0.1324, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22764785587787628, "rewards/margins": 0.38922256231307983, "rewards/rejected": -0.6168703436851501, "step": 1280 }, { "epoch": 0.25, "learning_rate": 4.683520501542825e-06, "logits/chosen": -1.0406824350357056, "logits/rejected": -0.6527504324913025, "logps/chosen": -686.4751586914062, "logps/rejected": -1285.3961181640625, "loss": 0.1742, "rewards/accuracies": 0.75, "rewards/chosen": -0.22216255962848663, "rewards/margins": 0.38026702404022217, "rewards/rejected": -0.6024295091629028, "step": 1290 }, { "epoch": 0.25, "learning_rate": 4.675377479823153e-06, "logits/chosen": -1.2209810018539429, "logits/rejected": -0.554297924041748, "logps/chosen": -636.0477294921875, "logps/rejected": -1283.287841796875, "loss": 0.1195, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21655163168907166, "rewards/margins": 0.44541746377944946, "rewards/rejected": -0.661969006061554, "step": 1300 }, { "epoch": 0.25, "learning_rate": 4.667138290421483e-06, "logits/chosen": -1.2028907537460327, "logits/rejected": -0.695913553237915, "logps/chosen": -669.0062255859375, "logps/rejected": -1421.6971435546875, "loss": 0.1238, "rewards/accuracies": 0.875, "rewards/chosen": -0.22412872314453125, "rewards/margins": 0.4658629894256592, "rewards/rejected": -0.6899917721748352, "step": 1310 }, { "epoch": 0.25, "learning_rate": 4.658803297570578e-06, "logits/chosen": -1.3348348140716553, "logits/rejected": -0.7408299446105957, "logps/chosen": -593.8594970703125, "logps/rejected": -1273.971923828125, "loss": 0.1373, "rewards/accuracies": 0.875, "rewards/chosen": -0.15079554915428162, "rewards/margins": 0.3982647955417633, "rewards/rejected": -0.5490604043006897, "step": 1320 }, { "epoch": 0.25, "learning_rate": 4.650372869738415e-06, "logits/chosen": -1.253575086593628, "logits/rejected": -0.7287543416023254, "logps/chosen": -567.991455078125, "logps/rejected": -1249.7635498046875, "loss": 0.1433, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10806350409984589, "rewards/margins": 0.34477290511131287, "rewards/rejected": -0.45283642411231995, "step": 1330 }, { "epoch": 0.26, "learning_rate": 4.641847379611898e-06, "logits/chosen": -0.94889897108078, "logits/rejected": -0.5828784704208374, "logps/chosen": -476.6119079589844, "logps/rejected": -953.9421997070312, "loss": 0.1474, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11951301246881485, "rewards/margins": 0.32011961936950684, "rewards/rejected": -0.4396325945854187, "step": 1340 }, { "epoch": 0.26, "learning_rate": 4.633227204080389e-06, "logits/chosen": -1.5797253847122192, "logits/rejected": -0.9707719683647156, "logps/chosen": -569.090087890625, "logps/rejected": -1124.434814453125, "loss": 0.1396, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11360124498605728, "rewards/margins": 0.3391994535923004, "rewards/rejected": -0.4528006911277771, "step": 1350 }, { "epoch": 0.26, "learning_rate": 4.624512724219038e-06, "logits/chosen": -1.303711175918579, "logits/rejected": -1.0192549228668213, "logps/chosen": -494.87628173828125, "logps/rejected": -952.2423095703125, "loss": 0.1863, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1247144341468811, "rewards/margins": 0.28481853008270264, "rewards/rejected": -0.40953296422958374, "step": 1360 }, { "epoch": 0.26, "learning_rate": 4.6157043252719374e-06, "logits/chosen": -1.155958890914917, "logits/rejected": -0.7091153860092163, "logps/chosen": -503.44940185546875, "logps/rejected": -1050.03662109375, "loss": 0.1343, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1682785451412201, "rewards/margins": 0.34431368112564087, "rewards/rejected": -0.5125921964645386, "step": 1370 }, { "epoch": 0.26, "learning_rate": 4.606802396635098e-06, "logits/chosen": -0.9495147466659546, "logits/rejected": -0.7004578113555908, "logps/chosen": -728.7332763671875, "logps/rejected": -1480.4185791015625, "loss": 0.116, "rewards/accuracies": 0.875, "rewards/chosen": -0.25476792454719543, "rewards/margins": 0.36221030354499817, "rewards/rejected": -0.6169782876968384, "step": 1380 }, { "epoch": 0.26, "learning_rate": 4.597807331839229e-06, "logits/chosen": -1.254997968673706, "logits/rejected": -0.6912177801132202, "logps/chosen": -589.2714233398438, "logps/rejected": -1181.50146484375, "loss": 0.1371, "rewards/accuracies": 0.875, "rewards/chosen": -0.15702159702777863, "rewards/margins": 0.3693925738334656, "rewards/rejected": -0.5264140963554382, "step": 1390 }, { "epoch": 0.27, "learning_rate": 4.588719528532342e-06, "logits/chosen": -1.1592851877212524, "logits/rejected": -0.7439531087875366, "logps/chosen": -586.4857177734375, "logps/rejected": -991.2376708984375, "loss": 0.1433, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15877124667167664, "rewards/margins": 0.24606874585151672, "rewards/rejected": -0.4048399329185486, "step": 1400 }, { "epoch": 0.27, "learning_rate": 4.5795393884621735e-06, "logits/chosen": -1.218705415725708, "logits/rejected": -0.6760854125022888, "logps/chosen": -648.5039672851562, "logps/rejected": -1289.6689453125, "loss": 0.1641, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20570158958435059, "rewards/margins": 0.3733693063259125, "rewards/rejected": -0.5790709257125854, "step": 1410 }, { "epoch": 0.27, "learning_rate": 4.5702673174584236e-06, "logits/chosen": -1.1791530847549438, "logits/rejected": -0.5734705328941345, "logps/chosen": -515.4287109375, "logps/rejected": -999.3806762695312, "loss": 0.1763, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13127431273460388, "rewards/margins": 0.369742214679718, "rewards/rejected": -0.5010164976119995, "step": 1420 }, { "epoch": 0.27, "learning_rate": 4.560903725414816e-06, "logits/chosen": -1.152813196182251, "logits/rejected": -0.481799840927124, "logps/chosen": -665.3431396484375, "logps/rejected": -1264.098388671875, "loss": 0.1794, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20136284828186035, "rewards/margins": 0.3202665448188782, "rewards/rejected": -0.5216294527053833, "step": 1430 }, { "epoch": 0.27, "learning_rate": 4.551449026270979e-06, "logits/chosen": -1.267394781112671, "logits/rejected": -0.5250222086906433, "logps/chosen": -581.6858520507812, "logps/rejected": -1057.5654296875, "loss": 0.1596, "rewards/accuracies": 0.75, "rewards/chosen": -0.1351909339427948, "rewards/margins": 0.3306771516799927, "rewards/rejected": -0.46586814522743225, "step": 1440 }, { "epoch": 0.28, "learning_rate": 4.541903637994142e-06, "logits/chosen": -1.3409764766693115, "logits/rejected": -0.7679377794265747, "logps/chosen": -480.05645751953125, "logps/rejected": -1090.971923828125, "loss": 0.1392, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07650762796401978, "rewards/margins": 0.3760572671890259, "rewards/rejected": -0.45256486535072327, "step": 1450 }, { "epoch": 0.28, "learning_rate": 4.532267982560662e-06, "logits/chosen": -1.4760819673538208, "logits/rejected": -0.9794790148735046, "logps/chosen": -479.7372131347656, "logps/rejected": -999.6368408203125, "loss": 0.1796, "rewards/accuracies": 0.75, "rewards/chosen": -0.052784789353609085, "rewards/margins": 0.2971996068954468, "rewards/rejected": -0.34998437762260437, "step": 1460 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -1.3197352886199951, "logits/rejected": -0.9877460598945618, "logps/chosen": -492.95819091796875, "logps/rejected": -1145.4498291015625, "loss": 0.1059, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06162706017494202, "rewards/margins": 0.38417935371398926, "rewards/rejected": -0.4458064138889313, "step": 1470 }, { "epoch": 0.28, "learning_rate": 4.512727578062733e-06, "logits/chosen": -1.272756814956665, "logits/rejected": -0.8677829504013062, "logps/chosen": -552.0528564453125, "logps/rejected": -1070.023681640625, "loss": 0.1893, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11670806258916855, "rewards/margins": 0.3226349651813507, "rewards/rejected": -0.4393429756164551, "step": 1480 }, { "epoch": 0.28, "learning_rate": 4.502823692827859e-06, "logits/chosen": -1.448500394821167, "logits/rejected": -1.0024363994598389, "logps/chosen": -514.86572265625, "logps/rejected": -1106.417236328125, "loss": 0.1299, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1159830242395401, "rewards/margins": 0.3968135118484497, "rewards/rejected": -0.5127965211868286, "step": 1490 }, { "epoch": 0.29, "learning_rate": 4.492831268057307e-06, "logits/chosen": -1.2232450246810913, "logits/rejected": -0.7823948860168457, "logps/chosen": -592.3616333007812, "logps/rejected": -1248.7081298828125, "loss": 0.1447, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16527682542800903, "rewards/margins": 0.3835424482822418, "rewards/rejected": -0.5488192439079285, "step": 1500 }, { "epoch": 0.29, "learning_rate": 4.482750745489733e-06, "logits/chosen": -1.5774047374725342, "logits/rejected": -0.7469654679298401, "logps/chosen": -563.06396484375, "logps/rejected": -1091.6959228515625, "loss": 0.1655, "rewards/accuracies": 0.75, "rewards/chosen": -0.09917666763067245, "rewards/margins": 0.34462952613830566, "rewards/rejected": -0.44380617141723633, "step": 1510 }, { "epoch": 0.29, "learning_rate": 4.472582570758367e-06, "logits/chosen": -1.3279736042022705, "logits/rejected": -0.788594126701355, "logps/chosen": -612.7301025390625, "logps/rejected": -1255.1072998046875, "loss": 0.0866, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10588326305150986, "rewards/margins": 0.3859194219112396, "rewards/rejected": -0.4918026924133301, "step": 1520 }, { "epoch": 0.29, "learning_rate": 4.4623271933713065e-06, "logits/chosen": -1.194624900817871, "logits/rejected": -0.9283427000045776, "logps/chosen": -534.6856689453125, "logps/rejected": -1339.970947265625, "loss": 0.1069, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13434982299804688, "rewards/margins": 0.3875037729740143, "rewards/rejected": -0.5218536257743835, "step": 1530 }, { "epoch": 0.29, "learning_rate": 4.451985066691649e-06, "logits/chosen": -1.5798327922821045, "logits/rejected": -0.910678505897522, "logps/chosen": -499.3916015625, "logps/rejected": -1011.3780517578125, "loss": 0.1199, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.05566025525331497, "rewards/margins": 0.303718239068985, "rewards/rejected": -0.35937851667404175, "step": 1540 }, { "epoch": 0.3, "learning_rate": 4.441556647917447e-06, "logits/chosen": -1.6016006469726562, "logits/rejected": -0.9427106976509094, "logps/chosen": -574.3204956054688, "logps/rejected": -1196.283935546875, "loss": 0.1276, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0995236486196518, "rewards/margins": 0.36877164244651794, "rewards/rejected": -0.46829527616500854, "step": 1550 }, { "epoch": 0.3, "learning_rate": 4.431042398061499e-06, "logits/chosen": -1.3156967163085938, "logits/rejected": -0.7986858487129211, "logps/chosen": -592.8699340820312, "logps/rejected": -1237.5057373046875, "loss": 0.1733, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1585538387298584, "rewards/margins": 0.3927284777164459, "rewards/rejected": -0.5512823462486267, "step": 1560 }, { "epoch": 0.3, "learning_rate": 4.420442781930971e-06, "logits/chosen": -1.625213861465454, "logits/rejected": -0.8621442914009094, "logps/chosen": -659.4328002929688, "logps/rejected": -1072.4892578125, "loss": 0.2191, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15614424645900726, "rewards/margins": 0.3086170554161072, "rewards/rejected": -0.46476125717163086, "step": 1570 }, { "epoch": 0.3, "learning_rate": 4.409758268106842e-06, "logits/chosen": -1.4072753190994263, "logits/rejected": -0.7623811960220337, "logps/chosen": -561.3104248046875, "logps/rejected": -1190.7952880859375, "loss": 0.0977, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10378583520650864, "rewards/margins": 0.4222695827484131, "rewards/rejected": -0.5260554552078247, "step": 1580 }, { "epoch": 0.3, "learning_rate": 4.398989328923196e-06, "logits/chosen": -1.3669263124465942, "logits/rejected": -0.9334972500801086, "logps/chosen": -542.7431640625, "logps/rejected": -1307.201904296875, "loss": 0.1055, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.11602024734020233, "rewards/margins": 0.45660361647605896, "rewards/rejected": -0.5726239085197449, "step": 1590 }, { "epoch": 0.3, "learning_rate": 4.388136440446338e-06, "logits/chosen": -1.4301055669784546, "logits/rejected": -0.614362359046936, "logps/chosen": -626.32373046875, "logps/rejected": -1175.321533203125, "loss": 0.1789, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1843259185552597, "rewards/margins": 0.3518390357494354, "rewards/rejected": -0.5361649990081787, "step": 1600 }, { "epoch": 0.31, "learning_rate": 4.377200082453748e-06, "logits/chosen": -1.4748992919921875, "logits/rejected": -0.7736212611198425, "logps/chosen": -670.9618530273438, "logps/rejected": -1255.772216796875, "loss": 0.1367, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19646312296390533, "rewards/margins": 0.3885990381240845, "rewards/rejected": -0.5850621461868286, "step": 1610 }, { "epoch": 0.31, "learning_rate": 4.366180738412876e-06, "logits/chosen": -1.366182565689087, "logits/rejected": -0.7817819118499756, "logps/chosen": -553.6234741210938, "logps/rejected": -1196.564208984375, "loss": 0.135, "rewards/accuracies": 0.875, "rewards/chosen": -0.16348284482955933, "rewards/margins": 0.4007638394832611, "rewards/rejected": -0.5642467141151428, "step": 1620 }, { "epoch": 0.31, "learning_rate": 4.355078895459761e-06, "logits/chosen": -1.3753581047058105, "logits/rejected": -0.7463507056236267, "logps/chosen": -558.6531982421875, "logps/rejected": -1246.663330078125, "loss": 0.1107, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.13876953721046448, "rewards/margins": 0.41733336448669434, "rewards/rejected": -0.5561028718948364, "step": 1630 }, { "epoch": 0.31, "learning_rate": 4.343895044377504e-06, "logits/chosen": -1.3057817220687866, "logits/rejected": -0.5243216753005981, "logps/chosen": -497.2242126464844, "logps/rejected": -1097.6368408203125, "loss": 0.1275, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07704071700572968, "rewards/margins": 0.39920055866241455, "rewards/rejected": -0.4762412905693054, "step": 1640 }, { "epoch": 0.31, "learning_rate": 4.332629679574566e-06, "logits/chosen": -1.3259530067443848, "logits/rejected": -1.0467650890350342, "logps/chosen": -518.2452392578125, "logps/rejected": -979.3154296875, "loss": 0.1935, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13340485095977783, "rewards/margins": 0.2716575264930725, "rewards/rejected": -0.40506237745285034, "step": 1650 }, { "epoch": 0.32, "learning_rate": 4.321283299062916e-06, "logits/chosen": -1.3237239122390747, "logits/rejected": -0.8720139265060425, "logps/chosen": -517.047607421875, "logps/rejected": -1177.549072265625, "loss": 0.1676, "rewards/accuracies": 0.75, "rewards/chosen": -0.10904773324728012, "rewards/margins": 0.36686578392982483, "rewards/rejected": -0.47591352462768555, "step": 1660 }, { "epoch": 0.32, "learning_rate": 4.309856404436013e-06, "logits/chosen": -1.116029143333435, "logits/rejected": -0.75030916929245, "logps/chosen": -556.4301147460938, "logps/rejected": -1120.1612548828125, "loss": 0.1095, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1307665854692459, "rewards/margins": 0.3586767315864563, "rewards/rejected": -0.48944324254989624, "step": 1670 }, { "epoch": 0.32, "learning_rate": 4.2983495008466285e-06, "logits/chosen": -1.3444514274597168, "logits/rejected": -0.751409649848938, "logps/chosen": -649.5257568359375, "logps/rejected": -1231.934326171875, "loss": 0.1339, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19607360661029816, "rewards/margins": 0.3417356014251709, "rewards/rejected": -0.5378091931343079, "step": 1680 }, { "epoch": 0.32, "learning_rate": 4.2867630969845235e-06, "logits/chosen": -1.2701570987701416, "logits/rejected": -1.0056142807006836, "logps/chosen": -518.3505859375, "logps/rejected": -1137.1268310546875, "loss": 0.1677, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10869228839874268, "rewards/margins": 0.3265579342842102, "rewards/rejected": -0.4352502226829529, "step": 1690 }, { "epoch": 0.32, "learning_rate": 4.275097705053951e-06, "logits/chosen": -1.316869854927063, "logits/rejected": -0.9948606491088867, "logps/chosen": -520.4530029296875, "logps/rejected": -1160.157958984375, "loss": 0.1326, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13747765123844147, "rewards/margins": 0.34342777729034424, "rewards/rejected": -0.4809054434299469, "step": 1700 }, { "epoch": 0.33, "learning_rate": 4.263353840751023e-06, "logits/chosen": -1.6228771209716797, "logits/rejected": -0.7855164408683777, "logps/chosen": -611.2681884765625, "logps/rejected": -1256.611083984375, "loss": 0.1508, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1420706957578659, "rewards/margins": 0.3913714289665222, "rewards/rejected": -0.5334421396255493, "step": 1710 }, { "epoch": 0.33, "learning_rate": 4.251532023240901e-06, "logits/chosen": -1.3502724170684814, "logits/rejected": -0.7914119362831116, "logps/chosen": -521.9387817382812, "logps/rejected": -1034.3741455078125, "loss": 0.1282, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10950807482004166, "rewards/margins": 0.31158068776130676, "rewards/rejected": -0.4210887551307678, "step": 1720 }, { "epoch": 0.33, "learning_rate": 4.239632775134857e-06, "logits/chosen": -1.5358669757843018, "logits/rejected": -1.0733082294464111, "logps/chosen": -517.5224609375, "logps/rejected": -1196.409912109375, "loss": 0.1407, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09737374633550644, "rewards/margins": 0.3955075144767761, "rewards/rejected": -0.49288123846054077, "step": 1730 }, { "epoch": 0.33, "learning_rate": 4.227656622467162e-06, "logits/chosen": -1.6994380950927734, "logits/rejected": -1.0408018827438354, "logps/chosen": -642.0311279296875, "logps/rejected": -1130.818603515625, "loss": 0.1922, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.0865662693977356, "rewards/margins": 0.29607805609703064, "rewards/rejected": -0.38264432549476624, "step": 1740 }, { "epoch": 0.33, "learning_rate": 4.215604094671835e-06, "logits/chosen": -1.7783164978027344, "logits/rejected": -1.0559531450271606, "logps/chosen": -539.727783203125, "logps/rejected": -1073.862060546875, "loss": 0.0991, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.06037856265902519, "rewards/margins": 0.34613969922065735, "rewards/rejected": -0.40651822090148926, "step": 1750 }, { "epoch": 0.34, "learning_rate": 4.203475724559235e-06, "logits/chosen": -1.363991618156433, "logits/rejected": -0.7144681215286255, "logps/chosen": -587.6904296875, "logps/rejected": -1210.5653076171875, "loss": 0.1122, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13938170671463013, "rewards/margins": 0.33856886625289917, "rewards/rejected": -0.4779505729675293, "step": 1760 }, { "epoch": 0.34, "learning_rate": 4.191272048292514e-06, "logits/chosen": -1.4167028665542603, "logits/rejected": -0.6090995073318481, "logps/chosen": -629.8917236328125, "logps/rejected": -1422.7752685546875, "loss": 0.1155, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18620936572551727, "rewards/margins": 0.46838346123695374, "rewards/rejected": -0.6545928120613098, "step": 1770 }, { "epoch": 0.34, "learning_rate": 4.178993605363904e-06, "logits/chosen": -1.3911386728286743, "logits/rejected": -0.6771969199180603, "logps/chosen": -629.9539794921875, "logps/rejected": -1197.6573486328125, "loss": 0.1575, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16914603114128113, "rewards/margins": 0.3652108907699585, "rewards/rejected": -0.534356951713562, "step": 1780 }, { "epoch": 0.34, "learning_rate": 4.166640938570879e-06, "logits/chosen": -1.3372395038604736, "logits/rejected": -0.9776903390884399, "logps/chosen": -557.6268310546875, "logps/rejected": -1164.528564453125, "loss": 0.1092, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12413576990365982, "rewards/margins": 0.3473701477050781, "rewards/rejected": -0.47150593996047974, "step": 1790 }, { "epoch": 0.34, "learning_rate": 4.154214593992149e-06, "logits/chosen": -1.559403657913208, "logits/rejected": -0.8102853894233704, "logps/chosen": -546.7049560546875, "logps/rejected": -1175.484619140625, "loss": 0.0795, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1457722932100296, "rewards/margins": 0.3927035331726074, "rewards/rejected": -0.5384758114814758, "step": 1800 }, { "epoch": 0.34, "learning_rate": 4.1417151209635265e-06, "logits/chosen": -1.4616343975067139, "logits/rejected": -0.7549998760223389, "logps/chosen": -641.3250732421875, "logps/rejected": -1241.9052734375, "loss": 0.12, "rewards/accuracies": 0.875, "rewards/chosen": -0.11428463459014893, "rewards/margins": 0.3963013291358948, "rewards/rejected": -0.5105859041213989, "step": 1810 }, { "epoch": 0.35, "learning_rate": 4.129143072053639e-06, "logits/chosen": -1.2769545316696167, "logits/rejected": -0.970511794090271, "logps/chosen": -632.5917358398438, "logps/rejected": -1191.12841796875, "loss": 0.1692, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14244945347309113, "rewards/margins": 0.30153170228004456, "rewards/rejected": -0.4439811706542969, "step": 1820 }, { "epoch": 0.35, "learning_rate": 4.116499003039499e-06, "logits/chosen": -1.6208698749542236, "logits/rejected": -1.0573627948760986, "logps/chosen": -506.5850524902344, "logps/rejected": -1004.9436645507812, "loss": 0.1679, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07435942441225052, "rewards/margins": 0.31333065032958984, "rewards/rejected": -0.387690007686615, "step": 1830 }, { "epoch": 0.35, "learning_rate": 4.103783472881942e-06, "logits/chosen": -1.4309699535369873, "logits/rejected": -0.9792940020561218, "logps/chosen": -568.4268188476562, "logps/rejected": -1159.736083984375, "loss": 0.1741, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14729540050029755, "rewards/margins": 0.3386920392513275, "rewards/rejected": -0.48598742485046387, "step": 1840 }, { "epoch": 0.35, "learning_rate": 4.0909970437009094e-06, "logits/chosen": -1.775718331336975, "logits/rejected": -0.9666167497634888, "logps/chosen": -561.1525268554688, "logps/rejected": -1086.6915283203125, "loss": 0.1364, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11503710597753525, "rewards/margins": 0.3677385449409485, "rewards/rejected": -0.48277562856674194, "step": 1850 }, { "epoch": 0.35, "learning_rate": 4.078140280750598e-06, "logits/chosen": -1.2573397159576416, "logits/rejected": -0.7077465057373047, "logps/chosen": -633.7716064453125, "logps/rejected": -1439.435302734375, "loss": 0.0564, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.1650095283985138, "rewards/margins": 0.45819205045700073, "rewards/rejected": -0.6232015490531921, "step": 1860 }, { "epoch": 0.36, "learning_rate": 4.065213752394478e-06, "logits/chosen": -1.3589421510696411, "logits/rejected": -0.9992012977600098, "logps/chosen": -580.5692138671875, "logps/rejected": -1243.3802490234375, "loss": 0.15, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18238410353660583, "rewards/margins": 0.33454930782318115, "rewards/rejected": -0.5169334411621094, "step": 1870 }, { "epoch": 0.36, "learning_rate": 4.052218030080162e-06, "logits/chosen": -1.45956289768219, "logits/rejected": -0.8376334309577942, "logps/chosen": -706.3399047851562, "logps/rejected": -1299.748779296875, "loss": 0.2029, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21740135550498962, "rewards/margins": 0.34002408385276794, "rewards/rejected": -0.5574253797531128, "step": 1880 }, { "epoch": 0.36, "learning_rate": 4.039153688314146e-06, "logits/chosen": -1.1303154230117798, "logits/rejected": -0.612655758857727, "logps/chosen": -599.7442626953125, "logps/rejected": -1183.0966796875, "loss": 0.1312, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17545202374458313, "rewards/margins": 0.3555278778076172, "rewards/rejected": -0.5309798717498779, "step": 1890 }, { "epoch": 0.36, "learning_rate": 4.026021304636408e-06, "logits/chosen": -1.3697234392166138, "logits/rejected": -0.8021427989006042, "logps/chosen": -548.4887084960938, "logps/rejected": -1340.834228515625, "loss": 0.1397, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11686456203460693, "rewards/margins": 0.3839666247367859, "rewards/rejected": -0.5008311867713928, "step": 1900 }, { "epoch": 0.36, "learning_rate": 4.012821459594881e-06, "logits/chosen": -1.6020843982696533, "logits/rejected": -0.8310837745666504, "logps/chosen": -622.650390625, "logps/rejected": -1369.901611328125, "loss": 0.1549, "rewards/accuracies": 0.875, "rewards/chosen": -0.1758982241153717, "rewards/margins": 0.4299909174442291, "rewards/rejected": -0.605889081954956, "step": 1910 }, { "epoch": 0.37, "learning_rate": 3.999554736719785e-06, "logits/chosen": -1.4315736293792725, "logits/rejected": -0.6609684228897095, "logps/chosen": -720.5235595703125, "logps/rejected": -1294.607666015625, "loss": 0.1031, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21330145001411438, "rewards/margins": 0.3633466362953186, "rewards/rejected": -0.5766480565071106, "step": 1920 }, { "epoch": 0.37, "learning_rate": 3.986221722497832e-06, "logits/chosen": -1.483521819114685, "logits/rejected": -0.9152014851570129, "logps/chosen": -539.4887084960938, "logps/rejected": -985.6451416015625, "loss": 0.1502, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07910256087779999, "rewards/margins": 0.3007953464984894, "rewards/rejected": -0.37989792227745056, "step": 1930 }, { "epoch": 0.37, "learning_rate": 3.9728230063463e-06, "logits/chosen": -1.529681921005249, "logits/rejected": -0.8244630098342896, "logps/chosen": -675.7764892578125, "logps/rejected": -1180.5302734375, "loss": 0.155, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11956585943698883, "rewards/margins": 0.33157438039779663, "rewards/rejected": -0.45114025473594666, "step": 1940 }, { "epoch": 0.37, "learning_rate": 3.9593591805869755e-06, "logits/chosen": -1.3715250492095947, "logits/rejected": -0.8341531753540039, "logps/chosen": -593.0419311523438, "logps/rejected": -1164.8173828125, "loss": 0.1623, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17433542013168335, "rewards/margins": 0.35831016302108765, "rewards/rejected": -0.532645583152771, "step": 1950 }, { "epoch": 0.37, "learning_rate": 3.945830840419966e-06, "logits/chosen": -1.423443078994751, "logits/rejected": -0.7140504121780396, "logps/chosen": -742.0792236328125, "logps/rejected": -1378.734130859375, "loss": 0.1206, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.17810894548892975, "rewards/margins": 0.41435369849205017, "rewards/rejected": -0.5924626588821411, "step": 1960 }, { "epoch": 0.38, "learning_rate": 3.932238583897395e-06, "logits/chosen": -1.4452390670776367, "logits/rejected": -0.8688638806343079, "logps/chosen": -495.364990234375, "logps/rejected": -1121.3785400390625, "loss": 0.1319, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09509611129760742, "rewards/margins": 0.38469117879867554, "rewards/rejected": -0.47978726029396057, "step": 1970 }, { "epoch": 0.38, "learning_rate": 3.918583011896955e-06, "logits/chosen": -1.3520643711090088, "logits/rejected": -0.6752449870109558, "logps/chosen": -593.3623657226562, "logps/rejected": -1242.71240234375, "loss": 0.142, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1391793191432953, "rewards/margins": 0.37348073720932007, "rewards/rejected": -0.5126600861549377, "step": 1980 }, { "epoch": 0.38, "learning_rate": 3.904864728095349e-06, "logits/chosen": -1.5041224956512451, "logits/rejected": -1.1004841327667236, "logps/chosen": -597.1741333007812, "logps/rejected": -1029.542724609375, "loss": 0.1836, "rewards/accuracies": 0.75, "rewards/chosen": -0.15273083746433258, "rewards/margins": 0.291714072227478, "rewards/rejected": -0.4444448947906494, "step": 1990 }, { "epoch": 0.38, "learning_rate": 3.891084338941603e-06, "logits/chosen": -1.3465986251831055, "logits/rejected": -0.6932390928268433, "logps/chosen": -770.487548828125, "logps/rejected": -1395.894287109375, "loss": 0.107, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.23891493678092957, "rewards/margins": 0.37787628173828125, "rewards/rejected": -0.6167911291122437, "step": 2000 }, { "epoch": 0.38, "learning_rate": 3.8772424536302565e-06, "logits/chosen": -1.4603118896484375, "logits/rejected": -0.6516574025154114, "logps/chosen": -758.9030151367188, "logps/rejected": -1242.5791015625, "loss": 0.1266, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.25265830755233765, "rewards/margins": 0.3533734679222107, "rewards/rejected": -0.6060317754745483, "step": 2010 }, { "epoch": 0.38, "learning_rate": 3.863339684074432e-06, "logits/chosen": -1.3898146152496338, "logits/rejected": -0.7232173085212708, "logps/chosen": -689.8148193359375, "logps/rejected": -1179.7474365234375, "loss": 0.1656, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2183699905872345, "rewards/margins": 0.3092297315597534, "rewards/rejected": -0.5275997519493103, "step": 2020 }, { "epoch": 0.39, "learning_rate": 3.849376644878783e-06, "logits/chosen": -1.3936997652053833, "logits/rejected": -0.716342568397522, "logps/chosen": -682.8460083007812, "logps/rejected": -1205.3048095703125, "loss": 0.1374, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15836775302886963, "rewards/margins": 0.3463904857635498, "rewards/rejected": -0.5047582387924194, "step": 2030 }, { "epoch": 0.39, "learning_rate": 3.835353953312322e-06, "logits/chosen": -1.1822779178619385, "logits/rejected": -0.7468147873878479, "logps/chosen": -573.7110595703125, "logps/rejected": -1142.3682861328125, "loss": 0.1513, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1834631860256195, "rewards/margins": 0.2781076729297638, "rewards/rejected": -0.4615708291530609, "step": 2040 }, { "epoch": 0.39, "learning_rate": 3.821272229281139e-06, "logits/chosen": -1.599094033241272, "logits/rejected": -0.9966009855270386, "logps/chosen": -656.4232788085938, "logps/rejected": -1115.461181640625, "loss": 0.189, "rewards/accuracies": 0.75, "rewards/chosen": -0.17068567872047424, "rewards/margins": 0.30639082193374634, "rewards/rejected": -0.47707653045654297, "step": 2050 }, { "epoch": 0.39, "learning_rate": 3.8071320953009906e-06, "logits/chosen": -1.6005001068115234, "logits/rejected": -0.9590044021606445, "logps/chosen": -537.8767700195312, "logps/rejected": -1028.5263671875, "loss": 0.2036, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12943410873413086, "rewards/margins": 0.3024521470069885, "rewards/rejected": -0.4318862557411194, "step": 2060 }, { "epoch": 0.39, "learning_rate": 3.792934176469782e-06, "logits/chosen": -1.5783987045288086, "logits/rejected": -1.038562297821045, "logps/chosen": -623.7960815429688, "logps/rejected": -1096.355712890625, "loss": 0.1776, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1391536444425583, "rewards/margins": 0.2909093499183655, "rewards/rejected": -0.43006306886672974, "step": 2070 }, { "epoch": 0.4, "learning_rate": 3.7786791004399353e-06, "logits/chosen": -1.5684443712234497, "logits/rejected": -0.9906244277954102, "logps/chosen": -592.7698974609375, "logps/rejected": -1227.1822509765625, "loss": 0.1206, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.13039204478263855, "rewards/margins": 0.3637987971305847, "rewards/rejected": -0.4941907823085785, "step": 2080 }, { "epoch": 0.4, "learning_rate": 3.764367497390642e-06, "logits/chosen": -1.1963915824890137, "logits/rejected": -0.7949463725090027, "logps/chosen": -695.6273193359375, "logps/rejected": -1262.935302734375, "loss": 0.1701, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1959591805934906, "rewards/margins": 0.32200273871421814, "rewards/rejected": -0.5179619193077087, "step": 2090 }, { "epoch": 0.4, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.1150803565979004, "logits/rejected": -0.8243590593338013, "logps/chosen": -738.6058349609375, "logps/rejected": -1304.2548828125, "loss": 0.1389, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.26565247774124146, "rewards/margins": 0.36396244168281555, "rewards/rejected": -0.6296149492263794, "step": 2100 }, { "epoch": 0.4, "learning_rate": 3.7355772434170523e-06, "logits/chosen": -1.1940969228744507, "logits/rejected": -0.577799916267395, "logps/chosen": -717.1580810546875, "logps/rejected": -1174.541259765625, "loss": 0.1605, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2500852048397064, "rewards/margins": 0.3062884509563446, "rewards/rejected": -0.5563737154006958, "step": 2110 }, { "epoch": 0.4, "learning_rate": 3.7210998652337016e-06, "logits/chosen": -1.5477961301803589, "logits/rejected": -0.9089897871017456, "logps/chosen": -655.0966186523438, "logps/rejected": -1230.6929931640625, "loss": 0.0893, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21259336173534393, "rewards/margins": 0.348269522190094, "rewards/rejected": -0.5608628392219543, "step": 2120 }, { "epoch": 0.41, "learning_rate": 3.7065685054565277e-06, "logits/chosen": -1.5412157773971558, "logits/rejected": -0.6528169512748718, "logps/chosen": -742.4505615234375, "logps/rejected": -1409.538330078125, "loss": 0.17, "rewards/accuracies": 0.875, "rewards/chosen": -0.20062299072742462, "rewards/margins": 0.4100804924964905, "rewards/rejected": -0.6107034087181091, "step": 2130 }, { "epoch": 0.41, "learning_rate": 3.691983806478494e-06, "logits/chosen": -1.364302635192871, "logits/rejected": -1.0505597591400146, "logps/chosen": -647.7124633789062, "logps/rejected": -1196.28369140625, "loss": 0.1955, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21452391147613525, "rewards/margins": 0.3072245717048645, "rewards/rejected": -0.521748423576355, "step": 2140 }, { "epoch": 0.41, "learning_rate": 3.677346413050551e-06, "logits/chosen": -1.2281032800674438, "logits/rejected": -0.7881981730461121, "logps/chosen": -594.7728881835938, "logps/rejected": -1143.158935546875, "loss": 0.151, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11433923244476318, "rewards/margins": 0.26201775670051575, "rewards/rejected": -0.37635698914527893, "step": 2150 }, { "epoch": 0.41, "learning_rate": 3.6626569722531268e-06, "logits/chosen": -1.1320793628692627, "logits/rejected": -0.7946068048477173, "logps/chosen": -530.4256591796875, "logps/rejected": -1185.376220703125, "loss": 0.149, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.08485416322946548, "rewards/margins": 0.34793829917907715, "rewards/rejected": -0.4327924847602844, "step": 2160 }, { "epoch": 0.41, "learning_rate": 3.6479161334675294e-06, "logits/chosen": -0.9808338284492493, "logits/rejected": -0.655880331993103, "logps/chosen": -577.3077392578125, "logps/rejected": -1390.304443359375, "loss": 0.0928, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17440642416477203, "rewards/margins": 0.45003876090049744, "rewards/rejected": -0.6244451999664307, "step": 2170 }, { "epoch": 0.42, "learning_rate": 3.6331245483472353e-06, "logits/chosen": -1.3242298364639282, "logits/rejected": -0.8474270701408386, "logps/chosen": -674.1361083984375, "logps/rejected": -1173.4654541015625, "loss": 0.143, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1747753918170929, "rewards/margins": 0.3353702127933502, "rewards/rejected": -0.5101455450057983, "step": 2180 }, { "epoch": 0.42, "learning_rate": 3.6182828707890816e-06, "logits/chosen": -1.2232577800750732, "logits/rejected": -0.8688710331916809, "logps/chosen": -573.0668334960938, "logps/rejected": -1264.8297119140625, "loss": 0.1742, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17573964595794678, "rewards/margins": 0.3904005289077759, "rewards/rejected": -0.5661401152610779, "step": 2190 }, { "epoch": 0.42, "learning_rate": 3.6033917569043604e-06, "logits/chosen": -1.2476309537887573, "logits/rejected": -0.8130432367324829, "logps/chosen": -555.2723388671875, "logps/rejected": -1177.34130859375, "loss": 0.1227, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16443537175655365, "rewards/margins": 0.36548200249671936, "rewards/rejected": -0.5299173593521118, "step": 2200 }, { "epoch": 0.42, "learning_rate": 3.588451864989811e-06, "logits/chosen": -1.141230583190918, "logits/rejected": -0.7061150670051575, "logps/chosen": -617.2860107421875, "logps/rejected": -1251.1861572265625, "loss": 0.1214, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17058375477790833, "rewards/margins": 0.3323902487754822, "rewards/rejected": -0.5029739737510681, "step": 2210 }, { "epoch": 0.42, "learning_rate": 3.5734638554985234e-06, "logits/chosen": -1.3979885578155518, "logits/rejected": -0.8291142582893372, "logps/chosen": -760.4945068359375, "logps/rejected": -1155.178955078125, "loss": 0.2026, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21520309150218964, "rewards/margins": 0.3011152148246765, "rewards/rejected": -0.5163183212280273, "step": 2220 }, { "epoch": 0.42, "learning_rate": 3.5584283910107343e-06, "logits/chosen": -1.0205285549163818, "logits/rejected": -0.49086451530456543, "logps/chosen": -662.8735961914062, "logps/rejected": -1314.891845703125, "loss": 0.126, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21770426630973816, "rewards/margins": 0.36773937940597534, "rewards/rejected": -0.5854436755180359, "step": 2230 }, { "epoch": 0.43, "learning_rate": 3.543346136204545e-06, "logits/chosen": -1.265298843383789, "logits/rejected": -0.6332573890686035, "logps/chosen": -709.36083984375, "logps/rejected": -1145.278076171875, "loss": 0.1931, "rewards/accuracies": 0.75, "rewards/chosen": -0.2619430124759674, "rewards/margins": 0.2858063280582428, "rewards/rejected": -0.5477493405342102, "step": 2240 }, { "epoch": 0.43, "learning_rate": 3.5282177578265295e-06, "logits/chosen": -1.2400686740875244, "logits/rejected": -0.8400391340255737, "logps/chosen": -695.9065551757812, "logps/rejected": -1354.976806640625, "loss": 0.1051, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.27938929200172424, "rewards/margins": 0.36980706453323364, "rewards/rejected": -0.6491963267326355, "step": 2250 }, { "epoch": 0.43, "learning_rate": 3.5130439246622635e-06, "logits/chosen": -1.1504467725753784, "logits/rejected": -0.7273313999176025, "logps/chosen": -779.0042724609375, "logps/rejected": -1438.2664794921875, "loss": 0.0972, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.33439522981643677, "rewards/margins": 0.39211294054985046, "rewards/rejected": -0.7265081405639648, "step": 2260 }, { "epoch": 0.43, "learning_rate": 3.497825307506758e-06, "logits/chosen": -1.3013336658477783, "logits/rejected": -0.8293247222900391, "logps/chosen": -740.0505981445312, "logps/rejected": -1453.6439208984375, "loss": 0.1681, "rewards/accuracies": 0.875, "rewards/chosen": -0.32560890913009644, "rewards/margins": 0.4059384763240814, "rewards/rejected": -0.731547474861145, "step": 2270 }, { "epoch": 0.43, "learning_rate": 3.4825625791348093e-06, "logits/chosen": -1.3770142793655396, "logits/rejected": -0.6459300518035889, "logps/chosen": -744.8535766601562, "logps/rejected": -1214.858154296875, "loss": 0.2006, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24249497056007385, "rewards/margins": 0.3268192410469055, "rewards/rejected": -0.5693141222000122, "step": 2280 }, { "epoch": 0.44, "learning_rate": 3.467256414271249e-06, "logits/chosen": -1.4594746828079224, "logits/rejected": -0.8764937520027161, "logps/chosen": -619.4993896484375, "logps/rejected": -1200.8060302734375, "loss": 0.1068, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17955270409584045, "rewards/margins": 0.3195507526397705, "rewards/rejected": -0.49910345673561096, "step": 2290 }, { "epoch": 0.44, "learning_rate": 3.4519074895611245e-06, "logits/chosen": -1.4393250942230225, "logits/rejected": -0.9791213870048523, "logps/chosen": -605.4874267578125, "logps/rejected": -1121.820556640625, "loss": 0.1096, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13679179549217224, "rewards/margins": 0.32319575548171997, "rewards/rejected": -0.4599875509738922, "step": 2300 }, { "epoch": 0.44, "learning_rate": 3.436516483539781e-06, "logits/chosen": -1.5887141227722168, "logits/rejected": -0.8892790079116821, "logps/chosen": -647.2487182617188, "logps/rejected": -1159.001708984375, "loss": 0.1413, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.183025524020195, "rewards/margins": 0.34414952993392944, "rewards/rejected": -0.5271750688552856, "step": 2310 }, { "epoch": 0.44, "learning_rate": 3.421084076602867e-06, "logits/chosen": -1.3948428630828857, "logits/rejected": -0.811150848865509, "logps/chosen": -680.2684936523438, "logps/rejected": -1143.1392822265625, "loss": 0.1359, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20170864462852478, "rewards/margins": 0.3125322759151459, "rewards/rejected": -0.5142408609390259, "step": 2320 }, { "epoch": 0.44, "learning_rate": 3.405610950976257e-06, "logits/chosen": -1.193880319595337, "logits/rejected": -0.8295865058898926, "logps/chosen": -720.5093994140625, "logps/rejected": -1335.20166015625, "loss": 0.1153, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2470368891954422, "rewards/margins": 0.3655020594596863, "rewards/rejected": -0.6125389337539673, "step": 2330 }, { "epoch": 0.45, "learning_rate": 3.3900977906858923e-06, "logits/chosen": -1.447500467300415, "logits/rejected": -0.9078332185745239, "logps/chosen": -663.2506103515625, "logps/rejected": -1140.707763671875, "loss": 0.185, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16475597023963928, "rewards/margins": 0.32917505502700806, "rewards/rejected": -0.49393099546432495, "step": 2340 }, { "epoch": 0.45, "learning_rate": 3.3745452815275375e-06, "logits/chosen": -1.274245262145996, "logits/rejected": -0.9042494893074036, "logps/chosen": -531.5411376953125, "logps/rejected": -1157.0191650390625, "loss": 0.1451, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13722440600395203, "rewards/margins": 0.3919863700866699, "rewards/rejected": -0.5292107462882996, "step": 2350 }, { "epoch": 0.45, "learning_rate": 3.3589541110364678e-06, "logits/chosen": -1.1918079853057861, "logits/rejected": -0.6122594475746155, "logps/chosen": -739.5403442382812, "logps/rejected": -1287.9014892578125, "loss": 0.1398, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.25828179717063904, "rewards/margins": 0.32268136739730835, "rewards/rejected": -0.580963134765625, "step": 2360 }, { "epoch": 0.45, "learning_rate": 3.3433249684570757e-06, "logits/chosen": -1.5309993028640747, "logits/rejected": -0.9379400014877319, "logps/chosen": -588.4664306640625, "logps/rejected": -1311.06982421875, "loss": 0.1264, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16523903608322144, "rewards/margins": 0.42264652252197266, "rewards/rejected": -0.5878855586051941, "step": 2370 }, { "epoch": 0.45, "learning_rate": 3.3276585447123957e-06, "logits/chosen": -1.6091543436050415, "logits/rejected": -0.910578727722168, "logps/chosen": -689.0510864257812, "logps/rejected": -1201.344970703125, "loss": 0.1537, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16112855076789856, "rewards/margins": 0.32666200399398804, "rewards/rejected": -0.4877905249595642, "step": 2380 }, { "epoch": 0.46, "learning_rate": 3.3119555323735664e-06, "logits/chosen": -1.1626198291778564, "logits/rejected": -1.0693439245224, "logps/chosen": -530.9092407226562, "logps/rejected": -1132.2298583984375, "loss": 0.1409, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17957080900669098, "rewards/margins": 0.33662185072898865, "rewards/rejected": -0.5161926746368408, "step": 2390 }, { "epoch": 0.46, "learning_rate": 3.2962166256292116e-06, "logits/chosen": -1.078432321548462, "logits/rejected": -0.882637619972229, "logps/chosen": -632.7577514648438, "logps/rejected": -1368.06982421875, "loss": 0.0997, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21540217101573944, "rewards/margins": 0.40605098009109497, "rewards/rejected": -0.6214531660079956, "step": 2400 }, { "epoch": 0.46, "learning_rate": 3.2804425202547494e-06, "logits/chosen": -1.1566262245178223, "logits/rejected": -0.873746395111084, "logps/chosen": -712.541259765625, "logps/rejected": -1309.746337890625, "loss": 0.1449, "rewards/accuracies": 0.875, "rewards/chosen": -0.2719976305961609, "rewards/margins": 0.3202235996723175, "rewards/rejected": -0.5922211408615112, "step": 2410 }, { "epoch": 0.46, "learning_rate": 3.2646339135816386e-06, "logits/chosen": -1.1526052951812744, "logits/rejected": -0.6483839154243469, "logps/chosen": -696.2269287109375, "logps/rejected": -1328.681640625, "loss": 0.1365, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22893765568733215, "rewards/margins": 0.33363354206085205, "rewards/rejected": -0.5625711679458618, "step": 2420 }, { "epoch": 0.46, "learning_rate": 3.2487915044665485e-06, "logits/chosen": -1.4031927585601807, "logits/rejected": -0.6022292375564575, "logps/chosen": -734.1324462890625, "logps/rejected": -1427.7449951171875, "loss": 0.0716, "rewards/accuracies": 1.0, "rewards/chosen": -0.20992405712604523, "rewards/margins": 0.4092291295528412, "rewards/rejected": -0.6191532611846924, "step": 2430 }, { "epoch": 0.46, "learning_rate": 3.2329159932604638e-06, "logits/chosen": -1.4655476808547974, "logits/rejected": -0.7912246584892273, "logps/chosen": -677.5623779296875, "logps/rejected": -1336.5660400390625, "loss": 0.1429, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17878834903240204, "rewards/margins": 0.41769617795944214, "rewards/rejected": -0.5964845418930054, "step": 2440 }, { "epoch": 0.47, "learning_rate": 3.217008081777726e-06, "logits/chosen": -1.455952525138855, "logits/rejected": -0.7825593948364258, "logps/chosen": -684.768798828125, "logps/rejected": -1285.584228515625, "loss": 0.1329, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1612541526556015, "rewards/margins": 0.37014898657798767, "rewards/rejected": -0.5314030647277832, "step": 2450 }, { "epoch": 0.47, "learning_rate": 3.201068473265007e-06, "logits/chosen": -1.3329956531524658, "logits/rejected": -1.0923413038253784, "logps/chosen": -590.7109985351562, "logps/rejected": -1165.980712890625, "loss": 0.154, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14957448840141296, "rewards/margins": 0.32259348034858704, "rewards/rejected": -0.47216796875, "step": 2460 }, { "epoch": 0.47, "learning_rate": 3.1850978723702213e-06, "logits/chosen": -1.35431706905365, "logits/rejected": -0.6948716044425964, "logps/chosen": -519.1008911132812, "logps/rejected": -1139.4970703125, "loss": 0.1465, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12256044149398804, "rewards/margins": 0.3723739981651306, "rewards/rejected": -0.49493446946144104, "step": 2470 }, { "epoch": 0.47, "learning_rate": 3.1690969851113724e-06, "logits/chosen": -1.1916701793670654, "logits/rejected": -0.728734016418457, "logps/chosen": -670.5787353515625, "logps/rejected": -1236.937744140625, "loss": 0.1369, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21277372539043427, "rewards/margins": 0.343991219997406, "rewards/rejected": -0.5567649602890015, "step": 2480 }, { "epoch": 0.47, "learning_rate": 3.1530665188453463e-06, "logits/chosen": -1.275588035583496, "logits/rejected": -0.7608711123466492, "logps/chosen": -699.1370849609375, "logps/rejected": -1446.107177734375, "loss": 0.0932, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.23058712482452393, "rewards/margins": 0.4649442732334137, "rewards/rejected": -0.69553142786026, "step": 2490 }, { "epoch": 0.48, "learning_rate": 3.137007182236637e-06, "logits/chosen": -1.1952766180038452, "logits/rejected": -0.6948586702346802, "logps/chosen": -702.9150390625, "logps/rejected": -1346.622314453125, "loss": 0.1778, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23727735877037048, "rewards/margins": 0.3740360736846924, "rewards/rejected": -0.6113134622573853, "step": 2500 }, { "epoch": 0.48, "learning_rate": 3.1209196852260204e-06, "logits/chosen": -1.2836663722991943, "logits/rejected": -0.6611964106559753, "logps/chosen": -705.1036376953125, "logps/rejected": -1195.1522216796875, "loss": 0.1629, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23696069419384003, "rewards/margins": 0.30969372391700745, "rewards/rejected": -0.5466543436050415, "step": 2510 }, { "epoch": 0.48, "learning_rate": 3.1048047389991693e-06, "logits/chosen": -1.259320855140686, "logits/rejected": -0.8590114712715149, "logps/chosen": -611.6530151367188, "logps/rejected": -1171.3182373046875, "loss": 0.1738, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19809211790561676, "rewards/margins": 0.33735308051109314, "rewards/rejected": -0.5354452133178711, "step": 2520 }, { "epoch": 0.48, "learning_rate": 3.0886630559552144e-06, "logits/chosen": -1.6304620504379272, "logits/rejected": -0.9268521070480347, "logps/chosen": -715.045166015625, "logps/rejected": -1346.1966552734375, "loss": 0.1439, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22071857750415802, "rewards/margins": 0.3608259856700897, "rewards/rejected": -0.5815445184707642, "step": 2530 }, { "epoch": 0.48, "learning_rate": 3.072495349675249e-06, "logits/chosen": -1.344519019126892, "logits/rejected": -0.7373297810554504, "logps/chosen": -654.3245849609375, "logps/rejected": -1251.800537109375, "loss": 0.1685, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.188394233584404, "rewards/margins": 0.36836719512939453, "rewards/rejected": -0.5567613840103149, "step": 2540 }, { "epoch": 0.49, "learning_rate": 3.056302334890786e-06, "logits/chosen": -1.188240647315979, "logits/rejected": -0.6836980581283569, "logps/chosen": -688.141357421875, "logps/rejected": -1250.227294921875, "loss": 0.1454, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21923711895942688, "rewards/margins": 0.3769698739051819, "rewards/rejected": -0.5962069630622864, "step": 2550 }, { "epoch": 0.49, "learning_rate": 3.04008472745216e-06, "logits/chosen": -1.1567745208740234, "logits/rejected": -0.840670108795166, "logps/chosen": -683.027587890625, "logps/rejected": -1340.2181396484375, "loss": 0.1634, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23779603838920593, "rewards/margins": 0.37171220779418945, "rewards/rejected": -0.609508216381073, "step": 2560 }, { "epoch": 0.49, "learning_rate": 3.0238432442968803e-06, "logits/chosen": -1.653428316116333, "logits/rejected": -1.0281118154525757, "logps/chosen": -652.8214111328125, "logps/rejected": -1302.401611328125, "loss": 0.1609, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2061002552509308, "rewards/margins": 0.3352206349372864, "rewards/rejected": -0.5413209199905396, "step": 2570 }, { "epoch": 0.49, "learning_rate": 3.0075786034179407e-06, "logits/chosen": -1.2312666177749634, "logits/rejected": -0.7779617309570312, "logps/chosen": -702.6845092773438, "logps/rejected": -1300.8258056640625, "loss": 0.1591, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22368571162223816, "rewards/margins": 0.3962397575378418, "rewards/rejected": -0.6199254989624023, "step": 2580 }, { "epoch": 0.49, "learning_rate": 2.9912915238320755e-06, "logits/chosen": -1.1236364841461182, "logits/rejected": -0.517497718334198, "logps/chosen": -717.16845703125, "logps/rejected": -1139.229248046875, "loss": 0.1856, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21108496189117432, "rewards/margins": 0.27783527970314026, "rewards/rejected": -0.48892027139663696, "step": 2590 }, { "epoch": 0.5, "learning_rate": 2.974982725547976e-06, "logits/chosen": -1.2851816415786743, "logits/rejected": -0.614777684211731, "logps/chosen": -633.1151733398438, "logps/rejected": -1295.719970703125, "loss": 0.1266, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18555249273777008, "rewards/margins": 0.3568888306617737, "rewards/rejected": -0.5424412488937378, "step": 2600 }, { "epoch": 0.5, "learning_rate": 2.958652929534456e-06, "logits/chosen": -1.2323633432388306, "logits/rejected": -0.7411845326423645, "logps/chosen": -712.4410400390625, "logps/rejected": -1184.3865966796875, "loss": 0.1972, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20911547541618347, "rewards/margins": 0.27840426564216614, "rewards/rejected": -0.4875197410583496, "step": 2610 }, { "epoch": 0.5, "learning_rate": 2.9423028576885894e-06, "logits/chosen": -1.1827176809310913, "logits/rejected": -0.7810096144676208, "logps/chosen": -552.6229248046875, "logps/rejected": -1073.3472900390625, "loss": 0.1524, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17607593536376953, "rewards/margins": 0.3017176687717438, "rewards/rejected": -0.4777936041355133, "step": 2620 }, { "epoch": 0.5, "learning_rate": 2.9259332328037852e-06, "logits/chosen": -1.1173738241195679, "logits/rejected": -0.5114302635192871, "logps/chosen": -585.2100219726562, "logps/rejected": -1109.5047607421875, "loss": 0.0914, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16614975035190582, "rewards/margins": 0.37607747316360474, "rewards/rejected": -0.5422271490097046, "step": 2630 }, { "epoch": 0.5, "learning_rate": 2.9095447785378446e-06, "logits/chosen": -1.1125425100326538, "logits/rejected": -0.6108592748641968, "logps/chosen": -532.2913818359375, "logps/rejected": -1165.22900390625, "loss": 0.1347, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16372396051883698, "rewards/margins": 0.35369619727134705, "rewards/rejected": -0.5174201726913452, "step": 2640 }, { "epoch": 0.5, "learning_rate": 2.893138219380964e-06, "logits/chosen": -1.283231496810913, "logits/rejected": -0.9531766176223755, "logps/chosen": -630.4801025390625, "logps/rejected": -1332.77392578125, "loss": 0.1472, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18999141454696655, "rewards/margins": 0.3947600722312927, "rewards/rejected": -0.5847514867782593, "step": 2650 }, { "epoch": 0.51, "learning_rate": 2.876714280623708e-06, "logits/chosen": -1.3367483615875244, "logits/rejected": -0.80877286195755, "logps/chosen": -520.8480224609375, "logps/rejected": -1174.756103515625, "loss": 0.1362, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1368516981601715, "rewards/margins": 0.370403528213501, "rewards/rejected": -0.5072552561759949, "step": 2660 }, { "epoch": 0.51, "learning_rate": 2.8602736883249504e-06, "logits/chosen": -1.1700047254562378, "logits/rejected": -0.8310747146606445, "logps/chosen": -620.7849731445312, "logps/rejected": -1286.119384765625, "loss": 0.1269, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15554332733154297, "rewards/margins": 0.4156574606895447, "rewards/rejected": -0.5712007880210876, "step": 2670 }, { "epoch": 0.51, "learning_rate": 2.843817169279772e-06, "logits/chosen": -1.3184053897857666, "logits/rejected": -0.5530634522438049, "logps/chosen": -692.7755126953125, "logps/rejected": -1302.6802978515625, "loss": 0.0852, "rewards/accuracies": 0.875, "rewards/chosen": -0.21319115161895752, "rewards/margins": 0.40780577063560486, "rewards/rejected": -0.6209970116615295, "step": 2680 }, { "epoch": 0.51, "learning_rate": 2.8273454509873333e-06, "logits/chosen": -1.2192256450653076, "logits/rejected": -0.6062721014022827, "logps/chosen": -628.1260375976562, "logps/rejected": -1252.9503173828125, "loss": 0.0933, "rewards/accuracies": 0.875, "rewards/chosen": -0.2101316750049591, "rewards/margins": 0.4121881127357483, "rewards/rejected": -0.6223198175430298, "step": 2690 }, { "epoch": 0.51, "learning_rate": 2.8108592616187135e-06, "logits/chosen": -1.4138648509979248, "logits/rejected": -0.6868306398391724, "logps/chosen": -743.1535034179688, "logps/rejected": -1364.7056884765625, "loss": 0.0964, "rewards/accuracies": 0.875, "rewards/chosen": -0.19858908653259277, "rewards/margins": 0.41844049096107483, "rewards/rejected": -0.61702960729599, "step": 2700 }, { "epoch": 0.52, "learning_rate": 2.7943593299847186e-06, "logits/chosen": -1.1856378316879272, "logits/rejected": -0.5521339178085327, "logps/chosen": -616.3605346679688, "logps/rejected": -1355.580078125, "loss": 0.1093, "rewards/accuracies": 0.875, "rewards/chosen": -0.19345074892044067, "rewards/margins": 0.44275960326194763, "rewards/rejected": -0.6362103819847107, "step": 2710 }, { "epoch": 0.52, "learning_rate": 2.7778463855036656e-06, "logits/chosen": -1.2834575176239014, "logits/rejected": -0.8571203351020813, "logps/chosen": -645.5665283203125, "logps/rejected": -1298.7808837890625, "loss": 0.1525, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18394744396209717, "rewards/margins": 0.3437793254852295, "rewards/rejected": -0.5277267694473267, "step": 2720 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-06, "logits/chosen": -1.6189771890640259, "logits/rejected": -0.6602326035499573, "logps/chosen": -528.0179443359375, "logps/rejected": -1086.173095703125, "loss": 0.0952, "rewards/accuracies": 0.875, "rewards/chosen": -0.11647792905569077, "rewards/margins": 0.35972946882247925, "rewards/rejected": -0.476207435131073, "step": 2730 }, { "epoch": 0.52, "learning_rate": 2.7447843785176958e-06, "logits/chosen": -1.3356536626815796, "logits/rejected": -0.7880634069442749, "logps/chosen": -674.3583984375, "logps/rejected": -1053.1739501953125, "loss": 0.234, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1534251719713211, "rewards/margins": 0.25006696581840515, "rewards/rejected": -0.40349215269088745, "step": 2740 }, { "epoch": 0.52, "learning_rate": 2.728236777596621e-06, "logits/chosen": -1.4571244716644287, "logits/rejected": -0.792648434638977, "logps/chosen": -595.8406982421875, "logps/rejected": -1163.229248046875, "loss": 0.2051, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1462465077638626, "rewards/margins": 0.31508877873420715, "rewards/rejected": -0.4613352417945862, "step": 2750 }, { "epoch": 0.53, "learning_rate": 2.7116790869315583e-06, "logits/chosen": -1.269464135169983, "logits/rejected": -0.8573349118232727, "logps/chosen": -643.273193359375, "logps/rejected": -1155.2039794921875, "loss": 0.174, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1968885362148285, "rewards/margins": 0.27871397137641907, "rewards/rejected": -0.47560253739356995, "step": 2760 }, { "epoch": 0.53, "learning_rate": 2.695112038494198e-06, "logits/chosen": -1.2030057907104492, "logits/rejected": -0.6855699419975281, "logps/chosen": -627.1573486328125, "logps/rejected": -1178.1893310546875, "loss": 0.1559, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1748468428850174, "rewards/margins": 0.3222338557243347, "rewards/rejected": -0.4970807433128357, "step": 2770 }, { "epoch": 0.53, "learning_rate": 2.6785363646699125e-06, "logits/chosen": -1.6578325033187866, "logits/rejected": -0.9508485794067383, "logps/chosen": -589.2328491210938, "logps/rejected": -1053.3826904296875, "loss": 0.1632, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1356348693370819, "rewards/margins": 0.3082703948020935, "rewards/rejected": -0.4439052641391754, "step": 2780 }, { "epoch": 0.53, "learning_rate": 2.6619527982253796e-06, "logits/chosen": -1.5094674825668335, "logits/rejected": -1.0003337860107422, "logps/chosen": -614.179443359375, "logps/rejected": -1248.248291015625, "loss": 0.105, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17690172791481018, "rewards/margins": 0.36094677448272705, "rewards/rejected": -0.5378484725952148, "step": 2790 }, { "epoch": 0.53, "learning_rate": 2.6453620722761897e-06, "logits/chosen": -1.3403935432434082, "logits/rejected": -1.0036475658416748, "logps/chosen": -642.2327880859375, "logps/rejected": -1178.154541015625, "loss": 0.1615, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22599093616008759, "rewards/margins": 0.3031874895095825, "rewards/rejected": -0.5291783809661865, "step": 2800 }, { "epoch": 0.54, "learning_rate": 2.628764920254435e-06, "logits/chosen": -1.0864436626434326, "logits/rejected": -0.7584824562072754, "logps/chosen": -656.6350708007812, "logps/rejected": -1280.084228515625, "loss": 0.1024, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22639353573322296, "rewards/margins": 0.37987181544303894, "rewards/rejected": -0.6062653660774231, "step": 2810 }, { "epoch": 0.54, "learning_rate": 2.6121620758762877e-06, "logits/chosen": -1.1653586626052856, "logits/rejected": -0.7054857015609741, "logps/chosen": -730.0079345703125, "logps/rejected": -1356.309814453125, "loss": 0.1553, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2346700131893158, "rewards/margins": 0.3929257392883301, "rewards/rejected": -0.6275956630706787, "step": 2820 }, { "epoch": 0.54, "learning_rate": 2.595554273109564e-06, "logits/chosen": -1.4682152271270752, "logits/rejected": -0.8444644808769226, "logps/chosen": -644.2890625, "logps/rejected": -1284.6246337890625, "loss": 0.141, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20568695664405823, "rewards/margins": 0.3958762586116791, "rewards/rejected": -0.6015631556510925, "step": 2830 }, { "epoch": 0.54, "learning_rate": 2.5789422461412776e-06, "logits/chosen": -1.3313639163970947, "logits/rejected": -0.789891242980957, "logps/chosen": -704.336181640625, "logps/rejected": -1463.5936279296875, "loss": 0.1195, "rewards/accuracies": 0.875, "rewards/chosen": -0.21005280315876007, "rewards/margins": 0.4353434145450592, "rewards/rejected": -0.6453962326049805, "step": 2840 }, { "epoch": 0.54, "learning_rate": 2.5623267293451827e-06, "logits/chosen": -1.512527585029602, "logits/rejected": -0.838526725769043, "logps/chosen": -637.8837890625, "logps/rejected": -1201.1187744140625, "loss": 0.1112, "rewards/accuracies": 0.875, "rewards/chosen": -0.1588594615459442, "rewards/margins": 0.36842361092567444, "rewards/rejected": -0.5272830724716187, "step": 2850 }, { "epoch": 0.54, "learning_rate": 2.5457084572493094e-06, "logits/chosen": -1.669001579284668, "logits/rejected": -0.9330201148986816, "logps/chosen": -634.0943603515625, "logps/rejected": -1069.78515625, "loss": 0.1549, "rewards/accuracies": 0.75, "rewards/chosen": -0.12474080175161362, "rewards/margins": 0.30902618169784546, "rewards/rejected": -0.43376702070236206, "step": 2860 }, { "epoch": 0.55, "learning_rate": 2.5290881645034932e-06, "logits/chosen": -1.5603636503219604, "logits/rejected": -0.895548939704895, "logps/chosen": -623.4061889648438, "logps/rejected": -1268.900146484375, "loss": 0.1108, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15059690177440643, "rewards/margins": 0.4006767272949219, "rewards/rejected": -0.5512735843658447, "step": 2870 }, { "epoch": 0.55, "learning_rate": 2.5124665858468956e-06, "logits/chosen": -1.4589670896530151, "logits/rejected": -0.910359263420105, "logps/chosen": -632.2818603515625, "logps/rejected": -1159.327880859375, "loss": 0.1326, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16128315031528473, "rewards/margins": 0.32945162057876587, "rewards/rejected": -0.4907347559928894, "step": 2880 }, { "epoch": 0.55, "learning_rate": 2.4958444560755268e-06, "logits/chosen": -1.3983685970306396, "logits/rejected": -0.7965866327285767, "logps/chosen": -668.0626220703125, "logps/rejected": -1132.1715087890625, "loss": 0.1554, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1758166253566742, "rewards/margins": 0.31473228335380554, "rewards/rejected": -0.4905489981174469, "step": 2890 }, { "epoch": 0.55, "learning_rate": 2.479222510009758e-06, "logits/chosen": -1.3288921117782593, "logits/rejected": -0.7427980899810791, "logps/chosen": -780.6190185546875, "logps/rejected": -1382.6385498046875, "loss": 0.098, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.17616650462150574, "rewards/margins": 0.3826137185096741, "rewards/rejected": -0.5587801933288574, "step": 2900 }, { "epoch": 0.55, "learning_rate": 2.4626014824618418e-06, "logits/chosen": -1.353317379951477, "logits/rejected": -1.0125176906585693, "logps/chosen": -626.8876342773438, "logps/rejected": -1151.4967041015625, "loss": 0.1394, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17466707527637482, "rewards/margins": 0.31895187497138977, "rewards/rejected": -0.4936189651489258, "step": 2910 }, { "epoch": 0.56, "learning_rate": 2.445982108203422e-06, "logits/chosen": -1.6425803899765015, "logits/rejected": -0.908068060874939, "logps/chosen": -573.92822265625, "logps/rejected": -1174.154296875, "loss": 0.0791, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.10182499885559082, "rewards/margins": 0.406977117061615, "rewards/rejected": -0.5088021755218506, "step": 2920 }, { "epoch": 0.56, "learning_rate": 2.4293651219330614e-06, "logits/chosen": -1.1639844179153442, "logits/rejected": -0.6353759169578552, "logps/chosen": -423.5708923339844, "logps/rejected": -993.1744384765625, "loss": 0.1615, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06568099558353424, "rewards/margins": 0.3412225842475891, "rewards/rejected": -0.40690359473228455, "step": 2930 }, { "epoch": 0.56, "learning_rate": 2.4127512582437486e-06, "logits/chosen": -1.4072645902633667, "logits/rejected": -0.8985492587089539, "logps/chosen": -503.56488037109375, "logps/rejected": -1167.6790771484375, "loss": 0.1181, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10424760729074478, "rewards/margins": 0.3817724287509918, "rewards/rejected": -0.486020028591156, "step": 2940 }, { "epoch": 0.56, "learning_rate": 2.3961412515904337e-06, "logits/chosen": -1.3280065059661865, "logits/rejected": -0.6499324440956116, "logps/chosen": -666.6687622070312, "logps/rejected": -1376.3489990234375, "loss": 0.103, "rewards/accuracies": 0.875, "rewards/chosen": -0.17620491981506348, "rewards/margins": 0.4082191586494446, "rewards/rejected": -0.5844241380691528, "step": 2950 }, { "epoch": 0.56, "learning_rate": 2.3795358362575618e-06, "logits/chosen": -1.4302467107772827, "logits/rejected": -0.8625220060348511, "logps/chosen": -704.021240234375, "logps/rejected": -1118.782470703125, "loss": 0.204, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22685328125953674, "rewards/margins": 0.2564946711063385, "rewards/rejected": -0.48334795236587524, "step": 2960 }, { "epoch": 0.57, "learning_rate": 2.3629357463266e-06, "logits/chosen": -1.237702488899231, "logits/rejected": -0.5888513326644897, "logps/chosen": -679.6105346679688, "logps/rejected": -1400.75927734375, "loss": 0.0929, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18586349487304688, "rewards/margins": 0.40543466806411743, "rewards/rejected": -0.5912982225418091, "step": 2970 }, { "epoch": 0.57, "learning_rate": 2.346341715643601e-06, "logits/chosen": -1.2570858001708984, "logits/rejected": -0.6778491735458374, "logps/chosen": -624.9553833007812, "logps/rejected": -1298.9554443359375, "loss": 0.1236, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15773199498653412, "rewards/margins": 0.38969069719314575, "rewards/rejected": -0.5474227666854858, "step": 2980 }, { "epoch": 0.57, "learning_rate": 2.32975447778675e-06, "logits/chosen": -1.260581612586975, "logits/rejected": -0.9373453259468079, "logps/chosen": -497.40252685546875, "logps/rejected": -988.1329956054688, "loss": 0.1608, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12548328936100006, "rewards/margins": 0.2681237757205963, "rewards/rejected": -0.39360707998275757, "step": 2990 }, { "epoch": 0.57, "learning_rate": 2.3131747660339396e-06, "logits/chosen": -1.4381310939788818, "logits/rejected": -0.6804248094558716, "logps/chosen": -675.74609375, "logps/rejected": -1059.1004638671875, "loss": 0.1735, "rewards/accuracies": 0.75, "rewards/chosen": -0.1629467010498047, "rewards/margins": 0.2915138304233551, "rewards/rejected": -0.4544605612754822, "step": 3000 }, { "epoch": 0.57, "learning_rate": 2.296603313330355e-06, "logits/chosen": -1.2403786182403564, "logits/rejected": -0.9865403175354004, "logps/chosen": -570.98583984375, "logps/rejected": -1332.025634765625, "loss": 0.1357, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1444140374660492, "rewards/margins": 0.39944201707839966, "rewards/rejected": -0.5438560247421265, "step": 3010 }, { "epoch": 0.58, "learning_rate": 2.280040852256068e-06, "logits/chosen": -1.6480066776275635, "logits/rejected": -0.8598796725273132, "logps/chosen": -640.6783447265625, "logps/rejected": -1256.3858642578125, "loss": 0.0919, "rewards/accuracies": 0.875, "rewards/chosen": -0.15866537392139435, "rewards/margins": 0.3985527753829956, "rewards/rejected": -0.5572181940078735, "step": 3020 }, { "epoch": 0.58, "learning_rate": 2.2634881149936576e-06, "logits/chosen": -1.1153600215911865, "logits/rejected": -0.7183237075805664, "logps/chosen": -576.868408203125, "logps/rejected": -1217.5550537109375, "loss": 0.0844, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18046694993972778, "rewards/margins": 0.36683768033981323, "rewards/rejected": -0.547304630279541, "step": 3030 }, { "epoch": 0.58, "learning_rate": 2.246945833295836e-06, "logits/chosen": -1.3217637538909912, "logits/rejected": -0.9497002363204956, "logps/chosen": -701.6013793945312, "logps/rejected": -1358.8414306640625, "loss": 0.1674, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21788398921489716, "rewards/margins": 0.39392000436782837, "rewards/rejected": -0.6118040084838867, "step": 3040 }, { "epoch": 0.58, "learning_rate": 2.230414738453104e-06, "logits/chosen": -1.0161378383636475, "logits/rejected": -0.6661367416381836, "logps/chosen": -716.9393310546875, "logps/rejected": -1298.892578125, "loss": 0.1575, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.25477442145347595, "rewards/margins": 0.355234295129776, "rewards/rejected": -0.6100087761878967, "step": 3050 }, { "epoch": 0.58, "learning_rate": 2.2138955612614206e-06, "logits/chosen": -1.1696593761444092, "logits/rejected": -0.8939590454101562, "logps/chosen": -591.8184814453125, "logps/rejected": -1157.5386962890625, "loss": 0.1715, "rewards/accuracies": 0.75, "rewards/chosen": -0.1666014939546585, "rewards/margins": 0.2978556454181671, "rewards/rejected": -0.46445712447166443, "step": 3060 }, { "epoch": 0.58, "learning_rate": 2.1973890319898965e-06, "logits/chosen": -1.2547149658203125, "logits/rejected": -0.7859278917312622, "logps/chosen": -619.2592163085938, "logps/rejected": -1243.4642333984375, "loss": 0.1725, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1560363471508026, "rewards/margins": 0.32919973134994507, "rewards/rejected": -0.4852360785007477, "step": 3070 }, { "epoch": 0.59, "learning_rate": 2.1808958803485134e-06, "logits/chosen": -1.227962851524353, "logits/rejected": -0.5197386741638184, "logps/chosen": -715.6917724609375, "logps/rejected": -1218.333251953125, "loss": 0.139, "rewards/accuracies": 0.875, "rewards/chosen": -0.2008967399597168, "rewards/margins": 0.32234469056129456, "rewards/rejected": -0.5232414603233337, "step": 3080 }, { "epoch": 0.59, "learning_rate": 2.1644168354558623e-06, "logits/chosen": -1.3220916986465454, "logits/rejected": -0.7695302367210388, "logps/chosen": -641.0, "logps/rejected": -1174.6611328125, "loss": 0.1446, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13602153956890106, "rewards/margins": 0.3456249237060547, "rewards/rejected": -0.48164647817611694, "step": 3090 }, { "epoch": 0.59, "learning_rate": 2.1479526258069086e-06, "logits/chosen": -1.2712328433990479, "logits/rejected": -0.6862407326698303, "logps/chosen": -618.6680908203125, "logps/rejected": -1255.6075439453125, "loss": 0.1023, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14606595039367676, "rewards/margins": 0.36818912625312805, "rewards/rejected": -0.514255166053772, "step": 3100 }, { "epoch": 0.59, "learning_rate": 2.1315039792407975e-06, "logits/chosen": -1.6223697662353516, "logits/rejected": -0.8164051175117493, "logps/chosen": -575.8013916015625, "logps/rejected": -1136.9779052734375, "loss": 0.1584, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15093176066875458, "rewards/margins": 0.3385089933872223, "rewards/rejected": -0.4894407391548157, "step": 3110 }, { "epoch": 0.59, "learning_rate": 2.115071622908666e-06, "logits/chosen": -1.3620809316635132, "logits/rejected": -0.9362419843673706, "logps/chosen": -574.0597534179688, "logps/rejected": -1164.9071044921875, "loss": 0.1186, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11159572750329971, "rewards/margins": 0.3834700286388397, "rewards/rejected": -0.4950657784938812, "step": 3120 }, { "epoch": 0.6, "learning_rate": 2.0986562832415063e-06, "logits/chosen": -1.4660781621932983, "logits/rejected": -0.9878972172737122, "logps/chosen": -561.0264892578125, "logps/rejected": -1118.4351806640625, "loss": 0.1422, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13186056911945343, "rewards/margins": 0.3271760940551758, "rewards/rejected": -0.4590366780757904, "step": 3130 }, { "epoch": 0.6, "learning_rate": 2.082258685918047e-06, "logits/chosen": -1.3061813116073608, "logits/rejected": -0.8454760313034058, "logps/chosen": -561.0923461914062, "logps/rejected": -1075.7889404296875, "loss": 0.1546, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13316716253757477, "rewards/margins": 0.34905123710632324, "rewards/rejected": -0.4822184145450592, "step": 3140 }, { "epoch": 0.6, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -1.3911728858947754, "logits/rejected": -0.9803736805915833, "logps/chosen": -638.9776000976562, "logps/rejected": -1206.3843994140625, "loss": 0.1361, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16358928382396698, "rewards/margins": 0.341183602809906, "rewards/rejected": -0.5047729015350342, "step": 3150 }, { "epoch": 0.6, "learning_rate": 2.049519617063389e-06, "logits/chosen": -1.3889875411987305, "logits/rejected": -0.9294905662536621, "logps/chosen": -542.9625244140625, "logps/rejected": -1051.85546875, "loss": 0.1683, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14223119616508484, "rewards/margins": 0.2886609733104706, "rewards/rejected": -0.4308921694755554, "step": 3160 }, { "epoch": 0.6, "learning_rate": 2.033179592839792e-06, "logits/chosen": -1.2984880208969116, "logits/rejected": -0.534463107585907, "logps/chosen": -754.719482421875, "logps/rejected": -1351.0599365234375, "loss": 0.1365, "rewards/accuracies": 0.875, "rewards/chosen": -0.1791641116142273, "rewards/margins": 0.3925498127937317, "rewards/rejected": -0.571713924407959, "step": 3170 }, { "epoch": 0.61, "learning_rate": 2.0168602055111175e-06, "logits/chosen": -1.3223940134048462, "logits/rejected": -0.7277297973632812, "logps/chosen": -673.4014892578125, "logps/rejected": -1272.4732666015625, "loss": 0.1754, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17117366194725037, "rewards/margins": 0.3154929280281067, "rewards/rejected": -0.48666661977767944, "step": 3180 }, { "epoch": 0.61, "learning_rate": 2.0005621765142942e-06, "logits/chosen": -1.6512504816055298, "logits/rejected": -0.6952489614486694, "logps/chosen": -669.9365844726562, "logps/rejected": -1287.814208984375, "loss": 0.1376, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18224267661571503, "rewards/margins": 0.37694430351257324, "rewards/rejected": -0.5591869354248047, "step": 3190 }, { "epoch": 0.61, "learning_rate": 1.9842862263420565e-06, "logits/chosen": -1.1526167392730713, "logits/rejected": -0.9309229850769043, "logps/chosen": -741.6167602539062, "logps/rejected": -1469.031005859375, "loss": 0.1556, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.27139097452163696, "rewards/margins": 0.38400715589523315, "rewards/rejected": -0.6553980708122253, "step": 3200 }, { "epoch": 0.61, "learning_rate": 1.9680330745110954e-06, "logits/chosen": -1.3391094207763672, "logits/rejected": -0.8636461496353149, "logps/chosen": -753.0921020507812, "logps/rejected": -1270.45849609375, "loss": 0.1433, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.26907286047935486, "rewards/margins": 0.32631435990333557, "rewards/rejected": -0.5953872799873352, "step": 3210 }, { "epoch": 0.61, "learning_rate": 1.9518034395302413e-06, "logits/chosen": -1.4481360912322998, "logits/rejected": -0.8140970468521118, "logps/chosen": -743.0811767578125, "logps/rejected": -1314.015869140625, "loss": 0.1653, "rewards/accuracies": 0.875, "rewards/chosen": -0.2544246315956116, "rewards/margins": 0.3395732045173645, "rewards/rejected": -0.5939978361129761, "step": 3220 }, { "epoch": 0.62, "learning_rate": 1.9355980388687145e-06, "logits/chosen": -1.3119375705718994, "logits/rejected": -0.8712166547775269, "logps/chosen": -599.9112548828125, "logps/rejected": -1200.256591796875, "loss": 0.14, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1929992139339447, "rewards/margins": 0.3526448607444763, "rewards/rejected": -0.5456440448760986, "step": 3230 }, { "epoch": 0.62, "learning_rate": 1.9194175889243942e-06, "logits/chosen": -1.2391819953918457, "logits/rejected": -0.5406273603439331, "logps/chosen": -628.7824096679688, "logps/rejected": -1064.890869140625, "loss": 0.1497, "rewards/accuracies": 0.75, "rewards/chosen": -0.16888299584388733, "rewards/margins": 0.31660085916519165, "rewards/rejected": -0.48548388481140137, "step": 3240 }, { "epoch": 0.62, "learning_rate": 1.903262804992156e-06, "logits/chosen": -1.4092689752578735, "logits/rejected": -0.9603003263473511, "logps/chosen": -583.987060546875, "logps/rejected": -1143.6436767578125, "loss": 0.1324, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1697378009557724, "rewards/margins": 0.3553219735622406, "rewards/rejected": -0.5250598192214966, "step": 3250 }, { "epoch": 0.62, "learning_rate": 1.8871344012322504e-06, "logits/chosen": -1.1899571418762207, "logits/rejected": -0.6299318075180054, "logps/chosen": -607.3427734375, "logps/rejected": -1147.0712890625, "loss": 0.1484, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1492100954055786, "rewards/margins": 0.37963762879371643, "rewards/rejected": -0.5288476943969727, "step": 3260 }, { "epoch": 0.62, "learning_rate": 1.8710330906387288e-06, "logits/chosen": -1.3586984872817993, "logits/rejected": -0.7469106912612915, "logps/chosen": -629.2229614257812, "logps/rejected": -1260.6671142578125, "loss": 0.1402, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1873566210269928, "rewards/margins": 0.3727463483810425, "rewards/rejected": -0.5601029396057129, "step": 3270 }, { "epoch": 0.62, "learning_rate": 1.8549595850079272e-06, "logits/chosen": -1.218826413154602, "logits/rejected": -0.5827449560165405, "logps/chosen": -713.9976806640625, "logps/rejected": -1333.36865234375, "loss": 0.1516, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24330130219459534, "rewards/margins": 0.37012121081352234, "rewards/rejected": -0.6134225130081177, "step": 3280 }, { "epoch": 0.63, "learning_rate": 1.8389145949069953e-06, "logits/chosen": -1.254154920578003, "logits/rejected": -0.7086342573165894, "logps/chosen": -774.9803466796875, "logps/rejected": -1327.598388671875, "loss": 0.1609, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2706790566444397, "rewards/margins": 0.3544086515903473, "rewards/rejected": -0.6250876188278198, "step": 3290 }, { "epoch": 0.63, "learning_rate": 1.8228988296424877e-06, "logits/chosen": -1.238526701927185, "logits/rejected": -0.5777446031570435, "logps/chosen": -734.1629638671875, "logps/rejected": -1278.482177734375, "loss": 0.1728, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24784033000469208, "rewards/margins": 0.3742365539073944, "rewards/rejected": -0.6220768690109253, "step": 3300 }, { "epoch": 0.63, "learning_rate": 1.806912997229008e-06, "logits/chosen": -1.337026834487915, "logits/rejected": -0.73210608959198, "logps/chosen": -616.189453125, "logps/rejected": -1284.3951416015625, "loss": 0.0995, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1656108945608139, "rewards/margins": 0.3951425850391388, "rewards/rejected": -0.5607534646987915, "step": 3310 }, { "epoch": 0.63, "learning_rate": 1.7909578043579037e-06, "logits/chosen": -1.5598376989364624, "logits/rejected": -0.9694225192070007, "logps/chosen": -605.2431640625, "logps/rejected": -1196.6907958984375, "loss": 0.1812, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1923334002494812, "rewards/margins": 0.32692453265190125, "rewards/rejected": -0.5192579030990601, "step": 3320 }, { "epoch": 0.63, "learning_rate": 1.7750339563660346e-06, "logits/chosen": -1.3083417415618896, "logits/rejected": -0.7350462675094604, "logps/chosen": -564.703125, "logps/rejected": -1031.722412109375, "loss": 0.1698, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15099498629570007, "rewards/margins": 0.28776273131370544, "rewards/rejected": -0.4387577176094055, "step": 3330 }, { "epoch": 0.64, "learning_rate": 1.759142157204583e-06, "logits/chosen": -1.5791130065917969, "logits/rejected": -0.6345584988594055, "logps/chosen": -673.38525390625, "logps/rejected": -1164.561279296875, "loss": 0.1682, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15048852562904358, "rewards/margins": 0.35989099740982056, "rewards/rejected": -0.5103794932365417, "step": 3340 }, { "epoch": 0.64, "learning_rate": 1.7432831094079357e-06, "logits/chosen": -1.0886738300323486, "logits/rejected": -0.8590899705886841, "logps/chosen": -572.8646850585938, "logps/rejected": -1247.0721435546875, "loss": 0.1216, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15038278698921204, "rewards/margins": 0.36671438813209534, "rewards/rejected": -0.5170971155166626, "step": 3350 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.232344388961792, "logits/rejected": -0.5832250118255615, "logps/chosen": -607.7042846679688, "logps/rejected": -1236.8526611328125, "loss": 0.141, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14053115248680115, "rewards/margins": 0.3645753562450409, "rewards/rejected": -0.505106508731842, "step": 3360 }, { "epoch": 0.64, "learning_rate": 1.7116660707763637e-06, "logits/chosen": -1.2681310176849365, "logits/rejected": -0.8806220889091492, "logps/chosen": -530.366943359375, "logps/rejected": -1044.75439453125, "loss": 0.1406, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10312321037054062, "rewards/margins": 0.31398484110832214, "rewards/rejected": -0.41710805892944336, "step": 3370 }, { "epoch": 0.64, "learning_rate": 1.695909477647054e-06, "logits/chosen": -1.354081630706787, "logits/rejected": -1.1495956182479858, "logps/chosen": -500.3272399902344, "logps/rejected": -1167.035888671875, "loss": 0.0934, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11254183202981949, "rewards/margins": 0.33835655450820923, "rewards/rejected": -0.45089834928512573, "step": 3380 }, { "epoch": 0.65, "learning_rate": 1.6801884312319893e-06, "logits/chosen": -1.4185993671417236, "logits/rejected": -0.8398419618606567, "logps/chosen": -562.6046752929688, "logps/rejected": -1222.684326171875, "loss": 0.1424, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10464582592248917, "rewards/margins": 0.35574817657470703, "rewards/rejected": -0.4603939950466156, "step": 3390 }, { "epoch": 0.65, "learning_rate": 1.6645036265170314e-06, "logits/chosen": -1.145711898803711, "logits/rejected": -0.5923820734024048, "logps/chosen": -658.3640747070312, "logps/rejected": -1288.2315673828125, "loss": 0.1219, "rewards/accuracies": 0.875, "rewards/chosen": -0.16214919090270996, "rewards/margins": 0.3784920871257782, "rewards/rejected": -0.5406412482261658, "step": 3400 }, { "epoch": 0.65, "learning_rate": 1.648855756885893e-06, "logits/chosen": -1.6798959970474243, "logits/rejected": -0.9430153965950012, "logps/chosen": -660.0595703125, "logps/rejected": -1357.20751953125, "loss": 0.1105, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.13158023357391357, "rewards/margins": 0.4688226580619812, "rewards/rejected": -0.6004029512405396, "step": 3410 }, { "epoch": 0.65, "learning_rate": 1.633245514089482e-06, "logits/chosen": -1.1770648956298828, "logits/rejected": -0.8179131746292114, "logps/chosen": -540.3776245117188, "logps/rejected": -1088.2252197265625, "loss": 0.1614, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1655862033367157, "rewards/margins": 0.31001153588294983, "rewards/rejected": -0.47559770941734314, "step": 3420 }, { "epoch": 0.65, "learning_rate": 1.6176735882153284e-06, "logits/chosen": -1.2217695713043213, "logits/rejected": -0.8322153091430664, "logps/chosen": -591.529541015625, "logps/rejected": -1096.73681640625, "loss": 0.188, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15774957835674286, "rewards/margins": 0.31352144479751587, "rewards/rejected": -0.47127097845077515, "step": 3430 }, { "epoch": 0.66, "learning_rate": 1.6021406676570667e-06, "logits/chosen": -1.6108424663543701, "logits/rejected": -0.4408155381679535, "logps/chosen": -736.3297119140625, "logps/rejected": -1439.7017822265625, "loss": 0.1032, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.17098134756088257, "rewards/margins": 0.4619825482368469, "rewards/rejected": -0.6329637765884399, "step": 3440 }, { "epoch": 0.66, "learning_rate": 1.5866474390840126e-06, "logits/chosen": -0.9174199104309082, "logits/rejected": -0.3748546540737152, "logps/chosen": -516.4058837890625, "logps/rejected": -1139.381591796875, "loss": 0.1598, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12665481865406036, "rewards/margins": 0.39082154631614685, "rewards/rejected": -0.5174763798713684, "step": 3450 }, { "epoch": 0.66, "learning_rate": 1.5711945874108053e-06, "logits/chosen": -0.8216487765312195, "logits/rejected": -0.460014283657074, "logps/chosen": -680.7373046875, "logps/rejected": -1179.248779296875, "loss": 0.1323, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22821757197380066, "rewards/margins": 0.3213030993938446, "rewards/rejected": -0.54952073097229, "step": 3460 }, { "epoch": 0.66, "learning_rate": 1.5557827957671249e-06, "logits/chosen": -1.1738169193267822, "logits/rejected": -0.4187871515750885, "logps/chosen": -692.5808715820312, "logps/rejected": -1301.657470703125, "loss": 0.0979, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21501493453979492, "rewards/margins": 0.38429784774780273, "rewards/rejected": -0.5993127822875977, "step": 3470 }, { "epoch": 0.66, "learning_rate": 1.5404127454674994e-06, "logits/chosen": -1.1849193572998047, "logits/rejected": -0.46282443404197693, "logps/chosen": -649.400634765625, "logps/rejected": -1203.847900390625, "loss": 0.1099, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19498701393604279, "rewards/margins": 0.37596970796585083, "rewards/rejected": -0.570956826210022, "step": 3480 }, { "epoch": 0.66, "learning_rate": 1.5250851159811809e-06, "logits/chosen": -0.9714870452880859, "logits/rejected": -0.7505975961685181, "logps/chosen": -672.786376953125, "logps/rejected": -1408.379150390625, "loss": 0.1357, "rewards/accuracies": 0.875, "rewards/chosen": -0.21087384223937988, "rewards/margins": 0.37880977988243103, "rewards/rejected": -0.5896835923194885, "step": 3490 }, { "epoch": 0.67, "learning_rate": 1.509800584902108e-06, "logits/chosen": -1.155910611152649, "logits/rejected": -0.43785548210144043, "logps/chosen": -861.1187744140625, "logps/rejected": -1418.395751953125, "loss": 0.1091, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2794817090034485, "rewards/margins": 0.37415820360183716, "rewards/rejected": -0.6536398530006409, "step": 3500 }, { "epoch": 0.67, "learning_rate": 1.4945598279189565e-06, "logits/chosen": -1.218125581741333, "logits/rejected": -0.7664887309074402, "logps/chosen": -778.1519775390625, "logps/rejected": -1335.9088134765625, "loss": 0.1192, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2392837554216385, "rewards/margins": 0.3505345582962036, "rewards/rejected": -0.5898183584213257, "step": 3510 }, { "epoch": 0.67, "learning_rate": 1.4793635187852622e-06, "logits/chosen": -1.0100408792495728, "logits/rejected": -0.6889999508857727, "logps/chosen": -657.8116455078125, "logps/rejected": -1277.6400146484375, "loss": 0.1512, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1980300098657608, "rewards/margins": 0.36997079849243164, "rewards/rejected": -0.568000853061676, "step": 3520 }, { "epoch": 0.67, "learning_rate": 1.4642123292896406e-06, "logits/chosen": -0.943370521068573, "logits/rejected": -0.744850754737854, "logps/chosen": -633.4537353515625, "logps/rejected": -1239.15966796875, "loss": 0.1535, "rewards/accuracies": 0.75, "rewards/chosen": -0.22848740220069885, "rewards/margins": 0.31630563735961914, "rewards/rejected": -0.5447930097579956, "step": 3530 }, { "epoch": 0.67, "learning_rate": 1.4491069292260867e-06, "logits/chosen": -1.0385249853134155, "logits/rejected": -0.5362288355827332, "logps/chosen": -662.1536254882812, "logps/rejected": -1150.642822265625, "loss": 0.1691, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23081104457378387, "rewards/margins": 0.3218469023704529, "rewards/rejected": -0.552657961845398, "step": 3540 }, { "epoch": 0.68, "learning_rate": 1.4340479863643658e-06, "logits/chosen": -1.0680629014968872, "logits/rejected": -0.5989462733268738, "logps/chosen": -565.4825439453125, "logps/rejected": -1251.3245849609375, "loss": 0.0992, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.179966002702713, "rewards/margins": 0.39868369698524475, "rewards/rejected": -0.5786498188972473, "step": 3550 }, { "epoch": 0.68, "learning_rate": 1.4190361664204936e-06, "logits/chosen": -0.9839814305305481, "logits/rejected": -0.6571632623672485, "logps/chosen": -632.7914428710938, "logps/rejected": -1111.93701171875, "loss": 0.1835, "rewards/accuracies": 0.75, "rewards/chosen": -0.2057075947523117, "rewards/margins": 0.26010626554489136, "rewards/rejected": -0.4658138155937195, "step": 3560 }, { "epoch": 0.68, "learning_rate": 1.4040721330273063e-06, "logits/chosen": -1.035467267036438, "logits/rejected": -0.5835942029953003, "logps/chosen": -658.8521728515625, "logps/rejected": -1272.2744140625, "loss": 0.0963, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17964915931224823, "rewards/margins": 0.39302974939346313, "rewards/rejected": -0.5726788640022278, "step": 3570 }, { "epoch": 0.68, "learning_rate": 1.3891565477051242e-06, "logits/chosen": -1.3078663349151611, "logits/rejected": -0.7058553695678711, "logps/chosen": -652.1445922851562, "logps/rejected": -1200.9466552734375, "loss": 0.1622, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19131067395210266, "rewards/margins": 0.3530043065547943, "rewards/rejected": -0.544314980506897, "step": 3580 }, { "epoch": 0.68, "learning_rate": 1.3742900698325034e-06, "logits/chosen": -1.2529584169387817, "logits/rejected": -0.7100784182548523, "logps/chosen": -601.9139404296875, "logps/rejected": -1144.3150634765625, "loss": 0.1401, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13541176915168762, "rewards/margins": 0.36244815587997437, "rewards/rejected": -0.4978599548339844, "step": 3590 }, { "epoch": 0.69, "learning_rate": 1.3594733566170925e-06, "logits/chosen": -1.3782615661621094, "logits/rejected": -0.6579089164733887, "logps/chosen": -582.8475341796875, "logps/rejected": -1242.493408203125, "loss": 0.1174, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14790233969688416, "rewards/margins": 0.34383291006088257, "rewards/rejected": -0.4917352795600891, "step": 3600 }, { "epoch": 0.69, "learning_rate": 1.3447070630665771e-06, "logits/chosen": -1.4839890003204346, "logits/rejected": -0.796236515045166, "logps/chosen": -600.3063354492188, "logps/rejected": -1149.0216064453125, "loss": 0.1312, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12155196815729141, "rewards/margins": 0.33586984872817993, "rewards/rejected": -0.45742183923721313, "step": 3610 }, { "epoch": 0.69, "learning_rate": 1.329991841959717e-06, "logits/chosen": -1.2810465097427368, "logits/rejected": -0.8118991851806641, "logps/chosen": -528.5442504882812, "logps/rejected": -1024.990478515625, "loss": 0.1393, "rewards/accuracies": 0.75, "rewards/chosen": -0.09333421289920807, "rewards/margins": 0.33459314703941345, "rewards/rejected": -0.4279273450374603, "step": 3620 }, { "epoch": 0.69, "learning_rate": 1.3153283438175036e-06, "logits/chosen": -1.1652348041534424, "logits/rejected": -0.6431460380554199, "logps/chosen": -612.7841796875, "logps/rejected": -1273.5966796875, "loss": 0.1094, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.13439562916755676, "rewards/margins": 0.3692600727081299, "rewards/rejected": -0.5036556720733643, "step": 3630 }, { "epoch": 0.69, "learning_rate": 1.3007172168743854e-06, "logits/chosen": -1.1769293546676636, "logits/rejected": -0.6776280999183655, "logps/chosen": -544.8734130859375, "logps/rejected": -1208.873046875, "loss": 0.1267, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14331087470054626, "rewards/margins": 0.3766873776912689, "rewards/rejected": -0.5199981927871704, "step": 3640 }, { "epoch": 0.7, "learning_rate": 1.2861591070496193e-06, "logits/chosen": -1.1209774017333984, "logits/rejected": -0.39191246032714844, "logps/chosen": -674.94580078125, "logps/rejected": -1270.96533203125, "loss": 0.1265, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17688027024269104, "rewards/margins": 0.3601907789707184, "rewards/rejected": -0.5370709896087646, "step": 3650 }, { "epoch": 0.7, "learning_rate": 1.271654657918722e-06, "logits/chosen": -1.2530699968338013, "logits/rejected": -0.6681731939315796, "logps/chosen": -605.7479248046875, "logps/rejected": -1245.85009765625, "loss": 0.1269, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1481107622385025, "rewards/margins": 0.41965943574905396, "rewards/rejected": -0.56777024269104, "step": 3660 }, { "epoch": 0.7, "learning_rate": 1.2572045106850051e-06, "logits/chosen": -1.077708125114441, "logits/rejected": -0.7910763621330261, "logps/chosen": -645.6097412109375, "logps/rejected": -1208.621826171875, "loss": 0.1726, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1740269809961319, "rewards/margins": 0.3548907935619354, "rewards/rejected": -0.5289177298545837, "step": 3670 }, { "epoch": 0.7, "learning_rate": 1.2428093041512418e-06, "logits/chosen": -1.0985954999923706, "logits/rejected": -0.6648399233818054, "logps/chosen": -592.8692016601562, "logps/rejected": -1260.7965087890625, "loss": 0.1419, "rewards/accuracies": 0.875, "rewards/chosen": -0.19636262953281403, "rewards/margins": 0.4022560715675354, "rewards/rejected": -0.5986186265945435, "step": 3680 }, { "epoch": 0.7, "learning_rate": 1.2284696746914216e-06, "logits/chosen": -1.3438647985458374, "logits/rejected": -0.888390064239502, "logps/chosen": -698.9994506835938, "logps/rejected": -1312.741455078125, "loss": 0.1269, "rewards/accuracies": 0.875, "rewards/chosen": -0.24452729523181915, "rewards/margins": 0.3692966103553772, "rewards/rejected": -0.6138239502906799, "step": 3690 }, { "epoch": 0.7, "learning_rate": 1.2141862562226164e-06, "logits/chosen": -1.2511751651763916, "logits/rejected": -0.5096303224563599, "logps/chosen": -588.0484619140625, "logps/rejected": -1269.4197998046875, "loss": 0.1044, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1658642590045929, "rewards/margins": 0.4230321943759918, "rewards/rejected": -0.5888963937759399, "step": 3700 }, { "epoch": 0.71, "learning_rate": 1.1999596801769617e-06, "logits/chosen": -1.0038516521453857, "logits/rejected": -0.45204249024391174, "logps/chosen": -721.703125, "logps/rejected": -1323.9910888671875, "loss": 0.1196, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.24282777309417725, "rewards/margins": 0.4124522805213928, "rewards/rejected": -0.6552800536155701, "step": 3710 }, { "epoch": 0.71, "learning_rate": 1.185790575473738e-06, "logits/chosen": -1.12263023853302, "logits/rejected": -0.8619738817214966, "logps/chosen": -585.7687377929688, "logps/rejected": -1195.392578125, "loss": 0.177, "rewards/accuracies": 0.75, "rewards/chosen": -0.21055932343006134, "rewards/margins": 0.34318262338638306, "rewards/rejected": -0.5537418723106384, "step": 3720 }, { "epoch": 0.71, "learning_rate": 1.1716795684915728e-06, "logits/chosen": -1.3430308103561401, "logits/rejected": -0.53284752368927, "logps/chosen": -765.2874755859375, "logps/rejected": -1431.0419921875, "loss": 0.1001, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.214613676071167, "rewards/margins": 0.448627769947052, "rewards/rejected": -0.6632413864135742, "step": 3730 }, { "epoch": 0.71, "learning_rate": 1.1576272830407418e-06, "logits/chosen": -1.0894341468811035, "logits/rejected": -0.3034951984882355, "logps/chosen": -633.4129638671875, "logps/rejected": -1246.320068359375, "loss": 0.112, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17959848046302795, "rewards/margins": 0.4334283769130707, "rewards/rejected": -0.6130269169807434, "step": 3740 }, { "epoch": 0.71, "learning_rate": 1.1436343403356019e-06, "logits/chosen": -1.056552529335022, "logits/rejected": -0.5549115538597107, "logps/chosen": -697.0592041015625, "logps/rejected": -1527.7884521484375, "loss": 0.0809, "rewards/accuracies": 1.0, "rewards/chosen": -0.2237638235092163, "rewards/margins": 0.47696346044540405, "rewards/rejected": -0.7007272243499756, "step": 3750 }, { "epoch": 0.72, "learning_rate": 1.129701358967123e-06, "logits/chosen": -1.3412917852401733, "logits/rejected": -0.6541746854782104, "logps/chosen": -638.39208984375, "logps/rejected": -1218.297607421875, "loss": 0.0977, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20013268291950226, "rewards/margins": 0.3840998709201813, "rewards/rejected": -0.5842326283454895, "step": 3760 }, { "epoch": 0.72, "learning_rate": 1.11582895487554e-06, "logits/chosen": -1.3476431369781494, "logits/rejected": -0.7230587601661682, "logps/chosen": -760.1488037109375, "logps/rejected": -1453.4154052734375, "loss": 0.0783, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18445217609405518, "rewards/margins": 0.4773259162902832, "rewards/rejected": -0.6617780923843384, "step": 3770 }, { "epoch": 0.72, "learning_rate": 1.1020177413231334e-06, "logits/chosen": -1.4782958030700684, "logits/rejected": -0.7418783903121948, "logps/chosen": -623.0445556640625, "logps/rejected": -1187.9761962890625, "loss": 0.1016, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19287201762199402, "rewards/margins": 0.38419967889785767, "rewards/rejected": -0.5770717263221741, "step": 3780 }, { "epoch": 0.72, "learning_rate": 1.0882683288671041e-06, "logits/chosen": -1.1622469425201416, "logits/rejected": -0.5610478520393372, "logps/chosen": -817.5090942382812, "logps/rejected": -1415.7564697265625, "loss": 0.1571, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.27743351459503174, "rewards/margins": 0.35226619243621826, "rewards/rejected": -0.62969970703125, "step": 3790 }, { "epoch": 0.72, "learning_rate": 1.0745813253325957e-06, "logits/chosen": -1.067703127861023, "logits/rejected": -0.5928519368171692, "logps/chosen": -718.2166748046875, "logps/rejected": -1323.1864013671875, "loss": 0.1367, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23111920058727264, "rewards/margins": 0.3656584322452545, "rewards/rejected": -0.5967775583267212, "step": 3800 }, { "epoch": 0.73, "learning_rate": 1.0609573357858166e-06, "logits/chosen": -1.3917607069015503, "logits/rejected": -0.9353009462356567, "logps/chosen": -625.9328002929688, "logps/rejected": -1222.05419921875, "loss": 0.152, "rewards/accuracies": 0.875, "rewards/chosen": -0.18823829293251038, "rewards/margins": 0.3441144526004791, "rewards/rejected": -0.5323527455329895, "step": 3810 }, { "epoch": 0.73, "learning_rate": 1.0473969625072922e-06, "logits/chosen": -0.9717020988464355, "logits/rejected": -0.759957492351532, "logps/chosen": -654.322509765625, "logps/rejected": -1210.5252685546875, "loss": 0.1397, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1741013526916504, "rewards/margins": 0.34090572595596313, "rewards/rejected": -0.5150070786476135, "step": 3820 }, { "epoch": 0.73, "learning_rate": 1.0339008049652427e-06, "logits/chosen": -1.3002078533172607, "logits/rejected": -0.4916529655456543, "logps/chosen": -696.1192626953125, "logps/rejected": -1171.50341796875, "loss": 0.1245, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1939762383699417, "rewards/margins": 0.3528677821159363, "rewards/rejected": -0.5468440055847168, "step": 3830 }, { "epoch": 0.73, "learning_rate": 1.0204694597890814e-06, "logits/chosen": -1.3936400413513184, "logits/rejected": -0.7498485445976257, "logps/chosen": -674.5922241210938, "logps/rejected": -1153.2427978515625, "loss": 0.1318, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19239839911460876, "rewards/margins": 0.3497624099254608, "rewards/rejected": -0.5421608090400696, "step": 3840 }, { "epoch": 0.73, "learning_rate": 1.0071035207430352e-06, "logits/chosen": -1.0502431392669678, "logits/rejected": -0.5163276195526123, "logps/chosen": -558.4337158203125, "logps/rejected": -1253.4818115234375, "loss": 0.1114, "rewards/accuracies": 0.875, "rewards/chosen": -0.17126135528087616, "rewards/margins": 0.4170903265476227, "rewards/rejected": -0.5883517265319824, "step": 3850 }, { "epoch": 0.74, "learning_rate": 9.938035786999018e-07, "logits/chosen": -1.3897660970687866, "logits/rejected": -0.6625775098800659, "logps/chosen": -639.350341796875, "logps/rejected": -1350.543212890625, "loss": 0.1068, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1545448899269104, "rewards/margins": 0.4384000897407532, "rewards/rejected": -0.5929449200630188, "step": 3860 }, { "epoch": 0.74, "learning_rate": 9.805702216149252e-07, "logits/chosen": -1.4430843591690063, "logits/rejected": -0.6385616064071655, "logps/chosen": -660.9547119140625, "logps/rejected": -1220.5994873046875, "loss": 0.0809, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1694284975528717, "rewards/margins": 0.39203736186027527, "rewards/rejected": -0.561465859413147, "step": 3870 }, { "epoch": 0.74, "learning_rate": 9.674040344998056e-07, "logits/chosen": -1.2704236507415771, "logits/rejected": -0.7016037106513977, "logps/chosen": -645.2579956054688, "logps/rejected": -1076.955322265625, "loss": 0.1268, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19721722602844238, "rewards/margins": 0.30071109533309937, "rewards/rejected": -0.4979283809661865, "step": 3880 }, { "epoch": 0.74, "learning_rate": 9.543055993968339e-07, "logits/chosen": -1.1252660751342773, "logits/rejected": -0.5774485468864441, "logps/chosen": -712.1278686523438, "logps/rejected": -1256.327880859375, "loss": 0.1998, "rewards/accuracies": 0.75, "rewards/chosen": -0.23033590614795685, "rewards/margins": 0.33510932326316833, "rewards/rejected": -0.5654452443122864, "step": 3890 }, { "epoch": 0.74, "learning_rate": 9.412754953531664e-07, "logits/chosen": -1.3307710886001587, "logits/rejected": -0.5324984192848206, "logps/chosen": -678.5230712890625, "logps/rejected": -1398.4915771484375, "loss": 0.0722, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19032174348831177, "rewards/margins": 0.4650568962097168, "rewards/rejected": -0.6553786993026733, "step": 3900 }, { "epoch": 0.74, "learning_rate": 9.283142983952231e-07, "logits/chosen": -1.286468505859375, "logits/rejected": -0.7081373929977417, "logps/chosen": -704.1011962890625, "logps/rejected": -1354.781005859375, "loss": 0.141, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23343345522880554, "rewards/margins": 0.35219472646713257, "rewards/rejected": -0.5856281518936157, "step": 3910 }, { "epoch": 0.75, "learning_rate": 9.154225815032242e-07, "logits/chosen": -0.9127951860427856, "logits/rejected": -0.5052443742752075, "logps/chosen": -716.0985107421875, "logps/rejected": -1368.282958984375, "loss": 0.1668, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.25649404525756836, "rewards/margins": 0.33906081318855286, "rewards/rejected": -0.5955548286437988, "step": 3920 }, { "epoch": 0.75, "learning_rate": 9.026009145858608e-07, "logits/chosen": -1.1161166429519653, "logits/rejected": -0.41551637649536133, "logps/chosen": -772.7965087890625, "logps/rejected": -1489.08203125, "loss": 0.145, "rewards/accuracies": 0.875, "rewards/chosen": -0.23351868987083435, "rewards/margins": 0.4384360909461975, "rewards/rejected": -0.671954870223999, "step": 3930 }, { "epoch": 0.75, "learning_rate": 8.898498644550973e-07, "logits/chosen": -1.4083125591278076, "logits/rejected": -0.7952542901039124, "logps/chosen": -726.1722412109375, "logps/rejected": -1346.196533203125, "loss": 0.109, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2122231274843216, "rewards/margins": 0.37760061025619507, "rewards/rejected": -0.5898237228393555, "step": 3940 }, { "epoch": 0.75, "learning_rate": 8.771699948011203e-07, "logits/chosen": -1.096065878868103, "logits/rejected": -0.6019551753997803, "logps/chosen": -724.5733032226562, "logps/rejected": -1308.7606201171875, "loss": 0.1126, "rewards/accuracies": 0.875, "rewards/chosen": -0.21775946021080017, "rewards/margins": 0.40117740631103516, "rewards/rejected": -0.6189368963241577, "step": 3950 }, { "epoch": 0.75, "learning_rate": 8.645618661674144e-07, "logits/chosen": -1.2701376676559448, "logits/rejected": -0.6749829053878784, "logps/chosen": -755.7200927734375, "logps/rejected": -1293.5980224609375, "loss": 0.1477, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24769988656044006, "rewards/margins": 0.3436283767223358, "rewards/rejected": -0.5913282632827759, "step": 3960 }, { "epoch": 0.76, "learning_rate": 8.520260359259822e-07, "logits/chosen": -1.1892999410629272, "logits/rejected": -0.5921664834022522, "logps/chosen": -641.0836791992188, "logps/rejected": -1180.94287109375, "loss": 0.1498, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2086607962846756, "rewards/margins": 0.3636617064476013, "rewards/rejected": -0.5723224878311157, "step": 3970 }, { "epoch": 0.76, "learning_rate": 8.395630582527075e-07, "logits/chosen": -1.4522649049758911, "logits/rejected": -0.6051837205886841, "logps/chosen": -655.6356201171875, "logps/rejected": -1347.12548828125, "loss": 0.1041, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17207400500774384, "rewards/margins": 0.40920838713645935, "rewards/rejected": -0.581282377243042, "step": 3980 }, { "epoch": 0.76, "learning_rate": 8.271734841028553e-07, "logits/chosen": -1.1060794591903687, "logits/rejected": -0.5389295220375061, "logps/chosen": -647.7974853515625, "logps/rejected": -1318.9398193359375, "loss": 0.1272, "rewards/accuracies": 0.875, "rewards/chosen": -0.2094346582889557, "rewards/margins": 0.3917198181152344, "rewards/rejected": -0.6011544466018677, "step": 3990 }, { "epoch": 0.76, "learning_rate": 8.148578611867114e-07, "logits/chosen": -1.2203426361083984, "logits/rejected": -0.8155349493026733, "logps/chosen": -636.7138061523438, "logps/rejected": -1386.8890380859375, "loss": 0.1064, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2090234011411667, "rewards/margins": 0.37104713916778564, "rewards/rejected": -0.5800706148147583, "step": 4000 }, { "epoch": 0.76, "learning_rate": 8.026167339453792e-07, "logits/chosen": -1.2789884805679321, "logits/rejected": -0.536770224571228, "logps/chosen": -820.8888549804688, "logps/rejected": -1397.5396728515625, "loss": 0.1623, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.26279765367507935, "rewards/margins": 0.373899906873703, "rewards/rejected": -0.6366976499557495, "step": 4010 }, { "epoch": 0.77, "learning_rate": 7.904506435266998e-07, "logits/chosen": -1.3287841081619263, "logits/rejected": -0.7663698196411133, "logps/chosen": -631.3057861328125, "logps/rejected": -1121.5838623046875, "loss": 0.1009, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17134110629558563, "rewards/margins": 0.364962100982666, "rewards/rejected": -0.5363032221794128, "step": 4020 }, { "epoch": 0.77, "learning_rate": 7.783601277613378e-07, "logits/chosen": -1.250239610671997, "logits/rejected": -0.6047655940055847, "logps/chosen": -697.8419799804688, "logps/rejected": -1391.185546875, "loss": 0.0926, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2049448937177658, "rewards/margins": 0.4367770552635193, "rewards/rejected": -0.6417220234870911, "step": 4030 }, { "epoch": 0.77, "learning_rate": 7.66345721139003e-07, "logits/chosen": -1.5006142854690552, "logits/rejected": -0.683341383934021, "logps/chosen": -572.77294921875, "logps/rejected": -1225.771728515625, "loss": 0.1246, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15218904614448547, "rewards/margins": 0.41893473267555237, "rewards/rejected": -0.5711238384246826, "step": 4040 }, { "epoch": 0.77, "learning_rate": 7.544079547848183e-07, "logits/chosen": -1.264830231666565, "logits/rejected": -0.7260152697563171, "logps/chosen": -700.9507446289062, "logps/rejected": -1366.594482421875, "loss": 0.1052, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20156005024909973, "rewards/margins": 0.3960078954696655, "rewards/rejected": -0.5975679159164429, "step": 4050 }, { "epoch": 0.77, "learning_rate": 7.425473564358457e-07, "logits/chosen": -1.0954644680023193, "logits/rejected": -0.6338463425636292, "logps/chosen": -614.618408203125, "logps/rejected": -1147.806396484375, "loss": 0.1739, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1719847470521927, "rewards/margins": 0.3488149642944336, "rewards/rejected": -0.5207996964454651, "step": 4060 }, { "epoch": 0.78, "learning_rate": 7.307644504177539e-07, "logits/chosen": -1.3845043182373047, "logits/rejected": -0.7251416444778442, "logps/chosen": -658.0118408203125, "logps/rejected": -1141.19189453125, "loss": 0.1712, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19125691056251526, "rewards/margins": 0.331901878118515, "rewards/rejected": -0.5231587290763855, "step": 4070 }, { "epoch": 0.78, "learning_rate": 7.190597576216385e-07, "logits/chosen": -1.4137630462646484, "logits/rejected": -1.0189796686172485, "logps/chosen": -529.8546752929688, "logps/rejected": -1103.696533203125, "loss": 0.1064, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12954707443714142, "rewards/margins": 0.3549201488494873, "rewards/rejected": -0.4844672679901123, "step": 4080 }, { "epoch": 0.78, "learning_rate": 7.074337954809945e-07, "logits/chosen": -1.2685167789459229, "logits/rejected": -0.7394587397575378, "logps/chosen": -630.5240478515625, "logps/rejected": -1176.576416015625, "loss": 0.1448, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1995144784450531, "rewards/margins": 0.3364899158477783, "rewards/rejected": -0.5360044240951538, "step": 4090 }, { "epoch": 0.78, "learning_rate": 6.958870779488447e-07, "logits/chosen": -1.3933331966400146, "logits/rejected": -0.6874264478683472, "logps/chosen": -736.90087890625, "logps/rejected": -1352.171630859375, "loss": 0.1121, "rewards/accuracies": 0.875, "rewards/chosen": -0.20742790400981903, "rewards/margins": 0.367341548204422, "rewards/rejected": -0.5747694373130798, "step": 4100 }, { "epoch": 0.78, "learning_rate": 6.844201154750176e-07, "logits/chosen": -1.1871206760406494, "logits/rejected": -0.792644202709198, "logps/chosen": -626.2569580078125, "logps/rejected": -1167.786865234375, "loss": 0.2183, "rewards/accuracies": 0.75, "rewards/chosen": -0.2114189863204956, "rewards/margins": 0.3176122307777405, "rewards/rejected": -0.5290312170982361, "step": 4110 }, { "epoch": 0.78, "learning_rate": 6.730334149835788e-07, "logits/chosen": -1.2410621643066406, "logits/rejected": -0.37250614166259766, "logps/chosen": -670.5504150390625, "logps/rejected": -1224.5399169921875, "loss": 0.1313, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20627684891223907, "rewards/margins": 0.3557787537574768, "rewards/rejected": -0.5620556473731995, "step": 4120 }, { "epoch": 0.79, "learning_rate": 6.617274798504286e-07, "logits/chosen": -1.5327309370040894, "logits/rejected": -0.8354431390762329, "logps/chosen": -635.7954711914062, "logps/rejected": -1196.0064697265625, "loss": 0.1493, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17149753868579865, "rewards/margins": 0.33922523260116577, "rewards/rejected": -0.5107227563858032, "step": 4130 }, { "epoch": 0.79, "learning_rate": 6.505028098810407e-07, "logits/chosen": -1.4094494581222534, "logits/rejected": -0.6212278604507446, "logps/chosen": -743.4639892578125, "logps/rejected": -1290.336181640625, "loss": 0.1504, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23548047244548798, "rewards/margins": 0.34705108404159546, "rewards/rejected": -0.5825315713882446, "step": 4140 }, { "epoch": 0.79, "learning_rate": 6.393599012883709e-07, "logits/chosen": -1.2645955085754395, "logits/rejected": -0.6100594997406006, "logps/chosen": -600.9065551757812, "logps/rejected": -1302.2193603515625, "loss": 0.1541, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1765137016773224, "rewards/margins": 0.4136267304420471, "rewards/rejected": -0.5901404619216919, "step": 4150 }, { "epoch": 0.79, "learning_rate": 6.282992466709247e-07, "logits/chosen": -1.4073201417922974, "logits/rejected": -0.6787480711936951, "logps/chosen": -597.0184326171875, "logps/rejected": -1064.3037109375, "loss": 0.1595, "rewards/accuracies": 0.75, "rewards/chosen": -0.18919098377227783, "rewards/margins": 0.3104937970638275, "rewards/rejected": -0.49968472123146057, "step": 4160 }, { "epoch": 0.79, "learning_rate": 6.17321334990973e-07, "logits/chosen": -1.5116772651672363, "logits/rejected": -0.8611841201782227, "logps/chosen": -568.7406005859375, "logps/rejected": -1094.880126953125, "loss": 0.1584, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15613974630832672, "rewards/margins": 0.3134354054927826, "rewards/rejected": -0.46957510709762573, "step": 4170 }, { "epoch": 0.8, "learning_rate": 6.064266515529419e-07, "logits/chosen": -1.2406316995620728, "logits/rejected": -0.7882015109062195, "logps/chosen": -553.3673095703125, "logps/rejected": -1092.30419921875, "loss": 0.15, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15634885430335999, "rewards/margins": 0.31625062227249146, "rewards/rejected": -0.47259944677352905, "step": 4180 }, { "epoch": 0.8, "learning_rate": 5.956156779819586e-07, "logits/chosen": -1.5881235599517822, "logits/rejected": -0.8134576678276062, "logps/chosen": -571.6589965820312, "logps/rejected": -1253.228271484375, "loss": 0.0941, "rewards/accuracies": 0.875, "rewards/chosen": -0.13489124178886414, "rewards/margins": 0.409782737493515, "rewards/rejected": -0.5446739792823792, "step": 4190 }, { "epoch": 0.8, "learning_rate": 5.848888922025553e-07, "logits/chosen": -1.625577688217163, "logits/rejected": -0.8591996431350708, "logps/chosen": -655.3140869140625, "logps/rejected": -1305.062255859375, "loss": 0.1107, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16010040044784546, "rewards/margins": 0.38980206847190857, "rewards/rejected": -0.5499024391174316, "step": 4200 }, { "epoch": 0.8, "learning_rate": 5.742467684175473e-07, "logits/chosen": -1.4327632188796997, "logits/rejected": -0.7635586261749268, "logps/chosen": -613.9177856445312, "logps/rejected": -1256.923583984375, "loss": 0.0908, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.14799591898918152, "rewards/margins": 0.3925449252128601, "rewards/rejected": -0.540540874004364, "step": 4210 }, { "epoch": 0.8, "learning_rate": 5.636897770870667e-07, "logits/chosen": -1.6202754974365234, "logits/rejected": -0.7592384219169617, "logps/chosen": -639.2320556640625, "logps/rejected": -1302.3748779296875, "loss": 0.1132, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14248713850975037, "rewards/margins": 0.4462302327156067, "rewards/rejected": -0.5887174606323242, "step": 4220 }, { "epoch": 0.81, "learning_rate": 5.532183849077651e-07, "logits/chosen": -1.4232017993927002, "logits/rejected": -0.6501675844192505, "logps/chosen": -649.5947875976562, "logps/rejected": -1332.9541015625, "loss": 0.1345, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17357690632343292, "rewards/margins": 0.39399975538253784, "rewards/rejected": -0.5675767064094543, "step": 4230 }, { "epoch": 0.81, "learning_rate": 5.428330547921809e-07, "logits/chosen": -1.5804541110992432, "logits/rejected": -0.8540856242179871, "logps/chosen": -555.0350341796875, "logps/rejected": -1135.249755859375, "loss": 0.1268, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12337274849414825, "rewards/margins": 0.3674394488334656, "rewards/rejected": -0.490812212228775, "step": 4240 }, { "epoch": 0.81, "learning_rate": 5.32534245848278e-07, "logits/chosen": -1.111779808998108, "logits/rejected": -0.9118094444274902, "logps/chosen": -657.785400390625, "logps/rejected": -1307.219970703125, "loss": 0.1176, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18533580005168915, "rewards/margins": 0.37753137946128845, "rewards/rejected": -0.5628672242164612, "step": 4250 }, { "epoch": 0.81, "learning_rate": 5.223224133591475e-07, "logits/chosen": -1.2844034433364868, "logits/rejected": -0.5752917528152466, "logps/chosen": -735.5989990234375, "logps/rejected": -1231.976806640625, "loss": 0.1379, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21159152686595917, "rewards/margins": 0.3349112272262573, "rewards/rejected": -0.5465027093887329, "step": 4260 }, { "epoch": 0.81, "learning_rate": 5.121980087628802e-07, "logits/chosen": -1.3753794431686401, "logits/rejected": -0.673454761505127, "logps/chosen": -741.6004028320312, "logps/rejected": -1498.8599853515625, "loss": 0.0715, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2075379341840744, "rewards/margins": 0.4599502682685852, "rewards/rejected": -0.6674882173538208, "step": 4270 }, { "epoch": 0.82, "learning_rate": 5.021614796326155e-07, "logits/chosen": -1.3672094345092773, "logits/rejected": -0.5643395781517029, "logps/chosen": -703.5550537109375, "logps/rejected": -1437.540283203125, "loss": 0.0834, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19563846290111542, "rewards/margins": 0.45745301246643066, "rewards/rejected": -0.6530914902687073, "step": 4280 }, { "epoch": 0.82, "learning_rate": 4.922132696567463e-07, "logits/chosen": -1.574609398841858, "logits/rejected": -0.6377558708190918, "logps/chosen": -664.7916259765625, "logps/rejected": -1346.763427734375, "loss": 0.1212, "rewards/accuracies": 0.875, "rewards/chosen": -0.16896727681159973, "rewards/margins": 0.4139528274536133, "rewards/rejected": -0.5829200744628906, "step": 4290 }, { "epoch": 0.82, "learning_rate": 4.823538186193097e-07, "logits/chosen": -1.451124906539917, "logits/rejected": -0.6306055784225464, "logps/chosen": -669.7281494140625, "logps/rejected": -1328.270751953125, "loss": 0.1147, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20952646434307098, "rewards/margins": 0.42805638909339905, "rewards/rejected": -0.6375828981399536, "step": 4300 }, { "epoch": 0.82, "learning_rate": 4.725835623805494e-07, "logits/chosen": -1.3181301355361938, "logits/rejected": -0.7448017001152039, "logps/chosen": -590.9874877929688, "logps/rejected": -1046.7158203125, "loss": 0.1781, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17497803270816803, "rewards/margins": 0.2649361789226532, "rewards/rejected": -0.4399142265319824, "step": 4310 }, { "epoch": 0.82, "learning_rate": 4.6290293285763816e-07, "logits/chosen": -1.2688031196594238, "logits/rejected": -0.8608768582344055, "logps/chosen": -673.9658813476562, "logps/rejected": -1142.4676513671875, "loss": 0.2185, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2415378838777542, "rewards/margins": 0.27603575587272644, "rewards/rejected": -0.5175737142562866, "step": 4320 }, { "epoch": 0.82, "learning_rate": 4.533123580055909e-07, "logits/chosen": -1.2598726749420166, "logits/rejected": -0.8441423177719116, "logps/chosen": -580.6671752929688, "logps/rejected": -1168.000732421875, "loss": 0.1468, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1596529185771942, "rewards/margins": 0.36406847834587097, "rewards/rejected": -0.5237213969230652, "step": 4330 }, { "epoch": 0.83, "learning_rate": 4.438122617983442e-07, "logits/chosen": -1.2603580951690674, "logits/rejected": -0.6316035985946655, "logps/chosen": -649.4517211914062, "logps/rejected": -1136.001220703125, "loss": 0.1489, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18067191541194916, "rewards/margins": 0.342294305562973, "rewards/rejected": -0.522966206073761, "step": 4340 }, { "epoch": 0.83, "learning_rate": 4.344030642100133e-07, "logits/chosen": -1.332079291343689, "logits/rejected": -0.7255615592002869, "logps/chosen": -536.6853637695312, "logps/rejected": -1140.5736083984375, "loss": 0.1444, "rewards/accuracies": 0.875, "rewards/chosen": -0.10494126379489899, "rewards/margins": 0.3486764132976532, "rewards/rejected": -0.453617662191391, "step": 4350 }, { "epoch": 0.83, "learning_rate": 4.250851811963236e-07, "logits/chosen": -1.2695554494857788, "logits/rejected": -0.6029915809631348, "logps/chosen": -632.0772094726562, "logps/rejected": -1235.291259765625, "loss": 0.1262, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13401833176612854, "rewards/margins": 0.37926769256591797, "rewards/rejected": -0.5132860541343689, "step": 4360 }, { "epoch": 0.83, "learning_rate": 4.158590246762278e-07, "logits/chosen": -1.3069413900375366, "logits/rejected": -0.6581517457962036, "logps/chosen": -609.7135009765625, "logps/rejected": -1252.7021484375, "loss": 0.1335, "rewards/accuracies": 0.875, "rewards/chosen": -0.15370860695838928, "rewards/margins": 0.3951302766799927, "rewards/rejected": -0.5488388538360596, "step": 4370 }, { "epoch": 0.83, "learning_rate": 4.0672500251369204e-07, "logits/chosen": -1.1749308109283447, "logits/rejected": -0.6665660738945007, "logps/chosen": -702.4545288085938, "logps/rejected": -1344.811279296875, "loss": 0.0998, "rewards/accuracies": 0.875, "rewards/chosen": -0.19916370511054993, "rewards/margins": 0.385100394487381, "rewards/rejected": -0.5842640995979309, "step": 4380 }, { "epoch": 0.84, "learning_rate": 3.976835184996644e-07, "logits/chosen": -1.1730005741119385, "logits/rejected": -0.6292411088943481, "logps/chosen": -617.7659912109375, "logps/rejected": -1162.8343505859375, "loss": 0.0869, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17485156655311584, "rewards/margins": 0.34688758850097656, "rewards/rejected": -0.52173912525177, "step": 4390 }, { "epoch": 0.84, "learning_rate": 3.887349723342304e-07, "logits/chosen": -1.3301860094070435, "logits/rejected": -0.8942984342575073, "logps/chosen": -515.6876220703125, "logps/rejected": -1020.4283447265625, "loss": 0.1541, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12954142689704895, "rewards/margins": 0.33292967081069946, "rewards/rejected": -0.4624711573123932, "step": 4400 }, { "epoch": 0.84, "learning_rate": 3.798797596089351e-07, "logits/chosen": -1.271340250968933, "logits/rejected": -0.7264868021011353, "logps/chosen": -553.5992431640625, "logps/rejected": -1132.2711181640625, "loss": 0.1415, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1580553650856018, "rewards/margins": 0.33277612924575806, "rewards/rejected": -0.4908314645290375, "step": 4410 }, { "epoch": 0.84, "learning_rate": 3.711182717893011e-07, "logits/chosen": -1.1246154308319092, "logits/rejected": -0.8045031428337097, "logps/chosen": -651.364990234375, "logps/rejected": -1357.323974609375, "loss": 0.123, "rewards/accuracies": 0.875, "rewards/chosen": -0.2048703134059906, "rewards/margins": 0.3610819876194, "rewards/rejected": -0.5659523010253906, "step": 4420 }, { "epoch": 0.84, "learning_rate": 3.624508961975215e-07, "logits/chosen": -1.3047051429748535, "logits/rejected": -0.8493931889533997, "logps/chosen": -585.2650756835938, "logps/rejected": -1048.5419921875, "loss": 0.1423, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15923550724983215, "rewards/margins": 0.2898097336292267, "rewards/rejected": -0.44904524087905884, "step": 4430 }, { "epoch": 0.85, "learning_rate": 3.538780159953348e-07, "logits/chosen": -1.2130616903305054, "logits/rejected": -0.8316548466682434, "logps/chosen": -644.5127563476562, "logps/rejected": -1234.968994140625, "loss": 0.1481, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18661195039749146, "rewards/margins": 0.33793720602989197, "rewards/rejected": -0.524549126625061, "step": 4440 }, { "epoch": 0.85, "learning_rate": 3.454000101670901e-07, "logits/chosen": -1.425441861152649, "logits/rejected": -0.8048744201660156, "logps/chosen": -656.5826416015625, "logps/rejected": -1301.8543701171875, "loss": 0.0727, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1628897488117218, "rewards/margins": 0.3989006280899048, "rewards/rejected": -0.5617903470993042, "step": 4450 }, { "epoch": 0.85, "learning_rate": 3.3701725350299143e-07, "logits/chosen": -1.4737662076950073, "logits/rejected": -1.1224262714385986, "logps/chosen": -709.0985717773438, "logps/rejected": -1282.176513671875, "loss": 0.1231, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2114751785993576, "rewards/margins": 0.3335014581680298, "rewards/rejected": -0.5449766516685486, "step": 4460 }, { "epoch": 0.85, "learning_rate": 3.2873011658252796e-07, "logits/chosen": -1.3247419595718384, "logits/rejected": -0.7069304585456848, "logps/chosen": -569.6729736328125, "logps/rejected": -1159.7352294921875, "loss": 0.126, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1273414045572281, "rewards/margins": 0.3491176962852478, "rewards/rejected": -0.4764591157436371, "step": 4470 }, { "epoch": 0.85, "learning_rate": 3.2053896575809426e-07, "logits/chosen": -1.2610900402069092, "logits/rejected": -0.7959686517715454, "logps/chosen": -628.8607177734375, "logps/rejected": -1139.8856201171875, "loss": 0.1415, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16233834624290466, "rewards/margins": 0.3112182021141052, "rewards/rejected": -0.47355660796165466, "step": 4480 }, { "epoch": 0.86, "learning_rate": 3.124441631387931e-07, "logits/chosen": -1.2010934352874756, "logits/rejected": -0.5530330538749695, "logps/chosen": -565.778564453125, "logps/rejected": -1137.80322265625, "loss": 0.1569, "rewards/accuracies": 0.875, "rewards/chosen": -0.13163362443447113, "rewards/margins": 0.32249611616134644, "rewards/rejected": -0.45412975549697876, "step": 4490 }, { "epoch": 0.86, "learning_rate": 3.044460665744284e-07, "logits/chosen": -1.2157789468765259, "logits/rejected": -0.6982497572898865, "logps/chosen": -567.2361450195312, "logps/rejected": -1238.99951171875, "loss": 0.1205, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14434069395065308, "rewards/margins": 0.39340585470199585, "rewards/rejected": -0.5377465486526489, "step": 4500 }, { "epoch": 0.86, "learning_rate": 2.9654502963968575e-07, "logits/chosen": -1.2132378816604614, "logits/rejected": -0.5881598591804504, "logps/chosen": -594.38818359375, "logps/rejected": -1174.1934814453125, "loss": 0.1257, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15615180134773254, "rewards/margins": 0.37599360942840576, "rewards/rejected": -0.5321453809738159, "step": 4510 }, { "epoch": 0.86, "learning_rate": 2.8874140161849915e-07, "logits/chosen": -1.3240309953689575, "logits/rejected": -0.7199960947036743, "logps/chosen": -661.8668823242188, "logps/rejected": -1343.602294921875, "loss": 0.099, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.17018947005271912, "rewards/margins": 0.43720951676368713, "rewards/rejected": -0.6073989868164062, "step": 4520 }, { "epoch": 0.86, "learning_rate": 2.810355274886148e-07, "logits/chosen": -1.4425709247589111, "logits/rejected": -0.720179557800293, "logps/chosen": -587.2921752929688, "logps/rejected": -1056.2947998046875, "loss": 0.213, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14530804753303528, "rewards/margins": 0.34271925687789917, "rewards/rejected": -0.48802727460861206, "step": 4530 }, { "epoch": 0.86, "learning_rate": 2.7342774790633686e-07, "logits/chosen": -1.2788457870483398, "logits/rejected": -0.5721955299377441, "logps/chosen": -588.525390625, "logps/rejected": -1134.7154541015625, "loss": 0.1338, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.171498104929924, "rewards/margins": 0.3769661486148834, "rewards/rejected": -0.5484642386436462, "step": 4540 }, { "epoch": 0.87, "learning_rate": 2.6591839919146963e-07, "logits/chosen": -1.5304784774780273, "logits/rejected": -0.7977396249771118, "logps/chosen": -574.0228271484375, "logps/rejected": -1217.320068359375, "loss": 0.1065, "rewards/accuracies": 0.875, "rewards/chosen": -0.14134375751018524, "rewards/margins": 0.41396650671958923, "rewards/rejected": -0.5553102493286133, "step": 4550 }, { "epoch": 0.87, "learning_rate": 2.58507813312448e-07, "logits/chosen": -1.641689658164978, "logits/rejected": -0.6864644289016724, "logps/chosen": -751.0174560546875, "logps/rejected": -1362.750244140625, "loss": 0.124, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1931757926940918, "rewards/margins": 0.40351542830467224, "rewards/rejected": -0.5966912508010864, "step": 4560 }, { "epoch": 0.87, "learning_rate": 2.511963178716648e-07, "logits/chosen": -1.3207637071609497, "logits/rejected": -0.6894078254699707, "logps/chosen": -677.3839721679688, "logps/rejected": -1312.611083984375, "loss": 0.1339, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1971091330051422, "rewards/margins": 0.3820040225982666, "rewards/rejected": -0.5791131258010864, "step": 4570 }, { "epoch": 0.87, "learning_rate": 2.439842360909864e-07, "logits/chosen": -1.3214836120605469, "logits/rejected": -0.8019074201583862, "logps/chosen": -532.6083374023438, "logps/rejected": -1153.3880615234375, "loss": 0.1129, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1290787160396576, "rewards/margins": 0.3827238082885742, "rewards/rejected": -0.5118025541305542, "step": 4580 }, { "epoch": 0.87, "learning_rate": 2.3687188679746314e-07, "logits/chosen": -1.6117923259735107, "logits/rejected": -0.8461950421333313, "logps/chosen": -656.3216552734375, "logps/rejected": -1255.8548583984375, "loss": 0.1095, "rewards/accuracies": 0.875, "rewards/chosen": -0.18579891324043274, "rewards/margins": 0.389835000038147, "rewards/rejected": -0.5756338834762573, "step": 4590 }, { "epoch": 0.88, "learning_rate": 2.2985958440923772e-07, "logits/chosen": -1.0252299308776855, "logits/rejected": -0.5883991122245789, "logps/chosen": -667.2033081054688, "logps/rejected": -1374.6705322265625, "loss": 0.0893, "rewards/accuracies": 0.875, "rewards/chosen": -0.20209595561027527, "rewards/margins": 0.354301393032074, "rewards/rejected": -0.5563974380493164, "step": 4600 }, { "epoch": 0.88, "learning_rate": 2.2294763892164284e-07, "logits/chosen": -1.0620825290679932, "logits/rejected": -0.5592368841171265, "logps/chosen": -669.3486328125, "logps/rejected": -1236.846923828125, "loss": 0.1485, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21842341125011444, "rewards/margins": 0.36530035734176636, "rewards/rejected": -0.5837237238883972, "step": 4610 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.0014476776123047, "logits/rejected": -0.49029821157455444, "logps/chosen": -617.0636596679688, "logps/rejected": -1130.314697265625, "loss": 0.1327, "rewards/accuracies": 0.75, "rewards/chosen": -0.19929710030555725, "rewards/margins": 0.3112267553806305, "rewards/rejected": -0.5105238556861877, "step": 4620 }, { "epoch": 0.88, "learning_rate": 2.094260364336026e-07, "logits/chosen": -1.2768690586090088, "logits/rejected": -0.7870615124702454, "logps/chosen": -686.94140625, "logps/rejected": -1295.814453125, "loss": 0.0965, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1852492392063141, "rewards/margins": 0.4062500596046448, "rewards/rejected": -0.5914993286132812, "step": 4630 }, { "epoch": 0.88, "learning_rate": 2.0281697718742333e-07, "logits/chosen": -1.4496015310287476, "logits/rejected": -0.7284448742866516, "logps/chosen": -685.0479125976562, "logps/rejected": -1305.800048828125, "loss": 0.0675, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19062955677509308, "rewards/margins": 0.40818914771080017, "rewards/rejected": -0.5988186597824097, "step": 4640 }, { "epoch": 0.89, "learning_rate": 1.9630947032398068e-07, "logits/chosen": -1.3132942914962769, "logits/rejected": -0.865318775177002, "logps/chosen": -696.484619140625, "logps/rejected": -1255.061279296875, "loss": 0.172, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20725543797016144, "rewards/margins": 0.32888931035995483, "rewards/rejected": -0.5361447930335999, "step": 4650 }, { "epoch": 0.89, "learning_rate": 1.899038035229342e-07, "logits/chosen": -1.3633558750152588, "logits/rejected": -0.5295436978340149, "logps/chosen": -696.1705322265625, "logps/rejected": -1405.713623046875, "loss": 0.1004, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1994573026895523, "rewards/margins": 0.4198401868343353, "rewards/rejected": -0.6192975044250488, "step": 4660 }, { "epoch": 0.89, "learning_rate": 1.8360025996186138e-07, "logits/chosen": -1.4343448877334595, "logits/rejected": -0.7798787355422974, "logps/chosen": -662.548583984375, "logps/rejected": -1278.1373291015625, "loss": 0.1404, "rewards/accuracies": 0.875, "rewards/chosen": -0.17773586511611938, "rewards/margins": 0.394275039434433, "rewards/rejected": -0.57201087474823, "step": 4670 }, { "epoch": 0.89, "learning_rate": 1.7739911830374352e-07, "logits/chosen": -1.178863286972046, "logits/rejected": -0.8868694305419922, "logps/chosen": -678.286865234375, "logps/rejected": -1239.553466796875, "loss": 0.1693, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21190288662910461, "rewards/margins": 0.31052833795547485, "rewards/rejected": -0.5224312543869019, "step": 4680 }, { "epoch": 0.89, "learning_rate": 1.713006526846439e-07, "logits/chosen": -1.2362756729125977, "logits/rejected": -0.6598626971244812, "logps/chosen": -571.7789306640625, "logps/rejected": -1071.474853515625, "loss": 0.1834, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15595315396785736, "rewards/margins": 0.3536679148674011, "rewards/rejected": -0.5096210241317749, "step": 4690 }, { "epoch": 0.9, "learning_rate": 1.6530513270159116e-07, "logits/chosen": -1.171621561050415, "logits/rejected": -0.658499002456665, "logps/chosen": -663.4884643554688, "logps/rejected": -1213.605712890625, "loss": 0.1504, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2194618433713913, "rewards/margins": 0.3511435091495514, "rewards/rejected": -0.5706053972244263, "step": 4700 }, { "epoch": 0.9, "learning_rate": 1.59412823400657e-07, "logits/chosen": -1.231784462928772, "logits/rejected": -0.7229105234146118, "logps/chosen": -706.43505859375, "logps/rejected": -1331.36279296875, "loss": 0.1777, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2676090598106384, "rewards/margins": 0.3416467308998108, "rewards/rejected": -0.6092557907104492, "step": 4710 }, { "epoch": 0.9, "learning_rate": 1.5362398526524463e-07, "logits/chosen": -1.3131927251815796, "logits/rejected": -0.8547158241271973, "logps/chosen": -625.4578857421875, "logps/rejected": -1146.136474609375, "loss": 0.174, "rewards/accuracies": 0.75, "rewards/chosen": -0.1988741159439087, "rewards/margins": 0.3716055452823639, "rewards/rejected": -0.5704796314239502, "step": 4720 }, { "epoch": 0.9, "learning_rate": 1.4793887420457008e-07, "logits/chosen": -1.2562273740768433, "logits/rejected": -0.5379887819290161, "logps/chosen": -737.6373901367188, "logps/rejected": -1470.8919677734375, "loss": 0.0683, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21053163707256317, "rewards/margins": 0.46442070603370667, "rewards/rejected": -0.6749523878097534, "step": 4730 }, { "epoch": 0.9, "learning_rate": 1.4235774154234855e-07, "logits/chosen": -1.2837750911712646, "logits/rejected": -0.7496229410171509, "logps/chosen": -644.0496826171875, "logps/rejected": -1238.470458984375, "loss": 0.114, "rewards/accuracies": 0.875, "rewards/chosen": -0.19958636164665222, "rewards/margins": 0.38710564374923706, "rewards/rejected": -0.5866919755935669, "step": 4740 }, { "epoch": 0.9, "learning_rate": 1.368808340056879e-07, "logits/chosen": -1.3824344873428345, "logits/rejected": -0.8901912569999695, "logps/chosen": -587.9757080078125, "logps/rejected": -1115.145263671875, "loss": 0.129, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14997518062591553, "rewards/margins": 0.3634381890296936, "rewards/rejected": -0.5134133100509644, "step": 4750 }, { "epoch": 0.91, "learning_rate": 1.31508393714177e-07, "logits/chosen": -1.3364778757095337, "logits/rejected": -0.6429688930511475, "logps/chosen": -745.0770874023438, "logps/rejected": -1234.697998046875, "loss": 0.1316, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2092159539461136, "rewards/margins": 0.3572046458721161, "rewards/rejected": -0.5664206743240356, "step": 4760 }, { "epoch": 0.91, "learning_rate": 1.2624065816918414e-07, "logits/chosen": -1.0031553506851196, "logits/rejected": -0.7630094289779663, "logps/chosen": -563.0581665039062, "logps/rejected": -1344.8671875, "loss": 0.1144, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1754561811685562, "rewards/margins": 0.44700154662132263, "rewards/rejected": -0.6224576830863953, "step": 4770 }, { "epoch": 0.91, "learning_rate": 1.210778602433596e-07, "logits/chosen": -1.281699538230896, "logits/rejected": -0.5587688684463501, "logps/chosen": -720.8016357421875, "logps/rejected": -1304.34814453125, "loss": 0.1285, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19240069389343262, "rewards/margins": 0.3902169167995453, "rewards/rejected": -0.5826175808906555, "step": 4780 }, { "epoch": 0.91, "learning_rate": 1.1602022817033709e-07, "logits/chosen": -1.3950403928756714, "logits/rejected": -0.694631040096283, "logps/chosen": -681.0729370117188, "logps/rejected": -1182.3558349609375, "loss": 0.1601, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19754651188850403, "rewards/margins": 0.3333587050437927, "rewards/rejected": -0.5309051275253296, "step": 4790 }, { "epoch": 0.91, "learning_rate": 1.1106798553464804e-07, "logits/chosen": -1.558779239654541, "logits/rejected": -0.7026988863945007, "logps/chosen": -642.016357421875, "logps/rejected": -1287.9619140625, "loss": 0.1203, "rewards/accuracies": 0.875, "rewards/chosen": -0.19038644433021545, "rewards/margins": 0.38128289580345154, "rewards/rejected": -0.5716692805290222, "step": 4800 }, { "epoch": 0.92, "learning_rate": 1.0622135126183514e-07, "logits/chosen": -1.0428054332733154, "logits/rejected": -0.7852006554603577, "logps/chosen": -635.1043701171875, "logps/rejected": -1277.9976806640625, "loss": 0.1509, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20879606902599335, "rewards/margins": 0.36163991689682007, "rewards/rejected": -0.5704359412193298, "step": 4810 }, { "epoch": 0.92, "learning_rate": 1.0148053960877396e-07, "logits/chosen": -1.2954778671264648, "logits/rejected": -0.8386659622192383, "logps/chosen": -652.3655395507812, "logps/rejected": -1173.629638671875, "loss": 0.0959, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17341004312038422, "rewards/margins": 0.34398287534713745, "rewards/rejected": -0.5173929929733276, "step": 4820 }, { "epoch": 0.92, "learning_rate": 9.684576015420277e-08, "logits/chosen": -1.2328388690948486, "logits/rejected": -0.643140435218811, "logps/chosen": -720.0303344726562, "logps/rejected": -1279.578369140625, "loss": 0.2088, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2328731268644333, "rewards/margins": 0.3637922704219818, "rewards/rejected": -0.5966655015945435, "step": 4830 }, { "epoch": 0.92, "learning_rate": 9.23172177894574e-08, "logits/chosen": -1.3657875061035156, "logits/rejected": -0.7052638530731201, "logps/chosen": -722.7994384765625, "logps/rejected": -1325.4456787109375, "loss": 0.1421, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23159709572792053, "rewards/margins": 0.35636669397354126, "rewards/rejected": -0.5879637002944946, "step": 4840 }, { "epoch": 0.92, "learning_rate": 8.78951127094127e-08, "logits/chosen": -1.5664695501327515, "logits/rejected": -0.8145062327384949, "logps/chosen": -723.9432373046875, "logps/rejected": -1316.299072265625, "loss": 0.1367, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20845094323158264, "rewards/margins": 0.38458698987960815, "rewards/rejected": -0.5930379629135132, "step": 4850 }, { "epoch": 0.93, "learning_rate": 8.357964040363209e-08, "logits/chosen": -1.27678644657135, "logits/rejected": -0.7609430551528931, "logps/chosen": -709.1177978515625, "logps/rejected": -1335.61279296875, "loss": 0.1058, "rewards/accuracies": 0.875, "rewards/chosen": -0.1909387856721878, "rewards/margins": 0.41902145743370056, "rewards/rejected": -0.6099602580070496, "step": 4860 }, { "epoch": 0.93, "learning_rate": 7.937099164772699e-08, "logits/chosen": -1.2243019342422485, "logits/rejected": -0.5583440065383911, "logps/chosen": -639.6881713867188, "logps/rejected": -1181.5858154296875, "loss": 0.1294, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19391921162605286, "rewards/margins": 0.3068258762359619, "rewards/rejected": -0.5007451176643372, "step": 4870 }, { "epoch": 0.93, "learning_rate": 7.526935249492245e-08, "logits/chosen": -1.418666124343872, "logits/rejected": -0.784935474395752, "logps/chosen": -660.876708984375, "logps/rejected": -1126.3966064453125, "loss": 0.1326, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1960071623325348, "rewards/margins": 0.3191688656806946, "rewards/rejected": -0.515175998210907, "step": 4880 }, { "epoch": 0.93, "learning_rate": 7.127490426783124e-08, "logits/chosen": -1.3633763790130615, "logits/rejected": -0.6353563666343689, "logps/chosen": -665.6785888671875, "logps/rejected": -1134.138916015625, "loss": 0.1922, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18521231412887573, "rewards/margins": 0.32668644189834595, "rewards/rejected": -0.5118987560272217, "step": 4890 }, { "epoch": 0.93, "learning_rate": 6.738782355044048e-08, "logits/chosen": -1.039763331413269, "logits/rejected": -0.7630573511123657, "logps/chosen": -667.9732055664062, "logps/rejected": -1110.4716796875, "loss": 0.2217, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23924049735069275, "rewards/margins": 0.2777882218360901, "rewards/rejected": -0.5170287489891052, "step": 4900 }, { "epoch": 0.94, "learning_rate": 6.360828218030191e-08, "logits/chosen": -1.114702582359314, "logits/rejected": -0.5790780186653137, "logps/chosen": -541.8421020507812, "logps/rejected": -1112.6759033203125, "loss": 0.1248, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14809590578079224, "rewards/margins": 0.4047287404537201, "rewards/rejected": -0.5528246164321899, "step": 4910 }, { "epoch": 0.94, "learning_rate": 5.993644724093889e-08, "logits/chosen": -1.122267484664917, "logits/rejected": -0.8811401128768921, "logps/chosen": -680.8912353515625, "logps/rejected": -1287.191650390625, "loss": 0.1502, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23508867621421814, "rewards/margins": 0.3237661123275757, "rewards/rejected": -0.5588547587394714, "step": 4920 }, { "epoch": 0.94, "learning_rate": 5.637248105445775e-08, "logits/chosen": -1.5622440576553345, "logits/rejected": -0.7539731860160828, "logps/chosen": -556.6498413085938, "logps/rejected": -1098.62451171875, "loss": 0.1393, "rewards/accuracies": 0.75, "rewards/chosen": -0.1479572057723999, "rewards/margins": 0.36362332105636597, "rewards/rejected": -0.5115805864334106, "step": 4930 }, { "epoch": 0.94, "learning_rate": 5.291654117437262e-08, "logits/chosen": -1.2904975414276123, "logits/rejected": -0.7377313375473022, "logps/chosen": -665.4820556640625, "logps/rejected": -1313.580810546875, "loss": 0.1405, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16726170480251312, "rewards/margins": 0.3971097469329834, "rewards/rejected": -0.5643714070320129, "step": 4940 }, { "epoch": 0.94, "learning_rate": 4.956878037864044e-08, "logits/chosen": -0.9625524282455444, "logits/rejected": -0.6042176485061646, "logps/chosen": -695.654052734375, "logps/rejected": -1358.5296630859375, "loss": 0.107, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.22345490753650665, "rewards/margins": 0.40664950013160706, "rewards/rejected": -0.6301044225692749, "step": 4950 }, { "epoch": 0.94, "learning_rate": 4.632934666290778e-08, "logits/chosen": -1.227859377861023, "logits/rejected": -0.7641362547874451, "logps/chosen": -666.58056640625, "logps/rejected": -1289.722412109375, "loss": 0.1532, "rewards/accuracies": 0.875, "rewards/chosen": -0.19577890634536743, "rewards/margins": 0.3802658021450043, "rewards/rejected": -0.5760447978973389, "step": 4960 }, { "epoch": 0.95, "learning_rate": 4.319838323396691e-08, "logits/chosen": -1.3235547542572021, "logits/rejected": -0.6249874234199524, "logps/chosen": -668.4324951171875, "logps/rejected": -1309.440673828125, "loss": 0.1069, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16014665365219116, "rewards/margins": 0.41093382239341736, "rewards/rejected": -0.5710804462432861, "step": 4970 }, { "epoch": 0.95, "learning_rate": 4.017602850342584e-08, "logits/chosen": -1.2945332527160645, "logits/rejected": -0.4351419508457184, "logps/chosen": -784.4776000976562, "logps/rejected": -1288.029296875, "loss": 0.1436, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2231055498123169, "rewards/margins": 0.35268718004226685, "rewards/rejected": -0.5757927894592285, "step": 4980 }, { "epoch": 0.95, "learning_rate": 3.7262416081589866e-08, "logits/chosen": -1.4735770225524902, "logits/rejected": -0.8036497831344604, "logps/chosen": -712.3739013671875, "logps/rejected": -1263.31689453125, "loss": 0.1651, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22615352272987366, "rewards/margins": 0.357717901468277, "rewards/rejected": -0.5838714838027954, "step": 4990 }, { "epoch": 0.95, "learning_rate": 3.445767477155443e-08, "logits/chosen": -1.1589405536651611, "logits/rejected": -0.8581134080886841, "logps/chosen": -554.54248046875, "logps/rejected": -1249.936279296875, "loss": 0.1091, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15571340918540955, "rewards/margins": 0.38257110118865967, "rewards/rejected": -0.5382844805717468, "step": 5000 }, { "epoch": 0.95, "learning_rate": 3.1761928563510956e-08, "logits/chosen": -1.0289770364761353, "logits/rejected": -0.7526764273643494, "logps/chosen": -696.2867431640625, "logps/rejected": -1235.195556640625, "loss": 0.1648, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2115805447101593, "rewards/margins": 0.34011542797088623, "rewards/rejected": -0.5516959428787231, "step": 5010 }, { "epoch": 0.96, "learning_rate": 2.917529662926549e-08, "logits/chosen": -1.2079118490219116, "logits/rejected": -0.7245731353759766, "logps/chosen": -610.6170043945312, "logps/rejected": -1056.6126708984375, "loss": 0.2027, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21145102381706238, "rewards/margins": 0.2977626323699951, "rewards/rejected": -0.5092136263847351, "step": 5020 }, { "epoch": 0.96, "learning_rate": 2.669789331697148e-08, "logits/chosen": -1.2090483903884888, "logits/rejected": -0.7866436243057251, "logps/chosen": -631.7671508789062, "logps/rejected": -1184.830322265625, "loss": 0.2066, "rewards/accuracies": 0.75, "rewards/chosen": -0.19812533259391785, "rewards/margins": 0.32974180579185486, "rewards/rejected": -0.5278670787811279, "step": 5030 }, { "epoch": 0.96, "learning_rate": 2.4329828146074096e-08, "logits/chosen": -1.183631420135498, "logits/rejected": -0.4477883279323578, "logps/chosen": -663.0415649414062, "logps/rejected": -1280.47607421875, "loss": 0.1243, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1664762943983078, "rewards/margins": 0.3955213725566864, "rewards/rejected": -0.5619977712631226, "step": 5040 }, { "epoch": 0.96, "learning_rate": 2.20712058024683e-08, "logits/chosen": -1.162414789199829, "logits/rejected": -0.5523552298545837, "logps/chosen": -623.75439453125, "logps/rejected": -1221.985595703125, "loss": 0.1038, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16335761547088623, "rewards/margins": 0.3917371332645416, "rewards/rejected": -0.5550947785377502, "step": 5050 }, { "epoch": 0.96, "learning_rate": 1.9922126133870568e-08, "logits/chosen": -1.1910568475723267, "logits/rejected": -0.5336898565292358, "logps/chosen": -695.8238525390625, "logps/rejected": -1241.3873291015625, "loss": 0.1203, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21383127570152283, "rewards/margins": 0.37593773007392883, "rewards/rejected": -0.5897690057754517, "step": 5060 }, { "epoch": 0.97, "learning_rate": 1.7882684145406616e-08, "logits/chosen": -1.1737043857574463, "logits/rejected": -0.6569717526435852, "logps/chosen": -594.8487548828125, "logps/rejected": -1206.2760009765625, "loss": 0.1276, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1872614622116089, "rewards/margins": 0.36402034759521484, "rewards/rejected": -0.5512818098068237, "step": 5070 }, { "epoch": 0.97, "learning_rate": 1.595296999541057e-08, "logits/chosen": -1.400295615196228, "logits/rejected": -0.8565350770950317, "logps/chosen": -626.7350463867188, "logps/rejected": -1158.6981201171875, "loss": 0.1982, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1880793422460556, "rewards/margins": 0.3245196044445038, "rewards/rejected": -0.512598991394043, "step": 5080 }, { "epoch": 0.97, "learning_rate": 1.4133068991437903e-08, "logits/chosen": -1.0790560245513916, "logits/rejected": -0.6355162262916565, "logps/chosen": -626.5213623046875, "logps/rejected": -1166.830322265625, "loss": 0.1878, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19226156175136566, "rewards/margins": 0.3172582983970642, "rewards/rejected": -0.5095198154449463, "step": 5090 }, { "epoch": 0.97, "learning_rate": 1.2423061586496476e-08, "logits/chosen": -1.6458122730255127, "logits/rejected": -0.8766103982925415, "logps/chosen": -585.2715454101562, "logps/rejected": -1190.6949462890625, "loss": 0.0906, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1274069845676422, "rewards/margins": 0.43028607964515686, "rewards/rejected": -0.5576930642127991, "step": 5100 }, { "epoch": 0.97, "learning_rate": 1.0823023375489128e-08, "logits/chosen": -1.310750126838684, "logits/rejected": -0.9613951444625854, "logps/chosen": -646.385498046875, "logps/rejected": -1159.139892578125, "loss": 0.1623, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1659984141588211, "rewards/margins": 0.30811062455177307, "rewards/rejected": -0.4741089940071106, "step": 5110 }, { "epoch": 0.98, "learning_rate": 9.333025091870507e-09, "logits/chosen": -1.335836410522461, "logits/rejected": -0.8691408038139343, "logps/chosen": -698.8345947265625, "logps/rejected": -1215.299560546875, "loss": 0.2012, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2206064760684967, "rewards/margins": 0.28262168169021606, "rewards/rejected": -0.5032281875610352, "step": 5120 }, { "epoch": 0.98, "learning_rate": 7.95313260452263e-09, "logits/chosen": -1.2383493185043335, "logits/rejected": -0.6333569884300232, "logps/chosen": -689.2410888671875, "logps/rejected": -1336.73291015625, "loss": 0.1386, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20564253628253937, "rewards/margins": 0.3403290808200836, "rewards/rejected": -0.5459715127944946, "step": 5130 }, { "epoch": 0.98, "learning_rate": 6.683406914840818e-09, "logits/chosen": -1.0342224836349487, "logits/rejected": -0.7361395359039307, "logps/chosen": -549.5321655273438, "logps/rejected": -1246.310302734375, "loss": 0.1339, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17279265820980072, "rewards/margins": 0.3750759959220886, "rewards/rejected": -0.5478686094284058, "step": 5140 }, { "epoch": 0.98, "learning_rate": 5.523904154037529e-09, "logits/chosen": -1.535077691078186, "logits/rejected": -0.9410215616226196, "logps/chosen": -684.4967041015625, "logps/rejected": -1473.415283203125, "loss": 0.077, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18594303727149963, "rewards/margins": 0.4658792018890381, "rewards/rejected": -0.6518222093582153, "step": 5150 }, { "epoch": 0.98, "learning_rate": 4.474675580662113e-09, "logits/chosen": -1.4770389795303345, "logits/rejected": -0.6177552938461304, "logps/chosen": -597.9993896484375, "logps/rejected": -1269.7330322265625, "loss": 0.0993, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14835500717163086, "rewards/margins": 0.43027836084365845, "rewards/rejected": -0.5786333680152893, "step": 5160 }, { "epoch": 0.98, "learning_rate": 3.5357675783331823e-09, "logits/chosen": -1.2707096338272095, "logits/rejected": -0.6901790499687195, "logps/chosen": -685.2569580078125, "logps/rejected": -1283.8712158203125, "loss": 0.1113, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.202836275100708, "rewards/margins": 0.40447163581848145, "rewards/rejected": -0.6073078513145447, "step": 5170 }, { "epoch": 0.99, "learning_rate": 2.7072216536885855e-09, "logits/chosen": -1.2012836933135986, "logits/rejected": -0.8834649920463562, "logps/chosen": -604.7525024414062, "logps/rejected": -1220.6387939453125, "loss": 0.1213, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20166388154029846, "rewards/margins": 0.34872767329216003, "rewards/rejected": -0.5503915548324585, "step": 5180 }, { "epoch": 0.99, "learning_rate": 1.989074434551874e-09, "logits/chosen": -1.244760274887085, "logits/rejected": -0.5650443434715271, "logps/chosen": -632.00048828125, "logps/rejected": -1149.3975830078125, "loss": 0.16, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1756267249584198, "rewards/margins": 0.31486809253692627, "rewards/rejected": -0.49049481749534607, "step": 5190 }, { "epoch": 0.99, "learning_rate": 1.3813576683111007e-09, "logits/chosen": -1.1455289125442505, "logits/rejected": -0.6390076875686646, "logps/chosen": -640.3883056640625, "logps/rejected": -1216.2052001953125, "loss": 0.1679, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19959679245948792, "rewards/margins": 0.3413989543914795, "rewards/rejected": -0.540995717048645, "step": 5200 }, { "epoch": 0.99, "learning_rate": 8.840982205160498e-10, "logits/chosen": -1.371997356414795, "logits/rejected": -0.585719108581543, "logps/chosen": -708.5758056640625, "logps/rejected": -1272.704833984375, "loss": 0.1196, "rewards/accuracies": 0.875, "rewards/chosen": -0.1906108260154724, "rewards/margins": 0.4003322720527649, "rewards/rejected": -0.5909430980682373, "step": 5210 }, { "epoch": 0.99, "learning_rate": 4.973180736911332e-10, "logits/chosen": -1.3347803354263306, "logits/rejected": -0.5453115701675415, "logps/chosen": -661.0377197265625, "logps/rejected": -1164.52685546875, "loss": 0.0885, "rewards/accuracies": 0.875, "rewards/chosen": -0.17294715344905853, "rewards/margins": 0.37463489174842834, "rewards/rejected": -0.5475820302963257, "step": 5220 }, { "epoch": 1.0, "learning_rate": 2.2103432636366718e-10, "logits/chosen": -1.3600475788116455, "logits/rejected": -0.8246363401412964, "logps/chosen": -624.4554443359375, "logps/rejected": -1144.3265380859375, "loss": 0.1505, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15025684237480164, "rewards/margins": 0.36535200476646423, "rewards/rejected": -0.5156089067459106, "step": 5230 }, { "epoch": 1.0, "learning_rate": 5.525919230670029e-11, "logits/chosen": -1.4224984645843506, "logits/rejected": -0.6222517490386963, "logps/chosen": -668.0000610351562, "logps/rejected": -1333.5238037109375, "loss": 0.1099, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20631489157676697, "rewards/margins": 0.41408148407936096, "rewards/rejected": -0.6203962564468384, "step": 5240 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -1.2460081577301025, "logits/rejected": -0.8154599070549011, "logps/chosen": -627.9478149414062, "logps/rejected": -1239.8111572265625, "loss": 0.0891, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16402778029441833, "rewards/margins": 0.3759276270866394, "rewards/rejected": -0.5399554967880249, "step": 5250 }, { "epoch": 1.0, "step": 5250, "total_flos": 0.0, "train_loss": 0.14500870579197292, "train_runtime": 22293.6001, "train_samples_per_second": 0.942, "train_steps_per_second": 0.235 } ], "logging_steps": 10, "max_steps": 5250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }