{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9993222089532967, "eval_steps": 100, "global_step": 2904, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.7182130584192438e-09, "logits/chosen": -4.324154853820801, "logits/rejected": -4.269870758056641, "logps/chosen": -367.06219482421875, "logps/rejected": -317.6511535644531, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -4.277963638305664, "logits/rejected": -4.137287616729736, "logps/chosen": -423.3011779785156, "logps/rejected": -322.6611633300781, "loss": 0.6946, "rewards/accuracies": 0.3958333432674408, "rewards/chosen": -0.0024322373792529106, "rewards/margins": -0.0025027708616107702, "rewards/rejected": 7.053340232232586e-05, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -4.263833045959473, "logits/rejected": -4.1435723304748535, "logps/chosen": -392.3028259277344, "logps/rejected": -317.58099365234375, "loss": 0.6933, "rewards/accuracies": 0.5, "rewards/chosen": 0.00013974684407003224, "rewards/margins": 0.00036675756564363837, "rewards/rejected": -0.0002270108088850975, "step": 20 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -4.267035961151123, "logits/rejected": -4.1559858322143555, "logps/chosen": -406.5338134765625, "logps/rejected": -325.1300354003906, "loss": 0.6926, "rewards/accuracies": 0.528124988079071, "rewards/chosen": 0.0006199823692440987, "rewards/margins": 0.001958064269274473, "rewards/rejected": -0.0013380816671997309, "step": 30 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -4.252381324768066, "logits/rejected": -4.157735824584961, "logps/chosen": -376.9677429199219, "logps/rejected": -313.22186279296875, "loss": 0.6936, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": 0.00031176567426882684, "rewards/margins": -0.00012173606228316203, "rewards/rejected": 0.0004335021658334881, "step": 40 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -4.273421287536621, "logits/rejected": -4.16197395324707, "logps/chosen": -397.47222900390625, "logps/rejected": -314.4212341308594, "loss": 0.693, "rewards/accuracies": 0.5234375, "rewards/chosen": -0.002201077062636614, "rewards/margins": 0.0010374437551945448, "rewards/rejected": -0.0032385208178311586, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -4.286251544952393, "logits/rejected": -4.157068252563477, "logps/chosen": -398.3650207519531, "logps/rejected": -320.15008544921875, "loss": 0.6947, "rewards/accuracies": 0.45781248807907104, "rewards/chosen": -0.004078245721757412, "rewards/margins": -0.0025454089045524597, "rewards/rejected": -0.0015328375156968832, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -4.272886753082275, "logits/rejected": -4.148139953613281, "logps/chosen": -401.9974060058594, "logps/rejected": -301.5581970214844, "loss": 0.6926, "rewards/accuracies": 0.5140625238418579, "rewards/chosen": -0.0007527429843321443, "rewards/margins": 0.0018155823927372694, "rewards/rejected": -0.0025683254934847355, "step": 70 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -4.266884803771973, "logits/rejected": -4.140568256378174, "logps/chosen": -422.1355895996094, "logps/rejected": -317.8870849609375, "loss": 0.692, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.0010083441156893969, "rewards/margins": 0.003068871796131134, "rewards/rejected": -0.004077216610312462, "step": 80 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -4.275304317474365, "logits/rejected": -4.130114555358887, "logps/chosen": -405.22271728515625, "logps/rejected": -321.6945495605469, "loss": 0.6934, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -0.0015558091690763831, "rewards/margins": 0.00022566183179151267, "rewards/rejected": -0.001781471073627472, "step": 90 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -4.3047099113464355, "logits/rejected": -4.166022300720215, "logps/chosen": -401.2640075683594, "logps/rejected": -308.84307861328125, "loss": 0.694, "rewards/accuracies": 0.5140625238418579, "rewards/chosen": -0.0015420484123751521, "rewards/margins": -0.0010955848265439272, "rewards/rejected": -0.0004464638768695295, "step": 100 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -4.256144046783447, "logits/rejected": -4.105890274047852, "logps/chosen": -420.77423095703125, "logps/rejected": -302.2908935546875, "loss": 0.6937, "rewards/accuracies": 0.4859375059604645, "rewards/chosen": 9.21973041840829e-05, "rewards/margins": -0.0004623614368028939, "rewards/rejected": 0.0005545587628148496, "step": 110 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -4.286948204040527, "logits/rejected": -4.167834281921387, "logps/chosen": -405.80596923828125, "logps/rejected": -323.16510009765625, "loss": 0.694, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -0.0016373072285205126, "rewards/margins": -0.001050219521857798, "rewards/rejected": -0.0005870877066627145, "step": 120 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -4.278336524963379, "logits/rejected": -4.132115840911865, "logps/chosen": -401.5750732421875, "logps/rejected": -298.68670654296875, "loss": 0.6926, "rewards/accuracies": 0.49531251192092896, "rewards/chosen": -0.002432642038911581, "rewards/margins": 0.0018511947710067034, "rewards/rejected": -0.004283837042748928, "step": 130 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -4.276402473449707, "logits/rejected": -4.149945259094238, "logps/chosen": -412.5904846191406, "logps/rejected": -305.0515441894531, "loss": 0.694, "rewards/accuracies": 0.49531251192092896, "rewards/chosen": -0.0023457477800548077, "rewards/margins": -0.0008829582366161048, "rewards/rejected": -0.0014627889031544328, "step": 140 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -4.2490129470825195, "logits/rejected": -4.137168884277344, "logps/chosen": -381.56756591796875, "logps/rejected": -299.19793701171875, "loss": 0.6946, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.0032607235480099916, "rewards/margins": -0.0023342289496213198, "rewards/rejected": -0.0009264945983886719, "step": 150 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -4.262160778045654, "logits/rejected": -4.115349769592285, "logps/chosen": -399.4332580566406, "logps/rejected": -311.04608154296875, "loss": 0.6943, "rewards/accuracies": 0.4921875, "rewards/chosen": -0.0022923736833035946, "rewards/margins": -0.0016422644257545471, "rewards/rejected": -0.0006501094321720302, "step": 160 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -4.254582405090332, "logits/rejected": -4.094132423400879, "logps/chosen": -419.3282775878906, "logps/rejected": -314.56829833984375, "loss": 0.6926, "rewards/accuracies": 0.4859375059604645, "rewards/chosen": -0.0008115085074678063, "rewards/margins": 0.0018295502522960305, "rewards/rejected": -0.002641058526933193, "step": 170 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -4.262505531311035, "logits/rejected": -4.1487321853637695, "logps/chosen": -413.31011962890625, "logps/rejected": -324.69805908203125, "loss": 0.6935, "rewards/accuracies": 0.4921875, "rewards/chosen": -0.0014527825405821204, "rewards/margins": -0.00011156280379509553, "rewards/rejected": -0.0013412194093689322, "step": 180 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -4.2973856925964355, "logits/rejected": -4.131929397583008, "logps/chosen": -417.13568115234375, "logps/rejected": -314.27001953125, "loss": 0.6946, "rewards/accuracies": 0.484375, "rewards/chosen": -0.0009450524812564254, "rewards/margins": -0.0020798335317522287, "rewards/rejected": 0.0011347811669111252, "step": 190 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -4.277375221252441, "logits/rejected": -4.1441168785095215, "logps/chosen": -381.47296142578125, "logps/rejected": -301.980224609375, "loss": 0.6925, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.002059588208794594, "rewards/margins": 0.00202515278942883, "rewards/rejected": 3.443551395321265e-05, "step": 200 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -4.272757530212402, "logits/rejected": -4.1654133796691895, "logps/chosen": -407.0859069824219, "logps/rejected": -331.93328857421875, "loss": 0.6924, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 0.0008089464390650392, "rewards/margins": 0.0022251014597713947, "rewards/rejected": -0.0014161552535369992, "step": 210 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -4.265755653381348, "logits/rejected": -4.139852523803711, "logps/chosen": -396.70904541015625, "logps/rejected": -311.2632141113281, "loss": 0.6945, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.0018544609192758799, "rewards/margins": -0.0020773629657924175, "rewards/rejected": 0.00022290220658760518, "step": 220 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -4.260704040527344, "logits/rejected": -4.126727104187012, "logps/chosen": -408.3927917480469, "logps/rejected": -319.992919921875, "loss": 0.694, "rewards/accuracies": 0.4859375059604645, "rewards/chosen": -0.0022286553867161274, "rewards/margins": -0.000920031510759145, "rewards/rejected": -0.001308623468503356, "step": 230 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -4.259461879730225, "logits/rejected": -4.144876956939697, "logps/chosen": -409.2859802246094, "logps/rejected": -328.0377502441406, "loss": 0.6936, "rewards/accuracies": 0.504687488079071, "rewards/chosen": -0.0005639836890622973, "rewards/margins": -0.00022939601331017911, "rewards/rejected": -0.0003345878212712705, "step": 240 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -4.266787052154541, "logits/rejected": -4.1633710861206055, "logps/chosen": -403.5596923828125, "logps/rejected": -319.5048522949219, "loss": 0.6943, "rewards/accuracies": 0.5, "rewards/chosen": -0.0008266723598353565, "rewards/margins": -0.0015376238152384758, "rewards/rejected": 0.0007109515718184412, "step": 250 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -4.2665534019470215, "logits/rejected": -4.13022518157959, "logps/chosen": -379.11322021484375, "logps/rejected": -289.3961486816406, "loss": 0.6932, "rewards/accuracies": 0.5, "rewards/chosen": -0.0011679441668093204, "rewards/margins": 0.00039926558383740485, "rewards/rejected": -0.0015672097215428948, "step": 260 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -4.271460056304932, "logits/rejected": -4.139186859130859, "logps/chosen": -424.3821716308594, "logps/rejected": -318.91943359375, "loss": 0.693, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.00013967350241728127, "rewards/margins": 0.0009566223016008735, "rewards/rejected": -0.0010962963569909334, "step": 270 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -4.285706520080566, "logits/rejected": -4.135653018951416, "logps/chosen": -408.2174377441406, "logps/rejected": -306.91937255859375, "loss": 0.6928, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 2.2016651200829074e-05, "rewards/margins": 0.0013473120052367449, "rewards/rejected": -0.001325295539572835, "step": 280 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -4.282963752746582, "logits/rejected": -4.156318664550781, "logps/chosen": -383.9125671386719, "logps/rejected": -296.3202819824219, "loss": 0.694, "rewards/accuracies": 0.4828124940395355, "rewards/chosen": -0.0008248983067460358, "rewards/margins": -0.0010658926330506802, "rewards/rejected": 0.00024099461734294891, "step": 290 }, { "epoch": 0.31, "learning_rate": 4.982778415614236e-07, "logits/chosen": -4.276602268218994, "logits/rejected": -4.160402774810791, "logps/chosen": -395.6986083984375, "logps/rejected": -308.46368408203125, "loss": 0.6934, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.00020179541024845093, "rewards/margins": 0.00023063849948812276, "rewards/rejected": -0.00043243388063274324, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.963643321852277e-07, "logits/chosen": -4.262394905090332, "logits/rejected": -4.137896537780762, "logps/chosen": -405.48583984375, "logps/rejected": -316.9510498046875, "loss": 0.6933, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0025507945101708174, "rewards/margins": 0.0003848490596283227, "rewards/rejected": -0.0029356435406953096, "step": 310 }, { "epoch": 0.33, "learning_rate": 4.944508228090318e-07, "logits/chosen": -4.2743682861328125, "logits/rejected": -4.132224082946777, "logps/chosen": -406.7742614746094, "logps/rejected": -300.9280700683594, "loss": 0.6936, "rewards/accuracies": 0.5171874761581421, "rewards/chosen": -0.0014929536264389753, "rewards/margins": -0.00023388855333905667, "rewards/rejected": -0.0012590645346790552, "step": 320 }, { "epoch": 0.34, "learning_rate": 4.925373134328357e-07, "logits/chosen": -4.293769836425781, "logits/rejected": -4.170851230621338, "logps/chosen": -406.20074462890625, "logps/rejected": -320.6856384277344, "loss": 0.6947, "rewards/accuracies": 0.47968751192092896, "rewards/chosen": -3.2803043268359033e-06, "rewards/margins": -0.0023293071426451206, "rewards/rejected": 0.0023260267917066813, "step": 330 }, { "epoch": 0.35, "learning_rate": 4.906238040566398e-07, "logits/chosen": -4.264895439147949, "logits/rejected": -4.144906044006348, "logps/chosen": -382.7566223144531, "logps/rejected": -307.6598205566406, "loss": 0.6922, "rewards/accuracies": 0.5, "rewards/chosen": -0.001648748992010951, "rewards/margins": 0.002527676522731781, "rewards/rejected": -0.004176425281912088, "step": 340 }, { "epoch": 0.36, "learning_rate": 4.887102946804438e-07, "logits/chosen": -4.280018329620361, "logits/rejected": -4.165085792541504, "logps/chosen": -396.96026611328125, "logps/rejected": -309.4230651855469, "loss": 0.6923, "rewards/accuracies": 0.510937511920929, "rewards/chosen": 0.0016236413503065705, "rewards/margins": 0.0022962945513427258, "rewards/rejected": -0.000672653317451477, "step": 350 }, { "epoch": 0.37, "learning_rate": 4.867967853042479e-07, "logits/chosen": -4.289803504943848, "logits/rejected": -4.140151500701904, "logps/chosen": -405.28973388671875, "logps/rejected": -312.529541015625, "loss": 0.6939, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.0023417104966938496, "rewards/margins": -0.0007966022822074592, "rewards/rejected": -0.0015451073413714767, "step": 360 }, { "epoch": 0.38, "learning_rate": 4.84883275928052e-07, "logits/chosen": -4.268817901611328, "logits/rejected": -4.147084712982178, "logps/chosen": -399.0814514160156, "logps/rejected": -325.91363525390625, "loss": 0.6924, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 3.723411282408051e-05, "rewards/margins": 0.0022587967105209827, "rewards/rejected": -0.0022215619683265686, "step": 370 }, { "epoch": 0.39, "learning_rate": 4.82969766551856e-07, "logits/chosen": -4.2609758377075195, "logits/rejected": -4.148962020874023, "logps/chosen": -393.0679016113281, "logps/rejected": -317.9642028808594, "loss": 0.6927, "rewards/accuracies": 0.4765625, "rewards/chosen": 0.0006004157476127148, "rewards/margins": 0.0016590984305366874, "rewards/rejected": -0.001058683032169938, "step": 380 }, { "epoch": 0.4, "learning_rate": 4.810562571756601e-07, "logits/chosen": -4.2723493576049805, "logits/rejected": -4.122799873352051, "logps/chosen": -397.3229675292969, "logps/rejected": -304.338623046875, "loss": 0.6935, "rewards/accuracies": 0.5328124761581421, "rewards/chosen": 0.0007614147616550326, "rewards/margins": 7.879303666413762e-06, "rewards/rejected": 0.0007535360055044293, "step": 390 }, { "epoch": 0.41, "learning_rate": 4.791427477994642e-07, "logits/chosen": -4.290783405303955, "logits/rejected": -4.144261360168457, "logps/chosen": -412.53375244140625, "logps/rejected": -313.51739501953125, "loss": 0.6948, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.0025499488692730665, "rewards/margins": -0.002634689910337329, "rewards/rejected": 8.474113565171137e-05, "step": 400 }, { "epoch": 0.42, "learning_rate": 4.772292384232682e-07, "logits/chosen": -4.280355930328369, "logits/rejected": -4.188906669616699, "logps/chosen": -399.37750244140625, "logps/rejected": -324.8134765625, "loss": 0.692, "rewards/accuracies": 0.5234375, "rewards/chosen": 0.0012526216451078653, "rewards/margins": 0.0029473325703293085, "rewards/rejected": -0.0016947109252214432, "step": 410 }, { "epoch": 0.43, "learning_rate": 4.753157290470723e-07, "logits/chosen": -4.2888898849487305, "logits/rejected": -4.1478071212768555, "logps/chosen": -418.41351318359375, "logps/rejected": -319.01507568359375, "loss": 0.6938, "rewards/accuracies": 0.4984374940395355, "rewards/chosen": 0.0015372170601040125, "rewards/margins": -0.0005680068279616535, "rewards/rejected": 0.0021052241791039705, "step": 420 }, { "epoch": 0.44, "learning_rate": 4.7340221967087635e-07, "logits/chosen": -4.254868984222412, "logits/rejected": -4.136019706726074, "logps/chosen": -384.3445739746094, "logps/rejected": -295.01312255859375, "loss": 0.6948, "rewards/accuracies": 0.4828124940395355, "rewards/chosen": -0.0011382882948964834, "rewards/margins": -0.0026994033250957727, "rewards/rejected": 0.0015611147973686457, "step": 430 }, { "epoch": 0.45, "learning_rate": 4.714887102946804e-07, "logits/chosen": -4.2907233238220215, "logits/rejected": -4.132693290710449, "logps/chosen": -414.918212890625, "logps/rejected": -315.94573974609375, "loss": 0.6936, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": 0.0015450514620169997, "rewards/margins": -0.0002846633142326027, "rewards/rejected": 0.001829715445637703, "step": 440 }, { "epoch": 0.46, "learning_rate": 4.6957520091848447e-07, "logits/chosen": -4.255721092224121, "logits/rejected": -4.1633100509643555, "logps/chosen": -400.84967041015625, "logps/rejected": -327.2038269042969, "loss": 0.6932, "rewards/accuracies": 0.515625, "rewards/chosen": -0.0014290885301306844, "rewards/margins": 0.0006102249026298523, "rewards/rejected": -0.0020393135491758585, "step": 450 }, { "epoch": 0.48, "learning_rate": 4.6766169154228853e-07, "logits/chosen": -4.247704029083252, "logits/rejected": -4.119411468505859, "logps/chosen": -394.64178466796875, "logps/rejected": -308.6086120605469, "loss": 0.6924, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.0018646775279194117, "rewards/margins": 0.0020817045588046312, "rewards/rejected": -0.00021702758385799825, "step": 460 }, { "epoch": 0.49, "learning_rate": 4.657481821660926e-07, "logits/chosen": -4.270275592803955, "logits/rejected": -4.133027076721191, "logps/chosen": -419.8226623535156, "logps/rejected": -323.5798645019531, "loss": 0.6949, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -0.0003835520183201879, "rewards/margins": -0.00283462880179286, "rewards/rejected": 0.0024510768707841635, "step": 470 }, { "epoch": 0.5, "learning_rate": 4.6383467278989666e-07, "logits/chosen": -4.274611473083496, "logits/rejected": -4.136614799499512, "logps/chosen": -400.21673583984375, "logps/rejected": -314.0077209472656, "loss": 0.6929, "rewards/accuracies": 0.4828124940395355, "rewards/chosen": -0.00010184728307649493, "rewards/margins": 0.0012176515301689506, "rewards/rejected": -0.0013194989878684282, "step": 480 }, { "epoch": 0.51, "learning_rate": 4.6192116341370067e-07, "logits/chosen": -4.25107479095459, "logits/rejected": -4.151733875274658, "logps/chosen": -397.98016357421875, "logps/rejected": -308.0941467285156, "loss": 0.6939, "rewards/accuracies": 0.4828124940395355, "rewards/chosen": -0.0004709061176981777, "rewards/margins": -0.0007273858063854277, "rewards/rejected": 0.00025647960137575865, "step": 490 }, { "epoch": 0.52, "learning_rate": 4.6000765403750473e-07, "logits/chosen": -4.2503204345703125, "logits/rejected": -4.1171112060546875, "logps/chosen": -412.30450439453125, "logps/rejected": -309.49481201171875, "loss": 0.692, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.0027156358119100332, "rewards/margins": 0.003060466842725873, "rewards/rejected": -0.0003448307979851961, "step": 500 }, { "epoch": 0.53, "learning_rate": 4.580941446613088e-07, "logits/chosen": -4.279356002807617, "logits/rejected": -4.125931262969971, "logps/chosen": -409.6204528808594, "logps/rejected": -298.81329345703125, "loss": 0.6915, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": 0.0007973018218763173, "rewards/margins": 0.003946124110370874, "rewards/rejected": -0.0031488225795328617, "step": 510 }, { "epoch": 0.54, "learning_rate": 4.5618063528511285e-07, "logits/chosen": -4.253719329833984, "logits/rejected": -4.118457794189453, "logps/chosen": -408.22882080078125, "logps/rejected": -319.6842346191406, "loss": 0.6914, "rewards/accuracies": 0.535937488079071, "rewards/chosen": 0.00437967898324132, "rewards/margins": 0.004070502705872059, "rewards/rejected": 0.00030917683034203947, "step": 520 }, { "epoch": 0.55, "learning_rate": 4.542671259089169e-07, "logits/chosen": -4.232258319854736, "logits/rejected": -4.110759258270264, "logps/chosen": -414.5962829589844, "logps/rejected": -323.87078857421875, "loss": 0.6919, "rewards/accuracies": 0.5546875, "rewards/chosen": 0.002581060165539384, "rewards/margins": 0.0032051261514425278, "rewards/rejected": -0.0006240661023184657, "step": 530 }, { "epoch": 0.56, "learning_rate": 4.52353616532721e-07, "logits/chosen": -4.25144624710083, "logits/rejected": -4.156504154205322, "logps/chosen": -395.7941589355469, "logps/rejected": -313.2027893066406, "loss": 0.6946, "rewards/accuracies": 0.4859375059604645, "rewards/chosen": 0.0006000929279252887, "rewards/margins": -0.0023031379096210003, "rewards/rejected": 0.0029032311867922544, "step": 540 }, { "epoch": 0.57, "learning_rate": 4.5044010715652504e-07, "logits/chosen": -4.2781877517700195, "logits/rejected": -4.152641296386719, "logps/chosen": -400.3907470703125, "logps/rejected": -323.36663818359375, "loss": 0.6939, "rewards/accuracies": 0.47968751192092896, "rewards/chosen": 0.0021930981893092394, "rewards/margins": -0.0007275763782672584, "rewards/rejected": 0.0029206746257841587, "step": 550 }, { "epoch": 0.58, "learning_rate": 4.485265977803291e-07, "logits/chosen": -4.2500386238098145, "logits/rejected": -4.107308387756348, "logps/chosen": -411.8309020996094, "logps/rejected": -319.95989990234375, "loss": 0.6914, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": 0.0027205091901123524, "rewards/margins": 0.004122564569115639, "rewards/rejected": -0.0014020560774952173, "step": 560 }, { "epoch": 0.59, "learning_rate": 4.4661308840413316e-07, "logits/chosen": -4.307834625244141, "logits/rejected": -4.123293399810791, "logps/chosen": -412.34625244140625, "logps/rejected": -310.9077453613281, "loss": 0.6933, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.00016926185344345868, "rewards/margins": 0.0005053894128650427, "rewards/rejected": -0.0006746514118276536, "step": 570 }, { "epoch": 0.6, "learning_rate": 4.446995790279372e-07, "logits/chosen": -4.272593021392822, "logits/rejected": -4.129204750061035, "logps/chosen": -411.84161376953125, "logps/rejected": -321.55072021484375, "loss": 0.6922, "rewards/accuracies": 0.5078125, "rewards/chosen": 0.003927945625036955, "rewards/margins": 0.0026295329444110394, "rewards/rejected": 0.0012984138447791338, "step": 580 }, { "epoch": 0.61, "learning_rate": 4.4278606965174123e-07, "logits/chosen": -4.280831813812256, "logits/rejected": -4.155787467956543, "logps/chosen": -408.56256103515625, "logps/rejected": -312.60577392578125, "loss": 0.6936, "rewards/accuracies": 0.4921875, "rewards/chosen": -9.327723819296807e-05, "rewards/margins": -0.00040280382381752133, "rewards/rejected": 0.0003095265128649771, "step": 590 }, { "epoch": 0.62, "learning_rate": 4.408725602755453e-07, "logits/chosen": -4.285837650299072, "logits/rejected": -4.156912803649902, "logps/chosen": -404.223876953125, "logps/rejected": -322.22894287109375, "loss": 0.6945, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": 0.00110340875107795, "rewards/margins": -0.0020270957611501217, "rewards/rejected": 0.003130504861474037, "step": 600 }, { "epoch": 0.63, "learning_rate": 4.3895905089934936e-07, "logits/chosen": -4.270712852478027, "logits/rejected": -4.1485466957092285, "logps/chosen": -427.80010986328125, "logps/rejected": -334.1997375488281, "loss": 0.6933, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": 0.0012709179427474737, "rewards/margins": 0.00037813876406289637, "rewards/rejected": 0.0008927792077884078, "step": 610 }, { "epoch": 0.64, "learning_rate": 4.370455415231534e-07, "logits/chosen": -4.2818217277526855, "logits/rejected": -4.141083240509033, "logps/chosen": -397.087646484375, "logps/rejected": -299.8023986816406, "loss": 0.6917, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 0.0015098925214260817, "rewards/margins": 0.003647155361250043, "rewards/rejected": -0.0021372628398239613, "step": 620 }, { "epoch": 0.65, "learning_rate": 4.351320321469575e-07, "logits/chosen": -4.2828288078308105, "logits/rejected": -4.15810489654541, "logps/chosen": -408.72857666015625, "logps/rejected": -310.77764892578125, "loss": 0.6944, "rewards/accuracies": 0.4765625, "rewards/chosen": 0.00010033079888671637, "rewards/margins": -0.0017872953321784735, "rewards/rejected": 0.0018876262474805117, "step": 630 }, { "epoch": 0.66, "learning_rate": 4.3321852277076154e-07, "logits/chosen": -4.249554634094238, "logits/rejected": -4.136783599853516, "logps/chosen": -414.71624755859375, "logps/rejected": -330.455810546875, "loss": 0.6912, "rewards/accuracies": 0.578125, "rewards/chosen": 0.006457502953708172, "rewards/margins": 0.0046369172632694244, "rewards/rejected": 0.0018205851083621383, "step": 640 }, { "epoch": 0.67, "learning_rate": 4.313050133945656e-07, "logits/chosen": -4.281416893005371, "logits/rejected": -4.156689643859863, "logps/chosen": -419.05401611328125, "logps/rejected": -319.1665954589844, "loss": 0.6921, "rewards/accuracies": 0.528124988079071, "rewards/chosen": 0.0033603243064135313, "rewards/margins": 0.0027967148926109076, "rewards/rejected": 0.0005636097048409283, "step": 650 }, { "epoch": 0.68, "learning_rate": 4.2939150401836967e-07, "logits/chosen": -4.239119529724121, "logits/rejected": -4.116203308105469, "logps/chosen": -421.9933166503906, "logps/rejected": -327.5665588378906, "loss": 0.6935, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": 0.0006358650280162692, "rewards/margins": -3.5358032619114965e-05, "rewards/rejected": 0.0006712229805998504, "step": 660 }, { "epoch": 0.69, "learning_rate": 4.2747799464217373e-07, "logits/chosen": -4.252030849456787, "logits/rejected": -4.138897895812988, "logps/chosen": -402.931884765625, "logps/rejected": -311.1913757324219, "loss": 0.6926, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": 0.003976965788751841, "rewards/margins": 0.0018042316660284996, "rewards/rejected": 0.002172734122723341, "step": 670 }, { "epoch": 0.7, "learning_rate": 4.255644852659778e-07, "logits/chosen": -4.269718170166016, "logits/rejected": -4.140122413635254, "logps/chosen": -402.2257385253906, "logps/rejected": -318.7126770019531, "loss": 0.6928, "rewards/accuracies": 0.515625, "rewards/chosen": 0.00315951113589108, "rewards/margins": 0.0014611692167818546, "rewards/rejected": 0.0016983415698632598, "step": 680 }, { "epoch": 0.71, "learning_rate": 4.236509758897818e-07, "logits/chosen": -4.262238025665283, "logits/rejected": -4.1432600021362305, "logps/chosen": -405.7474670410156, "logps/rejected": -317.77447509765625, "loss": 0.6911, "rewards/accuracies": 0.510937511920929, "rewards/chosen": 0.005235121585428715, "rewards/margins": 0.004777342546731234, "rewards/rejected": 0.00045777950435876846, "step": 690 }, { "epoch": 0.72, "learning_rate": 4.2173746651358586e-07, "logits/chosen": -4.308034420013428, "logits/rejected": -4.176297187805176, "logps/chosen": -417.35626220703125, "logps/rejected": -313.90887451171875, "loss": 0.6918, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 0.0032903787214308977, "rewards/margins": 0.003389782505109906, "rewards/rejected": -9.9403434433043e-05, "step": 700 }, { "epoch": 0.73, "learning_rate": 4.198239571373899e-07, "logits/chosen": -4.285686492919922, "logits/rejected": -4.153790473937988, "logps/chosen": -423.37127685546875, "logps/rejected": -331.2875671386719, "loss": 0.6909, "rewards/accuracies": 0.542187511920929, "rewards/chosen": 0.005327778868377209, "rewards/margins": 0.005328441970050335, "rewards/rejected": -6.637536102971353e-07, "step": 710 }, { "epoch": 0.74, "learning_rate": 4.17910447761194e-07, "logits/chosen": -4.279539108276367, "logits/rejected": -4.177431106567383, "logps/chosen": -379.5310363769531, "logps/rejected": -300.36602783203125, "loss": 0.6924, "rewards/accuracies": 0.510937511920929, "rewards/chosen": 0.0025111553259193897, "rewards/margins": 0.0021530953235924244, "rewards/rejected": 0.00035806017694994807, "step": 720 }, { "epoch": 0.75, "learning_rate": 4.1599693838499805e-07, "logits/chosen": -4.239541530609131, "logits/rejected": -4.1288628578186035, "logps/chosen": -375.0932312011719, "logps/rejected": -298.7073974609375, "loss": 0.6934, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": 0.0017489530146121979, "rewards/margins": 0.0001466287358198315, "rewards/rejected": 0.0016023240750655532, "step": 730 }, { "epoch": 0.76, "learning_rate": 4.140834290088021e-07, "logits/chosen": -4.258530616760254, "logits/rejected": -4.131335258483887, "logps/chosen": -378.4256591796875, "logps/rejected": -288.19842529296875, "loss": 0.6939, "rewards/accuracies": 0.49531251192092896, "rewards/chosen": 0.0009945884812623262, "rewards/margins": -0.0008082139538601041, "rewards/rejected": 0.0018028020858764648, "step": 740 }, { "epoch": 0.77, "learning_rate": 4.121699196326062e-07, "logits/chosen": -4.268132209777832, "logits/rejected": -4.133342742919922, "logps/chosen": -402.5184631347656, "logps/rejected": -310.98651123046875, "loss": 0.6942, "rewards/accuracies": 0.46875, "rewards/chosen": 0.0009071314707398415, "rewards/margins": -0.001490770373493433, "rewards/rejected": 0.0023979023098945618, "step": 750 }, { "epoch": 0.78, "learning_rate": 4.1025641025641024e-07, "logits/chosen": -4.277812480926514, "logits/rejected": -4.182877063751221, "logps/chosen": -408.18048095703125, "logps/rejected": -319.3320617675781, "loss": 0.6925, "rewards/accuracies": 0.515625, "rewards/chosen": 0.004439138807356358, "rewards/margins": 0.0019760008435696363, "rewards/rejected": 0.0024631377309560776, "step": 760 }, { "epoch": 0.8, "learning_rate": 4.083429008802143e-07, "logits/chosen": -4.280663967132568, "logits/rejected": -4.152825355529785, "logps/chosen": -401.7829895019531, "logps/rejected": -306.715576171875, "loss": 0.6928, "rewards/accuracies": 0.5, "rewards/chosen": 0.004907527472823858, "rewards/margins": 0.00137388426810503, "rewards/rejected": 0.003533643204718828, "step": 770 }, { "epoch": 0.81, "learning_rate": 4.0642939150401836e-07, "logits/chosen": -4.289612770080566, "logits/rejected": -4.144918918609619, "logps/chosen": -399.4092712402344, "logps/rejected": -304.46209716796875, "loss": 0.6928, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 0.002732563531026244, "rewards/margins": 0.0014565556775778532, "rewards/rejected": 0.0012760077370330691, "step": 780 }, { "epoch": 0.82, "learning_rate": 4.0451588212782237e-07, "logits/chosen": -4.240975856781006, "logits/rejected": -4.128232479095459, "logps/chosen": -413.4056701660156, "logps/rejected": -328.46240234375, "loss": 0.6923, "rewards/accuracies": 0.526562511920929, "rewards/chosen": 0.003439761698246002, "rewards/margins": 0.0025056053418666124, "rewards/rejected": 0.0009341565892100334, "step": 790 }, { "epoch": 0.83, "learning_rate": 4.0260237275162643e-07, "logits/chosen": -4.260972023010254, "logits/rejected": -4.160162448883057, "logps/chosen": -404.38824462890625, "logps/rejected": -330.4800109863281, "loss": 0.6938, "rewards/accuracies": 0.5015624761581421, "rewards/chosen": 0.0018609057879075408, "rewards/margins": -0.000567351933568716, "rewards/rejected": 0.0024282578378915787, "step": 800 }, { "epoch": 0.84, "learning_rate": 4.006888633754305e-07, "logits/chosen": -4.292420387268066, "logits/rejected": -4.155418395996094, "logps/chosen": -402.5275573730469, "logps/rejected": -311.781982421875, "loss": 0.6924, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 0.0035346276126801968, "rewards/margins": 0.002268751384690404, "rewards/rejected": 0.0012658759951591492, "step": 810 }, { "epoch": 0.85, "learning_rate": 3.9877535399923456e-07, "logits/chosen": -4.276908874511719, "logits/rejected": -4.151911735534668, "logps/chosen": -408.8199768066406, "logps/rejected": -321.52215576171875, "loss": 0.693, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 0.00492482166737318, "rewards/margins": 0.0010390502866357565, "rewards/rejected": 0.003885771380737424, "step": 820 }, { "epoch": 0.86, "learning_rate": 3.968618446230386e-07, "logits/chosen": -4.282100677490234, "logits/rejected": -4.158295631408691, "logps/chosen": -399.35430908203125, "logps/rejected": -316.38165283203125, "loss": 0.6928, "rewards/accuracies": 0.510937511920929, "rewards/chosen": 0.0036009408067911863, "rewards/margins": 0.0014611782971769571, "rewards/rejected": 0.002139762043952942, "step": 830 }, { "epoch": 0.87, "learning_rate": 3.949483352468427e-07, "logits/chosen": -4.265560150146484, "logits/rejected": -4.16861629486084, "logps/chosen": -384.11151123046875, "logps/rejected": -317.0963134765625, "loss": 0.6941, "rewards/accuracies": 0.49531251192092896, "rewards/chosen": 0.0022504080552607775, "rewards/margins": -0.001352280960418284, "rewards/rejected": 0.0036026891320943832, "step": 840 }, { "epoch": 0.88, "learning_rate": 3.9303482587064674e-07, "logits/chosen": -4.278510093688965, "logits/rejected": -4.114365577697754, "logps/chosen": -409.4468688964844, "logps/rejected": -319.8404541015625, "loss": 0.6941, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": 0.0014671286335214972, "rewards/margins": -0.001252708025276661, "rewards/rejected": 0.0027198365423828363, "step": 850 }, { "epoch": 0.89, "learning_rate": 3.911213164944508e-07, "logits/chosen": -4.275809288024902, "logits/rejected": -4.195669651031494, "logps/chosen": -395.3207702636719, "logps/rejected": -331.13372802734375, "loss": 0.6927, "rewards/accuracies": 0.510937511920929, "rewards/chosen": 0.0037080198526382446, "rewards/margins": 0.0016179044032469392, "rewards/rejected": 0.002090116497129202, "step": 860 }, { "epoch": 0.9, "learning_rate": 3.8920780711825487e-07, "logits/chosen": -4.255551815032959, "logits/rejected": -4.139596462249756, "logps/chosen": -402.43768310546875, "logps/rejected": -321.7333984375, "loss": 0.692, "rewards/accuracies": 0.53125, "rewards/chosen": 0.0047720326110720634, "rewards/margins": 0.003143607871606946, "rewards/rejected": 0.0016284246230497956, "step": 870 }, { "epoch": 0.91, "learning_rate": 3.8729429774205893e-07, "logits/chosen": -4.2646284103393555, "logits/rejected": -4.149605751037598, "logps/chosen": -387.3090515136719, "logps/rejected": -301.6280212402344, "loss": 0.6916, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": 0.004609361290931702, "rewards/margins": 0.0038493976462632418, "rewards/rejected": 0.0007599632954224944, "step": 880 }, { "epoch": 0.92, "learning_rate": 3.8538078836586294e-07, "logits/chosen": -4.2500715255737305, "logits/rejected": -4.115664958953857, "logps/chosen": -425.0071716308594, "logps/rejected": -319.0146484375, "loss": 0.6913, "rewards/accuracies": 0.5140625238418579, "rewards/chosen": 0.004975964780896902, "rewards/margins": 0.0043433718383312225, "rewards/rejected": 0.0006325935246422887, "step": 890 }, { "epoch": 0.93, "learning_rate": 3.83467278989667e-07, "logits/chosen": -4.254255294799805, "logits/rejected": -4.149744510650635, "logps/chosen": -409.4825134277344, "logps/rejected": -328.2546691894531, "loss": 0.693, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.0023609010968357325, "rewards/margins": 0.0011571452487260103, "rewards/rejected": 0.0012037558481097221, "step": 900 }, { "epoch": 0.94, "learning_rate": 3.8155376961347106e-07, "logits/chosen": -4.244211673736572, "logits/rejected": -4.126115322113037, "logps/chosen": -405.63763427734375, "logps/rejected": -319.128662109375, "loss": 0.6936, "rewards/accuracies": 0.4984374940395355, "rewards/chosen": 0.002668022643774748, "rewards/margins": -0.00015769092715345323, "rewards/rejected": 0.0028257134836167097, "step": 910 }, { "epoch": 0.95, "learning_rate": 3.796402602372751e-07, "logits/chosen": -4.292696952819824, "logits/rejected": -4.156879425048828, "logps/chosen": -415.4306640625, "logps/rejected": -324.4388732910156, "loss": 0.6919, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0032460917718708515, "rewards/margins": 0.0033036619424819946, "rewards/rejected": -5.7570869103074074e-05, "step": 920 }, { "epoch": 0.96, "learning_rate": 3.777267508610792e-07, "logits/chosen": -4.28294038772583, "logits/rejected": -4.117633819580078, "logps/chosen": -415.66290283203125, "logps/rejected": -305.69390869140625, "loss": 0.6919, "rewards/accuracies": 0.5296875238418579, "rewards/chosen": 0.004189764615148306, "rewards/margins": 0.0031828763894736767, "rewards/rejected": 0.0010068879928439856, "step": 930 }, { "epoch": 0.97, "learning_rate": 3.7581324148488325e-07, "logits/chosen": -4.260636806488037, "logits/rejected": -4.170042037963867, "logps/chosen": -407.6413879394531, "logps/rejected": -327.45574951171875, "loss": 0.6895, "rewards/accuracies": 0.5765625238418579, "rewards/chosen": 0.00892933551222086, "rewards/margins": 0.008162255398929119, "rewards/rejected": 0.0007670802297070622, "step": 940 }, { "epoch": 0.98, "learning_rate": 3.738997321086873e-07, "logits/chosen": -4.2640604972839355, "logits/rejected": -4.171642303466797, "logps/chosen": -400.89373779296875, "logps/rejected": -322.080078125, "loss": 0.6924, "rewards/accuracies": 0.503125011920929, "rewards/chosen": 0.00451917527243495, "rewards/margins": 0.0021246224641799927, "rewards/rejected": 0.002394552808254957, "step": 950 }, { "epoch": 0.99, "learning_rate": 3.7198622273249137e-07, "logits/chosen": -4.275049209594727, "logits/rejected": -4.145798683166504, "logps/chosen": -429.56634521484375, "logps/rejected": -331.715576171875, "loss": 0.6916, "rewards/accuracies": 0.526562511920929, "rewards/chosen": 0.0037804250605404377, "rewards/margins": 0.003932067193090916, "rewards/rejected": -0.00015164251090027392, "step": 960 }, { "epoch": 1.0, "eval_logits/chosen": -4.191330432891846, "eval_logits/rejected": -4.081260681152344, "eval_logps/chosen": -402.61639404296875, "eval_logps/rejected": -315.7343444824219, "eval_loss": 0.6920775771141052, "eval_rewards/accuracies": 0.5070000290870667, "eval_rewards/chosen": 0.003913247026503086, "eval_rewards/margins": 0.002829314675182104, "eval_rewards/rejected": 0.0010839327005669475, "eval_runtime": 762.8033, "eval_samples_per_second": 2.622, "eval_steps_per_second": 0.655, "step": 968 }, { "epoch": 1.0, "learning_rate": 3.7007271335629544e-07, "logits/chosen": -4.291719436645508, "logits/rejected": -4.159844875335693, "logps/chosen": -410.930419921875, "logps/rejected": -320.3424377441406, "loss": 0.6935, "rewards/accuracies": 0.484375, "rewards/chosen": 0.004596616607159376, "rewards/margins": 3.339797694934532e-05, "rewards/rejected": 0.004563218913972378, "step": 970 }, { "epoch": 1.01, "learning_rate": 3.681592039800995e-07, "logits/chosen": -4.274192810058594, "logits/rejected": -4.13837194442749, "logps/chosen": -401.00213623046875, "logps/rejected": -310.458251953125, "loss": 0.6907, "rewards/accuracies": 0.5453125238418579, "rewards/chosen": 0.0076770298182964325, "rewards/margins": 0.005642565432935953, "rewards/rejected": 0.0020344643853604794, "step": 980 }, { "epoch": 1.02, "learning_rate": 3.662456946039035e-07, "logits/chosen": -4.276472091674805, "logits/rejected": -4.1441144943237305, "logps/chosen": -413.98760986328125, "logps/rejected": -310.71051025390625, "loss": 0.6912, "rewards/accuracies": 0.528124988079071, "rewards/chosen": 0.010534636676311493, "rewards/margins": 0.00471758097410202, "rewards/rejected": 0.005817054770886898, "step": 990 }, { "epoch": 1.03, "learning_rate": 3.6433218522770757e-07, "logits/chosen": -4.2762131690979, "logits/rejected": -4.135240077972412, "logps/chosen": -420.061767578125, "logps/rejected": -330.8781433105469, "loss": 0.6936, "rewards/accuracies": 0.503125011920929, "rewards/chosen": 0.00425821915268898, "rewards/margins": -0.00019165253615938127, "rewards/rejected": 0.004449871368706226, "step": 1000 }, { "epoch": 1.04, "learning_rate": 3.6241867585151163e-07, "logits/chosen": -4.270270824432373, "logits/rejected": -4.130080223083496, "logps/chosen": -392.8804626464844, "logps/rejected": -304.02294921875, "loss": 0.692, "rewards/accuracies": 0.534375011920929, "rewards/chosen": 0.004289795644581318, "rewards/margins": 0.0029843891970813274, "rewards/rejected": 0.0013054062146693468, "step": 1010 }, { "epoch": 1.05, "learning_rate": 3.605051664753157e-07, "logits/chosen": -4.268284797668457, "logits/rejected": -4.145885944366455, "logps/chosen": -408.9548645019531, "logps/rejected": -317.30560302734375, "loss": 0.692, "rewards/accuracies": 0.510937511920929, "rewards/chosen": 0.007414447609335184, "rewards/margins": 0.0030614163260906935, "rewards/rejected": 0.004353031050413847, "step": 1020 }, { "epoch": 1.06, "learning_rate": 3.5859165709911975e-07, "logits/chosen": -4.300627708435059, "logits/rejected": -4.120467185974121, "logps/chosen": -422.968994140625, "logps/rejected": -307.66473388671875, "loss": 0.6929, "rewards/accuracies": 0.5, "rewards/chosen": 0.00493131997063756, "rewards/margins": 0.0012998055899515748, "rewards/rejected": 0.0036315140314400196, "step": 1030 }, { "epoch": 1.07, "learning_rate": 3.566781477229238e-07, "logits/chosen": -4.260711669921875, "logits/rejected": -4.147767066955566, "logps/chosen": -389.6875305175781, "logps/rejected": -305.23516845703125, "loss": 0.692, "rewards/accuracies": 0.543749988079071, "rewards/chosen": 0.005956425331532955, "rewards/margins": 0.002882971428334713, "rewards/rejected": 0.003073454136028886, "step": 1040 }, { "epoch": 1.08, "learning_rate": 3.547646383467279e-07, "logits/chosen": -4.267011642456055, "logits/rejected": -4.122623443603516, "logps/chosen": -403.59942626953125, "logps/rejected": -312.45770263671875, "loss": 0.6922, "rewards/accuracies": 0.515625, "rewards/chosen": 0.006427975837141275, "rewards/margins": 0.0026332822162657976, "rewards/rejected": 0.0037946938537061214, "step": 1050 }, { "epoch": 1.09, "learning_rate": 3.5285112897053194e-07, "logits/chosen": -4.294173717498779, "logits/rejected": -4.156063556671143, "logps/chosen": -417.2015686035156, "logps/rejected": -329.7525634765625, "loss": 0.69, "rewards/accuracies": 0.5453125238418579, "rewards/chosen": 0.009924950078129768, "rewards/margins": 0.0070407153107225895, "rewards/rejected": 0.002884234767407179, "step": 1060 }, { "epoch": 1.11, "learning_rate": 3.50937619594336e-07, "logits/chosen": -4.2720513343811035, "logits/rejected": -4.118578910827637, "logps/chosen": -408.00701904296875, "logps/rejected": -299.61981201171875, "loss": 0.6922, "rewards/accuracies": 0.504687488079071, "rewards/chosen": 0.0056653618812561035, "rewards/margins": 0.0025350514333695173, "rewards/rejected": 0.003130309982225299, "step": 1070 }, { "epoch": 1.12, "learning_rate": 3.4902411021814007e-07, "logits/chosen": -4.258962154388428, "logits/rejected": -4.1505255699157715, "logps/chosen": -391.7621154785156, "logps/rejected": -303.1611022949219, "loss": 0.6911, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 0.007706860546022654, "rewards/margins": 0.004729891195893288, "rewards/rejected": 0.002976970048621297, "step": 1080 }, { "epoch": 1.13, "learning_rate": 3.4711060084194413e-07, "logits/chosen": -4.25510311126709, "logits/rejected": -4.135909080505371, "logps/chosen": -400.17205810546875, "logps/rejected": -307.4388427734375, "loss": 0.6903, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.00958174280822277, "rewards/margins": 0.006473850458860397, "rewards/rejected": 0.0031078937463462353, "step": 1090 }, { "epoch": 1.14, "learning_rate": 3.4519709146574814e-07, "logits/chosen": -4.276521682739258, "logits/rejected": -4.158177852630615, "logps/chosen": -395.39215087890625, "logps/rejected": -310.30010986328125, "loss": 0.6905, "rewards/accuracies": 0.5140625238418579, "rewards/chosen": 0.009448185563087463, "rewards/margins": 0.005911382380872965, "rewards/rejected": 0.003536803647875786, "step": 1100 }, { "epoch": 1.15, "learning_rate": 3.432835820895522e-07, "logits/chosen": -4.291528224945068, "logits/rejected": -4.155767917633057, "logps/chosen": -423.88177490234375, "logps/rejected": -330.54730224609375, "loss": 0.6903, "rewards/accuracies": 0.567187488079071, "rewards/chosen": 0.009671617299318314, "rewards/margins": 0.0064759948290884495, "rewards/rejected": 0.003195622470229864, "step": 1110 }, { "epoch": 1.16, "learning_rate": 3.4137007271335626e-07, "logits/chosen": -4.27802848815918, "logits/rejected": -4.153050422668457, "logps/chosen": -406.92523193359375, "logps/rejected": -305.8375244140625, "loss": 0.6921, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.007676907815039158, "rewards/margins": 0.0026743696071207523, "rewards/rejected": 0.0050025382079184055, "step": 1120 }, { "epoch": 1.17, "learning_rate": 3.394565633371603e-07, "logits/chosen": -4.263790607452393, "logits/rejected": -4.140618801116943, "logps/chosen": -417.0517578125, "logps/rejected": -325.2332763671875, "loss": 0.6915, "rewards/accuracies": 0.5453125238418579, "rewards/chosen": 0.007303275167942047, "rewards/margins": 0.004130188841372728, "rewards/rejected": 0.0031730863265693188, "step": 1130 }, { "epoch": 1.18, "learning_rate": 3.375430539609644e-07, "logits/chosen": -4.309510707855225, "logits/rejected": -4.183161735534668, "logps/chosen": -388.7647705078125, "logps/rejected": -309.83673095703125, "loss": 0.6904, "rewards/accuracies": 0.526562511920929, "rewards/chosen": 0.009641969576478004, "rewards/margins": 0.006214521359652281, "rewards/rejected": 0.0034274482168257236, "step": 1140 }, { "epoch": 1.19, "learning_rate": 3.3562954458476845e-07, "logits/chosen": -4.253263473510742, "logits/rejected": -4.152525424957275, "logps/chosen": -387.7099609375, "logps/rejected": -302.3329162597656, "loss": 0.6918, "rewards/accuracies": 0.510937511920929, "rewards/chosen": 0.0074900491163134575, "rewards/margins": 0.0033420673571527004, "rewards/rejected": 0.00414798129349947, "step": 1150 }, { "epoch": 1.2, "learning_rate": 3.337160352085725e-07, "logits/chosen": -4.244006156921387, "logits/rejected": -4.125102996826172, "logps/chosen": -400.85107421875, "logps/rejected": -304.1709289550781, "loss": 0.6923, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": 0.007216416299343109, "rewards/margins": 0.002480756724253297, "rewards/rejected": 0.004735658876597881, "step": 1160 }, { "epoch": 1.21, "learning_rate": 3.3180252583237657e-07, "logits/chosen": -4.282382488250732, "logits/rejected": -4.156040191650391, "logps/chosen": -409.8832092285156, "logps/rejected": -317.5098571777344, "loss": 0.6925, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": 0.008845189586281776, "rewards/margins": 0.0021160345058888197, "rewards/rejected": 0.006729154847562313, "step": 1170 }, { "epoch": 1.22, "learning_rate": 3.2988901645618063e-07, "logits/chosen": -4.294495582580566, "logits/rejected": -4.211082458496094, "logps/chosen": -398.4643249511719, "logps/rejected": -337.3882141113281, "loss": 0.6929, "rewards/accuracies": 0.5, "rewards/chosen": 0.005116584710776806, "rewards/margins": 0.0013333541573956609, "rewards/rejected": 0.003783230436965823, "step": 1180 }, { "epoch": 1.23, "learning_rate": 3.279755070799847e-07, "logits/chosen": -4.265214443206787, "logits/rejected": -4.16311502456665, "logps/chosen": -387.9737243652344, "logps/rejected": -306.5306091308594, "loss": 0.6923, "rewards/accuracies": 0.5015624761581421, "rewards/chosen": 0.005055157467722893, "rewards/margins": 0.0023398033808916807, "rewards/rejected": 0.002715354086831212, "step": 1190 }, { "epoch": 1.24, "learning_rate": 3.260619977037887e-07, "logits/chosen": -4.283998489379883, "logits/rejected": -4.149415016174316, "logps/chosen": -393.43609619140625, "logps/rejected": -300.15667724609375, "loss": 0.6914, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 0.007636996451765299, "rewards/margins": 0.004209595732390881, "rewards/rejected": 0.0034273997880518436, "step": 1200 }, { "epoch": 1.25, "learning_rate": 3.2414848832759277e-07, "logits/chosen": -4.261348724365234, "logits/rejected": -4.140153884887695, "logps/chosen": -393.04046630859375, "logps/rejected": -311.3672180175781, "loss": 0.6917, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 0.008078986778855324, "rewards/margins": 0.003632976207882166, "rewards/rejected": 0.0044460115022957325, "step": 1210 }, { "epoch": 1.26, "learning_rate": 3.2223497895139683e-07, "logits/chosen": -4.243834972381592, "logits/rejected": -4.141684055328369, "logps/chosen": -408.1537170410156, "logps/rejected": -314.95404052734375, "loss": 0.6917, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 0.009249814786016941, "rewards/margins": 0.0036557712592184544, "rewards/rejected": 0.005594043061137199, "step": 1220 }, { "epoch": 1.27, "learning_rate": 3.203214695752009e-07, "logits/chosen": -4.290652275085449, "logits/rejected": -4.172784328460693, "logps/chosen": -404.22247314453125, "logps/rejected": -311.638916015625, "loss": 0.6926, "rewards/accuracies": 0.5140625238418579, "rewards/chosen": 0.008968379348516464, "rewards/margins": 0.001797800650820136, "rewards/rejected": 0.007170577999204397, "step": 1230 }, { "epoch": 1.28, "learning_rate": 3.1840796019900495e-07, "logits/chosen": -4.221989631652832, "logits/rejected": -4.126450061798096, "logps/chosen": -362.48773193359375, "logps/rejected": -310.7374572753906, "loss": 0.691, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.007350596599280834, "rewards/margins": 0.004948228131979704, "rewards/rejected": 0.0024023696314543486, "step": 1240 }, { "epoch": 1.29, "learning_rate": 3.16494450822809e-07, "logits/chosen": -4.273645877838135, "logits/rejected": -4.147796630859375, "logps/chosen": -397.3808288574219, "logps/rejected": -298.5054626464844, "loss": 0.6905, "rewards/accuracies": 0.5296875238418579, "rewards/chosen": 0.008822308853268623, "rewards/margins": 0.0060681127943098545, "rewards/rejected": 0.0027541951276361942, "step": 1250 }, { "epoch": 1.3, "learning_rate": 3.145809414466131e-07, "logits/chosen": -4.290090560913086, "logits/rejected": -4.17457389831543, "logps/chosen": -405.3314208984375, "logps/rejected": -332.5986633300781, "loss": 0.6905, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.008818728849291801, "rewards/margins": 0.0060805464163422585, "rewards/rejected": 0.0027381826657801867, "step": 1260 }, { "epoch": 1.31, "learning_rate": 3.1266743207041714e-07, "logits/chosen": -4.275857448577881, "logits/rejected": -4.184884548187256, "logps/chosen": -413.36260986328125, "logps/rejected": -335.1319274902344, "loss": 0.6903, "rewards/accuracies": 0.528124988079071, "rewards/chosen": 0.011068764142692089, "rewards/margins": 0.006428800523281097, "rewards/rejected": 0.004639963153749704, "step": 1270 }, { "epoch": 1.32, "learning_rate": 3.107539226942212e-07, "logits/chosen": -4.262097358703613, "logits/rejected": -4.138064384460449, "logps/chosen": -420.592041015625, "logps/rejected": -336.1610107421875, "loss": 0.6921, "rewards/accuracies": 0.551562488079071, "rewards/chosen": 0.008383669890463352, "rewards/margins": 0.0029027739074081182, "rewards/rejected": 0.00548089575022459, "step": 1280 }, { "epoch": 1.33, "learning_rate": 3.0884041331802526e-07, "logits/chosen": -4.273635387420654, "logits/rejected": -4.136135578155518, "logps/chosen": -419.8043518066406, "logps/rejected": -326.3934631347656, "loss": 0.691, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": 0.008629587478935719, "rewards/margins": 0.0051121762953698635, "rewards/rejected": 0.003517411183565855, "step": 1290 }, { "epoch": 1.34, "learning_rate": 3.0692690394182927e-07, "logits/chosen": -4.25814962387085, "logits/rejected": -4.133566856384277, "logps/chosen": -388.8675231933594, "logps/rejected": -307.92169189453125, "loss": 0.6903, "rewards/accuracies": 0.5296875238418579, "rewards/chosen": 0.010921096429228783, "rewards/margins": 0.0063910940662026405, "rewards/rejected": 0.004530002363026142, "step": 1300 }, { "epoch": 1.35, "learning_rate": 3.0501339456563334e-07, "logits/chosen": -4.258055210113525, "logits/rejected": -4.142639636993408, "logps/chosen": -417.714599609375, "logps/rejected": -331.1921691894531, "loss": 0.6899, "rewards/accuracies": 0.565625011920929, "rewards/chosen": 0.011006483808159828, "rewards/margins": 0.007266665808856487, "rewards/rejected": 0.0037398170679807663, "step": 1310 }, { "epoch": 1.36, "learning_rate": 3.030998851894374e-07, "logits/chosen": -4.290154457092285, "logits/rejected": -4.134154796600342, "logps/chosen": -419.7810974121094, "logps/rejected": -316.3617248535156, "loss": 0.6899, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.012462007813155651, "rewards/margins": 0.0071654594503343105, "rewards/rejected": 0.005296547897160053, "step": 1320 }, { "epoch": 1.37, "learning_rate": 3.0118637581324146e-07, "logits/chosen": -4.25482702255249, "logits/rejected": -4.102695941925049, "logps/chosen": -426.0753479003906, "logps/rejected": -321.2982177734375, "loss": 0.6922, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": 0.00866289995610714, "rewards/margins": 0.0026752217672765255, "rewards/rejected": 0.005987677723169327, "step": 1330 }, { "epoch": 1.38, "learning_rate": 2.992728664370455e-07, "logits/chosen": -4.27578067779541, "logits/rejected": -4.178536415100098, "logps/chosen": -399.89422607421875, "logps/rejected": -309.16204833984375, "loss": 0.6921, "rewards/accuracies": 0.5234375, "rewards/chosen": 0.0074403537437319756, "rewards/margins": 0.0027673656586557627, "rewards/rejected": 0.004672987386584282, "step": 1340 }, { "epoch": 1.39, "learning_rate": 2.973593570608496e-07, "logits/chosen": -4.252071380615234, "logits/rejected": -4.123923301696777, "logps/chosen": -402.0321044921875, "logps/rejected": -311.18463134765625, "loss": 0.6917, "rewards/accuracies": 0.535937488079071, "rewards/chosen": 0.007632538676261902, "rewards/margins": 0.003616305533796549, "rewards/rejected": 0.004016232676804066, "step": 1350 }, { "epoch": 1.4, "learning_rate": 2.9544584768465365e-07, "logits/chosen": -4.282023906707764, "logits/rejected": -4.184301853179932, "logps/chosen": -389.57904052734375, "logps/rejected": -322.18389892578125, "loss": 0.6926, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.00903762225061655, "rewards/margins": 0.0019117307383567095, "rewards/rejected": 0.007125890348106623, "step": 1360 }, { "epoch": 1.41, "learning_rate": 2.935323383084577e-07, "logits/chosen": -4.287262916564941, "logits/rejected": -4.1361284255981445, "logps/chosen": -420.2037658691406, "logps/rejected": -316.2614440917969, "loss": 0.6909, "rewards/accuracies": 0.551562488079071, "rewards/chosen": 0.009709215722978115, "rewards/margins": 0.005225184373557568, "rewards/rejected": 0.0044840313494205475, "step": 1370 }, { "epoch": 1.43, "learning_rate": 2.9161882893226177e-07, "logits/chosen": -4.279843807220459, "logits/rejected": -4.154780864715576, "logps/chosen": -405.2857971191406, "logps/rejected": -318.71368408203125, "loss": 0.6921, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": 0.007445839233696461, "rewards/margins": 0.0028315638191998005, "rewards/rejected": 0.00461427541449666, "step": 1380 }, { "epoch": 1.44, "learning_rate": 2.8970531955606583e-07, "logits/chosen": -4.297855377197266, "logits/rejected": -4.1429829597473145, "logps/chosen": -411.401123046875, "logps/rejected": -309.50469970703125, "loss": 0.6899, "rewards/accuracies": 0.534375011920929, "rewards/chosen": 0.010837659239768982, "rewards/margins": 0.007386946585029364, "rewards/rejected": 0.003450712887570262, "step": 1390 }, { "epoch": 1.45, "learning_rate": 2.8779181017986984e-07, "logits/chosen": -4.270615577697754, "logits/rejected": -4.156781196594238, "logps/chosen": -402.6678161621094, "logps/rejected": -313.5867614746094, "loss": 0.6915, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.012565011158585548, "rewards/margins": 0.004020330961793661, "rewards/rejected": 0.008544680662453175, "step": 1400 }, { "epoch": 1.46, "learning_rate": 2.858783008036739e-07, "logits/chosen": -4.293181896209717, "logits/rejected": -4.163121223449707, "logps/chosen": -383.950927734375, "logps/rejected": -298.0428161621094, "loss": 0.6915, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.009886051528155804, "rewards/margins": 0.004015837796032429, "rewards/rejected": 0.0058702146634459496, "step": 1410 }, { "epoch": 1.47, "learning_rate": 2.8396479142747797e-07, "logits/chosen": -4.2656683921813965, "logits/rejected": -4.139580249786377, "logps/chosen": -408.76361083984375, "logps/rejected": -313.99298095703125, "loss": 0.6905, "rewards/accuracies": 0.543749988079071, "rewards/chosen": 0.011651566252112389, "rewards/margins": 0.006123474799096584, "rewards/rejected": 0.005528091918677092, "step": 1420 }, { "epoch": 1.48, "learning_rate": 2.8205128205128203e-07, "logits/chosen": -4.313319206237793, "logits/rejected": -4.1831374168396, "logps/chosen": -416.11773681640625, "logps/rejected": -317.92279052734375, "loss": 0.6906, "rewards/accuracies": 0.534375011920929, "rewards/chosen": 0.01188584603369236, "rewards/margins": 0.005925232544541359, "rewards/rejected": 0.005960613489151001, "step": 1430 }, { "epoch": 1.49, "learning_rate": 2.801377726750861e-07, "logits/chosen": -4.274300575256348, "logits/rejected": -4.167074203491211, "logps/chosen": -395.77117919921875, "logps/rejected": -322.4521179199219, "loss": 0.6898, "rewards/accuracies": 0.5796874761581421, "rewards/chosen": 0.013639995828270912, "rewards/margins": 0.007328096777200699, "rewards/rejected": 0.006311898585408926, "step": 1440 }, { "epoch": 1.5, "learning_rate": 2.7822426329889015e-07, "logits/chosen": -4.246912956237793, "logits/rejected": -4.1353559494018555, "logps/chosen": -402.06451416015625, "logps/rejected": -326.43328857421875, "loss": 0.6924, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.008587488904595375, "rewards/margins": 0.0023642387241125107, "rewards/rejected": 0.006223250180482864, "step": 1450 }, { "epoch": 1.51, "learning_rate": 2.763107539226942e-07, "logits/chosen": -4.270732879638672, "logits/rejected": -4.132948398590088, "logps/chosen": -413.1165466308594, "logps/rejected": -322.5657653808594, "loss": 0.6902, "rewards/accuracies": 0.542187511920929, "rewards/chosen": 0.012603357434272766, "rewards/margins": 0.006664451211690903, "rewards/rejected": 0.005938907153904438, "step": 1460 }, { "epoch": 1.52, "learning_rate": 2.743972445464983e-07, "logits/chosen": -4.273732662200928, "logits/rejected": -4.119269371032715, "logps/chosen": -390.9085998535156, "logps/rejected": -302.38348388671875, "loss": 0.6922, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": 0.008494021371006966, "rewards/margins": 0.0025641201063990593, "rewards/rejected": 0.005929900798946619, "step": 1470 }, { "epoch": 1.53, "learning_rate": 2.7248373517030234e-07, "logits/chosen": -4.267933368682861, "logits/rejected": -4.161561012268066, "logps/chosen": -390.6529235839844, "logps/rejected": -303.3753662109375, "loss": 0.6907, "rewards/accuracies": 0.5390625, "rewards/chosen": 0.010899425484240055, "rewards/margins": 0.005612888839095831, "rewards/rejected": 0.005286536645144224, "step": 1480 }, { "epoch": 1.54, "learning_rate": 2.705702257941064e-07, "logits/chosen": -4.304495811462402, "logits/rejected": -4.120760440826416, "logps/chosen": -413.19744873046875, "logps/rejected": -302.12603759765625, "loss": 0.6905, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.011145773343741894, "rewards/margins": 0.006089083384722471, "rewards/rejected": 0.005056688562035561, "step": 1490 }, { "epoch": 1.55, "learning_rate": 2.686567164179104e-07, "logits/chosen": -4.27075719833374, "logits/rejected": -4.125774383544922, "logps/chosen": -398.0982360839844, "logps/rejected": -307.02325439453125, "loss": 0.6908, "rewards/accuracies": 0.5328124761581421, "rewards/chosen": 0.010808114893734455, "rewards/margins": 0.005621565040200949, "rewards/rejected": 0.005186550319194794, "step": 1500 }, { "epoch": 1.56, "learning_rate": 2.6674320704171447e-07, "logits/chosen": -4.2627692222595215, "logits/rejected": -4.124849796295166, "logps/chosen": -400.72039794921875, "logps/rejected": -309.2858581542969, "loss": 0.6912, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": 0.00958535261452198, "rewards/margins": 0.004684613086283207, "rewards/rejected": 0.004900740925222635, "step": 1510 }, { "epoch": 1.57, "learning_rate": 2.6482969766551853e-07, "logits/chosen": -4.256047248840332, "logits/rejected": -4.143467903137207, "logps/chosen": -395.4497985839844, "logps/rejected": -311.4337463378906, "loss": 0.69, "rewards/accuracies": 0.542187511920929, "rewards/chosen": 0.012033809907734394, "rewards/margins": 0.006985441781580448, "rewards/rejected": 0.005048368591815233, "step": 1520 }, { "epoch": 1.58, "learning_rate": 2.629161882893226e-07, "logits/chosen": -4.2988762855529785, "logits/rejected": -4.160223960876465, "logps/chosen": -421.30926513671875, "logps/rejected": -330.73834228515625, "loss": 0.6912, "rewards/accuracies": 0.546875, "rewards/chosen": 0.012272657826542854, "rewards/margins": 0.004651675932109356, "rewards/rejected": 0.007620981428772211, "step": 1530 }, { "epoch": 1.59, "learning_rate": 2.6100267891312666e-07, "logits/chosen": -4.249758720397949, "logits/rejected": -4.107924938201904, "logps/chosen": -394.63458251953125, "logps/rejected": -304.65570068359375, "loss": 0.6893, "rewards/accuracies": 0.5609375238418579, "rewards/chosen": 0.014264127239584923, "rewards/margins": 0.008482937701046467, "rewards/rejected": 0.005781189538538456, "step": 1540 }, { "epoch": 1.6, "learning_rate": 2.590891695369307e-07, "logits/chosen": -4.296584606170654, "logits/rejected": -4.137936592102051, "logps/chosen": -426.6543884277344, "logps/rejected": -326.82086181640625, "loss": 0.6899, "rewards/accuracies": 0.559374988079071, "rewards/chosen": 0.013136537745594978, "rewards/margins": 0.0072318254970014095, "rewards/rejected": 0.005904710851609707, "step": 1550 }, { "epoch": 1.61, "learning_rate": 2.571756601607348e-07, "logits/chosen": -4.269163608551025, "logits/rejected": -4.147532939910889, "logps/chosen": -418.16259765625, "logps/rejected": -319.60687255859375, "loss": 0.6889, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.015316249802708626, "rewards/margins": 0.00932287611067295, "rewards/rejected": 0.005993373692035675, "step": 1560 }, { "epoch": 1.62, "learning_rate": 2.5526215078453884e-07, "logits/chosen": -4.262753963470459, "logits/rejected": -4.136817932128906, "logps/chosen": -414.9779357910156, "logps/rejected": -330.0815734863281, "loss": 0.6898, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.013297900557518005, "rewards/margins": 0.007364665158092976, "rewards/rejected": 0.00593323539942503, "step": 1570 }, { "epoch": 1.63, "learning_rate": 2.533486414083429e-07, "logits/chosen": -4.293523788452148, "logits/rejected": -4.158999443054199, "logps/chosen": -413.1465759277344, "logps/rejected": -309.12017822265625, "loss": 0.6909, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.013114909641444683, "rewards/margins": 0.005343536846339703, "rewards/rejected": 0.007771371863782406, "step": 1580 }, { "epoch": 1.64, "learning_rate": 2.5143513203214697e-07, "logits/chosen": -4.268404960632324, "logits/rejected": -4.135837554931641, "logps/chosen": -394.46466064453125, "logps/rejected": -313.1191101074219, "loss": 0.688, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.01660170406103134, "rewards/margins": 0.011182994581758976, "rewards/rejected": 0.005418709013611078, "step": 1590 }, { "epoch": 1.65, "learning_rate": 2.49521622655951e-07, "logits/chosen": -4.279505252838135, "logits/rejected": -4.153802871704102, "logps/chosen": -396.4070739746094, "logps/rejected": -320.4418029785156, "loss": 0.6917, "rewards/accuracies": 0.53125, "rewards/chosen": 0.009519520215690136, "rewards/margins": 0.0036930330097675323, "rewards/rejected": 0.005826488137245178, "step": 1600 }, { "epoch": 1.66, "learning_rate": 2.4760811327975504e-07, "logits/chosen": -4.265553951263428, "logits/rejected": -4.148402214050293, "logps/chosen": -419.32171630859375, "logps/rejected": -328.2819519042969, "loss": 0.6886, "rewards/accuracies": 0.5546875, "rewards/chosen": 0.018884066492319107, "rewards/margins": 0.00994439609348774, "rewards/rejected": 0.008939670398831367, "step": 1610 }, { "epoch": 1.67, "learning_rate": 2.456946039035591e-07, "logits/chosen": -4.287269115447998, "logits/rejected": -4.127593040466309, "logps/chosen": -386.55499267578125, "logps/rejected": -294.11505126953125, "loss": 0.6894, "rewards/accuracies": 0.5609375238418579, "rewards/chosen": 0.013925912790000439, "rewards/margins": 0.008140355348587036, "rewards/rejected": 0.005785556975752115, "step": 1620 }, { "epoch": 1.68, "learning_rate": 2.4378109452736316e-07, "logits/chosen": -4.2739386558532715, "logits/rejected": -4.1460676193237305, "logps/chosen": -406.03900146484375, "logps/rejected": -317.59918212890625, "loss": 0.6912, "rewards/accuracies": 0.526562511920929, "rewards/chosen": 0.013792428188025951, "rewards/margins": 0.004698522854596376, "rewards/rejected": 0.009093904867768288, "step": 1630 }, { "epoch": 1.69, "learning_rate": 2.418675851511672e-07, "logits/chosen": -4.243307113647461, "logits/rejected": -4.1257524490356445, "logps/chosen": -389.49627685546875, "logps/rejected": -310.25921630859375, "loss": 0.6895, "rewards/accuracies": 0.559374988079071, "rewards/chosen": 0.01434385310858488, "rewards/margins": 0.007951314561069012, "rewards/rejected": 0.006392539478838444, "step": 1640 }, { "epoch": 1.7, "learning_rate": 2.399540757749713e-07, "logits/chosen": -4.276772975921631, "logits/rejected": -4.147823333740234, "logps/chosen": -384.64141845703125, "logps/rejected": -305.95355224609375, "loss": 0.6917, "rewards/accuracies": 0.515625, "rewards/chosen": 0.010843750089406967, "rewards/margins": 0.003594112815335393, "rewards/rejected": 0.0072496384382247925, "step": 1650 }, { "epoch": 1.71, "learning_rate": 2.3804056639877535e-07, "logits/chosen": -4.259124279022217, "logits/rejected": -4.154895782470703, "logps/chosen": -404.73663330078125, "logps/rejected": -332.0699462890625, "loss": 0.692, "rewards/accuracies": 0.534375011920929, "rewards/chosen": 0.010340576991438866, "rewards/margins": 0.002993339207023382, "rewards/rejected": 0.007347238250076771, "step": 1660 }, { "epoch": 1.72, "learning_rate": 2.361270570225794e-07, "logits/chosen": -4.2700676918029785, "logits/rejected": -4.160883903503418, "logps/chosen": -406.1430358886719, "logps/rejected": -311.64459228515625, "loss": 0.6904, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.012120211496949196, "rewards/margins": 0.006281781941652298, "rewards/rejected": 0.005838429089635611, "step": 1670 }, { "epoch": 1.74, "learning_rate": 2.3421354764638345e-07, "logits/chosen": -4.2109479904174805, "logits/rejected": -4.121271133422852, "logps/chosen": -390.29046630859375, "logps/rejected": -306.08843994140625, "loss": 0.6896, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.012644372880458832, "rewards/margins": 0.0077982256188988686, "rewards/rejected": 0.004846146795898676, "step": 1680 }, { "epoch": 1.75, "learning_rate": 2.323000382701875e-07, "logits/chosen": -4.242735385894775, "logits/rejected": -4.120673179626465, "logps/chosen": -396.1617431640625, "logps/rejected": -299.79345703125, "loss": 0.6903, "rewards/accuracies": 0.534375011920929, "rewards/chosen": 0.013311442919075489, "rewards/margins": 0.006340789142996073, "rewards/rejected": 0.006970655173063278, "step": 1690 }, { "epoch": 1.76, "learning_rate": 2.3038652889399157e-07, "logits/chosen": -4.255246162414551, "logits/rejected": -4.130453109741211, "logps/chosen": -393.3377380371094, "logps/rejected": -320.06109619140625, "loss": 0.6897, "rewards/accuracies": 0.535937488079071, "rewards/chosen": 0.014351250603795052, "rewards/margins": 0.00757851917296648, "rewards/rejected": 0.006772731896489859, "step": 1700 }, { "epoch": 1.77, "learning_rate": 2.2847301951779563e-07, "logits/chosen": -4.267707347869873, "logits/rejected": -4.147231101989746, "logps/chosen": -414.69256591796875, "logps/rejected": -318.17291259765625, "loss": 0.6909, "rewards/accuracies": 0.542187511920929, "rewards/chosen": 0.012826653197407722, "rewards/margins": 0.005257748067378998, "rewards/rejected": 0.00756890419870615, "step": 1710 }, { "epoch": 1.78, "learning_rate": 2.265595101415997e-07, "logits/chosen": -4.258088111877441, "logits/rejected": -4.129515647888184, "logps/chosen": -401.8673400878906, "logps/rejected": -315.34417724609375, "loss": 0.6887, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.017096903175115585, "rewards/margins": 0.009566163644194603, "rewards/rejected": 0.0075307427905499935, "step": 1720 }, { "epoch": 1.79, "learning_rate": 2.2464600076540373e-07, "logits/chosen": -4.2839155197143555, "logits/rejected": -4.152881622314453, "logps/chosen": -420.7337951660156, "logps/rejected": -324.15985107421875, "loss": 0.6902, "rewards/accuracies": 0.5390625, "rewards/chosen": 0.013899828307330608, "rewards/margins": 0.006815521512180567, "rewards/rejected": 0.00708430539816618, "step": 1730 }, { "epoch": 1.8, "learning_rate": 2.227324913892078e-07, "logits/chosen": -4.261233329772949, "logits/rejected": -4.125912666320801, "logps/chosen": -408.70989990234375, "logps/rejected": -311.13983154296875, "loss": 0.6876, "rewards/accuracies": 0.5703125, "rewards/chosen": 0.018134312704205513, "rewards/margins": 0.011861599050462246, "rewards/rejected": 0.00627271318808198, "step": 1740 }, { "epoch": 1.81, "learning_rate": 2.2081898201301186e-07, "logits/chosen": -4.2318902015686035, "logits/rejected": -4.125788688659668, "logps/chosen": -421.2041015625, "logps/rejected": -331.3543701171875, "loss": 0.6906, "rewards/accuracies": 0.543749988079071, "rewards/chosen": 0.01709870807826519, "rewards/margins": 0.005781983956694603, "rewards/rejected": 0.011316723190248013, "step": 1750 }, { "epoch": 1.82, "learning_rate": 2.1890547263681592e-07, "logits/chosen": -4.270097732543945, "logits/rejected": -4.112766265869141, "logps/chosen": -415.7230529785156, "logps/rejected": -309.1680603027344, "loss": 0.6882, "rewards/accuracies": 0.5703125, "rewards/chosen": 0.016786256805062294, "rewards/margins": 0.010718188248574734, "rewards/rejected": 0.006068066693842411, "step": 1760 }, { "epoch": 1.83, "learning_rate": 2.1699196326061998e-07, "logits/chosen": -4.254898548126221, "logits/rejected": -4.1117353439331055, "logps/chosen": -406.07330322265625, "logps/rejected": -307.8787841796875, "loss": 0.6889, "rewards/accuracies": 0.5625, "rewards/chosen": 0.016035914421081543, "rewards/margins": 0.009246991947293282, "rewards/rejected": 0.006788922939449549, "step": 1770 }, { "epoch": 1.84, "learning_rate": 2.1507845388442402e-07, "logits/chosen": -4.308491230010986, "logits/rejected": -4.162188529968262, "logps/chosen": -414.5621643066406, "logps/rejected": -300.40106201171875, "loss": 0.6888, "rewards/accuracies": 0.5625, "rewards/chosen": 0.019205499440431595, "rewards/margins": 0.009419824928045273, "rewards/rejected": 0.009785676375031471, "step": 1780 }, { "epoch": 1.85, "learning_rate": 2.1316494450822808e-07, "logits/chosen": -4.263808250427246, "logits/rejected": -4.1408891677856445, "logps/chosen": -409.64141845703125, "logps/rejected": -318.11041259765625, "loss": 0.6901, "rewards/accuracies": 0.5296875238418579, "rewards/chosen": 0.014620177447795868, "rewards/margins": 0.007014470640569925, "rewards/rejected": 0.007605706341564655, "step": 1790 }, { "epoch": 1.86, "learning_rate": 2.1125143513203214e-07, "logits/chosen": -4.306565284729004, "logits/rejected": -4.195437431335449, "logps/chosen": -398.6597595214844, "logps/rejected": -313.44366455078125, "loss": 0.6913, "rewards/accuracies": 0.5328124761581421, "rewards/chosen": 0.013173435814678669, "rewards/margins": 0.004629576578736305, "rewards/rejected": 0.008543858304619789, "step": 1800 }, { "epoch": 1.87, "learning_rate": 2.093379257558362e-07, "logits/chosen": -4.287682056427002, "logits/rejected": -4.170054912567139, "logps/chosen": -413.82818603515625, "logps/rejected": -336.8646545410156, "loss": 0.6908, "rewards/accuracies": 0.551562488079071, "rewards/chosen": 0.016411561518907547, "rewards/margins": 0.005662465933710337, "rewards/rejected": 0.0107490923255682, "step": 1810 }, { "epoch": 1.88, "learning_rate": 2.0742441637964026e-07, "logits/chosen": -4.2755303382873535, "logits/rejected": -4.166233062744141, "logps/chosen": -372.7901306152344, "logps/rejected": -299.3600158691406, "loss": 0.6896, "rewards/accuracies": 0.535937488079071, "rewards/chosen": 0.015290270559489727, "rewards/margins": 0.007844468578696251, "rewards/rejected": 0.0074458010494709015, "step": 1820 }, { "epoch": 1.89, "learning_rate": 2.055109070034443e-07, "logits/chosen": -4.297582149505615, "logits/rejected": -4.127498149871826, "logps/chosen": -397.54498291015625, "logps/rejected": -304.5601501464844, "loss": 0.6905, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": 0.01460904348641634, "rewards/margins": 0.0060775866732001305, "rewards/rejected": 0.00853145681321621, "step": 1830 }, { "epoch": 1.9, "learning_rate": 2.0359739762724836e-07, "logits/chosen": -4.267261028289795, "logits/rejected": -4.143542289733887, "logps/chosen": -438.79937744140625, "logps/rejected": -327.8368225097656, "loss": 0.6887, "rewards/accuracies": 0.543749988079071, "rewards/chosen": 0.017624245956540108, "rewards/margins": 0.009726700372993946, "rewards/rejected": 0.007897543720901012, "step": 1840 }, { "epoch": 1.91, "learning_rate": 2.0168388825105242e-07, "logits/chosen": -4.271047115325928, "logits/rejected": -4.188268661499023, "logps/chosen": -386.79815673828125, "logps/rejected": -312.2374572753906, "loss": 0.6912, "rewards/accuracies": 0.535937488079071, "rewards/chosen": 0.013870956376194954, "rewards/margins": 0.004729996435344219, "rewards/rejected": 0.00914095900952816, "step": 1850 }, { "epoch": 1.92, "learning_rate": 1.997703788748565e-07, "logits/chosen": -4.247762203216553, "logits/rejected": -4.124339580535889, "logps/chosen": -399.97686767578125, "logps/rejected": -318.4253845214844, "loss": 0.689, "rewards/accuracies": 0.567187488079071, "rewards/chosen": 0.01522988360375166, "rewards/margins": 0.008996319025754929, "rewards/rejected": 0.0062335641123354435, "step": 1860 }, { "epoch": 1.93, "learning_rate": 1.9785686949866055e-07, "logits/chosen": -4.297530174255371, "logits/rejected": -4.151538372039795, "logps/chosen": -411.59027099609375, "logps/rejected": -309.5844421386719, "loss": 0.6892, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": 0.01565638557076454, "rewards/margins": 0.008750095032155514, "rewards/rejected": 0.006906290538609028, "step": 1870 }, { "epoch": 1.94, "learning_rate": 1.9594336012246458e-07, "logits/chosen": -4.267110347747803, "logits/rejected": -4.1285834312438965, "logps/chosen": -399.1042785644531, "logps/rejected": -314.04193115234375, "loss": 0.6885, "rewards/accuracies": 0.5640624761581421, "rewards/chosen": 0.01778355799615383, "rewards/margins": 0.010103506036102772, "rewards/rejected": 0.007680053357034922, "step": 1880 }, { "epoch": 1.95, "learning_rate": 1.9402985074626865e-07, "logits/chosen": -4.260018825531006, "logits/rejected": -4.148495197296143, "logps/chosen": -393.2950744628906, "logps/rejected": -308.5839538574219, "loss": 0.6888, "rewards/accuracies": 0.559374988079071, "rewards/chosen": 0.01795302703976631, "rewards/margins": 0.009579015895724297, "rewards/rejected": 0.008374011144042015, "step": 1890 }, { "epoch": 1.96, "learning_rate": 1.921163413700727e-07, "logits/chosen": -4.2746124267578125, "logits/rejected": -4.14363956451416, "logps/chosen": -423.131103515625, "logps/rejected": -334.2845458984375, "loss": 0.6892, "rewards/accuracies": 0.5453125238418579, "rewards/chosen": 0.01808355748653412, "rewards/margins": 0.008949248120188713, "rewards/rejected": 0.009134308435022831, "step": 1900 }, { "epoch": 1.97, "learning_rate": 1.9020283199387677e-07, "logits/chosen": -4.257375717163086, "logits/rejected": -4.1283063888549805, "logps/chosen": -409.4438171386719, "logps/rejected": -327.1435852050781, "loss": 0.6897, "rewards/accuracies": 0.5390625, "rewards/chosen": 0.016801532357931137, "rewards/margins": 0.007867367006838322, "rewards/rejected": 0.008934165351092815, "step": 1910 }, { "epoch": 1.98, "learning_rate": 1.8828932261768083e-07, "logits/chosen": -4.274910926818848, "logits/rejected": -4.14418888092041, "logps/chosen": -400.5626220703125, "logps/rejected": -311.94171142578125, "loss": 0.6904, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 0.013942083343863487, "rewards/margins": 0.006330497562885284, "rewards/rejected": 0.0076115853153169155, "step": 1920 }, { "epoch": 1.99, "learning_rate": 1.8637581324148487e-07, "logits/chosen": -4.279542446136475, "logits/rejected": -4.158401012420654, "logps/chosen": -405.467529296875, "logps/rejected": -329.11602783203125, "loss": 0.6904, "rewards/accuracies": 0.557812511920929, "rewards/chosen": 0.018447261303663254, "rewards/margins": 0.006302011664956808, "rewards/rejected": 0.012145251035690308, "step": 1930 }, { "epoch": 2.0, "eval_logits/chosen": -4.192010879516602, "eval_logits/rejected": -4.082387447357178, "eval_logps/chosen": -402.46429443359375, "eval_logps/rejected": -315.65875244140625, "eval_loss": 0.6883671879768372, "eval_rewards/accuracies": 0.5569999814033508, "eval_rewards/chosen": 0.019122228026390076, "eval_rewards/margins": 0.010481986217200756, "eval_rewards/rejected": 0.00864024180918932, "eval_runtime": 765.0828, "eval_samples_per_second": 2.614, "eval_steps_per_second": 0.654, "step": 1936 }, { "epoch": 2.0, "learning_rate": 1.8446230386528893e-07, "logits/chosen": -4.2764363288879395, "logits/rejected": -4.1841230392456055, "logps/chosen": -391.84844970703125, "logps/rejected": -318.74786376953125, "loss": 0.6892, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.017281491309404373, "rewards/margins": 0.008870037272572517, "rewards/rejected": 0.008411452174186707, "step": 1940 }, { "epoch": 2.01, "learning_rate": 1.82548794489093e-07, "logits/chosen": -4.290091514587402, "logits/rejected": -4.141688346862793, "logps/chosen": -413.09112548828125, "logps/rejected": -315.75860595703125, "loss": 0.6886, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.01853874884545803, "rewards/margins": 0.010042714886367321, "rewards/rejected": 0.008496033027768135, "step": 1950 }, { "epoch": 2.02, "learning_rate": 1.8063528511289706e-07, "logits/chosen": -4.286593437194824, "logits/rejected": -4.1507697105407715, "logps/chosen": -389.1523132324219, "logps/rejected": -307.9405822753906, "loss": 0.688, "rewards/accuracies": 0.559374988079071, "rewards/chosen": 0.018895253539085388, "rewards/margins": 0.011058597825467587, "rewards/rejected": 0.007836655713617802, "step": 1960 }, { "epoch": 2.03, "learning_rate": 1.7872177573670112e-07, "logits/chosen": -4.287877559661865, "logits/rejected": -4.147292613983154, "logps/chosen": -416.8997497558594, "logps/rejected": -327.71551513671875, "loss": 0.6886, "rewards/accuracies": 0.5546875, "rewards/chosen": 0.018623776733875275, "rewards/margins": 0.009957761503756046, "rewards/rejected": 0.008666014298796654, "step": 1970 }, { "epoch": 2.04, "learning_rate": 1.7680826636050515e-07, "logits/chosen": -4.2642436027526855, "logits/rejected": -4.148723125457764, "logps/chosen": -388.62054443359375, "logps/rejected": -313.1318359375, "loss": 0.6907, "rewards/accuracies": 0.551562488079071, "rewards/chosen": 0.01719742640852928, "rewards/margins": 0.005715816281735897, "rewards/rejected": 0.011481606401503086, "step": 1980 }, { "epoch": 2.06, "learning_rate": 1.7489475698430921e-07, "logits/chosen": -4.268927097320557, "logits/rejected": -4.110062599182129, "logps/chosen": -428.08935546875, "logps/rejected": -315.6456604003906, "loss": 0.6885, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0186283178627491, "rewards/margins": 0.010059954598546028, "rewards/rejected": 0.008568364195525646, "step": 1990 }, { "epoch": 2.07, "learning_rate": 1.7298124760811328e-07, "logits/chosen": -4.257794380187988, "logits/rejected": -4.151054859161377, "logps/chosen": -390.84942626953125, "logps/rejected": -318.6888732910156, "loss": 0.6891, "rewards/accuracies": 0.557812511920929, "rewards/chosen": 0.016992371529340744, "rewards/margins": 0.00896035972982645, "rewards/rejected": 0.008032011799514294, "step": 2000 }, { "epoch": 2.08, "learning_rate": 1.7106773823191734e-07, "logits/chosen": -4.262537956237793, "logits/rejected": -4.151637077331543, "logps/chosen": -397.4742431640625, "logps/rejected": -319.90289306640625, "loss": 0.6891, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": 0.017107700929045677, "rewards/margins": 0.00891804601997137, "rewards/rejected": 0.008189653977751732, "step": 2010 }, { "epoch": 2.09, "learning_rate": 1.691542288557214e-07, "logits/chosen": -4.258824348449707, "logits/rejected": -4.141880989074707, "logps/chosen": -393.25311279296875, "logps/rejected": -307.5327453613281, "loss": 0.6893, "rewards/accuracies": 0.551562488079071, "rewards/chosen": 0.01972380466759205, "rewards/margins": 0.008552981540560722, "rewards/rejected": 0.011170822195708752, "step": 2020 }, { "epoch": 2.1, "learning_rate": 1.6724071947952544e-07, "logits/chosen": -4.245623588562012, "logits/rejected": -4.144326686859131, "logps/chosen": -403.1796569824219, "logps/rejected": -326.97894287109375, "loss": 0.6911, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.01679617539048195, "rewards/margins": 0.004923067055642605, "rewards/rejected": 0.011873109266161919, "step": 2030 }, { "epoch": 2.11, "learning_rate": 1.653272101033295e-07, "logits/chosen": -4.275112152099609, "logits/rejected": -4.147688388824463, "logps/chosen": -394.7803039550781, "logps/rejected": -311.6099548339844, "loss": 0.6887, "rewards/accuracies": 0.557812511920929, "rewards/chosen": 0.01747475564479828, "rewards/margins": 0.00970934983342886, "rewards/rejected": 0.00776540394872427, "step": 2040 }, { "epoch": 2.12, "learning_rate": 1.6341370072713356e-07, "logits/chosen": -4.274272918701172, "logits/rejected": -4.157819747924805, "logps/chosen": -378.563720703125, "logps/rejected": -302.5975036621094, "loss": 0.6892, "rewards/accuracies": 0.5453125238418579, "rewards/chosen": 0.01942756399512291, "rewards/margins": 0.008909397758543491, "rewards/rejected": 0.010518166236579418, "step": 2050 }, { "epoch": 2.13, "learning_rate": 1.6150019135093762e-07, "logits/chosen": -4.260178565979004, "logits/rejected": -4.149471759796143, "logps/chosen": -418.959716796875, "logps/rejected": -332.4044494628906, "loss": 0.6893, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.017765840515494347, "rewards/margins": 0.008554233238101006, "rewards/rejected": 0.009211607277393341, "step": 2060 }, { "epoch": 2.14, "learning_rate": 1.5958668197474169e-07, "logits/chosen": -4.2685041427612305, "logits/rejected": -4.117525100708008, "logps/chosen": -430.4039001464844, "logps/rejected": -319.9500732421875, "loss": 0.6873, "rewards/accuracies": 0.578125, "rewards/chosen": 0.022361256182193756, "rewards/margins": 0.01272774301469326, "rewards/rejected": 0.009633513167500496, "step": 2070 }, { "epoch": 2.15, "learning_rate": 1.5767317259854572e-07, "logits/chosen": -4.2741522789001465, "logits/rejected": -4.18676233291626, "logps/chosen": -379.95263671875, "logps/rejected": -313.4046936035156, "loss": 0.6891, "rewards/accuracies": 0.557812511920929, "rewards/chosen": 0.01609444059431553, "rewards/margins": 0.009009727276861668, "rewards/rejected": 0.007084711454808712, "step": 2080 }, { "epoch": 2.16, "learning_rate": 1.5575966322234978e-07, "logits/chosen": -4.279686450958252, "logits/rejected": -4.155128479003906, "logps/chosen": -416.2315368652344, "logps/rejected": -321.23992919921875, "loss": 0.6909, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.01688700169324875, "rewards/margins": 0.005368704441934824, "rewards/rejected": 0.011518299579620361, "step": 2090 }, { "epoch": 2.17, "learning_rate": 1.5384615384615385e-07, "logits/chosen": -4.279124736785889, "logits/rejected": -4.1183061599731445, "logps/chosen": -428.96142578125, "logps/rejected": -316.525146484375, "loss": 0.6891, "rewards/accuracies": 0.5546875, "rewards/chosen": 0.019147472456097603, "rewards/margins": 0.008967303670942783, "rewards/rejected": 0.01018016878515482, "step": 2100 }, { "epoch": 2.18, "learning_rate": 1.519326444699579e-07, "logits/chosen": -4.268794059753418, "logits/rejected": -4.155481815338135, "logps/chosen": -411.06573486328125, "logps/rejected": -329.96044921875, "loss": 0.6888, "rewards/accuracies": 0.534375011920929, "rewards/chosen": 0.020965853706002235, "rewards/margins": 0.009673124179244041, "rewards/rejected": 0.011292731389403343, "step": 2110 }, { "epoch": 2.19, "learning_rate": 1.5001913509376197e-07, "logits/chosen": -4.276661396026611, "logits/rejected": -4.141108512878418, "logps/chosen": -391.82391357421875, "logps/rejected": -302.3670349121094, "loss": 0.6872, "rewards/accuracies": 0.581250011920929, "rewards/chosen": 0.02172374725341797, "rewards/margins": 0.012819238007068634, "rewards/rejected": 0.008904511108994484, "step": 2120 }, { "epoch": 2.2, "learning_rate": 1.4810562571756603e-07, "logits/chosen": -4.27203369140625, "logits/rejected": -4.1426167488098145, "logps/chosen": -420.7378845214844, "logps/rejected": -312.4131774902344, "loss": 0.689, "rewards/accuracies": 0.578125, "rewards/chosen": 0.018989499658346176, "rewards/margins": 0.009152286686003208, "rewards/rejected": 0.009837212972342968, "step": 2130 }, { "epoch": 2.21, "learning_rate": 1.4619211634137007e-07, "logits/chosen": -4.257569789886475, "logits/rejected": -4.129474639892578, "logps/chosen": -409.93609619140625, "logps/rejected": -317.70989990234375, "loss": 0.6886, "rewards/accuracies": 0.5765625238418579, "rewards/chosen": 0.019030291587114334, "rewards/margins": 0.01004733331501484, "rewards/rejected": 0.008982958272099495, "step": 2140 }, { "epoch": 2.22, "learning_rate": 1.4427860696517413e-07, "logits/chosen": -4.251282691955566, "logits/rejected": -4.162935256958008, "logps/chosen": -396.5443420410156, "logps/rejected": -335.5440368652344, "loss": 0.6887, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": 0.020548541098833084, "rewards/margins": 0.009832927957177162, "rewards/rejected": 0.010715610347688198, "step": 2150 }, { "epoch": 2.23, "learning_rate": 1.423650975889782e-07, "logits/chosen": -4.25177526473999, "logits/rejected": -4.121354579925537, "logps/chosen": -406.2722473144531, "logps/rejected": -319.5417175292969, "loss": 0.6889, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.022125694900751114, "rewards/margins": 0.00926921982318163, "rewards/rejected": 0.01285647600889206, "step": 2160 }, { "epoch": 2.24, "learning_rate": 1.4045158821278225e-07, "logits/chosen": -4.285470485687256, "logits/rejected": -4.150871276855469, "logps/chosen": -422.54071044921875, "logps/rejected": -323.01458740234375, "loss": 0.6883, "rewards/accuracies": 0.559374988079071, "rewards/chosen": 0.023092512041330338, "rewards/margins": 0.010742807760834694, "rewards/rejected": 0.012349705211818218, "step": 2170 }, { "epoch": 2.25, "learning_rate": 1.3853807883658632e-07, "logits/chosen": -4.2822346687316895, "logits/rejected": -4.1521124839782715, "logps/chosen": -414.7969665527344, "logps/rejected": -330.4766540527344, "loss": 0.6886, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": 0.021155862137675285, "rewards/margins": 0.010015945881605148, "rewards/rejected": 0.011139917187392712, "step": 2180 }, { "epoch": 2.26, "learning_rate": 1.3662456946039035e-07, "logits/chosen": -4.249444961547852, "logits/rejected": -4.152978897094727, "logps/chosen": -389.48052978515625, "logps/rejected": -311.3460693359375, "loss": 0.6878, "rewards/accuracies": 0.515625, "rewards/chosen": 0.020100217312574387, "rewards/margins": 0.01170959509909153, "rewards/rejected": 0.008390624076128006, "step": 2190 }, { "epoch": 2.27, "learning_rate": 1.3471106008419441e-07, "logits/chosen": -4.282201290130615, "logits/rejected": -4.174456596374512, "logps/chosen": -396.2969665527344, "logps/rejected": -308.55548095703125, "loss": 0.6893, "rewards/accuracies": 0.557812511920929, "rewards/chosen": 0.020026249811053276, "rewards/margins": 0.008474086411297321, "rewards/rejected": 0.01155216433107853, "step": 2200 }, { "epoch": 2.28, "learning_rate": 1.3279755070799848e-07, "logits/chosen": -4.26694393157959, "logits/rejected": -4.153203010559082, "logps/chosen": -390.5587158203125, "logps/rejected": -312.5738830566406, "loss": 0.689, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": 0.01699787937104702, "rewards/margins": 0.009081227704882622, "rewards/rejected": 0.007916653528809547, "step": 2210 }, { "epoch": 2.29, "learning_rate": 1.3088404133180254e-07, "logits/chosen": -4.26552677154541, "logits/rejected": -4.145693778991699, "logps/chosen": -413.443115234375, "logps/rejected": -327.2570495605469, "loss": 0.6887, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": 0.022313930094242096, "rewards/margins": 0.009759850800037384, "rewards/rejected": 0.012554079294204712, "step": 2220 }, { "epoch": 2.3, "learning_rate": 1.289705319556066e-07, "logits/chosen": -4.264178276062012, "logits/rejected": -4.172913551330566, "logps/chosen": -407.66339111328125, "logps/rejected": -321.8123779296875, "loss": 0.6884, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": 0.023416642099618912, "rewards/margins": 0.010417203418910503, "rewards/rejected": 0.012999439612030983, "step": 2230 }, { "epoch": 2.31, "learning_rate": 1.2705702257941064e-07, "logits/chosen": -4.250650405883789, "logits/rejected": -4.140833854675293, "logps/chosen": -374.83990478515625, "logps/rejected": -302.1290588378906, "loss": 0.6893, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.019031699746847153, "rewards/margins": 0.008602599613368511, "rewards/rejected": 0.010429101064801216, "step": 2240 }, { "epoch": 2.32, "learning_rate": 1.251435132032147e-07, "logits/chosen": -4.270743370056152, "logits/rejected": -4.151588439941406, "logps/chosen": -443.17791748046875, "logps/rejected": -332.28302001953125, "loss": 0.6913, "rewards/accuracies": 0.535937488079071, "rewards/chosen": 0.01945783570408821, "rewards/margins": 0.004704002290964127, "rewards/rejected": 0.014753831550478935, "step": 2250 }, { "epoch": 2.33, "learning_rate": 1.2323000382701873e-07, "logits/chosen": -4.275900840759277, "logits/rejected": -4.1336822509765625, "logps/chosen": -416.0301208496094, "logps/rejected": -323.3976135253906, "loss": 0.6874, "rewards/accuracies": 0.5640624761581421, "rewards/chosen": 0.021596388891339302, "rewards/margins": 0.012395900674164295, "rewards/rejected": 0.009200489148497581, "step": 2260 }, { "epoch": 2.34, "learning_rate": 1.213164944508228e-07, "logits/chosen": -4.26046085357666, "logits/rejected": -4.138212203979492, "logps/chosen": -397.1346740722656, "logps/rejected": -319.06781005859375, "loss": 0.689, "rewards/accuracies": 0.565625011920929, "rewards/chosen": 0.021542087197303772, "rewards/margins": 0.009193692356348038, "rewards/rejected": 0.012348394840955734, "step": 2270 }, { "epoch": 2.35, "learning_rate": 1.1940298507462686e-07, "logits/chosen": -4.280481815338135, "logits/rejected": -4.179018974304199, "logps/chosen": -407.4460754394531, "logps/rejected": -328.47021484375, "loss": 0.6879, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": 0.01951112225651741, "rewards/margins": 0.011365312151610851, "rewards/rejected": 0.008145810104906559, "step": 2280 }, { "epoch": 2.37, "learning_rate": 1.1748947569843092e-07, "logits/chosen": -4.224648952484131, "logits/rejected": -4.105835914611816, "logps/chosen": -381.49658203125, "logps/rejected": -303.1542663574219, "loss": 0.6889, "rewards/accuracies": 0.5625, "rewards/chosen": 0.020115623250603676, "rewards/margins": 0.009458022192120552, "rewards/rejected": 0.010657599195837975, "step": 2290 }, { "epoch": 2.38, "learning_rate": 1.1557596632223497e-07, "logits/chosen": -4.270878791809082, "logits/rejected": -4.1364850997924805, "logps/chosen": -407.75909423828125, "logps/rejected": -300.38336181640625, "loss": 0.6885, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.019836071878671646, "rewards/margins": 0.010231700725853443, "rewards/rejected": 0.009604370221495628, "step": 2300 }, { "epoch": 2.39, "learning_rate": 1.1366245694603903e-07, "logits/chosen": -4.28275203704834, "logits/rejected": -4.161673545837402, "logps/chosen": -389.09051513671875, "logps/rejected": -298.67401123046875, "loss": 0.6881, "rewards/accuracies": 0.5703125, "rewards/chosen": 0.022033553570508957, "rewards/margins": 0.010990725830197334, "rewards/rejected": 0.011042827740311623, "step": 2310 }, { "epoch": 2.4, "learning_rate": 1.1174894756984308e-07, "logits/chosen": -4.2564802169799805, "logits/rejected": -4.108375549316406, "logps/chosen": -383.16119384765625, "logps/rejected": -286.8339538574219, "loss": 0.6897, "rewards/accuracies": 0.534375011920929, "rewards/chosen": 0.019884765148162842, "rewards/margins": 0.007881352677941322, "rewards/rejected": 0.012003413401544094, "step": 2320 }, { "epoch": 2.41, "learning_rate": 1.0983543819364714e-07, "logits/chosen": -4.2984795570373535, "logits/rejected": -4.14174747467041, "logps/chosen": -417.61273193359375, "logps/rejected": -321.7851867675781, "loss": 0.687, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": 0.023209992796182632, "rewards/margins": 0.013400438241660595, "rewards/rejected": 0.009809553623199463, "step": 2330 }, { "epoch": 2.42, "learning_rate": 1.079219288174512e-07, "logits/chosen": -4.233872413635254, "logits/rejected": -4.129950046539307, "logps/chosen": -393.71453857421875, "logps/rejected": -327.7266845703125, "loss": 0.6903, "rewards/accuracies": 0.526562511920929, "rewards/chosen": 0.017995553091168404, "rewards/margins": 0.006606035865843296, "rewards/rejected": 0.011389517225325108, "step": 2340 }, { "epoch": 2.43, "learning_rate": 1.0600841944125525e-07, "logits/chosen": -4.279402732849121, "logits/rejected": -4.134713172912598, "logps/chosen": -390.8346862792969, "logps/rejected": -306.3309020996094, "loss": 0.6891, "rewards/accuracies": 0.551562488079071, "rewards/chosen": 0.02169058658182621, "rewards/margins": 0.008909964933991432, "rewards/rejected": 0.012780621647834778, "step": 2350 }, { "epoch": 2.44, "learning_rate": 1.0409491006505931e-07, "logits/chosen": -4.276646614074707, "logits/rejected": -4.130280017852783, "logps/chosen": -404.8982849121094, "logps/rejected": -310.2183532714844, "loss": 0.6874, "rewards/accuracies": 0.5921875238418579, "rewards/chosen": 0.022115709260106087, "rewards/margins": 0.012477119453251362, "rewards/rejected": 0.00963858887553215, "step": 2360 }, { "epoch": 2.45, "learning_rate": 1.0218140068886336e-07, "logits/chosen": -4.257068634033203, "logits/rejected": -4.119657516479492, "logps/chosen": -405.00750732421875, "logps/rejected": -309.30023193359375, "loss": 0.6898, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": 0.021267231553792953, "rewards/margins": 0.007724496070295572, "rewards/rejected": 0.013542735949158669, "step": 2370 }, { "epoch": 2.46, "learning_rate": 1.0026789131266743e-07, "logits/chosen": -4.301741600036621, "logits/rejected": -4.1736650466918945, "logps/chosen": -388.27923583984375, "logps/rejected": -308.6962890625, "loss": 0.6892, "rewards/accuracies": 0.543749988079071, "rewards/chosen": 0.01904493011534214, "rewards/margins": 0.008867397904396057, "rewards/rejected": 0.010177532210946083, "step": 2380 }, { "epoch": 2.47, "learning_rate": 9.835438193647149e-08, "logits/chosen": -4.262604236602783, "logits/rejected": -4.160672187805176, "logps/chosen": -396.7488708496094, "logps/rejected": -320.4295654296875, "loss": 0.6852, "rewards/accuracies": 0.609375, "rewards/chosen": 0.027415934950113297, "rewards/margins": 0.016748551279306412, "rewards/rejected": 0.010667381808161736, "step": 2390 }, { "epoch": 2.48, "learning_rate": 9.644087256027554e-08, "logits/chosen": -4.258942604064941, "logits/rejected": -4.159635543823242, "logps/chosen": -410.4764099121094, "logps/rejected": -334.953369140625, "loss": 0.6895, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": 0.021485131233930588, "rewards/margins": 0.00812376569956541, "rewards/rejected": 0.013361366465687752, "step": 2400 }, { "epoch": 2.49, "learning_rate": 9.45273631840796e-08, "logits/chosen": -4.264720916748047, "logits/rejected": -4.117271900177002, "logps/chosen": -385.35052490234375, "logps/rejected": -290.49114990234375, "loss": 0.6894, "rewards/accuracies": 0.565625011920929, "rewards/chosen": 0.018992017954587936, "rewards/margins": 0.008459472097456455, "rewards/rejected": 0.01053254771977663, "step": 2410 }, { "epoch": 2.5, "learning_rate": 9.261385380788366e-08, "logits/chosen": -4.276088714599609, "logits/rejected": -4.1584858894348145, "logps/chosen": -397.9452209472656, "logps/rejected": -306.89739990234375, "loss": 0.6881, "rewards/accuracies": 0.5609375238418579, "rewards/chosen": 0.020109858363866806, "rewards/margins": 0.011120992712676525, "rewards/rejected": 0.008988862857222557, "step": 2420 }, { "epoch": 2.51, "learning_rate": 9.070034443168771e-08, "logits/chosen": -4.260004997253418, "logits/rejected": -4.13455867767334, "logps/chosen": -389.76397705078125, "logps/rejected": -304.0987548828125, "loss": 0.6871, "rewards/accuracies": 0.589062511920929, "rewards/chosen": 0.021830763667821884, "rewards/margins": 0.013007350265979767, "rewards/rejected": 0.008823414333164692, "step": 2430 }, { "epoch": 2.52, "learning_rate": 8.878683505549177e-08, "logits/chosen": -4.2787675857543945, "logits/rejected": -4.1580352783203125, "logps/chosen": -410.0357971191406, "logps/rejected": -329.86334228515625, "loss": 0.6893, "rewards/accuracies": 0.5625, "rewards/chosen": 0.02292051911354065, "rewards/margins": 0.008661197498440742, "rewards/rejected": 0.014259323477745056, "step": 2440 }, { "epoch": 2.53, "learning_rate": 8.687332567929582e-08, "logits/chosen": -4.27437686920166, "logits/rejected": -4.147732734680176, "logps/chosen": -401.8902587890625, "logps/rejected": -296.3741760253906, "loss": 0.6876, "rewards/accuracies": 0.5796874761581421, "rewards/chosen": 0.024159640073776245, "rewards/margins": 0.01200934313237667, "rewards/rejected": 0.012150297872722149, "step": 2450 }, { "epoch": 2.54, "learning_rate": 8.495981630309988e-08, "logits/chosen": -4.271141529083252, "logits/rejected": -4.141169548034668, "logps/chosen": -401.7086181640625, "logps/rejected": -319.8025817871094, "loss": 0.6875, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.023453358560800552, "rewards/margins": 0.01219714991748333, "rewards/rejected": 0.011256209574639797, "step": 2460 }, { "epoch": 2.55, "learning_rate": 8.304630692690395e-08, "logits/chosen": -4.2775139808654785, "logits/rejected": -4.150107383728027, "logps/chosen": -426.84576416015625, "logps/rejected": -320.8899230957031, "loss": 0.6879, "rewards/accuracies": 0.559374988079071, "rewards/chosen": 0.02233419567346573, "rewards/margins": 0.011261718347668648, "rewards/rejected": 0.011072477325797081, "step": 2470 }, { "epoch": 2.56, "learning_rate": 8.1132797550708e-08, "logits/chosen": -4.264378547668457, "logits/rejected": -4.172327995300293, "logps/chosen": -404.49420166015625, "logps/rejected": -316.01739501953125, "loss": 0.6862, "rewards/accuracies": 0.5765625238418579, "rewards/chosen": 0.02468792162835598, "rewards/margins": 0.014898866415023804, "rewards/rejected": 0.009789055213332176, "step": 2480 }, { "epoch": 2.57, "learning_rate": 7.921928817451206e-08, "logits/chosen": -4.274040222167969, "logits/rejected": -4.133544921875, "logps/chosen": -412.9867248535156, "logps/rejected": -303.045166015625, "loss": 0.6881, "rewards/accuracies": 0.5609375238418579, "rewards/chosen": 0.024420084431767464, "rewards/margins": 0.011113069020211697, "rewards/rejected": 0.013307017274200916, "step": 2490 }, { "epoch": 2.58, "learning_rate": 7.73057787983161e-08, "logits/chosen": -4.268971920013428, "logits/rejected": -4.1228179931640625, "logps/chosen": -412.2561950683594, "logps/rejected": -311.38623046875, "loss": 0.6879, "rewards/accuracies": 0.5625, "rewards/chosen": 0.02301056683063507, "rewards/margins": 0.011502142064273357, "rewards/rejected": 0.011508422903716564, "step": 2500 }, { "epoch": 2.59, "learning_rate": 7.539226942212017e-08, "logits/chosen": -4.249145030975342, "logits/rejected": -4.131203651428223, "logps/chosen": -407.7536926269531, "logps/rejected": -336.33172607421875, "loss": 0.6889, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.024292880669236183, "rewards/margins": 0.009482759051024914, "rewards/rejected": 0.01481011975556612, "step": 2510 }, { "epoch": 2.6, "learning_rate": 7.347876004592423e-08, "logits/chosen": -4.284695625305176, "logits/rejected": -4.161882400512695, "logps/chosen": -408.2262878417969, "logps/rejected": -316.01239013671875, "loss": 0.689, "rewards/accuracies": 0.565625011920929, "rewards/chosen": 0.020047323778271675, "rewards/margins": 0.009101735427975655, "rewards/rejected": 0.01094558835029602, "step": 2520 }, { "epoch": 2.61, "learning_rate": 7.156525066972828e-08, "logits/chosen": -4.280055522918701, "logits/rejected": -4.136964321136475, "logps/chosen": -430.8373107910156, "logps/rejected": -321.91973876953125, "loss": 0.6871, "rewards/accuracies": 0.565625011920929, "rewards/chosen": 0.024292441084980965, "rewards/margins": 0.013177357614040375, "rewards/rejected": 0.01111508347094059, "step": 2530 }, { "epoch": 2.62, "learning_rate": 6.965174129353234e-08, "logits/chosen": -4.265179634094238, "logits/rejected": -4.146527290344238, "logps/chosen": -391.67169189453125, "logps/rejected": -320.75372314453125, "loss": 0.6881, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": 0.02059927210211754, "rewards/margins": 0.011022168211638927, "rewards/rejected": 0.009577102959156036, "step": 2540 }, { "epoch": 2.63, "learning_rate": 6.773823191733639e-08, "logits/chosen": -4.287339210510254, "logits/rejected": -4.139233112335205, "logps/chosen": -410.2044372558594, "logps/rejected": -304.69049072265625, "loss": 0.6868, "rewards/accuracies": 0.596875011920929, "rewards/chosen": 0.024769442155957222, "rewards/margins": 0.013655883260071278, "rewards/rejected": 0.011113559827208519, "step": 2550 }, { "epoch": 2.64, "learning_rate": 6.582472254114045e-08, "logits/chosen": -4.297701835632324, "logits/rejected": -4.167489051818848, "logps/chosen": -416.37469482421875, "logps/rejected": -326.1492614746094, "loss": 0.6871, "rewards/accuracies": 0.596875011920929, "rewards/chosen": 0.023087535053491592, "rewards/margins": 0.013127269223332405, "rewards/rejected": 0.009960266761481762, "step": 2560 }, { "epoch": 2.65, "learning_rate": 6.391121316494451e-08, "logits/chosen": -4.28665828704834, "logits/rejected": -4.161301612854004, "logps/chosen": -382.5966796875, "logps/rejected": -317.6145935058594, "loss": 0.6885, "rewards/accuracies": 0.565625011920929, "rewards/chosen": 0.021074790507555008, "rewards/margins": 0.010219180956482887, "rewards/rejected": 0.010855610482394695, "step": 2570 }, { "epoch": 2.66, "learning_rate": 6.199770378874856e-08, "logits/chosen": -4.263566017150879, "logits/rejected": -4.153146266937256, "logps/chosen": -402.6602478027344, "logps/rejected": -314.2333068847656, "loss": 0.6873, "rewards/accuracies": 0.567187488079071, "rewards/chosen": 0.0247223861515522, "rewards/margins": 0.01265608798712492, "rewards/rejected": 0.012066296301782131, "step": 2580 }, { "epoch": 2.68, "learning_rate": 6.008419441255262e-08, "logits/chosen": -4.286923885345459, "logits/rejected": -4.174257278442383, "logps/chosen": -401.7779541015625, "logps/rejected": -322.3847961425781, "loss": 0.6885, "rewards/accuracies": 0.5765625238418579, "rewards/chosen": 0.024542078375816345, "rewards/margins": 0.01016208902001381, "rewards/rejected": 0.014379991218447685, "step": 2590 }, { "epoch": 2.69, "learning_rate": 5.817068503635668e-08, "logits/chosen": -4.264492988586426, "logits/rejected": -4.1116790771484375, "logps/chosen": -414.8555603027344, "logps/rejected": -294.9403381347656, "loss": 0.6862, "rewards/accuracies": 0.589062511920929, "rewards/chosen": 0.0254591666162014, "rewards/margins": 0.014859716407954693, "rewards/rejected": 0.010599448345601559, "step": 2600 }, { "epoch": 2.7, "learning_rate": 5.6257175660160735e-08, "logits/chosen": -4.258959770202637, "logits/rejected": -4.129164695739746, "logps/chosen": -404.98980712890625, "logps/rejected": -310.5729675292969, "loss": 0.6885, "rewards/accuracies": 0.565625011920929, "rewards/chosen": 0.021537696942687035, "rewards/margins": 0.010314391925930977, "rewards/rejected": 0.011223304085433483, "step": 2610 }, { "epoch": 2.71, "learning_rate": 5.4343666283964784e-08, "logits/chosen": -4.262757301330566, "logits/rejected": -4.128348350524902, "logps/chosen": -396.00445556640625, "logps/rejected": -306.8481140136719, "loss": 0.6865, "rewards/accuracies": 0.573437511920929, "rewards/chosen": 0.02303687483072281, "rewards/margins": 0.014214645139873028, "rewards/rejected": 0.008822232484817505, "step": 2620 }, { "epoch": 2.72, "learning_rate": 5.243015690776884e-08, "logits/chosen": -4.266745567321777, "logits/rejected": -4.137091159820557, "logps/chosen": -402.24993896484375, "logps/rejected": -299.17864990234375, "loss": 0.6885, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": 0.023752233013510704, "rewards/margins": 0.010097989812493324, "rewards/rejected": 0.01365424133837223, "step": 2630 }, { "epoch": 2.73, "learning_rate": 5.05166475315729e-08, "logits/chosen": -4.23615026473999, "logits/rejected": -4.157704830169678, "logps/chosen": -375.03277587890625, "logps/rejected": -310.47540283203125, "loss": 0.6869, "rewards/accuracies": 0.565625011920929, "rewards/chosen": 0.024319518357515335, "rewards/margins": 0.013446244411170483, "rewards/rejected": 0.010873274877667427, "step": 2640 }, { "epoch": 2.74, "learning_rate": 4.860313815537696e-08, "logits/chosen": -4.296034812927246, "logits/rejected": -4.151383399963379, "logps/chosen": -410.50146484375, "logps/rejected": -302.6440734863281, "loss": 0.6882, "rewards/accuracies": 0.5625, "rewards/chosen": 0.02242346480488777, "rewards/margins": 0.010714459232985973, "rewards/rejected": 0.011709003709256649, "step": 2650 }, { "epoch": 2.75, "learning_rate": 4.668962877918101e-08, "logits/chosen": -4.266396522521973, "logits/rejected": -4.138249397277832, "logps/chosen": -404.47039794921875, "logps/rejected": -302.8034362792969, "loss": 0.6876, "rewards/accuracies": 0.581250011920929, "rewards/chosen": 0.024602141231298447, "rewards/margins": 0.011944174766540527, "rewards/rejected": 0.012657967396080494, "step": 2660 }, { "epoch": 2.76, "learning_rate": 4.477611940298507e-08, "logits/chosen": -4.252145767211914, "logits/rejected": -4.149975776672363, "logps/chosen": -393.45355224609375, "logps/rejected": -314.1678771972656, "loss": 0.6886, "rewards/accuracies": 0.567187488079071, "rewards/chosen": 0.02430255338549614, "rewards/margins": 0.01004251278936863, "rewards/rejected": 0.014260041527450085, "step": 2670 }, { "epoch": 2.77, "learning_rate": 4.2862610026789124e-08, "logits/chosen": -4.265524864196777, "logits/rejected": -4.1408796310424805, "logps/chosen": -405.55511474609375, "logps/rejected": -310.058837890625, "loss": 0.6879, "rewards/accuracies": 0.5625, "rewards/chosen": 0.021910104900598526, "rewards/margins": 0.011513815261423588, "rewards/rejected": 0.010396288707852364, "step": 2680 }, { "epoch": 2.78, "learning_rate": 4.0949100650593186e-08, "logits/chosen": -4.2873334884643555, "logits/rejected": -4.150042533874512, "logps/chosen": -411.6018981933594, "logps/rejected": -312.466552734375, "loss": 0.6872, "rewards/accuracies": 0.5625, "rewards/chosen": 0.023441951721906662, "rewards/margins": 0.01284896582365036, "rewards/rejected": 0.010592986829578876, "step": 2690 }, { "epoch": 2.79, "learning_rate": 3.903559127439724e-08, "logits/chosen": -4.27925968170166, "logits/rejected": -4.131691932678223, "logps/chosen": -414.133056640625, "logps/rejected": -314.72442626953125, "loss": 0.6894, "rewards/accuracies": 0.551562488079071, "rewards/chosen": 0.02151947282254696, "rewards/margins": 0.00846365001052618, "rewards/rejected": 0.013055823743343353, "step": 2700 }, { "epoch": 2.8, "learning_rate": 3.71220818982013e-08, "logits/chosen": -4.298044681549072, "logits/rejected": -4.16172981262207, "logps/chosen": -410.84063720703125, "logps/rejected": -313.7170104980469, "loss": 0.6917, "rewards/accuracies": 0.5015624761581421, "rewards/chosen": 0.01963016204535961, "rewards/margins": 0.0036624562926590443, "rewards/rejected": 0.015967708081007004, "step": 2710 }, { "epoch": 2.81, "learning_rate": 3.520857252200535e-08, "logits/chosen": -4.293381690979004, "logits/rejected": -4.154776573181152, "logps/chosen": -410.7498474121094, "logps/rejected": -313.6238708496094, "loss": 0.6862, "rewards/accuracies": 0.59375, "rewards/chosen": 0.023770933970808983, "rewards/margins": 0.014910402707755566, "rewards/rejected": 0.008860534057021141, "step": 2720 }, { "epoch": 2.82, "learning_rate": 3.3295063145809414e-08, "logits/chosen": -4.240599632263184, "logits/rejected": -4.130780220031738, "logps/chosen": -411.70074462890625, "logps/rejected": -340.7054443359375, "loss": 0.6901, "rewards/accuracies": 0.5625, "rewards/chosen": 0.023142261430621147, "rewards/margins": 0.007068459875881672, "rewards/rejected": 0.0160738043487072, "step": 2730 }, { "epoch": 2.83, "learning_rate": 3.138155376961347e-08, "logits/chosen": -4.274569511413574, "logits/rejected": -4.144467830657959, "logps/chosen": -390.971923828125, "logps/rejected": -306.0947265625, "loss": 0.6885, "rewards/accuracies": 0.5703125, "rewards/chosen": 0.02243395894765854, "rewards/margins": 0.010051446035504341, "rewards/rejected": 0.012382512912154198, "step": 2740 }, { "epoch": 2.84, "learning_rate": 2.9468044393417525e-08, "logits/chosen": -4.260807037353516, "logits/rejected": -4.1508378982543945, "logps/chosen": -390.257080078125, "logps/rejected": -308.8485107421875, "loss": 0.6894, "rewards/accuracies": 0.559374988079071, "rewards/chosen": 0.02236488275229931, "rewards/margins": 0.008465753868222237, "rewards/rejected": 0.013899129815399647, "step": 2750 }, { "epoch": 2.85, "learning_rate": 2.755453501722158e-08, "logits/chosen": -4.267168998718262, "logits/rejected": -4.142585277557373, "logps/chosen": -400.91070556640625, "logps/rejected": -305.3719177246094, "loss": 0.6891, "rewards/accuracies": 0.546875, "rewards/chosen": 0.022190194576978683, "rewards/margins": 0.009010069072246552, "rewards/rejected": 0.013180124573409557, "step": 2760 }, { "epoch": 2.86, "learning_rate": 2.564102564102564e-08, "logits/chosen": -4.2551798820495605, "logits/rejected": -4.179601669311523, "logps/chosen": -403.3457946777344, "logps/rejected": -337.92742919921875, "loss": 0.6875, "rewards/accuracies": 0.567187488079071, "rewards/chosen": 0.02564335986971855, "rewards/margins": 0.012334323488175869, "rewards/rejected": 0.013309036381542683, "step": 2770 }, { "epoch": 2.87, "learning_rate": 2.3727516264829695e-08, "logits/chosen": -4.273959159851074, "logits/rejected": -4.142548084259033, "logps/chosen": -399.456787109375, "logps/rejected": -308.5451965332031, "loss": 0.6888, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": 0.02145221456885338, "rewards/margins": 0.009570146910846233, "rewards/rejected": 0.01188206858932972, "step": 2780 }, { "epoch": 2.88, "learning_rate": 2.1814006888633754e-08, "logits/chosen": -4.258917808532715, "logits/rejected": -4.150449275970459, "logps/chosen": -403.0406494140625, "logps/rejected": -322.05072021484375, "loss": 0.6899, "rewards/accuracies": 0.565625011920929, "rewards/chosen": 0.024062659591436386, "rewards/margins": 0.0073325140401721, "rewards/rejected": 0.01673014461994171, "step": 2790 }, { "epoch": 2.89, "learning_rate": 1.990049751243781e-08, "logits/chosen": -4.2558393478393555, "logits/rejected": -4.121561527252197, "logps/chosen": -406.07708740234375, "logps/rejected": -318.92083740234375, "loss": 0.6855, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.026952465996146202, "rewards/margins": 0.0162807684391737, "rewards/rejected": 0.010671699419617653, "step": 2800 }, { "epoch": 2.9, "learning_rate": 1.7986988136241865e-08, "logits/chosen": -4.284726142883301, "logits/rejected": -4.145880222320557, "logps/chosen": -407.32513427734375, "logps/rejected": -315.43463134765625, "loss": 0.6879, "rewards/accuracies": 0.5796874761581421, "rewards/chosen": 0.025830427184700966, "rewards/margins": 0.01146793458610773, "rewards/rejected": 0.01436249352991581, "step": 2810 }, { "epoch": 2.91, "learning_rate": 1.6073478760045924e-08, "logits/chosen": -4.281344413757324, "logits/rejected": -4.137038230895996, "logps/chosen": -425.17877197265625, "logps/rejected": -322.626953125, "loss": 0.6874, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": 0.024241294711828232, "rewards/margins": 0.012439909391105175, "rewards/rejected": 0.011801382526755333, "step": 2820 }, { "epoch": 2.92, "learning_rate": 1.4159969383849981e-08, "logits/chosen": -4.255224227905273, "logits/rejected": -4.1160664558410645, "logps/chosen": -417.343017578125, "logps/rejected": -317.6444091796875, "loss": 0.6898, "rewards/accuracies": 0.5703125, "rewards/chosen": 0.020717119798064232, "rewards/margins": 0.00756122637540102, "rewards/rejected": 0.013155892491340637, "step": 2830 }, { "epoch": 2.93, "learning_rate": 1.2246460007654037e-08, "logits/chosen": -4.262511730194092, "logits/rejected": -4.158580780029297, "logps/chosen": -414.7383728027344, "logps/rejected": -330.22784423828125, "loss": 0.6888, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": 0.026073377579450607, "rewards/margins": 0.00955992005765438, "rewards/rejected": 0.016513461247086525, "step": 2840 }, { "epoch": 2.94, "learning_rate": 1.0332950631458094e-08, "logits/chosen": -4.292693138122559, "logits/rejected": -4.152438163757324, "logps/chosen": -409.6170349121094, "logps/rejected": -313.6919860839844, "loss": 0.6871, "rewards/accuracies": 0.551562488079071, "rewards/chosen": 0.027544280514121056, "rewards/margins": 0.013068397529423237, "rewards/rejected": 0.014475886709988117, "step": 2850 }, { "epoch": 2.95, "learning_rate": 8.419441255262151e-09, "logits/chosen": -4.245689392089844, "logits/rejected": -4.140617847442627, "logps/chosen": -390.8550109863281, "logps/rejected": -315.28570556640625, "loss": 0.6892, "rewards/accuracies": 0.535937488079071, "rewards/chosen": 0.022948402911424637, "rewards/margins": 0.008875529281795025, "rewards/rejected": 0.014072870835661888, "step": 2860 }, { "epoch": 2.96, "learning_rate": 6.505931879066207e-09, "logits/chosen": -4.273464202880859, "logits/rejected": -4.1207990646362305, "logps/chosen": -421.85577392578125, "logps/rejected": -319.6473083496094, "loss": 0.6868, "rewards/accuracies": 0.5625, "rewards/chosen": 0.025004684925079346, "rewards/margins": 0.013815673068165779, "rewards/rejected": 0.011189011856913567, "step": 2870 }, { "epoch": 2.97, "learning_rate": 4.592422502870264e-09, "logits/chosen": -4.305132865905762, "logits/rejected": -4.1475958824157715, "logps/chosen": -421.42529296875, "logps/rejected": -318.45184326171875, "loss": 0.6882, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.024396046996116638, "rewards/margins": 0.010859435424208641, "rewards/rejected": 0.013536609709262848, "step": 2880 }, { "epoch": 2.98, "learning_rate": 2.6789131266743202e-09, "logits/chosen": -4.286343097686768, "logits/rejected": -4.121587753295898, "logps/chosen": -385.7117614746094, "logps/rejected": -282.57080078125, "loss": 0.6881, "rewards/accuracies": 0.5625, "rewards/chosen": 0.022473538294434547, "rewards/margins": 0.011112211272120476, "rewards/rejected": 0.011361326090991497, "step": 2890 }, { "epoch": 3.0, "learning_rate": 7.654037504783773e-10, "logits/chosen": -4.263542652130127, "logits/rejected": -4.148170471191406, "logps/chosen": -402.5519104003906, "logps/rejected": -317.2384338378906, "loss": 0.6876, "rewards/accuracies": 0.578125, "rewards/chosen": 0.027037670835852623, "rewards/margins": 0.01205758098512888, "rewards/rejected": 0.014980090782046318, "step": 2900 }, { "epoch": 3.0, "eval_logits/chosen": -4.191605567932129, "eval_logits/rejected": -4.081777572631836, "eval_logps/chosen": -402.4017333984375, "eval_logps/rejected": -315.6105651855469, "eval_loss": 0.6876626014709473, "eval_rewards/accuracies": 0.5644999742507935, "eval_rewards/chosen": 0.025381002575159073, "eval_rewards/margins": 0.011920945718884468, "eval_rewards/rejected": 0.013460054062306881, "eval_runtime": 776.0859, "eval_samples_per_second": 2.577, "eval_steps_per_second": 0.644, "step": 2904 }, { "epoch": 3.0, "step": 2904, "total_flos": 0.0, "train_loss": 0.6907179896044994, "train_runtime": 111372.3355, "train_samples_per_second": 1.669, "train_steps_per_second": 0.026 } ], "logging_steps": 10, "max_steps": 2904, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }