SebastianSchramm's picture
Model save
cb45edb
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9993222089532967,
"eval_steps": 100,
"global_step": 2904,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.7182130584192438e-09,
"logits/chosen": -4.324154853820801,
"logits/rejected": -4.269870758056641,
"logps/chosen": -367.06219482421875,
"logps/rejected": -317.6511535644531,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 1.718213058419244e-08,
"logits/chosen": -4.277963638305664,
"logits/rejected": -4.137287616729736,
"logps/chosen": -423.3011779785156,
"logps/rejected": -322.6611633300781,
"loss": 0.6946,
"rewards/accuracies": 0.3958333432674408,
"rewards/chosen": -0.0024322373792529106,
"rewards/margins": -0.0025027708616107702,
"rewards/rejected": 7.053340232232586e-05,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 3.436426116838488e-08,
"logits/chosen": -4.263833045959473,
"logits/rejected": -4.1435723304748535,
"logps/chosen": -392.3028259277344,
"logps/rejected": -317.58099365234375,
"loss": 0.6933,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.00013974684407003224,
"rewards/margins": 0.00036675756564363837,
"rewards/rejected": -0.0002270108088850975,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 5.154639175257731e-08,
"logits/chosen": -4.267035961151123,
"logits/rejected": -4.1559858322143555,
"logps/chosen": -406.5338134765625,
"logps/rejected": -325.1300354003906,
"loss": 0.6926,
"rewards/accuracies": 0.528124988079071,
"rewards/chosen": 0.0006199823692440987,
"rewards/margins": 0.001958064269274473,
"rewards/rejected": -0.0013380816671997309,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 6.872852233676976e-08,
"logits/chosen": -4.252381324768066,
"logits/rejected": -4.157735824584961,
"logps/chosen": -376.9677429199219,
"logps/rejected": -313.22186279296875,
"loss": 0.6936,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": 0.00031176567426882684,
"rewards/margins": -0.00012173606228316203,
"rewards/rejected": 0.0004335021658334881,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 8.59106529209622e-08,
"logits/chosen": -4.273421287536621,
"logits/rejected": -4.16197395324707,
"logps/chosen": -397.47222900390625,
"logps/rejected": -314.4212341308594,
"loss": 0.693,
"rewards/accuracies": 0.5234375,
"rewards/chosen": -0.002201077062636614,
"rewards/margins": 0.0010374437551945448,
"rewards/rejected": -0.0032385208178311586,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 1.0309278350515462e-07,
"logits/chosen": -4.286251544952393,
"logits/rejected": -4.157068252563477,
"logps/chosen": -398.3650207519531,
"logps/rejected": -320.15008544921875,
"loss": 0.6947,
"rewards/accuracies": 0.45781248807907104,
"rewards/chosen": -0.004078245721757412,
"rewards/margins": -0.0025454089045524597,
"rewards/rejected": -0.0015328375156968832,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 1.202749140893471e-07,
"logits/chosen": -4.272886753082275,
"logits/rejected": -4.148139953613281,
"logps/chosen": -401.9974060058594,
"logps/rejected": -301.5581970214844,
"loss": 0.6926,
"rewards/accuracies": 0.5140625238418579,
"rewards/chosen": -0.0007527429843321443,
"rewards/margins": 0.0018155823927372694,
"rewards/rejected": -0.0025683254934847355,
"step": 70
},
{
"epoch": 0.08,
"learning_rate": 1.3745704467353952e-07,
"logits/chosen": -4.266884803771973,
"logits/rejected": -4.140568256378174,
"logps/chosen": -422.1355895996094,
"logps/rejected": -317.8870849609375,
"loss": 0.692,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": -0.0010083441156893969,
"rewards/margins": 0.003068871796131134,
"rewards/rejected": -0.004077216610312462,
"step": 80
},
{
"epoch": 0.09,
"learning_rate": 1.5463917525773197e-07,
"logits/chosen": -4.275304317474365,
"logits/rejected": -4.130114555358887,
"logps/chosen": -405.22271728515625,
"logps/rejected": -321.6945495605469,
"loss": 0.6934,
"rewards/accuracies": 0.4906249940395355,
"rewards/chosen": -0.0015558091690763831,
"rewards/margins": 0.00022566183179151267,
"rewards/rejected": -0.001781471073627472,
"step": 90
},
{
"epoch": 0.1,
"learning_rate": 1.718213058419244e-07,
"logits/chosen": -4.3047099113464355,
"logits/rejected": -4.166022300720215,
"logps/chosen": -401.2640075683594,
"logps/rejected": -308.84307861328125,
"loss": 0.694,
"rewards/accuracies": 0.5140625238418579,
"rewards/chosen": -0.0015420484123751521,
"rewards/margins": -0.0010955848265439272,
"rewards/rejected": -0.0004464638768695295,
"step": 100
},
{
"epoch": 0.11,
"learning_rate": 1.8900343642611682e-07,
"logits/chosen": -4.256144046783447,
"logits/rejected": -4.105890274047852,
"logps/chosen": -420.77423095703125,
"logps/rejected": -302.2908935546875,
"loss": 0.6937,
"rewards/accuracies": 0.4859375059604645,
"rewards/chosen": 9.21973041840829e-05,
"rewards/margins": -0.0004623614368028939,
"rewards/rejected": 0.0005545587628148496,
"step": 110
},
{
"epoch": 0.12,
"learning_rate": 2.0618556701030925e-07,
"logits/chosen": -4.286948204040527,
"logits/rejected": -4.167834281921387,
"logps/chosen": -405.80596923828125,
"logps/rejected": -323.16510009765625,
"loss": 0.694,
"rewards/accuracies": 0.4906249940395355,
"rewards/chosen": -0.0016373072285205126,
"rewards/margins": -0.001050219521857798,
"rewards/rejected": -0.0005870877066627145,
"step": 120
},
{
"epoch": 0.13,
"learning_rate": 2.2336769759450173e-07,
"logits/chosen": -4.278336524963379,
"logits/rejected": -4.132115840911865,
"logps/chosen": -401.5750732421875,
"logps/rejected": -298.68670654296875,
"loss": 0.6926,
"rewards/accuracies": 0.49531251192092896,
"rewards/chosen": -0.002432642038911581,
"rewards/margins": 0.0018511947710067034,
"rewards/rejected": -0.004283837042748928,
"step": 130
},
{
"epoch": 0.14,
"learning_rate": 2.405498281786942e-07,
"logits/chosen": -4.276402473449707,
"logits/rejected": -4.149945259094238,
"logps/chosen": -412.5904846191406,
"logps/rejected": -305.0515441894531,
"loss": 0.694,
"rewards/accuracies": 0.49531251192092896,
"rewards/chosen": -0.0023457477800548077,
"rewards/margins": -0.0008829582366161048,
"rewards/rejected": -0.0014627889031544328,
"step": 140
},
{
"epoch": 0.15,
"learning_rate": 2.5773195876288655e-07,
"logits/chosen": -4.2490129470825195,
"logits/rejected": -4.137168884277344,
"logps/chosen": -381.56756591796875,
"logps/rejected": -299.19793701171875,
"loss": 0.6946,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.0032607235480099916,
"rewards/margins": -0.0023342289496213198,
"rewards/rejected": -0.0009264945983886719,
"step": 150
},
{
"epoch": 0.17,
"learning_rate": 2.7491408934707903e-07,
"logits/chosen": -4.262160778045654,
"logits/rejected": -4.115349769592285,
"logps/chosen": -399.4332580566406,
"logps/rejected": -311.04608154296875,
"loss": 0.6943,
"rewards/accuracies": 0.4921875,
"rewards/chosen": -0.0022923736833035946,
"rewards/margins": -0.0016422644257545471,
"rewards/rejected": -0.0006501094321720302,
"step": 160
},
{
"epoch": 0.18,
"learning_rate": 2.9209621993127146e-07,
"logits/chosen": -4.254582405090332,
"logits/rejected": -4.094132423400879,
"logps/chosen": -419.3282775878906,
"logps/rejected": -314.56829833984375,
"loss": 0.6926,
"rewards/accuracies": 0.4859375059604645,
"rewards/chosen": -0.0008115085074678063,
"rewards/margins": 0.0018295502522960305,
"rewards/rejected": -0.002641058526933193,
"step": 170
},
{
"epoch": 0.19,
"learning_rate": 3.0927835051546394e-07,
"logits/chosen": -4.262505531311035,
"logits/rejected": -4.1487321853637695,
"logps/chosen": -413.31011962890625,
"logps/rejected": -324.69805908203125,
"loss": 0.6935,
"rewards/accuracies": 0.4921875,
"rewards/chosen": -0.0014527825405821204,
"rewards/margins": -0.00011156280379509553,
"rewards/rejected": -0.0013412194093689322,
"step": 180
},
{
"epoch": 0.2,
"learning_rate": 3.2646048109965636e-07,
"logits/chosen": -4.2973856925964355,
"logits/rejected": -4.131929397583008,
"logps/chosen": -417.13568115234375,
"logps/rejected": -314.27001953125,
"loss": 0.6946,
"rewards/accuracies": 0.484375,
"rewards/chosen": -0.0009450524812564254,
"rewards/margins": -0.0020798335317522287,
"rewards/rejected": 0.0011347811669111252,
"step": 190
},
{
"epoch": 0.21,
"learning_rate": 3.436426116838488e-07,
"logits/chosen": -4.277375221252441,
"logits/rejected": -4.1441168785095215,
"logps/chosen": -381.47296142578125,
"logps/rejected": -301.980224609375,
"loss": 0.6925,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": 0.002059588208794594,
"rewards/margins": 0.00202515278942883,
"rewards/rejected": 3.443551395321265e-05,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 3.608247422680412e-07,
"logits/chosen": -4.272757530212402,
"logits/rejected": -4.1654133796691895,
"logps/chosen": -407.0859069824219,
"logps/rejected": -331.93328857421875,
"loss": 0.6924,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.0008089464390650392,
"rewards/margins": 0.0022251014597713947,
"rewards/rejected": -0.0014161552535369992,
"step": 210
},
{
"epoch": 0.23,
"learning_rate": 3.7800687285223364e-07,
"logits/chosen": -4.265755653381348,
"logits/rejected": -4.139852523803711,
"logps/chosen": -396.70904541015625,
"logps/rejected": -311.2632141113281,
"loss": 0.6945,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.0018544609192758799,
"rewards/margins": -0.0020773629657924175,
"rewards/rejected": 0.00022290220658760518,
"step": 220
},
{
"epoch": 0.24,
"learning_rate": 3.9518900343642607e-07,
"logits/chosen": -4.260704040527344,
"logits/rejected": -4.126727104187012,
"logps/chosen": -408.3927917480469,
"logps/rejected": -319.992919921875,
"loss": 0.694,
"rewards/accuracies": 0.4859375059604645,
"rewards/chosen": -0.0022286553867161274,
"rewards/margins": -0.000920031510759145,
"rewards/rejected": -0.001308623468503356,
"step": 230
},
{
"epoch": 0.25,
"learning_rate": 4.123711340206185e-07,
"logits/chosen": -4.259461879730225,
"logits/rejected": -4.144876956939697,
"logps/chosen": -409.2859802246094,
"logps/rejected": -328.0377502441406,
"loss": 0.6936,
"rewards/accuracies": 0.504687488079071,
"rewards/chosen": -0.0005639836890622973,
"rewards/margins": -0.00022939601331017911,
"rewards/rejected": -0.0003345878212712705,
"step": 240
},
{
"epoch": 0.26,
"learning_rate": 4.2955326460481097e-07,
"logits/chosen": -4.266787052154541,
"logits/rejected": -4.1633710861206055,
"logps/chosen": -403.5596923828125,
"logps/rejected": -319.5048522949219,
"loss": 0.6943,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.0008266723598353565,
"rewards/margins": -0.0015376238152384758,
"rewards/rejected": 0.0007109515718184412,
"step": 250
},
{
"epoch": 0.27,
"learning_rate": 4.4673539518900345e-07,
"logits/chosen": -4.2665534019470215,
"logits/rejected": -4.13022518157959,
"logps/chosen": -379.11322021484375,
"logps/rejected": -289.3961486816406,
"loss": 0.6932,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.0011679441668093204,
"rewards/margins": 0.00039926558383740485,
"rewards/rejected": -0.0015672097215428948,
"step": 260
},
{
"epoch": 0.28,
"learning_rate": 4.639175257731959e-07,
"logits/chosen": -4.271460056304932,
"logits/rejected": -4.139186859130859,
"logps/chosen": -424.3821716308594,
"logps/rejected": -318.91943359375,
"loss": 0.693,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.00013967350241728127,
"rewards/margins": 0.0009566223016008735,
"rewards/rejected": -0.0010962963569909334,
"step": 270
},
{
"epoch": 0.29,
"learning_rate": 4.810996563573884e-07,
"logits/chosen": -4.285706520080566,
"logits/rejected": -4.135653018951416,
"logps/chosen": -408.2174377441406,
"logps/rejected": -306.91937255859375,
"loss": 0.6928,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 2.2016651200829074e-05,
"rewards/margins": 0.0013473120052367449,
"rewards/rejected": -0.001325295539572835,
"step": 280
},
{
"epoch": 0.3,
"learning_rate": 4.982817869415807e-07,
"logits/chosen": -4.282963752746582,
"logits/rejected": -4.156318664550781,
"logps/chosen": -383.9125671386719,
"logps/rejected": -296.3202819824219,
"loss": 0.694,
"rewards/accuracies": 0.4828124940395355,
"rewards/chosen": -0.0008248983067460358,
"rewards/margins": -0.0010658926330506802,
"rewards/rejected": 0.00024099461734294891,
"step": 290
},
{
"epoch": 0.31,
"learning_rate": 4.982778415614236e-07,
"logits/chosen": -4.276602268218994,
"logits/rejected": -4.160402774810791,
"logps/chosen": -395.6986083984375,
"logps/rejected": -308.46368408203125,
"loss": 0.6934,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.00020179541024845093,
"rewards/margins": 0.00023063849948812276,
"rewards/rejected": -0.00043243388063274324,
"step": 300
},
{
"epoch": 0.32,
"learning_rate": 4.963643321852277e-07,
"logits/chosen": -4.262394905090332,
"logits/rejected": -4.137896537780762,
"logps/chosen": -405.48583984375,
"logps/rejected": -316.9510498046875,
"loss": 0.6933,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.0025507945101708174,
"rewards/margins": 0.0003848490596283227,
"rewards/rejected": -0.0029356435406953096,
"step": 310
},
{
"epoch": 0.33,
"learning_rate": 4.944508228090318e-07,
"logits/chosen": -4.2743682861328125,
"logits/rejected": -4.132224082946777,
"logps/chosen": -406.7742614746094,
"logps/rejected": -300.9280700683594,
"loss": 0.6936,
"rewards/accuracies": 0.5171874761581421,
"rewards/chosen": -0.0014929536264389753,
"rewards/margins": -0.00023388855333905667,
"rewards/rejected": -0.0012590645346790552,
"step": 320
},
{
"epoch": 0.34,
"learning_rate": 4.925373134328357e-07,
"logits/chosen": -4.293769836425781,
"logits/rejected": -4.170851230621338,
"logps/chosen": -406.20074462890625,
"logps/rejected": -320.6856384277344,
"loss": 0.6947,
"rewards/accuracies": 0.47968751192092896,
"rewards/chosen": -3.2803043268359033e-06,
"rewards/margins": -0.0023293071426451206,
"rewards/rejected": 0.0023260267917066813,
"step": 330
},
{
"epoch": 0.35,
"learning_rate": 4.906238040566398e-07,
"logits/chosen": -4.264895439147949,
"logits/rejected": -4.144906044006348,
"logps/chosen": -382.7566223144531,
"logps/rejected": -307.6598205566406,
"loss": 0.6922,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.001648748992010951,
"rewards/margins": 0.002527676522731781,
"rewards/rejected": -0.004176425281912088,
"step": 340
},
{
"epoch": 0.36,
"learning_rate": 4.887102946804438e-07,
"logits/chosen": -4.280018329620361,
"logits/rejected": -4.165085792541504,
"logps/chosen": -396.96026611328125,
"logps/rejected": -309.4230651855469,
"loss": 0.6923,
"rewards/accuracies": 0.510937511920929,
"rewards/chosen": 0.0016236413503065705,
"rewards/margins": 0.0022962945513427258,
"rewards/rejected": -0.000672653317451477,
"step": 350
},
{
"epoch": 0.37,
"learning_rate": 4.867967853042479e-07,
"logits/chosen": -4.289803504943848,
"logits/rejected": -4.140151500701904,
"logps/chosen": -405.28973388671875,
"logps/rejected": -312.529541015625,
"loss": 0.6939,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.0023417104966938496,
"rewards/margins": -0.0007966022822074592,
"rewards/rejected": -0.0015451073413714767,
"step": 360
},
{
"epoch": 0.38,
"learning_rate": 4.84883275928052e-07,
"logits/chosen": -4.268817901611328,
"logits/rejected": -4.147084712982178,
"logps/chosen": -399.0814514160156,
"logps/rejected": -325.91363525390625,
"loss": 0.6924,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 3.723411282408051e-05,
"rewards/margins": 0.0022587967105209827,
"rewards/rejected": -0.0022215619683265686,
"step": 370
},
{
"epoch": 0.39,
"learning_rate": 4.82969766551856e-07,
"logits/chosen": -4.2609758377075195,
"logits/rejected": -4.148962020874023,
"logps/chosen": -393.0679016113281,
"logps/rejected": -317.9642028808594,
"loss": 0.6927,
"rewards/accuracies": 0.4765625,
"rewards/chosen": 0.0006004157476127148,
"rewards/margins": 0.0016590984305366874,
"rewards/rejected": -0.001058683032169938,
"step": 380
},
{
"epoch": 0.4,
"learning_rate": 4.810562571756601e-07,
"logits/chosen": -4.2723493576049805,
"logits/rejected": -4.122799873352051,
"logps/chosen": -397.3229675292969,
"logps/rejected": -304.338623046875,
"loss": 0.6935,
"rewards/accuracies": 0.5328124761581421,
"rewards/chosen": 0.0007614147616550326,
"rewards/margins": 7.879303666413762e-06,
"rewards/rejected": 0.0007535360055044293,
"step": 390
},
{
"epoch": 0.41,
"learning_rate": 4.791427477994642e-07,
"logits/chosen": -4.290783405303955,
"logits/rejected": -4.144261360168457,
"logps/chosen": -412.53375244140625,
"logps/rejected": -313.51739501953125,
"loss": 0.6948,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.0025499488692730665,
"rewards/margins": -0.002634689910337329,
"rewards/rejected": 8.474113565171137e-05,
"step": 400
},
{
"epoch": 0.42,
"learning_rate": 4.772292384232682e-07,
"logits/chosen": -4.280355930328369,
"logits/rejected": -4.188906669616699,
"logps/chosen": -399.37750244140625,
"logps/rejected": -324.8134765625,
"loss": 0.692,
"rewards/accuracies": 0.5234375,
"rewards/chosen": 0.0012526216451078653,
"rewards/margins": 0.0029473325703293085,
"rewards/rejected": -0.0016947109252214432,
"step": 410
},
{
"epoch": 0.43,
"learning_rate": 4.753157290470723e-07,
"logits/chosen": -4.2888898849487305,
"logits/rejected": -4.1478071212768555,
"logps/chosen": -418.41351318359375,
"logps/rejected": -319.01507568359375,
"loss": 0.6938,
"rewards/accuracies": 0.4984374940395355,
"rewards/chosen": 0.0015372170601040125,
"rewards/margins": -0.0005680068279616535,
"rewards/rejected": 0.0021052241791039705,
"step": 420
},
{
"epoch": 0.44,
"learning_rate": 4.7340221967087635e-07,
"logits/chosen": -4.254868984222412,
"logits/rejected": -4.136019706726074,
"logps/chosen": -384.3445739746094,
"logps/rejected": -295.01312255859375,
"loss": 0.6948,
"rewards/accuracies": 0.4828124940395355,
"rewards/chosen": -0.0011382882948964834,
"rewards/margins": -0.0026994033250957727,
"rewards/rejected": 0.0015611147973686457,
"step": 430
},
{
"epoch": 0.45,
"learning_rate": 4.714887102946804e-07,
"logits/chosen": -4.2907233238220215,
"logits/rejected": -4.132693290710449,
"logps/chosen": -414.918212890625,
"logps/rejected": -315.94573974609375,
"loss": 0.6936,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": 0.0015450514620169997,
"rewards/margins": -0.0002846633142326027,
"rewards/rejected": 0.001829715445637703,
"step": 440
},
{
"epoch": 0.46,
"learning_rate": 4.6957520091848447e-07,
"logits/chosen": -4.255721092224121,
"logits/rejected": -4.1633100509643555,
"logps/chosen": -400.84967041015625,
"logps/rejected": -327.2038269042969,
"loss": 0.6932,
"rewards/accuracies": 0.515625,
"rewards/chosen": -0.0014290885301306844,
"rewards/margins": 0.0006102249026298523,
"rewards/rejected": -0.0020393135491758585,
"step": 450
},
{
"epoch": 0.48,
"learning_rate": 4.6766169154228853e-07,
"logits/chosen": -4.247704029083252,
"logits/rejected": -4.119411468505859,
"logps/chosen": -394.64178466796875,
"logps/rejected": -308.6086120605469,
"loss": 0.6924,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.0018646775279194117,
"rewards/margins": 0.0020817045588046312,
"rewards/rejected": -0.00021702758385799825,
"step": 460
},
{
"epoch": 0.49,
"learning_rate": 4.657481821660926e-07,
"logits/chosen": -4.270275592803955,
"logits/rejected": -4.133027076721191,
"logps/chosen": -419.8226623535156,
"logps/rejected": -323.5798645019531,
"loss": 0.6949,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": -0.0003835520183201879,
"rewards/margins": -0.00283462880179286,
"rewards/rejected": 0.0024510768707841635,
"step": 470
},
{
"epoch": 0.5,
"learning_rate": 4.6383467278989666e-07,
"logits/chosen": -4.274611473083496,
"logits/rejected": -4.136614799499512,
"logps/chosen": -400.21673583984375,
"logps/rejected": -314.0077209472656,
"loss": 0.6929,
"rewards/accuracies": 0.4828124940395355,
"rewards/chosen": -0.00010184728307649493,
"rewards/margins": 0.0012176515301689506,
"rewards/rejected": -0.0013194989878684282,
"step": 480
},
{
"epoch": 0.51,
"learning_rate": 4.6192116341370067e-07,
"logits/chosen": -4.25107479095459,
"logits/rejected": -4.151733875274658,
"logps/chosen": -397.98016357421875,
"logps/rejected": -308.0941467285156,
"loss": 0.6939,
"rewards/accuracies": 0.4828124940395355,
"rewards/chosen": -0.0004709061176981777,
"rewards/margins": -0.0007273858063854277,
"rewards/rejected": 0.00025647960137575865,
"step": 490
},
{
"epoch": 0.52,
"learning_rate": 4.6000765403750473e-07,
"logits/chosen": -4.2503204345703125,
"logits/rejected": -4.1171112060546875,
"logps/chosen": -412.30450439453125,
"logps/rejected": -309.49481201171875,
"loss": 0.692,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.0027156358119100332,
"rewards/margins": 0.003060466842725873,
"rewards/rejected": -0.0003448307979851961,
"step": 500
},
{
"epoch": 0.53,
"learning_rate": 4.580941446613088e-07,
"logits/chosen": -4.279356002807617,
"logits/rejected": -4.125931262969971,
"logps/chosen": -409.6204528808594,
"logps/rejected": -298.81329345703125,
"loss": 0.6915,
"rewards/accuracies": 0.5093749761581421,
"rewards/chosen": 0.0007973018218763173,
"rewards/margins": 0.003946124110370874,
"rewards/rejected": -0.0031488225795328617,
"step": 510
},
{
"epoch": 0.54,
"learning_rate": 4.5618063528511285e-07,
"logits/chosen": -4.253719329833984,
"logits/rejected": -4.118457794189453,
"logps/chosen": -408.22882080078125,
"logps/rejected": -319.6842346191406,
"loss": 0.6914,
"rewards/accuracies": 0.535937488079071,
"rewards/chosen": 0.00437967898324132,
"rewards/margins": 0.004070502705872059,
"rewards/rejected": 0.00030917683034203947,
"step": 520
},
{
"epoch": 0.55,
"learning_rate": 4.542671259089169e-07,
"logits/chosen": -4.232258319854736,
"logits/rejected": -4.110759258270264,
"logps/chosen": -414.5962829589844,
"logps/rejected": -323.87078857421875,
"loss": 0.6919,
"rewards/accuracies": 0.5546875,
"rewards/chosen": 0.002581060165539384,
"rewards/margins": 0.0032051261514425278,
"rewards/rejected": -0.0006240661023184657,
"step": 530
},
{
"epoch": 0.56,
"learning_rate": 4.52353616532721e-07,
"logits/chosen": -4.25144624710083,
"logits/rejected": -4.156504154205322,
"logps/chosen": -395.7941589355469,
"logps/rejected": -313.2027893066406,
"loss": 0.6946,
"rewards/accuracies": 0.4859375059604645,
"rewards/chosen": 0.0006000929279252887,
"rewards/margins": -0.0023031379096210003,
"rewards/rejected": 0.0029032311867922544,
"step": 540
},
{
"epoch": 0.57,
"learning_rate": 4.5044010715652504e-07,
"logits/chosen": -4.2781877517700195,
"logits/rejected": -4.152641296386719,
"logps/chosen": -400.3907470703125,
"logps/rejected": -323.36663818359375,
"loss": 0.6939,
"rewards/accuracies": 0.47968751192092896,
"rewards/chosen": 0.0021930981893092394,
"rewards/margins": -0.0007275763782672584,
"rewards/rejected": 0.0029206746257841587,
"step": 550
},
{
"epoch": 0.58,
"learning_rate": 4.485265977803291e-07,
"logits/chosen": -4.2500386238098145,
"logits/rejected": -4.107308387756348,
"logps/chosen": -411.8309020996094,
"logps/rejected": -319.95989990234375,
"loss": 0.6914,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": 0.0027205091901123524,
"rewards/margins": 0.004122564569115639,
"rewards/rejected": -0.0014020560774952173,
"step": 560
},
{
"epoch": 0.59,
"learning_rate": 4.4661308840413316e-07,
"logits/chosen": -4.307834625244141,
"logits/rejected": -4.123293399810791,
"logps/chosen": -412.34625244140625,
"logps/rejected": -310.9077453613281,
"loss": 0.6933,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.00016926185344345868,
"rewards/margins": 0.0005053894128650427,
"rewards/rejected": -0.0006746514118276536,
"step": 570
},
{
"epoch": 0.6,
"learning_rate": 4.446995790279372e-07,
"logits/chosen": -4.272593021392822,
"logits/rejected": -4.129204750061035,
"logps/chosen": -411.84161376953125,
"logps/rejected": -321.55072021484375,
"loss": 0.6922,
"rewards/accuracies": 0.5078125,
"rewards/chosen": 0.003927945625036955,
"rewards/margins": 0.0026295329444110394,
"rewards/rejected": 0.0012984138447791338,
"step": 580
},
{
"epoch": 0.61,
"learning_rate": 4.4278606965174123e-07,
"logits/chosen": -4.280831813812256,
"logits/rejected": -4.155787467956543,
"logps/chosen": -408.56256103515625,
"logps/rejected": -312.60577392578125,
"loss": 0.6936,
"rewards/accuracies": 0.4921875,
"rewards/chosen": -9.327723819296807e-05,
"rewards/margins": -0.00040280382381752133,
"rewards/rejected": 0.0003095265128649771,
"step": 590
},
{
"epoch": 0.62,
"learning_rate": 4.408725602755453e-07,
"logits/chosen": -4.285837650299072,
"logits/rejected": -4.156912803649902,
"logps/chosen": -404.223876953125,
"logps/rejected": -322.22894287109375,
"loss": 0.6945,
"rewards/accuracies": 0.4906249940395355,
"rewards/chosen": 0.00110340875107795,
"rewards/margins": -0.0020270957611501217,
"rewards/rejected": 0.003130504861474037,
"step": 600
},
{
"epoch": 0.63,
"learning_rate": 4.3895905089934936e-07,
"logits/chosen": -4.270712852478027,
"logits/rejected": -4.1485466957092285,
"logps/chosen": -427.80010986328125,
"logps/rejected": -334.1997375488281,
"loss": 0.6933,
"rewards/accuracies": 0.5093749761581421,
"rewards/chosen": 0.0012709179427474737,
"rewards/margins": 0.00037813876406289637,
"rewards/rejected": 0.0008927792077884078,
"step": 610
},
{
"epoch": 0.64,
"learning_rate": 4.370455415231534e-07,
"logits/chosen": -4.2818217277526855,
"logits/rejected": -4.141083240509033,
"logps/chosen": -397.087646484375,
"logps/rejected": -299.8023986816406,
"loss": 0.6917,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.0015098925214260817,
"rewards/margins": 0.003647155361250043,
"rewards/rejected": -0.0021372628398239613,
"step": 620
},
{
"epoch": 0.65,
"learning_rate": 4.351320321469575e-07,
"logits/chosen": -4.2828288078308105,
"logits/rejected": -4.15810489654541,
"logps/chosen": -408.72857666015625,
"logps/rejected": -310.77764892578125,
"loss": 0.6944,
"rewards/accuracies": 0.4765625,
"rewards/chosen": 0.00010033079888671637,
"rewards/margins": -0.0017872953321784735,
"rewards/rejected": 0.0018876262474805117,
"step": 630
},
{
"epoch": 0.66,
"learning_rate": 4.3321852277076154e-07,
"logits/chosen": -4.249554634094238,
"logits/rejected": -4.136783599853516,
"logps/chosen": -414.71624755859375,
"logps/rejected": -330.455810546875,
"loss": 0.6912,
"rewards/accuracies": 0.578125,
"rewards/chosen": 0.006457502953708172,
"rewards/margins": 0.0046369172632694244,
"rewards/rejected": 0.0018205851083621383,
"step": 640
},
{
"epoch": 0.67,
"learning_rate": 4.313050133945656e-07,
"logits/chosen": -4.281416893005371,
"logits/rejected": -4.156689643859863,
"logps/chosen": -419.05401611328125,
"logps/rejected": -319.1665954589844,
"loss": 0.6921,
"rewards/accuracies": 0.528124988079071,
"rewards/chosen": 0.0033603243064135313,
"rewards/margins": 0.0027967148926109076,
"rewards/rejected": 0.0005636097048409283,
"step": 650
},
{
"epoch": 0.68,
"learning_rate": 4.2939150401836967e-07,
"logits/chosen": -4.239119529724121,
"logits/rejected": -4.116203308105469,
"logps/chosen": -421.9933166503906,
"logps/rejected": -327.5665588378906,
"loss": 0.6935,
"rewards/accuracies": 0.4906249940395355,
"rewards/chosen": 0.0006358650280162692,
"rewards/margins": -3.5358032619114965e-05,
"rewards/rejected": 0.0006712229805998504,
"step": 660
},
{
"epoch": 0.69,
"learning_rate": 4.2747799464217373e-07,
"logits/chosen": -4.252030849456787,
"logits/rejected": -4.138897895812988,
"logps/chosen": -402.931884765625,
"logps/rejected": -311.1913757324219,
"loss": 0.6926,
"rewards/accuracies": 0.4906249940395355,
"rewards/chosen": 0.003976965788751841,
"rewards/margins": 0.0018042316660284996,
"rewards/rejected": 0.002172734122723341,
"step": 670
},
{
"epoch": 0.7,
"learning_rate": 4.255644852659778e-07,
"logits/chosen": -4.269718170166016,
"logits/rejected": -4.140122413635254,
"logps/chosen": -402.2257385253906,
"logps/rejected": -318.7126770019531,
"loss": 0.6928,
"rewards/accuracies": 0.515625,
"rewards/chosen": 0.00315951113589108,
"rewards/margins": 0.0014611692167818546,
"rewards/rejected": 0.0016983415698632598,
"step": 680
},
{
"epoch": 0.71,
"learning_rate": 4.236509758897818e-07,
"logits/chosen": -4.262238025665283,
"logits/rejected": -4.1432600021362305,
"logps/chosen": -405.7474670410156,
"logps/rejected": -317.77447509765625,
"loss": 0.6911,
"rewards/accuracies": 0.510937511920929,
"rewards/chosen": 0.005235121585428715,
"rewards/margins": 0.004777342546731234,
"rewards/rejected": 0.00045777950435876846,
"step": 690
},
{
"epoch": 0.72,
"learning_rate": 4.2173746651358586e-07,
"logits/chosen": -4.308034420013428,
"logits/rejected": -4.176297187805176,
"logps/chosen": -417.35626220703125,
"logps/rejected": -313.90887451171875,
"loss": 0.6918,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.0032903787214308977,
"rewards/margins": 0.003389782505109906,
"rewards/rejected": -9.9403434433043e-05,
"step": 700
},
{
"epoch": 0.73,
"learning_rate": 4.198239571373899e-07,
"logits/chosen": -4.285686492919922,
"logits/rejected": -4.153790473937988,
"logps/chosen": -423.37127685546875,
"logps/rejected": -331.2875671386719,
"loss": 0.6909,
"rewards/accuracies": 0.542187511920929,
"rewards/chosen": 0.005327778868377209,
"rewards/margins": 0.005328441970050335,
"rewards/rejected": -6.637536102971353e-07,
"step": 710
},
{
"epoch": 0.74,
"learning_rate": 4.17910447761194e-07,
"logits/chosen": -4.279539108276367,
"logits/rejected": -4.177431106567383,
"logps/chosen": -379.5310363769531,
"logps/rejected": -300.36602783203125,
"loss": 0.6924,
"rewards/accuracies": 0.510937511920929,
"rewards/chosen": 0.0025111553259193897,
"rewards/margins": 0.0021530953235924244,
"rewards/rejected": 0.00035806017694994807,
"step": 720
},
{
"epoch": 0.75,
"learning_rate": 4.1599693838499805e-07,
"logits/chosen": -4.239541530609131,
"logits/rejected": -4.1288628578186035,
"logps/chosen": -375.0932312011719,
"logps/rejected": -298.7073974609375,
"loss": 0.6934,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": 0.0017489530146121979,
"rewards/margins": 0.0001466287358198315,
"rewards/rejected": 0.0016023240750655532,
"step": 730
},
{
"epoch": 0.76,
"learning_rate": 4.140834290088021e-07,
"logits/chosen": -4.258530616760254,
"logits/rejected": -4.131335258483887,
"logps/chosen": -378.4256591796875,
"logps/rejected": -288.19842529296875,
"loss": 0.6939,
"rewards/accuracies": 0.49531251192092896,
"rewards/chosen": 0.0009945884812623262,
"rewards/margins": -0.0008082139538601041,
"rewards/rejected": 0.0018028020858764648,
"step": 740
},
{
"epoch": 0.77,
"learning_rate": 4.121699196326062e-07,
"logits/chosen": -4.268132209777832,
"logits/rejected": -4.133342742919922,
"logps/chosen": -402.5184631347656,
"logps/rejected": -310.98651123046875,
"loss": 0.6942,
"rewards/accuracies": 0.46875,
"rewards/chosen": 0.0009071314707398415,
"rewards/margins": -0.001490770373493433,
"rewards/rejected": 0.0023979023098945618,
"step": 750
},
{
"epoch": 0.78,
"learning_rate": 4.1025641025641024e-07,
"logits/chosen": -4.277812480926514,
"logits/rejected": -4.182877063751221,
"logps/chosen": -408.18048095703125,
"logps/rejected": -319.3320617675781,
"loss": 0.6925,
"rewards/accuracies": 0.515625,
"rewards/chosen": 0.004439138807356358,
"rewards/margins": 0.0019760008435696363,
"rewards/rejected": 0.0024631377309560776,
"step": 760
},
{
"epoch": 0.8,
"learning_rate": 4.083429008802143e-07,
"logits/chosen": -4.280663967132568,
"logits/rejected": -4.152825355529785,
"logps/chosen": -401.7829895019531,
"logps/rejected": -306.715576171875,
"loss": 0.6928,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.004907527472823858,
"rewards/margins": 0.00137388426810503,
"rewards/rejected": 0.003533643204718828,
"step": 770
},
{
"epoch": 0.81,
"learning_rate": 4.0642939150401836e-07,
"logits/chosen": -4.289612770080566,
"logits/rejected": -4.144918918609619,
"logps/chosen": -399.4092712402344,
"logps/rejected": -304.46209716796875,
"loss": 0.6928,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.002732563531026244,
"rewards/margins": 0.0014565556775778532,
"rewards/rejected": 0.0012760077370330691,
"step": 780
},
{
"epoch": 0.82,
"learning_rate": 4.0451588212782237e-07,
"logits/chosen": -4.240975856781006,
"logits/rejected": -4.128232479095459,
"logps/chosen": -413.4056701660156,
"logps/rejected": -328.46240234375,
"loss": 0.6923,
"rewards/accuracies": 0.526562511920929,
"rewards/chosen": 0.003439761698246002,
"rewards/margins": 0.0025056053418666124,
"rewards/rejected": 0.0009341565892100334,
"step": 790
},
{
"epoch": 0.83,
"learning_rate": 4.0260237275162643e-07,
"logits/chosen": -4.260972023010254,
"logits/rejected": -4.160162448883057,
"logps/chosen": -404.38824462890625,
"logps/rejected": -330.4800109863281,
"loss": 0.6938,
"rewards/accuracies": 0.5015624761581421,
"rewards/chosen": 0.0018609057879075408,
"rewards/margins": -0.000567351933568716,
"rewards/rejected": 0.0024282578378915787,
"step": 800
},
{
"epoch": 0.84,
"learning_rate": 4.006888633754305e-07,
"logits/chosen": -4.292420387268066,
"logits/rejected": -4.155418395996094,
"logps/chosen": -402.5275573730469,
"logps/rejected": -311.781982421875,
"loss": 0.6924,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.0035346276126801968,
"rewards/margins": 0.002268751384690404,
"rewards/rejected": 0.0012658759951591492,
"step": 810
},
{
"epoch": 0.85,
"learning_rate": 3.9877535399923456e-07,
"logits/chosen": -4.276908874511719,
"logits/rejected": -4.151911735534668,
"logps/chosen": -408.8199768066406,
"logps/rejected": -321.52215576171875,
"loss": 0.693,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.00492482166737318,
"rewards/margins": 0.0010390502866357565,
"rewards/rejected": 0.003885771380737424,
"step": 820
},
{
"epoch": 0.86,
"learning_rate": 3.968618446230386e-07,
"logits/chosen": -4.282100677490234,
"logits/rejected": -4.158295631408691,
"logps/chosen": -399.35430908203125,
"logps/rejected": -316.38165283203125,
"loss": 0.6928,
"rewards/accuracies": 0.510937511920929,
"rewards/chosen": 0.0036009408067911863,
"rewards/margins": 0.0014611782971769571,
"rewards/rejected": 0.002139762043952942,
"step": 830
},
{
"epoch": 0.87,
"learning_rate": 3.949483352468427e-07,
"logits/chosen": -4.265560150146484,
"logits/rejected": -4.16861629486084,
"logps/chosen": -384.11151123046875,
"logps/rejected": -317.0963134765625,
"loss": 0.6941,
"rewards/accuracies": 0.49531251192092896,
"rewards/chosen": 0.0022504080552607775,
"rewards/margins": -0.001352280960418284,
"rewards/rejected": 0.0036026891320943832,
"step": 840
},
{
"epoch": 0.88,
"learning_rate": 3.9303482587064674e-07,
"logits/chosen": -4.278510093688965,
"logits/rejected": -4.114365577697754,
"logps/chosen": -409.4468688964844,
"logps/rejected": -319.8404541015625,
"loss": 0.6941,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": 0.0014671286335214972,
"rewards/margins": -0.001252708025276661,
"rewards/rejected": 0.0027198365423828363,
"step": 850
},
{
"epoch": 0.89,
"learning_rate": 3.911213164944508e-07,
"logits/chosen": -4.275809288024902,
"logits/rejected": -4.195669651031494,
"logps/chosen": -395.3207702636719,
"logps/rejected": -331.13372802734375,
"loss": 0.6927,
"rewards/accuracies": 0.510937511920929,
"rewards/chosen": 0.0037080198526382446,
"rewards/margins": 0.0016179044032469392,
"rewards/rejected": 0.002090116497129202,
"step": 860
},
{
"epoch": 0.9,
"learning_rate": 3.8920780711825487e-07,
"logits/chosen": -4.255551815032959,
"logits/rejected": -4.139596462249756,
"logps/chosen": -402.43768310546875,
"logps/rejected": -321.7333984375,
"loss": 0.692,
"rewards/accuracies": 0.53125,
"rewards/chosen": 0.0047720326110720634,
"rewards/margins": 0.003143607871606946,
"rewards/rejected": 0.0016284246230497956,
"step": 870
},
{
"epoch": 0.91,
"learning_rate": 3.8729429774205893e-07,
"logits/chosen": -4.2646284103393555,
"logits/rejected": -4.149605751037598,
"logps/chosen": -387.3090515136719,
"logps/rejected": -301.6280212402344,
"loss": 0.6916,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": 0.004609361290931702,
"rewards/margins": 0.0038493976462632418,
"rewards/rejected": 0.0007599632954224944,
"step": 880
},
{
"epoch": 0.92,
"learning_rate": 3.8538078836586294e-07,
"logits/chosen": -4.2500715255737305,
"logits/rejected": -4.115664958953857,
"logps/chosen": -425.0071716308594,
"logps/rejected": -319.0146484375,
"loss": 0.6913,
"rewards/accuracies": 0.5140625238418579,
"rewards/chosen": 0.004975964780896902,
"rewards/margins": 0.0043433718383312225,
"rewards/rejected": 0.0006325935246422887,
"step": 890
},
{
"epoch": 0.93,
"learning_rate": 3.83467278989667e-07,
"logits/chosen": -4.254255294799805,
"logits/rejected": -4.149744510650635,
"logps/chosen": -409.4825134277344,
"logps/rejected": -328.2546691894531,
"loss": 0.693,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": 0.0023609010968357325,
"rewards/margins": 0.0011571452487260103,
"rewards/rejected": 0.0012037558481097221,
"step": 900
},
{
"epoch": 0.94,
"learning_rate": 3.8155376961347106e-07,
"logits/chosen": -4.244211673736572,
"logits/rejected": -4.126115322113037,
"logps/chosen": -405.63763427734375,
"logps/rejected": -319.128662109375,
"loss": 0.6936,
"rewards/accuracies": 0.4984374940395355,
"rewards/chosen": 0.002668022643774748,
"rewards/margins": -0.00015769092715345323,
"rewards/rejected": 0.0028257134836167097,
"step": 910
},
{
"epoch": 0.95,
"learning_rate": 3.796402602372751e-07,
"logits/chosen": -4.292696952819824,
"logits/rejected": -4.156879425048828,
"logps/chosen": -415.4306640625,
"logps/rejected": -324.4388732910156,
"loss": 0.6919,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.0032460917718708515,
"rewards/margins": 0.0033036619424819946,
"rewards/rejected": -5.7570869103074074e-05,
"step": 920
},
{
"epoch": 0.96,
"learning_rate": 3.777267508610792e-07,
"logits/chosen": -4.28294038772583,
"logits/rejected": -4.117633819580078,
"logps/chosen": -415.66290283203125,
"logps/rejected": -305.69390869140625,
"loss": 0.6919,
"rewards/accuracies": 0.5296875238418579,
"rewards/chosen": 0.004189764615148306,
"rewards/margins": 0.0031828763894736767,
"rewards/rejected": 0.0010068879928439856,
"step": 930
},
{
"epoch": 0.97,
"learning_rate": 3.7581324148488325e-07,
"logits/chosen": -4.260636806488037,
"logits/rejected": -4.170042037963867,
"logps/chosen": -407.6413879394531,
"logps/rejected": -327.45574951171875,
"loss": 0.6895,
"rewards/accuracies": 0.5765625238418579,
"rewards/chosen": 0.00892933551222086,
"rewards/margins": 0.008162255398929119,
"rewards/rejected": 0.0007670802297070622,
"step": 940
},
{
"epoch": 0.98,
"learning_rate": 3.738997321086873e-07,
"logits/chosen": -4.2640604972839355,
"logits/rejected": -4.171642303466797,
"logps/chosen": -400.89373779296875,
"logps/rejected": -322.080078125,
"loss": 0.6924,
"rewards/accuracies": 0.503125011920929,
"rewards/chosen": 0.00451917527243495,
"rewards/margins": 0.0021246224641799927,
"rewards/rejected": 0.002394552808254957,
"step": 950
},
{
"epoch": 0.99,
"learning_rate": 3.7198622273249137e-07,
"logits/chosen": -4.275049209594727,
"logits/rejected": -4.145798683166504,
"logps/chosen": -429.56634521484375,
"logps/rejected": -331.715576171875,
"loss": 0.6916,
"rewards/accuracies": 0.526562511920929,
"rewards/chosen": 0.0037804250605404377,
"rewards/margins": 0.003932067193090916,
"rewards/rejected": -0.00015164251090027392,
"step": 960
},
{
"epoch": 1.0,
"eval_logits/chosen": -4.191330432891846,
"eval_logits/rejected": -4.081260681152344,
"eval_logps/chosen": -402.61639404296875,
"eval_logps/rejected": -315.7343444824219,
"eval_loss": 0.6920775771141052,
"eval_rewards/accuracies": 0.5070000290870667,
"eval_rewards/chosen": 0.003913247026503086,
"eval_rewards/margins": 0.002829314675182104,
"eval_rewards/rejected": 0.0010839327005669475,
"eval_runtime": 762.8033,
"eval_samples_per_second": 2.622,
"eval_steps_per_second": 0.655,
"step": 968
},
{
"epoch": 1.0,
"learning_rate": 3.7007271335629544e-07,
"logits/chosen": -4.291719436645508,
"logits/rejected": -4.159844875335693,
"logps/chosen": -410.930419921875,
"logps/rejected": -320.3424377441406,
"loss": 0.6935,
"rewards/accuracies": 0.484375,
"rewards/chosen": 0.004596616607159376,
"rewards/margins": 3.339797694934532e-05,
"rewards/rejected": 0.004563218913972378,
"step": 970
},
{
"epoch": 1.01,
"learning_rate": 3.681592039800995e-07,
"logits/chosen": -4.274192810058594,
"logits/rejected": -4.13837194442749,
"logps/chosen": -401.00213623046875,
"logps/rejected": -310.458251953125,
"loss": 0.6907,
"rewards/accuracies": 0.5453125238418579,
"rewards/chosen": 0.0076770298182964325,
"rewards/margins": 0.005642565432935953,
"rewards/rejected": 0.0020344643853604794,
"step": 980
},
{
"epoch": 1.02,
"learning_rate": 3.662456946039035e-07,
"logits/chosen": -4.276472091674805,
"logits/rejected": -4.1441144943237305,
"logps/chosen": -413.98760986328125,
"logps/rejected": -310.71051025390625,
"loss": 0.6912,
"rewards/accuracies": 0.528124988079071,
"rewards/chosen": 0.010534636676311493,
"rewards/margins": 0.00471758097410202,
"rewards/rejected": 0.005817054770886898,
"step": 990
},
{
"epoch": 1.03,
"learning_rate": 3.6433218522770757e-07,
"logits/chosen": -4.2762131690979,
"logits/rejected": -4.135240077972412,
"logps/chosen": -420.061767578125,
"logps/rejected": -330.8781433105469,
"loss": 0.6936,
"rewards/accuracies": 0.503125011920929,
"rewards/chosen": 0.00425821915268898,
"rewards/margins": -0.00019165253615938127,
"rewards/rejected": 0.004449871368706226,
"step": 1000
},
{
"epoch": 1.04,
"learning_rate": 3.6241867585151163e-07,
"logits/chosen": -4.270270824432373,
"logits/rejected": -4.130080223083496,
"logps/chosen": -392.8804626464844,
"logps/rejected": -304.02294921875,
"loss": 0.692,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": 0.004289795644581318,
"rewards/margins": 0.0029843891970813274,
"rewards/rejected": 0.0013054062146693468,
"step": 1010
},
{
"epoch": 1.05,
"learning_rate": 3.605051664753157e-07,
"logits/chosen": -4.268284797668457,
"logits/rejected": -4.145885944366455,
"logps/chosen": -408.9548645019531,
"logps/rejected": -317.30560302734375,
"loss": 0.692,
"rewards/accuracies": 0.510937511920929,
"rewards/chosen": 0.007414447609335184,
"rewards/margins": 0.0030614163260906935,
"rewards/rejected": 0.004353031050413847,
"step": 1020
},
{
"epoch": 1.06,
"learning_rate": 3.5859165709911975e-07,
"logits/chosen": -4.300627708435059,
"logits/rejected": -4.120467185974121,
"logps/chosen": -422.968994140625,
"logps/rejected": -307.66473388671875,
"loss": 0.6929,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.00493131997063756,
"rewards/margins": 0.0012998055899515748,
"rewards/rejected": 0.0036315140314400196,
"step": 1030
},
{
"epoch": 1.07,
"learning_rate": 3.566781477229238e-07,
"logits/chosen": -4.260711669921875,
"logits/rejected": -4.147767066955566,
"logps/chosen": -389.6875305175781,
"logps/rejected": -305.23516845703125,
"loss": 0.692,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": 0.005956425331532955,
"rewards/margins": 0.002882971428334713,
"rewards/rejected": 0.003073454136028886,
"step": 1040
},
{
"epoch": 1.08,
"learning_rate": 3.547646383467279e-07,
"logits/chosen": -4.267011642456055,
"logits/rejected": -4.122623443603516,
"logps/chosen": -403.59942626953125,
"logps/rejected": -312.45770263671875,
"loss": 0.6922,
"rewards/accuracies": 0.515625,
"rewards/chosen": 0.006427975837141275,
"rewards/margins": 0.0026332822162657976,
"rewards/rejected": 0.0037946938537061214,
"step": 1050
},
{
"epoch": 1.09,
"learning_rate": 3.5285112897053194e-07,
"logits/chosen": -4.294173717498779,
"logits/rejected": -4.156063556671143,
"logps/chosen": -417.2015686035156,
"logps/rejected": -329.7525634765625,
"loss": 0.69,
"rewards/accuracies": 0.5453125238418579,
"rewards/chosen": 0.009924950078129768,
"rewards/margins": 0.0070407153107225895,
"rewards/rejected": 0.002884234767407179,
"step": 1060
},
{
"epoch": 1.11,
"learning_rate": 3.50937619594336e-07,
"logits/chosen": -4.2720513343811035,
"logits/rejected": -4.118578910827637,
"logps/chosen": -408.00701904296875,
"logps/rejected": -299.61981201171875,
"loss": 0.6922,
"rewards/accuracies": 0.504687488079071,
"rewards/chosen": 0.0056653618812561035,
"rewards/margins": 0.0025350514333695173,
"rewards/rejected": 0.003130309982225299,
"step": 1070
},
{
"epoch": 1.12,
"learning_rate": 3.4902411021814007e-07,
"logits/chosen": -4.258962154388428,
"logits/rejected": -4.1505255699157715,
"logps/chosen": -391.7621154785156,
"logps/rejected": -303.1611022949219,
"loss": 0.6911,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.007706860546022654,
"rewards/margins": 0.004729891195893288,
"rewards/rejected": 0.002976970048621297,
"step": 1080
},
{
"epoch": 1.13,
"learning_rate": 3.4711060084194413e-07,
"logits/chosen": -4.25510311126709,
"logits/rejected": -4.135909080505371,
"logps/chosen": -400.17205810546875,
"logps/rejected": -307.4388427734375,
"loss": 0.6903,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.00958174280822277,
"rewards/margins": 0.006473850458860397,
"rewards/rejected": 0.0031078937463462353,
"step": 1090
},
{
"epoch": 1.14,
"learning_rate": 3.4519709146574814e-07,
"logits/chosen": -4.276521682739258,
"logits/rejected": -4.158177852630615,
"logps/chosen": -395.39215087890625,
"logps/rejected": -310.30010986328125,
"loss": 0.6905,
"rewards/accuracies": 0.5140625238418579,
"rewards/chosen": 0.009448185563087463,
"rewards/margins": 0.005911382380872965,
"rewards/rejected": 0.003536803647875786,
"step": 1100
},
{
"epoch": 1.15,
"learning_rate": 3.432835820895522e-07,
"logits/chosen": -4.291528224945068,
"logits/rejected": -4.155767917633057,
"logps/chosen": -423.88177490234375,
"logps/rejected": -330.54730224609375,
"loss": 0.6903,
"rewards/accuracies": 0.567187488079071,
"rewards/chosen": 0.009671617299318314,
"rewards/margins": 0.0064759948290884495,
"rewards/rejected": 0.003195622470229864,
"step": 1110
},
{
"epoch": 1.16,
"learning_rate": 3.4137007271335626e-07,
"logits/chosen": -4.27802848815918,
"logits/rejected": -4.153050422668457,
"logps/chosen": -406.92523193359375,
"logps/rejected": -305.8375244140625,
"loss": 0.6921,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": 0.007676907815039158,
"rewards/margins": 0.0026743696071207523,
"rewards/rejected": 0.0050025382079184055,
"step": 1120
},
{
"epoch": 1.17,
"learning_rate": 3.394565633371603e-07,
"logits/chosen": -4.263790607452393,
"logits/rejected": -4.140618801116943,
"logps/chosen": -417.0517578125,
"logps/rejected": -325.2332763671875,
"loss": 0.6915,
"rewards/accuracies": 0.5453125238418579,
"rewards/chosen": 0.007303275167942047,
"rewards/margins": 0.004130188841372728,
"rewards/rejected": 0.0031730863265693188,
"step": 1130
},
{
"epoch": 1.18,
"learning_rate": 3.375430539609644e-07,
"logits/chosen": -4.309510707855225,
"logits/rejected": -4.183161735534668,
"logps/chosen": -388.7647705078125,
"logps/rejected": -309.83673095703125,
"loss": 0.6904,
"rewards/accuracies": 0.526562511920929,
"rewards/chosen": 0.009641969576478004,
"rewards/margins": 0.006214521359652281,
"rewards/rejected": 0.0034274482168257236,
"step": 1140
},
{
"epoch": 1.19,
"learning_rate": 3.3562954458476845e-07,
"logits/chosen": -4.253263473510742,
"logits/rejected": -4.152525424957275,
"logps/chosen": -387.7099609375,
"logps/rejected": -302.3329162597656,
"loss": 0.6918,
"rewards/accuracies": 0.510937511920929,
"rewards/chosen": 0.0074900491163134575,
"rewards/margins": 0.0033420673571527004,
"rewards/rejected": 0.00414798129349947,
"step": 1150
},
{
"epoch": 1.2,
"learning_rate": 3.337160352085725e-07,
"logits/chosen": -4.244006156921387,
"logits/rejected": -4.125102996826172,
"logps/chosen": -400.85107421875,
"logps/rejected": -304.1709289550781,
"loss": 0.6923,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": 0.007216416299343109,
"rewards/margins": 0.002480756724253297,
"rewards/rejected": 0.004735658876597881,
"step": 1160
},
{
"epoch": 1.21,
"learning_rate": 3.3180252583237657e-07,
"logits/chosen": -4.282382488250732,
"logits/rejected": -4.156040191650391,
"logps/chosen": -409.8832092285156,
"logps/rejected": -317.5098571777344,
"loss": 0.6925,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": 0.008845189586281776,
"rewards/margins": 0.0021160345058888197,
"rewards/rejected": 0.006729154847562313,
"step": 1170
},
{
"epoch": 1.22,
"learning_rate": 3.2988901645618063e-07,
"logits/chosen": -4.294495582580566,
"logits/rejected": -4.211082458496094,
"logps/chosen": -398.4643249511719,
"logps/rejected": -337.3882141113281,
"loss": 0.6929,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.005116584710776806,
"rewards/margins": 0.0013333541573956609,
"rewards/rejected": 0.003783230436965823,
"step": 1180
},
{
"epoch": 1.23,
"learning_rate": 3.279755070799847e-07,
"logits/chosen": -4.265214443206787,
"logits/rejected": -4.16311502456665,
"logps/chosen": -387.9737243652344,
"logps/rejected": -306.5306091308594,
"loss": 0.6923,
"rewards/accuracies": 0.5015624761581421,
"rewards/chosen": 0.005055157467722893,
"rewards/margins": 0.0023398033808916807,
"rewards/rejected": 0.002715354086831212,
"step": 1190
},
{
"epoch": 1.24,
"learning_rate": 3.260619977037887e-07,
"logits/chosen": -4.283998489379883,
"logits/rejected": -4.149415016174316,
"logps/chosen": -393.43609619140625,
"logps/rejected": -300.15667724609375,
"loss": 0.6914,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.007636996451765299,
"rewards/margins": 0.004209595732390881,
"rewards/rejected": 0.0034273997880518436,
"step": 1200
},
{
"epoch": 1.25,
"learning_rate": 3.2414848832759277e-07,
"logits/chosen": -4.261348724365234,
"logits/rejected": -4.140153884887695,
"logps/chosen": -393.04046630859375,
"logps/rejected": -311.3672180175781,
"loss": 0.6917,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.008078986778855324,
"rewards/margins": 0.003632976207882166,
"rewards/rejected": 0.0044460115022957325,
"step": 1210
},
{
"epoch": 1.26,
"learning_rate": 3.2223497895139683e-07,
"logits/chosen": -4.243834972381592,
"logits/rejected": -4.141684055328369,
"logps/chosen": -408.1537170410156,
"logps/rejected": -314.95404052734375,
"loss": 0.6917,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.009249814786016941,
"rewards/margins": 0.0036557712592184544,
"rewards/rejected": 0.005594043061137199,
"step": 1220
},
{
"epoch": 1.27,
"learning_rate": 3.203214695752009e-07,
"logits/chosen": -4.290652275085449,
"logits/rejected": -4.172784328460693,
"logps/chosen": -404.22247314453125,
"logps/rejected": -311.638916015625,
"loss": 0.6926,
"rewards/accuracies": 0.5140625238418579,
"rewards/chosen": 0.008968379348516464,
"rewards/margins": 0.001797800650820136,
"rewards/rejected": 0.007170577999204397,
"step": 1230
},
{
"epoch": 1.28,
"learning_rate": 3.1840796019900495e-07,
"logits/chosen": -4.221989631652832,
"logits/rejected": -4.126450061798096,
"logps/chosen": -362.48773193359375,
"logps/rejected": -310.7374572753906,
"loss": 0.691,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.007350596599280834,
"rewards/margins": 0.004948228131979704,
"rewards/rejected": 0.0024023696314543486,
"step": 1240
},
{
"epoch": 1.29,
"learning_rate": 3.16494450822809e-07,
"logits/chosen": -4.273645877838135,
"logits/rejected": -4.147796630859375,
"logps/chosen": -397.3808288574219,
"logps/rejected": -298.5054626464844,
"loss": 0.6905,
"rewards/accuracies": 0.5296875238418579,
"rewards/chosen": 0.008822308853268623,
"rewards/margins": 0.0060681127943098545,
"rewards/rejected": 0.0027541951276361942,
"step": 1250
},
{
"epoch": 1.3,
"learning_rate": 3.145809414466131e-07,
"logits/chosen": -4.290090560913086,
"logits/rejected": -4.17457389831543,
"logps/chosen": -405.3314208984375,
"logps/rejected": -332.5986633300781,
"loss": 0.6905,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.008818728849291801,
"rewards/margins": 0.0060805464163422585,
"rewards/rejected": 0.0027381826657801867,
"step": 1260
},
{
"epoch": 1.31,
"learning_rate": 3.1266743207041714e-07,
"logits/chosen": -4.275857448577881,
"logits/rejected": -4.184884548187256,
"logps/chosen": -413.36260986328125,
"logps/rejected": -335.1319274902344,
"loss": 0.6903,
"rewards/accuracies": 0.528124988079071,
"rewards/chosen": 0.011068764142692089,
"rewards/margins": 0.006428800523281097,
"rewards/rejected": 0.004639963153749704,
"step": 1270
},
{
"epoch": 1.32,
"learning_rate": 3.107539226942212e-07,
"logits/chosen": -4.262097358703613,
"logits/rejected": -4.138064384460449,
"logps/chosen": -420.592041015625,
"logps/rejected": -336.1610107421875,
"loss": 0.6921,
"rewards/accuracies": 0.551562488079071,
"rewards/chosen": 0.008383669890463352,
"rewards/margins": 0.0029027739074081182,
"rewards/rejected": 0.00548089575022459,
"step": 1280
},
{
"epoch": 1.33,
"learning_rate": 3.0884041331802526e-07,
"logits/chosen": -4.273635387420654,
"logits/rejected": -4.136135578155518,
"logps/chosen": -419.8043518066406,
"logps/rejected": -326.3934631347656,
"loss": 0.691,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": 0.008629587478935719,
"rewards/margins": 0.0051121762953698635,
"rewards/rejected": 0.003517411183565855,
"step": 1290
},
{
"epoch": 1.34,
"learning_rate": 3.0692690394182927e-07,
"logits/chosen": -4.25814962387085,
"logits/rejected": -4.133566856384277,
"logps/chosen": -388.8675231933594,
"logps/rejected": -307.92169189453125,
"loss": 0.6903,
"rewards/accuracies": 0.5296875238418579,
"rewards/chosen": 0.010921096429228783,
"rewards/margins": 0.0063910940662026405,
"rewards/rejected": 0.004530002363026142,
"step": 1300
},
{
"epoch": 1.35,
"learning_rate": 3.0501339456563334e-07,
"logits/chosen": -4.258055210113525,
"logits/rejected": -4.142639636993408,
"logps/chosen": -417.714599609375,
"logps/rejected": -331.1921691894531,
"loss": 0.6899,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": 0.011006483808159828,
"rewards/margins": 0.007266665808856487,
"rewards/rejected": 0.0037398170679807663,
"step": 1310
},
{
"epoch": 1.36,
"learning_rate": 3.030998851894374e-07,
"logits/chosen": -4.290154457092285,
"logits/rejected": -4.134154796600342,
"logps/chosen": -419.7810974121094,
"logps/rejected": -316.3617248535156,
"loss": 0.6899,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.012462007813155651,
"rewards/margins": 0.0071654594503343105,
"rewards/rejected": 0.005296547897160053,
"step": 1320
},
{
"epoch": 1.37,
"learning_rate": 3.0118637581324146e-07,
"logits/chosen": -4.25482702255249,
"logits/rejected": -4.102695941925049,
"logps/chosen": -426.0753479003906,
"logps/rejected": -321.2982177734375,
"loss": 0.6922,
"rewards/accuracies": 0.5093749761581421,
"rewards/chosen": 0.00866289995610714,
"rewards/margins": 0.0026752217672765255,
"rewards/rejected": 0.005987677723169327,
"step": 1330
},
{
"epoch": 1.38,
"learning_rate": 2.992728664370455e-07,
"logits/chosen": -4.27578067779541,
"logits/rejected": -4.178536415100098,
"logps/chosen": -399.89422607421875,
"logps/rejected": -309.16204833984375,
"loss": 0.6921,
"rewards/accuracies": 0.5234375,
"rewards/chosen": 0.0074403537437319756,
"rewards/margins": 0.0027673656586557627,
"rewards/rejected": 0.004672987386584282,
"step": 1340
},
{
"epoch": 1.39,
"learning_rate": 2.973593570608496e-07,
"logits/chosen": -4.252071380615234,
"logits/rejected": -4.123923301696777,
"logps/chosen": -402.0321044921875,
"logps/rejected": -311.18463134765625,
"loss": 0.6917,
"rewards/accuracies": 0.535937488079071,
"rewards/chosen": 0.007632538676261902,
"rewards/margins": 0.003616305533796549,
"rewards/rejected": 0.004016232676804066,
"step": 1350
},
{
"epoch": 1.4,
"learning_rate": 2.9544584768465365e-07,
"logits/chosen": -4.282023906707764,
"logits/rejected": -4.184301853179932,
"logps/chosen": -389.57904052734375,
"logps/rejected": -322.18389892578125,
"loss": 0.6926,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.00903762225061655,
"rewards/margins": 0.0019117307383567095,
"rewards/rejected": 0.007125890348106623,
"step": 1360
},
{
"epoch": 1.41,
"learning_rate": 2.935323383084577e-07,
"logits/chosen": -4.287262916564941,
"logits/rejected": -4.1361284255981445,
"logps/chosen": -420.2037658691406,
"logps/rejected": -316.2614440917969,
"loss": 0.6909,
"rewards/accuracies": 0.551562488079071,
"rewards/chosen": 0.009709215722978115,
"rewards/margins": 0.005225184373557568,
"rewards/rejected": 0.0044840313494205475,
"step": 1370
},
{
"epoch": 1.43,
"learning_rate": 2.9161882893226177e-07,
"logits/chosen": -4.279843807220459,
"logits/rejected": -4.154780864715576,
"logps/chosen": -405.2857971191406,
"logps/rejected": -318.71368408203125,
"loss": 0.6921,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": 0.007445839233696461,
"rewards/margins": 0.0028315638191998005,
"rewards/rejected": 0.00461427541449666,
"step": 1380
},
{
"epoch": 1.44,
"learning_rate": 2.8970531955606583e-07,
"logits/chosen": -4.297855377197266,
"logits/rejected": -4.1429829597473145,
"logps/chosen": -411.401123046875,
"logps/rejected": -309.50469970703125,
"loss": 0.6899,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": 0.010837659239768982,
"rewards/margins": 0.007386946585029364,
"rewards/rejected": 0.003450712887570262,
"step": 1390
},
{
"epoch": 1.45,
"learning_rate": 2.8779181017986984e-07,
"logits/chosen": -4.270615577697754,
"logits/rejected": -4.156781196594238,
"logps/chosen": -402.6678161621094,
"logps/rejected": -313.5867614746094,
"loss": 0.6915,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.012565011158585548,
"rewards/margins": 0.004020330961793661,
"rewards/rejected": 0.008544680662453175,
"step": 1400
},
{
"epoch": 1.46,
"learning_rate": 2.858783008036739e-07,
"logits/chosen": -4.293181896209717,
"logits/rejected": -4.163121223449707,
"logps/chosen": -383.950927734375,
"logps/rejected": -298.0428161621094,
"loss": 0.6915,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.009886051528155804,
"rewards/margins": 0.004015837796032429,
"rewards/rejected": 0.0058702146634459496,
"step": 1410
},
{
"epoch": 1.47,
"learning_rate": 2.8396479142747797e-07,
"logits/chosen": -4.2656683921813965,
"logits/rejected": -4.139580249786377,
"logps/chosen": -408.76361083984375,
"logps/rejected": -313.99298095703125,
"loss": 0.6905,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": 0.011651566252112389,
"rewards/margins": 0.006123474799096584,
"rewards/rejected": 0.005528091918677092,
"step": 1420
},
{
"epoch": 1.48,
"learning_rate": 2.8205128205128203e-07,
"logits/chosen": -4.313319206237793,
"logits/rejected": -4.1831374168396,
"logps/chosen": -416.11773681640625,
"logps/rejected": -317.92279052734375,
"loss": 0.6906,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": 0.01188584603369236,
"rewards/margins": 0.005925232544541359,
"rewards/rejected": 0.005960613489151001,
"step": 1430
},
{
"epoch": 1.49,
"learning_rate": 2.801377726750861e-07,
"logits/chosen": -4.274300575256348,
"logits/rejected": -4.167074203491211,
"logps/chosen": -395.77117919921875,
"logps/rejected": -322.4521179199219,
"loss": 0.6898,
"rewards/accuracies": 0.5796874761581421,
"rewards/chosen": 0.013639995828270912,
"rewards/margins": 0.007328096777200699,
"rewards/rejected": 0.006311898585408926,
"step": 1440
},
{
"epoch": 1.5,
"learning_rate": 2.7822426329889015e-07,
"logits/chosen": -4.246912956237793,
"logits/rejected": -4.1353559494018555,
"logps/chosen": -402.06451416015625,
"logps/rejected": -326.43328857421875,
"loss": 0.6924,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": 0.008587488904595375,
"rewards/margins": 0.0023642387241125107,
"rewards/rejected": 0.006223250180482864,
"step": 1450
},
{
"epoch": 1.51,
"learning_rate": 2.763107539226942e-07,
"logits/chosen": -4.270732879638672,
"logits/rejected": -4.132948398590088,
"logps/chosen": -413.1165466308594,
"logps/rejected": -322.5657653808594,
"loss": 0.6902,
"rewards/accuracies": 0.542187511920929,
"rewards/chosen": 0.012603357434272766,
"rewards/margins": 0.006664451211690903,
"rewards/rejected": 0.005938907153904438,
"step": 1460
},
{
"epoch": 1.52,
"learning_rate": 2.743972445464983e-07,
"logits/chosen": -4.273732662200928,
"logits/rejected": -4.119269371032715,
"logps/chosen": -390.9085998535156,
"logps/rejected": -302.38348388671875,
"loss": 0.6922,
"rewards/accuracies": 0.5093749761581421,
"rewards/chosen": 0.008494021371006966,
"rewards/margins": 0.0025641201063990593,
"rewards/rejected": 0.005929900798946619,
"step": 1470
},
{
"epoch": 1.53,
"learning_rate": 2.7248373517030234e-07,
"logits/chosen": -4.267933368682861,
"logits/rejected": -4.161561012268066,
"logps/chosen": -390.6529235839844,
"logps/rejected": -303.3753662109375,
"loss": 0.6907,
"rewards/accuracies": 0.5390625,
"rewards/chosen": 0.010899425484240055,
"rewards/margins": 0.005612888839095831,
"rewards/rejected": 0.005286536645144224,
"step": 1480
},
{
"epoch": 1.54,
"learning_rate": 2.705702257941064e-07,
"logits/chosen": -4.304495811462402,
"logits/rejected": -4.120760440826416,
"logps/chosen": -413.19744873046875,
"logps/rejected": -302.12603759765625,
"loss": 0.6905,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.011145773343741894,
"rewards/margins": 0.006089083384722471,
"rewards/rejected": 0.005056688562035561,
"step": 1490
},
{
"epoch": 1.55,
"learning_rate": 2.686567164179104e-07,
"logits/chosen": -4.27075719833374,
"logits/rejected": -4.125774383544922,
"logps/chosen": -398.0982360839844,
"logps/rejected": -307.02325439453125,
"loss": 0.6908,
"rewards/accuracies": 0.5328124761581421,
"rewards/chosen": 0.010808114893734455,
"rewards/margins": 0.005621565040200949,
"rewards/rejected": 0.005186550319194794,
"step": 1500
},
{
"epoch": 1.56,
"learning_rate": 2.6674320704171447e-07,
"logits/chosen": -4.2627692222595215,
"logits/rejected": -4.124849796295166,
"logps/chosen": -400.72039794921875,
"logps/rejected": -309.2858581542969,
"loss": 0.6912,
"rewards/accuracies": 0.5093749761581421,
"rewards/chosen": 0.00958535261452198,
"rewards/margins": 0.004684613086283207,
"rewards/rejected": 0.004900740925222635,
"step": 1510
},
{
"epoch": 1.57,
"learning_rate": 2.6482969766551853e-07,
"logits/chosen": -4.256047248840332,
"logits/rejected": -4.143467903137207,
"logps/chosen": -395.4497985839844,
"logps/rejected": -311.4337463378906,
"loss": 0.69,
"rewards/accuracies": 0.542187511920929,
"rewards/chosen": 0.012033809907734394,
"rewards/margins": 0.006985441781580448,
"rewards/rejected": 0.005048368591815233,
"step": 1520
},
{
"epoch": 1.58,
"learning_rate": 2.629161882893226e-07,
"logits/chosen": -4.2988762855529785,
"logits/rejected": -4.160223960876465,
"logps/chosen": -421.30926513671875,
"logps/rejected": -330.73834228515625,
"loss": 0.6912,
"rewards/accuracies": 0.546875,
"rewards/chosen": 0.012272657826542854,
"rewards/margins": 0.004651675932109356,
"rewards/rejected": 0.007620981428772211,
"step": 1530
},
{
"epoch": 1.59,
"learning_rate": 2.6100267891312666e-07,
"logits/chosen": -4.249758720397949,
"logits/rejected": -4.107924938201904,
"logps/chosen": -394.63458251953125,
"logps/rejected": -304.65570068359375,
"loss": 0.6893,
"rewards/accuracies": 0.5609375238418579,
"rewards/chosen": 0.014264127239584923,
"rewards/margins": 0.008482937701046467,
"rewards/rejected": 0.005781189538538456,
"step": 1540
},
{
"epoch": 1.6,
"learning_rate": 2.590891695369307e-07,
"logits/chosen": -4.296584606170654,
"logits/rejected": -4.137936592102051,
"logps/chosen": -426.6543884277344,
"logps/rejected": -326.82086181640625,
"loss": 0.6899,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": 0.013136537745594978,
"rewards/margins": 0.0072318254970014095,
"rewards/rejected": 0.005904710851609707,
"step": 1550
},
{
"epoch": 1.61,
"learning_rate": 2.571756601607348e-07,
"logits/chosen": -4.269163608551025,
"logits/rejected": -4.147532939910889,
"logps/chosen": -418.16259765625,
"logps/rejected": -319.60687255859375,
"loss": 0.6889,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.015316249802708626,
"rewards/margins": 0.00932287611067295,
"rewards/rejected": 0.005993373692035675,
"step": 1560
},
{
"epoch": 1.62,
"learning_rate": 2.5526215078453884e-07,
"logits/chosen": -4.262753963470459,
"logits/rejected": -4.136817932128906,
"logps/chosen": -414.9779357910156,
"logps/rejected": -330.0815734863281,
"loss": 0.6898,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.013297900557518005,
"rewards/margins": 0.007364665158092976,
"rewards/rejected": 0.00593323539942503,
"step": 1570
},
{
"epoch": 1.63,
"learning_rate": 2.533486414083429e-07,
"logits/chosen": -4.293523788452148,
"logits/rejected": -4.158999443054199,
"logps/chosen": -413.1465759277344,
"logps/rejected": -309.12017822265625,
"loss": 0.6909,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": 0.013114909641444683,
"rewards/margins": 0.005343536846339703,
"rewards/rejected": 0.007771371863782406,
"step": 1580
},
{
"epoch": 1.64,
"learning_rate": 2.5143513203214697e-07,
"logits/chosen": -4.268404960632324,
"logits/rejected": -4.135837554931641,
"logps/chosen": -394.46466064453125,
"logps/rejected": -313.1191101074219,
"loss": 0.688,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.01660170406103134,
"rewards/margins": 0.011182994581758976,
"rewards/rejected": 0.005418709013611078,
"step": 1590
},
{
"epoch": 1.65,
"learning_rate": 2.49521622655951e-07,
"logits/chosen": -4.279505252838135,
"logits/rejected": -4.153802871704102,
"logps/chosen": -396.4070739746094,
"logps/rejected": -320.4418029785156,
"loss": 0.6917,
"rewards/accuracies": 0.53125,
"rewards/chosen": 0.009519520215690136,
"rewards/margins": 0.0036930330097675323,
"rewards/rejected": 0.005826488137245178,
"step": 1600
},
{
"epoch": 1.66,
"learning_rate": 2.4760811327975504e-07,
"logits/chosen": -4.265553951263428,
"logits/rejected": -4.148402214050293,
"logps/chosen": -419.32171630859375,
"logps/rejected": -328.2819519042969,
"loss": 0.6886,
"rewards/accuracies": 0.5546875,
"rewards/chosen": 0.018884066492319107,
"rewards/margins": 0.00994439609348774,
"rewards/rejected": 0.008939670398831367,
"step": 1610
},
{
"epoch": 1.67,
"learning_rate": 2.456946039035591e-07,
"logits/chosen": -4.287269115447998,
"logits/rejected": -4.127593040466309,
"logps/chosen": -386.55499267578125,
"logps/rejected": -294.11505126953125,
"loss": 0.6894,
"rewards/accuracies": 0.5609375238418579,
"rewards/chosen": 0.013925912790000439,
"rewards/margins": 0.008140355348587036,
"rewards/rejected": 0.005785556975752115,
"step": 1620
},
{
"epoch": 1.68,
"learning_rate": 2.4378109452736316e-07,
"logits/chosen": -4.2739386558532715,
"logits/rejected": -4.1460676193237305,
"logps/chosen": -406.03900146484375,
"logps/rejected": -317.59918212890625,
"loss": 0.6912,
"rewards/accuracies": 0.526562511920929,
"rewards/chosen": 0.013792428188025951,
"rewards/margins": 0.004698522854596376,
"rewards/rejected": 0.009093904867768288,
"step": 1630
},
{
"epoch": 1.69,
"learning_rate": 2.418675851511672e-07,
"logits/chosen": -4.243307113647461,
"logits/rejected": -4.1257524490356445,
"logps/chosen": -389.49627685546875,
"logps/rejected": -310.25921630859375,
"loss": 0.6895,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": 0.01434385310858488,
"rewards/margins": 0.007951314561069012,
"rewards/rejected": 0.006392539478838444,
"step": 1640
},
{
"epoch": 1.7,
"learning_rate": 2.399540757749713e-07,
"logits/chosen": -4.276772975921631,
"logits/rejected": -4.147823333740234,
"logps/chosen": -384.64141845703125,
"logps/rejected": -305.95355224609375,
"loss": 0.6917,
"rewards/accuracies": 0.515625,
"rewards/chosen": 0.010843750089406967,
"rewards/margins": 0.003594112815335393,
"rewards/rejected": 0.0072496384382247925,
"step": 1650
},
{
"epoch": 1.71,
"learning_rate": 2.3804056639877535e-07,
"logits/chosen": -4.259124279022217,
"logits/rejected": -4.154895782470703,
"logps/chosen": -404.73663330078125,
"logps/rejected": -332.0699462890625,
"loss": 0.692,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": 0.010340576991438866,
"rewards/margins": 0.002993339207023382,
"rewards/rejected": 0.007347238250076771,
"step": 1660
},
{
"epoch": 1.72,
"learning_rate": 2.361270570225794e-07,
"logits/chosen": -4.2700676918029785,
"logits/rejected": -4.160883903503418,
"logps/chosen": -406.1430358886719,
"logps/rejected": -311.64459228515625,
"loss": 0.6904,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.012120211496949196,
"rewards/margins": 0.006281781941652298,
"rewards/rejected": 0.005838429089635611,
"step": 1670
},
{
"epoch": 1.74,
"learning_rate": 2.3421354764638345e-07,
"logits/chosen": -4.2109479904174805,
"logits/rejected": -4.121271133422852,
"logps/chosen": -390.29046630859375,
"logps/rejected": -306.08843994140625,
"loss": 0.6896,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.012644372880458832,
"rewards/margins": 0.0077982256188988686,
"rewards/rejected": 0.004846146795898676,
"step": 1680
},
{
"epoch": 1.75,
"learning_rate": 2.323000382701875e-07,
"logits/chosen": -4.242735385894775,
"logits/rejected": -4.120673179626465,
"logps/chosen": -396.1617431640625,
"logps/rejected": -299.79345703125,
"loss": 0.6903,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": 0.013311442919075489,
"rewards/margins": 0.006340789142996073,
"rewards/rejected": 0.006970655173063278,
"step": 1690
},
{
"epoch": 1.76,
"learning_rate": 2.3038652889399157e-07,
"logits/chosen": -4.255246162414551,
"logits/rejected": -4.130453109741211,
"logps/chosen": -393.3377380371094,
"logps/rejected": -320.06109619140625,
"loss": 0.6897,
"rewards/accuracies": 0.535937488079071,
"rewards/chosen": 0.014351250603795052,
"rewards/margins": 0.00757851917296648,
"rewards/rejected": 0.006772731896489859,
"step": 1700
},
{
"epoch": 1.77,
"learning_rate": 2.2847301951779563e-07,
"logits/chosen": -4.267707347869873,
"logits/rejected": -4.147231101989746,
"logps/chosen": -414.69256591796875,
"logps/rejected": -318.17291259765625,
"loss": 0.6909,
"rewards/accuracies": 0.542187511920929,
"rewards/chosen": 0.012826653197407722,
"rewards/margins": 0.005257748067378998,
"rewards/rejected": 0.00756890419870615,
"step": 1710
},
{
"epoch": 1.78,
"learning_rate": 2.265595101415997e-07,
"logits/chosen": -4.258088111877441,
"logits/rejected": -4.129515647888184,
"logps/chosen": -401.8673400878906,
"logps/rejected": -315.34417724609375,
"loss": 0.6887,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.017096903175115585,
"rewards/margins": 0.009566163644194603,
"rewards/rejected": 0.0075307427905499935,
"step": 1720
},
{
"epoch": 1.79,
"learning_rate": 2.2464600076540373e-07,
"logits/chosen": -4.2839155197143555,
"logits/rejected": -4.152881622314453,
"logps/chosen": -420.7337951660156,
"logps/rejected": -324.15985107421875,
"loss": 0.6902,
"rewards/accuracies": 0.5390625,
"rewards/chosen": 0.013899828307330608,
"rewards/margins": 0.006815521512180567,
"rewards/rejected": 0.00708430539816618,
"step": 1730
},
{
"epoch": 1.8,
"learning_rate": 2.227324913892078e-07,
"logits/chosen": -4.261233329772949,
"logits/rejected": -4.125912666320801,
"logps/chosen": -408.70989990234375,
"logps/rejected": -311.13983154296875,
"loss": 0.6876,
"rewards/accuracies": 0.5703125,
"rewards/chosen": 0.018134312704205513,
"rewards/margins": 0.011861599050462246,
"rewards/rejected": 0.00627271318808198,
"step": 1740
},
{
"epoch": 1.81,
"learning_rate": 2.2081898201301186e-07,
"logits/chosen": -4.2318902015686035,
"logits/rejected": -4.125788688659668,
"logps/chosen": -421.2041015625,
"logps/rejected": -331.3543701171875,
"loss": 0.6906,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": 0.01709870807826519,
"rewards/margins": 0.005781983956694603,
"rewards/rejected": 0.011316723190248013,
"step": 1750
},
{
"epoch": 1.82,
"learning_rate": 2.1890547263681592e-07,
"logits/chosen": -4.270097732543945,
"logits/rejected": -4.112766265869141,
"logps/chosen": -415.7230529785156,
"logps/rejected": -309.1680603027344,
"loss": 0.6882,
"rewards/accuracies": 0.5703125,
"rewards/chosen": 0.016786256805062294,
"rewards/margins": 0.010718188248574734,
"rewards/rejected": 0.006068066693842411,
"step": 1760
},
{
"epoch": 1.83,
"learning_rate": 2.1699196326061998e-07,
"logits/chosen": -4.254898548126221,
"logits/rejected": -4.1117353439331055,
"logps/chosen": -406.07330322265625,
"logps/rejected": -307.8787841796875,
"loss": 0.6889,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.016035914421081543,
"rewards/margins": 0.009246991947293282,
"rewards/rejected": 0.006788922939449549,
"step": 1770
},
{
"epoch": 1.84,
"learning_rate": 2.1507845388442402e-07,
"logits/chosen": -4.308491230010986,
"logits/rejected": -4.162188529968262,
"logps/chosen": -414.5621643066406,
"logps/rejected": -300.40106201171875,
"loss": 0.6888,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.019205499440431595,
"rewards/margins": 0.009419824928045273,
"rewards/rejected": 0.009785676375031471,
"step": 1780
},
{
"epoch": 1.85,
"learning_rate": 2.1316494450822808e-07,
"logits/chosen": -4.263808250427246,
"logits/rejected": -4.1408891677856445,
"logps/chosen": -409.64141845703125,
"logps/rejected": -318.11041259765625,
"loss": 0.6901,
"rewards/accuracies": 0.5296875238418579,
"rewards/chosen": 0.014620177447795868,
"rewards/margins": 0.007014470640569925,
"rewards/rejected": 0.007605706341564655,
"step": 1790
},
{
"epoch": 1.86,
"learning_rate": 2.1125143513203214e-07,
"logits/chosen": -4.306565284729004,
"logits/rejected": -4.195437431335449,
"logps/chosen": -398.6597595214844,
"logps/rejected": -313.44366455078125,
"loss": 0.6913,
"rewards/accuracies": 0.5328124761581421,
"rewards/chosen": 0.013173435814678669,
"rewards/margins": 0.004629576578736305,
"rewards/rejected": 0.008543858304619789,
"step": 1800
},
{
"epoch": 1.87,
"learning_rate": 2.093379257558362e-07,
"logits/chosen": -4.287682056427002,
"logits/rejected": -4.170054912567139,
"logps/chosen": -413.82818603515625,
"logps/rejected": -336.8646545410156,
"loss": 0.6908,
"rewards/accuracies": 0.551562488079071,
"rewards/chosen": 0.016411561518907547,
"rewards/margins": 0.005662465933710337,
"rewards/rejected": 0.0107490923255682,
"step": 1810
},
{
"epoch": 1.88,
"learning_rate": 2.0742441637964026e-07,
"logits/chosen": -4.2755303382873535,
"logits/rejected": -4.166233062744141,
"logps/chosen": -372.7901306152344,
"logps/rejected": -299.3600158691406,
"loss": 0.6896,
"rewards/accuracies": 0.535937488079071,
"rewards/chosen": 0.015290270559489727,
"rewards/margins": 0.007844468578696251,
"rewards/rejected": 0.0074458010494709015,
"step": 1820
},
{
"epoch": 1.89,
"learning_rate": 2.055109070034443e-07,
"logits/chosen": -4.297582149505615,
"logits/rejected": -4.127498149871826,
"logps/chosen": -397.54498291015625,
"logps/rejected": -304.5601501464844,
"loss": 0.6905,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": 0.01460904348641634,
"rewards/margins": 0.0060775866732001305,
"rewards/rejected": 0.00853145681321621,
"step": 1830
},
{
"epoch": 1.9,
"learning_rate": 2.0359739762724836e-07,
"logits/chosen": -4.267261028289795,
"logits/rejected": -4.143542289733887,
"logps/chosen": -438.79937744140625,
"logps/rejected": -327.8368225097656,
"loss": 0.6887,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": 0.017624245956540108,
"rewards/margins": 0.009726700372993946,
"rewards/rejected": 0.007897543720901012,
"step": 1840
},
{
"epoch": 1.91,
"learning_rate": 2.0168388825105242e-07,
"logits/chosen": -4.271047115325928,
"logits/rejected": -4.188268661499023,
"logps/chosen": -386.79815673828125,
"logps/rejected": -312.2374572753906,
"loss": 0.6912,
"rewards/accuracies": 0.535937488079071,
"rewards/chosen": 0.013870956376194954,
"rewards/margins": 0.004729996435344219,
"rewards/rejected": 0.00914095900952816,
"step": 1850
},
{
"epoch": 1.92,
"learning_rate": 1.997703788748565e-07,
"logits/chosen": -4.247762203216553,
"logits/rejected": -4.124339580535889,
"logps/chosen": -399.97686767578125,
"logps/rejected": -318.4253845214844,
"loss": 0.689,
"rewards/accuracies": 0.567187488079071,
"rewards/chosen": 0.01522988360375166,
"rewards/margins": 0.008996319025754929,
"rewards/rejected": 0.0062335641123354435,
"step": 1860
},
{
"epoch": 1.93,
"learning_rate": 1.9785686949866055e-07,
"logits/chosen": -4.297530174255371,
"logits/rejected": -4.151538372039795,
"logps/chosen": -411.59027099609375,
"logps/rejected": -309.5844421386719,
"loss": 0.6892,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 0.01565638557076454,
"rewards/margins": 0.008750095032155514,
"rewards/rejected": 0.006906290538609028,
"step": 1870
},
{
"epoch": 1.94,
"learning_rate": 1.9594336012246458e-07,
"logits/chosen": -4.267110347747803,
"logits/rejected": -4.1285834312438965,
"logps/chosen": -399.1042785644531,
"logps/rejected": -314.04193115234375,
"loss": 0.6885,
"rewards/accuracies": 0.5640624761581421,
"rewards/chosen": 0.01778355799615383,
"rewards/margins": 0.010103506036102772,
"rewards/rejected": 0.007680053357034922,
"step": 1880
},
{
"epoch": 1.95,
"learning_rate": 1.9402985074626865e-07,
"logits/chosen": -4.260018825531006,
"logits/rejected": -4.148495197296143,
"logps/chosen": -393.2950744628906,
"logps/rejected": -308.5839538574219,
"loss": 0.6888,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": 0.01795302703976631,
"rewards/margins": 0.009579015895724297,
"rewards/rejected": 0.008374011144042015,
"step": 1890
},
{
"epoch": 1.96,
"learning_rate": 1.921163413700727e-07,
"logits/chosen": -4.2746124267578125,
"logits/rejected": -4.14363956451416,
"logps/chosen": -423.131103515625,
"logps/rejected": -334.2845458984375,
"loss": 0.6892,
"rewards/accuracies": 0.5453125238418579,
"rewards/chosen": 0.01808355748653412,
"rewards/margins": 0.008949248120188713,
"rewards/rejected": 0.009134308435022831,
"step": 1900
},
{
"epoch": 1.97,
"learning_rate": 1.9020283199387677e-07,
"logits/chosen": -4.257375717163086,
"logits/rejected": -4.1283063888549805,
"logps/chosen": -409.4438171386719,
"logps/rejected": -327.1435852050781,
"loss": 0.6897,
"rewards/accuracies": 0.5390625,
"rewards/chosen": 0.016801532357931137,
"rewards/margins": 0.007867367006838322,
"rewards/rejected": 0.008934165351092815,
"step": 1910
},
{
"epoch": 1.98,
"learning_rate": 1.8828932261768083e-07,
"logits/chosen": -4.274910926818848,
"logits/rejected": -4.14418888092041,
"logps/chosen": -400.5626220703125,
"logps/rejected": -311.94171142578125,
"loss": 0.6904,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.013942083343863487,
"rewards/margins": 0.006330497562885284,
"rewards/rejected": 0.0076115853153169155,
"step": 1920
},
{
"epoch": 1.99,
"learning_rate": 1.8637581324148487e-07,
"logits/chosen": -4.279542446136475,
"logits/rejected": -4.158401012420654,
"logps/chosen": -405.467529296875,
"logps/rejected": -329.11602783203125,
"loss": 0.6904,
"rewards/accuracies": 0.557812511920929,
"rewards/chosen": 0.018447261303663254,
"rewards/margins": 0.006302011664956808,
"rewards/rejected": 0.012145251035690308,
"step": 1930
},
{
"epoch": 2.0,
"eval_logits/chosen": -4.192010879516602,
"eval_logits/rejected": -4.082387447357178,
"eval_logps/chosen": -402.46429443359375,
"eval_logps/rejected": -315.65875244140625,
"eval_loss": 0.6883671879768372,
"eval_rewards/accuracies": 0.5569999814033508,
"eval_rewards/chosen": 0.019122228026390076,
"eval_rewards/margins": 0.010481986217200756,
"eval_rewards/rejected": 0.00864024180918932,
"eval_runtime": 765.0828,
"eval_samples_per_second": 2.614,
"eval_steps_per_second": 0.654,
"step": 1936
},
{
"epoch": 2.0,
"learning_rate": 1.8446230386528893e-07,
"logits/chosen": -4.2764363288879395,
"logits/rejected": -4.1841230392456055,
"logps/chosen": -391.84844970703125,
"logps/rejected": -318.74786376953125,
"loss": 0.6892,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.017281491309404373,
"rewards/margins": 0.008870037272572517,
"rewards/rejected": 0.008411452174186707,
"step": 1940
},
{
"epoch": 2.01,
"learning_rate": 1.82548794489093e-07,
"logits/chosen": -4.290091514587402,
"logits/rejected": -4.141688346862793,
"logps/chosen": -413.09112548828125,
"logps/rejected": -315.75860595703125,
"loss": 0.6886,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.01853874884545803,
"rewards/margins": 0.010042714886367321,
"rewards/rejected": 0.008496033027768135,
"step": 1950
},
{
"epoch": 2.02,
"learning_rate": 1.8063528511289706e-07,
"logits/chosen": -4.286593437194824,
"logits/rejected": -4.1507697105407715,
"logps/chosen": -389.1523132324219,
"logps/rejected": -307.9405822753906,
"loss": 0.688,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": 0.018895253539085388,
"rewards/margins": 0.011058597825467587,
"rewards/rejected": 0.007836655713617802,
"step": 1960
},
{
"epoch": 2.03,
"learning_rate": 1.7872177573670112e-07,
"logits/chosen": -4.287877559661865,
"logits/rejected": -4.147292613983154,
"logps/chosen": -416.8997497558594,
"logps/rejected": -327.71551513671875,
"loss": 0.6886,
"rewards/accuracies": 0.5546875,
"rewards/chosen": 0.018623776733875275,
"rewards/margins": 0.009957761503756046,
"rewards/rejected": 0.008666014298796654,
"step": 1970
},
{
"epoch": 2.04,
"learning_rate": 1.7680826636050515e-07,
"logits/chosen": -4.2642436027526855,
"logits/rejected": -4.148723125457764,
"logps/chosen": -388.62054443359375,
"logps/rejected": -313.1318359375,
"loss": 0.6907,
"rewards/accuracies": 0.551562488079071,
"rewards/chosen": 0.01719742640852928,
"rewards/margins": 0.005715816281735897,
"rewards/rejected": 0.011481606401503086,
"step": 1980
},
{
"epoch": 2.06,
"learning_rate": 1.7489475698430921e-07,
"logits/chosen": -4.268927097320557,
"logits/rejected": -4.110062599182129,
"logps/chosen": -428.08935546875,
"logps/rejected": -315.6456604003906,
"loss": 0.6885,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.0186283178627491,
"rewards/margins": 0.010059954598546028,
"rewards/rejected": 0.008568364195525646,
"step": 1990
},
{
"epoch": 2.07,
"learning_rate": 1.7298124760811328e-07,
"logits/chosen": -4.257794380187988,
"logits/rejected": -4.151054859161377,
"logps/chosen": -390.84942626953125,
"logps/rejected": -318.6888732910156,
"loss": 0.6891,
"rewards/accuracies": 0.557812511920929,
"rewards/chosen": 0.016992371529340744,
"rewards/margins": 0.00896035972982645,
"rewards/rejected": 0.008032011799514294,
"step": 2000
},
{
"epoch": 2.08,
"learning_rate": 1.7106773823191734e-07,
"logits/chosen": -4.262537956237793,
"logits/rejected": -4.151637077331543,
"logps/chosen": -397.4742431640625,
"logps/rejected": -319.90289306640625,
"loss": 0.6891,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": 0.017107700929045677,
"rewards/margins": 0.00891804601997137,
"rewards/rejected": 0.008189653977751732,
"step": 2010
},
{
"epoch": 2.09,
"learning_rate": 1.691542288557214e-07,
"logits/chosen": -4.258824348449707,
"logits/rejected": -4.141880989074707,
"logps/chosen": -393.25311279296875,
"logps/rejected": -307.5327453613281,
"loss": 0.6893,
"rewards/accuracies": 0.551562488079071,
"rewards/chosen": 0.01972380466759205,
"rewards/margins": 0.008552981540560722,
"rewards/rejected": 0.011170822195708752,
"step": 2020
},
{
"epoch": 2.1,
"learning_rate": 1.6724071947952544e-07,
"logits/chosen": -4.245623588562012,
"logits/rejected": -4.144326686859131,
"logps/chosen": -403.1796569824219,
"logps/rejected": -326.97894287109375,
"loss": 0.6911,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.01679617539048195,
"rewards/margins": 0.004923067055642605,
"rewards/rejected": 0.011873109266161919,
"step": 2030
},
{
"epoch": 2.11,
"learning_rate": 1.653272101033295e-07,
"logits/chosen": -4.275112152099609,
"logits/rejected": -4.147688388824463,
"logps/chosen": -394.7803039550781,
"logps/rejected": -311.6099548339844,
"loss": 0.6887,
"rewards/accuracies": 0.557812511920929,
"rewards/chosen": 0.01747475564479828,
"rewards/margins": 0.00970934983342886,
"rewards/rejected": 0.00776540394872427,
"step": 2040
},
{
"epoch": 2.12,
"learning_rate": 1.6341370072713356e-07,
"logits/chosen": -4.274272918701172,
"logits/rejected": -4.157819747924805,
"logps/chosen": -378.563720703125,
"logps/rejected": -302.5975036621094,
"loss": 0.6892,
"rewards/accuracies": 0.5453125238418579,
"rewards/chosen": 0.01942756399512291,
"rewards/margins": 0.008909397758543491,
"rewards/rejected": 0.010518166236579418,
"step": 2050
},
{
"epoch": 2.13,
"learning_rate": 1.6150019135093762e-07,
"logits/chosen": -4.260178565979004,
"logits/rejected": -4.149471759796143,
"logps/chosen": -418.959716796875,
"logps/rejected": -332.4044494628906,
"loss": 0.6893,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.017765840515494347,
"rewards/margins": 0.008554233238101006,
"rewards/rejected": 0.009211607277393341,
"step": 2060
},
{
"epoch": 2.14,
"learning_rate": 1.5958668197474169e-07,
"logits/chosen": -4.2685041427612305,
"logits/rejected": -4.117525100708008,
"logps/chosen": -430.4039001464844,
"logps/rejected": -319.9500732421875,
"loss": 0.6873,
"rewards/accuracies": 0.578125,
"rewards/chosen": 0.022361256182193756,
"rewards/margins": 0.01272774301469326,
"rewards/rejected": 0.009633513167500496,
"step": 2070
},
{
"epoch": 2.15,
"learning_rate": 1.5767317259854572e-07,
"logits/chosen": -4.2741522789001465,
"logits/rejected": -4.18676233291626,
"logps/chosen": -379.95263671875,
"logps/rejected": -313.4046936035156,
"loss": 0.6891,
"rewards/accuracies": 0.557812511920929,
"rewards/chosen": 0.01609444059431553,
"rewards/margins": 0.009009727276861668,
"rewards/rejected": 0.007084711454808712,
"step": 2080
},
{
"epoch": 2.16,
"learning_rate": 1.5575966322234978e-07,
"logits/chosen": -4.279686450958252,
"logits/rejected": -4.155128479003906,
"logps/chosen": -416.2315368652344,
"logps/rejected": -321.23992919921875,
"loss": 0.6909,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.01688700169324875,
"rewards/margins": 0.005368704441934824,
"rewards/rejected": 0.011518299579620361,
"step": 2090
},
{
"epoch": 2.17,
"learning_rate": 1.5384615384615385e-07,
"logits/chosen": -4.279124736785889,
"logits/rejected": -4.1183061599731445,
"logps/chosen": -428.96142578125,
"logps/rejected": -316.525146484375,
"loss": 0.6891,
"rewards/accuracies": 0.5546875,
"rewards/chosen": 0.019147472456097603,
"rewards/margins": 0.008967303670942783,
"rewards/rejected": 0.01018016878515482,
"step": 2100
},
{
"epoch": 2.18,
"learning_rate": 1.519326444699579e-07,
"logits/chosen": -4.268794059753418,
"logits/rejected": -4.155481815338135,
"logps/chosen": -411.06573486328125,
"logps/rejected": -329.96044921875,
"loss": 0.6888,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": 0.020965853706002235,
"rewards/margins": 0.009673124179244041,
"rewards/rejected": 0.011292731389403343,
"step": 2110
},
{
"epoch": 2.19,
"learning_rate": 1.5001913509376197e-07,
"logits/chosen": -4.276661396026611,
"logits/rejected": -4.141108512878418,
"logps/chosen": -391.82391357421875,
"logps/rejected": -302.3670349121094,
"loss": 0.6872,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.02172374725341797,
"rewards/margins": 0.012819238007068634,
"rewards/rejected": 0.008904511108994484,
"step": 2120
},
{
"epoch": 2.2,
"learning_rate": 1.4810562571756603e-07,
"logits/chosen": -4.27203369140625,
"logits/rejected": -4.1426167488098145,
"logps/chosen": -420.7378845214844,
"logps/rejected": -312.4131774902344,
"loss": 0.689,
"rewards/accuracies": 0.578125,
"rewards/chosen": 0.018989499658346176,
"rewards/margins": 0.009152286686003208,
"rewards/rejected": 0.009837212972342968,
"step": 2130
},
{
"epoch": 2.21,
"learning_rate": 1.4619211634137007e-07,
"logits/chosen": -4.257569789886475,
"logits/rejected": -4.129474639892578,
"logps/chosen": -409.93609619140625,
"logps/rejected": -317.70989990234375,
"loss": 0.6886,
"rewards/accuracies": 0.5765625238418579,
"rewards/chosen": 0.019030291587114334,
"rewards/margins": 0.01004733331501484,
"rewards/rejected": 0.008982958272099495,
"step": 2140
},
{
"epoch": 2.22,
"learning_rate": 1.4427860696517413e-07,
"logits/chosen": -4.251282691955566,
"logits/rejected": -4.162935256958008,
"logps/chosen": -396.5443420410156,
"logps/rejected": -335.5440368652344,
"loss": 0.6887,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 0.020548541098833084,
"rewards/margins": 0.009832927957177162,
"rewards/rejected": 0.010715610347688198,
"step": 2150
},
{
"epoch": 2.23,
"learning_rate": 1.423650975889782e-07,
"logits/chosen": -4.25177526473999,
"logits/rejected": -4.121354579925537,
"logps/chosen": -406.2722473144531,
"logps/rejected": -319.5417175292969,
"loss": 0.6889,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.022125694900751114,
"rewards/margins": 0.00926921982318163,
"rewards/rejected": 0.01285647600889206,
"step": 2160
},
{
"epoch": 2.24,
"learning_rate": 1.4045158821278225e-07,
"logits/chosen": -4.285470485687256,
"logits/rejected": -4.150871276855469,
"logps/chosen": -422.54071044921875,
"logps/rejected": -323.01458740234375,
"loss": 0.6883,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": 0.023092512041330338,
"rewards/margins": 0.010742807760834694,
"rewards/rejected": 0.012349705211818218,
"step": 2170
},
{
"epoch": 2.25,
"learning_rate": 1.3853807883658632e-07,
"logits/chosen": -4.2822346687316895,
"logits/rejected": -4.1521124839782715,
"logps/chosen": -414.7969665527344,
"logps/rejected": -330.4766540527344,
"loss": 0.6886,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": 0.021155862137675285,
"rewards/margins": 0.010015945881605148,
"rewards/rejected": 0.011139917187392712,
"step": 2180
},
{
"epoch": 2.26,
"learning_rate": 1.3662456946039035e-07,
"logits/chosen": -4.249444961547852,
"logits/rejected": -4.152978897094727,
"logps/chosen": -389.48052978515625,
"logps/rejected": -311.3460693359375,
"loss": 0.6878,
"rewards/accuracies": 0.515625,
"rewards/chosen": 0.020100217312574387,
"rewards/margins": 0.01170959509909153,
"rewards/rejected": 0.008390624076128006,
"step": 2190
},
{
"epoch": 2.27,
"learning_rate": 1.3471106008419441e-07,
"logits/chosen": -4.282201290130615,
"logits/rejected": -4.174456596374512,
"logps/chosen": -396.2969665527344,
"logps/rejected": -308.55548095703125,
"loss": 0.6893,
"rewards/accuracies": 0.557812511920929,
"rewards/chosen": 0.020026249811053276,
"rewards/margins": 0.008474086411297321,
"rewards/rejected": 0.01155216433107853,
"step": 2200
},
{
"epoch": 2.28,
"learning_rate": 1.3279755070799848e-07,
"logits/chosen": -4.26694393157959,
"logits/rejected": -4.153203010559082,
"logps/chosen": -390.5587158203125,
"logps/rejected": -312.5738830566406,
"loss": 0.689,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": 0.01699787937104702,
"rewards/margins": 0.009081227704882622,
"rewards/rejected": 0.007916653528809547,
"step": 2210
},
{
"epoch": 2.29,
"learning_rate": 1.3088404133180254e-07,
"logits/chosen": -4.26552677154541,
"logits/rejected": -4.145693778991699,
"logps/chosen": -413.443115234375,
"logps/rejected": -327.2570495605469,
"loss": 0.6887,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": 0.022313930094242096,
"rewards/margins": 0.009759850800037384,
"rewards/rejected": 0.012554079294204712,
"step": 2220
},
{
"epoch": 2.3,
"learning_rate": 1.289705319556066e-07,
"logits/chosen": -4.264178276062012,
"logits/rejected": -4.172913551330566,
"logps/chosen": -407.66339111328125,
"logps/rejected": -321.8123779296875,
"loss": 0.6884,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 0.023416642099618912,
"rewards/margins": 0.010417203418910503,
"rewards/rejected": 0.012999439612030983,
"step": 2230
},
{
"epoch": 2.31,
"learning_rate": 1.2705702257941064e-07,
"logits/chosen": -4.250650405883789,
"logits/rejected": -4.140833854675293,
"logps/chosen": -374.83990478515625,
"logps/rejected": -302.1290588378906,
"loss": 0.6893,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.019031699746847153,
"rewards/margins": 0.008602599613368511,
"rewards/rejected": 0.010429101064801216,
"step": 2240
},
{
"epoch": 2.32,
"learning_rate": 1.251435132032147e-07,
"logits/chosen": -4.270743370056152,
"logits/rejected": -4.151588439941406,
"logps/chosen": -443.17791748046875,
"logps/rejected": -332.28302001953125,
"loss": 0.6913,
"rewards/accuracies": 0.535937488079071,
"rewards/chosen": 0.01945783570408821,
"rewards/margins": 0.004704002290964127,
"rewards/rejected": 0.014753831550478935,
"step": 2250
},
{
"epoch": 2.33,
"learning_rate": 1.2323000382701873e-07,
"logits/chosen": -4.275900840759277,
"logits/rejected": -4.1336822509765625,
"logps/chosen": -416.0301208496094,
"logps/rejected": -323.3976135253906,
"loss": 0.6874,
"rewards/accuracies": 0.5640624761581421,
"rewards/chosen": 0.021596388891339302,
"rewards/margins": 0.012395900674164295,
"rewards/rejected": 0.009200489148497581,
"step": 2260
},
{
"epoch": 2.34,
"learning_rate": 1.213164944508228e-07,
"logits/chosen": -4.26046085357666,
"logits/rejected": -4.138212203979492,
"logps/chosen": -397.1346740722656,
"logps/rejected": -319.06781005859375,
"loss": 0.689,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": 0.021542087197303772,
"rewards/margins": 0.009193692356348038,
"rewards/rejected": 0.012348394840955734,
"step": 2270
},
{
"epoch": 2.35,
"learning_rate": 1.1940298507462686e-07,
"logits/chosen": -4.280481815338135,
"logits/rejected": -4.179018974304199,
"logps/chosen": -407.4460754394531,
"logps/rejected": -328.47021484375,
"loss": 0.6879,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": 0.01951112225651741,
"rewards/margins": 0.011365312151610851,
"rewards/rejected": 0.008145810104906559,
"step": 2280
},
{
"epoch": 2.37,
"learning_rate": 1.1748947569843092e-07,
"logits/chosen": -4.224648952484131,
"logits/rejected": -4.105835914611816,
"logps/chosen": -381.49658203125,
"logps/rejected": -303.1542663574219,
"loss": 0.6889,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.020115623250603676,
"rewards/margins": 0.009458022192120552,
"rewards/rejected": 0.010657599195837975,
"step": 2290
},
{
"epoch": 2.38,
"learning_rate": 1.1557596632223497e-07,
"logits/chosen": -4.270878791809082,
"logits/rejected": -4.1364850997924805,
"logps/chosen": -407.75909423828125,
"logps/rejected": -300.38336181640625,
"loss": 0.6885,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.019836071878671646,
"rewards/margins": 0.010231700725853443,
"rewards/rejected": 0.009604370221495628,
"step": 2300
},
{
"epoch": 2.39,
"learning_rate": 1.1366245694603903e-07,
"logits/chosen": -4.28275203704834,
"logits/rejected": -4.161673545837402,
"logps/chosen": -389.09051513671875,
"logps/rejected": -298.67401123046875,
"loss": 0.6881,
"rewards/accuracies": 0.5703125,
"rewards/chosen": 0.022033553570508957,
"rewards/margins": 0.010990725830197334,
"rewards/rejected": 0.011042827740311623,
"step": 2310
},
{
"epoch": 2.4,
"learning_rate": 1.1174894756984308e-07,
"logits/chosen": -4.2564802169799805,
"logits/rejected": -4.108375549316406,
"logps/chosen": -383.16119384765625,
"logps/rejected": -286.8339538574219,
"loss": 0.6897,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": 0.019884765148162842,
"rewards/margins": 0.007881352677941322,
"rewards/rejected": 0.012003413401544094,
"step": 2320
},
{
"epoch": 2.41,
"learning_rate": 1.0983543819364714e-07,
"logits/chosen": -4.2984795570373535,
"logits/rejected": -4.14174747467041,
"logps/chosen": -417.61273193359375,
"logps/rejected": -321.7851867675781,
"loss": 0.687,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": 0.023209992796182632,
"rewards/margins": 0.013400438241660595,
"rewards/rejected": 0.009809553623199463,
"step": 2330
},
{
"epoch": 2.42,
"learning_rate": 1.079219288174512e-07,
"logits/chosen": -4.233872413635254,
"logits/rejected": -4.129950046539307,
"logps/chosen": -393.71453857421875,
"logps/rejected": -327.7266845703125,
"loss": 0.6903,
"rewards/accuracies": 0.526562511920929,
"rewards/chosen": 0.017995553091168404,
"rewards/margins": 0.006606035865843296,
"rewards/rejected": 0.011389517225325108,
"step": 2340
},
{
"epoch": 2.43,
"learning_rate": 1.0600841944125525e-07,
"logits/chosen": -4.279402732849121,
"logits/rejected": -4.134713172912598,
"logps/chosen": -390.8346862792969,
"logps/rejected": -306.3309020996094,
"loss": 0.6891,
"rewards/accuracies": 0.551562488079071,
"rewards/chosen": 0.02169058658182621,
"rewards/margins": 0.008909964933991432,
"rewards/rejected": 0.012780621647834778,
"step": 2350
},
{
"epoch": 2.44,
"learning_rate": 1.0409491006505931e-07,
"logits/chosen": -4.276646614074707,
"logits/rejected": -4.130280017852783,
"logps/chosen": -404.8982849121094,
"logps/rejected": -310.2183532714844,
"loss": 0.6874,
"rewards/accuracies": 0.5921875238418579,
"rewards/chosen": 0.022115709260106087,
"rewards/margins": 0.012477119453251362,
"rewards/rejected": 0.00963858887553215,
"step": 2360
},
{
"epoch": 2.45,
"learning_rate": 1.0218140068886336e-07,
"logits/chosen": -4.257068634033203,
"logits/rejected": -4.119657516479492,
"logps/chosen": -405.00750732421875,
"logps/rejected": -309.30023193359375,
"loss": 0.6898,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 0.021267231553792953,
"rewards/margins": 0.007724496070295572,
"rewards/rejected": 0.013542735949158669,
"step": 2370
},
{
"epoch": 2.46,
"learning_rate": 1.0026789131266743e-07,
"logits/chosen": -4.301741600036621,
"logits/rejected": -4.1736650466918945,
"logps/chosen": -388.27923583984375,
"logps/rejected": -308.6962890625,
"loss": 0.6892,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": 0.01904493011534214,
"rewards/margins": 0.008867397904396057,
"rewards/rejected": 0.010177532210946083,
"step": 2380
},
{
"epoch": 2.47,
"learning_rate": 9.835438193647149e-08,
"logits/chosen": -4.262604236602783,
"logits/rejected": -4.160672187805176,
"logps/chosen": -396.7488708496094,
"logps/rejected": -320.4295654296875,
"loss": 0.6852,
"rewards/accuracies": 0.609375,
"rewards/chosen": 0.027415934950113297,
"rewards/margins": 0.016748551279306412,
"rewards/rejected": 0.010667381808161736,
"step": 2390
},
{
"epoch": 2.48,
"learning_rate": 9.644087256027554e-08,
"logits/chosen": -4.258942604064941,
"logits/rejected": -4.159635543823242,
"logps/chosen": -410.4764099121094,
"logps/rejected": -334.953369140625,
"loss": 0.6895,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": 0.021485131233930588,
"rewards/margins": 0.00812376569956541,
"rewards/rejected": 0.013361366465687752,
"step": 2400
},
{
"epoch": 2.49,
"learning_rate": 9.45273631840796e-08,
"logits/chosen": -4.264720916748047,
"logits/rejected": -4.117271900177002,
"logps/chosen": -385.35052490234375,
"logps/rejected": -290.49114990234375,
"loss": 0.6894,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": 0.018992017954587936,
"rewards/margins": 0.008459472097456455,
"rewards/rejected": 0.01053254771977663,
"step": 2410
},
{
"epoch": 2.5,
"learning_rate": 9.261385380788366e-08,
"logits/chosen": -4.276088714599609,
"logits/rejected": -4.1584858894348145,
"logps/chosen": -397.9452209472656,
"logps/rejected": -306.89739990234375,
"loss": 0.6881,
"rewards/accuracies": 0.5609375238418579,
"rewards/chosen": 0.020109858363866806,
"rewards/margins": 0.011120992712676525,
"rewards/rejected": 0.008988862857222557,
"step": 2420
},
{
"epoch": 2.51,
"learning_rate": 9.070034443168771e-08,
"logits/chosen": -4.260004997253418,
"logits/rejected": -4.13455867767334,
"logps/chosen": -389.76397705078125,
"logps/rejected": -304.0987548828125,
"loss": 0.6871,
"rewards/accuracies": 0.589062511920929,
"rewards/chosen": 0.021830763667821884,
"rewards/margins": 0.013007350265979767,
"rewards/rejected": 0.008823414333164692,
"step": 2430
},
{
"epoch": 2.52,
"learning_rate": 8.878683505549177e-08,
"logits/chosen": -4.2787675857543945,
"logits/rejected": -4.1580352783203125,
"logps/chosen": -410.0357971191406,
"logps/rejected": -329.86334228515625,
"loss": 0.6893,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.02292051911354065,
"rewards/margins": 0.008661197498440742,
"rewards/rejected": 0.014259323477745056,
"step": 2440
},
{
"epoch": 2.53,
"learning_rate": 8.687332567929582e-08,
"logits/chosen": -4.27437686920166,
"logits/rejected": -4.147732734680176,
"logps/chosen": -401.8902587890625,
"logps/rejected": -296.3741760253906,
"loss": 0.6876,
"rewards/accuracies": 0.5796874761581421,
"rewards/chosen": 0.024159640073776245,
"rewards/margins": 0.01200934313237667,
"rewards/rejected": 0.012150297872722149,
"step": 2450
},
{
"epoch": 2.54,
"learning_rate": 8.495981630309988e-08,
"logits/chosen": -4.271141529083252,
"logits/rejected": -4.141169548034668,
"logps/chosen": -401.7086181640625,
"logps/rejected": -319.8025817871094,
"loss": 0.6875,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.023453358560800552,
"rewards/margins": 0.01219714991748333,
"rewards/rejected": 0.011256209574639797,
"step": 2460
},
{
"epoch": 2.55,
"learning_rate": 8.304630692690395e-08,
"logits/chosen": -4.2775139808654785,
"logits/rejected": -4.150107383728027,
"logps/chosen": -426.84576416015625,
"logps/rejected": -320.8899230957031,
"loss": 0.6879,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": 0.02233419567346573,
"rewards/margins": 0.011261718347668648,
"rewards/rejected": 0.011072477325797081,
"step": 2470
},
{
"epoch": 2.56,
"learning_rate": 8.1132797550708e-08,
"logits/chosen": -4.264378547668457,
"logits/rejected": -4.172327995300293,
"logps/chosen": -404.49420166015625,
"logps/rejected": -316.01739501953125,
"loss": 0.6862,
"rewards/accuracies": 0.5765625238418579,
"rewards/chosen": 0.02468792162835598,
"rewards/margins": 0.014898866415023804,
"rewards/rejected": 0.009789055213332176,
"step": 2480
},
{
"epoch": 2.57,
"learning_rate": 7.921928817451206e-08,
"logits/chosen": -4.274040222167969,
"logits/rejected": -4.133544921875,
"logps/chosen": -412.9867248535156,
"logps/rejected": -303.045166015625,
"loss": 0.6881,
"rewards/accuracies": 0.5609375238418579,
"rewards/chosen": 0.024420084431767464,
"rewards/margins": 0.011113069020211697,
"rewards/rejected": 0.013307017274200916,
"step": 2490
},
{
"epoch": 2.58,
"learning_rate": 7.73057787983161e-08,
"logits/chosen": -4.268971920013428,
"logits/rejected": -4.1228179931640625,
"logps/chosen": -412.2561950683594,
"logps/rejected": -311.38623046875,
"loss": 0.6879,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.02301056683063507,
"rewards/margins": 0.011502142064273357,
"rewards/rejected": 0.011508422903716564,
"step": 2500
},
{
"epoch": 2.59,
"learning_rate": 7.539226942212017e-08,
"logits/chosen": -4.249145030975342,
"logits/rejected": -4.131203651428223,
"logps/chosen": -407.7536926269531,
"logps/rejected": -336.33172607421875,
"loss": 0.6889,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.024292880669236183,
"rewards/margins": 0.009482759051024914,
"rewards/rejected": 0.01481011975556612,
"step": 2510
},
{
"epoch": 2.6,
"learning_rate": 7.347876004592423e-08,
"logits/chosen": -4.284695625305176,
"logits/rejected": -4.161882400512695,
"logps/chosen": -408.2262878417969,
"logps/rejected": -316.01239013671875,
"loss": 0.689,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": 0.020047323778271675,
"rewards/margins": 0.009101735427975655,
"rewards/rejected": 0.01094558835029602,
"step": 2520
},
{
"epoch": 2.61,
"learning_rate": 7.156525066972828e-08,
"logits/chosen": -4.280055522918701,
"logits/rejected": -4.136964321136475,
"logps/chosen": -430.8373107910156,
"logps/rejected": -321.91973876953125,
"loss": 0.6871,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": 0.024292441084980965,
"rewards/margins": 0.013177357614040375,
"rewards/rejected": 0.01111508347094059,
"step": 2530
},
{
"epoch": 2.62,
"learning_rate": 6.965174129353234e-08,
"logits/chosen": -4.265179634094238,
"logits/rejected": -4.146527290344238,
"logps/chosen": -391.67169189453125,
"logps/rejected": -320.75372314453125,
"loss": 0.6881,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": 0.02059927210211754,
"rewards/margins": 0.011022168211638927,
"rewards/rejected": 0.009577102959156036,
"step": 2540
},
{
"epoch": 2.63,
"learning_rate": 6.773823191733639e-08,
"logits/chosen": -4.287339210510254,
"logits/rejected": -4.139233112335205,
"logps/chosen": -410.2044372558594,
"logps/rejected": -304.69049072265625,
"loss": 0.6868,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": 0.024769442155957222,
"rewards/margins": 0.013655883260071278,
"rewards/rejected": 0.011113559827208519,
"step": 2550
},
{
"epoch": 2.64,
"learning_rate": 6.582472254114045e-08,
"logits/chosen": -4.297701835632324,
"logits/rejected": -4.167489051818848,
"logps/chosen": -416.37469482421875,
"logps/rejected": -326.1492614746094,
"loss": 0.6871,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": 0.023087535053491592,
"rewards/margins": 0.013127269223332405,
"rewards/rejected": 0.009960266761481762,
"step": 2560
},
{
"epoch": 2.65,
"learning_rate": 6.391121316494451e-08,
"logits/chosen": -4.28665828704834,
"logits/rejected": -4.161301612854004,
"logps/chosen": -382.5966796875,
"logps/rejected": -317.6145935058594,
"loss": 0.6885,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": 0.021074790507555008,
"rewards/margins": 0.010219180956482887,
"rewards/rejected": 0.010855610482394695,
"step": 2570
},
{
"epoch": 2.66,
"learning_rate": 6.199770378874856e-08,
"logits/chosen": -4.263566017150879,
"logits/rejected": -4.153146266937256,
"logps/chosen": -402.6602478027344,
"logps/rejected": -314.2333068847656,
"loss": 0.6873,
"rewards/accuracies": 0.567187488079071,
"rewards/chosen": 0.0247223861515522,
"rewards/margins": 0.01265608798712492,
"rewards/rejected": 0.012066296301782131,
"step": 2580
},
{
"epoch": 2.68,
"learning_rate": 6.008419441255262e-08,
"logits/chosen": -4.286923885345459,
"logits/rejected": -4.174257278442383,
"logps/chosen": -401.7779541015625,
"logps/rejected": -322.3847961425781,
"loss": 0.6885,
"rewards/accuracies": 0.5765625238418579,
"rewards/chosen": 0.024542078375816345,
"rewards/margins": 0.01016208902001381,
"rewards/rejected": 0.014379991218447685,
"step": 2590
},
{
"epoch": 2.69,
"learning_rate": 5.817068503635668e-08,
"logits/chosen": -4.264492988586426,
"logits/rejected": -4.1116790771484375,
"logps/chosen": -414.8555603027344,
"logps/rejected": -294.9403381347656,
"loss": 0.6862,
"rewards/accuracies": 0.589062511920929,
"rewards/chosen": 0.0254591666162014,
"rewards/margins": 0.014859716407954693,
"rewards/rejected": 0.010599448345601559,
"step": 2600
},
{
"epoch": 2.7,
"learning_rate": 5.6257175660160735e-08,
"logits/chosen": -4.258959770202637,
"logits/rejected": -4.129164695739746,
"logps/chosen": -404.98980712890625,
"logps/rejected": -310.5729675292969,
"loss": 0.6885,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": 0.021537696942687035,
"rewards/margins": 0.010314391925930977,
"rewards/rejected": 0.011223304085433483,
"step": 2610
},
{
"epoch": 2.71,
"learning_rate": 5.4343666283964784e-08,
"logits/chosen": -4.262757301330566,
"logits/rejected": -4.128348350524902,
"logps/chosen": -396.00445556640625,
"logps/rejected": -306.8481140136719,
"loss": 0.6865,
"rewards/accuracies": 0.573437511920929,
"rewards/chosen": 0.02303687483072281,
"rewards/margins": 0.014214645139873028,
"rewards/rejected": 0.008822232484817505,
"step": 2620
},
{
"epoch": 2.72,
"learning_rate": 5.243015690776884e-08,
"logits/chosen": -4.266745567321777,
"logits/rejected": -4.137091159820557,
"logps/chosen": -402.24993896484375,
"logps/rejected": -299.17864990234375,
"loss": 0.6885,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": 0.023752233013510704,
"rewards/margins": 0.010097989812493324,
"rewards/rejected": 0.01365424133837223,
"step": 2630
},
{
"epoch": 2.73,
"learning_rate": 5.05166475315729e-08,
"logits/chosen": -4.23615026473999,
"logits/rejected": -4.157704830169678,
"logps/chosen": -375.03277587890625,
"logps/rejected": -310.47540283203125,
"loss": 0.6869,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": 0.024319518357515335,
"rewards/margins": 0.013446244411170483,
"rewards/rejected": 0.010873274877667427,
"step": 2640
},
{
"epoch": 2.74,
"learning_rate": 4.860313815537696e-08,
"logits/chosen": -4.296034812927246,
"logits/rejected": -4.151383399963379,
"logps/chosen": -410.50146484375,
"logps/rejected": -302.6440734863281,
"loss": 0.6882,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.02242346480488777,
"rewards/margins": 0.010714459232985973,
"rewards/rejected": 0.011709003709256649,
"step": 2650
},
{
"epoch": 2.75,
"learning_rate": 4.668962877918101e-08,
"logits/chosen": -4.266396522521973,
"logits/rejected": -4.138249397277832,
"logps/chosen": -404.47039794921875,
"logps/rejected": -302.8034362792969,
"loss": 0.6876,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.024602141231298447,
"rewards/margins": 0.011944174766540527,
"rewards/rejected": 0.012657967396080494,
"step": 2660
},
{
"epoch": 2.76,
"learning_rate": 4.477611940298507e-08,
"logits/chosen": -4.252145767211914,
"logits/rejected": -4.149975776672363,
"logps/chosen": -393.45355224609375,
"logps/rejected": -314.1678771972656,
"loss": 0.6886,
"rewards/accuracies": 0.567187488079071,
"rewards/chosen": 0.02430255338549614,
"rewards/margins": 0.01004251278936863,
"rewards/rejected": 0.014260041527450085,
"step": 2670
},
{
"epoch": 2.77,
"learning_rate": 4.2862610026789124e-08,
"logits/chosen": -4.265524864196777,
"logits/rejected": -4.1408796310424805,
"logps/chosen": -405.55511474609375,
"logps/rejected": -310.058837890625,
"loss": 0.6879,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.021910104900598526,
"rewards/margins": 0.011513815261423588,
"rewards/rejected": 0.010396288707852364,
"step": 2680
},
{
"epoch": 2.78,
"learning_rate": 4.0949100650593186e-08,
"logits/chosen": -4.2873334884643555,
"logits/rejected": -4.150042533874512,
"logps/chosen": -411.6018981933594,
"logps/rejected": -312.466552734375,
"loss": 0.6872,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.023441951721906662,
"rewards/margins": 0.01284896582365036,
"rewards/rejected": 0.010592986829578876,
"step": 2690
},
{
"epoch": 2.79,
"learning_rate": 3.903559127439724e-08,
"logits/chosen": -4.27925968170166,
"logits/rejected": -4.131691932678223,
"logps/chosen": -414.133056640625,
"logps/rejected": -314.72442626953125,
"loss": 0.6894,
"rewards/accuracies": 0.551562488079071,
"rewards/chosen": 0.02151947282254696,
"rewards/margins": 0.00846365001052618,
"rewards/rejected": 0.013055823743343353,
"step": 2700
},
{
"epoch": 2.8,
"learning_rate": 3.71220818982013e-08,
"logits/chosen": -4.298044681549072,
"logits/rejected": -4.16172981262207,
"logps/chosen": -410.84063720703125,
"logps/rejected": -313.7170104980469,
"loss": 0.6917,
"rewards/accuracies": 0.5015624761581421,
"rewards/chosen": 0.01963016204535961,
"rewards/margins": 0.0036624562926590443,
"rewards/rejected": 0.015967708081007004,
"step": 2710
},
{
"epoch": 2.81,
"learning_rate": 3.520857252200535e-08,
"logits/chosen": -4.293381690979004,
"logits/rejected": -4.154776573181152,
"logps/chosen": -410.7498474121094,
"logps/rejected": -313.6238708496094,
"loss": 0.6862,
"rewards/accuracies": 0.59375,
"rewards/chosen": 0.023770933970808983,
"rewards/margins": 0.014910402707755566,
"rewards/rejected": 0.008860534057021141,
"step": 2720
},
{
"epoch": 2.82,
"learning_rate": 3.3295063145809414e-08,
"logits/chosen": -4.240599632263184,
"logits/rejected": -4.130780220031738,
"logps/chosen": -411.70074462890625,
"logps/rejected": -340.7054443359375,
"loss": 0.6901,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.023142261430621147,
"rewards/margins": 0.007068459875881672,
"rewards/rejected": 0.0160738043487072,
"step": 2730
},
{
"epoch": 2.83,
"learning_rate": 3.138155376961347e-08,
"logits/chosen": -4.274569511413574,
"logits/rejected": -4.144467830657959,
"logps/chosen": -390.971923828125,
"logps/rejected": -306.0947265625,
"loss": 0.6885,
"rewards/accuracies": 0.5703125,
"rewards/chosen": 0.02243395894765854,
"rewards/margins": 0.010051446035504341,
"rewards/rejected": 0.012382512912154198,
"step": 2740
},
{
"epoch": 2.84,
"learning_rate": 2.9468044393417525e-08,
"logits/chosen": -4.260807037353516,
"logits/rejected": -4.1508378982543945,
"logps/chosen": -390.257080078125,
"logps/rejected": -308.8485107421875,
"loss": 0.6894,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": 0.02236488275229931,
"rewards/margins": 0.008465753868222237,
"rewards/rejected": 0.013899129815399647,
"step": 2750
},
{
"epoch": 2.85,
"learning_rate": 2.755453501722158e-08,
"logits/chosen": -4.267168998718262,
"logits/rejected": -4.142585277557373,
"logps/chosen": -400.91070556640625,
"logps/rejected": -305.3719177246094,
"loss": 0.6891,
"rewards/accuracies": 0.546875,
"rewards/chosen": 0.022190194576978683,
"rewards/margins": 0.009010069072246552,
"rewards/rejected": 0.013180124573409557,
"step": 2760
},
{
"epoch": 2.86,
"learning_rate": 2.564102564102564e-08,
"logits/chosen": -4.2551798820495605,
"logits/rejected": -4.179601669311523,
"logps/chosen": -403.3457946777344,
"logps/rejected": -337.92742919921875,
"loss": 0.6875,
"rewards/accuracies": 0.567187488079071,
"rewards/chosen": 0.02564335986971855,
"rewards/margins": 0.012334323488175869,
"rewards/rejected": 0.013309036381542683,
"step": 2770
},
{
"epoch": 2.87,
"learning_rate": 2.3727516264829695e-08,
"logits/chosen": -4.273959159851074,
"logits/rejected": -4.142548084259033,
"logps/chosen": -399.456787109375,
"logps/rejected": -308.5451965332031,
"loss": 0.6888,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": 0.02145221456885338,
"rewards/margins": 0.009570146910846233,
"rewards/rejected": 0.01188206858932972,
"step": 2780
},
{
"epoch": 2.88,
"learning_rate": 2.1814006888633754e-08,
"logits/chosen": -4.258917808532715,
"logits/rejected": -4.150449275970459,
"logps/chosen": -403.0406494140625,
"logps/rejected": -322.05072021484375,
"loss": 0.6899,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": 0.024062659591436386,
"rewards/margins": 0.0073325140401721,
"rewards/rejected": 0.01673014461994171,
"step": 2790
},
{
"epoch": 2.89,
"learning_rate": 1.990049751243781e-08,
"logits/chosen": -4.2558393478393555,
"logits/rejected": -4.121561527252197,
"logps/chosen": -406.07708740234375,
"logps/rejected": -318.92083740234375,
"loss": 0.6855,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.026952465996146202,
"rewards/margins": 0.0162807684391737,
"rewards/rejected": 0.010671699419617653,
"step": 2800
},
{
"epoch": 2.9,
"learning_rate": 1.7986988136241865e-08,
"logits/chosen": -4.284726142883301,
"logits/rejected": -4.145880222320557,
"logps/chosen": -407.32513427734375,
"logps/rejected": -315.43463134765625,
"loss": 0.6879,
"rewards/accuracies": 0.5796874761581421,
"rewards/chosen": 0.025830427184700966,
"rewards/margins": 0.01146793458610773,
"rewards/rejected": 0.01436249352991581,
"step": 2810
},
{
"epoch": 2.91,
"learning_rate": 1.6073478760045924e-08,
"logits/chosen": -4.281344413757324,
"logits/rejected": -4.137038230895996,
"logps/chosen": -425.17877197265625,
"logps/rejected": -322.626953125,
"loss": 0.6874,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": 0.024241294711828232,
"rewards/margins": 0.012439909391105175,
"rewards/rejected": 0.011801382526755333,
"step": 2820
},
{
"epoch": 2.92,
"learning_rate": 1.4159969383849981e-08,
"logits/chosen": -4.255224227905273,
"logits/rejected": -4.1160664558410645,
"logps/chosen": -417.343017578125,
"logps/rejected": -317.6444091796875,
"loss": 0.6898,
"rewards/accuracies": 0.5703125,
"rewards/chosen": 0.020717119798064232,
"rewards/margins": 0.00756122637540102,
"rewards/rejected": 0.013155892491340637,
"step": 2830
},
{
"epoch": 2.93,
"learning_rate": 1.2246460007654037e-08,
"logits/chosen": -4.262511730194092,
"logits/rejected": -4.158580780029297,
"logps/chosen": -414.7383728027344,
"logps/rejected": -330.22784423828125,
"loss": 0.6888,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 0.026073377579450607,
"rewards/margins": 0.00955992005765438,
"rewards/rejected": 0.016513461247086525,
"step": 2840
},
{
"epoch": 2.94,
"learning_rate": 1.0332950631458094e-08,
"logits/chosen": -4.292693138122559,
"logits/rejected": -4.152438163757324,
"logps/chosen": -409.6170349121094,
"logps/rejected": -313.6919860839844,
"loss": 0.6871,
"rewards/accuracies": 0.551562488079071,
"rewards/chosen": 0.027544280514121056,
"rewards/margins": 0.013068397529423237,
"rewards/rejected": 0.014475886709988117,
"step": 2850
},
{
"epoch": 2.95,
"learning_rate": 8.419441255262151e-09,
"logits/chosen": -4.245689392089844,
"logits/rejected": -4.140617847442627,
"logps/chosen": -390.8550109863281,
"logps/rejected": -315.28570556640625,
"loss": 0.6892,
"rewards/accuracies": 0.535937488079071,
"rewards/chosen": 0.022948402911424637,
"rewards/margins": 0.008875529281795025,
"rewards/rejected": 0.014072870835661888,
"step": 2860
},
{
"epoch": 2.96,
"learning_rate": 6.505931879066207e-09,
"logits/chosen": -4.273464202880859,
"logits/rejected": -4.1207990646362305,
"logps/chosen": -421.85577392578125,
"logps/rejected": -319.6473083496094,
"loss": 0.6868,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.025004684925079346,
"rewards/margins": 0.013815673068165779,
"rewards/rejected": 0.011189011856913567,
"step": 2870
},
{
"epoch": 2.97,
"learning_rate": 4.592422502870264e-09,
"logits/chosen": -4.305132865905762,
"logits/rejected": -4.1475958824157715,
"logps/chosen": -421.42529296875,
"logps/rejected": -318.45184326171875,
"loss": 0.6882,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.024396046996116638,
"rewards/margins": 0.010859435424208641,
"rewards/rejected": 0.013536609709262848,
"step": 2880
},
{
"epoch": 2.98,
"learning_rate": 2.6789131266743202e-09,
"logits/chosen": -4.286343097686768,
"logits/rejected": -4.121587753295898,
"logps/chosen": -385.7117614746094,
"logps/rejected": -282.57080078125,
"loss": 0.6881,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.022473538294434547,
"rewards/margins": 0.011112211272120476,
"rewards/rejected": 0.011361326090991497,
"step": 2890
},
{
"epoch": 3.0,
"learning_rate": 7.654037504783773e-10,
"logits/chosen": -4.263542652130127,
"logits/rejected": -4.148170471191406,
"logps/chosen": -402.5519104003906,
"logps/rejected": -317.2384338378906,
"loss": 0.6876,
"rewards/accuracies": 0.578125,
"rewards/chosen": 0.027037670835852623,
"rewards/margins": 0.01205758098512888,
"rewards/rejected": 0.014980090782046318,
"step": 2900
},
{
"epoch": 3.0,
"eval_logits/chosen": -4.191605567932129,
"eval_logits/rejected": -4.081777572631836,
"eval_logps/chosen": -402.4017333984375,
"eval_logps/rejected": -315.6105651855469,
"eval_loss": 0.6876626014709473,
"eval_rewards/accuracies": 0.5644999742507935,
"eval_rewards/chosen": 0.025381002575159073,
"eval_rewards/margins": 0.011920945718884468,
"eval_rewards/rejected": 0.013460054062306881,
"eval_runtime": 776.0859,
"eval_samples_per_second": 2.577,
"eval_steps_per_second": 0.644,
"step": 2904
},
{
"epoch": 3.0,
"step": 2904,
"total_flos": 0.0,
"train_loss": 0.6907179896044994,
"train_runtime": 111372.3355,
"train_samples_per_second": 1.669,
"train_steps_per_second": 0.026
}
],
"logging_steps": 10,
"max_steps": 2904,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}