{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9995965030262273, "eval_steps": 500, "global_step": 1858, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.6881720430107528e-09, "logits/chosen": -2.5808520317077637, "logits/rejected": -2.0101242065429688, "logps/chosen": -299.3489990234375, "logps/rejected": -186.63014221191406, "loss": 1.2656, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "rewards/safe_rewards": 0.0, "rewards/unsafe_rewards": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 2.6881720430107527e-08, "logits/chosen": -2.38761043548584, "logits/rejected": -2.2287850379943848, "logps/chosen": -201.83148193359375, "logps/rejected": -189.46726989746094, "loss": 1.4296, "rewards/accuracies": 0.4305555522441864, "rewards/chosen": 2.8226104404893704e-05, "rewards/margins": -9.960395254893228e-05, "rewards/rejected": 0.00012783010606653988, "rewards/safe_rewards": -0.0001673989463597536, "rewards/unsafe_rewards": 0.0002238511951873079, "step": 10 }, { "epoch": 0.01, "learning_rate": 5.3763440860215054e-08, "logits/chosen": -2.3484911918640137, "logits/rejected": -2.053339719772339, "logps/chosen": -226.3044891357422, "logps/rejected": -181.17330932617188, "loss": 1.463, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -4.27155100624077e-05, "rewards/margins": 3.7895108562224777e-06, "rewards/rejected": -4.650496339309029e-05, "rewards/safe_rewards": -0.0004773393739014864, "rewards/unsafe_rewards": 0.0003919084556400776, "step": 20 }, { "epoch": 0.02, "learning_rate": 8.064516129032257e-08, "logits/chosen": -2.3405332565307617, "logits/rejected": -2.145922899246216, "logps/chosen": -215.05410766601562, "logps/rejected": -189.3188018798828, "loss": 1.431, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": 3.503418338368647e-05, "rewards/margins": 0.0005787784466519952, "rewards/rejected": -0.0005437443032860756, "rewards/safe_rewards": -0.00011984705633949488, "rewards/unsafe_rewards": 0.00018991540127899498, "step": 30 }, { "epoch": 0.02, "learning_rate": 1.0752688172043011e-07, "logits/chosen": -2.2765462398529053, "logits/rejected": -1.974180817604065, "logps/chosen": -180.71937561035156, "logps/rejected": -173.9296417236328, "loss": 1.4304, "rewards/accuracies": 0.65625, "rewards/chosen": 0.00015593590796925128, "rewards/margins": 0.0017944574356079102, "rewards/rejected": -0.0016385214403271675, "rewards/safe_rewards": 0.00035788281820714474, "rewards/unsafe_rewards": -4.6010944060981274e-05, "step": 40 }, { "epoch": 0.03, "learning_rate": 1.3440860215053762e-07, "logits/chosen": -2.403860569000244, "logits/rejected": -2.0332884788513184, "logps/chosen": -209.592529296875, "logps/rejected": -167.6835174560547, "loss": 1.4344, "rewards/accuracies": 0.606249988079071, "rewards/chosen": 0.0008560878923162818, "rewards/margins": 0.004450940527021885, "rewards/rejected": -0.0035948525182902813, "rewards/safe_rewards": 0.00018845750309992582, "rewards/unsafe_rewards": 0.0015237184707075357, "step": 50 }, { "epoch": 0.03, "learning_rate": 1.6129032258064515e-07, "logits/chosen": -2.330204486846924, "logits/rejected": -2.1555492877960205, "logps/chosen": -185.8196563720703, "logps/rejected": -185.08883666992188, "loss": 1.4264, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0013676225207746029, "rewards/margins": 0.0021811036858707666, "rewards/rejected": -0.003548726439476013, "rewards/safe_rewards": -0.000750910839997232, "rewards/unsafe_rewards": -0.001984334085136652, "step": 60 }, { "epoch": 0.04, "learning_rate": 1.8817204301075268e-07, "logits/chosen": -2.3298559188842773, "logits/rejected": -2.0787758827209473, "logps/chosen": -202.39566040039062, "logps/rejected": -184.2627410888672, "loss": 1.3858, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0020925761200487614, "rewards/margins": 0.006319403648376465, "rewards/rejected": -0.008411980234086514, "rewards/safe_rewards": -0.0030772520694881678, "rewards/unsafe_rewards": -0.0011078999377787113, "step": 70 }, { "epoch": 0.04, "learning_rate": 2.1505376344086022e-07, "logits/chosen": -2.3292670249938965, "logits/rejected": -2.1124508380889893, "logps/chosen": -220.2982940673828, "logps/rejected": -195.1908721923828, "loss": 1.3919, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.008099144324660301, "rewards/margins": 0.012995732948184013, "rewards/rejected": -0.021094877272844315, "rewards/safe_rewards": -0.010834941640496254, "rewards/unsafe_rewards": -0.005363349802792072, "step": 80 }, { "epoch": 0.05, "learning_rate": 2.4193548387096775e-07, "logits/chosen": -2.3152518272399902, "logits/rejected": -2.1177124977111816, "logps/chosen": -209.4881134033203, "logps/rejected": -170.4688720703125, "loss": 1.4229, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.03572874516248703, "rewards/margins": 0.025636380538344383, "rewards/rejected": -0.061365120112895966, "rewards/safe_rewards": -0.03464391082525253, "rewards/unsafe_rewards": -0.03681357204914093, "step": 90 }, { "epoch": 0.05, "learning_rate": 2.6881720430107523e-07, "logits/chosen": -2.312987804412842, "logits/rejected": -2.1187119483947754, "logps/chosen": -204.9591827392578, "logps/rejected": -180.0883331298828, "loss": 1.4099, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0888073593378067, "rewards/margins": 0.04026350378990173, "rewards/rejected": -0.12907087802886963, "rewards/safe_rewards": -0.08975542336702347, "rewards/unsafe_rewards": -0.08785931766033173, "step": 100 }, { "epoch": 0.06, "learning_rate": 2.956989247311828e-07, "logits/chosen": -2.2454307079315186, "logits/rejected": -2.0064516067504883, "logps/chosen": -224.6505889892578, "logps/rejected": -190.16635131835938, "loss": 1.4442, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12704427540302277, "rewards/margins": 0.054799921810626984, "rewards/rejected": -0.18184418976306915, "rewards/safe_rewards": -0.12200836837291718, "rewards/unsafe_rewards": -0.13208015263080597, "step": 110 }, { "epoch": 0.06, "learning_rate": 3.225806451612903e-07, "logits/chosen": -2.26438570022583, "logits/rejected": -1.9198744297027588, "logps/chosen": -222.88461303710938, "logps/rejected": -173.9338836669922, "loss": 1.3613, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.1163511872291565, "rewards/margins": 0.09466449916362762, "rewards/rejected": -0.21101567149162292, "rewards/safe_rewards": -0.11146645247936249, "rewards/unsafe_rewards": -0.1212359219789505, "step": 120 }, { "epoch": 0.07, "learning_rate": 3.4946236559139783e-07, "logits/chosen": -2.19810152053833, "logits/rejected": -1.9682261943817139, "logps/chosen": -241.05618286132812, "logps/rejected": -201.30264282226562, "loss": 1.2805, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.26873043179512024, "rewards/margins": 0.12516936659812927, "rewards/rejected": -0.39389973878860474, "rewards/safe_rewards": -0.21829214692115784, "rewards/unsafe_rewards": -0.3191686272621155, "step": 130 }, { "epoch": 0.08, "learning_rate": 3.7634408602150537e-07, "logits/chosen": -2.192406177520752, "logits/rejected": -1.845873236656189, "logps/chosen": -241.24447631835938, "logps/rejected": -219.4851531982422, "loss": 1.2394, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2985479533672333, "rewards/margins": 0.09591639041900635, "rewards/rejected": -0.394464373588562, "rewards/safe_rewards": -0.2801482379436493, "rewards/unsafe_rewards": -0.31694772839546204, "step": 140 }, { "epoch": 0.08, "learning_rate": 4.0322580645161285e-07, "logits/chosen": -2.0371975898742676, "logits/rejected": -1.7497107982635498, "logps/chosen": -249.6632537841797, "logps/rejected": -225.79537963867188, "loss": 1.1556, "rewards/accuracies": 0.625, "rewards/chosen": -0.4271857738494873, "rewards/margins": 0.13365456461906433, "rewards/rejected": -0.5608403086662292, "rewards/safe_rewards": -0.42928582429885864, "rewards/unsafe_rewards": -0.4250856935977936, "step": 150 }, { "epoch": 0.09, "learning_rate": 4.3010752688172043e-07, "logits/chosen": -1.9401572942733765, "logits/rejected": -1.5656859874725342, "logps/chosen": -246.42398071289062, "logps/rejected": -226.51400756835938, "loss": 1.2948, "rewards/accuracies": 0.625, "rewards/chosen": -0.2622266113758087, "rewards/margins": 0.10594137012958527, "rewards/rejected": -0.3681679964065552, "rewards/safe_rewards": -0.24256543815135956, "rewards/unsafe_rewards": -0.28188782930374146, "step": 160 }, { "epoch": 0.09, "learning_rate": 4.569892473118279e-07, "logits/chosen": -1.9621531963348389, "logits/rejected": -1.6663320064544678, "logps/chosen": -253.3407440185547, "logps/rejected": -226.83035278320312, "loss": 1.1636, "rewards/accuracies": 0.71875, "rewards/chosen": -0.38964638113975525, "rewards/margins": 0.20659947395324707, "rewards/rejected": -0.5962458848953247, "rewards/safe_rewards": -0.3696528375148773, "rewards/unsafe_rewards": -0.4096398949623108, "step": 170 }, { "epoch": 0.1, "learning_rate": 4.838709677419355e-07, "logits/chosen": -2.0335605144500732, "logits/rejected": -1.65840744972229, "logps/chosen": -232.89804077148438, "logps/rejected": -238.404296875, "loss": 1.153, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.40930503606796265, "rewards/margins": 0.26214295625686646, "rewards/rejected": -0.6714479327201843, "rewards/safe_rewards": -0.42569422721862793, "rewards/unsafe_rewards": -0.39291584491729736, "step": 180 }, { "epoch": 0.1, "learning_rate": 4.999929391798331e-07, "logits/chosen": -2.087995767593384, "logits/rejected": -1.7015736103057861, "logps/chosen": -233.88204956054688, "logps/rejected": -231.9954071044922, "loss": 1.1701, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.39558762311935425, "rewards/margins": 0.25256142020225525, "rewards/rejected": -0.6481491327285767, "rewards/safe_rewards": -0.3778998851776123, "rewards/unsafe_rewards": -0.4132753312587738, "step": 190 }, { "epoch": 0.11, "learning_rate": 4.9991350953333e-07, "logits/chosen": -2.0101492404937744, "logits/rejected": -1.688194990158081, "logps/chosen": -269.3438720703125, "logps/rejected": -272.6773376464844, "loss": 1.1309, "rewards/accuracies": 0.6875, "rewards/chosen": -0.49628791213035583, "rewards/margins": 0.21785268187522888, "rewards/rejected": -0.7141406536102295, "rewards/safe_rewards": -0.47186246514320374, "rewards/unsafe_rewards": -0.5207133293151855, "step": 200 }, { "epoch": 0.11, "learning_rate": 4.997458523498236e-07, "logits/chosen": -2.1067311763763428, "logits/rejected": -1.8388845920562744, "logps/chosen": -260.73553466796875, "logps/rejected": -250.10794067382812, "loss": 1.1343, "rewards/accuracies": 0.65625, "rewards/chosen": -0.685817539691925, "rewards/margins": 0.2200475037097931, "rewards/rejected": -0.9058650732040405, "rewards/safe_rewards": -0.6602068543434143, "rewards/unsafe_rewards": -0.7114282250404358, "step": 210 }, { "epoch": 0.12, "learning_rate": 4.99490026817712e-07, "logits/chosen": -2.0778934955596924, "logits/rejected": -1.7963426113128662, "logps/chosen": -245.4992218017578, "logps/rejected": -252.6900177001953, "loss": 1.149, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.5112438201904297, "rewards/margins": 0.3192596137523651, "rewards/rejected": -0.8305034637451172, "rewards/safe_rewards": -0.469885915517807, "rewards/unsafe_rewards": -0.5526017546653748, "step": 220 }, { "epoch": 0.12, "learning_rate": 4.991461232516674e-07, "logits/chosen": -2.041980266571045, "logits/rejected": -1.7099599838256836, "logps/chosen": -286.7567443847656, "logps/rejected": -276.52020263671875, "loss": 1.1816, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.566757082939148, "rewards/margins": 0.22073951363563538, "rewards/rejected": -0.7874965667724609, "rewards/safe_rewards": -0.5972923040390015, "rewards/unsafe_rewards": -0.5362219214439392, "step": 230 }, { "epoch": 0.13, "learning_rate": 4.98714263060751e-07, "logits/chosen": -2.079230546951294, "logits/rejected": -1.6963192224502563, "logps/chosen": -245.6575927734375, "logps/rejected": -231.0216064453125, "loss": 1.1425, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.5724560022354126, "rewards/margins": 0.2712286710739136, "rewards/rejected": -0.8436846733093262, "rewards/safe_rewards": -0.5816723108291626, "rewards/unsafe_rewards": -0.5632396936416626, "step": 240 }, { "epoch": 0.13, "learning_rate": 4.98194598705552e-07, "logits/chosen": -1.9290701150894165, "logits/rejected": -1.7140071392059326, "logps/chosen": -283.2284851074219, "logps/rejected": -276.1746826171875, "loss": 1.2033, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.9349914789199829, "rewards/margins": 0.15353193879127502, "rewards/rejected": -1.088523268699646, "rewards/safe_rewards": -0.9215167164802551, "rewards/unsafe_rewards": -0.9484661817550659, "step": 250 }, { "epoch": 0.14, "learning_rate": 4.975873136443648e-07, "logits/chosen": -2.1985442638397217, "logits/rejected": -1.9197852611541748, "logps/chosen": -303.9427795410156, "logps/rejected": -298.4468688964844, "loss": 1.0412, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7289992570877075, "rewards/margins": 0.2902063727378845, "rewards/rejected": -1.0192055702209473, "rewards/safe_rewards": -0.8002890348434448, "rewards/unsafe_rewards": -0.6577093005180359, "step": 260 }, { "epoch": 0.15, "learning_rate": 4.968926222684212e-07, "logits/chosen": -2.028428792953491, "logits/rejected": -1.8198667764663696, "logps/chosen": -262.1617126464844, "logps/rejected": -279.1395568847656, "loss": 1.0273, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6195804476737976, "rewards/margins": 0.3619759678840637, "rewards/rejected": -0.9815564155578613, "rewards/safe_rewards": -0.6148445010185242, "rewards/unsafe_rewards": -0.6243164539337158, "step": 270 }, { "epoch": 0.15, "learning_rate": 4.961107698262044e-07, "logits/chosen": -1.9460862874984741, "logits/rejected": -1.6114823818206787, "logps/chosen": -289.10284423828125, "logps/rejected": -280.1482849121094, "loss": 1.0933, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.70842045545578, "rewards/margins": 0.2855362296104431, "rewards/rejected": -0.9939567446708679, "rewards/safe_rewards": -0.6650065183639526, "rewards/unsafe_rewards": -0.7518342733383179, "step": 280 }, { "epoch": 0.16, "learning_rate": 4.952420323368673e-07, "logits/chosen": -2.0242223739624023, "logits/rejected": -1.8324100971221924, "logps/chosen": -237.0861358642578, "logps/rejected": -266.013916015625, "loss": 1.1438, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.4568088948726654, "rewards/margins": 0.3911629617214203, "rewards/rejected": -0.8479719161987305, "rewards/safe_rewards": -0.49832311272621155, "rewards/unsafe_rewards": -0.41529473662376404, "step": 290 }, { "epoch": 0.16, "learning_rate": 4.942867164927899e-07, "logits/chosen": -1.9857969284057617, "logits/rejected": -1.7354557514190674, "logps/chosen": -262.5624084472656, "logps/rejected": -261.7809143066406, "loss": 1.1983, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6027632355690002, "rewards/margins": 0.323146253824234, "rewards/rejected": -0.9259093999862671, "rewards/safe_rewards": -0.6246525645256042, "rewards/unsafe_rewards": -0.5808738470077515, "step": 300 }, { "epoch": 0.17, "learning_rate": 4.932451595513062e-07, "logits/chosen": -2.0492186546325684, "logits/rejected": -1.6627346277236938, "logps/chosen": -287.6437072753906, "logps/rejected": -296.08685302734375, "loss": 0.9844, "rewards/accuracies": 0.75, "rewards/chosen": -0.6867466568946838, "rewards/margins": 0.3914056420326233, "rewards/rejected": -1.0781524181365967, "rewards/safe_rewards": -0.7488200664520264, "rewards/unsafe_rewards": -0.6246733069419861, "step": 310 }, { "epoch": 0.17, "learning_rate": 4.921177292156419e-07, "logits/chosen": -2.0579938888549805, "logits/rejected": -1.661026954650879, "logps/chosen": -275.15484619140625, "logps/rejected": -296.12921142578125, "loss": 0.9846, "rewards/accuracies": 0.75, "rewards/chosen": -0.6913672089576721, "rewards/margins": 0.45374804735183716, "rewards/rejected": -1.1451152563095093, "rewards/safe_rewards": -0.6368889808654785, "rewards/unsafe_rewards": -0.7458454370498657, "step": 320 }, { "epoch": 0.18, "learning_rate": 4.909048235051033e-07, "logits/chosen": -1.8785194158554077, "logits/rejected": -1.6157915592193604, "logps/chosen": -287.54022216796875, "logps/rejected": -308.53485107421875, "loss": 1.05, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.881294846534729, "rewards/margins": 0.377378910779953, "rewards/rejected": -1.2586736679077148, "rewards/safe_rewards": -0.8856824040412903, "rewards/unsafe_rewards": -0.876907229423523, "step": 330 }, { "epoch": 0.18, "learning_rate": 4.896068706145631e-07, "logits/chosen": -2.043916702270508, "logits/rejected": -1.7367770671844482, "logps/chosen": -311.2376708984375, "logps/rejected": -299.87225341796875, "loss": 1.0624, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.005437970161438, "rewards/margins": 0.386522501707077, "rewards/rejected": -1.3919605016708374, "rewards/safe_rewards": -1.0309627056121826, "rewards/unsafe_rewards": -0.9799133539199829, "step": 340 }, { "epoch": 0.19, "learning_rate": 4.882243287632946e-07, "logits/chosen": -2.2845442295074463, "logits/rejected": -2.015984058380127, "logps/chosen": -249.07626342773438, "logps/rejected": -270.349609375, "loss": 1.0634, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.5606988668441772, "rewards/margins": 0.3308585584163666, "rewards/rejected": -0.8915573954582214, "rewards/safe_rewards": -0.5756514668464661, "rewards/unsafe_rewards": -0.5457462072372437, "step": 350 }, { "epoch": 0.19, "learning_rate": 4.867576860332048e-07, "logits/chosen": -2.2117257118225098, "logits/rejected": -1.9762624502182007, "logps/chosen": -250.96493530273438, "logps/rejected": -287.42401123046875, "loss": 1.0082, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.7890839576721191, "rewards/margins": 0.39461660385131836, "rewards/rejected": -1.1837005615234375, "rewards/safe_rewards": -0.8431307673454285, "rewards/unsafe_rewards": -0.7350370287895203, "step": 360 }, { "epoch": 0.2, "learning_rate": 4.85207460196526e-07, "logits/chosen": -2.083486557006836, "logits/rejected": -1.7575275897979736, "logps/chosen": -315.93890380859375, "logps/rejected": -334.19989013671875, "loss": 1.0535, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1155636310577393, "rewards/margins": 0.3764941096305847, "rewards/rejected": -1.4920578002929688, "rewards/safe_rewards": -1.1053030490875244, "rewards/unsafe_rewards": -1.125824213027954, "step": 370 }, { "epoch": 0.2, "learning_rate": 4.835741985330259e-07, "logits/chosen": -2.07675838470459, "logits/rejected": -1.7431271076202393, "logps/chosen": -267.29132080078125, "logps/rejected": -277.96624755859375, "loss": 1.0022, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6648051142692566, "rewards/margins": 0.39567989110946655, "rewards/rejected": -1.0604850053787231, "rewards/safe_rewards": -0.6165894865989685, "rewards/unsafe_rewards": -0.7130206823348999, "step": 380 }, { "epoch": 0.21, "learning_rate": 4.818584776367992e-07, "logits/chosen": -1.928900122642517, "logits/rejected": -1.7115558385849, "logps/chosen": -286.6376647949219, "logps/rejected": -315.5821838378906, "loss": 1.0452, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.8273309469223022, "rewards/margins": 0.4084450602531433, "rewards/rejected": -1.2357759475708008, "rewards/safe_rewards": -0.8857539892196655, "rewards/unsafe_rewards": -0.7689078450202942, "step": 390 }, { "epoch": 0.22, "learning_rate": 4.800609032127122e-07, "logits/chosen": -1.9402987957000732, "logits/rejected": -1.653032898902893, "logps/chosen": -315.54443359375, "logps/rejected": -311.26129150390625, "loss": 1.096, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1000012159347534, "rewards/margins": 0.32444682717323303, "rewards/rejected": -1.4244478940963745, "rewards/safe_rewards": -1.031652569770813, "rewards/unsafe_rewards": -1.1683496236801147, "step": 400 }, { "epoch": 0.22, "learning_rate": 4.78182109862569e-07, "logits/chosen": -1.957297682762146, "logits/rejected": -1.815405249595642, "logps/chosen": -275.63458251953125, "logps/rejected": -291.7708435058594, "loss": 1.1237, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.8423420786857605, "rewards/margins": 0.27593573927879333, "rewards/rejected": -1.1182777881622314, "rewards/safe_rewards": -0.7917040586471558, "rewards/unsafe_rewards": -0.8929800987243652, "step": 410 }, { "epoch": 0.23, "learning_rate": 4.7622276086107677e-07, "logits/chosen": -2.0669121742248535, "logits/rejected": -1.759894609451294, "logps/chosen": -285.4234924316406, "logps/rejected": -295.45294189453125, "loss": 1.0445, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7513025999069214, "rewards/margins": 0.3429652750492096, "rewards/rejected": -1.0942678451538086, "rewards/safe_rewards": -0.8147345781326294, "rewards/unsafe_rewards": -0.6878706216812134, "step": 420 }, { "epoch": 0.23, "learning_rate": 4.741835479216879e-07, "logits/chosen": -2.0404961109161377, "logits/rejected": -1.6269737482070923, "logps/chosen": -326.9522399902344, "logps/rejected": -324.0838623046875, "loss": 1.0151, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.8008167147636414, "rewards/margins": 0.46369487047195435, "rewards/rejected": -1.2645115852355957, "rewards/safe_rewards": -0.8669862747192383, "rewards/unsafe_rewards": -0.7346470952033997, "step": 430 }, { "epoch": 0.24, "learning_rate": 4.720651909524036e-07, "logits/chosen": -2.04176664352417, "logits/rejected": -1.7474247217178345, "logps/chosen": -265.5859069824219, "logps/rejected": -273.75836181640625, "loss": 1.0613, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.6951051950454712, "rewards/margins": 0.3854682147502899, "rewards/rejected": -1.0805734395980835, "rewards/safe_rewards": -0.8021440505981445, "rewards/unsafe_rewards": -0.5880664587020874, "step": 440 }, { "epoch": 0.24, "learning_rate": 4.698684378016222e-07, "logits/chosen": -1.9997985363006592, "logits/rejected": -1.7316805124282837, "logps/chosen": -258.0675354003906, "logps/rejected": -275.7622375488281, "loss": 1.057, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.7074462175369263, "rewards/margins": 0.33383387327194214, "rewards/rejected": -1.0412800312042236, "rewards/safe_rewards": -0.6980961561203003, "rewards/unsafe_rewards": -0.7167961597442627, "step": 450 }, { "epoch": 0.25, "learning_rate": 4.675940639941256e-07, "logits/chosen": -2.0017848014831543, "logits/rejected": -1.6473373174667358, "logps/chosen": -284.409912109375, "logps/rejected": -302.1802978515625, "loss": 0.9964, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.7972325086593628, "rewards/margins": 0.45086246728897095, "rewards/rejected": -1.248094916343689, "rewards/safe_rewards": -0.7986913919448853, "rewards/unsafe_rewards": -0.7957736849784851, "step": 460 }, { "epoch": 0.25, "learning_rate": 4.6524287245729286e-07, "logits/chosen": -1.8008419275283813, "logits/rejected": -1.545585036277771, "logps/chosen": -285.72833251953125, "logps/rejected": -295.92877197265625, "loss": 1.0215, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.9112635850906372, "rewards/margins": 0.3833921253681183, "rewards/rejected": -1.2946555614471436, "rewards/safe_rewards": -0.9739904403686523, "rewards/unsafe_rewards": -0.8485366702079773, "step": 470 }, { "epoch": 0.26, "learning_rate": 4.628156932376418e-07, "logits/chosen": -1.935253381729126, "logits/rejected": -1.5615403652191162, "logps/chosen": -283.60662841796875, "logps/rejected": -275.73553466796875, "loss": 1.0577, "rewards/accuracies": 0.75, "rewards/chosen": -0.8973907232284546, "rewards/margins": 0.34667691588401794, "rewards/rejected": -1.244067668914795, "rewards/safe_rewards": -0.9187390208244324, "rewards/unsafe_rewards": -0.8760424852371216, "step": 480 }, { "epoch": 0.26, "learning_rate": 4.603133832077953e-07, "logits/chosen": -2.0226516723632812, "logits/rejected": -1.757315993309021, "logps/chosen": -319.8951416015625, "logps/rejected": -344.83306884765625, "loss": 1.0078, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8721585273742676, "rewards/margins": 0.43064364790916443, "rewards/rejected": -1.302802324295044, "rewards/safe_rewards": -0.8747318983078003, "rewards/unsafe_rewards": -0.8695852160453796, "step": 490 }, { "epoch": 0.27, "learning_rate": 4.5773682576397776e-07, "logits/chosen": -1.8940719366073608, "logits/rejected": -1.6204118728637695, "logps/chosen": -282.92388916015625, "logps/rejected": -298.21685791015625, "loss": 1.0274, "rewards/accuracies": 0.75, "rewards/chosen": -0.8302618265151978, "rewards/margins": 0.41684699058532715, "rewards/rejected": -1.2471086978912354, "rewards/safe_rewards": -0.8429195284843445, "rewards/unsafe_rewards": -0.8176040649414062, "step": 500 }, { "epoch": 0.27, "eval_logits/chosen": -1.461648941040039, "eval_logits/rejected": -0.9854875206947327, "eval_logps/chosen": -232.3561248779297, "eval_logps/rejected": -222.5215606689453, "eval_loss": 0.37307849526405334, "eval_rewards/accuracies": 0.7075266242027283, "eval_rewards/chosen": -1.0191720724105835, "eval_rewards/margins": 0.28133097290992737, "eval_rewards/rejected": -1.300503134727478, "eval_rewards/safe_rewards": -1.0088554620742798, "eval_rewards/unsafe_rewards": -1.028071403503418, "eval_runtime": 1058.7885, "eval_samples_per_second": 31.209, "eval_steps_per_second": 0.976, "step": 500 }, { "epoch": 0.27, "learning_rate": 4.5508693051414774e-07, "logits/chosen": -1.9713261127471924, "logits/rejected": -1.7668718099594116, "logps/chosen": -295.57354736328125, "logps/rejected": -315.67010498046875, "loss": 1.0145, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.992205023765564, "rewards/margins": 0.4039764404296875, "rewards/rejected": -1.3961814641952515, "rewards/safe_rewards": -0.9630918502807617, "rewards/unsafe_rewards": -1.0213181972503662, "step": 510 }, { "epoch": 0.28, "learning_rate": 4.52364632956877e-07, "logits/chosen": -2.020744562149048, "logits/rejected": -1.7648818492889404, "logps/chosen": -306.5279235839844, "logps/rejected": -284.1745910644531, "loss": 1.1779, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9038923978805542, "rewards/margins": 0.2524818778038025, "rewards/rejected": -1.1563743352890015, "rewards/safe_rewards": -0.9023400545120239, "rewards/unsafe_rewards": -0.9054449200630188, "step": 520 }, { "epoch": 0.29, "learning_rate": 4.4957089415108895e-07, "logits/chosen": -1.965404748916626, "logits/rejected": -1.6829001903533936, "logps/chosen": -266.58013916015625, "logps/rejected": -311.5513916015625, "loss": 0.9603, "rewards/accuracies": 0.78125, "rewards/chosen": -0.7168244123458862, "rewards/margins": 0.5065333247184753, "rewards/rejected": -1.2233576774597168, "rewards/safe_rewards": -0.7431866526603699, "rewards/unsafe_rewards": -0.6904621720314026, "step": 530 }, { "epoch": 0.29, "learning_rate": 4.467067003767745e-07, "logits/chosen": -1.9147508144378662, "logits/rejected": -1.4926608800888062, "logps/chosen": -276.83740234375, "logps/rejected": -303.65716552734375, "loss": 1.0585, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8392030000686646, "rewards/margins": 0.5333585739135742, "rewards/rejected": -1.3725616931915283, "rewards/safe_rewards": -0.8698552250862122, "rewards/unsafe_rewards": -0.8085508346557617, "step": 540 }, { "epoch": 0.3, "learning_rate": 4.437730627868027e-07, "logits/chosen": -1.8325055837631226, "logits/rejected": -1.4082276821136475, "logps/chosen": -256.5406799316406, "logps/rejected": -277.9321594238281, "loss": 0.9491, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.7962465286254883, "rewards/margins": 0.5230618715286255, "rewards/rejected": -1.3193082809448242, "rewards/safe_rewards": -0.7890614867210388, "rewards/unsafe_rewards": -0.803431510925293, "step": 550 }, { "epoch": 0.3, "learning_rate": 4.4077101704995163e-07, "logits/chosen": -1.9758007526397705, "logits/rejected": -1.716571569442749, "logps/chosen": -271.619140625, "logps/rejected": -284.5029602050781, "loss": 1.0222, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.7951744198799133, "rewards/margins": 0.3159455358982086, "rewards/rejected": -1.1111198663711548, "rewards/safe_rewards": -0.751990020275116, "rewards/unsafe_rewards": -0.8383587002754211, "step": 560 }, { "epoch": 0.31, "learning_rate": 4.3770162298528356e-07, "logits/chosen": -1.9304263591766357, "logits/rejected": -1.6568174362182617, "logps/chosen": -294.840087890625, "logps/rejected": -296.0787658691406, "loss": 1.0612, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9997127652168274, "rewards/margins": 0.4206429421901703, "rewards/rejected": -1.4203556776046753, "rewards/safe_rewards": -0.9456769824028015, "rewards/unsafe_rewards": -1.053748369216919, "step": 570 }, { "epoch": 0.31, "learning_rate": 4.3456596418799476e-07, "logits/chosen": -1.8863794803619385, "logits/rejected": -1.6375776529312134, "logps/chosen": -308.4244689941406, "logps/rejected": -312.00836181640625, "loss": 0.9379, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0467333793640137, "rewards/margins": 0.35309094190597534, "rewards/rejected": -1.3998241424560547, "rewards/safe_rewards": -1.0730044841766357, "rewards/unsafe_rewards": -1.0204620361328125, "step": 580 }, { "epoch": 0.32, "learning_rate": 4.313651476468715e-07, "logits/chosen": -1.9189523458480835, "logits/rejected": -1.6596009731292725, "logps/chosen": -302.38214111328125, "logps/rejected": -310.23687744140625, "loss": 0.9881, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0581415891647339, "rewards/margins": 0.3177962601184845, "rewards/rejected": -1.375937819480896, "rewards/safe_rewards": -1.1356565952301025, "rewards/unsafe_rewards": -0.9806265830993652, "step": 590 }, { "epoch": 0.32, "learning_rate": 4.2810030335348693e-07, "logits/chosen": -1.9322669506072998, "logits/rejected": -1.6003156900405884, "logps/chosen": -305.31378173828125, "logps/rejected": -295.27435302734375, "loss": 1.0005, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.9873701930046082, "rewards/margins": 0.315813809633255, "rewards/rejected": -1.3031837940216064, "rewards/safe_rewards": -0.9451528787612915, "rewards/unsafe_rewards": -1.0295875072479248, "step": 600 }, { "epoch": 0.33, "learning_rate": 4.2477258390327806e-07, "logits/chosen": -1.9405990839004517, "logits/rejected": -1.6028436422348022, "logps/chosen": -266.96820068359375, "logps/rejected": -300.30810546875, "loss": 0.952, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8195999264717102, "rewards/margins": 0.4460810720920563, "rewards/rejected": -1.2656810283660889, "rewards/safe_rewards": -0.8683537244796753, "rewards/unsafe_rewards": -0.7708461880683899, "step": 610 }, { "epoch": 0.33, "learning_rate": 4.2138316408864197e-07, "logits/chosen": -1.9435718059539795, "logits/rejected": -1.5219794511795044, "logps/chosen": -273.6173400878906, "logps/rejected": -296.8672790527344, "loss": 0.8771, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.7538294196128845, "rewards/margins": 0.5905435085296631, "rewards/rejected": -1.3443728685379028, "rewards/safe_rewards": -0.7166475653648376, "rewards/unsafe_rewards": -0.7910112142562866, "step": 620 }, { "epoch": 0.34, "learning_rate": 4.179332404841962e-07, "logits/chosen": -1.6942704916000366, "logits/rejected": -1.2099497318267822, "logps/chosen": -327.61376953125, "logps/rejected": -344.44866943359375, "loss": 0.9468, "rewards/accuracies": 0.71875, "rewards/chosen": -1.1748586893081665, "rewards/margins": 0.5536222457885742, "rewards/rejected": -1.7284809350967407, "rewards/safe_rewards": -1.184999942779541, "rewards/unsafe_rewards": -1.1647173166275024, "step": 630 }, { "epoch": 0.34, "learning_rate": 4.1442403102434954e-07, "logits/chosen": -1.7647279500961304, "logits/rejected": -1.3675248622894287, "logps/chosen": -310.9916076660156, "logps/rejected": -321.579345703125, "loss": 1.0042, "rewards/accuracies": 0.75, "rewards/chosen": -0.9101033210754395, "rewards/margins": 0.5184718370437622, "rewards/rejected": -1.428575038909912, "rewards/safe_rewards": -0.9146644473075867, "rewards/unsafe_rewards": -0.9055421948432922, "step": 640 }, { "epoch": 0.35, "learning_rate": 4.108567745733318e-07, "logits/chosen": -1.7713772058486938, "logits/rejected": -1.3516300916671753, "logps/chosen": -255.63150024414062, "logps/rejected": -284.5046081542969, "loss": 1.0245, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.821179986000061, "rewards/margins": 0.4137188792228699, "rewards/rejected": -1.2348989248275757, "rewards/safe_rewards": -0.8415560722351074, "rewards/unsafe_rewards": -0.8008037805557251, "step": 650 }, { "epoch": 0.36, "learning_rate": 4.0723273048783426e-07, "logits/chosen": -1.8442039489746094, "logits/rejected": -1.5036883354187012, "logps/chosen": -304.8941650390625, "logps/rejected": -295.1714172363281, "loss": 1.0401, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.8060005903244019, "rewards/margins": 0.4549127221107483, "rewards/rejected": -1.2609132528305054, "rewards/safe_rewards": -0.7164521813392639, "rewards/unsafe_rewards": -0.8955489993095398, "step": 660 }, { "epoch": 0.36, "learning_rate": 4.0355317817241697e-07, "logits/chosen": -1.8206180334091187, "logits/rejected": -1.4083284139633179, "logps/chosen": -316.703369140625, "logps/rejected": -288.38323974609375, "loss": 1.0164, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8776229619979858, "rewards/margins": 0.3921293020248413, "rewards/rejected": -1.2697522640228271, "rewards/safe_rewards": -0.8068926930427551, "rewards/unsafe_rewards": -0.9483532905578613, "step": 670 }, { "epoch": 0.37, "learning_rate": 3.998194166278367e-07, "logits/chosen": -1.7814273834228516, "logits/rejected": -1.490839958190918, "logps/chosen": -295.8428955078125, "logps/rejected": -305.77972412109375, "loss": 1.0484, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.0455328226089478, "rewards/margins": 0.34231680631637573, "rewards/rejected": -1.3878495693206787, "rewards/safe_rewards": -1.0646705627441406, "rewards/unsafe_rewards": -1.0263949632644653, "step": 680 }, { "epoch": 0.37, "learning_rate": 3.9603276399245855e-07, "logits/chosen": -1.8057140111923218, "logits/rejected": -1.3926641941070557, "logps/chosen": -323.49139404296875, "logps/rejected": -323.6500549316406, "loss": 1.0317, "rewards/accuracies": 0.71875, "rewards/chosen": -1.1086748838424683, "rewards/margins": 0.44791918992996216, "rewards/rejected": -1.556593894958496, "rewards/safe_rewards": -1.087368130683899, "rewards/unsafe_rewards": -1.1299816370010376, "step": 690 }, { "epoch": 0.38, "learning_rate": 3.9219455707691e-07, "logits/chosen": -1.895880937576294, "logits/rejected": -1.5151503086090088, "logps/chosen": -295.07525634765625, "logps/rejected": -304.41241455078125, "loss": 0.9783, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9127877950668335, "rewards/margins": 0.4130992889404297, "rewards/rejected": -1.3258870840072632, "rewards/safe_rewards": -0.9074820280075073, "rewards/unsafe_rewards": -0.9180935025215149, "step": 700 }, { "epoch": 0.38, "learning_rate": 3.883061508921439e-07, "logits/chosen": -1.9308559894561768, "logits/rejected": -1.6609346866607666, "logps/chosen": -277.08172607421875, "logps/rejected": -316.75994873046875, "loss": 1.0027, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.7832189798355103, "rewards/margins": 0.37469321489334106, "rewards/rejected": -1.1579121351242065, "rewards/safe_rewards": -0.7912700176239014, "rewards/unsafe_rewards": -0.7751679420471191, "step": 710 }, { "epoch": 0.39, "learning_rate": 3.8436891817107555e-07, "logits/chosen": -1.761370301246643, "logits/rejected": -1.5160869359970093, "logps/chosen": -285.6172790527344, "logps/rejected": -315.7353210449219, "loss": 1.0302, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.9513989686965942, "rewards/margins": 0.45606541633605957, "rewards/rejected": -1.4074645042419434, "rewards/safe_rewards": -1.0125133991241455, "rewards/unsafe_rewards": -0.890284538269043, "step": 720 }, { "epoch": 0.39, "learning_rate": 3.8038424888396414e-07, "logits/chosen": -1.8194071054458618, "logits/rejected": -1.4451847076416016, "logps/chosen": -295.29241943359375, "logps/rejected": -320.0098571777344, "loss": 0.9634, "rewards/accuracies": 0.71875, "rewards/chosen": -0.8949946165084839, "rewards/margins": 0.4506065249443054, "rewards/rejected": -1.3456013202667236, "rewards/safe_rewards": -0.855640709400177, "rewards/unsafe_rewards": -0.9343485832214355, "step": 730 }, { "epoch": 0.4, "learning_rate": 3.763535497477079e-07, "logits/chosen": -1.8460315465927124, "logits/rejected": -1.4700871706008911, "logps/chosen": -310.37518310546875, "logps/rejected": -316.61236572265625, "loss": 0.9976, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0227649211883545, "rewards/margins": 0.43325185775756836, "rewards/rejected": -1.4560167789459229, "rewards/safe_rewards": -1.0539515018463135, "rewards/unsafe_rewards": -0.9915785789489746, "step": 740 }, { "epoch": 0.4, "learning_rate": 3.7227824372922795e-07, "logits/chosen": -1.8418188095092773, "logits/rejected": -1.479627013206482, "logps/chosen": -286.8067321777344, "logps/rejected": -302.6395568847656, "loss": 0.9795, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.9858185052871704, "rewards/margins": 0.41545191407203674, "rewards/rejected": -1.4012705087661743, "rewards/safe_rewards": -0.9399738311767578, "rewards/unsafe_rewards": -1.031663179397583, "step": 750 }, { "epoch": 0.41, "learning_rate": 3.681597695431148e-07, "logits/chosen": -1.749204397201538, "logits/rejected": -1.3665492534637451, "logps/chosen": -289.353271484375, "logps/rejected": -327.75677490234375, "loss": 0.9449, "rewards/accuracies": 0.78125, "rewards/chosen": -0.9073783755302429, "rewards/margins": 0.5846059918403625, "rewards/rejected": -1.4919843673706055, "rewards/safe_rewards": -0.9077240228652954, "rewards/unsafe_rewards": -0.9070326089859009, "step": 760 }, { "epoch": 0.41, "learning_rate": 3.639995811437159e-07, "logits/chosen": -1.667654037475586, "logits/rejected": -1.314335584640503, "logps/chosen": -289.2320556640625, "logps/rejected": -326.54229736328125, "loss": 0.9656, "rewards/accuracies": 0.75, "rewards/chosen": -0.9039511680603027, "rewards/margins": 0.5181721448898315, "rewards/rejected": -1.4221234321594238, "rewards/safe_rewards": -0.9193918108940125, "rewards/unsafe_rewards": -0.8885105848312378, "step": 770 }, { "epoch": 0.42, "learning_rate": 3.597991472118426e-07, "logits/chosen": -1.7382431030273438, "logits/rejected": -1.3142716884613037, "logps/chosen": -300.62677001953125, "logps/rejected": -313.65814208984375, "loss": 1.055, "rewards/accuracies": 0.75, "rewards/chosen": -0.8587741851806641, "rewards/margins": 0.427274614572525, "rewards/rejected": -1.2860486507415771, "rewards/safe_rewards": -0.8905943036079407, "rewards/unsafe_rewards": -0.8269540071487427, "step": 780 }, { "epoch": 0.43, "learning_rate": 3.5555995063627836e-07, "logits/chosen": -1.8035333156585693, "logits/rejected": -1.4783815145492554, "logps/chosen": -321.8671875, "logps/rejected": -317.4945068359375, "loss": 0.9705, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.9154338836669922, "rewards/margins": 0.400259792804718, "rewards/rejected": -1.3156936168670654, "rewards/safe_rewards": -0.946729838848114, "rewards/unsafe_rewards": -0.8841378092765808, "step": 790 }, { "epoch": 0.43, "learning_rate": 3.512834879902715e-07, "logits/chosen": -1.7292006015777588, "logits/rejected": -1.3531233072280884, "logps/chosen": -297.385009765625, "logps/rejected": -316.39666748046875, "loss": 0.9624, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9740325808525085, "rewards/margins": 0.43682414293289185, "rewards/rejected": -1.4108567237854004, "rewards/safe_rewards": -0.979016900062561, "rewards/unsafe_rewards": -0.9690481424331665, "step": 800 }, { "epoch": 0.44, "learning_rate": 3.4697126900319616e-07, "logits/chosen": -1.6081682443618774, "logits/rejected": -1.1794389486312866, "logps/chosen": -295.1870422363281, "logps/rejected": -306.71160888671875, "loss": 1.0047, "rewards/accuracies": 0.78125, "rewards/chosen": -0.9457162022590637, "rewards/margins": 0.4736977517604828, "rewards/rejected": -1.4194139242172241, "rewards/safe_rewards": -0.8538358807563782, "rewards/unsafe_rewards": -1.0375964641571045, "step": 810 }, { "epoch": 0.44, "learning_rate": 3.426248160275693e-07, "logits/chosen": -1.7311818599700928, "logits/rejected": -1.3600565195083618, "logps/chosen": -278.1633605957031, "logps/rejected": -299.32635498046875, "loss": 1.0259, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.8063637018203735, "rewards/margins": 0.43319129943847656, "rewards/rejected": -1.2395551204681396, "rewards/safe_rewards": -0.859821617603302, "rewards/unsafe_rewards": -0.7529059052467346, "step": 820 }, { "epoch": 0.45, "learning_rate": 3.3824566350161094e-07, "logits/chosen": -1.785316824913025, "logits/rejected": -1.323632836341858, "logps/chosen": -275.93060302734375, "logps/rejected": -278.7740783691406, "loss": 0.9551, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.675979495048523, "rewards/margins": 0.428046852350235, "rewards/rejected": -1.104026436805725, "rewards/safe_rewards": -0.6841101050376892, "rewards/unsafe_rewards": -0.6678491234779358, "step": 830 }, { "epoch": 0.45, "learning_rate": 3.338353574075381e-07, "logits/chosen": -1.5819236040115356, "logits/rejected": -1.3700740337371826, "logps/chosen": -266.2092590332031, "logps/rejected": -284.17657470703125, "loss": 1.1627, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9084580540657043, "rewards/margins": 0.3083532750606537, "rewards/rejected": -1.216811180114746, "rewards/safe_rewards": -0.9423872828483582, "rewards/unsafe_rewards": -0.8745288848876953, "step": 840 }, { "epoch": 0.46, "learning_rate": 3.2939545472578314e-07, "logits/chosen": -1.7262417078018188, "logits/rejected": -1.2280857563018799, "logps/chosen": -324.2544860839844, "logps/rejected": -322.0166320800781, "loss": 1.002, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9809592366218567, "rewards/margins": 0.39795732498168945, "rewards/rejected": -1.3789165019989014, "rewards/safe_rewards": -0.9736999273300171, "rewards/unsafe_rewards": -0.9882184863090515, "step": 850 }, { "epoch": 0.46, "learning_rate": 3.2492752288532916e-07, "logits/chosen": -1.7719913721084595, "logits/rejected": -1.3637843132019043, "logps/chosen": -289.90692138671875, "logps/rejected": -295.659423828125, "loss": 1.0013, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8620317578315735, "rewards/margins": 0.4116358757019043, "rewards/rejected": -1.273667573928833, "rewards/safe_rewards": -0.8024783134460449, "rewards/unsafe_rewards": -0.9215850830078125, "step": 860 }, { "epoch": 0.47, "learning_rate": 3.204331392103574e-07, "logits/chosen": -1.8806778192520142, "logits/rejected": -1.389723539352417, "logps/chosen": -291.46832275390625, "logps/rejected": -283.912841796875, "loss": 0.9879, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.7795971035957336, "rewards/margins": 0.4201742112636566, "rewards/rejected": -1.1997714042663574, "rewards/safe_rewards": -0.8063246607780457, "rewards/unsafe_rewards": -0.7528696060180664, "step": 870 }, { "epoch": 0.47, "learning_rate": 3.159138903634006e-07, "logits/chosen": -1.6265941858291626, "logits/rejected": -1.263318657875061, "logps/chosen": -308.3788757324219, "logps/rejected": -303.78753662109375, "loss": 0.9939, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9974681735038757, "rewards/margins": 0.31481030583381653, "rewards/rejected": -1.312278389930725, "rewards/safe_rewards": -0.9676594734191895, "rewards/unsafe_rewards": -1.0272767543792725, "step": 880 }, { "epoch": 0.48, "learning_rate": 3.1137137178519977e-07, "logits/chosen": -1.5345653295516968, "logits/rejected": -1.2038419246673584, "logps/chosen": -277.8311767578125, "logps/rejected": -317.1457214355469, "loss": 0.9773, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9578744769096375, "rewards/margins": 0.4849475026130676, "rewards/rejected": -1.4428222179412842, "rewards/safe_rewards": -0.9253692626953125, "rewards/unsafe_rewards": -0.9903799891471863, "step": 890 }, { "epoch": 0.48, "learning_rate": 3.068071871314626e-07, "logits/chosen": -1.477648138999939, "logits/rejected": -1.1965210437774658, "logps/chosen": -279.1951599121094, "logps/rejected": -292.29833984375, "loss": 0.9773, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.0047625303268433, "rewards/margins": 0.3235151171684265, "rewards/rejected": -1.328277826309204, "rewards/safe_rewards": -1.013329267501831, "rewards/unsafe_rewards": -0.9961959719657898, "step": 900 }, { "epoch": 0.49, "learning_rate": 3.022229477067205e-07, "logits/chosen": -1.6116926670074463, "logits/rejected": -1.2270816564559937, "logps/chosen": -320.8763732910156, "logps/rejected": -318.58355712890625, "loss": 0.8831, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.0378270149230957, "rewards/margins": 0.4454011917114258, "rewards/rejected": -1.4832282066345215, "rewards/safe_rewards": -1.0344152450561523, "rewards/unsafe_rewards": -1.0412386655807495, "step": 910 }, { "epoch": 0.49, "learning_rate": 2.976202718954869e-07, "logits/chosen": -1.5823721885681152, "logits/rejected": -1.1224069595336914, "logps/chosen": -333.028564453125, "logps/rejected": -350.3519592285156, "loss": 1.0709, "rewards/accuracies": 0.71875, "rewards/chosen": -1.2154980897903442, "rewards/margins": 0.46428003907203674, "rewards/rejected": -1.6797780990600586, "rewards/safe_rewards": -1.1762654781341553, "rewards/unsafe_rewards": -1.2547308206558228, "step": 920 }, { "epoch": 0.5, "learning_rate": 2.930007845909146e-07, "logits/chosen": -1.5423873662948608, "logits/rejected": -1.2088254690170288, "logps/chosen": -333.67645263671875, "logps/rejected": -360.3753662109375, "loss": 0.9999, "rewards/accuracies": 0.75, "rewards/chosen": -1.3605718612670898, "rewards/margins": 0.422391802072525, "rewards/rejected": -1.782963752746582, "rewards/safe_rewards": -1.422663688659668, "rewards/unsafe_rewards": -1.2984803915023804, "step": 930 }, { "epoch": 0.51, "learning_rate": 2.8836611662115634e-07, "logits/chosen": -1.6511509418487549, "logits/rejected": -1.275423288345337, "logps/chosen": -321.2275085449219, "logps/rejected": -312.20733642578125, "loss": 1.0348, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.010995626449585, "rewards/margins": 0.42944231629371643, "rewards/rejected": -1.440437912940979, "rewards/safe_rewards": -0.9675294160842896, "rewards/unsafe_rewards": -1.0544618368148804, "step": 940 }, { "epoch": 0.51, "learning_rate": 2.8371790417362986e-07, "logits/chosen": -1.5915000438690186, "logits/rejected": -1.2896572351455688, "logps/chosen": -281.606201171875, "logps/rejected": -308.20916748046875, "loss": 1.0666, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9426482319831848, "rewards/margins": 0.37754327058792114, "rewards/rejected": -1.3201916217803955, "rewards/safe_rewards": -0.8953598141670227, "rewards/unsafe_rewards": -0.9899368286132812, "step": 950 }, { "epoch": 0.52, "learning_rate": 2.7905778821739056e-07, "logits/chosen": -1.478126049041748, "logits/rejected": -1.1587542295455933, "logps/chosen": -301.42181396484375, "logps/rejected": -304.0444030761719, "loss": 0.9759, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.0664873123168945, "rewards/margins": 0.36265167593955994, "rewards/rejected": -1.4291390180587769, "rewards/safe_rewards": -1.0449130535125732, "rewards/unsafe_rewards": -1.0880613327026367, "step": 960 }, { "epoch": 0.52, "learning_rate": 2.74387413923817e-07, "logits/chosen": -1.4300400018692017, "logits/rejected": -1.1722289323806763, "logps/chosen": -327.14739990234375, "logps/rejected": -327.6680908203125, "loss": 0.977, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.026637315750122, "rewards/margins": 0.39583832025527954, "rewards/rejected": -1.4224755764007568, "rewards/safe_rewards": -1.0289757251739502, "rewards/unsafe_rewards": -1.024298906326294, "step": 970 }, { "epoch": 0.53, "learning_rate": 2.69708430085812e-07, "logits/chosen": -1.6212282180786133, "logits/rejected": -1.1375267505645752, "logps/chosen": -331.76654052734375, "logps/rejected": -341.3858947753906, "loss": 1.0282, "rewards/accuracies": 0.78125, "rewards/chosen": -1.0838706493377686, "rewards/margins": 0.5636218190193176, "rewards/rejected": -1.6474926471710205, "rewards/safe_rewards": -1.0060853958129883, "rewards/unsafe_rewards": -1.161656141281128, "step": 980 }, { "epoch": 0.53, "learning_rate": 2.6502248853572504e-07, "logits/chosen": -1.5994454622268677, "logits/rejected": -1.2842458486557007, "logps/chosen": -291.59063720703125, "logps/rejected": -317.38970947265625, "loss": 1.0111, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0371322631835938, "rewards/margins": 0.5142688155174255, "rewards/rejected": -1.551400899887085, "rewards/safe_rewards": -1.0258736610412598, "rewards/unsafe_rewards": -1.0483907461166382, "step": 990 }, { "epoch": 0.54, "learning_rate": 2.6033124356220325e-07, "logits/chosen": -1.5264742374420166, "logits/rejected": -1.1301841735839844, "logps/chosen": -305.7037048339844, "logps/rejected": -310.7588806152344, "loss": 0.9569, "rewards/accuracies": 0.78125, "rewards/chosen": -1.029294490814209, "rewards/margins": 0.4845431447029114, "rewards/rejected": -1.5138375759124756, "rewards/safe_rewards": -0.948198139667511, "rewards/unsafe_rewards": -1.1103906631469727, "step": 1000 }, { "epoch": 0.54, "eval_logits/chosen": -1.0711549520492554, "eval_logits/rejected": -0.5821475982666016, "eval_logps/chosen": -221.7958984375, "eval_logps/rejected": -212.7307586669922, "eval_loss": 0.34972870349884033, "eval_rewards/accuracies": 0.7209583520889282, "eval_rewards/chosen": -0.913569986820221, "eval_rewards/margins": 0.28902512788772583, "eval_rewards/rejected": -1.2025949954986572, "eval_rewards/safe_rewards": -0.9005662798881531, "eval_rewards/unsafe_rewards": -0.9165622591972351, "eval_runtime": 1062.9878, "eval_samples_per_second": 31.086, "eval_steps_per_second": 0.972, "step": 1000 }, { "epoch": 0.54, "learning_rate": 2.55636351326173e-07, "logits/chosen": -1.4447015523910522, "logits/rejected": -1.0418920516967773, "logps/chosen": -320.98358154296875, "logps/rejected": -330.81341552734375, "loss": 0.8958, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.149300217628479, "rewards/margins": 0.562938928604126, "rewards/rejected": -1.7122390270233154, "rewards/safe_rewards": -1.1756014823913574, "rewards/unsafe_rewards": -1.122998595237732, "step": 1010 }, { "epoch": 0.55, "learning_rate": 2.509394692761622e-07, "logits/chosen": -1.4893229007720947, "logits/rejected": -1.0318996906280518, "logps/chosen": -320.03070068359375, "logps/rejected": -331.4481201171875, "loss": 0.9718, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1435344219207764, "rewards/margins": 0.5211852788925171, "rewards/rejected": -1.6647193431854248, "rewards/safe_rewards": -1.1441400051116943, "rewards/unsafe_rewards": -1.1429284811019897, "step": 1020 }, { "epoch": 0.55, "learning_rate": 2.462422555631674e-07, "logits/chosen": -1.3502875566482544, "logits/rejected": -0.8036888241767883, "logps/chosen": -344.90618896484375, "logps/rejected": -345.6969909667969, "loss": 0.953, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.4247171878814697, "rewards/margins": 0.532504141330719, "rewards/rejected": -1.957221269607544, "rewards/safe_rewards": -1.399427890777588, "rewards/unsafe_rewards": -1.4500062465667725, "step": 1030 }, { "epoch": 0.56, "learning_rate": 2.415463684552728e-07, "logits/chosen": -1.279679536819458, "logits/rejected": -0.875691294670105, "logps/chosen": -329.5700378417969, "logps/rejected": -347.35162353515625, "loss": 1.0239, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3259227275848389, "rewards/margins": 0.47411665320396423, "rewards/rejected": -1.800039529800415, "rewards/safe_rewards": -1.3383002281188965, "rewards/unsafe_rewards": -1.3135454654693604, "step": 1040 }, { "epoch": 0.56, "learning_rate": 2.3685346575222807e-07, "logits/chosen": -1.4535144567489624, "logits/rejected": -0.9614871740341187, "logps/chosen": -325.8673400878906, "logps/rejected": -333.7716064453125, "loss": 0.9704, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.082326889038086, "rewards/margins": 0.5055097341537476, "rewards/rejected": -1.5878366231918335, "rewards/safe_rewards": -1.087576150894165, "rewards/unsafe_rewards": -1.0770776271820068, "step": 1050 }, { "epoch": 0.57, "learning_rate": 2.321652042001919e-07, "logits/chosen": -1.425490140914917, "logits/rejected": -0.9585882425308228, "logps/chosen": -332.23651123046875, "logps/rejected": -367.16656494140625, "loss": 0.9341, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.210457682609558, "rewards/margins": 0.5372633934020996, "rewards/rejected": -1.7477210760116577, "rewards/safe_rewards": -1.155447244644165, "rewards/unsafe_rewards": -1.265467882156372, "step": 1060 }, { "epoch": 0.58, "learning_rate": 2.2748323890684662e-07, "logits/chosen": -1.5478570461273193, "logits/rejected": -0.9782267808914185, "logps/chosen": -313.54412841796875, "logps/rejected": -330.4336853027344, "loss": 0.9344, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0107817649841309, "rewards/margins": 0.6438864469528198, "rewards/rejected": -1.6546680927276611, "rewards/safe_rewards": -1.0349432229995728, "rewards/unsafe_rewards": -0.986620306968689, "step": 1070 }, { "epoch": 0.58, "learning_rate": 2.2280922275709213e-07, "logits/chosen": -1.4732444286346436, "logits/rejected": -1.1423754692077637, "logps/chosen": -317.08258056640625, "logps/rejected": -328.4356689453125, "loss": 0.982, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.0598576068878174, "rewards/margins": 0.41645368933677673, "rewards/rejected": -1.4763113260269165, "rewards/safe_rewards": -1.0165767669677734, "rewards/unsafe_rewards": -1.1031386852264404, "step": 1080 }, { "epoch": 0.59, "learning_rate": 2.1814480582952375e-07, "logits/chosen": -1.489725112915039, "logits/rejected": -1.1142539978027344, "logps/chosen": -309.04681396484375, "logps/rejected": -333.715576171875, "loss": 0.9727, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0693882703781128, "rewards/margins": 0.4686933159828186, "rewards/rejected": -1.5380815267562866, "rewards/safe_rewards": -0.9820979237556458, "rewards/unsafe_rewards": -1.156678557395935, "step": 1090 }, { "epoch": 0.59, "learning_rate": 2.1349163481390187e-07, "logits/chosen": -1.421409010887146, "logits/rejected": -1.031862497329712, "logps/chosen": -316.99139404296875, "logps/rejected": -340.88909912109375, "loss": 0.9693, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1647942066192627, "rewards/margins": 0.4819185137748718, "rewards/rejected": -1.6467128992080688, "rewards/safe_rewards": -1.0639079809188843, "rewards/unsafe_rewards": -1.2656804323196411, "step": 1100 }, { "epoch": 0.6, "learning_rate": 2.0885135242981647e-07, "logits/chosen": -1.4362539052963257, "logits/rejected": -1.005382776260376, "logps/chosen": -351.58978271484375, "logps/rejected": -323.1505126953125, "loss": 0.9265, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1940038204193115, "rewards/margins": 0.4320971369743347, "rewards/rejected": -1.6261011362075806, "rewards/safe_rewards": -1.2753981351852417, "rewards/unsafe_rewards": -1.112609624862671, "step": 1110 }, { "epoch": 0.6, "learning_rate": 2.0422559684675494e-07, "logits/chosen": -1.4388701915740967, "logits/rejected": -0.9763511419296265, "logps/chosen": -328.2054748535156, "logps/rejected": -331.94427490234375, "loss": 0.9247, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.1099811792373657, "rewards/margins": 0.4838770031929016, "rewards/rejected": -1.5938583612442017, "rewards/safe_rewards": -1.143110752105713, "rewards/unsafe_rewards": -1.076851725578308, "step": 1120 }, { "epoch": 0.61, "learning_rate": 1.9961600110577457e-07, "logits/chosen": -1.3994672298431396, "logits/rejected": -0.9330530166625977, "logps/chosen": -320.69989013671875, "logps/rejected": -341.1868591308594, "loss": 1.007, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.1178644895553589, "rewards/margins": 0.4091947674751282, "rewards/rejected": -1.5270591974258423, "rewards/safe_rewards": -1.0592812299728394, "rewards/unsafe_rewards": -1.1764475107192993, "step": 1130 }, { "epoch": 0.61, "learning_rate": 1.950241925429867e-07, "logits/chosen": -1.5059046745300293, "logits/rejected": -0.9375900030136108, "logps/chosen": -301.2913513183594, "logps/rejected": -323.166259765625, "loss": 0.9384, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.0121370553970337, "rewards/margins": 0.6072208285331726, "rewards/rejected": -1.6193578243255615, "rewards/safe_rewards": -1.0190235376358032, "rewards/unsafe_rewards": -1.0052505731582642, "step": 1140 }, { "epoch": 0.62, "learning_rate": 1.9045179221505495e-07, "logits/chosen": -1.4665344953536987, "logits/rejected": -1.1525509357452393, "logps/chosen": -336.7375183105469, "logps/rejected": -342.58258056640625, "loss": 0.9456, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0744739770889282, "rewards/margins": 0.4735342860221863, "rewards/rejected": -1.5480082035064697, "rewards/safe_rewards": -1.0436086654663086, "rewards/unsafe_rewards": -1.1053390502929688, "step": 1150 }, { "epoch": 0.62, "learning_rate": 1.8590041432690893e-07, "logits/chosen": -1.3896484375, "logits/rejected": -1.1142855882644653, "logps/chosen": -297.66619873046875, "logps/rejected": -315.8662109375, "loss": 0.977, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.100203037261963, "rewards/margins": 0.3822095990180969, "rewards/rejected": -1.4824126958847046, "rewards/safe_rewards": -1.1140623092651367, "rewards/unsafe_rewards": -1.0863438844680786, "step": 1160 }, { "epoch": 0.63, "learning_rate": 1.813716656618788e-07, "logits/chosen": -1.331933856010437, "logits/rejected": -1.0040611028671265, "logps/chosen": -300.43267822265625, "logps/rejected": -321.0027770996094, "loss": 0.969, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.1213122606277466, "rewards/margins": 0.4405437111854553, "rewards/rejected": -1.5618560314178467, "rewards/safe_rewards": -1.0390920639038086, "rewards/unsafe_rewards": -1.2035324573516846, "step": 1170 }, { "epoch": 0.63, "learning_rate": 1.7686714501444788e-07, "logits/chosen": -1.4456322193145752, "logits/rejected": -0.8397674560546875, "logps/chosen": -326.2867736816406, "logps/rejected": -333.5230407714844, "loss": 0.9325, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.1393133401870728, "rewards/margins": 0.5787724256515503, "rewards/rejected": -1.7180858850479126, "rewards/safe_rewards": -1.2189667224884033, "rewards/unsafe_rewards": -1.0596599578857422, "step": 1180 }, { "epoch": 0.64, "learning_rate": 1.7238844262582768e-07, "logits/chosen": -1.363268494606018, "logits/rejected": -1.1216974258422852, "logps/chosen": -321.5309143066406, "logps/rejected": -357.5130920410156, "loss": 0.9391, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.1455228328704834, "rewards/margins": 0.5131853222846985, "rewards/rejected": -1.6587082147598267, "rewards/safe_rewards": -1.0672725439071655, "rewards/unsafe_rewards": -1.2237731218338013, "step": 1190 }, { "epoch": 0.65, "learning_rate": 1.679371396225504e-07, "logits/chosen": -1.4268032312393188, "logits/rejected": -0.9499413371086121, "logps/chosen": -310.05328369140625, "logps/rejected": -351.35272216796875, "loss": 0.9212, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.089523434638977, "rewards/margins": 0.6114920973777771, "rewards/rejected": -1.7010157108306885, "rewards/safe_rewards": -1.027521014213562, "rewards/unsafe_rewards": -1.151525855064392, "step": 1200 }, { "epoch": 0.65, "learning_rate": 1.6351480745828096e-07, "logits/chosen": -1.4248908758163452, "logits/rejected": -1.0554434061050415, "logps/chosen": -324.76751708984375, "logps/rejected": -340.4091491699219, "loss": 0.8506, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.2382574081420898, "rewards/margins": 0.46283411979675293, "rewards/rejected": -1.7010915279388428, "rewards/safe_rewards": -1.2276763916015625, "rewards/unsafe_rewards": -1.2488384246826172, "step": 1210 }, { "epoch": 0.66, "learning_rate": 1.5912300735904248e-07, "logits/chosen": -1.5351839065551758, "logits/rejected": -1.176792860031128, "logps/chosen": -334.9280090332031, "logps/rejected": -334.9038391113281, "loss": 0.9582, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1213009357452393, "rewards/margins": 0.4566107392311096, "rewards/rejected": -1.577911615371704, "rewards/safe_rewards": -1.2159931659698486, "rewards/unsafe_rewards": -1.0266087055206299, "step": 1220 }, { "epoch": 0.66, "learning_rate": 1.5476328977205395e-07, "logits/chosen": -1.5172470808029175, "logits/rejected": -1.0838744640350342, "logps/chosen": -318.88702392578125, "logps/rejected": -331.92626953125, "loss": 0.929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.1251935958862305, "rewards/margins": 0.5502282381057739, "rewards/rejected": -1.675421953201294, "rewards/safe_rewards": -1.2570269107818604, "rewards/unsafe_rewards": -0.9933602213859558, "step": 1230 }, { "epoch": 0.67, "learning_rate": 1.5043719381837112e-07, "logits/chosen": -1.4699004888534546, "logits/rejected": -1.1323144435882568, "logps/chosen": -332.270751953125, "logps/rejected": -346.4047546386719, "loss": 0.9436, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1165038347244263, "rewards/margins": 0.47613364458084106, "rewards/rejected": -1.5926374197006226, "rewards/safe_rewards": -1.0986835956573486, "rewards/unsafe_rewards": -1.1343239545822144, "step": 1240 }, { "epoch": 0.67, "learning_rate": 1.461462467495284e-07, "logits/chosen": -1.445765495300293, "logits/rejected": -1.0371205806732178, "logps/chosen": -287.15216064453125, "logps/rejected": -333.86676025390625, "loss": 0.8531, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.092584490776062, "rewards/margins": 0.5968576669692993, "rewards/rejected": -1.6894422769546509, "rewards/safe_rewards": -1.1179049015045166, "rewards/unsafe_rewards": -1.067264199256897, "step": 1250 }, { "epoch": 0.68, "learning_rate": 1.4189196340836865e-07, "logits/chosen": -1.6602089405059814, "logits/rejected": -1.2219452857971191, "logps/chosen": -297.80426025390625, "logps/rejected": -314.51458740234375, "loss": 0.9095, "rewards/accuracies": 0.71875, "rewards/chosen": -0.9903711080551147, "rewards/margins": 0.4589962363243103, "rewards/rejected": -1.4493674039840698, "rewards/safe_rewards": -1.0244104862213135, "rewards/unsafe_rewards": -0.9563320279121399, "step": 1260 }, { "epoch": 0.68, "learning_rate": 1.3767584569425561e-07, "logits/chosen": -1.6483261585235596, "logits/rejected": -1.1660915613174438, "logps/chosen": -309.3149719238281, "logps/rejected": -320.50726318359375, "loss": 0.9112, "rewards/accuracies": 0.78125, "rewards/chosen": -1.0096858739852905, "rewards/margins": 0.4956924319267273, "rewards/rejected": -1.5053783655166626, "rewards/safe_rewards": -1.045543909072876, "rewards/unsafe_rewards": -0.973828136920929, "step": 1270 }, { "epoch": 0.69, "learning_rate": 1.334993820328541e-07, "logits/chosen": -1.501082420349121, "logits/rejected": -1.1109731197357178, "logps/chosen": -289.6914978027344, "logps/rejected": -326.8876953125, "loss": 0.9211, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1317590475082397, "rewards/margins": 0.6352017521858215, "rewards/rejected": -1.766960859298706, "rewards/safe_rewards": -1.0952441692352295, "rewards/unsafe_rewards": -1.1682740449905396, "step": 1280 }, { "epoch": 0.69, "learning_rate": 1.2936404685066852e-07, "logits/chosen": -1.487445592880249, "logits/rejected": -1.1660425662994385, "logps/chosen": -333.1927185058594, "logps/rejected": -355.2337341308594, "loss": 0.9879, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1514599323272705, "rewards/margins": 0.4698604643344879, "rewards/rejected": -1.621320366859436, "rewards/safe_rewards": -1.2052063941955566, "rewards/unsafe_rewards": -1.0977137088775635, "step": 1290 }, { "epoch": 0.7, "learning_rate": 1.252713000545221e-07, "logits/chosen": -1.6853973865509033, "logits/rejected": -1.2987568378448486, "logps/chosen": -322.5224914550781, "logps/rejected": -331.0067138671875, "loss": 0.8714, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0373764038085938, "rewards/margins": 0.5552427768707275, "rewards/rejected": -1.5926191806793213, "rewards/safe_rewards": -1.0258121490478516, "rewards/unsafe_rewards": -1.0489407777786255, "step": 1300 }, { "epoch": 0.7, "learning_rate": 1.2122258651616304e-07, "logits/chosen": -1.6230665445327759, "logits/rejected": -1.1880443096160889, "logps/chosen": -313.47772216796875, "logps/rejected": -304.3802185058594, "loss": 0.9486, "rewards/accuracies": 0.75, "rewards/chosen": -1.0324283838272095, "rewards/margins": 0.39181455969810486, "rewards/rejected": -1.4242427349090576, "rewards/safe_rewards": -0.9644128680229187, "rewards/unsafe_rewards": -1.1004436016082764, "step": 1310 }, { "epoch": 0.71, "learning_rate": 1.1721933556217792e-07, "logits/chosen": -1.5601403713226318, "logits/rejected": -1.2365220785140991, "logps/chosen": -307.1771240234375, "logps/rejected": -328.2700500488281, "loss": 0.9875, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.066193699836731, "rewards/margins": 0.4565289616584778, "rewards/rejected": -1.5227227210998535, "rewards/safe_rewards": -1.0890748500823975, "rewards/unsafe_rewards": -1.0433127880096436, "step": 1320 }, { "epoch": 0.72, "learning_rate": 1.1326296046939333e-07, "logits/chosen": -1.5020397901535034, "logits/rejected": -1.146071195602417, "logps/chosen": -292.02313232421875, "logps/rejected": -311.25604248046875, "loss": 0.926, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.971126914024353, "rewards/margins": 0.559014081954956, "rewards/rejected": -1.5301411151885986, "rewards/safe_rewards": -0.9235084652900696, "rewards/unsafe_rewards": -1.0187455415725708, "step": 1330 }, { "epoch": 0.72, "learning_rate": 1.0935485796594351e-07, "logits/chosen": -1.5235176086425781, "logits/rejected": -1.0568602085113525, "logps/chosen": -333.9339904785156, "logps/rejected": -333.97576904296875, "loss": 1.032, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0942230224609375, "rewards/margins": 0.5436626672744751, "rewards/rejected": -1.6378856897354126, "rewards/safe_rewards": -1.079145073890686, "rewards/unsafe_rewards": -1.109300971031189, "step": 1340 }, { "epoch": 0.73, "learning_rate": 1.0549640773818028e-07, "logits/chosen": -1.411492943763733, "logits/rejected": -1.1914324760437012, "logps/chosen": -315.5474853515625, "logps/rejected": -322.1836242675781, "loss": 0.9732, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1521949768066406, "rewards/margins": 0.41504979133605957, "rewards/rejected": -1.5672447681427002, "rewards/safe_rewards": -1.2113215923309326, "rewards/unsafe_rewards": -1.0930684804916382, "step": 1350 }, { "epoch": 0.73, "learning_rate": 1.0168897194359921e-07, "logits/chosen": -1.5103179216384888, "logits/rejected": -1.1291395425796509, "logps/chosen": -344.5137634277344, "logps/rejected": -345.2103576660156, "loss": 0.9403, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.1670544147491455, "rewards/margins": 0.4278257489204407, "rewards/rejected": -1.5948803424835205, "rewards/safe_rewards": -1.1103397607803345, "rewards/unsafe_rewards": -1.2237694263458252, "step": 1360 }, { "epoch": 0.74, "learning_rate": 9.793389472995392e-08, "logits/chosen": -1.437276840209961, "logits/rejected": -0.9090574383735657, "logps/chosen": -319.93829345703125, "logps/rejected": -318.78802490234375, "loss": 0.8305, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.0438860654830933, "rewards/margins": 0.630165696144104, "rewards/rejected": -1.6740516424179077, "rewards/safe_rewards": -0.9974796175956726, "rewards/unsafe_rewards": -1.0902923345565796, "step": 1370 }, { "epoch": 0.74, "learning_rate": 9.423250176072874e-08, "logits/chosen": -1.4429516792297363, "logits/rejected": -1.0162016153335571, "logps/chosen": -314.0827331542969, "logps/rejected": -311.3514404296875, "loss": 1.0966, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2516918182373047, "rewards/margins": 0.3869698643684387, "rewards/rejected": -1.6386617422103882, "rewards/safe_rewards": -1.2116239070892334, "rewards/unsafe_rewards": -1.291759967803955, "step": 1380 }, { "epoch": 0.75, "learning_rate": 9.058609974713654e-08, "logits/chosen": -1.5069031715393066, "logits/rejected": -1.0585293769836426, "logps/chosen": -312.8193054199219, "logps/rejected": -344.32769775390625, "loss": 0.8858, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0806872844696045, "rewards/margins": 0.6248958110809326, "rewards/rejected": -1.7055833339691162, "rewards/safe_rewards": -1.065792441368103, "rewards/unsafe_rewards": -1.0955822467803955, "step": 1390 }, { "epoch": 0.75, "learning_rate": 8.699597598680753e-08, "logits/chosen": -1.4285691976547241, "logits/rejected": -1.0217626094818115, "logps/chosen": -298.48516845703125, "logps/rejected": -319.5094909667969, "loss": 0.8598, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0639463663101196, "rewards/margins": 0.5066736340522766, "rewards/rejected": -1.570619821548462, "rewards/safe_rewards": -1.049989938735962, "rewards/unsafe_rewards": -1.0779026746749878, "step": 1400 }, { "epoch": 0.76, "learning_rate": 8.346339790933166e-08, "logits/chosen": -1.4796028137207031, "logits/rejected": -1.0254989862442017, "logps/chosen": -303.11322021484375, "logps/rejected": -319.5587463378906, "loss": 0.9735, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.1025015115737915, "rewards/margins": 0.5202707052230835, "rewards/rejected": -1.622771978378296, "rewards/safe_rewards": -1.048610806465149, "rewards/unsafe_rewards": -1.1563920974731445, "step": 1410 }, { "epoch": 0.76, "learning_rate": 7.998961262881506e-08, "logits/chosen": -1.4263174533843994, "logits/rejected": -0.9260984659194946, "logps/chosen": -325.7618408203125, "logps/rejected": -318.91070556640625, "loss": 0.9355, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.0862057209014893, "rewards/margins": 0.51823890209198, "rewards/rejected": -1.6044447422027588, "rewards/safe_rewards": -1.1601320505142212, "rewards/unsafe_rewards": -1.0122793912887573, "step": 1420 }, { "epoch": 0.77, "learning_rate": 7.657584650360846e-08, "logits/chosen": -1.2710342407226562, "logits/rejected": -0.989566445350647, "logps/chosen": -300.971923828125, "logps/rejected": -315.53387451171875, "loss": 0.9896, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.1461577415466309, "rewards/margins": 0.5026684999465942, "rewards/rejected": -1.648826241493225, "rewards/safe_rewards": -1.1745736598968506, "rewards/unsafe_rewards": -1.117741584777832, "step": 1430 }, { "epoch": 0.77, "learning_rate": 7.322330470336313e-08, "logits/chosen": -1.38018798828125, "logits/rejected": -0.90345698595047, "logps/chosen": -322.70196533203125, "logps/rejected": -353.0235595703125, "loss": 0.9274, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.1377968788146973, "rewards/margins": 0.5669258236885071, "rewards/rejected": -1.7047227621078491, "rewards/safe_rewards": -1.0534660816192627, "rewards/unsafe_rewards": -1.2221280336380005, "step": 1440 }, { "epoch": 0.78, "learning_rate": 6.993317078356709e-08, "logits/chosen": -1.4155539274215698, "logits/rejected": -1.210967779159546, "logps/chosen": -331.1578674316406, "logps/rejected": -324.1207580566406, "loss": 0.9583, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.2102378606796265, "rewards/margins": 0.2986965477466583, "rewards/rejected": -1.5089343786239624, "rewards/safe_rewards": -1.2407358884811401, "rewards/unsafe_rewards": -1.1797398328781128, "step": 1450 }, { "epoch": 0.79, "learning_rate": 6.67066062677118e-08, "logits/chosen": -1.5069057941436768, "logits/rejected": -1.0829797983169556, "logps/chosen": -308.49664306640625, "logps/rejected": -310.1492004394531, "loss": 1.0037, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.0759823322296143, "rewards/margins": 0.45751920342445374, "rewards/rejected": -1.5335016250610352, "rewards/safe_rewards": -1.0966331958770752, "rewards/unsafe_rewards": -1.0553314685821533, "step": 1460 }, { "epoch": 0.79, "learning_rate": 6.354475023723685e-08, "logits/chosen": -1.4374122619628906, "logits/rejected": -1.0555397272109985, "logps/chosen": -348.89227294921875, "logps/rejected": -351.38665771484375, "loss": 0.949, "rewards/accuracies": 0.78125, "rewards/chosen": -1.1484794616699219, "rewards/margins": 0.5996941924095154, "rewards/rejected": -1.748173475265503, "rewards/safe_rewards": -1.1115808486938477, "rewards/unsafe_rewards": -1.1853783130645752, "step": 1470 }, { "epoch": 0.8, "learning_rate": 6.044871892939746e-08, "logits/chosen": -1.5760042667388916, "logits/rejected": -1.1585383415222168, "logps/chosen": -320.93597412109375, "logps/rejected": -338.1377868652344, "loss": 0.9439, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0907033681869507, "rewards/margins": 0.4960748255252838, "rewards/rejected": -1.5867780447006226, "rewards/safe_rewards": -1.1278786659240723, "rewards/unsafe_rewards": -1.0535279512405396, "step": 1480 }, { "epoch": 0.8, "learning_rate": 5.741960534319676e-08, "logits/chosen": -1.5081236362457275, "logits/rejected": -1.237660527229309, "logps/chosen": -281.8060607910156, "logps/rejected": -306.55889892578125, "loss": 0.9089, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.0866477489471436, "rewards/margins": 0.4203459322452545, "rewards/rejected": -1.5069936513900757, "rewards/safe_rewards": -1.198955774307251, "rewards/unsafe_rewards": -0.9743399620056152, "step": 1490 }, { "epoch": 0.81, "learning_rate": 5.44584788535217e-08, "logits/chosen": -1.5474069118499756, "logits/rejected": -1.1630122661590576, "logps/chosen": -322.1282653808594, "logps/rejected": -332.53656005859375, "loss": 0.8619, "rewards/accuracies": 0.71875, "rewards/chosen": -1.0482981204986572, "rewards/margins": 0.5498597621917725, "rewards/rejected": -1.5981578826904297, "rewards/safe_rewards": -1.0035431385040283, "rewards/unsafe_rewards": -1.0930533409118652, "step": 1500 }, { "epoch": 0.81, "eval_logits/chosen": -1.146567940711975, "eval_logits/rejected": -0.7005062699317932, "eval_logps/chosen": -221.80184936523438, "eval_logps/rejected": -210.18832397460938, "eval_loss": 0.34287312626838684, "eval_rewards/accuracies": 0.7268877029418945, "eval_rewards/chosen": -0.913629412651062, "eval_rewards/margins": 0.2635413706302643, "eval_rewards/rejected": -1.1771708726882935, "eval_rewards/safe_rewards": -0.9047471284866333, "eval_rewards/unsafe_rewards": -0.9192255139350891, "eval_runtime": 1122.8444, "eval_samples_per_second": 29.429, "eval_steps_per_second": 0.92, "step": 1500 }, { "epoch": 0.81, "learning_rate": 5.156638483361933e-08, "logits/chosen": -1.612953782081604, "logits/rejected": -1.2291548252105713, "logps/chosen": -316.44293212890625, "logps/rejected": -337.2187194824219, "loss": 0.9167, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.9855987429618835, "rewards/margins": 0.5536761283874512, "rewards/rejected": -1.5392746925354004, "rewards/safe_rewards": -1.0115060806274414, "rewards/unsafe_rewards": -0.9596911668777466, "step": 1510 }, { "epoch": 0.82, "learning_rate": 4.8744344286046236e-08, "logits/chosen": -1.4979829788208008, "logits/rejected": -1.1606972217559814, "logps/chosen": -323.786865234375, "logps/rejected": -326.4385681152344, "loss": 0.9593, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0706676244735718, "rewards/margins": 0.37281331419944763, "rewards/rejected": -1.4434809684753418, "rewards/safe_rewards": -1.150662899017334, "rewards/unsafe_rewards": -0.99067223072052, "step": 1520 }, { "epoch": 0.82, "learning_rate": 4.599335348222169e-08, "logits/chosen": -1.5021213293075562, "logits/rejected": -1.2472387552261353, "logps/chosen": -329.1572265625, "logps/rejected": -364.0357971191406, "loss": 0.9118, "rewards/accuracies": 0.78125, "rewards/chosen": -1.0721365213394165, "rewards/margins": 0.5561688542366028, "rewards/rejected": -1.628305435180664, "rewards/safe_rewards": -1.036094307899475, "rewards/unsafe_rewards": -1.108178734779358, "step": 1530 }, { "epoch": 0.83, "learning_rate": 4.331438361071163e-08, "logits/chosen": -1.5342875719070435, "logits/rejected": -1.3547062873840332, "logps/chosen": -334.36212158203125, "logps/rejected": -347.2046813964844, "loss": 0.9704, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.9460422396659851, "rewards/margins": 0.44854864478111267, "rewards/rejected": -1.394590973854065, "rewards/safe_rewards": -0.965340256690979, "rewards/unsafe_rewards": -0.9267444610595703, "step": 1540 }, { "epoch": 0.83, "learning_rate": 4.0708380434367864e-08, "logits/chosen": -1.5614886283874512, "logits/rejected": -1.1849619150161743, "logps/chosen": -300.528564453125, "logps/rejected": -326.10638427734375, "loss": 0.8839, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0258691310882568, "rewards/margins": 0.4937085211277008, "rewards/rejected": -1.5195776224136353, "rewards/safe_rewards": -1.0579198598861694, "rewards/unsafe_rewards": -0.993818461894989, "step": 1550 }, { "epoch": 0.84, "learning_rate": 3.817626395644305e-08, "logits/chosen": -1.5818434953689575, "logits/rejected": -1.210106611251831, "logps/chosen": -297.70025634765625, "logps/rejected": -307.71710205078125, "loss": 1.0046, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.9973915219306946, "rewards/margins": 0.3978816568851471, "rewards/rejected": -1.3952730894088745, "rewards/safe_rewards": -0.9759462475776672, "rewards/unsafe_rewards": -1.0188367366790771, "step": 1560 }, { "epoch": 0.84, "learning_rate": 3.571892809580013e-08, "logits/chosen": -1.5090782642364502, "logits/rejected": -1.1929179430007935, "logps/chosen": -307.1462097167969, "logps/rejected": -320.7843017578125, "loss": 0.9616, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.1602782011032104, "rewards/margins": 0.36262303590774536, "rewards/rejected": -1.5229012966156006, "rewards/safe_rewards": -1.113433837890625, "rewards/unsafe_rewards": -1.207122564315796, "step": 1570 }, { "epoch": 0.85, "learning_rate": 3.333724037132976e-08, "logits/chosen": -1.5555639266967773, "logits/rejected": -1.2410228252410889, "logps/chosen": -306.10662841796875, "logps/rejected": -334.8534851074219, "loss": 0.96, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.9998570680618286, "rewards/margins": 0.5056936740875244, "rewards/rejected": -1.5055506229400635, "rewards/safe_rewards": -0.9567073583602905, "rewards/unsafe_rewards": -1.0430065393447876, "step": 1580 }, { "epoch": 0.86, "learning_rate": 3.1032041595688506e-08, "logits/chosen": -1.4647419452667236, "logits/rejected": -1.0212126970291138, "logps/chosen": -309.408935546875, "logps/rejected": -335.38299560546875, "loss": 0.9033, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1254334449768066, "rewards/margins": 0.5138076543807983, "rewards/rejected": -1.6392412185668945, "rewards/safe_rewards": -1.1220200061798096, "rewards/unsafe_rewards": -1.1288467645645142, "step": 1590 }, { "epoch": 0.86, "learning_rate": 2.880414557846453e-08, "logits/chosen": -1.4411920309066772, "logits/rejected": -1.2204091548919678, "logps/chosen": -293.62689208984375, "logps/rejected": -315.4649353027344, "loss": 0.8851, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0861139297485352, "rewards/margins": 0.4907267689704895, "rewards/rejected": -1.5768407583236694, "rewards/safe_rewards": -1.0654656887054443, "rewards/unsafe_rewards": -1.106762170791626, "step": 1600 }, { "epoch": 0.87, "learning_rate": 2.6654338838876662e-08, "logits/chosen": -1.585949420928955, "logits/rejected": -1.0787384510040283, "logps/chosen": -322.25885009765625, "logps/rejected": -315.6914978027344, "loss": 0.8857, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0791314840316772, "rewards/margins": 0.5700170397758484, "rewards/rejected": -1.6491485834121704, "rewards/safe_rewards": -1.1312898397445679, "rewards/unsafe_rewards": -1.0269731283187866, "step": 1610 }, { "epoch": 0.87, "learning_rate": 2.4583380328107805e-08, "logits/chosen": -1.5202205181121826, "logits/rejected": -1.1094920635223389, "logps/chosen": -331.93377685546875, "logps/rejected": -334.93719482421875, "loss": 0.9394, "rewards/accuracies": 0.71875, "rewards/chosen": -1.0624914169311523, "rewards/margins": 0.5675247311592102, "rewards/rejected": -1.6300160884857178, "rewards/safe_rewards": -1.0210199356079102, "rewards/unsafe_rewards": -1.1039628982543945, "step": 1620 }, { "epoch": 0.88, "learning_rate": 2.259200116137039e-08, "logits/chosen": -1.4817931652069092, "logits/rejected": -1.1653249263763428, "logps/chosen": -334.85223388671875, "logps/rejected": -357.4493713378906, "loss": 0.9782, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1782487630844116, "rewards/margins": 0.4701511263847351, "rewards/rejected": -1.6483999490737915, "rewards/safe_rewards": -1.1807775497436523, "rewards/unsafe_rewards": -1.175719976425171, "step": 1630 }, { "epoch": 0.88, "learning_rate": 2.068090435979958e-08, "logits/chosen": -1.4055829048156738, "logits/rejected": -1.156343698501587, "logps/chosen": -306.995361328125, "logps/rejected": -317.0059509277344, "loss": 0.9645, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.0118372440338135, "rewards/margins": 0.44742053747177124, "rewards/rejected": -1.45925772190094, "rewards/safe_rewards": -1.0128872394561768, "rewards/unsafe_rewards": -1.010787010192871, "step": 1640 }, { "epoch": 0.89, "learning_rate": 1.8850764602263423e-08, "logits/chosen": -1.4388693571090698, "logits/rejected": -1.0342535972595215, "logps/chosen": -311.6680603027344, "logps/rejected": -348.0157165527344, "loss": 0.9259, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1476449966430664, "rewards/margins": 0.49749964475631714, "rewards/rejected": -1.6451447010040283, "rewards/safe_rewards": -1.1766583919525146, "rewards/unsafe_rewards": -1.1186316013336182, "step": 1650 }, { "epoch": 0.89, "learning_rate": 1.710222798718028e-08, "logits/chosen": -1.498641014099121, "logits/rejected": -1.1954280138015747, "logps/chosen": -323.10626220703125, "logps/rejected": -355.72705078125, "loss": 0.8871, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.1219761371612549, "rewards/margins": 0.4705938696861267, "rewards/rejected": -1.5925698280334473, "rewards/safe_rewards": -1.170627474784851, "rewards/unsafe_rewards": -1.07332444190979, "step": 1660 }, { "epoch": 0.9, "learning_rate": 1.5435911804424356e-08, "logits/chosen": -1.545506477355957, "logits/rejected": -1.2171175479888916, "logps/chosen": -331.9698181152344, "logps/rejected": -340.3002014160156, "loss": 0.9938, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.0450648069381714, "rewards/margins": 0.47697582840919495, "rewards/rejected": -1.522040605545044, "rewards/safe_rewards": -1.095984697341919, "rewards/unsafe_rewards": -0.9941450357437134, "step": 1670 }, { "epoch": 0.9, "learning_rate": 1.3852404317403199e-08, "logits/chosen": -1.41542649269104, "logits/rejected": -1.1583986282348633, "logps/chosen": -297.29962158203125, "logps/rejected": -336.06988525390625, "loss": 0.9583, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1032707691192627, "rewards/margins": 0.3784652352333069, "rewards/rejected": -1.4817359447479248, "rewards/safe_rewards": -1.15559983253479, "rewards/unsafe_rewards": -1.0509414672851562, "step": 1680 }, { "epoch": 0.91, "learning_rate": 1.235226455538113e-08, "logits/chosen": -1.4842908382415771, "logits/rejected": -1.2061156034469604, "logps/chosen": -318.2637939453125, "logps/rejected": -340.8109436035156, "loss": 1.0008, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1303695440292358, "rewards/margins": 0.47945070266723633, "rewards/rejected": -1.6098201274871826, "rewards/safe_rewards": -1.1323390007019043, "rewards/unsafe_rewards": -1.128400206565857, "step": 1690 }, { "epoch": 0.91, "learning_rate": 1.0936022116124321e-08, "logits/chosen": -1.4996792078018188, "logits/rejected": -1.116720199584961, "logps/chosen": -307.4630432128906, "logps/rejected": -331.1900634765625, "loss": 0.866, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.0742332935333252, "rewards/margins": 0.5489377975463867, "rewards/rejected": -1.6231712102890015, "rewards/safe_rewards": -1.107750654220581, "rewards/unsafe_rewards": -1.0407161712646484, "step": 1700 }, { "epoch": 0.92, "learning_rate": 9.60417697893534e-09, "logits/chosen": -1.482126235961914, "logits/rejected": -1.1564598083496094, "logps/chosen": -312.1965026855469, "logps/rejected": -340.93817138671875, "loss": 0.9667, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.156433343887329, "rewards/margins": 0.43530288338661194, "rewards/rejected": -1.5917361974716187, "rewards/safe_rewards": -1.0536446571350098, "rewards/unsafe_rewards": -1.2592222690582275, "step": 1710 }, { "epoch": 0.93, "learning_rate": 8.357199328144576e-09, "logits/chosen": -1.4593350887298584, "logits/rejected": -1.200596570968628, "logps/chosen": -356.2051086425781, "logps/rejected": -369.406494140625, "loss": 0.8611, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.1415997743606567, "rewards/margins": 0.49805140495300293, "rewards/rejected": -1.6396510601043701, "rewards/safe_rewards": -1.2040703296661377, "rewards/unsafe_rewards": -1.0791290998458862, "step": 1720 }, { "epoch": 0.93, "learning_rate": 7.1955293871198144e-09, "logits/chosen": -1.350987195968628, "logits/rejected": -1.1763075590133667, "logps/chosen": -296.27191162109375, "logps/rejected": -321.37518310546875, "loss": 0.9819, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.2195558547973633, "rewards/margins": 0.39501625299453735, "rewards/rejected": -1.6145721673965454, "rewards/safe_rewards": -1.2167112827301025, "rewards/unsafe_rewards": -1.2224003076553345, "step": 1730 }, { "epoch": 0.94, "learning_rate": 6.119577262853254e-09, "logits/chosen": -1.441007375717163, "logits/rejected": -1.0252482891082764, "logps/chosen": -299.6694641113281, "logps/rejected": -312.8736267089844, "loss": 0.9801, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.121921420097351, "rewards/margins": 0.5479141473770142, "rewards/rejected": -1.6698356866836548, "rewards/safe_rewards": -1.0443205833435059, "rewards/unsafe_rewards": -1.1995223760604858, "step": 1740 }, { "epoch": 0.94, "learning_rate": 5.129722801180542e-09, "logits/chosen": -1.4107353687286377, "logits/rejected": -1.0875647068023682, "logps/chosen": -322.1299743652344, "logps/rejected": -341.2066650390625, "loss": 0.8399, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.189591407775879, "rewards/margins": 0.5180362462997437, "rewards/rejected": -1.707627534866333, "rewards/safe_rewards": -1.3024308681488037, "rewards/unsafe_rewards": -1.076751708984375, "step": 1750 }, { "epoch": 0.95, "learning_rate": 4.226315452682816e-09, "logits/chosen": -1.4723705053329468, "logits/rejected": -1.1786158084869385, "logps/chosen": -305.52362060546875, "logps/rejected": -326.5906677246094, "loss": 0.9485, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.046170711517334, "rewards/margins": 0.4863569736480713, "rewards/rejected": -1.5325279235839844, "rewards/safe_rewards": -1.0766370296478271, "rewards/unsafe_rewards": -1.0157043933868408, "step": 1760 }, { "epoch": 0.95, "learning_rate": 3.4096741493194193e-09, "logits/chosen": -1.527930498123169, "logits/rejected": -1.2494192123413086, "logps/chosen": -315.3348693847656, "logps/rejected": -332.2015686035156, "loss": 1.0269, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1415612697601318, "rewards/margins": 0.39452338218688965, "rewards/rejected": -1.5360848903656006, "rewards/safe_rewards": -1.0917105674743652, "rewards/unsafe_rewards": -1.1914122104644775, "step": 1770 }, { "epoch": 0.96, "learning_rate": 2.6800871918346846e-09, "logits/chosen": -1.5815150737762451, "logits/rejected": -1.1552437543869019, "logps/chosen": -320.330078125, "logps/rejected": -340.8857727050781, "loss": 0.9364, "rewards/accuracies": 0.75, "rewards/chosen": -1.0657531023025513, "rewards/margins": 0.5418880581855774, "rewards/rejected": -1.6076412200927734, "rewards/safe_rewards": -1.1473314762115479, "rewards/unsafe_rewards": -0.9841750264167786, "step": 1780 }, { "epoch": 0.96, "learning_rate": 2.0378121479783796e-09, "logits/chosen": -1.4162019491195679, "logits/rejected": -1.0255894660949707, "logps/chosen": -313.9689025878906, "logps/rejected": -334.1581115722656, "loss": 0.9987, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.1681249141693115, "rewards/margins": 0.49787864089012146, "rewards/rejected": -1.6660035848617554, "rewards/safe_rewards": -1.1383296251296997, "rewards/unsafe_rewards": -1.1979202032089233, "step": 1790 }, { "epoch": 0.97, "learning_rate": 1.4830757615760247e-09, "logits/chosen": -1.437466025352478, "logits/rejected": -1.0774166584014893, "logps/chosen": -325.25677490234375, "logps/rejected": -333.42791748046875, "loss": 0.9478, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0991178750991821, "rewards/margins": 0.44227179884910583, "rewards/rejected": -1.5413895845413208, "rewards/safe_rewards": -1.1737608909606934, "rewards/unsafe_rewards": -1.024474859237671, "step": 1800 }, { "epoch": 0.97, "learning_rate": 1.0160738724809548e-09, "logits/chosen": -1.496584177017212, "logits/rejected": -1.0213630199432373, "logps/chosen": -305.14263916015625, "logps/rejected": -338.046875, "loss": 0.881, "rewards/accuracies": 0.78125, "rewards/chosen": -1.1096470355987549, "rewards/margins": 0.5430228114128113, "rewards/rejected": -1.6526696681976318, "rewards/safe_rewards": -1.1323860883712769, "rewards/unsafe_rewards": -1.0869077444076538, "step": 1810 }, { "epoch": 0.98, "learning_rate": 6.369713474366212e-10, "logits/chosen": -1.460850477218628, "logits/rejected": -1.1146998405456543, "logps/chosen": -342.700439453125, "logps/rejected": -370.11865234375, "loss": 0.8416, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.1760565042495728, "rewards/margins": 0.5496792793273926, "rewards/rejected": -1.7257359027862549, "rewards/safe_rewards": -1.209657907485962, "rewards/unsafe_rewards": -1.1424554586410522, "step": 1820 }, { "epoch": 0.98, "learning_rate": 3.459020218731512e-10, "logits/chosen": -1.4401605129241943, "logits/rejected": -1.135258674621582, "logps/chosen": -297.53607177734375, "logps/rejected": -319.9522399902344, "loss": 0.872, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.0829004049301147, "rewards/margins": 0.5533518195152283, "rewards/rejected": -1.6362521648406982, "rewards/safe_rewards": -1.041105031967163, "rewards/unsafe_rewards": -1.124695897102356, "step": 1830 }, { "epoch": 0.99, "learning_rate": 1.429686526593088e-10, "logits/chosen": -1.4089921712875366, "logits/rejected": -1.140413522720337, "logps/chosen": -320.8370666503906, "logps/rejected": -343.36822509765625, "loss": 1.0174, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.1735649108886719, "rewards/margins": 0.45700669288635254, "rewards/rejected": -1.6305716037750244, "rewards/safe_rewards": -1.203802227973938, "rewards/unsafe_rewards": -1.1433275938034058, "step": 1840 }, { "epoch": 1.0, "learning_rate": 2.824288182584622e-11, "logits/chosen": -1.5680155754089355, "logits/rejected": -1.1377493143081665, "logps/chosen": -327.45550537109375, "logps/rejected": -339.25115966796875, "loss": 0.8677, "rewards/accuracies": 0.71875, "rewards/chosen": -1.1065986156463623, "rewards/margins": 0.5338612198829651, "rewards/rejected": -1.6404597759246826, "rewards/safe_rewards": -1.171757459640503, "rewards/unsafe_rewards": -1.0414397716522217, "step": 1850 }, { "epoch": 1.0, "step": 1858, "total_flos": 0.0, "train_loss": 1.018996798697021, "train_runtime": 22449.6551, "train_samples_per_second": 2.649, "train_steps_per_second": 0.083 } ], "logging_steps": 10, "max_steps": 1858, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }