diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3060 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9995965030262273, + "eval_steps": 500, + "global_step": 1858, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.6881720430107528e-09, + "logits/chosen": -2.5808520317077637, + "logits/rejected": -2.0101242065429688, + "logps/chosen": -299.3489990234375, + "logps/rejected": -186.63014221191406, + "loss": 1.2656, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "rewards/safe_rewards": 0.0, + "rewards/unsafe_rewards": 0.0, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 2.6881720430107527e-08, + "logits/chosen": -2.38761043548584, + "logits/rejected": -2.2287850379943848, + "logps/chosen": -201.83148193359375, + "logps/rejected": -189.46726989746094, + "loss": 1.4296, + "rewards/accuracies": 0.4305555522441864, + "rewards/chosen": 2.8226104404893704e-05, + "rewards/margins": -9.960395254893228e-05, + "rewards/rejected": 0.00012783010606653988, + "rewards/safe_rewards": -0.0001673989463597536, + "rewards/unsafe_rewards": 0.0002238511951873079, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5.3763440860215054e-08, + "logits/chosen": -2.3484911918640137, + "logits/rejected": -2.053339719772339, + "logps/chosen": -226.3044891357422, + "logps/rejected": -181.17330932617188, + "loss": 1.463, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -4.27155100624077e-05, + "rewards/margins": 3.7895108562224777e-06, + "rewards/rejected": -4.650496339309029e-05, + "rewards/safe_rewards": -0.0004773393739014864, + "rewards/unsafe_rewards": 0.0003919084556400776, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 8.064516129032257e-08, + "logits/chosen": -2.3405332565307617, + "logits/rejected": -2.145922899246216, + "logps/chosen": -215.05410766601562, + "logps/rejected": -189.3188018798828, + "loss": 1.431, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": 3.503418338368647e-05, + "rewards/margins": 0.0005787784466519952, + "rewards/rejected": -0.0005437443032860756, + "rewards/safe_rewards": -0.00011984705633949488, + "rewards/unsafe_rewards": 0.00018991540127899498, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 1.0752688172043011e-07, + "logits/chosen": -2.2765462398529053, + "logits/rejected": -1.974180817604065, + "logps/chosen": -180.71937561035156, + "logps/rejected": -173.9296417236328, + "loss": 1.4304, + "rewards/accuracies": 0.65625, + "rewards/chosen": 0.00015593590796925128, + "rewards/margins": 0.0017944574356079102, + "rewards/rejected": -0.0016385214403271675, + "rewards/safe_rewards": 0.00035788281820714474, + "rewards/unsafe_rewards": -4.6010944060981274e-05, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 1.3440860215053762e-07, + "logits/chosen": -2.403860569000244, + "logits/rejected": -2.0332884788513184, + "logps/chosen": -209.592529296875, + "logps/rejected": -167.6835174560547, + "loss": 1.4344, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": 0.0008560878923162818, + "rewards/margins": 0.004450940527021885, + "rewards/rejected": -0.0035948525182902813, + "rewards/safe_rewards": 0.00018845750309992582, + "rewards/unsafe_rewards": 0.0015237184707075357, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 1.6129032258064515e-07, + "logits/chosen": -2.330204486846924, + "logits/rejected": -2.1555492877960205, + "logps/chosen": -185.8196563720703, + "logps/rejected": -185.08883666992188, + "loss": 1.4264, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.0013676225207746029, + "rewards/margins": 0.0021811036858707666, + "rewards/rejected": -0.003548726439476013, + "rewards/safe_rewards": -0.000750910839997232, + "rewards/unsafe_rewards": -0.001984334085136652, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 1.8817204301075268e-07, + "logits/chosen": -2.3298559188842773, + "logits/rejected": -2.0787758827209473, + "logps/chosen": -202.39566040039062, + "logps/rejected": -184.2627410888672, + "loss": 1.3858, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0020925761200487614, + "rewards/margins": 0.006319403648376465, + "rewards/rejected": -0.008411980234086514, + "rewards/safe_rewards": -0.0030772520694881678, + "rewards/unsafe_rewards": -0.0011078999377787113, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 2.1505376344086022e-07, + "logits/chosen": -2.3292670249938965, + "logits/rejected": -2.1124508380889893, + "logps/chosen": -220.2982940673828, + "logps/rejected": -195.1908721923828, + "loss": 1.3919, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.008099144324660301, + "rewards/margins": 0.012995732948184013, + "rewards/rejected": -0.021094877272844315, + "rewards/safe_rewards": -0.010834941640496254, + "rewards/unsafe_rewards": -0.005363349802792072, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 2.4193548387096775e-07, + "logits/chosen": -2.3152518272399902, + "logits/rejected": -2.1177124977111816, + "logps/chosen": -209.4881134033203, + "logps/rejected": -170.4688720703125, + "loss": 1.4229, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.03572874516248703, + "rewards/margins": 0.025636380538344383, + "rewards/rejected": -0.061365120112895966, + "rewards/safe_rewards": -0.03464391082525253, + "rewards/unsafe_rewards": -0.03681357204914093, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 2.6881720430107523e-07, + "logits/chosen": -2.312987804412842, + "logits/rejected": -2.1187119483947754, + "logps/chosen": -204.9591827392578, + "logps/rejected": -180.0883331298828, + "loss": 1.4099, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0888073593378067, + "rewards/margins": 0.04026350378990173, + "rewards/rejected": -0.12907087802886963, + "rewards/safe_rewards": -0.08975542336702347, + "rewards/unsafe_rewards": -0.08785931766033173, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 2.956989247311828e-07, + "logits/chosen": -2.2454307079315186, + "logits/rejected": -2.0064516067504883, + "logps/chosen": -224.6505889892578, + "logps/rejected": -190.16635131835938, + "loss": 1.4442, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.12704427540302277, + "rewards/margins": 0.054799921810626984, + "rewards/rejected": -0.18184418976306915, + "rewards/safe_rewards": -0.12200836837291718, + "rewards/unsafe_rewards": -0.13208015263080597, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 3.225806451612903e-07, + "logits/chosen": -2.26438570022583, + "logits/rejected": -1.9198744297027588, + "logps/chosen": -222.88461303710938, + "logps/rejected": -173.9338836669922, + "loss": 1.3613, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.1163511872291565, + "rewards/margins": 0.09466449916362762, + "rewards/rejected": -0.21101567149162292, + "rewards/safe_rewards": -0.11146645247936249, + "rewards/unsafe_rewards": -0.1212359219789505, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 3.4946236559139783e-07, + "logits/chosen": -2.19810152053833, + "logits/rejected": -1.9682261943817139, + "logps/chosen": -241.05618286132812, + "logps/rejected": -201.30264282226562, + "loss": 1.2805, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.26873043179512024, + "rewards/margins": 0.12516936659812927, + "rewards/rejected": -0.39389973878860474, + "rewards/safe_rewards": -0.21829214692115784, + "rewards/unsafe_rewards": -0.3191686272621155, + "step": 130 + }, + { + "epoch": 0.08, + "learning_rate": 3.7634408602150537e-07, + "logits/chosen": -2.192406177520752, + "logits/rejected": -1.845873236656189, + "logps/chosen": -241.24447631835938, + "logps/rejected": -219.4851531982422, + "loss": 1.2394, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.2985479533672333, + "rewards/margins": 0.09591639041900635, + "rewards/rejected": -0.394464373588562, + "rewards/safe_rewards": -0.2801482379436493, + "rewards/unsafe_rewards": -0.31694772839546204, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.0322580645161285e-07, + "logits/chosen": -2.0371975898742676, + "logits/rejected": -1.7497107982635498, + "logps/chosen": -249.6632537841797, + "logps/rejected": -225.79537963867188, + "loss": 1.1556, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.4271857738494873, + "rewards/margins": 0.13365456461906433, + "rewards/rejected": -0.5608403086662292, + "rewards/safe_rewards": -0.42928582429885864, + "rewards/unsafe_rewards": -0.4250856935977936, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.3010752688172043e-07, + "logits/chosen": -1.9401572942733765, + "logits/rejected": -1.5656859874725342, + "logps/chosen": -246.42398071289062, + "logps/rejected": -226.51400756835938, + "loss": 1.2948, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2622266113758087, + "rewards/margins": 0.10594137012958527, + "rewards/rejected": -0.3681679964065552, + "rewards/safe_rewards": -0.24256543815135956, + "rewards/unsafe_rewards": -0.28188782930374146, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.569892473118279e-07, + "logits/chosen": -1.9621531963348389, + "logits/rejected": -1.6663320064544678, + "logps/chosen": -253.3407440185547, + "logps/rejected": -226.83035278320312, + "loss": 1.1636, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.38964638113975525, + "rewards/margins": 0.20659947395324707, + "rewards/rejected": -0.5962458848953247, + "rewards/safe_rewards": -0.3696528375148773, + "rewards/unsafe_rewards": -0.4096398949623108, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.838709677419355e-07, + "logits/chosen": -2.0335605144500732, + "logits/rejected": -1.65840744972229, + "logps/chosen": -232.89804077148438, + "logps/rejected": -238.404296875, + "loss": 1.153, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.40930503606796265, + "rewards/margins": 0.26214295625686646, + "rewards/rejected": -0.6714479327201843, + "rewards/safe_rewards": -0.42569422721862793, + "rewards/unsafe_rewards": -0.39291584491729736, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.999929391798331e-07, + "logits/chosen": -2.087995767593384, + "logits/rejected": -1.7015736103057861, + "logps/chosen": -233.88204956054688, + "logps/rejected": -231.9954071044922, + "loss": 1.1701, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.39558762311935425, + "rewards/margins": 0.25256142020225525, + "rewards/rejected": -0.6481491327285767, + "rewards/safe_rewards": -0.3778998851776123, + "rewards/unsafe_rewards": -0.4132753312587738, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9991350953333e-07, + "logits/chosen": -2.0101492404937744, + "logits/rejected": -1.688194990158081, + "logps/chosen": -269.3438720703125, + "logps/rejected": -272.6773376464844, + "loss": 1.1309, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.49628791213035583, + "rewards/margins": 0.21785268187522888, + "rewards/rejected": -0.7141406536102295, + "rewards/safe_rewards": -0.47186246514320374, + "rewards/unsafe_rewards": -0.5207133293151855, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.997458523498236e-07, + "logits/chosen": -2.1067311763763428, + "logits/rejected": -1.8388845920562744, + "logps/chosen": -260.73553466796875, + "logps/rejected": -250.10794067382812, + "loss": 1.1343, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.685817539691925, + "rewards/margins": 0.2200475037097931, + "rewards/rejected": -0.9058650732040405, + "rewards/safe_rewards": -0.6602068543434143, + "rewards/unsafe_rewards": -0.7114282250404358, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.99490026817712e-07, + "logits/chosen": -2.0778934955596924, + "logits/rejected": -1.7963426113128662, + "logps/chosen": -245.4992218017578, + "logps/rejected": -252.6900177001953, + "loss": 1.149, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.5112438201904297, + "rewards/margins": 0.3192596137523651, + "rewards/rejected": -0.8305034637451172, + "rewards/safe_rewards": -0.469885915517807, + "rewards/unsafe_rewards": -0.5526017546653748, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.991461232516674e-07, + "logits/chosen": -2.041980266571045, + "logits/rejected": -1.7099599838256836, + "logps/chosen": -286.7567443847656, + "logps/rejected": -276.52020263671875, + "loss": 1.1816, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.566757082939148, + "rewards/margins": 0.22073951363563538, + "rewards/rejected": -0.7874965667724609, + "rewards/safe_rewards": -0.5972923040390015, + "rewards/unsafe_rewards": -0.5362219214439392, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.98714263060751e-07, + "logits/chosen": -2.079230546951294, + "logits/rejected": -1.6963192224502563, + "logps/chosen": -245.6575927734375, + "logps/rejected": -231.0216064453125, + "loss": 1.1425, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.5724560022354126, + "rewards/margins": 0.2712286710739136, + "rewards/rejected": -0.8436846733093262, + "rewards/safe_rewards": -0.5816723108291626, + "rewards/unsafe_rewards": -0.5632396936416626, + "step": 240 + }, + { + "epoch": 0.13, + "learning_rate": 4.98194598705552e-07, + "logits/chosen": -1.9290701150894165, + "logits/rejected": -1.7140071392059326, + "logps/chosen": -283.2284851074219, + "logps/rejected": -276.1746826171875, + "loss": 1.2033, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.9349914789199829, + "rewards/margins": 0.15353193879127502, + "rewards/rejected": -1.088523268699646, + "rewards/safe_rewards": -0.9215167164802551, + "rewards/unsafe_rewards": -0.9484661817550659, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.975873136443648e-07, + "logits/chosen": -2.1985442638397217, + "logits/rejected": -1.9197852611541748, + "logps/chosen": -303.9427795410156, + "logps/rejected": -298.4468688964844, + "loss": 1.0412, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.7289992570877075, + "rewards/margins": 0.2902063727378845, + "rewards/rejected": -1.0192055702209473, + "rewards/safe_rewards": -0.8002890348434448, + "rewards/unsafe_rewards": -0.6577093005180359, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.968926222684212e-07, + "logits/chosen": -2.028428792953491, + "logits/rejected": -1.8198667764663696, + "logps/chosen": -262.1617126464844, + "logps/rejected": -279.1395568847656, + "loss": 1.0273, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.6195804476737976, + "rewards/margins": 0.3619759678840637, + "rewards/rejected": -0.9815564155578613, + "rewards/safe_rewards": -0.6148445010185242, + "rewards/unsafe_rewards": -0.6243164539337158, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.961107698262044e-07, + "logits/chosen": -1.9460862874984741, + "logits/rejected": -1.6114823818206787, + "logps/chosen": -289.10284423828125, + "logps/rejected": -280.1482849121094, + "loss": 1.0933, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.70842045545578, + "rewards/margins": 0.2855362296104431, + "rewards/rejected": -0.9939567446708679, + "rewards/safe_rewards": -0.6650065183639526, + "rewards/unsafe_rewards": -0.7518342733383179, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.952420323368673e-07, + "logits/chosen": -2.0242223739624023, + "logits/rejected": -1.8324100971221924, + "logps/chosen": -237.0861358642578, + "logps/rejected": -266.013916015625, + "loss": 1.1438, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.4568088948726654, + "rewards/margins": 0.3911629617214203, + "rewards/rejected": -0.8479719161987305, + "rewards/safe_rewards": -0.49832311272621155, + "rewards/unsafe_rewards": -0.41529473662376404, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.942867164927899e-07, + "logits/chosen": -1.9857969284057617, + "logits/rejected": -1.7354557514190674, + "logps/chosen": -262.5624084472656, + "logps/rejected": -261.7809143066406, + "loss": 1.1983, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.6027632355690002, + "rewards/margins": 0.323146253824234, + "rewards/rejected": -0.9259093999862671, + "rewards/safe_rewards": -0.6246525645256042, + "rewards/unsafe_rewards": -0.5808738470077515, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.932451595513062e-07, + "logits/chosen": -2.0492186546325684, + "logits/rejected": -1.6627346277236938, + "logps/chosen": -287.6437072753906, + "logps/rejected": -296.08685302734375, + "loss": 0.9844, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6867466568946838, + "rewards/margins": 0.3914056420326233, + "rewards/rejected": -1.0781524181365967, + "rewards/safe_rewards": -0.7488200664520264, + "rewards/unsafe_rewards": -0.6246733069419861, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.921177292156419e-07, + "logits/chosen": -2.0579938888549805, + "logits/rejected": -1.661026954650879, + "logps/chosen": -275.15484619140625, + "logps/rejected": -296.12921142578125, + "loss": 0.9846, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6913672089576721, + "rewards/margins": 0.45374804735183716, + "rewards/rejected": -1.1451152563095093, + "rewards/safe_rewards": -0.6368889808654785, + "rewards/unsafe_rewards": -0.7458454370498657, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.909048235051033e-07, + "logits/chosen": -1.8785194158554077, + "logits/rejected": -1.6157915592193604, + "logps/chosen": -287.54022216796875, + "logps/rejected": -308.53485107421875, + "loss": 1.05, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.881294846534729, + "rewards/margins": 0.377378910779953, + "rewards/rejected": -1.2586736679077148, + "rewards/safe_rewards": -0.8856824040412903, + "rewards/unsafe_rewards": -0.876907229423523, + "step": 330 + }, + { + "epoch": 0.18, + "learning_rate": 4.896068706145631e-07, + "logits/chosen": -2.043916702270508, + "logits/rejected": -1.7367770671844482, + "logps/chosen": -311.2376708984375, + "logps/rejected": -299.87225341796875, + "loss": 1.0624, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -1.005437970161438, + "rewards/margins": 0.386522501707077, + "rewards/rejected": -1.3919605016708374, + "rewards/safe_rewards": -1.0309627056121826, + "rewards/unsafe_rewards": -0.9799133539199829, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.882243287632946e-07, + "logits/chosen": -2.2845442295074463, + "logits/rejected": -2.015984058380127, + "logps/chosen": -249.07626342773438, + "logps/rejected": -270.349609375, + "loss": 1.0634, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -0.5606988668441772, + "rewards/margins": 0.3308585584163666, + "rewards/rejected": -0.8915573954582214, + "rewards/safe_rewards": -0.5756514668464661, + "rewards/unsafe_rewards": -0.5457462072372437, + "step": 350 + }, + { + "epoch": 0.19, + "learning_rate": 4.867576860332048e-07, + "logits/chosen": -2.2117257118225098, + "logits/rejected": -1.9762624502182007, + "logps/chosen": -250.96493530273438, + "logps/rejected": -287.42401123046875, + "loss": 1.0082, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.7890839576721191, + "rewards/margins": 0.39461660385131836, + "rewards/rejected": -1.1837005615234375, + "rewards/safe_rewards": -0.8431307673454285, + "rewards/unsafe_rewards": -0.7350370287895203, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.85207460196526e-07, + "logits/chosen": -2.083486557006836, + "logits/rejected": -1.7575275897979736, + "logps/chosen": -315.93890380859375, + "logps/rejected": -334.19989013671875, + "loss": 1.0535, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.1155636310577393, + "rewards/margins": 0.3764941096305847, + "rewards/rejected": -1.4920578002929688, + "rewards/safe_rewards": -1.1053030490875244, + "rewards/unsafe_rewards": -1.125824213027954, + "step": 370 + }, + { + "epoch": 0.2, + "learning_rate": 4.835741985330259e-07, + "logits/chosen": -2.07675838470459, + "logits/rejected": -1.7431271076202393, + "logps/chosen": -267.29132080078125, + "logps/rejected": -277.96624755859375, + "loss": 1.0022, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.6648051142692566, + "rewards/margins": 0.39567989110946655, + "rewards/rejected": -1.0604850053787231, + "rewards/safe_rewards": -0.6165894865989685, + "rewards/unsafe_rewards": -0.7130206823348999, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.818584776367992e-07, + "logits/chosen": -1.928900122642517, + "logits/rejected": -1.7115558385849, + "logps/chosen": -286.6376647949219, + "logps/rejected": -315.5821838378906, + "loss": 1.0452, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.8273309469223022, + "rewards/margins": 0.4084450602531433, + "rewards/rejected": -1.2357759475708008, + "rewards/safe_rewards": -0.8857539892196655, + "rewards/unsafe_rewards": -0.7689078450202942, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.800609032127122e-07, + "logits/chosen": -1.9402987957000732, + "logits/rejected": -1.653032898902893, + "logps/chosen": -315.54443359375, + "logps/rejected": -311.26129150390625, + "loss": 1.096, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.1000012159347534, + "rewards/margins": 0.32444682717323303, + "rewards/rejected": -1.4244478940963745, + "rewards/safe_rewards": -1.031652569770813, + "rewards/unsafe_rewards": -1.1683496236801147, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.78182109862569e-07, + "logits/chosen": -1.957297682762146, + "logits/rejected": -1.815405249595642, + "logps/chosen": -275.63458251953125, + "logps/rejected": -291.7708435058594, + "loss": 1.1237, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.8423420786857605, + "rewards/margins": 0.27593573927879333, + "rewards/rejected": -1.1182777881622314, + "rewards/safe_rewards": -0.7917040586471558, + "rewards/unsafe_rewards": -0.8929800987243652, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.7622276086107677e-07, + "logits/chosen": -2.0669121742248535, + "logits/rejected": -1.759894609451294, + "logps/chosen": -285.4234924316406, + "logps/rejected": -295.45294189453125, + "loss": 1.0445, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.7513025999069214, + "rewards/margins": 0.3429652750492096, + "rewards/rejected": -1.0942678451538086, + "rewards/safe_rewards": -0.8147345781326294, + "rewards/unsafe_rewards": -0.6878706216812134, + "step": 420 + }, + { + "epoch": 0.23, + "learning_rate": 4.741835479216879e-07, + "logits/chosen": -2.0404961109161377, + "logits/rejected": -1.6269737482070923, + "logps/chosen": -326.9522399902344, + "logps/rejected": -324.0838623046875, + "loss": 1.0151, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.8008167147636414, + "rewards/margins": 0.46369487047195435, + "rewards/rejected": -1.2645115852355957, + "rewards/safe_rewards": -0.8669862747192383, + "rewards/unsafe_rewards": -0.7346470952033997, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.720651909524036e-07, + "logits/chosen": -2.04176664352417, + "logits/rejected": -1.7474247217178345, + "logps/chosen": -265.5859069824219, + "logps/rejected": -273.75836181640625, + "loss": 1.0613, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -0.6951051950454712, + "rewards/margins": 0.3854682147502899, + "rewards/rejected": -1.0805734395980835, + "rewards/safe_rewards": -0.8021440505981445, + "rewards/unsafe_rewards": -0.5880664587020874, + "step": 440 + }, + { + "epoch": 0.24, + "learning_rate": 4.698684378016222e-07, + "logits/chosen": -1.9997985363006592, + "logits/rejected": -1.7316805124282837, + "logps/chosen": -258.0675354003906, + "logps/rejected": -275.7622375488281, + "loss": 1.057, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.7074462175369263, + "rewards/margins": 0.33383387327194214, + "rewards/rejected": -1.0412800312042236, + "rewards/safe_rewards": -0.6980961561203003, + "rewards/unsafe_rewards": -0.7167961597442627, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.675940639941256e-07, + "logits/chosen": -2.0017848014831543, + "logits/rejected": -1.6473373174667358, + "logps/chosen": -284.409912109375, + "logps/rejected": -302.1802978515625, + "loss": 0.9964, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -0.7972325086593628, + "rewards/margins": 0.45086246728897095, + "rewards/rejected": -1.248094916343689, + "rewards/safe_rewards": -0.7986913919448853, + "rewards/unsafe_rewards": -0.7957736849784851, + "step": 460 + }, + { + "epoch": 0.25, + "learning_rate": 4.6524287245729286e-07, + "logits/chosen": -1.8008419275283813, + "logits/rejected": -1.545585036277771, + "logps/chosen": -285.72833251953125, + "logps/rejected": -295.92877197265625, + "loss": 1.0215, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -0.9112635850906372, + "rewards/margins": 0.3833921253681183, + "rewards/rejected": -1.2946555614471436, + "rewards/safe_rewards": -0.9739904403686523, + "rewards/unsafe_rewards": -0.8485366702079773, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.628156932376418e-07, + "logits/chosen": -1.935253381729126, + "logits/rejected": -1.5615403652191162, + "logps/chosen": -283.60662841796875, + "logps/rejected": -275.73553466796875, + "loss": 1.0577, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8973907232284546, + "rewards/margins": 0.34667691588401794, + "rewards/rejected": -1.244067668914795, + "rewards/safe_rewards": -0.9187390208244324, + "rewards/unsafe_rewards": -0.8760424852371216, + "step": 480 + }, + { + "epoch": 0.26, + "learning_rate": 4.603133832077953e-07, + "logits/chosen": -2.0226516723632812, + "logits/rejected": -1.757315993309021, + "logps/chosen": -319.8951416015625, + "logps/rejected": -344.83306884765625, + "loss": 1.0078, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.8721585273742676, + "rewards/margins": 0.43064364790916443, + "rewards/rejected": -1.302802324295044, + "rewards/safe_rewards": -0.8747318983078003, + "rewards/unsafe_rewards": -0.8695852160453796, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.5773682576397776e-07, + "logits/chosen": -1.8940719366073608, + "logits/rejected": -1.6204118728637695, + "logps/chosen": -282.92388916015625, + "logps/rejected": -298.21685791015625, + "loss": 1.0274, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8302618265151978, + "rewards/margins": 0.41684699058532715, + "rewards/rejected": -1.2471086978912354, + "rewards/safe_rewards": -0.8429195284843445, + "rewards/unsafe_rewards": -0.8176040649414062, + "step": 500 + }, + { + "epoch": 0.27, + "eval_logits/chosen": -1.461648941040039, + "eval_logits/rejected": -0.9854875206947327, + "eval_logps/chosen": -232.3561248779297, + "eval_logps/rejected": -222.5215606689453, + "eval_loss": 0.37307849526405334, + "eval_rewards/accuracies": 0.7075266242027283, + "eval_rewards/chosen": -1.0191720724105835, + "eval_rewards/margins": 0.28133097290992737, + "eval_rewards/rejected": -1.300503134727478, + "eval_rewards/safe_rewards": -1.0088554620742798, + "eval_rewards/unsafe_rewards": -1.028071403503418, + "eval_runtime": 1058.7885, + "eval_samples_per_second": 31.209, + "eval_steps_per_second": 0.976, + "step": 500 + }, + { + "epoch": 0.27, + "learning_rate": 4.5508693051414774e-07, + "logits/chosen": -1.9713261127471924, + "logits/rejected": -1.7668718099594116, + "logps/chosen": -295.57354736328125, + "logps/rejected": -315.67010498046875, + "loss": 1.0145, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.992205023765564, + "rewards/margins": 0.4039764404296875, + "rewards/rejected": -1.3961814641952515, + "rewards/safe_rewards": -0.9630918502807617, + "rewards/unsafe_rewards": -1.0213181972503662, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.52364632956877e-07, + "logits/chosen": -2.020744562149048, + "logits/rejected": -1.7648818492889404, + "logps/chosen": -306.5279235839844, + "logps/rejected": -284.1745910644531, + "loss": 1.1779, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.9038923978805542, + "rewards/margins": 0.2524818778038025, + "rewards/rejected": -1.1563743352890015, + "rewards/safe_rewards": -0.9023400545120239, + "rewards/unsafe_rewards": -0.9054449200630188, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.4957089415108895e-07, + "logits/chosen": -1.965404748916626, + "logits/rejected": -1.6829001903533936, + "logps/chosen": -266.58013916015625, + "logps/rejected": -311.5513916015625, + "loss": 0.9603, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.7168244123458862, + "rewards/margins": 0.5065333247184753, + "rewards/rejected": -1.2233576774597168, + "rewards/safe_rewards": -0.7431866526603699, + "rewards/unsafe_rewards": -0.6904621720314026, + "step": 530 + }, + { + "epoch": 0.29, + "learning_rate": 4.467067003767745e-07, + "logits/chosen": -1.9147508144378662, + "logits/rejected": -1.4926608800888062, + "logps/chosen": -276.83740234375, + "logps/rejected": -303.65716552734375, + "loss": 1.0585, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.8392030000686646, + "rewards/margins": 0.5333585739135742, + "rewards/rejected": -1.3725616931915283, + "rewards/safe_rewards": -0.8698552250862122, + "rewards/unsafe_rewards": -0.8085508346557617, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.437730627868027e-07, + "logits/chosen": -1.8325055837631226, + "logits/rejected": -1.4082276821136475, + "logps/chosen": -256.5406799316406, + "logps/rejected": -277.9321594238281, + "loss": 0.9491, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.7962465286254883, + "rewards/margins": 0.5230618715286255, + "rewards/rejected": -1.3193082809448242, + "rewards/safe_rewards": -0.7890614867210388, + "rewards/unsafe_rewards": -0.803431510925293, + "step": 550 + }, + { + "epoch": 0.3, + "learning_rate": 4.4077101704995163e-07, + "logits/chosen": -1.9758007526397705, + "logits/rejected": -1.716571569442749, + "logps/chosen": -271.619140625, + "logps/rejected": -284.5029602050781, + "loss": 1.0222, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.7951744198799133, + "rewards/margins": 0.3159455358982086, + "rewards/rejected": -1.1111198663711548, + "rewards/safe_rewards": -0.751990020275116, + "rewards/unsafe_rewards": -0.8383587002754211, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.3770162298528356e-07, + "logits/chosen": -1.9304263591766357, + "logits/rejected": -1.6568174362182617, + "logps/chosen": -294.840087890625, + "logps/rejected": -296.0787658691406, + "loss": 1.0612, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.9997127652168274, + "rewards/margins": 0.4206429421901703, + "rewards/rejected": -1.4203556776046753, + "rewards/safe_rewards": -0.9456769824028015, + "rewards/unsafe_rewards": -1.053748369216919, + "step": 570 + }, + { + "epoch": 0.31, + "learning_rate": 4.3456596418799476e-07, + "logits/chosen": -1.8863794803619385, + "logits/rejected": -1.6375776529312134, + "logps/chosen": -308.4244689941406, + "logps/rejected": -312.00836181640625, + "loss": 0.9379, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.0467333793640137, + "rewards/margins": 0.35309094190597534, + "rewards/rejected": -1.3998241424560547, + "rewards/safe_rewards": -1.0730044841766357, + "rewards/unsafe_rewards": -1.0204620361328125, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.313651476468715e-07, + "logits/chosen": -1.9189523458480835, + "logits/rejected": -1.6596009731292725, + "logps/chosen": -302.38214111328125, + "logps/rejected": -310.23687744140625, + "loss": 0.9881, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.0581415891647339, + "rewards/margins": 0.3177962601184845, + "rewards/rejected": -1.375937819480896, + "rewards/safe_rewards": -1.1356565952301025, + "rewards/unsafe_rewards": -0.9806265830993652, + "step": 590 + }, + { + "epoch": 0.32, + "learning_rate": 4.2810030335348693e-07, + "logits/chosen": -1.9322669506072998, + "logits/rejected": -1.6003156900405884, + "logps/chosen": -305.31378173828125, + "logps/rejected": -295.27435302734375, + "loss": 1.0005, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.9873701930046082, + "rewards/margins": 0.315813809633255, + "rewards/rejected": -1.3031837940216064, + "rewards/safe_rewards": -0.9451528787612915, + "rewards/unsafe_rewards": -1.0295875072479248, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.2477258390327806e-07, + "logits/chosen": -1.9405990839004517, + "logits/rejected": -1.6028436422348022, + "logps/chosen": -266.96820068359375, + "logps/rejected": -300.30810546875, + "loss": 0.952, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.8195999264717102, + "rewards/margins": 0.4460810720920563, + "rewards/rejected": -1.2656810283660889, + "rewards/safe_rewards": -0.8683537244796753, + "rewards/unsafe_rewards": -0.7708461880683899, + "step": 610 + }, + { + "epoch": 0.33, + "learning_rate": 4.2138316408864197e-07, + "logits/chosen": -1.9435718059539795, + "logits/rejected": -1.5219794511795044, + "logps/chosen": -273.6173400878906, + "logps/rejected": -296.8672790527344, + "loss": 0.8771, + "rewards/accuracies": 0.793749988079071, + "rewards/chosen": -0.7538294196128845, + "rewards/margins": 0.5905435085296631, + "rewards/rejected": -1.3443728685379028, + "rewards/safe_rewards": -0.7166475653648376, + "rewards/unsafe_rewards": -0.7910112142562866, + "step": 620 + }, + { + "epoch": 0.34, + "learning_rate": 4.179332404841962e-07, + "logits/chosen": -1.6942704916000366, + "logits/rejected": -1.2099497318267822, + "logps/chosen": -327.61376953125, + "logps/rejected": -344.44866943359375, + "loss": 0.9468, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.1748586893081665, + "rewards/margins": 0.5536222457885742, + "rewards/rejected": -1.7284809350967407, + "rewards/safe_rewards": -1.184999942779541, + "rewards/unsafe_rewards": -1.1647173166275024, + "step": 630 + }, + { + "epoch": 0.34, + "learning_rate": 4.1442403102434954e-07, + "logits/chosen": -1.7647279500961304, + "logits/rejected": -1.3675248622894287, + "logps/chosen": -310.9916076660156, + "logps/rejected": -321.579345703125, + "loss": 1.0042, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9101033210754395, + "rewards/margins": 0.5184718370437622, + "rewards/rejected": -1.428575038909912, + "rewards/safe_rewards": -0.9146644473075867, + "rewards/unsafe_rewards": -0.9055421948432922, + "step": 640 + }, + { + "epoch": 0.35, + "learning_rate": 4.108567745733318e-07, + "logits/chosen": -1.7713772058486938, + "logits/rejected": -1.3516300916671753, + "logps/chosen": -255.63150024414062, + "logps/rejected": -284.5046081542969, + "loss": 1.0245, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.821179986000061, + "rewards/margins": 0.4137188792228699, + "rewards/rejected": -1.2348989248275757, + "rewards/safe_rewards": -0.8415560722351074, + "rewards/unsafe_rewards": -0.8008037805557251, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.0723273048783426e-07, + "logits/chosen": -1.8442039489746094, + "logits/rejected": -1.5036883354187012, + "logps/chosen": -304.8941650390625, + "logps/rejected": -295.1714172363281, + "loss": 1.0401, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.8060005903244019, + "rewards/margins": 0.4549127221107483, + "rewards/rejected": -1.2609132528305054, + "rewards/safe_rewards": -0.7164521813392639, + "rewards/unsafe_rewards": -0.8955489993095398, + "step": 660 + }, + { + "epoch": 0.36, + "learning_rate": 4.0355317817241697e-07, + "logits/chosen": -1.8206180334091187, + "logits/rejected": -1.4083284139633179, + "logps/chosen": -316.703369140625, + "logps/rejected": -288.38323974609375, + "loss": 1.0164, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.8776229619979858, + "rewards/margins": 0.3921293020248413, + "rewards/rejected": -1.2697522640228271, + "rewards/safe_rewards": -0.8068926930427551, + "rewards/unsafe_rewards": -0.9483532905578613, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 3.998194166278367e-07, + "logits/chosen": -1.7814273834228516, + "logits/rejected": -1.490839958190918, + "logps/chosen": -295.8428955078125, + "logps/rejected": -305.77972412109375, + "loss": 1.0484, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -1.0455328226089478, + "rewards/margins": 0.34231680631637573, + "rewards/rejected": -1.3878495693206787, + "rewards/safe_rewards": -1.0646705627441406, + "rewards/unsafe_rewards": -1.0263949632644653, + "step": 680 + }, + { + "epoch": 0.37, + "learning_rate": 3.9603276399245855e-07, + "logits/chosen": -1.8057140111923218, + "logits/rejected": -1.3926641941070557, + "logps/chosen": -323.49139404296875, + "logps/rejected": -323.6500549316406, + "loss": 1.0317, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.1086748838424683, + "rewards/margins": 0.44791918992996216, + "rewards/rejected": -1.556593894958496, + "rewards/safe_rewards": -1.087368130683899, + "rewards/unsafe_rewards": -1.1299816370010376, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 3.9219455707691e-07, + "logits/chosen": -1.895880937576294, + "logits/rejected": -1.5151503086090088, + "logps/chosen": -295.07525634765625, + "logps/rejected": -304.41241455078125, + "loss": 0.9783, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.9127877950668335, + "rewards/margins": 0.4130992889404297, + "rewards/rejected": -1.3258870840072632, + "rewards/safe_rewards": -0.9074820280075073, + "rewards/unsafe_rewards": -0.9180935025215149, + "step": 700 + }, + { + "epoch": 0.38, + "learning_rate": 3.883061508921439e-07, + "logits/chosen": -1.9308559894561768, + "logits/rejected": -1.6609346866607666, + "logps/chosen": -277.08172607421875, + "logps/rejected": -316.75994873046875, + "loss": 1.0027, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.7832189798355103, + "rewards/margins": 0.37469321489334106, + "rewards/rejected": -1.1579121351242065, + "rewards/safe_rewards": -0.7912700176239014, + "rewards/unsafe_rewards": -0.7751679420471191, + "step": 710 + }, + { + "epoch": 0.39, + "learning_rate": 3.8436891817107555e-07, + "logits/chosen": -1.761370301246643, + "logits/rejected": -1.5160869359970093, + "logps/chosen": -285.6172790527344, + "logps/rejected": -315.7353210449219, + "loss": 1.0302, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -0.9513989686965942, + "rewards/margins": 0.45606541633605957, + "rewards/rejected": -1.4074645042419434, + "rewards/safe_rewards": -1.0125133991241455, + "rewards/unsafe_rewards": -0.890284538269043, + "step": 720 + }, + { + "epoch": 0.39, + "learning_rate": 3.8038424888396414e-07, + "logits/chosen": -1.8194071054458618, + "logits/rejected": -1.4451847076416016, + "logps/chosen": -295.29241943359375, + "logps/rejected": -320.0098571777344, + "loss": 0.9634, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.8949946165084839, + "rewards/margins": 0.4506065249443054, + "rewards/rejected": -1.3456013202667236, + "rewards/safe_rewards": -0.855640709400177, + "rewards/unsafe_rewards": -0.9343485832214355, + "step": 730 + }, + { + "epoch": 0.4, + "learning_rate": 3.763535497477079e-07, + "logits/chosen": -1.8460315465927124, + "logits/rejected": -1.4700871706008911, + "logps/chosen": -310.37518310546875, + "logps/rejected": -316.61236572265625, + "loss": 0.9976, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.0227649211883545, + "rewards/margins": 0.43325185775756836, + "rewards/rejected": -1.4560167789459229, + "rewards/safe_rewards": -1.0539515018463135, + "rewards/unsafe_rewards": -0.9915785789489746, + "step": 740 + }, + { + "epoch": 0.4, + "learning_rate": 3.7227824372922795e-07, + "logits/chosen": -1.8418188095092773, + "logits/rejected": -1.479627013206482, + "logps/chosen": -286.8067321777344, + "logps/rejected": -302.6395568847656, + "loss": 0.9795, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -0.9858185052871704, + "rewards/margins": 0.41545191407203674, + "rewards/rejected": -1.4012705087661743, + "rewards/safe_rewards": -0.9399738311767578, + "rewards/unsafe_rewards": -1.031663179397583, + "step": 750 + }, + { + "epoch": 0.41, + "learning_rate": 3.681597695431148e-07, + "logits/chosen": -1.749204397201538, + "logits/rejected": -1.3665492534637451, + "logps/chosen": -289.353271484375, + "logps/rejected": -327.75677490234375, + "loss": 0.9449, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.9073783755302429, + "rewards/margins": 0.5846059918403625, + "rewards/rejected": -1.4919843673706055, + "rewards/safe_rewards": -0.9077240228652954, + "rewards/unsafe_rewards": -0.9070326089859009, + "step": 760 + }, + { + "epoch": 0.41, + "learning_rate": 3.639995811437159e-07, + "logits/chosen": -1.667654037475586, + "logits/rejected": -1.314335584640503, + "logps/chosen": -289.2320556640625, + "logps/rejected": -326.54229736328125, + "loss": 0.9656, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9039511680603027, + "rewards/margins": 0.5181721448898315, + "rewards/rejected": -1.4221234321594238, + "rewards/safe_rewards": -0.9193918108940125, + "rewards/unsafe_rewards": -0.8885105848312378, + "step": 770 + }, + { + "epoch": 0.42, + "learning_rate": 3.597991472118426e-07, + "logits/chosen": -1.7382431030273438, + "logits/rejected": -1.3142716884613037, + "logps/chosen": -300.62677001953125, + "logps/rejected": -313.65814208984375, + "loss": 1.055, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8587741851806641, + "rewards/margins": 0.427274614572525, + "rewards/rejected": -1.2860486507415771, + "rewards/safe_rewards": -0.8905943036079407, + "rewards/unsafe_rewards": -0.8269540071487427, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 3.5555995063627836e-07, + "logits/chosen": -1.8035333156585693, + "logits/rejected": -1.4783815145492554, + "logps/chosen": -321.8671875, + "logps/rejected": -317.4945068359375, + "loss": 0.9705, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -0.9154338836669922, + "rewards/margins": 0.400259792804718, + "rewards/rejected": -1.3156936168670654, + "rewards/safe_rewards": -0.946729838848114, + "rewards/unsafe_rewards": -0.8841378092765808, + "step": 790 + }, + { + "epoch": 0.43, + "learning_rate": 3.512834879902715e-07, + "logits/chosen": -1.7292006015777588, + "logits/rejected": -1.3531233072280884, + "logps/chosen": -297.385009765625, + "logps/rejected": -316.39666748046875, + "loss": 0.9624, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.9740325808525085, + "rewards/margins": 0.43682414293289185, + "rewards/rejected": -1.4108567237854004, + "rewards/safe_rewards": -0.979016900062561, + "rewards/unsafe_rewards": -0.9690481424331665, + "step": 800 + }, + { + "epoch": 0.44, + "learning_rate": 3.4697126900319616e-07, + "logits/chosen": -1.6081682443618774, + "logits/rejected": -1.1794389486312866, + "logps/chosen": -295.1870422363281, + "logps/rejected": -306.71160888671875, + "loss": 1.0047, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.9457162022590637, + "rewards/margins": 0.4736977517604828, + "rewards/rejected": -1.4194139242172241, + "rewards/safe_rewards": -0.8538358807563782, + "rewards/unsafe_rewards": -1.0375964641571045, + "step": 810 + }, + { + "epoch": 0.44, + "learning_rate": 3.426248160275693e-07, + "logits/chosen": -1.7311818599700928, + "logits/rejected": -1.3600565195083618, + "logps/chosen": -278.1633605957031, + "logps/rejected": -299.32635498046875, + "loss": 1.0259, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -0.8063637018203735, + "rewards/margins": 0.43319129943847656, + "rewards/rejected": -1.2395551204681396, + "rewards/safe_rewards": -0.859821617603302, + "rewards/unsafe_rewards": -0.7529059052467346, + "step": 820 + }, + { + "epoch": 0.45, + "learning_rate": 3.3824566350161094e-07, + "logits/chosen": -1.785316824913025, + "logits/rejected": -1.323632836341858, + "logps/chosen": -275.93060302734375, + "logps/rejected": -278.7740783691406, + "loss": 0.9551, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -0.675979495048523, + "rewards/margins": 0.428046852350235, + "rewards/rejected": -1.104026436805725, + "rewards/safe_rewards": -0.6841101050376892, + "rewards/unsafe_rewards": -0.6678491234779358, + "step": 830 + }, + { + "epoch": 0.45, + "learning_rate": 3.338353574075381e-07, + "logits/chosen": -1.5819236040115356, + "logits/rejected": -1.3700740337371826, + "logps/chosen": -266.2092590332031, + "logps/rejected": -284.17657470703125, + "loss": 1.1627, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.9084580540657043, + "rewards/margins": 0.3083532750606537, + "rewards/rejected": -1.216811180114746, + "rewards/safe_rewards": -0.9423872828483582, + "rewards/unsafe_rewards": -0.8745288848876953, + "step": 840 + }, + { + "epoch": 0.46, + "learning_rate": 3.2939545472578314e-07, + "logits/chosen": -1.7262417078018188, + "logits/rejected": -1.2280857563018799, + "logps/chosen": -324.2544860839844, + "logps/rejected": -322.0166320800781, + "loss": 1.002, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.9809592366218567, + "rewards/margins": 0.39795732498168945, + "rewards/rejected": -1.3789165019989014, + "rewards/safe_rewards": -0.9736999273300171, + "rewards/unsafe_rewards": -0.9882184863090515, + "step": 850 + }, + { + "epoch": 0.46, + "learning_rate": 3.2492752288532916e-07, + "logits/chosen": -1.7719913721084595, + "logits/rejected": -1.3637843132019043, + "logps/chosen": -289.90692138671875, + "logps/rejected": -295.659423828125, + "loss": 1.0013, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.8620317578315735, + "rewards/margins": 0.4116358757019043, + "rewards/rejected": -1.273667573928833, + "rewards/safe_rewards": -0.8024783134460449, + "rewards/unsafe_rewards": -0.9215850830078125, + "step": 860 + }, + { + "epoch": 0.47, + "learning_rate": 3.204331392103574e-07, + "logits/chosen": -1.8806778192520142, + "logits/rejected": -1.389723539352417, + "logps/chosen": -291.46832275390625, + "logps/rejected": -283.912841796875, + "loss": 0.9879, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -0.7795971035957336, + "rewards/margins": 0.4201742112636566, + "rewards/rejected": -1.1997714042663574, + "rewards/safe_rewards": -0.8063246607780457, + "rewards/unsafe_rewards": -0.7528696060180664, + "step": 870 + }, + { + "epoch": 0.47, + "learning_rate": 3.159138903634006e-07, + "logits/chosen": -1.6265941858291626, + "logits/rejected": -1.263318657875061, + "logps/chosen": -308.3788757324219, + "logps/rejected": -303.78753662109375, + "loss": 0.9939, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.9974681735038757, + "rewards/margins": 0.31481030583381653, + "rewards/rejected": -1.312278389930725, + "rewards/safe_rewards": -0.9676594734191895, + "rewards/unsafe_rewards": -1.0272767543792725, + "step": 880 + }, + { + "epoch": 0.48, + "learning_rate": 3.1137137178519977e-07, + "logits/chosen": -1.5345653295516968, + "logits/rejected": -1.2038419246673584, + "logps/chosen": -277.8311767578125, + "logps/rejected": -317.1457214355469, + "loss": 0.9773, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.9578744769096375, + "rewards/margins": 0.4849475026130676, + "rewards/rejected": -1.4428222179412842, + "rewards/safe_rewards": -0.9253692626953125, + "rewards/unsafe_rewards": -0.9903799891471863, + "step": 890 + }, + { + "epoch": 0.48, + "learning_rate": 3.068071871314626e-07, + "logits/chosen": -1.477648138999939, + "logits/rejected": -1.1965210437774658, + "logps/chosen": -279.1951599121094, + "logps/rejected": -292.29833984375, + "loss": 0.9773, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -1.0047625303268433, + "rewards/margins": 0.3235151171684265, + "rewards/rejected": -1.328277826309204, + "rewards/safe_rewards": -1.013329267501831, + "rewards/unsafe_rewards": -0.9961959719657898, + "step": 900 + }, + { + "epoch": 0.49, + "learning_rate": 3.022229477067205e-07, + "logits/chosen": -1.6116926670074463, + "logits/rejected": -1.2270816564559937, + "logps/chosen": -320.8763732910156, + "logps/rejected": -318.58355712890625, + "loss": 0.8831, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -1.0378270149230957, + "rewards/margins": 0.4454011917114258, + "rewards/rejected": -1.4832282066345215, + "rewards/safe_rewards": -1.0344152450561523, + "rewards/unsafe_rewards": -1.0412386655807495, + "step": 910 + }, + { + "epoch": 0.49, + "learning_rate": 2.976202718954869e-07, + "logits/chosen": -1.5823721885681152, + "logits/rejected": -1.1224069595336914, + "logps/chosen": -333.028564453125, + "logps/rejected": -350.3519592285156, + "loss": 1.0709, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.2154980897903442, + "rewards/margins": 0.46428003907203674, + "rewards/rejected": -1.6797780990600586, + "rewards/safe_rewards": -1.1762654781341553, + "rewards/unsafe_rewards": -1.2547308206558228, + "step": 920 + }, + { + "epoch": 0.5, + "learning_rate": 2.930007845909146e-07, + "logits/chosen": -1.5423873662948608, + "logits/rejected": -1.2088254690170288, + "logps/chosen": -333.67645263671875, + "logps/rejected": -360.3753662109375, + "loss": 0.9999, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3605718612670898, + "rewards/margins": 0.422391802072525, + "rewards/rejected": -1.782963752746582, + "rewards/safe_rewards": -1.422663688659668, + "rewards/unsafe_rewards": -1.2984803915023804, + "step": 930 + }, + { + "epoch": 0.51, + "learning_rate": 2.8836611662115634e-07, + "logits/chosen": -1.6511509418487549, + "logits/rejected": -1.275423288345337, + "logps/chosen": -321.2275085449219, + "logps/rejected": -312.20733642578125, + "loss": 1.0348, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.010995626449585, + "rewards/margins": 0.42944231629371643, + "rewards/rejected": -1.440437912940979, + "rewards/safe_rewards": -0.9675294160842896, + "rewards/unsafe_rewards": -1.0544618368148804, + "step": 940 + }, + { + "epoch": 0.51, + "learning_rate": 2.8371790417362986e-07, + "logits/chosen": -1.5915000438690186, + "logits/rejected": -1.2896572351455688, + "logps/chosen": -281.606201171875, + "logps/rejected": -308.20916748046875, + "loss": 1.0666, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.9426482319831848, + "rewards/margins": 0.37754327058792114, + "rewards/rejected": -1.3201916217803955, + "rewards/safe_rewards": -0.8953598141670227, + "rewards/unsafe_rewards": -0.9899368286132812, + "step": 950 + }, + { + "epoch": 0.52, + "learning_rate": 2.7905778821739056e-07, + "logits/chosen": -1.478126049041748, + "logits/rejected": -1.1587542295455933, + "logps/chosen": -301.42181396484375, + "logps/rejected": -304.0444030761719, + "loss": 0.9759, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -1.0664873123168945, + "rewards/margins": 0.36265167593955994, + "rewards/rejected": -1.4291390180587769, + "rewards/safe_rewards": -1.0449130535125732, + "rewards/unsafe_rewards": -1.0880613327026367, + "step": 960 + }, + { + "epoch": 0.52, + "learning_rate": 2.74387413923817e-07, + "logits/chosen": -1.4300400018692017, + "logits/rejected": -1.1722289323806763, + "logps/chosen": -327.14739990234375, + "logps/rejected": -327.6680908203125, + "loss": 0.977, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.026637315750122, + "rewards/margins": 0.39583832025527954, + "rewards/rejected": -1.4224755764007568, + "rewards/safe_rewards": -1.0289757251739502, + "rewards/unsafe_rewards": -1.024298906326294, + "step": 970 + }, + { + "epoch": 0.53, + "learning_rate": 2.69708430085812e-07, + "logits/chosen": -1.6212282180786133, + "logits/rejected": -1.1375267505645752, + "logps/chosen": -331.76654052734375, + "logps/rejected": -341.3858947753906, + "loss": 1.0282, + "rewards/accuracies": 0.78125, + "rewards/chosen": -1.0838706493377686, + "rewards/margins": 0.5636218190193176, + "rewards/rejected": -1.6474926471710205, + "rewards/safe_rewards": -1.0060853958129883, + "rewards/unsafe_rewards": -1.161656141281128, + "step": 980 + }, + { + "epoch": 0.53, + "learning_rate": 2.6502248853572504e-07, + "logits/chosen": -1.5994454622268677, + "logits/rejected": -1.2842458486557007, + "logps/chosen": -291.59063720703125, + "logps/rejected": -317.38970947265625, + "loss": 1.0111, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.0371322631835938, + "rewards/margins": 0.5142688155174255, + "rewards/rejected": -1.551400899887085, + "rewards/safe_rewards": -1.0258736610412598, + "rewards/unsafe_rewards": -1.0483907461166382, + "step": 990 + }, + { + "epoch": 0.54, + "learning_rate": 2.6033124356220325e-07, + "logits/chosen": -1.5264742374420166, + "logits/rejected": -1.1301841735839844, + "logps/chosen": -305.7037048339844, + "logps/rejected": -310.7588806152344, + "loss": 0.9569, + "rewards/accuracies": 0.78125, + "rewards/chosen": -1.029294490814209, + "rewards/margins": 0.4845431447029114, + "rewards/rejected": -1.5138375759124756, + "rewards/safe_rewards": -0.948198139667511, + "rewards/unsafe_rewards": -1.1103906631469727, + "step": 1000 + }, + { + "epoch": 0.54, + "eval_logits/chosen": -1.0711549520492554, + "eval_logits/rejected": -0.5821475982666016, + "eval_logps/chosen": -221.7958984375, + "eval_logps/rejected": -212.7307586669922, + "eval_loss": 0.34972870349884033, + "eval_rewards/accuracies": 0.7209583520889282, + "eval_rewards/chosen": -0.913569986820221, + "eval_rewards/margins": 0.28902512788772583, + "eval_rewards/rejected": -1.2025949954986572, + "eval_rewards/safe_rewards": -0.9005662798881531, + "eval_rewards/unsafe_rewards": -0.9165622591972351, + "eval_runtime": 1062.9878, + "eval_samples_per_second": 31.086, + "eval_steps_per_second": 0.972, + "step": 1000 + }, + { + "epoch": 0.54, + "learning_rate": 2.55636351326173e-07, + "logits/chosen": -1.4447015523910522, + "logits/rejected": -1.0418920516967773, + "logps/chosen": -320.98358154296875, + "logps/rejected": -330.81341552734375, + "loss": 0.8958, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -1.149300217628479, + "rewards/margins": 0.562938928604126, + "rewards/rejected": -1.7122390270233154, + "rewards/safe_rewards": -1.1756014823913574, + "rewards/unsafe_rewards": -1.122998595237732, + "step": 1010 + }, + { + "epoch": 0.55, + "learning_rate": 2.509394692761622e-07, + "logits/chosen": -1.4893229007720947, + "logits/rejected": -1.0318996906280518, + "logps/chosen": -320.03070068359375, + "logps/rejected": -331.4481201171875, + "loss": 0.9718, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.1435344219207764, + "rewards/margins": 0.5211852788925171, + "rewards/rejected": -1.6647193431854248, + "rewards/safe_rewards": -1.1441400051116943, + "rewards/unsafe_rewards": -1.1429284811019897, + "step": 1020 + }, + { + "epoch": 0.55, + "learning_rate": 2.462422555631674e-07, + "logits/chosen": -1.3502875566482544, + "logits/rejected": -0.8036888241767883, + "logps/chosen": -344.90618896484375, + "logps/rejected": -345.6969909667969, + "loss": 0.953, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -1.4247171878814697, + "rewards/margins": 0.532504141330719, + "rewards/rejected": -1.957221269607544, + "rewards/safe_rewards": -1.399427890777588, + "rewards/unsafe_rewards": -1.4500062465667725, + "step": 1030 + }, + { + "epoch": 0.56, + "learning_rate": 2.415463684552728e-07, + "logits/chosen": -1.279679536819458, + "logits/rejected": -0.875691294670105, + "logps/chosen": -329.5700378417969, + "logps/rejected": -347.35162353515625, + "loss": 1.0239, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.3259227275848389, + "rewards/margins": 0.47411665320396423, + "rewards/rejected": -1.800039529800415, + "rewards/safe_rewards": -1.3383002281188965, + "rewards/unsafe_rewards": -1.3135454654693604, + "step": 1040 + }, + { + "epoch": 0.56, + "learning_rate": 2.3685346575222807e-07, + "logits/chosen": -1.4535144567489624, + "logits/rejected": -0.9614871740341187, + "logps/chosen": -325.8673400878906, + "logps/rejected": -333.7716064453125, + "loss": 0.9704, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.082326889038086, + "rewards/margins": 0.5055097341537476, + "rewards/rejected": -1.5878366231918335, + "rewards/safe_rewards": -1.087576150894165, + "rewards/unsafe_rewards": -1.0770776271820068, + "step": 1050 + }, + { + "epoch": 0.57, + "learning_rate": 2.321652042001919e-07, + "logits/chosen": -1.425490140914917, + "logits/rejected": -0.9585882425308228, + "logps/chosen": -332.23651123046875, + "logps/rejected": -367.16656494140625, + "loss": 0.9341, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.210457682609558, + "rewards/margins": 0.5372633934020996, + "rewards/rejected": -1.7477210760116577, + "rewards/safe_rewards": -1.155447244644165, + "rewards/unsafe_rewards": -1.265467882156372, + "step": 1060 + }, + { + "epoch": 0.58, + "learning_rate": 2.2748323890684662e-07, + "logits/chosen": -1.5478570461273193, + "logits/rejected": -0.9782267808914185, + "logps/chosen": -313.54412841796875, + "logps/rejected": -330.4336853027344, + "loss": 0.9344, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.0107817649841309, + "rewards/margins": 0.6438864469528198, + "rewards/rejected": -1.6546680927276611, + "rewards/safe_rewards": -1.0349432229995728, + "rewards/unsafe_rewards": -0.986620306968689, + "step": 1070 + }, + { + "epoch": 0.58, + "learning_rate": 2.2280922275709213e-07, + "logits/chosen": -1.4732444286346436, + "logits/rejected": -1.1423754692077637, + "logps/chosen": -317.08258056640625, + "logps/rejected": -328.4356689453125, + "loss": 0.982, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -1.0598576068878174, + "rewards/margins": 0.41645368933677673, + "rewards/rejected": -1.4763113260269165, + "rewards/safe_rewards": -1.0165767669677734, + "rewards/unsafe_rewards": -1.1031386852264404, + "step": 1080 + }, + { + "epoch": 0.59, + "learning_rate": 2.1814480582952375e-07, + "logits/chosen": -1.489725112915039, + "logits/rejected": -1.1142539978027344, + "logps/chosen": -309.04681396484375, + "logps/rejected": -333.715576171875, + "loss": 0.9727, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -1.0693882703781128, + "rewards/margins": 0.4686933159828186, + "rewards/rejected": -1.5380815267562866, + "rewards/safe_rewards": -0.9820979237556458, + "rewards/unsafe_rewards": -1.156678557395935, + "step": 1090 + }, + { + "epoch": 0.59, + "learning_rate": 2.1349163481390187e-07, + "logits/chosen": -1.421409010887146, + "logits/rejected": -1.031862497329712, + "logps/chosen": -316.99139404296875, + "logps/rejected": -340.88909912109375, + "loss": 0.9693, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.1647942066192627, + "rewards/margins": 0.4819185137748718, + "rewards/rejected": -1.6467128992080688, + "rewards/safe_rewards": -1.0639079809188843, + "rewards/unsafe_rewards": -1.2656804323196411, + "step": 1100 + }, + { + "epoch": 0.6, + "learning_rate": 2.0885135242981647e-07, + "logits/chosen": -1.4362539052963257, + "logits/rejected": -1.005382776260376, + "logps/chosen": -351.58978271484375, + "logps/rejected": -323.1505126953125, + "loss": 0.9265, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.1940038204193115, + "rewards/margins": 0.4320971369743347, + "rewards/rejected": -1.6261011362075806, + "rewards/safe_rewards": -1.2753981351852417, + "rewards/unsafe_rewards": -1.112609624862671, + "step": 1110 + }, + { + "epoch": 0.6, + "learning_rate": 2.0422559684675494e-07, + "logits/chosen": -1.4388701915740967, + "logits/rejected": -0.9763511419296265, + "logps/chosen": -328.2054748535156, + "logps/rejected": -331.94427490234375, + "loss": 0.9247, + "rewards/accuracies": 0.793749988079071, + "rewards/chosen": -1.1099811792373657, + "rewards/margins": 0.4838770031929016, + "rewards/rejected": -1.5938583612442017, + "rewards/safe_rewards": -1.143110752105713, + "rewards/unsafe_rewards": -1.076851725578308, + "step": 1120 + }, + { + "epoch": 0.61, + "learning_rate": 1.9961600110577457e-07, + "logits/chosen": -1.3994672298431396, + "logits/rejected": -0.9330530166625977, + "logps/chosen": -320.69989013671875, + "logps/rejected": -341.1868591308594, + "loss": 1.007, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.1178644895553589, + "rewards/margins": 0.4091947674751282, + "rewards/rejected": -1.5270591974258423, + "rewards/safe_rewards": -1.0592812299728394, + "rewards/unsafe_rewards": -1.1764475107192993, + "step": 1130 + }, + { + "epoch": 0.61, + "learning_rate": 1.950241925429867e-07, + "logits/chosen": -1.5059046745300293, + "logits/rejected": -0.9375900030136108, + "logps/chosen": -301.2913513183594, + "logps/rejected": -323.166259765625, + "loss": 0.9384, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -1.0121370553970337, + "rewards/margins": 0.6072208285331726, + "rewards/rejected": -1.6193578243255615, + "rewards/safe_rewards": -1.0190235376358032, + "rewards/unsafe_rewards": -1.0052505731582642, + "step": 1140 + }, + { + "epoch": 0.62, + "learning_rate": 1.9045179221505495e-07, + "logits/chosen": -1.4665344953536987, + "logits/rejected": -1.1525509357452393, + "logps/chosen": -336.7375183105469, + "logps/rejected": -342.58258056640625, + "loss": 0.9456, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.0744739770889282, + "rewards/margins": 0.4735342860221863, + "rewards/rejected": -1.5480082035064697, + "rewards/safe_rewards": -1.0436086654663086, + "rewards/unsafe_rewards": -1.1053390502929688, + "step": 1150 + }, + { + "epoch": 0.62, + "learning_rate": 1.8590041432690893e-07, + "logits/chosen": -1.3896484375, + "logits/rejected": -1.1142855882644653, + "logps/chosen": -297.66619873046875, + "logps/rejected": -315.8662109375, + "loss": 0.977, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.100203037261963, + "rewards/margins": 0.3822095990180969, + "rewards/rejected": -1.4824126958847046, + "rewards/safe_rewards": -1.1140623092651367, + "rewards/unsafe_rewards": -1.0863438844680786, + "step": 1160 + }, + { + "epoch": 0.63, + "learning_rate": 1.813716656618788e-07, + "logits/chosen": -1.331933856010437, + "logits/rejected": -1.0040611028671265, + "logps/chosen": -300.43267822265625, + "logps/rejected": -321.0027770996094, + "loss": 0.969, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -1.1213122606277466, + "rewards/margins": 0.4405437111854553, + "rewards/rejected": -1.5618560314178467, + "rewards/safe_rewards": -1.0390920639038086, + "rewards/unsafe_rewards": -1.2035324573516846, + "step": 1170 + }, + { + "epoch": 0.63, + "learning_rate": 1.7686714501444788e-07, + "logits/chosen": -1.4456322193145752, + "logits/rejected": -0.8397674560546875, + "logps/chosen": -326.2867736816406, + "logps/rejected": -333.5230407714844, + "loss": 0.9325, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.1393133401870728, + "rewards/margins": 0.5787724256515503, + "rewards/rejected": -1.7180858850479126, + "rewards/safe_rewards": -1.2189667224884033, + "rewards/unsafe_rewards": -1.0596599578857422, + "step": 1180 + }, + { + "epoch": 0.64, + "learning_rate": 1.7238844262582768e-07, + "logits/chosen": -1.363268494606018, + "logits/rejected": -1.1216974258422852, + "logps/chosen": -321.5309143066406, + "logps/rejected": -357.5130920410156, + "loss": 0.9391, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -1.1455228328704834, + "rewards/margins": 0.5131853222846985, + "rewards/rejected": -1.6587082147598267, + "rewards/safe_rewards": -1.0672725439071655, + "rewards/unsafe_rewards": -1.2237731218338013, + "step": 1190 + }, + { + "epoch": 0.65, + "learning_rate": 1.679371396225504e-07, + "logits/chosen": -1.4268032312393188, + "logits/rejected": -0.9499413371086121, + "logps/chosen": -310.05328369140625, + "logps/rejected": -351.35272216796875, + "loss": 0.9212, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -1.089523434638977, + "rewards/margins": 0.6114920973777771, + "rewards/rejected": -1.7010157108306885, + "rewards/safe_rewards": -1.027521014213562, + "rewards/unsafe_rewards": -1.151525855064392, + "step": 1200 + }, + { + "epoch": 0.65, + "learning_rate": 1.6351480745828096e-07, + "logits/chosen": -1.4248908758163452, + "logits/rejected": -1.0554434061050415, + "logps/chosen": -324.76751708984375, + "logps/rejected": -340.4091491699219, + "loss": 0.8506, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.2382574081420898, + "rewards/margins": 0.46283411979675293, + "rewards/rejected": -1.7010915279388428, + "rewards/safe_rewards": -1.2276763916015625, + "rewards/unsafe_rewards": -1.2488384246826172, + "step": 1210 + }, + { + "epoch": 0.66, + "learning_rate": 1.5912300735904248e-07, + "logits/chosen": -1.5351839065551758, + "logits/rejected": -1.176792860031128, + "logps/chosen": -334.9280090332031, + "logps/rejected": -334.9038391113281, + "loss": 0.9582, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.1213009357452393, + "rewards/margins": 0.4566107392311096, + "rewards/rejected": -1.577911615371704, + "rewards/safe_rewards": -1.2159931659698486, + "rewards/unsafe_rewards": -1.0266087055206299, + "step": 1220 + }, + { + "epoch": 0.66, + "learning_rate": 1.5476328977205395e-07, + "logits/chosen": -1.5172470808029175, + "logits/rejected": -1.0838744640350342, + "logps/chosen": -318.88702392578125, + "logps/rejected": -331.92626953125, + "loss": 0.929, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.1251935958862305, + "rewards/margins": 0.5502282381057739, + "rewards/rejected": -1.675421953201294, + "rewards/safe_rewards": -1.2570269107818604, + "rewards/unsafe_rewards": -0.9933602213859558, + "step": 1230 + }, + { + "epoch": 0.67, + "learning_rate": 1.5043719381837112e-07, + "logits/chosen": -1.4699004888534546, + "logits/rejected": -1.1323144435882568, + "logps/chosen": -332.270751953125, + "logps/rejected": -346.4047546386719, + "loss": 0.9436, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.1165038347244263, + "rewards/margins": 0.47613364458084106, + "rewards/rejected": -1.5926374197006226, + "rewards/safe_rewards": -1.0986835956573486, + "rewards/unsafe_rewards": -1.1343239545822144, + "step": 1240 + }, + { + "epoch": 0.67, + "learning_rate": 1.461462467495284e-07, + "logits/chosen": -1.445765495300293, + "logits/rejected": -1.0371205806732178, + "logps/chosen": -287.15216064453125, + "logps/rejected": -333.86676025390625, + "loss": 0.8531, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.092584490776062, + "rewards/margins": 0.5968576669692993, + "rewards/rejected": -1.6894422769546509, + "rewards/safe_rewards": -1.1179049015045166, + "rewards/unsafe_rewards": -1.067264199256897, + "step": 1250 + }, + { + "epoch": 0.68, + "learning_rate": 1.4189196340836865e-07, + "logits/chosen": -1.6602089405059814, + "logits/rejected": -1.2219452857971191, + "logps/chosen": -297.80426025390625, + "logps/rejected": -314.51458740234375, + "loss": 0.9095, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.9903711080551147, + "rewards/margins": 0.4589962363243103, + "rewards/rejected": -1.4493674039840698, + "rewards/safe_rewards": -1.0244104862213135, + "rewards/unsafe_rewards": -0.9563320279121399, + "step": 1260 + }, + { + "epoch": 0.68, + "learning_rate": 1.3767584569425561e-07, + "logits/chosen": -1.6483261585235596, + "logits/rejected": -1.1660915613174438, + "logps/chosen": -309.3149719238281, + "logps/rejected": -320.50726318359375, + "loss": 0.9112, + "rewards/accuracies": 0.78125, + "rewards/chosen": -1.0096858739852905, + "rewards/margins": 0.4956924319267273, + "rewards/rejected": -1.5053783655166626, + "rewards/safe_rewards": -1.045543909072876, + "rewards/unsafe_rewards": -0.973828136920929, + "step": 1270 + }, + { + "epoch": 0.69, + "learning_rate": 1.334993820328541e-07, + "logits/chosen": -1.501082420349121, + "logits/rejected": -1.1109731197357178, + "logps/chosen": -289.6914978027344, + "logps/rejected": -326.8876953125, + "loss": 0.9211, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.1317590475082397, + "rewards/margins": 0.6352017521858215, + "rewards/rejected": -1.766960859298706, + "rewards/safe_rewards": -1.0952441692352295, + "rewards/unsafe_rewards": -1.1682740449905396, + "step": 1280 + }, + { + "epoch": 0.69, + "learning_rate": 1.2936404685066852e-07, + "logits/chosen": -1.487445592880249, + "logits/rejected": -1.1660425662994385, + "logps/chosen": -333.1927185058594, + "logps/rejected": -355.2337341308594, + "loss": 0.9879, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.1514599323272705, + "rewards/margins": 0.4698604643344879, + "rewards/rejected": -1.621320366859436, + "rewards/safe_rewards": -1.2052063941955566, + "rewards/unsafe_rewards": -1.0977137088775635, + "step": 1290 + }, + { + "epoch": 0.7, + "learning_rate": 1.252713000545221e-07, + "logits/chosen": -1.6853973865509033, + "logits/rejected": -1.2987568378448486, + "logps/chosen": -322.5224914550781, + "logps/rejected": -331.0067138671875, + "loss": 0.8714, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.0373764038085938, + "rewards/margins": 0.5552427768707275, + "rewards/rejected": -1.5926191806793213, + "rewards/safe_rewards": -1.0258121490478516, + "rewards/unsafe_rewards": -1.0489407777786255, + "step": 1300 + }, + { + "epoch": 0.7, + "learning_rate": 1.2122258651616304e-07, + "logits/chosen": -1.6230665445327759, + "logits/rejected": -1.1880443096160889, + "logps/chosen": -313.47772216796875, + "logps/rejected": -304.3802185058594, + "loss": 0.9486, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0324283838272095, + "rewards/margins": 0.39181455969810486, + "rewards/rejected": -1.4242427349090576, + "rewards/safe_rewards": -0.9644128680229187, + "rewards/unsafe_rewards": -1.1004436016082764, + "step": 1310 + }, + { + "epoch": 0.71, + "learning_rate": 1.1721933556217792e-07, + "logits/chosen": -1.5601403713226318, + "logits/rejected": -1.2365220785140991, + "logps/chosen": -307.1771240234375, + "logps/rejected": -328.2700500488281, + "loss": 0.9875, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.066193699836731, + "rewards/margins": 0.4565289616584778, + "rewards/rejected": -1.5227227210998535, + "rewards/safe_rewards": -1.0890748500823975, + "rewards/unsafe_rewards": -1.0433127880096436, + "step": 1320 + }, + { + "epoch": 0.72, + "learning_rate": 1.1326296046939333e-07, + "logits/chosen": -1.5020397901535034, + "logits/rejected": -1.146071195602417, + "logps/chosen": -292.02313232421875, + "logps/rejected": -311.25604248046875, + "loss": 0.926, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -0.971126914024353, + "rewards/margins": 0.559014081954956, + "rewards/rejected": -1.5301411151885986, + "rewards/safe_rewards": -0.9235084652900696, + "rewards/unsafe_rewards": -1.0187455415725708, + "step": 1330 + }, + { + "epoch": 0.72, + "learning_rate": 1.0935485796594351e-07, + "logits/chosen": -1.5235176086425781, + "logits/rejected": -1.0568602085113525, + "logps/chosen": -333.9339904785156, + "logps/rejected": -333.97576904296875, + "loss": 1.032, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.0942230224609375, + "rewards/margins": 0.5436626672744751, + "rewards/rejected": -1.6378856897354126, + "rewards/safe_rewards": -1.079145073890686, + "rewards/unsafe_rewards": -1.109300971031189, + "step": 1340 + }, + { + "epoch": 0.73, + "learning_rate": 1.0549640773818028e-07, + "logits/chosen": -1.411492943763733, + "logits/rejected": -1.1914324760437012, + "logps/chosen": -315.5474853515625, + "logps/rejected": -322.1836242675781, + "loss": 0.9732, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.1521949768066406, + "rewards/margins": 0.41504979133605957, + "rewards/rejected": -1.5672447681427002, + "rewards/safe_rewards": -1.2113215923309326, + "rewards/unsafe_rewards": -1.0930684804916382, + "step": 1350 + }, + { + "epoch": 0.73, + "learning_rate": 1.0168897194359921e-07, + "logits/chosen": -1.5103179216384888, + "logits/rejected": -1.1291395425796509, + "logps/chosen": -344.5137634277344, + "logps/rejected": -345.2103576660156, + "loss": 0.9403, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.1670544147491455, + "rewards/margins": 0.4278257489204407, + "rewards/rejected": -1.5948803424835205, + "rewards/safe_rewards": -1.1103397607803345, + "rewards/unsafe_rewards": -1.2237694263458252, + "step": 1360 + }, + { + "epoch": 0.74, + "learning_rate": 9.793389472995392e-08, + "logits/chosen": -1.437276840209961, + "logits/rejected": -0.9090574383735657, + "logps/chosen": -319.93829345703125, + "logps/rejected": -318.78802490234375, + "loss": 0.8305, + "rewards/accuracies": 0.8062499761581421, + "rewards/chosen": -1.0438860654830933, + "rewards/margins": 0.630165696144104, + "rewards/rejected": -1.6740516424179077, + "rewards/safe_rewards": -0.9974796175956726, + "rewards/unsafe_rewards": -1.0902923345565796, + "step": 1370 + }, + { + "epoch": 0.74, + "learning_rate": 9.423250176072874e-08, + "logits/chosen": -1.4429516792297363, + "logits/rejected": -1.0162016153335571, + "logps/chosen": -314.0827331542969, + "logps/rejected": -311.3514404296875, + "loss": 1.0966, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2516918182373047, + "rewards/margins": 0.3869698643684387, + "rewards/rejected": -1.6386617422103882, + "rewards/safe_rewards": -1.2116239070892334, + "rewards/unsafe_rewards": -1.291759967803955, + "step": 1380 + }, + { + "epoch": 0.75, + "learning_rate": 9.058609974713654e-08, + "logits/chosen": -1.5069031715393066, + "logits/rejected": -1.0585293769836426, + "logps/chosen": -312.8193054199219, + "logps/rejected": -344.32769775390625, + "loss": 0.8858, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -1.0806872844696045, + "rewards/margins": 0.6248958110809326, + "rewards/rejected": -1.7055833339691162, + "rewards/safe_rewards": -1.065792441368103, + "rewards/unsafe_rewards": -1.0955822467803955, + "step": 1390 + }, + { + "epoch": 0.75, + "learning_rate": 8.699597598680753e-08, + "logits/chosen": -1.4285691976547241, + "logits/rejected": -1.0217626094818115, + "logps/chosen": -298.48516845703125, + "logps/rejected": -319.5094909667969, + "loss": 0.8598, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.0639463663101196, + "rewards/margins": 0.5066736340522766, + "rewards/rejected": -1.570619821548462, + "rewards/safe_rewards": -1.049989938735962, + "rewards/unsafe_rewards": -1.0779026746749878, + "step": 1400 + }, + { + "epoch": 0.76, + "learning_rate": 8.346339790933166e-08, + "logits/chosen": -1.4796028137207031, + "logits/rejected": -1.0254989862442017, + "logps/chosen": -303.11322021484375, + "logps/rejected": -319.5587463378906, + "loss": 0.9735, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -1.1025015115737915, + "rewards/margins": 0.5202707052230835, + "rewards/rejected": -1.622771978378296, + "rewards/safe_rewards": -1.048610806465149, + "rewards/unsafe_rewards": -1.1563920974731445, + "step": 1410 + }, + { + "epoch": 0.76, + "learning_rate": 7.998961262881506e-08, + "logits/chosen": -1.4263174533843994, + "logits/rejected": -0.9260984659194946, + "logps/chosen": -325.7618408203125, + "logps/rejected": -318.91070556640625, + "loss": 0.9355, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.0862057209014893, + "rewards/margins": 0.51823890209198, + "rewards/rejected": -1.6044447422027588, + "rewards/safe_rewards": -1.1601320505142212, + "rewards/unsafe_rewards": -1.0122793912887573, + "step": 1420 + }, + { + "epoch": 0.77, + "learning_rate": 7.657584650360846e-08, + "logits/chosen": -1.2710342407226562, + "logits/rejected": -0.989566445350647, + "logps/chosen": -300.971923828125, + "logps/rejected": -315.53387451171875, + "loss": 0.9896, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -1.1461577415466309, + "rewards/margins": 0.5026684999465942, + "rewards/rejected": -1.648826241493225, + "rewards/safe_rewards": -1.1745736598968506, + "rewards/unsafe_rewards": -1.117741584777832, + "step": 1430 + }, + { + "epoch": 0.77, + "learning_rate": 7.322330470336313e-08, + "logits/chosen": -1.38018798828125, + "logits/rejected": -0.90345698595047, + "logps/chosen": -322.70196533203125, + "logps/rejected": -353.0235595703125, + "loss": 0.9274, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.1377968788146973, + "rewards/margins": 0.5669258236885071, + "rewards/rejected": -1.7047227621078491, + "rewards/safe_rewards": -1.0534660816192627, + "rewards/unsafe_rewards": -1.2221280336380005, + "step": 1440 + }, + { + "epoch": 0.78, + "learning_rate": 6.993317078356709e-08, + "logits/chosen": -1.4155539274215698, + "logits/rejected": -1.210967779159546, + "logps/chosen": -331.1578674316406, + "logps/rejected": -324.1207580566406, + "loss": 0.9583, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -1.2102378606796265, + "rewards/margins": 0.2986965477466583, + "rewards/rejected": -1.5089343786239624, + "rewards/safe_rewards": -1.2407358884811401, + "rewards/unsafe_rewards": -1.1797398328781128, + "step": 1450 + }, + { + "epoch": 0.79, + "learning_rate": 6.67066062677118e-08, + "logits/chosen": -1.5069057941436768, + "logits/rejected": -1.0829797983169556, + "logps/chosen": -308.49664306640625, + "logps/rejected": -310.1492004394531, + "loss": 1.0037, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.0759823322296143, + "rewards/margins": 0.45751920342445374, + "rewards/rejected": -1.5335016250610352, + "rewards/safe_rewards": -1.0966331958770752, + "rewards/unsafe_rewards": -1.0553314685821533, + "step": 1460 + }, + { + "epoch": 0.79, + "learning_rate": 6.354475023723685e-08, + "logits/chosen": -1.4374122619628906, + "logits/rejected": -1.0555397272109985, + "logps/chosen": -348.89227294921875, + "logps/rejected": -351.38665771484375, + "loss": 0.949, + "rewards/accuracies": 0.78125, + "rewards/chosen": -1.1484794616699219, + "rewards/margins": 0.5996941924095154, + "rewards/rejected": -1.748173475265503, + "rewards/safe_rewards": -1.1115808486938477, + "rewards/unsafe_rewards": -1.1853783130645752, + "step": 1470 + }, + { + "epoch": 0.8, + "learning_rate": 6.044871892939746e-08, + "logits/chosen": -1.5760042667388916, + "logits/rejected": -1.1585383415222168, + "logps/chosen": -320.93597412109375, + "logps/rejected": -338.1377868652344, + "loss": 0.9439, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.0907033681869507, + "rewards/margins": 0.4960748255252838, + "rewards/rejected": -1.5867780447006226, + "rewards/safe_rewards": -1.1278786659240723, + "rewards/unsafe_rewards": -1.0535279512405396, + "step": 1480 + }, + { + "epoch": 0.8, + "learning_rate": 5.741960534319676e-08, + "logits/chosen": -1.5081236362457275, + "logits/rejected": -1.237660527229309, + "logps/chosen": -281.8060607910156, + "logps/rejected": -306.55889892578125, + "loss": 0.9089, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.0866477489471436, + "rewards/margins": 0.4203459322452545, + "rewards/rejected": -1.5069936513900757, + "rewards/safe_rewards": -1.198955774307251, + "rewards/unsafe_rewards": -0.9743399620056152, + "step": 1490 + }, + { + "epoch": 0.81, + "learning_rate": 5.44584788535217e-08, + "logits/chosen": -1.5474069118499756, + "logits/rejected": -1.1630122661590576, + "logps/chosen": -322.1282653808594, + "logps/rejected": -332.53656005859375, + "loss": 0.8619, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.0482981204986572, + "rewards/margins": 0.5498597621917725, + "rewards/rejected": -1.5981578826904297, + "rewards/safe_rewards": -1.0035431385040283, + "rewards/unsafe_rewards": -1.0930533409118652, + "step": 1500 + }, + { + "epoch": 0.81, + "eval_logits/chosen": -1.146567940711975, + "eval_logits/rejected": -0.7005062699317932, + "eval_logps/chosen": -221.80184936523438, + "eval_logps/rejected": -210.18832397460938, + "eval_loss": 0.34287312626838684, + "eval_rewards/accuracies": 0.7268877029418945, + "eval_rewards/chosen": -0.913629412651062, + "eval_rewards/margins": 0.2635413706302643, + "eval_rewards/rejected": -1.1771708726882935, + "eval_rewards/safe_rewards": -0.9047471284866333, + "eval_rewards/unsafe_rewards": -0.9192255139350891, + "eval_runtime": 1122.8444, + "eval_samples_per_second": 29.429, + "eval_steps_per_second": 0.92, + "step": 1500 + }, + { + "epoch": 0.81, + "learning_rate": 5.156638483361933e-08, + "logits/chosen": -1.612953782081604, + "logits/rejected": -1.2291548252105713, + "logps/chosen": -316.44293212890625, + "logps/rejected": -337.2187194824219, + "loss": 0.9167, + "rewards/accuracies": 0.793749988079071, + "rewards/chosen": -0.9855987429618835, + "rewards/margins": 0.5536761283874512, + "rewards/rejected": -1.5392746925354004, + "rewards/safe_rewards": -1.0115060806274414, + "rewards/unsafe_rewards": -0.9596911668777466, + "step": 1510 + }, + { + "epoch": 0.82, + "learning_rate": 4.8744344286046236e-08, + "logits/chosen": -1.4979829788208008, + "logits/rejected": -1.1606972217559814, + "logps/chosen": -323.786865234375, + "logps/rejected": -326.4385681152344, + "loss": 0.9593, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.0706676244735718, + "rewards/margins": 0.37281331419944763, + "rewards/rejected": -1.4434809684753418, + "rewards/safe_rewards": -1.150662899017334, + "rewards/unsafe_rewards": -0.99067223072052, + "step": 1520 + }, + { + "epoch": 0.82, + "learning_rate": 4.599335348222169e-08, + "logits/chosen": -1.5021213293075562, + "logits/rejected": -1.2472387552261353, + "logps/chosen": -329.1572265625, + "logps/rejected": -364.0357971191406, + "loss": 0.9118, + "rewards/accuracies": 0.78125, + "rewards/chosen": -1.0721365213394165, + "rewards/margins": 0.5561688542366028, + "rewards/rejected": -1.628305435180664, + "rewards/safe_rewards": -1.036094307899475, + "rewards/unsafe_rewards": -1.108178734779358, + "step": 1530 + }, + { + "epoch": 0.83, + "learning_rate": 4.331438361071163e-08, + "logits/chosen": -1.5342875719070435, + "logits/rejected": -1.3547062873840332, + "logps/chosen": -334.36212158203125, + "logps/rejected": -347.2046813964844, + "loss": 0.9704, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.9460422396659851, + "rewards/margins": 0.44854864478111267, + "rewards/rejected": -1.394590973854065, + "rewards/safe_rewards": -0.965340256690979, + "rewards/unsafe_rewards": -0.9267444610595703, + "step": 1540 + }, + { + "epoch": 0.83, + "learning_rate": 4.0708380434367864e-08, + "logits/chosen": -1.5614886283874512, + "logits/rejected": -1.1849619150161743, + "logps/chosen": -300.528564453125, + "logps/rejected": -326.10638427734375, + "loss": 0.8839, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.0258691310882568, + "rewards/margins": 0.4937085211277008, + "rewards/rejected": -1.5195776224136353, + "rewards/safe_rewards": -1.0579198598861694, + "rewards/unsafe_rewards": -0.993818461894989, + "step": 1550 + }, + { + "epoch": 0.84, + "learning_rate": 3.817626395644305e-08, + "logits/chosen": -1.5818434953689575, + "logits/rejected": -1.210106611251831, + "logps/chosen": -297.70025634765625, + "logps/rejected": -307.71710205078125, + "loss": 1.0046, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -0.9973915219306946, + "rewards/margins": 0.3978816568851471, + "rewards/rejected": -1.3952730894088745, + "rewards/safe_rewards": -0.9759462475776672, + "rewards/unsafe_rewards": -1.0188367366790771, + "step": 1560 + }, + { + "epoch": 0.84, + "learning_rate": 3.571892809580013e-08, + "logits/chosen": -1.5090782642364502, + "logits/rejected": -1.1929179430007935, + "logps/chosen": -307.1462097167969, + "logps/rejected": -320.7843017578125, + "loss": 0.9616, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -1.1602782011032104, + "rewards/margins": 0.36262303590774536, + "rewards/rejected": -1.5229012966156006, + "rewards/safe_rewards": -1.113433837890625, + "rewards/unsafe_rewards": -1.207122564315796, + "step": 1570 + }, + { + "epoch": 0.85, + "learning_rate": 3.333724037132976e-08, + "logits/chosen": -1.5555639266967773, + "logits/rejected": -1.2410228252410889, + "logps/chosen": -306.10662841796875, + "logps/rejected": -334.8534851074219, + "loss": 0.96, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.9998570680618286, + "rewards/margins": 0.5056936740875244, + "rewards/rejected": -1.5055506229400635, + "rewards/safe_rewards": -0.9567073583602905, + "rewards/unsafe_rewards": -1.0430065393447876, + "step": 1580 + }, + { + "epoch": 0.86, + "learning_rate": 3.1032041595688506e-08, + "logits/chosen": -1.4647419452667236, + "logits/rejected": -1.0212126970291138, + "logps/chosen": -309.408935546875, + "logps/rejected": -335.38299560546875, + "loss": 0.9033, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.1254334449768066, + "rewards/margins": 0.5138076543807983, + "rewards/rejected": -1.6392412185668945, + "rewards/safe_rewards": -1.1220200061798096, + "rewards/unsafe_rewards": -1.1288467645645142, + "step": 1590 + }, + { + "epoch": 0.86, + "learning_rate": 2.880414557846453e-08, + "logits/chosen": -1.4411920309066772, + "logits/rejected": -1.2204091548919678, + "logps/chosen": -293.62689208984375, + "logps/rejected": -315.4649353027344, + "loss": 0.8851, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.0861139297485352, + "rewards/margins": 0.4907267689704895, + "rewards/rejected": -1.5768407583236694, + "rewards/safe_rewards": -1.0654656887054443, + "rewards/unsafe_rewards": -1.106762170791626, + "step": 1600 + }, + { + "epoch": 0.87, + "learning_rate": 2.6654338838876662e-08, + "logits/chosen": -1.585949420928955, + "logits/rejected": -1.0787384510040283, + "logps/chosen": -322.25885009765625, + "logps/rejected": -315.6914978027344, + "loss": 0.8857, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.0791314840316772, + "rewards/margins": 0.5700170397758484, + "rewards/rejected": -1.6491485834121704, + "rewards/safe_rewards": -1.1312898397445679, + "rewards/unsafe_rewards": -1.0269731283187866, + "step": 1610 + }, + { + "epoch": 0.87, + "learning_rate": 2.4583380328107805e-08, + "logits/chosen": -1.5202205181121826, + "logits/rejected": -1.1094920635223389, + "logps/chosen": -331.93377685546875, + "logps/rejected": -334.93719482421875, + "loss": 0.9394, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.0624914169311523, + "rewards/margins": 0.5675247311592102, + "rewards/rejected": -1.6300160884857178, + "rewards/safe_rewards": -1.0210199356079102, + "rewards/unsafe_rewards": -1.1039628982543945, + "step": 1620 + }, + { + "epoch": 0.88, + "learning_rate": 2.259200116137039e-08, + "logits/chosen": -1.4817931652069092, + "logits/rejected": -1.1653249263763428, + "logps/chosen": -334.85223388671875, + "logps/rejected": -357.4493713378906, + "loss": 0.9782, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.1782487630844116, + "rewards/margins": 0.4701511263847351, + "rewards/rejected": -1.6483999490737915, + "rewards/safe_rewards": -1.1807775497436523, + "rewards/unsafe_rewards": -1.175719976425171, + "step": 1630 + }, + { + "epoch": 0.88, + "learning_rate": 2.068090435979958e-08, + "logits/chosen": -1.4055829048156738, + "logits/rejected": -1.156343698501587, + "logps/chosen": -306.995361328125, + "logps/rejected": -317.0059509277344, + "loss": 0.9645, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.0118372440338135, + "rewards/margins": 0.44742053747177124, + "rewards/rejected": -1.45925772190094, + "rewards/safe_rewards": -1.0128872394561768, + "rewards/unsafe_rewards": -1.010787010192871, + "step": 1640 + }, + { + "epoch": 0.89, + "learning_rate": 1.8850764602263423e-08, + "logits/chosen": -1.4388693571090698, + "logits/rejected": -1.0342535972595215, + "logps/chosen": -311.6680603027344, + "logps/rejected": -348.0157165527344, + "loss": 0.9259, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.1476449966430664, + "rewards/margins": 0.49749964475631714, + "rewards/rejected": -1.6451447010040283, + "rewards/safe_rewards": -1.1766583919525146, + "rewards/unsafe_rewards": -1.1186316013336182, + "step": 1650 + }, + { + "epoch": 0.89, + "learning_rate": 1.710222798718028e-08, + "logits/chosen": -1.498641014099121, + "logits/rejected": -1.1954280138015747, + "logps/chosen": -323.10626220703125, + "logps/rejected": -355.72705078125, + "loss": 0.8871, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.1219761371612549, + "rewards/margins": 0.4705938696861267, + "rewards/rejected": -1.5925698280334473, + "rewards/safe_rewards": -1.170627474784851, + "rewards/unsafe_rewards": -1.07332444190979, + "step": 1660 + }, + { + "epoch": 0.9, + "learning_rate": 1.5435911804424356e-08, + "logits/chosen": -1.545506477355957, + "logits/rejected": -1.2171175479888916, + "logps/chosen": -331.9698181152344, + "logps/rejected": -340.3002014160156, + "loss": 0.9938, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.0450648069381714, + "rewards/margins": 0.47697582840919495, + "rewards/rejected": -1.522040605545044, + "rewards/safe_rewards": -1.095984697341919, + "rewards/unsafe_rewards": -0.9941450357437134, + "step": 1670 + }, + { + "epoch": 0.9, + "learning_rate": 1.3852404317403199e-08, + "logits/chosen": -1.41542649269104, + "logits/rejected": -1.1583986282348633, + "logps/chosen": -297.29962158203125, + "logps/rejected": -336.06988525390625, + "loss": 0.9583, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -1.1032707691192627, + "rewards/margins": 0.3784652352333069, + "rewards/rejected": -1.4817359447479248, + "rewards/safe_rewards": -1.15559983253479, + "rewards/unsafe_rewards": -1.0509414672851562, + "step": 1680 + }, + { + "epoch": 0.91, + "learning_rate": 1.235226455538113e-08, + "logits/chosen": -1.4842908382415771, + "logits/rejected": -1.2061156034469604, + "logps/chosen": -318.2637939453125, + "logps/rejected": -340.8109436035156, + "loss": 1.0008, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.1303695440292358, + "rewards/margins": 0.47945070266723633, + "rewards/rejected": -1.6098201274871826, + "rewards/safe_rewards": -1.1323390007019043, + "rewards/unsafe_rewards": -1.128400206565857, + "step": 1690 + }, + { + "epoch": 0.91, + "learning_rate": 1.0936022116124321e-08, + "logits/chosen": -1.4996792078018188, + "logits/rejected": -1.116720199584961, + "logps/chosen": -307.4630432128906, + "logps/rejected": -331.1900634765625, + "loss": 0.866, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.0742332935333252, + "rewards/margins": 0.5489377975463867, + "rewards/rejected": -1.6231712102890015, + "rewards/safe_rewards": -1.107750654220581, + "rewards/unsafe_rewards": -1.0407161712646484, + "step": 1700 + }, + { + "epoch": 0.92, + "learning_rate": 9.60417697893534e-09, + "logits/chosen": -1.482126235961914, + "logits/rejected": -1.1564598083496094, + "logps/chosen": -312.1965026855469, + "logps/rejected": -340.93817138671875, + "loss": 0.9667, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.156433343887329, + "rewards/margins": 0.43530288338661194, + "rewards/rejected": -1.5917361974716187, + "rewards/safe_rewards": -1.0536446571350098, + "rewards/unsafe_rewards": -1.2592222690582275, + "step": 1710 + }, + { + "epoch": 0.93, + "learning_rate": 8.357199328144576e-09, + "logits/chosen": -1.4593350887298584, + "logits/rejected": -1.200596570968628, + "logps/chosen": -356.2051086425781, + "logps/rejected": -369.406494140625, + "loss": 0.8611, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.1415997743606567, + "rewards/margins": 0.49805140495300293, + "rewards/rejected": -1.6396510601043701, + "rewards/safe_rewards": -1.2040703296661377, + "rewards/unsafe_rewards": -1.0791290998458862, + "step": 1720 + }, + { + "epoch": 0.93, + "learning_rate": 7.1955293871198144e-09, + "logits/chosen": -1.350987195968628, + "logits/rejected": -1.1763075590133667, + "logps/chosen": -296.27191162109375, + "logps/rejected": -321.37518310546875, + "loss": 0.9819, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -1.2195558547973633, + "rewards/margins": 0.39501625299453735, + "rewards/rejected": -1.6145721673965454, + "rewards/safe_rewards": -1.2167112827301025, + "rewards/unsafe_rewards": -1.2224003076553345, + "step": 1730 + }, + { + "epoch": 0.94, + "learning_rate": 6.119577262853254e-09, + "logits/chosen": -1.441007375717163, + "logits/rejected": -1.0252482891082764, + "logps/chosen": -299.6694641113281, + "logps/rejected": -312.8736267089844, + "loss": 0.9801, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.121921420097351, + "rewards/margins": 0.5479141473770142, + "rewards/rejected": -1.6698356866836548, + "rewards/safe_rewards": -1.0443205833435059, + "rewards/unsafe_rewards": -1.1995223760604858, + "step": 1740 + }, + { + "epoch": 0.94, + "learning_rate": 5.129722801180542e-09, + "logits/chosen": -1.4107353687286377, + "logits/rejected": -1.0875647068023682, + "logps/chosen": -322.1299743652344, + "logps/rejected": -341.2066650390625, + "loss": 0.8399, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -1.189591407775879, + "rewards/margins": 0.5180362462997437, + "rewards/rejected": -1.707627534866333, + "rewards/safe_rewards": -1.3024308681488037, + "rewards/unsafe_rewards": -1.076751708984375, + "step": 1750 + }, + { + "epoch": 0.95, + "learning_rate": 4.226315452682816e-09, + "logits/chosen": -1.4723705053329468, + "logits/rejected": -1.1786158084869385, + "logps/chosen": -305.52362060546875, + "logps/rejected": -326.5906677246094, + "loss": 0.9485, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -1.046170711517334, + "rewards/margins": 0.4863569736480713, + "rewards/rejected": -1.5325279235839844, + "rewards/safe_rewards": -1.0766370296478271, + "rewards/unsafe_rewards": -1.0157043933868408, + "step": 1760 + }, + { + "epoch": 0.95, + "learning_rate": 3.4096741493194193e-09, + "logits/chosen": -1.527930498123169, + "logits/rejected": -1.2494192123413086, + "logps/chosen": -315.3348693847656, + "logps/rejected": -332.2015686035156, + "loss": 1.0269, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.1415612697601318, + "rewards/margins": 0.39452338218688965, + "rewards/rejected": -1.5360848903656006, + "rewards/safe_rewards": -1.0917105674743652, + "rewards/unsafe_rewards": -1.1914122104644775, + "step": 1770 + }, + { + "epoch": 0.96, + "learning_rate": 2.6800871918346846e-09, + "logits/chosen": -1.5815150737762451, + "logits/rejected": -1.1552437543869019, + "logps/chosen": -320.330078125, + "logps/rejected": -340.8857727050781, + "loss": 0.9364, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0657531023025513, + "rewards/margins": 0.5418880581855774, + "rewards/rejected": -1.6076412200927734, + "rewards/safe_rewards": -1.1473314762115479, + "rewards/unsafe_rewards": -0.9841750264167786, + "step": 1780 + }, + { + "epoch": 0.96, + "learning_rate": 2.0378121479783796e-09, + "logits/chosen": -1.4162019491195679, + "logits/rejected": -1.0255894660949707, + "logps/chosen": -313.9689025878906, + "logps/rejected": -334.1581115722656, + "loss": 0.9987, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -1.1681249141693115, + "rewards/margins": 0.49787864089012146, + "rewards/rejected": -1.6660035848617554, + "rewards/safe_rewards": -1.1383296251296997, + "rewards/unsafe_rewards": -1.1979202032089233, + "step": 1790 + }, + { + "epoch": 0.97, + "learning_rate": 1.4830757615760247e-09, + "logits/chosen": -1.437466025352478, + "logits/rejected": -1.0774166584014893, + "logps/chosen": -325.25677490234375, + "logps/rejected": -333.42791748046875, + "loss": 0.9478, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.0991178750991821, + "rewards/margins": 0.44227179884910583, + "rewards/rejected": -1.5413895845413208, + "rewards/safe_rewards": -1.1737608909606934, + "rewards/unsafe_rewards": -1.024474859237671, + "step": 1800 + }, + { + "epoch": 0.97, + "learning_rate": 1.0160738724809548e-09, + "logits/chosen": -1.496584177017212, + "logits/rejected": -1.0213630199432373, + "logps/chosen": -305.14263916015625, + "logps/rejected": -338.046875, + "loss": 0.881, + "rewards/accuracies": 0.78125, + "rewards/chosen": -1.1096470355987549, + "rewards/margins": 0.5430228114128113, + "rewards/rejected": -1.6526696681976318, + "rewards/safe_rewards": -1.1323860883712769, + "rewards/unsafe_rewards": -1.0869077444076538, + "step": 1810 + }, + { + "epoch": 0.98, + "learning_rate": 6.369713474366212e-10, + "logits/chosen": -1.460850477218628, + "logits/rejected": -1.1146998405456543, + "logps/chosen": -342.700439453125, + "logps/rejected": -370.11865234375, + "loss": 0.8416, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -1.1760565042495728, + "rewards/margins": 0.5496792793273926, + "rewards/rejected": -1.7257359027862549, + "rewards/safe_rewards": -1.209657907485962, + "rewards/unsafe_rewards": -1.1424554586410522, + "step": 1820 + }, + { + "epoch": 0.98, + "learning_rate": 3.459020218731512e-10, + "logits/chosen": -1.4401605129241943, + "logits/rejected": -1.135258674621582, + "logps/chosen": -297.53607177734375, + "logps/rejected": -319.9522399902344, + "loss": 0.872, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -1.0829004049301147, + "rewards/margins": 0.5533518195152283, + "rewards/rejected": -1.6362521648406982, + "rewards/safe_rewards": -1.041105031967163, + "rewards/unsafe_rewards": -1.124695897102356, + "step": 1830 + }, + { + "epoch": 0.99, + "learning_rate": 1.429686526593088e-10, + "logits/chosen": -1.4089921712875366, + "logits/rejected": -1.140413522720337, + "logps/chosen": -320.8370666503906, + "logps/rejected": -343.36822509765625, + "loss": 1.0174, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.1735649108886719, + "rewards/margins": 0.45700669288635254, + "rewards/rejected": -1.6305716037750244, + "rewards/safe_rewards": -1.203802227973938, + "rewards/unsafe_rewards": -1.1433275938034058, + "step": 1840 + }, + { + "epoch": 1.0, + "learning_rate": 2.824288182584622e-11, + "logits/chosen": -1.5680155754089355, + "logits/rejected": -1.1377493143081665, + "logps/chosen": -327.45550537109375, + "logps/rejected": -339.25115966796875, + "loss": 0.8677, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.1065986156463623, + "rewards/margins": 0.5338612198829651, + "rewards/rejected": -1.6404597759246826, + "rewards/safe_rewards": -1.171757459640503, + "rewards/unsafe_rewards": -1.0414397716522217, + "step": 1850 + }, + { + "epoch": 1.0, + "step": 1858, + "total_flos": 0.0, + "train_loss": 1.018996798697021, + "train_runtime": 22449.6551, + "train_samples_per_second": 2.649, + "train_steps_per_second": 0.083 + } + ], + "logging_steps": 10, + "max_steps": 1858, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}