{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 5811, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.591065292096219e-10, "logits/chosen": -2.5129990577697754, "logits/rejected": -2.4275057315826416, "logps/chosen": -96.6673583984375, "logps/rejected": -105.15755462646484, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 8.59106529209622e-09, "logits/chosen": -2.988718271255493, "logits/rejected": -2.9780874252319336, "logps/chosen": -302.4128723144531, "logps/rejected": -225.56951904296875, "loss": 0.6947, "rewards/accuracies": 0.5, "rewards/chosen": -0.007120599504560232, "rewards/margins": -0.004252635408192873, "rewards/rejected": -0.002867964096367359, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -2.8921194076538086, "logits/rejected": -2.7121551036834717, "logps/chosen": -287.7423400878906, "logps/rejected": -217.6292724609375, "loss": 0.6906, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0037767409812659025, "rewards/margins": 0.010759315453469753, "rewards/rejected": -0.006982574705034494, "step": 20 }, { "epoch": 0.02, "learning_rate": 2.5773195876288656e-08, "logits/chosen": -3.015655994415283, "logits/rejected": -2.9962334632873535, "logps/chosen": -297.9928283691406, "logps/rejected": -203.88180541992188, "loss": 0.6891, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.025857295840978622, "rewards/margins": 0.025261688977479935, "rewards/rejected": 0.0005956076784059405, "step": 30 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -2.8478853702545166, "logits/rejected": -2.9214625358581543, "logps/chosen": -267.7845153808594, "logps/rejected": -250.1910400390625, "loss": 0.6861, "rewards/accuracies": 0.75, "rewards/chosen": 0.027253543958067894, "rewards/margins": 0.02428315207362175, "rewards/rejected": 0.002970390487462282, "step": 40 }, { "epoch": 0.03, "learning_rate": 4.29553264604811e-08, "logits/chosen": -3.0094895362854004, "logits/rejected": -2.9605789184570312, "logps/chosen": -322.73681640625, "logps/rejected": -245.77450561523438, "loss": 0.6839, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.022152891382575035, "rewards/margins": 0.04998321458697319, "rewards/rejected": -0.027830326929688454, "step": 50 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -2.9751992225646973, "logits/rejected": -2.9595389366149902, "logps/chosen": -308.54351806640625, "logps/rejected": -224.53707885742188, "loss": 0.6688, "rewards/accuracies": 0.75, "rewards/chosen": 0.04110954329371452, "rewards/margins": 0.11852701753377914, "rewards/rejected": -0.07741747796535492, "step": 60 }, { "epoch": 0.04, "learning_rate": 6.013745704467354e-08, "logits/chosen": -2.9823076725006104, "logits/rejected": -3.0206565856933594, "logps/chosen": -375.16925048828125, "logps/rejected": -224.032958984375, "loss": 0.6642, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.07654228806495667, "rewards/margins": 0.1424637734889984, "rewards/rejected": -0.06592147052288055, "step": 70 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -3.0697617530822754, "logits/rejected": -3.036527156829834, "logps/chosen": -353.6755676269531, "logps/rejected": -216.9717559814453, "loss": 0.6418, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.04413590952754021, "rewards/margins": 0.10847017914056778, "rewards/rejected": -0.06433425843715668, "step": 80 }, { "epoch": 0.05, "learning_rate": 7.731958762886598e-08, "logits/chosen": -2.9876997470855713, "logits/rejected": -2.9616377353668213, "logps/chosen": -291.57012939453125, "logps/rejected": -193.0994873046875, "loss": 0.6367, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.08899353444576263, "rewards/margins": 0.24147820472717285, "rewards/rejected": -0.1524846851825714, "step": 90 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -2.8401777744293213, "logits/rejected": -2.7715401649475098, "logps/chosen": -261.7100524902344, "logps/rejected": -255.4248046875, "loss": 0.6284, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03916650265455246, "rewards/margins": 0.1420799195766449, "rewards/rejected": -0.10291342437267303, "step": 100 }, { "epoch": 0.05, "eval_logits/chosen": -2.823406934738159, "eval_logits/rejected": -2.797581672668457, "eval_logps/chosen": -253.8098907470703, "eval_logps/rejected": -258.8415832519531, "eval_loss": 0.6098471879959106, "eval_rewards/accuracies": 0.734375, "eval_rewards/chosen": 0.04252301901578903, "eval_rewards/margins": 0.22968964278697968, "eval_rewards/rejected": -0.18716664612293243, "eval_runtime": 58.4622, "eval_samples_per_second": 17.105, "eval_steps_per_second": 0.274, "step": 100 }, { "epoch": 0.06, "learning_rate": 9.450171821305841e-08, "logits/chosen": -3.0175564289093018, "logits/rejected": -3.084195137023926, "logps/chosen": -344.5015869140625, "logps/rejected": -294.0466613769531, "loss": 0.6208, "rewards/accuracies": 0.75, "rewards/chosen": 0.07540851831436157, "rewards/margins": 0.3283361792564392, "rewards/rejected": -0.25292766094207764, "step": 110 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -2.831212043762207, "logits/rejected": -2.7832601070404053, "logps/chosen": -184.1349639892578, "logps/rejected": -206.84634399414062, "loss": 0.587, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06816364824771881, "rewards/margins": 0.0881614089012146, "rewards/rejected": -0.1563250720500946, "step": 120 }, { "epoch": 0.07, "learning_rate": 1.1168384879725086e-07, "logits/chosen": -3.0061099529266357, "logits/rejected": -2.8498525619506836, "logps/chosen": -333.06072998046875, "logps/rejected": -189.4818115234375, "loss": 0.5832, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.19702570140361786, "rewards/margins": 0.5247530341148376, "rewards/rejected": -0.3277273178100586, "step": 130 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -2.8609023094177246, "logits/rejected": -2.77339243888855, "logps/chosen": -297.0363464355469, "logps/rejected": -242.37255859375, "loss": 0.5649, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.016815107315778732, "rewards/margins": 0.422064870595932, "rewards/rejected": -0.40524977445602417, "step": 140 }, { "epoch": 0.08, "learning_rate": 1.2886597938144328e-07, "logits/chosen": -3.088327407836914, "logits/rejected": -2.9465346336364746, "logps/chosen": -305.6724548339844, "logps/rejected": -314.7848205566406, "loss": 0.5548, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.12263361364603043, "rewards/margins": 0.6976320147514343, "rewards/rejected": -0.5749984979629517, "step": 150 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -2.883831024169922, "logits/rejected": -2.8376777172088623, "logps/chosen": -267.89154052734375, "logps/rejected": -199.8636474609375, "loss": 0.5362, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.17647962272167206, "rewards/margins": 0.5636450052261353, "rewards/rejected": -0.387165367603302, "step": 160 }, { "epoch": 0.09, "learning_rate": 1.4604810996563573e-07, "logits/chosen": -2.823948383331299, "logits/rejected": -2.7283661365509033, "logps/chosen": -234.5882568359375, "logps/rejected": -194.86480712890625, "loss": 0.4987, "rewards/accuracies": 0.75, "rewards/chosen": 0.09966392815113068, "rewards/margins": 0.7896274328231812, "rewards/rejected": -0.6899635791778564, "step": 170 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -2.9629111289978027, "logits/rejected": -2.9428882598876953, "logps/chosen": -232.97244262695312, "logps/rejected": -183.2829132080078, "loss": 0.5185, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.2645714282989502, "rewards/margins": 0.8501029014587402, "rewards/rejected": -0.5855314135551453, "step": 180 }, { "epoch": 0.1, "learning_rate": 1.6323024054982818e-07, "logits/chosen": -2.9642796516418457, "logits/rejected": -2.97268009185791, "logps/chosen": -275.6226501464844, "logps/rejected": -233.35537719726562, "loss": 0.5748, "rewards/accuracies": 0.75, "rewards/chosen": 0.1616288721561432, "rewards/margins": 0.4936322569847107, "rewards/rejected": -0.3320034146308899, "step": 190 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -2.9080729484558105, "logits/rejected": -2.9043314456939697, "logps/chosen": -282.22369384765625, "logps/rejected": -235.44992065429688, "loss": 0.4908, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.13387183845043182, "rewards/margins": 0.7095439434051514, "rewards/rejected": -0.5756720900535583, "step": 200 }, { "epoch": 0.1, "eval_logits/chosen": -2.7959609031677246, "eval_logits/rejected": -2.7718665599823, "eval_logps/chosen": -254.51446533203125, "eval_logps/rejected": -263.8123779296875, "eval_loss": 0.5425560474395752, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -0.027933437377214432, "eval_rewards/margins": 0.6563125252723694, "eval_rewards/rejected": -0.6842460036277771, "eval_runtime": 58.0136, "eval_samples_per_second": 17.237, "eval_steps_per_second": 0.276, "step": 200 }, { "epoch": 0.11, "learning_rate": 1.804123711340206e-07, "logits/chosen": -2.664795160293579, "logits/rejected": -2.427393674850464, "logps/chosen": -297.56488037109375, "logps/rejected": -226.8320770263672, "loss": 0.5682, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22673270106315613, "rewards/margins": 0.919518768787384, "rewards/rejected": -1.1462514400482178, "step": 210 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -2.8164966106414795, "logits/rejected": -2.7533140182495117, "logps/chosen": -316.3358459472656, "logps/rejected": -248.8792724609375, "loss": 0.5418, "rewards/accuracies": 0.75, "rewards/chosen": -0.2400936633348465, "rewards/margins": 0.5202454924583435, "rewards/rejected": -0.7603391408920288, "step": 220 }, { "epoch": 0.12, "learning_rate": 1.9759450171821303e-07, "logits/chosen": -2.8455495834350586, "logits/rejected": -2.815950870513916, "logps/chosen": -291.536376953125, "logps/rejected": -252.3511199951172, "loss": 0.516, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05435393005609512, "rewards/margins": 0.677357017993927, "rewards/rejected": -0.7317109107971191, "step": 230 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -2.884962558746338, "logits/rejected": -2.9899585247039795, "logps/chosen": -362.83612060546875, "logps/rejected": -246.82815551757812, "loss": 0.5416, "rewards/accuracies": 0.5, "rewards/chosen": -0.4428789019584656, "rewards/margins": 0.2312956303358078, "rewards/rejected": -0.6741746068000793, "step": 240 }, { "epoch": 0.13, "learning_rate": 2.1477663230240549e-07, "logits/chosen": -2.979492425918579, "logits/rejected": -2.9899439811706543, "logps/chosen": -232.15756225585938, "logps/rejected": -157.3478240966797, "loss": 0.5141, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.26762503385543823, "rewards/margins": 1.1515061855316162, "rewards/rejected": -0.8838812112808228, "step": 250 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -3.0052077770233154, "logits/rejected": -2.9878716468811035, "logps/chosen": -309.3619689941406, "logps/rejected": -189.45968627929688, "loss": 0.5321, "rewards/accuracies": 0.75, "rewards/chosen": 0.4304015040397644, "rewards/margins": 1.0413486957550049, "rewards/rejected": -0.6109471917152405, "step": 260 }, { "epoch": 0.14, "learning_rate": 2.3195876288659794e-07, "logits/chosen": -2.8794291019439697, "logits/rejected": -2.831512928009033, "logps/chosen": -306.6054992675781, "logps/rejected": -237.39382934570312, "loss": 0.4909, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.18433420360088348, "rewards/margins": 0.8596351742744446, "rewards/rejected": -0.6753008365631104, "step": 270 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -2.9824016094207764, "logits/rejected": -2.9367408752441406, "logps/chosen": -350.133056640625, "logps/rejected": -254.4954071044922, "loss": 0.5336, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.20658831298351288, "rewards/margins": 0.7473801374435425, "rewards/rejected": -0.5407918691635132, "step": 280 }, { "epoch": 0.15, "learning_rate": 2.4914089347079036e-07, "logits/chosen": -2.7565178871154785, "logits/rejected": -2.944960832595825, "logps/chosen": -242.48397827148438, "logps/rejected": -227.69107055664062, "loss": 0.5056, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.699475109577179, "rewards/margins": 1.3677313327789307, "rewards/rejected": -0.6682561635971069, "step": 290 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -2.779125928878784, "logits/rejected": -2.943162679672241, "logps/chosen": -411.8221130371094, "logps/rejected": -222.3397216796875, "loss": 0.5264, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.34889036417007446, "rewards/margins": 1.2339386940002441, "rewards/rejected": -0.8850483894348145, "step": 300 }, { "epoch": 0.15, "eval_logits/chosen": -2.812185525894165, "eval_logits/rejected": -2.789177656173706, "eval_logps/chosen": -253.82086181640625, "eval_logps/rejected": -266.7626953125, "eval_loss": 0.5323615670204163, "eval_rewards/accuracies": 0.765625, "eval_rewards/chosen": 0.04142449051141739, "eval_rewards/margins": 1.0207018852233887, "eval_rewards/rejected": -0.9792775511741638, "eval_runtime": 59.6543, "eval_samples_per_second": 16.763, "eval_steps_per_second": 0.268, "step": 300 }, { "epoch": 0.16, "learning_rate": 2.663230240549828e-07, "logits/chosen": -2.9955785274505615, "logits/rejected": -2.9795451164245605, "logps/chosen": -318.2289123535156, "logps/rejected": -216.7342071533203, "loss": 0.5172, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.10595469176769257, "rewards/margins": 1.0604875087738037, "rewards/rejected": -0.9545329213142395, "step": 310 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -2.9341654777526855, "logits/rejected": -2.9966204166412354, "logps/chosen": -379.42572021484375, "logps/rejected": -282.17291259765625, "loss": 0.4682, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.08291205763816833, "rewards/margins": 1.1605613231658936, "rewards/rejected": -1.0776493549346924, "step": 320 }, { "epoch": 0.17, "learning_rate": 2.835051546391752e-07, "logits/chosen": -2.8474197387695312, "logits/rejected": -2.844364643096924, "logps/chosen": -331.90802001953125, "logps/rejected": -223.3518524169922, "loss": 0.4881, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02964567206799984, "rewards/margins": 1.4632409811019897, "rewards/rejected": -1.4928867816925049, "step": 330 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -2.68745756149292, "logits/rejected": -2.817155361175537, "logps/chosen": -232.6031036376953, "logps/rejected": -246.84768676757812, "loss": 0.5196, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.5186244249343872, "rewards/margins": 1.0158860683441162, "rewards/rejected": -0.49726182222366333, "step": 340 }, { "epoch": 0.18, "learning_rate": 3.006872852233677e-07, "logits/chosen": -3.0304269790649414, "logits/rejected": -2.9698691368103027, "logps/chosen": -159.0189208984375, "logps/rejected": -212.7183380126953, "loss": 0.4873, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.310208797454834, "rewards/margins": 0.5256294012069702, "rewards/rejected": -0.8358383178710938, "step": 350 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -3.0391955375671387, "logits/rejected": -3.0694854259490967, "logps/chosen": -381.39715576171875, "logps/rejected": -347.92559814453125, "loss": 0.4607, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.43349432945251465, "rewards/margins": 1.4752476215362549, "rewards/rejected": -1.0417532920837402, "step": 360 }, { "epoch": 0.19, "learning_rate": 3.178694158075601e-07, "logits/chosen": -2.985565662384033, "logits/rejected": -2.951699733734131, "logps/chosen": -158.28598022460938, "logps/rejected": -127.53106689453125, "loss": 0.4825, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.025753701105713844, "rewards/margins": 1.3051038980484009, "rewards/rejected": -1.330857515335083, "step": 370 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -2.8883204460144043, "logits/rejected": -2.7797765731811523, "logps/chosen": -300.84283447265625, "logps/rejected": -306.0265197753906, "loss": 0.5606, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03248428553342819, "rewards/margins": 0.8653362393379211, "rewards/rejected": -0.8328520655632019, "step": 380 }, { "epoch": 0.2, "learning_rate": 3.3505154639175255e-07, "logits/chosen": -2.8869693279266357, "logits/rejected": -2.8558154106140137, "logps/chosen": -295.94268798828125, "logps/rejected": -245.67544555664062, "loss": 0.4789, "rewards/accuracies": 0.75, "rewards/chosen": 0.0472743920981884, "rewards/margins": 1.3127429485321045, "rewards/rejected": -1.2654683589935303, "step": 390 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -3.0791659355163574, "logits/rejected": -3.0203123092651367, "logps/chosen": -251.6421661376953, "logps/rejected": -219.4331512451172, "loss": 0.5536, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3093084990978241, "rewards/margins": 0.4000861644744873, "rewards/rejected": -0.7093946933746338, "step": 400 }, { "epoch": 0.21, "eval_logits/chosen": -2.8764305114746094, "eval_logits/rejected": -2.8541693687438965, "eval_logps/chosen": -254.42034912109375, "eval_logps/rejected": -272.24603271484375, "eval_loss": 0.4957379102706909, "eval_rewards/accuracies": 0.796875, "eval_rewards/chosen": -0.01852385140955448, "eval_rewards/margins": 1.5090851783752441, "eval_rewards/rejected": -1.5276089906692505, "eval_runtime": 56.3835, "eval_samples_per_second": 17.736, "eval_steps_per_second": 0.284, "step": 400 }, { "epoch": 0.21, "learning_rate": 3.5223367697594503e-07, "logits/chosen": -2.8821568489074707, "logits/rejected": -2.8333544731140137, "logps/chosen": -320.3736267089844, "logps/rejected": -205.11056518554688, "loss": 0.4277, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.0024402737617492676, "rewards/margins": 1.244533658027649, "rewards/rejected": -1.2420933246612549, "step": 410 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -2.9139723777770996, "logits/rejected": -2.8578293323516846, "logps/chosen": -301.2723693847656, "logps/rejected": -248.91744995117188, "loss": 0.5208, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27292880415916443, "rewards/margins": 0.9970871210098267, "rewards/rejected": -1.2700159549713135, "step": 420 }, { "epoch": 0.22, "learning_rate": 3.6941580756013745e-07, "logits/chosen": -2.9251325130462646, "logits/rejected": -2.8964738845825195, "logps/chosen": -210.9687042236328, "logps/rejected": -185.3360137939453, "loss": 0.5229, "rewards/accuracies": 0.75, "rewards/chosen": -0.43344053626060486, "rewards/margins": 0.8561422228813171, "rewards/rejected": -1.2895828485488892, "step": 430 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -2.901094436645508, "logits/rejected": -2.8542165756225586, "logps/chosen": -348.6666259765625, "logps/rejected": -329.27294921875, "loss": 0.6022, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.014251199550926685, "rewards/margins": 0.753572940826416, "rewards/rejected": -0.7393215298652649, "step": 440 }, { "epoch": 0.23, "learning_rate": 3.865979381443299e-07, "logits/chosen": -2.936382532119751, "logits/rejected": -2.9940216541290283, "logps/chosen": -308.2112731933594, "logps/rejected": -232.1812744140625, "loss": 0.504, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.3824332356452942, "rewards/margins": 0.28821295499801636, "rewards/rejected": -0.6706462502479553, "step": 450 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -2.951936960220337, "logits/rejected": -3.0050208568573, "logps/chosen": -326.07659912109375, "logps/rejected": -301.6195983886719, "loss": 0.5801, "rewards/accuracies": 0.75, "rewards/chosen": 0.1851659119129181, "rewards/margins": 1.5346710681915283, "rewards/rejected": -1.349505066871643, "step": 460 }, { "epoch": 0.24, "learning_rate": 4.037800687285223e-07, "logits/chosen": -2.8154656887054443, "logits/rejected": -2.8765406608581543, "logps/chosen": -320.0531311035156, "logps/rejected": -226.99124145507812, "loss": 0.4564, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22784185409545898, "rewards/margins": 0.7857998013496399, "rewards/rejected": -1.013641595840454, "step": 470 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -3.0402634143829346, "logits/rejected": -3.021247625350952, "logps/chosen": -284.4671936035156, "logps/rejected": -287.5126647949219, "loss": 0.4915, "rewards/accuracies": 0.5, "rewards/chosen": -0.03487253934144974, "rewards/margins": 0.45380640029907227, "rewards/rejected": -0.4886789321899414, "step": 480 }, { "epoch": 0.25, "learning_rate": 4.209621993127148e-07, "logits/chosen": -2.9354074001312256, "logits/rejected": -2.920379638671875, "logps/chosen": -311.0786437988281, "logps/rejected": -246.13339233398438, "loss": 0.4588, "rewards/accuracies": 0.75, "rewards/chosen": -0.48857221007347107, "rewards/margins": 0.9489312171936035, "rewards/rejected": -1.4375033378601074, "step": 490 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -3.123109817504883, "logits/rejected": -3.0762407779693604, "logps/chosen": -309.3453063964844, "logps/rejected": -281.5166015625, "loss": 0.5362, "rewards/accuracies": 0.75, "rewards/chosen": 0.25006183981895447, "rewards/margins": 1.4525038003921509, "rewards/rejected": -1.202441930770874, "step": 500 }, { "epoch": 0.26, "eval_logits/chosen": -2.8957557678222656, "eval_logits/rejected": -2.87016224861145, "eval_logps/chosen": -256.86529541015625, "eval_logps/rejected": -272.88690185546875, "eval_loss": 0.503109335899353, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -0.2630198001861572, "eval_rewards/margins": 1.3286765813827515, "eval_rewards/rejected": -1.5916962623596191, "eval_runtime": 55.3853, "eval_samples_per_second": 18.055, "eval_steps_per_second": 0.289, "step": 500 }, { "epoch": 0.26, "learning_rate": 4.381443298969072e-07, "logits/chosen": -2.6036550998687744, "logits/rejected": -2.6383635997772217, "logps/chosen": -252.81375122070312, "logps/rejected": -243.0044708251953, "loss": 0.5633, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6361426115036011, "rewards/margins": 0.3173540532588959, "rewards/rejected": -0.9534965753555298, "step": 510 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -2.96708607673645, "logits/rejected": -3.0084481239318848, "logps/chosen": -186.54592895507812, "logps/rejected": -187.34884643554688, "loss": 0.5443, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1140596866607666, "rewards/margins": 1.242305874824524, "rewards/rejected": -1.356365442276001, "step": 520 }, { "epoch": 0.27, "learning_rate": 4.5532646048109964e-07, "logits/chosen": -2.987997531890869, "logits/rejected": -2.9607906341552734, "logps/chosen": -292.63690185546875, "logps/rejected": -240.1947479248047, "loss": 0.5091, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.1345333755016327, "rewards/margins": 1.3595573902130127, "rewards/rejected": -1.2250239849090576, "step": 530 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -2.8647749423980713, "logits/rejected": -2.868330955505371, "logps/chosen": -186.86167907714844, "logps/rejected": -243.17910766601562, "loss": 0.6201, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.7495313882827759, "rewards/margins": 0.6188509464263916, "rewards/rejected": -1.3683823347091675, "step": 540 }, { "epoch": 0.28, "learning_rate": 4.7250859106529206e-07, "logits/chosen": -2.9979634284973145, "logits/rejected": -2.9638993740081787, "logps/chosen": -349.7961730957031, "logps/rejected": -288.20062255859375, "loss": 0.6153, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04092409461736679, "rewards/margins": 1.4131947755813599, "rewards/rejected": -1.4541189670562744, "step": 550 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -3.0216901302337646, "logits/rejected": -3.0451061725616455, "logps/chosen": -326.0102844238281, "logps/rejected": -307.83367919921875, "loss": 0.539, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6230143904685974, "rewards/margins": 0.9213398098945618, "rewards/rejected": -1.5443540811538696, "step": 560 }, { "epoch": 0.29, "learning_rate": 4.896907216494845e-07, "logits/chosen": -2.990562677383423, "logits/rejected": -2.9301705360412598, "logps/chosen": -336.96826171875, "logps/rejected": -225.23599243164062, "loss": 0.5141, "rewards/accuracies": 0.75, "rewards/chosen": -0.8188888430595398, "rewards/margins": 1.2901289463043213, "rewards/rejected": -2.109017848968506, "step": 570 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -2.900038003921509, "logits/rejected": -2.9860446453094482, "logps/chosen": -322.78240966796875, "logps/rejected": -231.99667358398438, "loss": 0.5668, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.6513129472732544, "rewards/margins": 0.49615031480789185, "rewards/rejected": -1.147463321685791, "step": 580 }, { "epoch": 0.3, "learning_rate": 4.992350353796136e-07, "logits/chosen": -2.936190605163574, "logits/rejected": -2.8864665031433105, "logps/chosen": -251.67172241210938, "logps/rejected": -241.36318969726562, "loss": 0.4964, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.027965422719717026, "rewards/margins": 1.3970218896865845, "rewards/rejected": -1.3690563440322876, "step": 590 }, { "epoch": 0.31, "learning_rate": 4.982788296041308e-07, "logits/chosen": -2.9233040809631348, "logits/rejected": -2.961263656616211, "logps/chosen": -199.49600219726562, "logps/rejected": -255.68612670898438, "loss": 0.5966, "rewards/accuracies": 0.75, "rewards/chosen": -0.13642588257789612, "rewards/margins": 1.8362632989883423, "rewards/rejected": -1.9726893901824951, "step": 600 }, { "epoch": 0.31, "eval_logits/chosen": -2.8986048698425293, "eval_logits/rejected": -2.8777544498443604, "eval_logps/chosen": -257.2279357910156, "eval_logps/rejected": -273.46136474609375, "eval_loss": 0.5963188409805298, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -0.29928162693977356, "eval_rewards/margins": 1.349860429763794, "eval_rewards/rejected": -1.6491420269012451, "eval_runtime": 54.6151, "eval_samples_per_second": 18.31, "eval_steps_per_second": 0.293, "step": 600 }, { "epoch": 0.31, "learning_rate": 4.973226238286479e-07, "logits/chosen": -2.9391376972198486, "logits/rejected": -2.9422051906585693, "logps/chosen": -302.2151184082031, "logps/rejected": -287.15606689453125, "loss": 0.5368, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4417892098426819, "rewards/margins": 1.2578237056732178, "rewards/rejected": -1.6996129751205444, "step": 610 }, { "epoch": 0.32, "learning_rate": 4.96366418053165e-07, "logits/chosen": -3.0650055408477783, "logits/rejected": -3.0660297870635986, "logps/chosen": -334.0442810058594, "logps/rejected": -252.632080078125, "loss": 0.5857, "rewards/accuracies": 0.75, "rewards/chosen": 0.07565183192491531, "rewards/margins": 0.7010248899459839, "rewards/rejected": -0.6253730654716492, "step": 620 }, { "epoch": 0.33, "learning_rate": 4.954102122776821e-07, "logits/chosen": -2.918349027633667, "logits/rejected": -2.8907716274261475, "logps/chosen": -195.51907348632812, "logps/rejected": -167.48745727539062, "loss": 0.5561, "rewards/accuracies": 0.75, "rewards/chosen": -0.20618407428264618, "rewards/margins": 1.305176019668579, "rewards/rejected": -1.5113601684570312, "step": 630 }, { "epoch": 0.33, "learning_rate": 4.944540065021993e-07, "logits/chosen": -2.8896799087524414, "logits/rejected": -2.9980359077453613, "logps/chosen": -264.43023681640625, "logps/rejected": -230.43008422851562, "loss": 0.5386, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18526920676231384, "rewards/margins": 1.8127784729003906, "rewards/rejected": -1.9980475902557373, "step": 640 }, { "epoch": 0.34, "learning_rate": 4.934978007267163e-07, "logits/chosen": -2.959494113922119, "logits/rejected": -2.982419967651367, "logps/chosen": -242.4766082763672, "logps/rejected": -274.0234680175781, "loss": 0.5399, "rewards/accuracies": 0.75, "rewards/chosen": -0.2960631847381592, "rewards/margins": 1.0227611064910889, "rewards/rejected": -1.318824291229248, "step": 650 }, { "epoch": 0.34, "learning_rate": 4.925415949512335e-07, "logits/chosen": -2.9986279010772705, "logits/rejected": -2.966939926147461, "logps/chosen": -328.2708435058594, "logps/rejected": -273.13006591796875, "loss": 0.5061, "rewards/accuracies": 0.75, "rewards/chosen": -0.47610530257225037, "rewards/margins": 1.19678795337677, "rewards/rejected": -1.6728931665420532, "step": 660 }, { "epoch": 0.35, "learning_rate": 4.915853891757506e-07, "logits/chosen": -2.902583360671997, "logits/rejected": -2.941610336303711, "logps/chosen": -197.52853393554688, "logps/rejected": -190.82029724121094, "loss": 0.6096, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.34461653232574463, "rewards/margins": 0.8742098808288574, "rewards/rejected": -1.2188262939453125, "step": 670 }, { "epoch": 0.35, "learning_rate": 4.906291834002677e-07, "logits/chosen": -2.8424625396728516, "logits/rejected": -2.791315793991089, "logps/chosen": -273.73455810546875, "logps/rejected": -229.92031860351562, "loss": 0.5023, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6406866312026978, "rewards/margins": 0.9865404367446899, "rewards/rejected": -1.6272270679473877, "step": 680 }, { "epoch": 0.36, "learning_rate": 4.896729776247848e-07, "logits/chosen": -3.039944648742676, "logits/rejected": -2.9114279747009277, "logps/chosen": -344.2494201660156, "logps/rejected": -179.3026580810547, "loss": 0.4748, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.01536635123193264, "rewards/margins": 1.3766069412231445, "rewards/rejected": -1.3612406253814697, "step": 690 }, { "epoch": 0.36, "learning_rate": 4.88716771849302e-07, "logits/chosen": -2.9146389961242676, "logits/rejected": -2.918255090713501, "logps/chosen": -441.6365661621094, "logps/rejected": -344.4063720703125, "loss": 0.5014, "rewards/accuracies": 0.75, "rewards/chosen": -0.2016751766204834, "rewards/margins": 1.255171775817871, "rewards/rejected": -1.456847071647644, "step": 700 }, { "epoch": 0.36, "eval_logits/chosen": -2.7868502140045166, "eval_logits/rejected": -2.7659108638763428, "eval_logps/chosen": -257.09423828125, "eval_logps/rejected": -271.72039794921875, "eval_loss": 0.5382026433944702, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -0.28591296076774597, "eval_rewards/margins": 1.189131736755371, "eval_rewards/rejected": -1.4750447273254395, "eval_runtime": 57.4875, "eval_samples_per_second": 17.395, "eval_steps_per_second": 0.278, "step": 700 }, { "epoch": 0.37, "learning_rate": 4.87760566073819e-07, "logits/chosen": -2.8412322998046875, "logits/rejected": -2.9222323894500732, "logps/chosen": -265.5148010253906, "logps/rejected": -250.9593963623047, "loss": 0.5254, "rewards/accuracies": 0.75, "rewards/chosen": -0.7364897131919861, "rewards/margins": 1.0006908178329468, "rewards/rejected": -1.7371807098388672, "step": 710 }, { "epoch": 0.37, "learning_rate": 4.868043602983362e-07, "logits/chosen": -2.9580254554748535, "logits/rejected": -2.9545352458953857, "logps/chosen": -275.90625, "logps/rejected": -375.8464660644531, "loss": 0.5059, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17938612401485443, "rewards/margins": 2.483105421066284, "rewards/rejected": -2.662491798400879, "step": 720 }, { "epoch": 0.38, "learning_rate": 4.858481545228533e-07, "logits/chosen": -2.9665563106536865, "logits/rejected": -2.962049722671509, "logps/chosen": -315.9619140625, "logps/rejected": -332.67608642578125, "loss": 0.5005, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3134005665779114, "rewards/margins": 1.136232614517212, "rewards/rejected": -1.4496333599090576, "step": 730 }, { "epoch": 0.38, "learning_rate": 4.848919487473704e-07, "logits/chosen": -2.9164295196533203, "logits/rejected": -2.856682538986206, "logps/chosen": -322.0476379394531, "logps/rejected": -230.6309356689453, "loss": 0.5886, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5171477198600769, "rewards/margins": 1.454939603805542, "rewards/rejected": -1.9720872640609741, "step": 740 }, { "epoch": 0.39, "learning_rate": 4.839357429718875e-07, "logits/chosen": -2.884716510772705, "logits/rejected": -2.9519991874694824, "logps/chosen": -297.78839111328125, "logps/rejected": -239.98959350585938, "loss": 0.5356, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3978341817855835, "rewards/margins": 1.704395055770874, "rewards/rejected": -2.102229356765747, "step": 750 }, { "epoch": 0.39, "learning_rate": 4.829795371964047e-07, "logits/chosen": -2.831848621368408, "logits/rejected": -2.817645311355591, "logps/chosen": -249.10452270507812, "logps/rejected": -241.6534881591797, "loss": 0.8766, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2761456370353699, "rewards/margins": 1.4581564664840698, "rewards/rejected": -1.7343019247055054, "step": 760 }, { "epoch": 0.4, "learning_rate": 4.820233314209217e-07, "logits/chosen": -2.9512200355529785, "logits/rejected": -2.9007842540740967, "logps/chosen": -230.91299438476562, "logps/rejected": -257.34375, "loss": 0.5829, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4074572026729584, "rewards/margins": 1.2204868793487549, "rewards/rejected": -1.627943992614746, "step": 770 }, { "epoch": 0.4, "learning_rate": 4.810671256454389e-07, "logits/chosen": -2.7580349445343018, "logits/rejected": -2.8238117694854736, "logps/chosen": -309.89202880859375, "logps/rejected": -203.035400390625, "loss": 0.4978, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5273137092590332, "rewards/margins": 1.8291202783584595, "rewards/rejected": -2.3564341068267822, "step": 780 }, { "epoch": 0.41, "learning_rate": 4.80110919869956e-07, "logits/chosen": -2.884530544281006, "logits/rejected": -2.9204657077789307, "logps/chosen": -300.7388916015625, "logps/rejected": -258.79180908203125, "loss": 0.5672, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8420109748840332, "rewards/margins": 1.2984743118286133, "rewards/rejected": -2.1404852867126465, "step": 790 }, { "epoch": 0.41, "learning_rate": 4.791547140944731e-07, "logits/chosen": -2.904214859008789, "logits/rejected": -2.9331746101379395, "logps/chosen": -254.6400146484375, "logps/rejected": -260.09088134765625, "loss": 0.5334, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.006157719995826483, "rewards/margins": 1.616097092628479, "rewards/rejected": -1.6099392175674438, "step": 800 }, { "epoch": 0.41, "eval_logits/chosen": -2.7265069484710693, "eval_logits/rejected": -2.705258846282959, "eval_logps/chosen": -258.5242004394531, "eval_logps/rejected": -275.9377746582031, "eval_loss": 0.5677424669265747, "eval_rewards/accuracies": 0.796875, "eval_rewards/chosen": -0.4289108216762543, "eval_rewards/margins": 1.4678754806518555, "eval_rewards/rejected": -1.8967863321304321, "eval_runtime": 55.1088, "eval_samples_per_second": 18.146, "eval_steps_per_second": 0.29, "step": 800 }, { "epoch": 0.42, "learning_rate": 4.781985083189902e-07, "logits/chosen": -2.8288321495056152, "logits/rejected": -2.7795658111572266, "logps/chosen": -183.28457641601562, "logps/rejected": -241.27743530273438, "loss": 0.557, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.7061542868614197, "rewards/margins": 1.3368699550628662, "rewards/rejected": -2.0430245399475098, "step": 810 }, { "epoch": 0.42, "learning_rate": 4.772423025435074e-07, "logits/chosen": -3.0099616050720215, "logits/rejected": -2.973783016204834, "logps/chosen": -186.28518676757812, "logps/rejected": -266.48236083984375, "loss": 0.6266, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.71001797914505, "rewards/margins": -0.206703782081604, "rewards/rejected": -0.5033141374588013, "step": 820 }, { "epoch": 0.43, "learning_rate": 4.762860967680244e-07, "logits/chosen": -2.9518191814422607, "logits/rejected": -3.006854772567749, "logps/chosen": -195.8343048095703, "logps/rejected": -227.02340698242188, "loss": 0.5728, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15900671482086182, "rewards/margins": 0.9850654602050781, "rewards/rejected": -1.1440720558166504, "step": 830 }, { "epoch": 0.43, "learning_rate": 4.7532989099254154e-07, "logits/chosen": -3.009342670440674, "logits/rejected": -3.0587260723114014, "logps/chosen": -300.0588684082031, "logps/rejected": -257.58203125, "loss": 0.6019, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6498911380767822, "rewards/margins": 0.7764835953712463, "rewards/rejected": -1.4263746738433838, "step": 840 }, { "epoch": 0.44, "learning_rate": 4.7437368521705866e-07, "logits/chosen": -2.902837038040161, "logits/rejected": -2.8557207584381104, "logps/chosen": -246.87142944335938, "logps/rejected": -213.7313232421875, "loss": 0.5167, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.49609607458114624, "rewards/margins": 1.5304511785507202, "rewards/rejected": -2.026547431945801, "step": 850 }, { "epoch": 0.44, "learning_rate": 4.7341747944157577e-07, "logits/chosen": -2.726759910583496, "logits/rejected": -2.728843927383423, "logps/chosen": -199.48330688476562, "logps/rejected": -235.99014282226562, "loss": 0.5803, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1567280292510986, "rewards/margins": 1.015815258026123, "rewards/rejected": -2.1725430488586426, "step": 860 }, { "epoch": 0.45, "learning_rate": 4.724612736660929e-07, "logits/chosen": -2.89784836769104, "logits/rejected": -2.9295287132263184, "logps/chosen": -265.757080078125, "logps/rejected": -193.9804229736328, "loss": 0.482, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.018481796607375145, "rewards/margins": 2.08237361907959, "rewards/rejected": -2.063891887664795, "step": 870 }, { "epoch": 0.45, "learning_rate": 4.7150506789061006e-07, "logits/chosen": -2.8157646656036377, "logits/rejected": -2.831799268722534, "logps/chosen": -235.80184936523438, "logps/rejected": -296.13421630859375, "loss": 0.5127, "rewards/accuracies": 0.75, "rewards/chosen": -0.5655059814453125, "rewards/margins": 1.134603500366211, "rewards/rejected": -1.7001097202301025, "step": 880 }, { "epoch": 0.46, "learning_rate": 4.7054886211512717e-07, "logits/chosen": -2.9849319458007812, "logits/rejected": -2.9874143600463867, "logps/chosen": -297.6209411621094, "logps/rejected": -262.95428466796875, "loss": 0.5396, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07225757837295532, "rewards/margins": 0.5653451085090637, "rewards/rejected": -0.6376025676727295, "step": 890 }, { "epoch": 0.46, "learning_rate": 4.695926563396443e-07, "logits/chosen": -3.043614149093628, "logits/rejected": -3.0626580715179443, "logps/chosen": -243.42160034179688, "logps/rejected": -207.7015838623047, "loss": 0.5251, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3596685528755188, "rewards/margins": 1.1817331314086914, "rewards/rejected": -1.5414015054702759, "step": 900 }, { "epoch": 0.46, "eval_logits/chosen": -2.8661580085754395, "eval_logits/rejected": -2.846320629119873, "eval_logps/chosen": -256.3507080078125, "eval_logps/rejected": -270.0767822265625, "eval_loss": 0.5772436261177063, "eval_rewards/accuracies": 0.734375, "eval_rewards/chosen": -0.21155984699726105, "eval_rewards/margins": 1.0991249084472656, "eval_rewards/rejected": -1.3106846809387207, "eval_runtime": 58.614, "eval_samples_per_second": 17.061, "eval_steps_per_second": 0.273, "step": 900 }, { "epoch": 0.47, "learning_rate": 4.686364505641614e-07, "logits/chosen": -3.027421474456787, "logits/rejected": -3.1281371116638184, "logps/chosen": -293.70989990234375, "logps/rejected": -189.66464233398438, "loss": 0.4978, "rewards/accuracies": 0.75, "rewards/chosen": -0.41769805550575256, "rewards/margins": 1.199561357498169, "rewards/rejected": -1.6172593832015991, "step": 910 }, { "epoch": 0.47, "learning_rate": 4.676802447886785e-07, "logits/chosen": -2.8584070205688477, "logits/rejected": -2.882302761077881, "logps/chosen": -256.07684326171875, "logps/rejected": -255.83047485351562, "loss": 0.5412, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33531659841537476, "rewards/margins": 0.8404865264892578, "rewards/rejected": -1.1758031845092773, "step": 920 }, { "epoch": 0.48, "learning_rate": 4.6672403901319564e-07, "logits/chosen": -2.929386854171753, "logits/rejected": -3.0053086280822754, "logps/chosen": -316.0078125, "logps/rejected": -171.36656188964844, "loss": 0.4762, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.023432254791259766, "rewards/margins": 2.0073459148406982, "rewards/rejected": -1.9839136600494385, "step": 930 }, { "epoch": 0.49, "learning_rate": 4.6576783323771275e-07, "logits/chosen": -2.8826663494110107, "logits/rejected": -2.8366870880126953, "logps/chosen": -243.7962188720703, "logps/rejected": -187.9961700439453, "loss": 0.503, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2530770003795624, "rewards/margins": 1.1314551830291748, "rewards/rejected": -1.3845322132110596, "step": 940 }, { "epoch": 0.49, "learning_rate": 4.6481162746222987e-07, "logits/chosen": -2.8165650367736816, "logits/rejected": -2.9121110439300537, "logps/chosen": -251.54098510742188, "logps/rejected": -237.3175506591797, "loss": 0.8784, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2420404851436615, "rewards/margins": 2.1679394245147705, "rewards/rejected": -2.409980058670044, "step": 950 }, { "epoch": 0.5, "learning_rate": 4.63855421686747e-07, "logits/chosen": -2.813908100128174, "logits/rejected": -2.8820648193359375, "logps/chosen": -274.791748046875, "logps/rejected": -240.8386993408203, "loss": 0.5649, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04944751411676407, "rewards/margins": 1.3660838603973389, "rewards/rejected": -1.4155313968658447, "step": 960 }, { "epoch": 0.5, "learning_rate": 4.628992159112641e-07, "logits/chosen": -2.7781646251678467, "logits/rejected": -2.8930909633636475, "logps/chosen": -328.9050598144531, "logps/rejected": -236.53414916992188, "loss": 0.5675, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06258317828178406, "rewards/margins": 1.6145604848861694, "rewards/rejected": -1.6771436929702759, "step": 970 }, { "epoch": 0.51, "learning_rate": 4.6194301013578116e-07, "logits/chosen": -2.9083309173583984, "logits/rejected": -2.824375629425049, "logps/chosen": -287.6618347167969, "logps/rejected": -230.19393920898438, "loss": 0.5169, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4656705856323242, "rewards/margins": 1.6011505126953125, "rewards/rejected": -2.0668210983276367, "step": 980 }, { "epoch": 0.51, "learning_rate": 4.609868043602983e-07, "logits/chosen": -2.937588691711426, "logits/rejected": -2.896270275115967, "logps/chosen": -275.5927734375, "logps/rejected": -288.66680908203125, "loss": 0.4917, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19148771464824677, "rewards/margins": 1.2772417068481445, "rewards/rejected": -1.4687296152114868, "step": 990 }, { "epoch": 0.52, "learning_rate": 4.600305985848154e-07, "logits/chosen": -2.9878718852996826, "logits/rejected": -3.0833239555358887, "logps/chosen": -213.8026123046875, "logps/rejected": -214.43362426757812, "loss": 0.5205, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5573463439941406, "rewards/margins": 0.5783860087394714, "rewards/rejected": -1.1357324123382568, "step": 1000 }, { "epoch": 0.52, "eval_logits/chosen": -2.7978734970092773, "eval_logits/rejected": -2.78934907913208, "eval_logps/chosen": -258.027587890625, "eval_logps/rejected": -275.55523681640625, "eval_loss": 0.5262419581413269, "eval_rewards/accuracies": 0.71875, "eval_rewards/chosen": -0.37924808263778687, "eval_rewards/margins": 1.479280710220337, "eval_rewards/rejected": -1.858528733253479, "eval_runtime": 57.7979, "eval_samples_per_second": 17.302, "eval_steps_per_second": 0.277, "step": 1000 }, { "epoch": 0.52, "learning_rate": 4.590743928093325e-07, "logits/chosen": -2.8478896617889404, "logits/rejected": -2.786147117614746, "logps/chosen": -383.54327392578125, "logps/rejected": -270.4455261230469, "loss": 0.5498, "rewards/accuracies": 0.75, "rewards/chosen": -1.0184290409088135, "rewards/margins": 0.9436414837837219, "rewards/rejected": -1.9620707035064697, "step": 1010 }, { "epoch": 0.53, "learning_rate": 4.581181870338497e-07, "logits/chosen": -2.8463029861450195, "logits/rejected": -2.900444746017456, "logps/chosen": -327.9524841308594, "logps/rejected": -274.52862548828125, "loss": 0.5847, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27039843797683716, "rewards/margins": 1.4945565462112427, "rewards/rejected": -1.7649548053741455, "step": 1020 }, { "epoch": 0.53, "learning_rate": 4.571619812583668e-07, "logits/chosen": -2.793391466140747, "logits/rejected": -2.795802354812622, "logps/chosen": -267.8859558105469, "logps/rejected": -217.9220733642578, "loss": 0.4481, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3454091250896454, "rewards/margins": 2.0243167877197266, "rewards/rejected": -2.3697259426116943, "step": 1030 }, { "epoch": 0.54, "learning_rate": 4.562057754828839e-07, "logits/chosen": -2.868319034576416, "logits/rejected": -2.905986785888672, "logps/chosen": -264.89349365234375, "logps/rejected": -310.06231689453125, "loss": 0.5553, "rewards/accuracies": 0.75, "rewards/chosen": -0.33068472146987915, "rewards/margins": 2.051600933074951, "rewards/rejected": -2.3822855949401855, "step": 1040 }, { "epoch": 0.54, "learning_rate": 4.55249569707401e-07, "logits/chosen": -2.834726572036743, "logits/rejected": -2.9254660606384277, "logps/chosen": -293.556884765625, "logps/rejected": -234.21005249023438, "loss": 0.5042, "rewards/accuracies": 0.75, "rewards/chosen": -1.1473982334136963, "rewards/margins": 1.1652801036834717, "rewards/rejected": -2.312678337097168, "step": 1050 }, { "epoch": 0.55, "learning_rate": 4.5429336393191814e-07, "logits/chosen": -2.7575011253356934, "logits/rejected": -2.8820366859436035, "logps/chosen": -322.6793518066406, "logps/rejected": -208.82388305664062, "loss": 0.6648, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6877565383911133, "rewards/margins": 0.889872670173645, "rewards/rejected": -1.5776290893554688, "step": 1060 }, { "epoch": 0.55, "learning_rate": 4.5333715815643525e-07, "logits/chosen": -2.8447697162628174, "logits/rejected": -2.8715322017669678, "logps/chosen": -386.28570556640625, "logps/rejected": -268.4273376464844, "loss": 0.5397, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.26172274351119995, "rewards/margins": 1.4582087993621826, "rewards/rejected": -1.7199318408966064, "step": 1070 }, { "epoch": 0.56, "learning_rate": 4.5238095238095237e-07, "logits/chosen": -2.957181215286255, "logits/rejected": -2.9968810081481934, "logps/chosen": -263.70684814453125, "logps/rejected": -233.9396209716797, "loss": 0.5506, "rewards/accuracies": 0.5, "rewards/chosen": -0.8327061533927917, "rewards/margins": 0.7109770774841309, "rewards/rejected": -1.5436832904815674, "step": 1080 }, { "epoch": 0.56, "learning_rate": 4.514247466054695e-07, "logits/chosen": -2.9863791465759277, "logits/rejected": -2.9521121978759766, "logps/chosen": -245.79244995117188, "logps/rejected": -189.44338989257812, "loss": 0.5105, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5945242643356323, "rewards/margins": 1.325272560119629, "rewards/rejected": -1.9197969436645508, "step": 1090 }, { "epoch": 0.57, "learning_rate": 4.504685408299866e-07, "logits/chosen": -2.892086982727051, "logits/rejected": -2.942537784576416, "logps/chosen": -336.52685546875, "logps/rejected": -304.50567626953125, "loss": 0.5094, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14167115092277527, "rewards/margins": 0.7381815314292908, "rewards/rejected": -0.8798527717590332, "step": 1100 }, { "epoch": 0.57, "eval_logits/chosen": -2.753582715988159, "eval_logits/rejected": -2.7452518939971924, "eval_logps/chosen": -260.51361083984375, "eval_logps/rejected": -276.33770751953125, "eval_loss": 0.5432895421981812, "eval_rewards/accuracies": 0.796875, "eval_rewards/chosen": -0.6278508901596069, "eval_rewards/margins": 1.3089274168014526, "eval_rewards/rejected": -1.93677818775177, "eval_runtime": 53.4701, "eval_samples_per_second": 18.702, "eval_steps_per_second": 0.299, "step": 1100 }, { "epoch": 0.57, "learning_rate": 4.495123350545037e-07, "logits/chosen": -2.9081952571868896, "logits/rejected": -2.9619812965393066, "logps/chosen": -278.28076171875, "logps/rejected": -240.11181640625, "loss": 0.5065, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.323709636926651, "rewards/margins": 0.9985870122909546, "rewards/rejected": -1.3222965002059937, "step": 1110 }, { "epoch": 0.58, "learning_rate": 4.4855612927902083e-07, "logits/chosen": -2.8727335929870605, "logits/rejected": -2.8023390769958496, "logps/chosen": -325.6292419433594, "logps/rejected": -239.16049194335938, "loss": 0.7095, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.41461220383644104, "rewards/margins": 1.4964089393615723, "rewards/rejected": -1.9110209941864014, "step": 1120 }, { "epoch": 0.58, "learning_rate": 4.4759992350353795e-07, "logits/chosen": -2.8231780529022217, "logits/rejected": -2.908735513687134, "logps/chosen": -304.6755676269531, "logps/rejected": -277.4778747558594, "loss": 0.4456, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8432878255844116, "rewards/margins": 1.6819578409194946, "rewards/rejected": -2.5252456665039062, "step": 1130 }, { "epoch": 0.59, "learning_rate": 4.46643717728055e-07, "logits/chosen": -2.7151193618774414, "logits/rejected": -2.8067574501037598, "logps/chosen": -280.8679504394531, "logps/rejected": -273.8851318359375, "loss": 0.5359, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0236613750457764, "rewards/margins": 1.5441632270812988, "rewards/rejected": -2.567824125289917, "step": 1140 }, { "epoch": 0.59, "learning_rate": 4.4568751195257213e-07, "logits/chosen": -2.776689052581787, "logits/rejected": -2.8435564041137695, "logps/chosen": -241.6661376953125, "logps/rejected": -214.4073944091797, "loss": 0.5624, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6465439796447754, "rewards/margins": 1.3678399324417114, "rewards/rejected": -2.0143837928771973, "step": 1150 }, { "epoch": 0.6, "learning_rate": 4.447313061770893e-07, "logits/chosen": -2.7138195037841797, "logits/rejected": -2.585179328918457, "logps/chosen": -335.81146240234375, "logps/rejected": -350.88385009765625, "loss": 0.54, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.47935476899147034, "rewards/margins": 2.091104030609131, "rewards/rejected": -2.5704588890075684, "step": 1160 }, { "epoch": 0.6, "learning_rate": 4.437751004016064e-07, "logits/chosen": -2.958820104598999, "logits/rejected": -2.9421515464782715, "logps/chosen": -305.28839111328125, "logps/rejected": -262.6142883300781, "loss": 0.5226, "rewards/accuracies": 0.75, "rewards/chosen": -0.1983194649219513, "rewards/margins": 1.0410559177398682, "rewards/rejected": -1.239375352859497, "step": 1170 }, { "epoch": 0.61, "learning_rate": 4.4281889462612353e-07, "logits/chosen": -2.9157230854034424, "logits/rejected": -2.9529147148132324, "logps/chosen": -262.28411865234375, "logps/rejected": -205.23324584960938, "loss": 0.4879, "rewards/accuracies": 0.75, "rewards/chosen": -0.35369348526000977, "rewards/margins": 1.1171058416366577, "rewards/rejected": -1.4707993268966675, "step": 1180 }, { "epoch": 0.61, "learning_rate": 4.4186268885064064e-07, "logits/chosen": -3.012878894805908, "logits/rejected": -2.9825873374938965, "logps/chosen": -287.4079895019531, "logps/rejected": -257.97772216796875, "loss": 0.5481, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8515245318412781, "rewards/margins": 0.8873499035835266, "rewards/rejected": -1.7388744354248047, "step": 1190 }, { "epoch": 0.62, "learning_rate": 4.4090648307515776e-07, "logits/chosen": -2.9880738258361816, "logits/rejected": -2.9730162620544434, "logps/chosen": -224.86508178710938, "logps/rejected": -165.42733764648438, "loss": 0.5837, "rewards/accuracies": 0.75, "rewards/chosen": -0.528744101524353, "rewards/margins": 0.9531749486923218, "rewards/rejected": -1.4819190502166748, "step": 1200 }, { "epoch": 0.62, "eval_logits/chosen": -2.775576591491699, "eval_logits/rejected": -2.7643439769744873, "eval_logps/chosen": -258.015380859375, "eval_logps/rejected": -276.55419921875, "eval_loss": 0.5348690152168274, "eval_rewards/accuracies": 0.765625, "eval_rewards/chosen": -0.37802520394325256, "eval_rewards/margins": 1.5804035663604736, "eval_rewards/rejected": -1.9584287405014038, "eval_runtime": 62.7794, "eval_samples_per_second": 15.929, "eval_steps_per_second": 0.255, "step": 1200 }, { "epoch": 0.62, "learning_rate": 4.399502772996749e-07, "logits/chosen": -3.065775156021118, "logits/rejected": -2.972374677658081, "logps/chosen": -329.2245178222656, "logps/rejected": -304.1506042480469, "loss": 0.4827, "rewards/accuracies": 0.75, "rewards/chosen": -0.5635257363319397, "rewards/margins": 1.3332871198654175, "rewards/rejected": -1.8968127965927124, "step": 1210 }, { "epoch": 0.63, "learning_rate": 4.38994071524192e-07, "logits/chosen": -3.030421733856201, "logits/rejected": -3.05527925491333, "logps/chosen": -258.029541015625, "logps/rejected": -280.21673583984375, "loss": 0.5735, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8231660723686218, "rewards/margins": 1.493502140045166, "rewards/rejected": -2.3166680335998535, "step": 1220 }, { "epoch": 0.64, "learning_rate": 4.380378657487091e-07, "logits/chosen": -3.074777364730835, "logits/rejected": -3.0200257301330566, "logps/chosen": -330.8500061035156, "logps/rejected": -371.5386047363281, "loss": 0.5982, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35177409648895264, "rewards/margins": 1.7098945379257202, "rewards/rejected": -2.061668634414673, "step": 1230 }, { "epoch": 0.64, "learning_rate": 4.370816599732262e-07, "logits/chosen": -2.999420166015625, "logits/rejected": -2.9334309101104736, "logps/chosen": -296.1866760253906, "logps/rejected": -236.72341918945312, "loss": 0.5337, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.419023871421814, "rewards/margins": 1.311092734336853, "rewards/rejected": -2.730116367340088, "step": 1240 }, { "epoch": 0.65, "learning_rate": 4.3612545419774334e-07, "logits/chosen": -2.883204460144043, "logits/rejected": -2.8073534965515137, "logps/chosen": -257.63519287109375, "logps/rejected": -301.64227294921875, "loss": 0.5928, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9009000658988953, "rewards/margins": 1.269942045211792, "rewards/rejected": -2.170842409133911, "step": 1250 }, { "epoch": 0.65, "learning_rate": 4.3516924842226045e-07, "logits/chosen": -2.920656681060791, "logits/rejected": -2.91890025138855, "logps/chosen": -322.2228698730469, "logps/rejected": -296.6404113769531, "loss": 0.629, "rewards/accuracies": 0.75, "rewards/chosen": -0.9084477424621582, "rewards/margins": 1.9311565160751343, "rewards/rejected": -2.839603900909424, "step": 1260 }, { "epoch": 0.66, "learning_rate": 4.3421304264677757e-07, "logits/chosen": -2.7664384841918945, "logits/rejected": -2.7587597370147705, "logps/chosen": -214.4353485107422, "logps/rejected": -212.84805297851562, "loss": 0.476, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.016667127609253, "rewards/margins": 1.7192564010620117, "rewards/rejected": -2.7359237670898438, "step": 1270 }, { "epoch": 0.66, "learning_rate": 4.332568368712947e-07, "logits/chosen": -2.933134078979492, "logits/rejected": -2.877431869506836, "logps/chosen": -361.19573974609375, "logps/rejected": -279.9434814453125, "loss": 0.5784, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5590813159942627, "rewards/margins": 0.4831056594848633, "rewards/rejected": -2.042186975479126, "step": 1280 }, { "epoch": 0.67, "learning_rate": 4.323006310958118e-07, "logits/chosen": -2.904773235321045, "logits/rejected": -2.985483169555664, "logps/chosen": -411.599853515625, "logps/rejected": -315.9092712402344, "loss": 0.4975, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.908369243144989, "rewards/margins": 1.5060293674468994, "rewards/rejected": -2.414398670196533, "step": 1290 }, { "epoch": 0.67, "learning_rate": 4.313444253203289e-07, "logits/chosen": -2.9397823810577393, "logits/rejected": -2.9090006351470947, "logps/chosen": -281.9468078613281, "logps/rejected": -261.4234619140625, "loss": 0.5214, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.9744187593460083, "rewards/margins": 1.7388330698013306, "rewards/rejected": -2.7132515907287598, "step": 1300 }, { "epoch": 0.67, "eval_logits/chosen": -2.711259603500366, "eval_logits/rejected": -2.698620080947876, "eval_logps/chosen": -264.2903137207031, "eval_logps/rejected": -279.27606201171875, "eval_loss": 0.5732331871986389, "eval_rewards/accuracies": 0.765625, "eval_rewards/chosen": -1.0055204629898071, "eval_rewards/margins": 1.2250933647155762, "eval_rewards/rejected": -2.2306137084960938, "eval_runtime": 57.0185, "eval_samples_per_second": 17.538, "eval_steps_per_second": 0.281, "step": 1300 }, { "epoch": 0.68, "learning_rate": 4.3038821954484603e-07, "logits/chosen": -2.8764219284057617, "logits/rejected": -2.7695984840393066, "logps/chosen": -238.80453491210938, "logps/rejected": -252.7805633544922, "loss": 0.4686, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.3183587789535522, "rewards/margins": 0.5890123248100281, "rewards/rejected": -1.907371163368225, "step": 1310 }, { "epoch": 0.68, "learning_rate": 4.2943201376936315e-07, "logits/chosen": -2.8463966846466064, "logits/rejected": -2.850677967071533, "logps/chosen": -291.9525146484375, "logps/rejected": -315.1170349121094, "loss": 0.5393, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8451521992683411, "rewards/margins": 1.4508628845214844, "rewards/rejected": -2.2960150241851807, "step": 1320 }, { "epoch": 0.69, "learning_rate": 4.2847580799388026e-07, "logits/chosen": -2.8076231479644775, "logits/rejected": -2.7472128868103027, "logps/chosen": -258.0926513671875, "logps/rejected": -188.78359985351562, "loss": 0.5812, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4854873418807983, "rewards/margins": 0.9221324920654297, "rewards/rejected": -2.4076199531555176, "step": 1330 }, { "epoch": 0.69, "learning_rate": 4.275196022183974e-07, "logits/chosen": -2.7642879486083984, "logits/rejected": -2.812042474746704, "logps/chosen": -319.1858825683594, "logps/rejected": -246.2572784423828, "loss": 0.553, "rewards/accuracies": 0.75, "rewards/chosen": -1.092181921005249, "rewards/margins": 0.8022899627685547, "rewards/rejected": -1.8944717645645142, "step": 1340 }, { "epoch": 0.7, "learning_rate": 4.265633964429145e-07, "logits/chosen": -2.812278985977173, "logits/rejected": -2.761359691619873, "logps/chosen": -342.2608337402344, "logps/rejected": -224.8918914794922, "loss": 0.5775, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.914333701133728, "rewards/margins": 1.487335205078125, "rewards/rejected": -2.4016687870025635, "step": 1350 }, { "epoch": 0.7, "learning_rate": 4.256071906674316e-07, "logits/chosen": -2.9929111003875732, "logits/rejected": -2.94170880317688, "logps/chosen": -379.921875, "logps/rejected": -368.10357666015625, "loss": 0.652, "rewards/accuracies": 0.75, "rewards/chosen": -0.6550450325012207, "rewards/margins": 0.9407709240913391, "rewards/rejected": -1.595815896987915, "step": 1360 }, { "epoch": 0.71, "learning_rate": 4.246509848919487e-07, "logits/chosen": -2.912461757659912, "logits/rejected": -2.9404354095458984, "logps/chosen": -348.6622009277344, "logps/rejected": -248.0426788330078, "loss": 0.5636, "rewards/accuracies": 0.75, "rewards/chosen": -0.7293527722358704, "rewards/margins": 1.5709936618804932, "rewards/rejected": -2.3003463745117188, "step": 1370 }, { "epoch": 0.71, "learning_rate": 4.2369477911646584e-07, "logits/chosen": -2.829761505126953, "logits/rejected": -2.7305688858032227, "logps/chosen": -295.06781005859375, "logps/rejected": -240.6433563232422, "loss": 0.5567, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.69425368309021, "rewards/margins": 1.3369704484939575, "rewards/rejected": -2.031224012374878, "step": 1380 }, { "epoch": 0.72, "learning_rate": 4.2273857334098296e-07, "logits/chosen": -2.7433788776397705, "logits/rejected": -2.67673921585083, "logps/chosen": -289.397216796875, "logps/rejected": -243.04833984375, "loss": 0.6061, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7310119867324829, "rewards/margins": 0.585370659828186, "rewards/rejected": -1.316382646560669, "step": 1390 }, { "epoch": 0.72, "learning_rate": 4.2178236756550007e-07, "logits/chosen": -2.923424243927002, "logits/rejected": -2.912429094314575, "logps/chosen": -198.62017822265625, "logps/rejected": -277.8341979980469, "loss": 0.6914, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4593328535556793, "rewards/margins": 0.9467372894287109, "rewards/rejected": -1.4060701131820679, "step": 1400 }, { "epoch": 0.72, "eval_logits/chosen": -2.727492094039917, "eval_logits/rejected": -2.7166121006011963, "eval_logps/chosen": -261.146728515625, "eval_logps/rejected": -278.7448425292969, "eval_loss": 0.5136687159538269, "eval_rewards/accuracies": 0.796875, "eval_rewards/chosen": -0.6911616921424866, "eval_rewards/margins": 1.4863313436508179, "eval_rewards/rejected": -2.177493095397949, "eval_runtime": 58.5256, "eval_samples_per_second": 17.087, "eval_steps_per_second": 0.273, "step": 1400 }, { "epoch": 0.73, "learning_rate": 4.208261617900172e-07, "logits/chosen": -2.9004664421081543, "logits/rejected": -2.991079807281494, "logps/chosen": -200.07357788085938, "logps/rejected": -212.9990234375, "loss": 0.4996, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.8260477185249329, "rewards/margins": 1.3000409603118896, "rewards/rejected": -2.1260886192321777, "step": 1410 }, { "epoch": 0.73, "learning_rate": 4.198699560145343e-07, "logits/chosen": -2.750919818878174, "logits/rejected": -2.563699245452881, "logps/chosen": -229.60348510742188, "logps/rejected": -250.4010467529297, "loss": 0.6298, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8068568110466003, "rewards/margins": 0.5274587869644165, "rewards/rejected": -1.334315538406372, "step": 1420 }, { "epoch": 0.74, "learning_rate": 4.189137502390514e-07, "logits/chosen": -2.871040105819702, "logits/rejected": -2.859773635864258, "logps/chosen": -255.4622344970703, "logps/rejected": -367.54644775390625, "loss": 0.6702, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7843345999717712, "rewards/margins": 1.0513083934783936, "rewards/rejected": -1.8356430530548096, "step": 1430 }, { "epoch": 0.74, "learning_rate": 4.179575444635686e-07, "logits/chosen": -2.797947406768799, "logits/rejected": -2.768245220184326, "logps/chosen": -297.70465087890625, "logps/rejected": -303.483154296875, "loss": 0.5911, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.02579927444458, "rewards/margins": 1.1320759057998657, "rewards/rejected": -2.1578750610351562, "step": 1440 }, { "epoch": 0.75, "learning_rate": 4.170013386880857e-07, "logits/chosen": -2.842421531677246, "logits/rejected": -2.816070318222046, "logps/chosen": -286.8984069824219, "logps/rejected": -256.8731689453125, "loss": 0.4926, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8610552549362183, "rewards/margins": 0.9260069727897644, "rewards/rejected": -1.7870622873306274, "step": 1450 }, { "epoch": 0.75, "learning_rate": 4.1604513291260277e-07, "logits/chosen": -2.814866542816162, "logits/rejected": -2.7706210613250732, "logps/chosen": -273.64111328125, "logps/rejected": -259.51885986328125, "loss": 0.5227, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.5814541578292847, "rewards/margins": 2.4891767501831055, "rewards/rejected": -3.0706310272216797, "step": 1460 }, { "epoch": 0.76, "learning_rate": 4.150889271371199e-07, "logits/chosen": -2.7491848468780518, "logits/rejected": -2.800107002258301, "logps/chosen": -268.9418029785156, "logps/rejected": -296.1412658691406, "loss": 0.59, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.48690223693847656, "rewards/margins": 1.39999520778656, "rewards/rejected": -1.886897325515747, "step": 1470 }, { "epoch": 0.76, "learning_rate": 4.14132721361637e-07, "logits/chosen": -2.7790863513946533, "logits/rejected": -2.756493330001831, "logps/chosen": -376.4327392578125, "logps/rejected": -220.66128540039062, "loss": 0.4328, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6699414849281311, "rewards/margins": 2.2528209686279297, "rewards/rejected": -2.922762632369995, "step": 1480 }, { "epoch": 0.77, "learning_rate": 4.131765155861541e-07, "logits/chosen": -2.732978343963623, "logits/rejected": -2.712939739227295, "logps/chosen": -216.78231811523438, "logps/rejected": -187.97975158691406, "loss": 0.5317, "rewards/accuracies": 0.75, "rewards/chosen": -0.3067319989204407, "rewards/margins": 1.883384346961975, "rewards/rejected": -2.1901164054870605, "step": 1490 }, { "epoch": 0.77, "learning_rate": 4.1222030981067123e-07, "logits/chosen": -2.7365033626556396, "logits/rejected": -2.709888458251953, "logps/chosen": -284.1839294433594, "logps/rejected": -285.10064697265625, "loss": 0.4655, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.2894710302352905, "rewards/margins": 0.8915459513664246, "rewards/rejected": -2.1810169219970703, "step": 1500 }, { "epoch": 0.77, "eval_logits/chosen": -2.6837804317474365, "eval_logits/rejected": -2.6651253700256348, "eval_logps/chosen": -262.2220153808594, "eval_logps/rejected": -279.8998718261719, "eval_loss": 0.5090023875236511, "eval_rewards/accuracies": 0.703125, "eval_rewards/chosen": -0.7986923456192017, "eval_rewards/margins": 1.494301438331604, "eval_rewards/rejected": -2.2929937839508057, "eval_runtime": 59.1398, "eval_samples_per_second": 16.909, "eval_steps_per_second": 0.271, "step": 1500 }, { "epoch": 0.78, "learning_rate": 4.1126410403518835e-07, "logits/chosen": -2.774035930633545, "logits/rejected": -2.5919785499572754, "logps/chosen": -263.84185791015625, "logps/rejected": -284.3238220214844, "loss": 0.506, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6006399393081665, "rewards/margins": 2.4321160316467285, "rewards/rejected": -3.0327563285827637, "step": 1510 }, { "epoch": 0.78, "learning_rate": 4.1030789825970546e-07, "logits/chosen": -2.834711790084839, "logits/rejected": -2.8974971771240234, "logps/chosen": -314.38604736328125, "logps/rejected": -352.1858825683594, "loss": 0.5863, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.9283245205879211, "rewards/margins": 0.4295298457145691, "rewards/rejected": -1.3578544855117798, "step": 1520 }, { "epoch": 0.79, "learning_rate": 4.093516924842226e-07, "logits/chosen": -2.6712985038757324, "logits/rejected": -2.6710007190704346, "logps/chosen": -241.9701690673828, "logps/rejected": -220.11502075195312, "loss": 0.4433, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21889445185661316, "rewards/margins": 1.4537973403930664, "rewards/rejected": -1.6726917028427124, "step": 1530 }, { "epoch": 0.8, "learning_rate": 4.083954867087397e-07, "logits/chosen": -2.9339497089385986, "logits/rejected": -2.9125781059265137, "logps/chosen": -197.97679138183594, "logps/rejected": -202.1653289794922, "loss": 0.5284, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.0227124691009521, "rewards/margins": 0.7052000761032104, "rewards/rejected": -1.7279125452041626, "step": 1540 }, { "epoch": 0.8, "learning_rate": 4.074392809332568e-07, "logits/chosen": -2.841710329055786, "logits/rejected": -2.8297677040100098, "logps/chosen": -316.0207824707031, "logps/rejected": -229.83837890625, "loss": 0.513, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8295713663101196, "rewards/margins": 1.6421802043914795, "rewards/rejected": -2.4717514514923096, "step": 1550 }, { "epoch": 0.81, "learning_rate": 4.064830751577739e-07, "logits/chosen": -2.889648914337158, "logits/rejected": -2.767516613006592, "logps/chosen": -185.7052001953125, "logps/rejected": -177.06546020507812, "loss": 0.4782, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2799871563911438, "rewards/margins": 1.6918373107910156, "rewards/rejected": -1.971824288368225, "step": 1560 }, { "epoch": 0.81, "learning_rate": 4.0552686938229104e-07, "logits/chosen": -2.913878917694092, "logits/rejected": -2.7892653942108154, "logps/chosen": -242.6092071533203, "logps/rejected": -218.4198760986328, "loss": 0.4485, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.5770751237869263, "rewards/margins": 1.0650291442871094, "rewards/rejected": -1.6421045064926147, "step": 1570 }, { "epoch": 0.82, "learning_rate": 4.045706636068082e-07, "logits/chosen": -2.769942045211792, "logits/rejected": -2.8180408477783203, "logps/chosen": -178.39805603027344, "logps/rejected": -233.78713989257812, "loss": 0.4683, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6862732768058777, "rewards/margins": 1.50155770778656, "rewards/rejected": -2.187831163406372, "step": 1580 }, { "epoch": 0.82, "learning_rate": 4.036144578313253e-07, "logits/chosen": -2.832733392715454, "logits/rejected": -2.9456982612609863, "logps/chosen": -258.8636779785156, "logps/rejected": -223.64968872070312, "loss": 0.5346, "rewards/accuracies": 0.75, "rewards/chosen": -0.5500979423522949, "rewards/margins": 1.3681669235229492, "rewards/rejected": -1.9182647466659546, "step": 1590 }, { "epoch": 0.83, "learning_rate": 4.0265825205584244e-07, "logits/chosen": -2.820535182952881, "logits/rejected": -2.8729701042175293, "logps/chosen": -261.64056396484375, "logps/rejected": -265.8938293457031, "loss": 0.5731, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2627449929714203, "rewards/margins": 1.6428673267364502, "rewards/rejected": -1.9056123495101929, "step": 1600 }, { "epoch": 0.83, "eval_logits/chosen": -2.6727685928344727, "eval_logits/rejected": -2.654268264770508, "eval_logps/chosen": -262.48760986328125, "eval_logps/rejected": -280.4902038574219, "eval_loss": 0.5312163829803467, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -0.8252508044242859, "eval_rewards/margins": 1.5267785787582397, "eval_rewards/rejected": -2.352029323577881, "eval_runtime": 58.1435, "eval_samples_per_second": 17.199, "eval_steps_per_second": 0.275, "step": 1600 }, { "epoch": 0.83, "learning_rate": 4.0170204628035956e-07, "logits/chosen": -2.7514543533325195, "logits/rejected": -2.8077378273010254, "logps/chosen": -211.7623748779297, "logps/rejected": -252.843994140625, "loss": 0.494, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7454391121864319, "rewards/margins": 1.7308275699615479, "rewards/rejected": -2.476266384124756, "step": 1610 }, { "epoch": 0.84, "learning_rate": 4.007458405048766e-07, "logits/chosen": -2.857224225997925, "logits/rejected": -2.839128017425537, "logps/chosen": -282.80975341796875, "logps/rejected": -277.6203308105469, "loss": 0.4801, "rewards/accuracies": 0.75, "rewards/chosen": -0.9825541377067566, "rewards/margins": 1.5490392446517944, "rewards/rejected": -2.5315933227539062, "step": 1620 }, { "epoch": 0.84, "learning_rate": 3.9978963472939373e-07, "logits/chosen": -2.838963031768799, "logits/rejected": -2.808168411254883, "logps/chosen": -291.43280029296875, "logps/rejected": -260.65203857421875, "loss": 0.48, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7560056447982788, "rewards/margins": 1.1872189044952393, "rewards/rejected": -1.9432246685028076, "step": 1630 }, { "epoch": 0.85, "learning_rate": 3.9883342895391085e-07, "logits/chosen": -2.815406084060669, "logits/rejected": -2.7723686695098877, "logps/chosen": -270.00689697265625, "logps/rejected": -229.3192901611328, "loss": 0.546, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.42303770780563354, "rewards/margins": 2.0215001106262207, "rewards/rejected": -2.4445383548736572, "step": 1640 }, { "epoch": 0.85, "learning_rate": 3.9787722317842796e-07, "logits/chosen": -2.89304256439209, "logits/rejected": -2.849522113800049, "logps/chosen": -351.61968994140625, "logps/rejected": -226.35800170898438, "loss": 0.5355, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.039799273014068604, "rewards/margins": 1.9888496398925781, "rewards/rejected": -1.9490505456924438, "step": 1650 }, { "epoch": 0.86, "learning_rate": 3.969210174029451e-07, "logits/chosen": -2.7271950244903564, "logits/rejected": -2.7782604694366455, "logps/chosen": -182.22679138183594, "logps/rejected": -258.85784912109375, "loss": 0.5878, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.30569443106651306, "rewards/margins": 2.009462833404541, "rewards/rejected": -2.315157175064087, "step": 1660 }, { "epoch": 0.86, "learning_rate": 3.959648116274622e-07, "logits/chosen": -2.7675626277923584, "logits/rejected": -2.7918038368225098, "logps/chosen": -258.5990295410156, "logps/rejected": -230.567138671875, "loss": 0.6044, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6162198781967163, "rewards/margins": 1.2834047079086304, "rewards/rejected": -1.8996245861053467, "step": 1670 }, { "epoch": 0.87, "learning_rate": 3.950086058519793e-07, "logits/chosen": -2.922581195831299, "logits/rejected": -2.8593482971191406, "logps/chosen": -224.3607940673828, "logps/rejected": -256.91510009765625, "loss": 0.5717, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4705128073692322, "rewards/margins": 1.2131963968276978, "rewards/rejected": -1.6837093830108643, "step": 1680 }, { "epoch": 0.87, "learning_rate": 3.9405240007649643e-07, "logits/chosen": -2.9224143028259277, "logits/rejected": -2.942783832550049, "logps/chosen": -216.39816284179688, "logps/rejected": -272.4553527832031, "loss": 0.5066, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5551620125770569, "rewards/margins": 1.4193694591522217, "rewards/rejected": -1.9745315313339233, "step": 1690 }, { "epoch": 0.88, "learning_rate": 3.9309619430101354e-07, "logits/chosen": -2.8787567615509033, "logits/rejected": -2.8499011993408203, "logps/chosen": -241.4861297607422, "logps/rejected": -210.9620361328125, "loss": 0.5233, "rewards/accuracies": 0.75, "rewards/chosen": -0.7351746559143066, "rewards/margins": 1.017896294593811, "rewards/rejected": -1.7530708312988281, "step": 1700 }, { "epoch": 0.88, "eval_logits/chosen": -2.709693431854248, "eval_logits/rejected": -2.686978340148926, "eval_logps/chosen": -258.8084411621094, "eval_logps/rejected": -277.92047119140625, "eval_loss": 0.5205972790718079, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -0.4573337435722351, "eval_rewards/margins": 1.637721300125122, "eval_rewards/rejected": -2.095055103302002, "eval_runtime": 55.0835, "eval_samples_per_second": 18.154, "eval_steps_per_second": 0.29, "step": 1700 }, { "epoch": 0.88, "learning_rate": 3.9213998852553066e-07, "logits/chosen": -2.900834798812866, "logits/rejected": -2.703029155731201, "logps/chosen": -338.3115539550781, "logps/rejected": -326.6482238769531, "loss": 0.6064, "rewards/accuracies": 0.75, "rewards/chosen": -1.00930917263031, "rewards/margins": 1.1334011554718018, "rewards/rejected": -2.1427102088928223, "step": 1710 }, { "epoch": 0.89, "learning_rate": 3.9118378275004783e-07, "logits/chosen": -2.888641119003296, "logits/rejected": -2.817422389984131, "logps/chosen": -339.2339172363281, "logps/rejected": -273.8936462402344, "loss": 0.5537, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8813110589981079, "rewards/margins": 1.3287475109100342, "rewards/rejected": -2.2100586891174316, "step": 1720 }, { "epoch": 0.89, "learning_rate": 3.9022757697456494e-07, "logits/chosen": -2.739957094192505, "logits/rejected": -2.78080677986145, "logps/chosen": -356.21844482421875, "logps/rejected": -331.55096435546875, "loss": 0.5099, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5665701627731323, "rewards/margins": 1.0891746282577515, "rewards/rejected": -1.6557449102401733, "step": 1730 }, { "epoch": 0.9, "learning_rate": 3.8927137119908206e-07, "logits/chosen": -2.9097769260406494, "logits/rejected": -2.848907947540283, "logps/chosen": -305.66607666015625, "logps/rejected": -211.9135284423828, "loss": 0.487, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.30798807740211487, "rewards/margins": 2.1856961250305176, "rewards/rejected": -2.4936842918395996, "step": 1740 }, { "epoch": 0.9, "learning_rate": 3.883151654235992e-07, "logits/chosen": -2.831984281539917, "logits/rejected": -2.8497231006622314, "logps/chosen": -256.02667236328125, "logps/rejected": -253.0034942626953, "loss": 0.5203, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10425261408090591, "rewards/margins": 1.9909296035766602, "rewards/rejected": -2.095182180404663, "step": 1750 }, { "epoch": 0.91, "learning_rate": 3.873589596481163e-07, "logits/chosen": -2.765052080154419, "logits/rejected": -2.8637542724609375, "logps/chosen": -267.7926025390625, "logps/rejected": -239.08718872070312, "loss": 0.5506, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7126263976097107, "rewards/margins": 0.9376744031906128, "rewards/rejected": -1.6503007411956787, "step": 1760 }, { "epoch": 0.91, "learning_rate": 3.864027538726334e-07, "logits/chosen": -2.8235788345336914, "logits/rejected": -2.7703769207000732, "logps/chosen": -227.66281127929688, "logps/rejected": -227.4187469482422, "loss": 0.5707, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.47516727447509766, "rewards/margins": 1.8380186557769775, "rewards/rejected": -2.313185691833496, "step": 1770 }, { "epoch": 0.92, "learning_rate": 3.8544654809715047e-07, "logits/chosen": -2.840291976928711, "logits/rejected": -2.889273166656494, "logps/chosen": -289.7173156738281, "logps/rejected": -289.07391357421875, "loss": 0.5463, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0754880905151367, "rewards/margins": 1.31435227394104, "rewards/rejected": -2.3898403644561768, "step": 1780 }, { "epoch": 0.92, "learning_rate": 3.844903423216676e-07, "logits/chosen": -2.862814426422119, "logits/rejected": -2.8419394493103027, "logps/chosen": -256.5997619628906, "logps/rejected": -221.2139434814453, "loss": 0.5288, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6081517338752747, "rewards/margins": 0.817557156085968, "rewards/rejected": -1.4257088899612427, "step": 1790 }, { "epoch": 0.93, "learning_rate": 3.835341365461847e-07, "logits/chosen": -2.836991786956787, "logits/rejected": -2.78855562210083, "logps/chosen": -302.5685119628906, "logps/rejected": -237.11209106445312, "loss": 0.5593, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.189387708902359, "rewards/margins": 1.4565317630767822, "rewards/rejected": -1.6459195613861084, "step": 1800 }, { "epoch": 0.93, "eval_logits/chosen": -2.651865005493164, "eval_logits/rejected": -2.6221344470977783, "eval_logps/chosen": -259.7433166503906, "eval_logps/rejected": -278.97027587890625, "eval_loss": 0.523062527179718, "eval_rewards/accuracies": 0.796875, "eval_rewards/chosen": -0.5508205890655518, "eval_rewards/margins": 1.649214267730713, "eval_rewards/rejected": -2.2000348567962646, "eval_runtime": 52.7667, "eval_samples_per_second": 18.951, "eval_steps_per_second": 0.303, "step": 1800 }, { "epoch": 0.93, "learning_rate": 3.825779307707018e-07, "logits/chosen": -2.805868625640869, "logits/rejected": -2.8683719635009766, "logps/chosen": -165.62680053710938, "logps/rejected": -188.5421600341797, "loss": 0.5336, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4086759090423584, "rewards/margins": 1.5789250135421753, "rewards/rejected": -1.9876010417938232, "step": 1810 }, { "epoch": 0.94, "learning_rate": 3.8162172499521893e-07, "logits/chosen": -2.823812961578369, "logits/rejected": -2.8563215732574463, "logps/chosen": -220.8478546142578, "logps/rejected": -212.088623046875, "loss": 0.5532, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7714720368385315, "rewards/margins": 1.1308372020721436, "rewards/rejected": -1.9023091793060303, "step": 1820 }, { "epoch": 0.94, "learning_rate": 3.8066551921973605e-07, "logits/chosen": -2.772951126098633, "logits/rejected": -2.741703510284424, "logps/chosen": -282.5356750488281, "logps/rejected": -204.34359741210938, "loss": 0.4889, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4399307668209076, "rewards/margins": 1.7154382467269897, "rewards/rejected": -2.1553690433502197, "step": 1830 }, { "epoch": 0.95, "learning_rate": 3.7970931344425316e-07, "logits/chosen": -2.822625160217285, "logits/rejected": -2.813814640045166, "logps/chosen": -298.0512390136719, "logps/rejected": -247.30172729492188, "loss": 0.4864, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5513595938682556, "rewards/margins": 0.6814876198768616, "rewards/rejected": -1.2328474521636963, "step": 1840 }, { "epoch": 0.96, "learning_rate": 3.787531076687703e-07, "logits/chosen": -2.7317397594451904, "logits/rejected": -2.7448277473449707, "logps/chosen": -269.85760498046875, "logps/rejected": -223.9963836669922, "loss": 0.5273, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.41241344809532166, "rewards/margins": 2.08837890625, "rewards/rejected": -2.5007922649383545, "step": 1850 }, { "epoch": 0.96, "learning_rate": 3.7779690189328745e-07, "logits/chosen": -2.812678813934326, "logits/rejected": -2.8333404064178467, "logps/chosen": -280.35247802734375, "logps/rejected": -255.755126953125, "loss": 0.4633, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.533105194568634, "rewards/margins": 1.1613143682479858, "rewards/rejected": -1.694419503211975, "step": 1860 }, { "epoch": 0.97, "learning_rate": 3.7684069611780456e-07, "logits/chosen": -2.824018716812134, "logits/rejected": -2.789066791534424, "logps/chosen": -230.8186798095703, "logps/rejected": -197.3396453857422, "loss": 0.565, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8623602986335754, "rewards/margins": 0.8971187472343445, "rewards/rejected": -1.7594791650772095, "step": 1870 }, { "epoch": 0.97, "learning_rate": 3.758844903423217e-07, "logits/chosen": -2.731091260910034, "logits/rejected": -2.8013782501220703, "logps/chosen": -261.1441650390625, "logps/rejected": -285.99298095703125, "loss": 0.5784, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0055458545684814, "rewards/margins": 1.3556736707687378, "rewards/rejected": -2.3612194061279297, "step": 1880 }, { "epoch": 0.98, "learning_rate": 3.749282845668388e-07, "logits/chosen": -2.979485034942627, "logits/rejected": -2.8463826179504395, "logps/chosen": -279.7747497558594, "logps/rejected": -281.71881103515625, "loss": 0.5474, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5681439638137817, "rewards/margins": 1.333145022392273, "rewards/rejected": -1.9012889862060547, "step": 1890 }, { "epoch": 0.98, "learning_rate": 3.739720787913559e-07, "logits/chosen": -2.8072428703308105, "logits/rejected": -2.8444907665252686, "logps/chosen": -318.6991882324219, "logps/rejected": -286.9081726074219, "loss": 0.4967, "rewards/accuracies": 0.75, "rewards/chosen": -0.501719057559967, "rewards/margins": 1.7894643545150757, "rewards/rejected": -2.2911829948425293, "step": 1900 }, { "epoch": 0.98, "eval_logits/chosen": -2.6878409385681152, "eval_logits/rejected": -2.65635347366333, "eval_logps/chosen": -259.57489013671875, "eval_logps/rejected": -276.53948974609375, "eval_loss": 0.528998613357544, "eval_rewards/accuracies": 0.828125, "eval_rewards/chosen": -0.5339791178703308, "eval_rewards/margins": 1.4229780435562134, "eval_rewards/rejected": -1.9569573402404785, "eval_runtime": 58.1447, "eval_samples_per_second": 17.198, "eval_steps_per_second": 0.275, "step": 1900 }, { "epoch": 0.99, "learning_rate": 3.73015873015873e-07, "logits/chosen": -2.8797740936279297, "logits/rejected": -2.8204915523529053, "logps/chosen": -275.71417236328125, "logps/rejected": -218.75216674804688, "loss": 0.497, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2984248995780945, "rewards/margins": 0.8872405886650085, "rewards/rejected": -1.185665488243103, "step": 1910 }, { "epoch": 0.99, "learning_rate": 3.7205966724039014e-07, "logits/chosen": -2.728538990020752, "logits/rejected": -2.7202653884887695, "logps/chosen": -326.10626220703125, "logps/rejected": -258.46539306640625, "loss": 0.4882, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4980931878089905, "rewards/margins": 1.183232069015503, "rewards/rejected": -1.6813253164291382, "step": 1920 }, { "epoch": 1.0, "learning_rate": 3.711034614649072e-07, "logits/chosen": -2.7934298515319824, "logits/rejected": -2.7393717765808105, "logps/chosen": -288.08892822265625, "logps/rejected": -230.5961151123047, "loss": 0.4772, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8787330389022827, "rewards/margins": 1.967394232749939, "rewards/rejected": -2.8461270332336426, "step": 1930 }, { "epoch": 1.0, "learning_rate": 3.701472556894243e-07, "logits/chosen": -2.742077350616455, "logits/rejected": -2.6756367683410645, "logps/chosen": -230.4021759033203, "logps/rejected": -311.9582214355469, "loss": 0.3813, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0876777321100235, "rewards/margins": 2.651371479034424, "rewards/rejected": -2.7390494346618652, "step": 1940 }, { "epoch": 1.01, "learning_rate": 3.6919104991394144e-07, "logits/chosen": -2.8869168758392334, "logits/rejected": -2.870358943939209, "logps/chosen": -180.28982543945312, "logps/rejected": -222.55810546875, "loss": 0.0952, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.38509073853492737, "rewards/margins": 3.711843490600586, "rewards/rejected": -3.3267529010772705, "step": 1950 }, { "epoch": 1.01, "learning_rate": 3.6823484413845855e-07, "logits/chosen": -2.7434253692626953, "logits/rejected": -2.826244354248047, "logps/chosen": -290.17999267578125, "logps/rejected": -359.95318603515625, "loss": 0.0795, "rewards/accuracies": 1.0, "rewards/chosen": 1.3888338804244995, "rewards/margins": 6.838004112243652, "rewards/rejected": -5.449170112609863, "step": 1960 }, { "epoch": 1.02, "learning_rate": 3.6727863836297567e-07, "logits/chosen": -2.761378765106201, "logits/rejected": -2.8128793239593506, "logps/chosen": -196.734619140625, "logps/rejected": -248.90444946289062, "loss": 0.0896, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.7179908752441406, "rewards/margins": 4.64093542098999, "rewards/rejected": -3.9229445457458496, "step": 1970 }, { "epoch": 1.02, "learning_rate": 3.663224325874928e-07, "logits/chosen": -2.7781982421875, "logits/rejected": -2.745850086212158, "logps/chosen": -230.58413696289062, "logps/rejected": -243.18405151367188, "loss": 0.1075, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2712101638317108, "rewards/margins": 3.9006011486053467, "rewards/rejected": -4.171811103820801, "step": 1980 }, { "epoch": 1.03, "learning_rate": 3.653662268120099e-07, "logits/chosen": -2.69258975982666, "logits/rejected": -2.718759775161743, "logps/chosen": -263.413818359375, "logps/rejected": -258.7752990722656, "loss": 0.0765, "rewards/accuracies": 1.0, "rewards/chosen": 1.1363146305084229, "rewards/margins": 5.461094856262207, "rewards/rejected": -4.324779987335205, "step": 1990 }, { "epoch": 1.03, "learning_rate": 3.6441002103652707e-07, "logits/chosen": -2.7689287662506104, "logits/rejected": -2.7180933952331543, "logps/chosen": -184.6995391845703, "logps/rejected": -219.4838409423828, "loss": 0.0921, "rewards/accuracies": 1.0, "rewards/chosen": -0.8591575622558594, "rewards/margins": 3.6370902061462402, "rewards/rejected": -4.4962477684021, "step": 2000 }, { "epoch": 1.03, "eval_logits/chosen": -2.634498119354248, "eval_logits/rejected": -2.6040313243865967, "eval_logps/chosen": -265.61114501953125, "eval_logps/rejected": -288.5853576660156, "eval_loss": 0.5368282198905945, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -1.1376045942306519, "eval_rewards/margins": 2.023937463760376, "eval_rewards/rejected": -3.1615419387817383, "eval_runtime": 57.4706, "eval_samples_per_second": 17.4, "eval_steps_per_second": 0.278, "step": 2000 }, { "epoch": 1.04, "learning_rate": 3.634538152610442e-07, "logits/chosen": -2.779973268508911, "logits/rejected": -2.83324933052063, "logps/chosen": -228.81320190429688, "logps/rejected": -310.46905517578125, "loss": 0.0672, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3947059214115143, "rewards/margins": 5.533167362213135, "rewards/rejected": -5.138461112976074, "step": 2010 }, { "epoch": 1.04, "learning_rate": 3.624976094855613e-07, "logits/chosen": -2.7965312004089355, "logits/rejected": -2.7170250415802, "logps/chosen": -250.6485137939453, "logps/rejected": -262.75152587890625, "loss": 0.0705, "rewards/accuracies": 1.0, "rewards/chosen": 0.7664440870285034, "rewards/margins": 7.152462959289551, "rewards/rejected": -6.386018753051758, "step": 2020 }, { "epoch": 1.05, "learning_rate": 3.615414037100784e-07, "logits/chosen": -2.6802003383636475, "logits/rejected": -2.638892650604248, "logps/chosen": -275.2335205078125, "logps/rejected": -222.55178833007812, "loss": 0.099, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.053186558187007904, "rewards/margins": 4.206416606903076, "rewards/rejected": -4.259603023529053, "step": 2030 }, { "epoch": 1.05, "learning_rate": 3.6058519793459553e-07, "logits/chosen": -2.795947790145874, "logits/rejected": -2.8200221061706543, "logps/chosen": -189.23316955566406, "logps/rejected": -218.7393341064453, "loss": 0.0855, "rewards/accuracies": 1.0, "rewards/chosen": 0.7773109674453735, "rewards/margins": 5.873230457305908, "rewards/rejected": -5.095919132232666, "step": 2040 }, { "epoch": 1.06, "learning_rate": 3.5962899215911265e-07, "logits/chosen": -2.630261182785034, "logits/rejected": -2.57206392288208, "logps/chosen": -217.30972290039062, "logps/rejected": -250.2468719482422, "loss": 0.0676, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0636889785528183, "rewards/margins": 4.82668399810791, "rewards/rejected": -4.8903727531433105, "step": 2050 }, { "epoch": 1.06, "learning_rate": 3.5867278638362976e-07, "logits/chosen": -2.601591110229492, "logits/rejected": -2.6793808937072754, "logps/chosen": -240.95700073242188, "logps/rejected": -273.2774353027344, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": 0.05607228726148605, "rewards/margins": 5.447501182556152, "rewards/rejected": -5.391427516937256, "step": 2060 }, { "epoch": 1.07, "learning_rate": 3.577165806081469e-07, "logits/chosen": -2.7586188316345215, "logits/rejected": -2.804933547973633, "logps/chosen": -262.1355895996094, "logps/rejected": -302.13507080078125, "loss": 0.095, "rewards/accuracies": 1.0, "rewards/chosen": 0.5269735455513, "rewards/margins": 6.823977470397949, "rewards/rejected": -6.297003746032715, "step": 2070 }, { "epoch": 1.07, "learning_rate": 3.56760374832664e-07, "logits/chosen": -2.906247615814209, "logits/rejected": -2.7534871101379395, "logps/chosen": -250.2699737548828, "logps/rejected": -261.11962890625, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": 1.4384024143218994, "rewards/margins": 6.545752048492432, "rewards/rejected": -5.107348442077637, "step": 2080 }, { "epoch": 1.08, "learning_rate": 3.5580416905718106e-07, "logits/chosen": -2.655647039413452, "logits/rejected": -2.770217180252075, "logps/chosen": -294.7089538574219, "logps/rejected": -271.32037353515625, "loss": 0.1017, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.16056032478809357, "rewards/margins": 4.826616287231445, "rewards/rejected": -4.666056156158447, "step": 2090 }, { "epoch": 1.08, "learning_rate": 3.5484796328169817e-07, "logits/chosen": -2.8367366790771484, "logits/rejected": -2.7900490760803223, "logps/chosen": -235.642333984375, "logps/rejected": -272.7825012207031, "loss": 0.0733, "rewards/accuracies": 1.0, "rewards/chosen": 1.0952982902526855, "rewards/margins": 5.964905261993408, "rewards/rejected": -4.8696064949035645, "step": 2100 }, { "epoch": 1.08, "eval_logits/chosen": -2.659477949142456, "eval_logits/rejected": -2.628939151763916, "eval_logps/chosen": -265.2799377441406, "eval_logps/rejected": -291.42083740234375, "eval_loss": 0.5452979803085327, "eval_rewards/accuracies": 0.765625, "eval_rewards/chosen": -1.1044831275939941, "eval_rewards/margins": 2.3406097888946533, "eval_rewards/rejected": -3.4450929164886475, "eval_runtime": 55.5581, "eval_samples_per_second": 17.999, "eval_steps_per_second": 0.288, "step": 2100 }, { "epoch": 1.09, "learning_rate": 3.538917575062153e-07, "logits/chosen": -2.700606346130371, "logits/rejected": -2.6558995246887207, "logps/chosen": -210.18374633789062, "logps/rejected": -282.9020080566406, "loss": 0.1171, "rewards/accuracies": 1.0, "rewards/chosen": 0.15502862632274628, "rewards/margins": 5.092817783355713, "rewards/rejected": -4.937788963317871, "step": 2110 }, { "epoch": 1.09, "learning_rate": 3.529355517307324e-07, "logits/chosen": -2.8686940670013428, "logits/rejected": -2.740063190460205, "logps/chosen": -269.2464904785156, "logps/rejected": -366.86639404296875, "loss": 0.0607, "rewards/accuracies": 1.0, "rewards/chosen": 1.0339075326919556, "rewards/margins": 8.14382266998291, "rewards/rejected": -7.109914302825928, "step": 2120 }, { "epoch": 1.1, "learning_rate": 3.519793459552495e-07, "logits/chosen": -2.823467969894409, "logits/rejected": -2.833052635192871, "logps/chosen": -280.67706298828125, "logps/rejected": -302.888671875, "loss": 0.0767, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.975002646446228, "rewards/margins": 6.0545806884765625, "rewards/rejected": -5.079577445983887, "step": 2130 }, { "epoch": 1.1, "learning_rate": 3.510231401797667e-07, "logits/chosen": -2.9246203899383545, "logits/rejected": -2.8968777656555176, "logps/chosen": -409.8377380371094, "logps/rejected": -376.01153564453125, "loss": 0.0978, "rewards/accuracies": 1.0, "rewards/chosen": 0.6437445878982544, "rewards/margins": 7.857232570648193, "rewards/rejected": -7.2134881019592285, "step": 2140 }, { "epoch": 1.11, "learning_rate": 3.500669344042838e-07, "logits/chosen": -2.8815579414367676, "logits/rejected": -2.696906566619873, "logps/chosen": -312.1972961425781, "logps/rejected": -323.14031982421875, "loss": 0.0847, "rewards/accuracies": 1.0, "rewards/chosen": 0.4120159149169922, "rewards/margins": 6.041172027587891, "rewards/rejected": -5.629156112670898, "step": 2150 }, { "epoch": 1.12, "learning_rate": 3.491107286288009e-07, "logits/chosen": -2.869086265563965, "logits/rejected": -2.794461250305176, "logps/chosen": -230.935302734375, "logps/rejected": -229.4247283935547, "loss": 0.1155, "rewards/accuracies": 1.0, "rewards/chosen": 1.5138686895370483, "rewards/margins": 6.901867866516113, "rewards/rejected": -5.387998580932617, "step": 2160 }, { "epoch": 1.12, "learning_rate": 3.4815452285331803e-07, "logits/chosen": -2.906574249267578, "logits/rejected": -2.918184280395508, "logps/chosen": -435.5089416503906, "logps/rejected": -367.09820556640625, "loss": 0.0949, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4653266966342926, "rewards/margins": 4.6506667137146, "rewards/rejected": -4.18533992767334, "step": 2170 }, { "epoch": 1.13, "learning_rate": 3.4719831707783515e-07, "logits/chosen": -2.6144165992736816, "logits/rejected": -2.6169540882110596, "logps/chosen": -259.0269470214844, "logps/rejected": -253.59518432617188, "loss": 0.1058, "rewards/accuracies": 1.0, "rewards/chosen": 0.8547903895378113, "rewards/margins": 5.843932151794434, "rewards/rejected": -4.989141941070557, "step": 2180 }, { "epoch": 1.13, "learning_rate": 3.4624211130235227e-07, "logits/chosen": -2.905179738998413, "logits/rejected": -2.836651086807251, "logps/chosen": -208.110107421875, "logps/rejected": -241.3552703857422, "loss": 0.086, "rewards/accuracies": 1.0, "rewards/chosen": 0.6082350015640259, "rewards/margins": 6.723033905029297, "rewards/rejected": -6.114798545837402, "step": 2190 }, { "epoch": 1.14, "learning_rate": 3.452859055268694e-07, "logits/chosen": -2.7746129035949707, "logits/rejected": -2.7575089931488037, "logps/chosen": -204.41021728515625, "logps/rejected": -263.98004150390625, "loss": 0.0972, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20659780502319336, "rewards/margins": 5.700135707855225, "rewards/rejected": -5.90673303604126, "step": 2200 }, { "epoch": 1.14, "eval_logits/chosen": -2.670933246612549, "eval_logits/rejected": -2.647088050842285, "eval_logps/chosen": -271.1505126953125, "eval_logps/rejected": -296.7934265136719, "eval_loss": 0.557054877281189, "eval_rewards/accuracies": 0.8125, "eval_rewards/chosen": -1.6915401220321655, "eval_rewards/margins": 2.2908077239990234, "eval_rewards/rejected": -3.9823474884033203, "eval_runtime": 55.8179, "eval_samples_per_second": 17.915, "eval_steps_per_second": 0.287, "step": 2200 }, { "epoch": 1.14, "learning_rate": 3.443296997513865e-07, "logits/chosen": -2.8264622688293457, "logits/rejected": -2.802203416824341, "logps/chosen": -317.52960205078125, "logps/rejected": -342.2868957519531, "loss": 0.0822, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 1.2196967601776123, "rewards/margins": 7.658734321594238, "rewards/rejected": -6.439038276672363, "step": 2210 }, { "epoch": 1.15, "learning_rate": 3.433734939759036e-07, "logits/chosen": -2.8357937335968018, "logits/rejected": -2.8530819416046143, "logps/chosen": -235.8030548095703, "logps/rejected": -277.0107116699219, "loss": 0.1058, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.7737981677055359, "rewards/margins": 6.4052414894104, "rewards/rejected": -5.631443023681641, "step": 2220 }, { "epoch": 1.15, "learning_rate": 3.4241728820042073e-07, "logits/chosen": -2.7552199363708496, "logits/rejected": -2.7428534030914307, "logps/chosen": -227.3050079345703, "logps/rejected": -270.3177185058594, "loss": 0.121, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6933164000511169, "rewards/margins": 4.165007591247559, "rewards/rejected": -4.858323574066162, "step": 2230 }, { "epoch": 1.16, "learning_rate": 3.4146108242493784e-07, "logits/chosen": -2.7783877849578857, "logits/rejected": -2.8269574642181396, "logps/chosen": -289.098388671875, "logps/rejected": -384.05474853515625, "loss": 0.1735, "rewards/accuracies": 1.0, "rewards/chosen": 0.44400936365127563, "rewards/margins": 7.5069899559021, "rewards/rejected": -7.062979698181152, "step": 2240 }, { "epoch": 1.16, "learning_rate": 3.405048766494549e-07, "logits/chosen": -2.576467990875244, "logits/rejected": -2.569551706314087, "logps/chosen": -264.085205078125, "logps/rejected": -242.80126953125, "loss": 0.2203, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.19903890788555145, "rewards/margins": 5.752175331115723, "rewards/rejected": -5.553135871887207, "step": 2250 }, { "epoch": 1.17, "learning_rate": 3.39548670873972e-07, "logits/chosen": -2.780787944793701, "logits/rejected": -2.7364470958709717, "logps/chosen": -327.9325866699219, "logps/rejected": -381.19915771484375, "loss": 0.0916, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.037799786776304245, "rewards/margins": 5.7476677894592285, "rewards/rejected": -5.785468101501465, "step": 2260 }, { "epoch": 1.17, "learning_rate": 3.3859246509848914e-07, "logits/chosen": -2.716096878051758, "logits/rejected": -2.7619426250457764, "logps/chosen": -270.20281982421875, "logps/rejected": -309.69500732421875, "loss": 0.0833, "rewards/accuracies": 1.0, "rewards/chosen": -0.1129147857427597, "rewards/margins": 5.1096320152282715, "rewards/rejected": -5.22254753112793, "step": 2270 }, { "epoch": 1.18, "learning_rate": 3.376362593230063e-07, "logits/chosen": -2.633354902267456, "logits/rejected": -2.5907938480377197, "logps/chosen": -214.78662109375, "logps/rejected": -303.368408203125, "loss": 0.0682, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21891649067401886, "rewards/margins": 6.782713413238525, "rewards/rejected": -6.5637969970703125, "step": 2280 }, { "epoch": 1.18, "learning_rate": 3.366800535475234e-07, "logits/chosen": -2.788681745529175, "logits/rejected": -2.6701889038085938, "logps/chosen": -367.4220886230469, "logps/rejected": -265.5601501464844, "loss": 0.0699, "rewards/accuracies": 1.0, "rewards/chosen": 2.7071707248687744, "rewards/margins": 8.423349380493164, "rewards/rejected": -5.716177940368652, "step": 2290 }, { "epoch": 1.19, "learning_rate": 3.3572384777204054e-07, "logits/chosen": -2.6153483390808105, "logits/rejected": -2.575199842453003, "logps/chosen": -398.6623229980469, "logps/rejected": -343.8503112792969, "loss": 0.1058, "rewards/accuracies": 1.0, "rewards/chosen": 0.5428920984268188, "rewards/margins": 7.921414852142334, "rewards/rejected": -7.3785223960876465, "step": 2300 }, { "epoch": 1.19, "eval_logits/chosen": -2.5798184871673584, "eval_logits/rejected": -2.5527260303497314, "eval_logps/chosen": -264.8562927246094, "eval_logps/rejected": -295.91058349609375, "eval_loss": 0.5789377689361572, "eval_rewards/accuracies": 0.796875, "eval_rewards/chosen": -1.0621176958084106, "eval_rewards/margins": 2.8319482803344727, "eval_rewards/rejected": -3.8940658569335938, "eval_runtime": 58.0073, "eval_samples_per_second": 17.239, "eval_steps_per_second": 0.276, "step": 2300 }, { "epoch": 1.19, "learning_rate": 3.3476764199655765e-07, "logits/chosen": -2.513836145401001, "logits/rejected": -2.6243300437927246, "logps/chosen": -219.6814422607422, "logps/rejected": -218.55807495117188, "loss": 0.1111, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.24565038084983826, "rewards/margins": 4.5390119552612305, "rewards/rejected": -4.293361663818359, "step": 2310 }, { "epoch": 1.2, "learning_rate": 3.3381143622107477e-07, "logits/chosen": -2.8270373344421387, "logits/rejected": -2.7377943992614746, "logps/chosen": -401.356201171875, "logps/rejected": -353.20965576171875, "loss": 0.058, "rewards/accuracies": 1.0, "rewards/chosen": 0.6988257169723511, "rewards/margins": 6.860513210296631, "rewards/rejected": -6.161687850952148, "step": 2320 }, { "epoch": 1.2, "learning_rate": 3.328552304455919e-07, "logits/chosen": -2.717745542526245, "logits/rejected": -2.673698902130127, "logps/chosen": -268.2499084472656, "logps/rejected": -293.6933898925781, "loss": 0.0778, "rewards/accuracies": 1.0, "rewards/chosen": 0.8076402544975281, "rewards/margins": 7.105103969573975, "rewards/rejected": -6.297463417053223, "step": 2330 }, { "epoch": 1.21, "learning_rate": 3.31899024670109e-07, "logits/chosen": -2.759124517440796, "logits/rejected": -2.744246006011963, "logps/chosen": -315.86248779296875, "logps/rejected": -262.46099853515625, "loss": 0.0721, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9767075777053833, "rewards/margins": 4.7864885330200195, "rewards/rejected": -5.7631964683532715, "step": 2340 }, { "epoch": 1.21, "learning_rate": 3.309428188946261e-07, "logits/chosen": -2.6659247875213623, "logits/rejected": -2.627288341522217, "logps/chosen": -165.9207305908203, "logps/rejected": -166.2641143798828, "loss": 0.1052, "rewards/accuracies": 1.0, "rewards/chosen": -0.17963728308677673, "rewards/margins": 4.340859413146973, "rewards/rejected": -4.520496368408203, "step": 2350 }, { "epoch": 1.22, "learning_rate": 3.2998661311914323e-07, "logits/chosen": -2.6454150676727295, "logits/rejected": -2.5655908584594727, "logps/chosen": -195.2259979248047, "logps/rejected": -328.01806640625, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": 0.6261202096939087, "rewards/margins": 8.00461196899414, "rewards/rejected": -7.3784918785095215, "step": 2360 }, { "epoch": 1.22, "learning_rate": 3.2903040734366035e-07, "logits/chosen": -2.7958927154541016, "logits/rejected": -2.7633419036865234, "logps/chosen": -299.79107666015625, "logps/rejected": -378.6677551269531, "loss": 0.0659, "rewards/accuracies": 1.0, "rewards/chosen": 1.1539552211761475, "rewards/margins": 7.467595100402832, "rewards/rejected": -6.31364107131958, "step": 2370 }, { "epoch": 1.23, "learning_rate": 3.2807420156817746e-07, "logits/chosen": -2.7090749740600586, "logits/rejected": -2.64817476272583, "logps/chosen": -226.72525024414062, "logps/rejected": -330.6046142578125, "loss": 0.1011, "rewards/accuracies": 1.0, "rewards/chosen": -0.004512411542236805, "rewards/margins": 5.72462272644043, "rewards/rejected": -5.729135990142822, "step": 2380 }, { "epoch": 1.23, "learning_rate": 3.271179957926946e-07, "logits/chosen": -2.447643995285034, "logits/rejected": -2.5474460124969482, "logps/chosen": -325.9497985839844, "logps/rejected": -339.540771484375, "loss": 0.0895, "rewards/accuracies": 1.0, "rewards/chosen": 0.07166711986064911, "rewards/margins": 5.96669864654541, "rewards/rejected": -5.895030975341797, "step": 2390 }, { "epoch": 1.24, "learning_rate": 3.261617900172117e-07, "logits/chosen": -2.672534465789795, "logits/rejected": -2.7197232246398926, "logps/chosen": -225.26168823242188, "logps/rejected": -283.93212890625, "loss": 0.2423, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.4908114969730377, "rewards/margins": 6.257862567901611, "rewards/rejected": -5.767050743103027, "step": 2400 }, { "epoch": 1.24, "eval_logits/chosen": -2.5784032344818115, "eval_logits/rejected": -2.5413780212402344, "eval_logps/chosen": -266.1980895996094, "eval_logps/rejected": -292.55987548828125, "eval_loss": 0.545539379119873, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -1.1962969303131104, "eval_rewards/margins": 2.362699508666992, "eval_rewards/rejected": -3.5589966773986816, "eval_runtime": 57.2058, "eval_samples_per_second": 17.481, "eval_steps_per_second": 0.28, "step": 2400 }, { "epoch": 1.24, "learning_rate": 3.2520558424172876e-07, "logits/chosen": -2.705650568008423, "logits/rejected": -2.6275668144226074, "logps/chosen": -204.61109924316406, "logps/rejected": -228.20059204101562, "loss": 0.0638, "rewards/accuracies": 1.0, "rewards/chosen": -0.12385628372430801, "rewards/margins": 5.060498237609863, "rewards/rejected": -5.184354782104492, "step": 2410 }, { "epoch": 1.25, "learning_rate": 3.242493784662459e-07, "logits/chosen": -2.7630136013031006, "logits/rejected": -2.7599825859069824, "logps/chosen": -271.2209777832031, "logps/rejected": -319.4446716308594, "loss": 0.0914, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.31576937437057495, "rewards/margins": 5.5094099044799805, "rewards/rejected": -5.193641185760498, "step": 2420 }, { "epoch": 1.25, "learning_rate": 3.2329317269076304e-07, "logits/chosen": -2.7317633628845215, "logits/rejected": -2.655245780944824, "logps/chosen": -273.3720397949219, "logps/rejected": -324.91253662109375, "loss": 0.0998, "rewards/accuracies": 1.0, "rewards/chosen": 1.1897385120391846, "rewards/margins": 7.156263828277588, "rewards/rejected": -5.966525077819824, "step": 2430 }, { "epoch": 1.26, "learning_rate": 3.2233696691528016e-07, "logits/chosen": -2.7852416038513184, "logits/rejected": -2.7786805629730225, "logps/chosen": -261.6937255859375, "logps/rejected": -367.239501953125, "loss": 0.0981, "rewards/accuracies": 1.0, "rewards/chosen": 1.2177461385726929, "rewards/margins": 8.014082908630371, "rewards/rejected": -6.796337127685547, "step": 2440 }, { "epoch": 1.26, "learning_rate": 3.2138076113979727e-07, "logits/chosen": -2.648454427719116, "logits/rejected": -2.6311562061309814, "logps/chosen": -240.67056274414062, "logps/rejected": -267.9673156738281, "loss": 0.0978, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.28647559881210327, "rewards/margins": 6.354050636291504, "rewards/rejected": -6.067575454711914, "step": 2450 }, { "epoch": 1.27, "learning_rate": 3.204245553643144e-07, "logits/chosen": -2.7582898139953613, "logits/rejected": -2.830904722213745, "logps/chosen": -309.43853759765625, "logps/rejected": -350.77618408203125, "loss": 0.098, "rewards/accuracies": 1.0, "rewards/chosen": 0.6567636728286743, "rewards/margins": 6.094487190246582, "rewards/rejected": -5.437722682952881, "step": 2460 }, { "epoch": 1.28, "learning_rate": 3.194683495888315e-07, "logits/chosen": -2.784090518951416, "logits/rejected": -2.7697300910949707, "logps/chosen": -277.5585021972656, "logps/rejected": -286.3648986816406, "loss": 0.1034, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03639407083392143, "rewards/margins": 6.042351245880127, "rewards/rejected": -6.078745365142822, "step": 2470 }, { "epoch": 1.28, "learning_rate": 3.185121438133486e-07, "logits/chosen": -2.758202075958252, "logits/rejected": -2.7667107582092285, "logps/chosen": -282.9512023925781, "logps/rejected": -355.96319580078125, "loss": 0.1263, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.692094087600708, "rewards/margins": 7.221386909484863, "rewards/rejected": -6.529292106628418, "step": 2480 }, { "epoch": 1.29, "learning_rate": 3.1755593803786574e-07, "logits/chosen": -2.6361289024353027, "logits/rejected": -2.656646966934204, "logps/chosen": -201.61358642578125, "logps/rejected": -313.6552734375, "loss": 0.0946, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4261380434036255, "rewards/margins": 5.699584484100342, "rewards/rejected": -7.125722408294678, "step": 2490 }, { "epoch": 1.29, "learning_rate": 3.1659973226238285e-07, "logits/chosen": -2.6961874961853027, "logits/rejected": -2.658639669418335, "logps/chosen": -205.56558227539062, "logps/rejected": -260.9989013671875, "loss": 0.1177, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.1720048189163208, "rewards/margins": 3.783812999725342, "rewards/rejected": -4.955817222595215, "step": 2500 }, { "epoch": 1.29, "eval_logits/chosen": -2.518930435180664, "eval_logits/rejected": -2.480231285095215, "eval_logps/chosen": -272.3760681152344, "eval_logps/rejected": -300.9119567871094, "eval_loss": 0.5888839960098267, "eval_rewards/accuracies": 0.796875, "eval_rewards/chosen": -1.814096450805664, "eval_rewards/margins": 2.580104112625122, "eval_rewards/rejected": -4.394200325012207, "eval_runtime": 58.8794, "eval_samples_per_second": 16.984, "eval_steps_per_second": 0.272, "step": 2500 }, { "epoch": 1.3, "learning_rate": 3.1564352648689997e-07, "logits/chosen": -2.7483344078063965, "logits/rejected": -2.7376341819763184, "logps/chosen": -269.5032653808594, "logps/rejected": -253.87051391601562, "loss": 0.1018, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.7494356632232666, "rewards/margins": 5.012850761413574, "rewards/rejected": -4.263415336608887, "step": 2510 }, { "epoch": 1.3, "learning_rate": 3.146873207114171e-07, "logits/chosen": -2.7938504219055176, "logits/rejected": -2.7508413791656494, "logps/chosen": -274.398193359375, "logps/rejected": -306.8814697265625, "loss": 0.1026, "rewards/accuracies": 1.0, "rewards/chosen": 0.5945212244987488, "rewards/margins": 7.071564674377441, "rewards/rejected": -6.477043151855469, "step": 2520 }, { "epoch": 1.31, "learning_rate": 3.137311149359342e-07, "logits/chosen": -2.858582019805908, "logits/rejected": -2.723261833190918, "logps/chosen": -274.79425048828125, "logps/rejected": -379.57501220703125, "loss": 0.0912, "rewards/accuracies": 1.0, "rewards/chosen": -0.21533474326133728, "rewards/margins": 5.66866397857666, "rewards/rejected": -5.883998870849609, "step": 2530 }, { "epoch": 1.31, "learning_rate": 3.127749091604513e-07, "logits/chosen": -2.8357625007629395, "logits/rejected": -2.814939260482788, "logps/chosen": -232.6085662841797, "logps/rejected": -294.39849853515625, "loss": 0.0899, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3493742048740387, "rewards/margins": 6.373709678649902, "rewards/rejected": -6.723084449768066, "step": 2540 }, { "epoch": 1.32, "learning_rate": 3.1181870338496843e-07, "logits/chosen": -2.679546594619751, "logits/rejected": -2.7267134189605713, "logps/chosen": -209.9823760986328, "logps/rejected": -287.78826904296875, "loss": 0.091, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.18076567351818085, "rewards/margins": 5.487452983856201, "rewards/rejected": -5.306687831878662, "step": 2550 }, { "epoch": 1.32, "learning_rate": 3.108624976094856e-07, "logits/chosen": -2.5259623527526855, "logits/rejected": -2.6586062908172607, "logps/chosen": -365.71258544921875, "logps/rejected": -242.0026397705078, "loss": 0.1047, "rewards/accuracies": 1.0, "rewards/chosen": 0.6775692701339722, "rewards/margins": 5.460636615753174, "rewards/rejected": -4.783067226409912, "step": 2560 }, { "epoch": 1.33, "learning_rate": 3.0990629183400266e-07, "logits/chosen": -2.5484824180603027, "logits/rejected": -2.580888509750366, "logps/chosen": -270.9229736328125, "logps/rejected": -282.5039367675781, "loss": 0.1062, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.9959784746170044, "rewards/margins": 7.229222297668457, "rewards/rejected": -6.233242988586426, "step": 2570 }, { "epoch": 1.33, "learning_rate": 3.089500860585198e-07, "logits/chosen": -2.762617588043213, "logits/rejected": -2.739429235458374, "logps/chosen": -200.62588500976562, "logps/rejected": -308.99127197265625, "loss": 0.1209, "rewards/accuracies": 1.0, "rewards/chosen": 0.03968176990747452, "rewards/margins": 7.24197244644165, "rewards/rejected": -7.2022905349731445, "step": 2580 }, { "epoch": 1.34, "learning_rate": 3.079938802830369e-07, "logits/chosen": -2.7819771766662598, "logits/rejected": -2.755398750305176, "logps/chosen": -207.9453582763672, "logps/rejected": -235.0533447265625, "loss": 0.086, "rewards/accuracies": 1.0, "rewards/chosen": -0.6877096891403198, "rewards/margins": 5.369903564453125, "rewards/rejected": -6.057612895965576, "step": 2590 }, { "epoch": 1.34, "learning_rate": 3.07037674507554e-07, "logits/chosen": -2.706509828567505, "logits/rejected": -2.741109848022461, "logps/chosen": -328.0285949707031, "logps/rejected": -378.04339599609375, "loss": 0.1213, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.15584062039852142, "rewards/margins": 7.148020267486572, "rewards/rejected": -6.992179870605469, "step": 2600 }, { "epoch": 1.34, "eval_logits/chosen": -2.5206711292266846, "eval_logits/rejected": -2.4774041175842285, "eval_logps/chosen": -268.8435974121094, "eval_logps/rejected": -295.39013671875, "eval_loss": 0.5683205723762512, "eval_rewards/accuracies": 0.8125, "eval_rewards/chosen": -1.4608486890792847, "eval_rewards/margins": 2.3811748027801514, "eval_rewards/rejected": -3.8420238494873047, "eval_runtime": 59.9277, "eval_samples_per_second": 16.687, "eval_steps_per_second": 0.267, "step": 2600 }, { "epoch": 1.35, "learning_rate": 3.060814687320711e-07, "logits/chosen": -2.6932194232940674, "logits/rejected": -2.6989266872406006, "logps/chosen": -347.0301818847656, "logps/rejected": -320.0285949707031, "loss": 0.0986, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.9022884368896484, "rewards/margins": 8.200953483581543, "rewards/rejected": -6.2986650466918945, "step": 2610 }, { "epoch": 1.35, "learning_rate": 3.0512526295658824e-07, "logits/chosen": -2.5327630043029785, "logits/rejected": -2.500969409942627, "logps/chosen": -234.2447509765625, "logps/rejected": -247.3339080810547, "loss": 0.094, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20838475227355957, "rewards/margins": 5.5928425788879395, "rewards/rejected": -5.80122709274292, "step": 2620 }, { "epoch": 1.36, "learning_rate": 3.0416905718110536e-07, "logits/chosen": -2.600487232208252, "logits/rejected": -2.682471990585327, "logps/chosen": -246.78024291992188, "logps/rejected": -246.34237670898438, "loss": 0.1632, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.0733012929558754, "rewards/margins": 5.1650261878967285, "rewards/rejected": -5.0917253494262695, "step": 2630 }, { "epoch": 1.36, "learning_rate": 3.0321285140562247e-07, "logits/chosen": -2.7498373985290527, "logits/rejected": -2.6727969646453857, "logps/chosen": -197.46665954589844, "logps/rejected": -293.0552062988281, "loss": 0.087, "rewards/accuracies": 1.0, "rewards/chosen": -1.277592420578003, "rewards/margins": 4.636383533477783, "rewards/rejected": -5.913976192474365, "step": 2640 }, { "epoch": 1.37, "learning_rate": 3.022566456301396e-07, "logits/chosen": -2.568721294403076, "logits/rejected": -2.7006120681762695, "logps/chosen": -296.3185119628906, "logps/rejected": -307.19818115234375, "loss": 0.1191, "rewards/accuracies": 1.0, "rewards/chosen": -0.03509577363729477, "rewards/margins": 6.352346897125244, "rewards/rejected": -6.387442111968994, "step": 2650 }, { "epoch": 1.37, "learning_rate": 3.013004398546567e-07, "logits/chosen": -2.590341091156006, "logits/rejected": -2.6296682357788086, "logps/chosen": -299.8781433105469, "logps/rejected": -309.7999572753906, "loss": 0.1044, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.5305342078208923, "rewards/margins": 5.682303428649902, "rewards/rejected": -6.212838172912598, "step": 2660 }, { "epoch": 1.38, "learning_rate": 3.003442340791738e-07, "logits/chosen": -2.69757080078125, "logits/rejected": -2.7322795391082764, "logps/chosen": -250.77490234375, "logps/rejected": -287.36785888671875, "loss": 0.0781, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4853235185146332, "rewards/margins": 6.51000452041626, "rewards/rejected": -6.995328426361084, "step": 2670 }, { "epoch": 1.38, "learning_rate": 2.9938802830369093e-07, "logits/chosen": -2.7183175086975098, "logits/rejected": -2.7362000942230225, "logps/chosen": -189.50961303710938, "logps/rejected": -314.11114501953125, "loss": 0.1049, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7947381734848022, "rewards/margins": 6.518137454986572, "rewards/rejected": -7.312876224517822, "step": 2680 }, { "epoch": 1.39, "learning_rate": 2.9843182252820805e-07, "logits/chosen": -2.5106515884399414, "logits/rejected": -2.5575432777404785, "logps/chosen": -249.78012084960938, "logps/rejected": -237.69677734375, "loss": 0.0708, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6967580914497375, "rewards/margins": 5.879612922668457, "rewards/rejected": -6.576371192932129, "step": 2690 }, { "epoch": 1.39, "learning_rate": 2.974756167527252e-07, "logits/chosen": -2.7255759239196777, "logits/rejected": -2.62638521194458, "logps/chosen": -315.8297424316406, "logps/rejected": -245.0868682861328, "loss": 0.0889, "rewards/accuracies": 1.0, "rewards/chosen": -0.14952102303504944, "rewards/margins": 5.201340675354004, "rewards/rejected": -5.3508620262146, "step": 2700 }, { "epoch": 1.39, "eval_logits/chosen": -2.452204704284668, "eval_logits/rejected": -2.4123356342315674, "eval_logps/chosen": -270.2423400878906, "eval_logps/rejected": -294.30682373046875, "eval_loss": 0.5890262126922607, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -1.6007238626480103, "eval_rewards/margins": 2.132964849472046, "eval_rewards/rejected": -3.7336881160736084, "eval_runtime": 60.2724, "eval_samples_per_second": 16.591, "eval_steps_per_second": 0.265, "step": 2700 }, { "epoch": 1.4, "learning_rate": 2.9651941097724233e-07, "logits/chosen": -2.6822290420532227, "logits/rejected": -2.7052135467529297, "logps/chosen": -278.01422119140625, "logps/rejected": -339.3485412597656, "loss": 0.1087, "rewards/accuracies": 1.0, "rewards/chosen": -0.15647803246974945, "rewards/margins": 5.597909450531006, "rewards/rejected": -5.754388332366943, "step": 2710 }, { "epoch": 1.4, "learning_rate": 2.9556320520175945e-07, "logits/chosen": -2.5796897411346436, "logits/rejected": -2.6120152473449707, "logps/chosen": -276.00408935546875, "logps/rejected": -235.9673614501953, "loss": 0.1325, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0651065111160278, "rewards/margins": 4.378058433532715, "rewards/rejected": -5.443163871765137, "step": 2720 }, { "epoch": 1.41, "learning_rate": 2.946069994262765e-07, "logits/chosen": -2.682631015777588, "logits/rejected": -2.5832624435424805, "logps/chosen": -325.86883544921875, "logps/rejected": -448.3667907714844, "loss": 0.1033, "rewards/accuracies": 1.0, "rewards/chosen": 1.8688300848007202, "rewards/margins": 9.84605598449707, "rewards/rejected": -7.977224826812744, "step": 2730 }, { "epoch": 1.41, "learning_rate": 2.9365079365079363e-07, "logits/chosen": -2.656704902648926, "logits/rejected": -2.6298129558563232, "logps/chosen": -312.0522766113281, "logps/rejected": -264.58172607421875, "loss": 0.0869, "rewards/accuracies": 1.0, "rewards/chosen": 0.325814425945282, "rewards/margins": 7.053065299987793, "rewards/rejected": -6.727250099182129, "step": 2740 }, { "epoch": 1.42, "learning_rate": 2.9269458787531074e-07, "logits/chosen": -2.7410786151885986, "logits/rejected": -2.60438871383667, "logps/chosen": -338.72125244140625, "logps/rejected": -300.7152099609375, "loss": 0.099, "rewards/accuracies": 1.0, "rewards/chosen": 0.48889246582984924, "rewards/margins": 5.851205348968506, "rewards/rejected": -5.3623127937316895, "step": 2750 }, { "epoch": 1.42, "learning_rate": 2.9173838209982786e-07, "logits/chosen": -2.5442605018615723, "logits/rejected": -2.396920680999756, "logps/chosen": -212.8131561279297, "logps/rejected": -245.8048858642578, "loss": 0.0924, "rewards/accuracies": 1.0, "rewards/chosen": -0.18661096692085266, "rewards/margins": 5.660151481628418, "rewards/rejected": -5.846762657165527, "step": 2760 }, { "epoch": 1.43, "learning_rate": 2.90782176324345e-07, "logits/chosen": -2.770378589630127, "logits/rejected": -2.652078628540039, "logps/chosen": -284.2225036621094, "logps/rejected": -237.8290557861328, "loss": 0.0933, "rewards/accuracies": 1.0, "rewards/chosen": -0.5430759787559509, "rewards/margins": 5.237969398498535, "rewards/rejected": -5.781044960021973, "step": 2770 }, { "epoch": 1.44, "learning_rate": 2.898259705488621e-07, "logits/chosen": -2.763434410095215, "logits/rejected": -2.824732542037964, "logps/chosen": -295.1107482910156, "logps/rejected": -265.9899597167969, "loss": 0.0896, "rewards/accuracies": 1.0, "rewards/chosen": -0.034651029855012894, "rewards/margins": 6.801316261291504, "rewards/rejected": -6.835967063903809, "step": 2780 }, { "epoch": 1.44, "learning_rate": 2.888697647733792e-07, "logits/chosen": -2.626591920852661, "logits/rejected": -2.693389892578125, "logps/chosen": -382.92559814453125, "logps/rejected": -343.2526550292969, "loss": 0.1328, "rewards/accuracies": 1.0, "rewards/chosen": 0.3407518267631531, "rewards/margins": 8.370372772216797, "rewards/rejected": -8.029620170593262, "step": 2790 }, { "epoch": 1.45, "learning_rate": 2.879135589978963e-07, "logits/chosen": -2.7098212242126465, "logits/rejected": -2.6440200805664062, "logps/chosen": -237.72042846679688, "logps/rejected": -263.23858642578125, "loss": 0.0995, "rewards/accuracies": 1.0, "rewards/chosen": 0.1682741343975067, "rewards/margins": 6.354408264160156, "rewards/rejected": -6.186134338378906, "step": 2800 }, { "epoch": 1.45, "eval_logits/chosen": -2.505031108856201, "eval_logits/rejected": -2.4685418605804443, "eval_logps/chosen": -269.7538146972656, "eval_logps/rejected": -295.3314514160156, "eval_loss": 0.6072700023651123, "eval_rewards/accuracies": 0.828125, "eval_rewards/chosen": -1.5518717765808105, "eval_rewards/margins": 2.2842793464660645, "eval_rewards/rejected": -3.836151123046875, "eval_runtime": 55.9165, "eval_samples_per_second": 17.884, "eval_steps_per_second": 0.286, "step": 2800 }, { "epoch": 1.45, "learning_rate": 2.8695735322241344e-07, "logits/chosen": -2.6192431449890137, "logits/rejected": -2.5992114543914795, "logps/chosen": -243.22531127929688, "logps/rejected": -259.7867431640625, "loss": 0.1528, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.13970229029655457, "rewards/margins": 6.5014328956604, "rewards/rejected": -6.641134738922119, "step": 2810 }, { "epoch": 1.46, "learning_rate": 2.8600114744693055e-07, "logits/chosen": -2.5777206420898438, "logits/rejected": -2.595568895339966, "logps/chosen": -315.70513916015625, "logps/rejected": -328.4643859863281, "loss": 0.0978, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.2178268432617188, "rewards/margins": 7.306548118591309, "rewards/rejected": -6.08872127532959, "step": 2820 }, { "epoch": 1.46, "learning_rate": 2.8504494167144767e-07, "logits/chosen": -2.5762603282928467, "logits/rejected": -2.6186347007751465, "logps/chosen": -338.2220153808594, "logps/rejected": -372.440673828125, "loss": 0.1064, "rewards/accuracies": 1.0, "rewards/chosen": 0.40805625915527344, "rewards/margins": 8.335431098937988, "rewards/rejected": -7.927374362945557, "step": 2830 }, { "epoch": 1.47, "learning_rate": 2.8408873589596484e-07, "logits/chosen": -2.6718220710754395, "logits/rejected": -2.6635639667510986, "logps/chosen": -187.11207580566406, "logps/rejected": -261.17236328125, "loss": 0.0828, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6262374520301819, "rewards/margins": 5.5064697265625, "rewards/rejected": -6.132707595825195, "step": 2840 }, { "epoch": 1.47, "learning_rate": 2.8313253012048195e-07, "logits/chosen": -2.549769163131714, "logits/rejected": -2.656653881072998, "logps/chosen": -244.2962188720703, "logps/rejected": -360.3494567871094, "loss": 0.1782, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2172654867172241, "rewards/margins": 5.140130043029785, "rewards/rejected": -6.357396125793457, "step": 2850 }, { "epoch": 1.48, "learning_rate": 2.8217632434499907e-07, "logits/chosen": -2.645021915435791, "logits/rejected": -2.6408421993255615, "logps/chosen": -283.7989807128906, "logps/rejected": -308.847900390625, "loss": 0.1242, "rewards/accuracies": 1.0, "rewards/chosen": -0.1975909173488617, "rewards/margins": 6.074164867401123, "rewards/rejected": -6.271755695343018, "step": 2860 }, { "epoch": 1.48, "learning_rate": 2.812201185695162e-07, "logits/chosen": -2.6722216606140137, "logits/rejected": -2.614084482192993, "logps/chosen": -170.52699279785156, "logps/rejected": -282.1553039550781, "loss": 0.0736, "rewards/accuracies": 1.0, "rewards/chosen": 0.22432120144367218, "rewards/margins": 6.5738677978515625, "rewards/rejected": -6.349545955657959, "step": 2870 }, { "epoch": 1.49, "learning_rate": 2.802639127940333e-07, "logits/chosen": -2.706528425216675, "logits/rejected": -2.6337788105010986, "logps/chosen": -313.04913330078125, "logps/rejected": -283.40972900390625, "loss": 0.0995, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.12990444898605347, "rewards/margins": 6.201463222503662, "rewards/rejected": -6.071558475494385, "step": 2880 }, { "epoch": 1.49, "learning_rate": 2.7930770701855036e-07, "logits/chosen": -2.7476582527160645, "logits/rejected": -2.683351755142212, "logps/chosen": -351.7320251464844, "logps/rejected": -329.08026123046875, "loss": 0.1173, "rewards/accuracies": 1.0, "rewards/chosen": 0.49377602338790894, "rewards/margins": 8.726078987121582, "rewards/rejected": -8.23230266571045, "step": 2890 }, { "epoch": 1.5, "learning_rate": 2.783515012430675e-07, "logits/chosen": -2.5259041786193848, "logits/rejected": -2.5326766967773438, "logps/chosen": -190.27813720703125, "logps/rejected": -241.18991088867188, "loss": 0.1145, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.45352381467819214, "rewards/margins": 5.041954517364502, "rewards/rejected": -5.495478630065918, "step": 2900 }, { "epoch": 1.5, "eval_logits/chosen": -2.4674015045166016, "eval_logits/rejected": -2.42722749710083, "eval_logps/chosen": -272.1744384765625, "eval_logps/rejected": -299.8460998535156, "eval_loss": 0.579024076461792, "eval_rewards/accuracies": 0.84375, "eval_rewards/chosen": -1.7939329147338867, "eval_rewards/margins": 2.4936835765838623, "eval_rewards/rejected": -4.28761625289917, "eval_runtime": 57.5798, "eval_samples_per_second": 17.367, "eval_steps_per_second": 0.278, "step": 2900 }, { "epoch": 1.5, "learning_rate": 2.773952954675846e-07, "logits/chosen": -2.6517531871795654, "logits/rejected": -2.611769914627075, "logps/chosen": -357.39666748046875, "logps/rejected": -310.7156677246094, "loss": 0.086, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.0798923969268799, "rewards/margins": 8.055770874023438, "rewards/rejected": -6.9758782386779785, "step": 2910 }, { "epoch": 1.51, "learning_rate": 2.764390896921017e-07, "logits/chosen": -2.603874921798706, "logits/rejected": -2.5526695251464844, "logps/chosen": -272.8443298339844, "logps/rejected": -329.92401123046875, "loss": 0.0943, "rewards/accuracies": 1.0, "rewards/chosen": 1.03761887550354, "rewards/margins": 8.095129013061523, "rewards/rejected": -7.057511329650879, "step": 2920 }, { "epoch": 1.51, "learning_rate": 2.754828839166188e-07, "logits/chosen": -2.6576075553894043, "logits/rejected": -2.6514670848846436, "logps/chosen": -191.14877319335938, "logps/rejected": -301.5423889160156, "loss": 0.0724, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.5387217402458191, "rewards/margins": 7.507475852966309, "rewards/rejected": -6.968753814697266, "step": 2930 }, { "epoch": 1.52, "learning_rate": 2.7452667814113594e-07, "logits/chosen": -2.7524516582489014, "logits/rejected": -2.7706661224365234, "logps/chosen": -318.32501220703125, "logps/rejected": -316.9727783203125, "loss": 0.087, "rewards/accuracies": 1.0, "rewards/chosen": 0.01138849277049303, "rewards/margins": 5.306549072265625, "rewards/rejected": -5.295160293579102, "step": 2940 }, { "epoch": 1.52, "learning_rate": 2.7357047236565306e-07, "logits/chosen": -2.699721097946167, "logits/rejected": -2.7687458992004395, "logps/chosen": -280.2265319824219, "logps/rejected": -364.947509765625, "loss": 0.0659, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4582802355289459, "rewards/margins": 6.130402088165283, "rewards/rejected": -5.672121047973633, "step": 2950 }, { "epoch": 1.53, "learning_rate": 2.7261426659017017e-07, "logits/chosen": -2.7413382530212402, "logits/rejected": -2.7660346031188965, "logps/chosen": -324.0436706542969, "logps/rejected": -243.81802368164062, "loss": 0.0914, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6909480094909668, "rewards/margins": 4.682136535644531, "rewards/rejected": -5.373085021972656, "step": 2960 }, { "epoch": 1.53, "learning_rate": 2.716580608146873e-07, "logits/chosen": -2.646226644515991, "logits/rejected": -2.712257146835327, "logps/chosen": -374.819580078125, "logps/rejected": -351.70025634765625, "loss": 0.0677, "rewards/accuracies": 1.0, "rewards/chosen": 1.3280370235443115, "rewards/margins": 9.090957641601562, "rewards/rejected": -7.762920379638672, "step": 2970 }, { "epoch": 1.54, "learning_rate": 2.7070185503920446e-07, "logits/chosen": -2.639868974685669, "logits/rejected": -2.7557711601257324, "logps/chosen": -273.087646484375, "logps/rejected": -317.41766357421875, "loss": 0.0616, "rewards/accuracies": 1.0, "rewards/chosen": -0.6701494455337524, "rewards/margins": 7.305191993713379, "rewards/rejected": -7.975341796875, "step": 2980 }, { "epoch": 1.54, "learning_rate": 2.6974564926372157e-07, "logits/chosen": -2.759113311767578, "logits/rejected": -2.7275261878967285, "logps/chosen": -301.2916259765625, "logps/rejected": -324.0910949707031, "loss": 0.0916, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.8304478526115417, "rewards/margins": 7.585775852203369, "rewards/rejected": -6.755328178405762, "step": 2990 }, { "epoch": 1.55, "learning_rate": 2.687894434882387e-07, "logits/chosen": -2.653514862060547, "logits/rejected": -2.606902599334717, "logps/chosen": -277.50506591796875, "logps/rejected": -264.87481689453125, "loss": 0.0644, "rewards/accuracies": 1.0, "rewards/chosen": -0.18126115202903748, "rewards/margins": 4.916709899902344, "rewards/rejected": -5.097971439361572, "step": 3000 }, { "epoch": 1.55, "eval_logits/chosen": -2.457401990890503, "eval_logits/rejected": -2.4193201065063477, "eval_logps/chosen": -271.5200500488281, "eval_logps/rejected": -299.0209045410156, "eval_loss": 0.5735086798667908, "eval_rewards/accuracies": 0.8125, "eval_rewards/chosen": -1.7284938097000122, "eval_rewards/margins": 2.476605176925659, "eval_rewards/rejected": -4.205099105834961, "eval_runtime": 58.4864, "eval_samples_per_second": 17.098, "eval_steps_per_second": 0.274, "step": 3000 }, { "epoch": 1.55, "learning_rate": 2.678332377127558e-07, "logits/chosen": -2.693279981613159, "logits/rejected": -2.6434133052825928, "logps/chosen": -257.072509765625, "logps/rejected": -268.16107177734375, "loss": 0.0794, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.660453736782074, "rewards/margins": 7.455300807952881, "rewards/rejected": -6.7948479652404785, "step": 3010 }, { "epoch": 1.56, "learning_rate": 2.668770319372729e-07, "logits/chosen": -2.476691484451294, "logits/rejected": -2.369554042816162, "logps/chosen": -243.014892578125, "logps/rejected": -179.72573852539062, "loss": 0.0769, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.28263527154922485, "rewards/margins": 4.549951076507568, "rewards/rejected": -4.832587242126465, "step": 3020 }, { "epoch": 1.56, "learning_rate": 2.6592082616179004e-07, "logits/chosen": -2.799598217010498, "logits/rejected": -2.6863772869110107, "logps/chosen": -202.5391082763672, "logps/rejected": -217.33743286132812, "loss": 0.0956, "rewards/accuracies": 1.0, "rewards/chosen": -0.2968635559082031, "rewards/margins": 5.342751979827881, "rewards/rejected": -5.639615535736084, "step": 3030 }, { "epoch": 1.57, "learning_rate": 2.649646203863071e-07, "logits/chosen": -2.78475022315979, "logits/rejected": -2.7314937114715576, "logps/chosen": -341.8492126464844, "logps/rejected": -377.26318359375, "loss": 0.0786, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.6281161308288574, "rewards/margins": 6.323546409606934, "rewards/rejected": -5.695430278778076, "step": 3040 }, { "epoch": 1.57, "learning_rate": 2.640084146108242e-07, "logits/chosen": -2.835313558578491, "logits/rejected": -2.8478219509124756, "logps/chosen": -413.6333923339844, "logps/rejected": -281.8817138671875, "loss": 0.0753, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3795287013053894, "rewards/margins": 6.382612705230713, "rewards/rejected": -6.003084659576416, "step": 3050 }, { "epoch": 1.58, "learning_rate": 2.6305220883534133e-07, "logits/chosen": -2.7521536350250244, "logits/rejected": -2.802274227142334, "logps/chosen": -283.74365234375, "logps/rejected": -274.3045654296875, "loss": 0.0746, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.19368138909339905, "rewards/margins": 5.350792407989502, "rewards/rejected": -5.157111167907715, "step": 3060 }, { "epoch": 1.58, "learning_rate": 2.6209600305985845e-07, "logits/chosen": -2.6364054679870605, "logits/rejected": -2.6233391761779785, "logps/chosen": -320.7242736816406, "logps/rejected": -418.53863525390625, "loss": 0.1234, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2693870961666107, "rewards/margins": 10.055575370788574, "rewards/rejected": -9.786188125610352, "step": 3070 }, { "epoch": 1.59, "learning_rate": 2.6113979728437556e-07, "logits/chosen": -2.757228374481201, "logits/rejected": -2.746696949005127, "logps/chosen": -278.10736083984375, "logps/rejected": -269.76751708984375, "loss": 0.1059, "rewards/accuracies": 1.0, "rewards/chosen": 0.18973210453987122, "rewards/margins": 5.467267036437988, "rewards/rejected": -5.2775349617004395, "step": 3080 }, { "epoch": 1.6, "learning_rate": 2.601835915088927e-07, "logits/chosen": -2.6811797618865967, "logits/rejected": -2.694938898086548, "logps/chosen": -268.3731689453125, "logps/rejected": -316.51507568359375, "loss": 0.0925, "rewards/accuracies": 1.0, "rewards/chosen": 1.0646179914474487, "rewards/margins": 7.384450435638428, "rewards/rejected": -6.319832801818848, "step": 3090 }, { "epoch": 1.6, "learning_rate": 2.592273857334098e-07, "logits/chosen": -2.8825931549072266, "logits/rejected": -2.838369607925415, "logps/chosen": -272.46466064453125, "logps/rejected": -302.24951171875, "loss": 0.0798, "rewards/accuracies": 1.0, "rewards/chosen": 1.7096697092056274, "rewards/margins": 8.205864906311035, "rewards/rejected": -6.496194362640381, "step": 3100 }, { "epoch": 1.6, "eval_logits/chosen": -2.569591760635376, "eval_logits/rejected": -2.536669969558716, "eval_logps/chosen": -271.4610290527344, "eval_logps/rejected": -299.8199768066406, "eval_loss": 0.5536529421806335, "eval_rewards/accuracies": 0.84375, "eval_rewards/chosen": -1.7225927114486694, "eval_rewards/margins": 2.5624139308929443, "eval_rewards/rejected": -4.285006523132324, "eval_runtime": 56.0043, "eval_samples_per_second": 17.856, "eval_steps_per_second": 0.286, "step": 3100 }, { "epoch": 1.61, "learning_rate": 2.582711799579269e-07, "logits/chosen": -2.8248748779296875, "logits/rejected": -2.778346300125122, "logps/chosen": -299.2210693359375, "logps/rejected": -313.359375, "loss": 0.071, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.7604249715805054, "rewards/margins": 7.902795314788818, "rewards/rejected": -7.142370700836182, "step": 3110 }, { "epoch": 1.61, "learning_rate": 2.573149741824441e-07, "logits/chosen": -2.6978442668914795, "logits/rejected": -2.6833174228668213, "logps/chosen": -336.5847473144531, "logps/rejected": -245.4396209716797, "loss": 0.0829, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.08581381291151047, "rewards/margins": 5.277584075927734, "rewards/rejected": -5.191770076751709, "step": 3120 }, { "epoch": 1.62, "learning_rate": 2.563587684069612e-07, "logits/chosen": -2.8688364028930664, "logits/rejected": -2.7075347900390625, "logps/chosen": -350.5228576660156, "logps/rejected": -273.2851867675781, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": -0.7785223126411438, "rewards/margins": 6.459234714508057, "rewards/rejected": -7.237756252288818, "step": 3130 }, { "epoch": 1.62, "learning_rate": 2.554025626314783e-07, "logits/chosen": -2.727123737335205, "logits/rejected": -2.725803852081299, "logps/chosen": -344.9717712402344, "logps/rejected": -390.8403625488281, "loss": 0.0898, "rewards/accuracies": 1.0, "rewards/chosen": -0.9268060922622681, "rewards/margins": 6.525388240814209, "rewards/rejected": -7.4521942138671875, "step": 3140 }, { "epoch": 1.63, "learning_rate": 2.544463568559954e-07, "logits/chosen": -2.786041259765625, "logits/rejected": -2.7138454914093018, "logps/chosen": -323.46832275390625, "logps/rejected": -323.85125732421875, "loss": 0.0678, "rewards/accuracies": 1.0, "rewards/chosen": 0.7023747563362122, "rewards/margins": 8.29463005065918, "rewards/rejected": -7.592255592346191, "step": 3150 }, { "epoch": 1.63, "learning_rate": 2.5349015108051254e-07, "logits/chosen": -2.623403787612915, "logits/rejected": -2.6345882415771484, "logps/chosen": -251.9879913330078, "logps/rejected": -317.69769287109375, "loss": 0.0833, "rewards/accuracies": 1.0, "rewards/chosen": 0.3217948377132416, "rewards/margins": 7.894297122955322, "rewards/rejected": -7.572502136230469, "step": 3160 }, { "epoch": 1.64, "learning_rate": 2.5253394530502966e-07, "logits/chosen": -2.483633518218994, "logits/rejected": -2.636124610900879, "logps/chosen": -375.053955078125, "logps/rejected": -289.378662109375, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": 0.537722647190094, "rewards/margins": 7.012340545654297, "rewards/rejected": -6.474618434906006, "step": 3170 }, { "epoch": 1.64, "learning_rate": 2.5157773952954677e-07, "logits/chosen": -2.746309757232666, "logits/rejected": -2.692573070526123, "logps/chosen": -342.6100158691406, "logps/rejected": -319.15850830078125, "loss": 0.1194, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.26776519417762756, "rewards/margins": 7.538763523101807, "rewards/rejected": -7.270998477935791, "step": 3180 }, { "epoch": 1.65, "learning_rate": 2.506215337540639e-07, "logits/chosen": -2.842471122741699, "logits/rejected": -2.7346935272216797, "logps/chosen": -357.83837890625, "logps/rejected": -415.0469665527344, "loss": 0.1034, "rewards/accuracies": 1.0, "rewards/chosen": 0.20541605353355408, "rewards/margins": 6.793099880218506, "rewards/rejected": -6.587684631347656, "step": 3190 }, { "epoch": 1.65, "learning_rate": 2.4966532797858095e-07, "logits/chosen": -2.6495633125305176, "logits/rejected": -2.666757106781006, "logps/chosen": -266.09454345703125, "logps/rejected": -267.7814636230469, "loss": 0.1013, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.0470559298992157, "rewards/margins": 6.690218925476074, "rewards/rejected": -6.73727560043335, "step": 3200 }, { "epoch": 1.65, "eval_logits/chosen": -2.526689291000366, "eval_logits/rejected": -2.492635726928711, "eval_logps/chosen": -269.9497985839844, "eval_logps/rejected": -296.7825012207031, "eval_loss": 0.5574991703033447, "eval_rewards/accuracies": 0.875, "eval_rewards/chosen": -1.5714715719223022, "eval_rewards/margins": 2.4097867012023926, "eval_rewards/rejected": -3.9812583923339844, "eval_runtime": 57.7657, "eval_samples_per_second": 17.311, "eval_steps_per_second": 0.277, "step": 3200 }, { "epoch": 1.66, "learning_rate": 2.4870912220309807e-07, "logits/chosen": -2.625276565551758, "logits/rejected": -2.73038649559021, "logps/chosen": -289.6630554199219, "logps/rejected": -282.751953125, "loss": 0.0784, "rewards/accuracies": 1.0, "rewards/chosen": 0.39164501428604126, "rewards/margins": 5.46439266204834, "rewards/rejected": -5.072747707366943, "step": 3210 }, { "epoch": 1.66, "learning_rate": 2.477529164276152e-07, "logits/chosen": -2.7541117668151855, "logits/rejected": -2.75673508644104, "logps/chosen": -309.00799560546875, "logps/rejected": -350.14556884765625, "loss": 0.0784, "rewards/accuracies": 1.0, "rewards/chosen": 1.0380117893218994, "rewards/margins": 7.454461574554443, "rewards/rejected": -6.416450500488281, "step": 3220 }, { "epoch": 1.67, "learning_rate": 2.4679671065213235e-07, "logits/chosen": -2.7203588485717773, "logits/rejected": -2.704502582550049, "logps/chosen": -347.71453857421875, "logps/rejected": -295.966552734375, "loss": 0.1137, "rewards/accuracies": 1.0, "rewards/chosen": 0.29694637656211853, "rewards/margins": 5.83956241607666, "rewards/rejected": -5.5426154136657715, "step": 3230 }, { "epoch": 1.67, "learning_rate": 2.4584050487664947e-07, "logits/chosen": -2.7662394046783447, "logits/rejected": -2.7026658058166504, "logps/chosen": -346.2272644042969, "logps/rejected": -320.8843078613281, "loss": 0.0958, "rewards/accuracies": 1.0, "rewards/chosen": 0.34752047061920166, "rewards/margins": 6.67000675201416, "rewards/rejected": -6.32248592376709, "step": 3240 }, { "epoch": 1.68, "learning_rate": 2.448842991011666e-07, "logits/chosen": -2.6715903282165527, "logits/rejected": -2.603444814682007, "logps/chosen": -275.488037109375, "logps/rejected": -369.26861572265625, "loss": 0.1227, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.22678561508655548, "rewards/margins": 7.3243255615234375, "rewards/rejected": -7.09753942489624, "step": 3250 }, { "epoch": 1.68, "learning_rate": 2.439280933256837e-07, "logits/chosen": -2.5521583557128906, "logits/rejected": -2.575525999069214, "logps/chosen": -234.0755157470703, "logps/rejected": -253.0180206298828, "loss": 0.0473, "rewards/accuracies": 1.0, "rewards/chosen": 0.34158411622047424, "rewards/margins": 7.31561803817749, "rewards/rejected": -6.974034309387207, "step": 3260 }, { "epoch": 1.69, "learning_rate": 2.429718875502008e-07, "logits/chosen": -2.246537923812866, "logits/rejected": -2.325873613357544, "logps/chosen": -278.77386474609375, "logps/rejected": -295.7586975097656, "loss": 0.0705, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1356315165758133, "rewards/margins": 5.905457496643066, "rewards/rejected": -5.7698259353637695, "step": 3270 }, { "epoch": 1.69, "learning_rate": 2.420156817747179e-07, "logits/chosen": -2.7891759872436523, "logits/rejected": -2.7579565048217773, "logps/chosen": -356.2643127441406, "logps/rejected": -370.2890319824219, "loss": 0.0637, "rewards/accuracies": 1.0, "rewards/chosen": 1.439815878868103, "rewards/margins": 8.942848205566406, "rewards/rejected": -7.503033638000488, "step": 3280 }, { "epoch": 1.7, "learning_rate": 2.41059475999235e-07, "logits/chosen": -2.726214647293091, "logits/rejected": -2.5874438285827637, "logps/chosen": -226.1343231201172, "logps/rejected": -244.17489624023438, "loss": 0.0722, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5386222004890442, "rewards/margins": 6.369637489318848, "rewards/rejected": -6.908260345458984, "step": 3290 }, { "epoch": 1.7, "learning_rate": 2.4010327022375216e-07, "logits/chosen": -2.660001516342163, "logits/rejected": -2.6236727237701416, "logps/chosen": -350.3585510253906, "logps/rejected": -423.14605712890625, "loss": 0.1254, "rewards/accuracies": 1.0, "rewards/chosen": 0.3113314211368561, "rewards/margins": 8.888386726379395, "rewards/rejected": -8.577055931091309, "step": 3300 }, { "epoch": 1.7, "eval_logits/chosen": -2.533999443054199, "eval_logits/rejected": -2.5016584396362305, "eval_logps/chosen": -270.6473083496094, "eval_logps/rejected": -301.6729736328125, "eval_loss": 0.5904735326766968, "eval_rewards/accuracies": 0.859375, "eval_rewards/chosen": -1.6412229537963867, "eval_rewards/margins": 2.8290822505950928, "eval_rewards/rejected": -4.4703049659729, "eval_runtime": 56.7796, "eval_samples_per_second": 17.612, "eval_steps_per_second": 0.282, "step": 3300 }, { "epoch": 1.71, "learning_rate": 2.391470644482693e-07, "logits/chosen": -2.5988898277282715, "logits/rejected": -2.633589267730713, "logps/chosen": -299.37860107421875, "logps/rejected": -375.39788818359375, "loss": 0.0757, "rewards/accuracies": 1.0, "rewards/chosen": 0.23076781630516052, "rewards/margins": 7.872524261474609, "rewards/rejected": -7.641757011413574, "step": 3310 }, { "epoch": 1.71, "learning_rate": 2.3819085867278636e-07, "logits/chosen": -2.6122288703918457, "logits/rejected": -2.6097447872161865, "logps/chosen": -184.86968994140625, "logps/rejected": -299.4801330566406, "loss": 0.0826, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3567231297492981, "rewards/margins": 8.226961135864258, "rewards/rejected": -8.583684921264648, "step": 3320 }, { "epoch": 1.72, "learning_rate": 2.3723465289730348e-07, "logits/chosen": -2.629166603088379, "logits/rejected": -2.598412036895752, "logps/chosen": -287.3348693847656, "logps/rejected": -273.7483825683594, "loss": 0.1158, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2611514925956726, "rewards/margins": 5.725651264190674, "rewards/rejected": -5.986802577972412, "step": 3330 }, { "epoch": 1.72, "learning_rate": 2.362784471218206e-07, "logits/chosen": -2.73244571685791, "logits/rejected": -2.7296879291534424, "logps/chosen": -355.73236083984375, "logps/rejected": -323.4547424316406, "loss": 0.0704, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.1553064584732056, "rewards/margins": 6.715522766113281, "rewards/rejected": -5.560215950012207, "step": 3340 }, { "epoch": 1.73, "learning_rate": 2.353222413463377e-07, "logits/chosen": -2.7055535316467285, "logits/rejected": -2.659834146499634, "logps/chosen": -234.71792602539062, "logps/rejected": -258.9352722167969, "loss": 0.0594, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.04846489429473877, "rewards/margins": 5.886017799377441, "rewards/rejected": -5.934482574462891, "step": 3350 }, { "epoch": 1.73, "learning_rate": 2.3436603557085483e-07, "logits/chosen": -2.7515339851379395, "logits/rejected": -2.647671937942505, "logps/chosen": -205.7976837158203, "logps/rejected": -278.0999450683594, "loss": 0.0886, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.5387641787528992, "rewards/margins": 6.382545471191406, "rewards/rejected": -5.8437819480896, "step": 3360 }, { "epoch": 1.74, "learning_rate": 2.3340982979537197e-07, "logits/chosen": -2.727328300476074, "logits/rejected": -2.7680537700653076, "logps/chosen": -395.30169677734375, "logps/rejected": -329.90234375, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": 0.9938201904296875, "rewards/margins": 6.250397682189941, "rewards/rejected": -5.256577491760254, "step": 3370 }, { "epoch": 1.74, "learning_rate": 2.3245362401988909e-07, "logits/chosen": -2.7096972465515137, "logits/rejected": -2.605597734451294, "logps/chosen": -308.7266845703125, "logps/rejected": -269.8343811035156, "loss": 0.0906, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.0761408805847168, "rewards/margins": 7.6199774742126465, "rewards/rejected": -6.543837547302246, "step": 3380 }, { "epoch": 1.75, "learning_rate": 2.314974182444062e-07, "logits/chosen": -2.7419021129608154, "logits/rejected": -2.797194004058838, "logps/chosen": -308.60302734375, "logps/rejected": -293.39581298828125, "loss": 0.1425, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2203086614608765, "rewards/margins": 5.09138822555542, "rewards/rejected": -6.311697483062744, "step": 3390 }, { "epoch": 1.76, "learning_rate": 2.305412124689233e-07, "logits/chosen": -2.521355628967285, "logits/rejected": -2.5652852058410645, "logps/chosen": -271.5760192871094, "logps/rejected": -332.80743408203125, "loss": 0.085, "rewards/accuracies": 1.0, "rewards/chosen": 0.12037495523691177, "rewards/margins": 6.457161903381348, "rewards/rejected": -6.33678674697876, "step": 3400 }, { "epoch": 1.76, "eval_logits/chosen": -2.495957612991333, "eval_logits/rejected": -2.461439609527588, "eval_logps/chosen": -273.39410400390625, "eval_logps/rejected": -303.7296142578125, "eval_loss": 0.6132888793945312, "eval_rewards/accuracies": 0.84375, "eval_rewards/chosen": -1.9158999919891357, "eval_rewards/margins": 2.760065793991089, "eval_rewards/rejected": -4.675965309143066, "eval_runtime": 57.5942, "eval_samples_per_second": 17.363, "eval_steps_per_second": 0.278, "step": 3400 }, { "epoch": 1.76, "learning_rate": 2.295850066934404e-07, "logits/chosen": -2.6166062355041504, "logits/rejected": -2.540011167526245, "logps/chosen": -279.5812683105469, "logps/rejected": -327.165283203125, "loss": 0.0789, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3240896463394165, "rewards/margins": 6.653228759765625, "rewards/rejected": -7.977317810058594, "step": 3410 }, { "epoch": 1.77, "learning_rate": 2.2862880091795752e-07, "logits/chosen": -2.5269017219543457, "logits/rejected": -2.516174077987671, "logps/chosen": -194.35435485839844, "logps/rejected": -307.7319030761719, "loss": 0.0863, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5239468812942505, "rewards/margins": 5.74463415145874, "rewards/rejected": -6.268580436706543, "step": 3420 }, { "epoch": 1.77, "learning_rate": 2.2767259514247464e-07, "logits/chosen": -2.3292429447174072, "logits/rejected": -2.2449238300323486, "logps/chosen": -235.97329711914062, "logps/rejected": -297.90130615234375, "loss": 0.0867, "rewards/accuracies": 1.0, "rewards/chosen": 0.7022100687026978, "rewards/margins": 6.978515625, "rewards/rejected": -6.27630615234375, "step": 3430 }, { "epoch": 1.78, "learning_rate": 2.2671638936699178e-07, "logits/chosen": -2.413973093032837, "logits/rejected": -2.389719247817993, "logps/chosen": -315.0927734375, "logps/rejected": -248.7796630859375, "loss": 0.0776, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.12068784236907959, "rewards/margins": 5.998594760894775, "rewards/rejected": -5.877906799316406, "step": 3440 }, { "epoch": 1.78, "learning_rate": 2.257601835915089e-07, "logits/chosen": -2.582331895828247, "logits/rejected": -2.504185199737549, "logps/chosen": -333.48358154296875, "logps/rejected": -384.9881286621094, "loss": 0.0597, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.09340760856866837, "rewards/margins": 7.274853706359863, "rewards/rejected": -7.181446075439453, "step": 3450 }, { "epoch": 1.79, "learning_rate": 2.24803977816026e-07, "logits/chosen": -2.3808603286743164, "logits/rejected": -2.5695574283599854, "logps/chosen": -218.94461059570312, "logps/rejected": -255.54013061523438, "loss": 0.1264, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.38311663269996643, "rewards/margins": 8.536532402038574, "rewards/rejected": -8.153416633605957, "step": 3460 }, { "epoch": 1.79, "learning_rate": 2.2384777204054313e-07, "logits/chosen": -2.7166881561279297, "logits/rejected": -2.651099920272827, "logps/chosen": -266.19390869140625, "logps/rejected": -290.5924377441406, "loss": 0.0907, "rewards/accuracies": 1.0, "rewards/chosen": 0.5935767292976379, "rewards/margins": 7.180167198181152, "rewards/rejected": -6.586589813232422, "step": 3470 }, { "epoch": 1.8, "learning_rate": 2.2289156626506022e-07, "logits/chosen": -2.6081440448760986, "logits/rejected": -2.435035228729248, "logps/chosen": -307.9920959472656, "logps/rejected": -380.0340270996094, "loss": 0.081, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4312973916530609, "rewards/margins": 8.036073684692383, "rewards/rejected": -7.604775428771973, "step": 3480 }, { "epoch": 1.8, "learning_rate": 2.2193536048957733e-07, "logits/chosen": -2.6976230144500732, "logits/rejected": -2.664168119430542, "logps/chosen": -258.38092041015625, "logps/rejected": -316.06903076171875, "loss": 0.1032, "rewards/accuracies": 1.0, "rewards/chosen": 0.064023457467556, "rewards/margins": 6.762887477874756, "rewards/rejected": -6.698863983154297, "step": 3490 }, { "epoch": 1.81, "learning_rate": 2.2097915471409445e-07, "logits/chosen": -2.7331182956695557, "logits/rejected": -2.7065072059631348, "logps/chosen": -242.06661987304688, "logps/rejected": -319.9682312011719, "loss": 0.065, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.17091651260852814, "rewards/margins": 7.219882011413574, "rewards/rejected": -7.0489654541015625, "step": 3500 }, { "epoch": 1.81, "eval_logits/chosen": -2.5004467964172363, "eval_logits/rejected": -2.4597153663635254, "eval_logps/chosen": -272.472412109375, "eval_logps/rejected": -300.49505615234375, "eval_loss": 0.607377290725708, "eval_rewards/accuracies": 0.859375, "eval_rewards/chosen": -1.8237330913543701, "eval_rewards/margins": 2.528778553009033, "eval_rewards/rejected": -4.352511405944824, "eval_runtime": 58.0784, "eval_samples_per_second": 17.218, "eval_steps_per_second": 0.275, "step": 3500 }, { "epoch": 1.81, "learning_rate": 2.200229489386116e-07, "logits/chosen": -2.7414891719818115, "logits/rejected": -2.6085870265960693, "logps/chosen": -311.56866455078125, "logps/rejected": -367.4567565917969, "loss": 0.0892, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.9936217069625854, "rewards/margins": 6.746335029602051, "rewards/rejected": -5.752713680267334, "step": 3510 }, { "epoch": 1.82, "learning_rate": 2.190667431631287e-07, "logits/chosen": -2.767604112625122, "logits/rejected": -2.6218278408050537, "logps/chosen": -354.67822265625, "logps/rejected": -397.64068603515625, "loss": 0.1602, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.2055106163024902, "rewards/margins": 9.150163650512695, "rewards/rejected": -7.944652557373047, "step": 3520 }, { "epoch": 1.82, "learning_rate": 2.1811053738764582e-07, "logits/chosen": -2.4568405151367188, "logits/rejected": -2.4575486183166504, "logps/chosen": -235.7547149658203, "logps/rejected": -265.8733215332031, "loss": 0.0686, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.24134965240955353, "rewards/margins": 4.935102939605713, "rewards/rejected": -5.17645263671875, "step": 3530 }, { "epoch": 1.83, "learning_rate": 2.1715433161216294e-07, "logits/chosen": -2.7147650718688965, "logits/rejected": -2.6502747535705566, "logps/chosen": -291.44219970703125, "logps/rejected": -360.6312255859375, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": 1.0583919286727905, "rewards/margins": 7.564295768737793, "rewards/rejected": -6.5059027671813965, "step": 3540 }, { "epoch": 1.83, "learning_rate": 2.1619812583668005e-07, "logits/chosen": -2.5987043380737305, "logits/rejected": -2.607950448989868, "logps/chosen": -299.592529296875, "logps/rejected": -337.1802978515625, "loss": 0.0877, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.13366484642028809, "rewards/margins": 6.168055534362793, "rewards/rejected": -6.034390926361084, "step": 3550 }, { "epoch": 1.84, "learning_rate": 2.1524192006119714e-07, "logits/chosen": -2.549741744995117, "logits/rejected": -2.519808292388916, "logps/chosen": -290.70684814453125, "logps/rejected": -333.82489013671875, "loss": 0.0653, "rewards/accuracies": 1.0, "rewards/chosen": -0.6356315612792969, "rewards/margins": 6.749837398529053, "rewards/rejected": -7.38546895980835, "step": 3560 }, { "epoch": 1.84, "learning_rate": 2.1428571428571426e-07, "logits/chosen": -2.5867228507995605, "logits/rejected": -2.5592923164367676, "logps/chosen": -248.76699829101562, "logps/rejected": -269.9541015625, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": -0.503594696521759, "rewards/margins": 5.626603126525879, "rewards/rejected": -6.130197525024414, "step": 3570 }, { "epoch": 1.85, "learning_rate": 2.133295085102314e-07, "logits/chosen": -2.4805967807769775, "logits/rejected": -2.5831592082977295, "logps/chosen": -288.694580078125, "logps/rejected": -289.60638427734375, "loss": 0.0924, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20494358241558075, "rewards/margins": 6.796807289123535, "rewards/rejected": -7.0017499923706055, "step": 3580 }, { "epoch": 1.85, "learning_rate": 2.1237330273474851e-07, "logits/chosen": -2.6966331005096436, "logits/rejected": -2.650146245956421, "logps/chosen": -374.99774169921875, "logps/rejected": -346.72711181640625, "loss": 0.1106, "rewards/accuracies": 1.0, "rewards/chosen": 0.8571535348892212, "rewards/margins": 7.465939521789551, "rewards/rejected": -6.608786106109619, "step": 3590 }, { "epoch": 1.86, "learning_rate": 2.1141709695926563e-07, "logits/chosen": -2.73488450050354, "logits/rejected": -2.7135844230651855, "logps/chosen": -230.2847137451172, "logps/rejected": -190.71505737304688, "loss": 0.0755, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4617141783237457, "rewards/margins": 5.763091087341309, "rewards/rejected": -6.224804878234863, "step": 3600 }, { "epoch": 1.86, "eval_logits/chosen": -2.4716105461120605, "eval_logits/rejected": -2.4327313899993896, "eval_logps/chosen": -273.4872131347656, "eval_logps/rejected": -300.97479248046875, "eval_loss": 0.5835925340652466, "eval_rewards/accuracies": 0.8125, "eval_rewards/chosen": -1.9252128601074219, "eval_rewards/margins": 2.4752719402313232, "eval_rewards/rejected": -4.400485038757324, "eval_runtime": 60.2598, "eval_samples_per_second": 16.595, "eval_steps_per_second": 0.266, "step": 3600 }, { "epoch": 1.86, "learning_rate": 2.1046089118378275e-07, "logits/chosen": -2.7524561882019043, "logits/rejected": -2.7061877250671387, "logps/chosen": -265.36962890625, "logps/rejected": -293.2806396484375, "loss": 0.1317, "rewards/accuracies": 1.0, "rewards/chosen": 0.8292659521102905, "rewards/margins": 7.344795227050781, "rewards/rejected": -6.515528678894043, "step": 3610 }, { "epoch": 1.87, "learning_rate": 2.0950468540829986e-07, "logits/chosen": -2.6626524925231934, "logits/rejected": -2.640347719192505, "logps/chosen": -207.5610809326172, "logps/rejected": -238.7421417236328, "loss": 0.097, "rewards/accuracies": 1.0, "rewards/chosen": -0.3047277331352234, "rewards/margins": 6.0444464683532715, "rewards/rejected": -6.349174499511719, "step": 3620 }, { "epoch": 1.87, "learning_rate": 2.0854847963281698e-07, "logits/chosen": -2.450810194015503, "logits/rejected": -2.3714869022369385, "logps/chosen": -290.0536804199219, "logps/rejected": -285.4010009765625, "loss": 0.076, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6087032556533813, "rewards/margins": 5.092909336090088, "rewards/rejected": -5.70161247253418, "step": 3630 }, { "epoch": 1.88, "learning_rate": 2.0759227385733407e-07, "logits/chosen": -2.564415216445923, "logits/rejected": -2.6595184803009033, "logps/chosen": -372.54949951171875, "logps/rejected": -315.68438720703125, "loss": 0.1015, "rewards/accuracies": 1.0, "rewards/chosen": -0.47370368242263794, "rewards/margins": 6.991959571838379, "rewards/rejected": -7.465662479400635, "step": 3640 }, { "epoch": 1.88, "learning_rate": 2.066360680818512e-07, "logits/chosen": -2.651179552078247, "logits/rejected": -2.6251769065856934, "logps/chosen": -375.2741394042969, "logps/rejected": -317.2344055175781, "loss": 0.1108, "rewards/accuracies": 1.0, "rewards/chosen": 0.11416218429803848, "rewards/margins": 7.2585554122924805, "rewards/rejected": -7.144394874572754, "step": 3650 }, { "epoch": 1.89, "learning_rate": 2.0567986230636832e-07, "logits/chosen": -2.5170671939849854, "logits/rejected": -2.639958620071411, "logps/chosen": -219.71676635742188, "logps/rejected": -264.04632568359375, "loss": 0.0927, "rewards/accuracies": 1.0, "rewards/chosen": -0.3074165880680084, "rewards/margins": 5.667797565460205, "rewards/rejected": -5.975214004516602, "step": 3660 }, { "epoch": 1.89, "learning_rate": 2.0472365653088544e-07, "logits/chosen": -2.646237850189209, "logits/rejected": -2.712930679321289, "logps/chosen": -297.159423828125, "logps/rejected": -336.8759765625, "loss": 0.0811, "rewards/accuracies": 1.0, "rewards/chosen": -0.833897590637207, "rewards/margins": 6.053628444671631, "rewards/rejected": -6.887526035308838, "step": 3670 }, { "epoch": 1.9, "learning_rate": 2.0376745075540256e-07, "logits/chosen": -2.8176498413085938, "logits/rejected": -2.798159122467041, "logps/chosen": -279.7525634765625, "logps/rejected": -284.43316650390625, "loss": 0.0621, "rewards/accuracies": 1.0, "rewards/chosen": -0.006855732295662165, "rewards/margins": 6.67547082901001, "rewards/rejected": -6.682325839996338, "step": 3680 }, { "epoch": 1.91, "learning_rate": 2.0281124497991967e-07, "logits/chosen": -2.7028536796569824, "logits/rejected": -2.6612937450408936, "logps/chosen": -252.33505249023438, "logps/rejected": -403.2816467285156, "loss": 0.0998, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.05786427855491638, "rewards/margins": 7.661648750305176, "rewards/rejected": -7.603785037994385, "step": 3690 }, { "epoch": 1.91, "learning_rate": 2.018550392044368e-07, "logits/chosen": -2.7595086097717285, "logits/rejected": -2.681696653366089, "logps/chosen": -295.5634460449219, "logps/rejected": -494.0884704589844, "loss": 0.0746, "rewards/accuracies": 1.0, "rewards/chosen": 0.988511860370636, "rewards/margins": 10.416128158569336, "rewards/rejected": -9.427616119384766, "step": 3700 }, { "epoch": 1.91, "eval_logits/chosen": -2.5114712715148926, "eval_logits/rejected": -2.468604564666748, "eval_logps/chosen": -273.5149230957031, "eval_logps/rejected": -301.87615966796875, "eval_loss": 0.5788707137107849, "eval_rewards/accuracies": 0.8125, "eval_rewards/chosen": -1.9279824495315552, "eval_rewards/margins": 2.562638521194458, "eval_rewards/rejected": -4.4906206130981445, "eval_runtime": 56.2772, "eval_samples_per_second": 17.769, "eval_steps_per_second": 0.284, "step": 3700 }, { "epoch": 1.92, "learning_rate": 2.0089883342895388e-07, "logits/chosen": -2.7059268951416016, "logits/rejected": -2.753756523132324, "logps/chosen": -202.04066467285156, "logps/rejected": -245.59237670898438, "loss": 0.075, "rewards/accuracies": 1.0, "rewards/chosen": -0.1704142987728119, "rewards/margins": 6.434650421142578, "rewards/rejected": -6.605063438415527, "step": 3710 }, { "epoch": 1.92, "learning_rate": 1.9994262765347102e-07, "logits/chosen": -2.642674207687378, "logits/rejected": -2.5932514667510986, "logps/chosen": -399.32305908203125, "logps/rejected": -326.49798583984375, "loss": 0.134, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.060870956629514694, "rewards/margins": 6.860370635986328, "rewards/rejected": -6.921241760253906, "step": 3720 }, { "epoch": 1.93, "learning_rate": 1.9898642187798813e-07, "logits/chosen": -2.6123080253601074, "logits/rejected": -2.7516627311706543, "logps/chosen": -410.9776306152344, "logps/rejected": -326.8647155761719, "loss": 0.0628, "rewards/accuracies": 1.0, "rewards/chosen": -0.07760889828205109, "rewards/margins": 6.036097049713135, "rewards/rejected": -6.113706111907959, "step": 3730 }, { "epoch": 1.93, "learning_rate": 1.9803021610250525e-07, "logits/chosen": -2.641099452972412, "logits/rejected": -2.711040735244751, "logps/chosen": -216.26535034179688, "logps/rejected": -274.23516845703125, "loss": 0.0742, "rewards/accuracies": 1.0, "rewards/chosen": 0.5522519946098328, "rewards/margins": 6.619080543518066, "rewards/rejected": -6.06682825088501, "step": 3740 }, { "epoch": 1.94, "learning_rate": 1.9707401032702237e-07, "logits/chosen": -2.6930148601531982, "logits/rejected": -2.691132068634033, "logps/chosen": -269.2910461425781, "logps/rejected": -311.1435241699219, "loss": 0.0593, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.5175756216049194, "rewards/margins": 6.387923240661621, "rewards/rejected": -5.870347023010254, "step": 3750 }, { "epoch": 1.94, "learning_rate": 1.9611780455153948e-07, "logits/chosen": -2.7549490928649902, "logits/rejected": -2.7406229972839355, "logps/chosen": -292.29833984375, "logps/rejected": -254.7724609375, "loss": 0.1138, "rewards/accuracies": 1.0, "rewards/chosen": 0.20989219844341278, "rewards/margins": 5.599099159240723, "rewards/rejected": -5.389206886291504, "step": 3760 }, { "epoch": 1.95, "learning_rate": 1.951615987760566e-07, "logits/chosen": -2.5066380500793457, "logits/rejected": -2.4894328117370605, "logps/chosen": -221.491455078125, "logps/rejected": -262.5354309082031, "loss": 0.072, "rewards/accuracies": 1.0, "rewards/chosen": -0.48722711205482483, "rewards/margins": 4.931153297424316, "rewards/rejected": -5.418381214141846, "step": 3770 }, { "epoch": 1.95, "learning_rate": 1.942053930005737e-07, "logits/chosen": -2.8080992698669434, "logits/rejected": -2.69472074508667, "logps/chosen": -234.15390014648438, "logps/rejected": -300.17291259765625, "loss": 0.0526, "rewards/accuracies": 1.0, "rewards/chosen": -0.7450860142707825, "rewards/margins": 7.135354518890381, "rewards/rejected": -7.880439758300781, "step": 3780 }, { "epoch": 1.96, "learning_rate": 1.9324918722509086e-07, "logits/chosen": -2.763511896133423, "logits/rejected": -2.758317708969116, "logps/chosen": -267.06695556640625, "logps/rejected": -251.7860107421875, "loss": 0.0843, "rewards/accuracies": 1.0, "rewards/chosen": -0.5050710439682007, "rewards/margins": 6.807704925537109, "rewards/rejected": -7.3127760887146, "step": 3790 }, { "epoch": 1.96, "learning_rate": 1.9229298144960794e-07, "logits/chosen": -2.6397032737731934, "logits/rejected": -2.6277005672454834, "logps/chosen": -230.0516357421875, "logps/rejected": -253.93594360351562, "loss": 0.1348, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08455387502908707, "rewards/margins": 5.982255458831787, "rewards/rejected": -6.06680965423584, "step": 3800 }, { "epoch": 1.96, "eval_logits/chosen": -2.5393259525299072, "eval_logits/rejected": -2.494310140609741, "eval_logps/chosen": -272.8935546875, "eval_logps/rejected": -299.39764404296875, "eval_loss": 0.6015481352806091, "eval_rewards/accuracies": 0.828125, "eval_rewards/chosen": -1.865846872329712, "eval_rewards/margins": 2.376923084259033, "eval_rewards/rejected": -4.242770195007324, "eval_runtime": 57.6051, "eval_samples_per_second": 17.36, "eval_steps_per_second": 0.278, "step": 3800 }, { "epoch": 1.97, "learning_rate": 1.9133677567412506e-07, "logits/chosen": -2.790476083755493, "logits/rejected": -2.786289691925049, "logps/chosen": -319.78619384765625, "logps/rejected": -282.0672607421875, "loss": 0.0818, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11253446340560913, "rewards/margins": 6.30682897567749, "rewards/rejected": -6.419363498687744, "step": 3810 }, { "epoch": 1.97, "learning_rate": 1.9038056989864218e-07, "logits/chosen": -2.7111282348632812, "logits/rejected": -2.765439033508301, "logps/chosen": -256.03546142578125, "logps/rejected": -314.55523681640625, "loss": 0.1137, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10970799624919891, "rewards/margins": 6.709108829498291, "rewards/rejected": -6.818817138671875, "step": 3820 }, { "epoch": 1.98, "learning_rate": 1.894243641231593e-07, "logits/chosen": -2.5691027641296387, "logits/rejected": -2.4961977005004883, "logps/chosen": -272.5830993652344, "logps/rejected": -250.82357788085938, "loss": 0.103, "rewards/accuracies": 1.0, "rewards/chosen": -1.4049804210662842, "rewards/margins": 5.012188911437988, "rewards/rejected": -6.417168617248535, "step": 3830 }, { "epoch": 1.98, "learning_rate": 1.884681583476764e-07, "logits/chosen": -2.8165037631988525, "logits/rejected": -2.76141357421875, "logps/chosen": -229.1115264892578, "logps/rejected": -300.12347412109375, "loss": 0.0621, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.30909544229507446, "rewards/margins": 5.961316108703613, "rewards/rejected": -6.270411968231201, "step": 3840 }, { "epoch": 1.99, "learning_rate": 1.8751195257219352e-07, "logits/chosen": -2.683171272277832, "logits/rejected": -2.74794602394104, "logps/chosen": -281.92901611328125, "logps/rejected": -404.372314453125, "loss": 0.0803, "rewards/accuracies": 1.0, "rewards/chosen": -0.3100479245185852, "rewards/margins": 7.182066440582275, "rewards/rejected": -7.492114067077637, "step": 3850 }, { "epoch": 1.99, "learning_rate": 1.8655574679671067e-07, "logits/chosen": -2.660099506378174, "logits/rejected": -2.69828724861145, "logps/chosen": -241.91787719726562, "logps/rejected": -317.3074951171875, "loss": 0.0951, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.469612717628479, "rewards/margins": 7.08514404296875, "rewards/rejected": -8.554756164550781, "step": 3860 }, { "epoch": 2.0, "learning_rate": 1.8559954102122778e-07, "logits/chosen": -2.550110340118408, "logits/rejected": -2.5635857582092285, "logps/chosen": -280.5724792480469, "logps/rejected": -239.25119018554688, "loss": 0.1036, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.0125322341918945, "rewards/margins": 4.381348609924316, "rewards/rejected": -5.393881320953369, "step": 3870 }, { "epoch": 2.0, "learning_rate": 1.8464333524574487e-07, "logits/chosen": -2.7105534076690674, "logits/rejected": -2.670560598373413, "logps/chosen": -213.32907104492188, "logps/rejected": -330.0856628417969, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": -0.48191890120506287, "rewards/margins": 7.262728691101074, "rewards/rejected": -7.744647026062012, "step": 3880 }, { "epoch": 2.01, "learning_rate": 1.8368712947026199e-07, "logits/chosen": -2.8019955158233643, "logits/rejected": -2.7659356594085693, "logps/chosen": -305.8590393066406, "logps/rejected": -306.02325439453125, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": 0.42963480949401855, "rewards/margins": 6.8882341384887695, "rewards/rejected": -6.458600044250488, "step": 3890 }, { "epoch": 2.01, "learning_rate": 1.827309236947791e-07, "logits/chosen": -2.6406970024108887, "logits/rejected": -2.6530818939208984, "logps/chosen": -155.24813842773438, "logps/rejected": -301.69390869140625, "loss": 0.0217, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.34457913041114807, "rewards/margins": 6.758476257324219, "rewards/rejected": -7.103055000305176, "step": 3900 }, { "epoch": 2.01, "eval_logits/chosen": -2.5271873474121094, "eval_logits/rejected": -2.4840664863586426, "eval_logps/chosen": -277.5699157714844, "eval_logps/rejected": -306.1987609863281, "eval_loss": 0.612151563167572, "eval_rewards/accuracies": 0.828125, "eval_rewards/chosen": -2.3334813117980957, "eval_rewards/margins": 2.589404582977295, "eval_rewards/rejected": -4.922885894775391, "eval_runtime": 54.5082, "eval_samples_per_second": 18.346, "eval_steps_per_second": 0.294, "step": 3900 }, { "epoch": 2.02, "learning_rate": 1.8177471791929622e-07, "logits/chosen": -2.416943073272705, "logits/rejected": -2.4777729511260986, "logps/chosen": -234.59054565429688, "logps/rejected": -378.48101806640625, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -0.7428705096244812, "rewards/margins": 10.251193046569824, "rewards/rejected": -10.994062423706055, "step": 3910 }, { "epoch": 2.02, "learning_rate": 1.8081851214381333e-07, "logits/chosen": -2.6043264865875244, "logits/rejected": -2.5203278064727783, "logps/chosen": -263.97882080078125, "logps/rejected": -393.0724182128906, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 0.05669177696108818, "rewards/margins": 8.67068099975586, "rewards/rejected": -8.613988876342773, "step": 3920 }, { "epoch": 2.03, "learning_rate": 1.7986230636833047e-07, "logits/chosen": -2.6340689659118652, "logits/rejected": -2.6645379066467285, "logps/chosen": -179.75973510742188, "logps/rejected": -246.31448364257812, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -1.3356815576553345, "rewards/margins": 6.356654167175293, "rewards/rejected": -7.692336082458496, "step": 3930 }, { "epoch": 2.03, "learning_rate": 1.789061005928476e-07, "logits/chosen": -2.812453269958496, "logits/rejected": -2.752922534942627, "logps/chosen": -276.16876220703125, "logps/rejected": -295.46429443359375, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": 0.023592447862029076, "rewards/margins": 7.244173526763916, "rewards/rejected": -7.2205810546875, "step": 3940 }, { "epoch": 2.04, "learning_rate": 1.7794989481736468e-07, "logits/chosen": -2.6128292083740234, "logits/rejected": -2.644348382949829, "logps/chosen": -243.8308563232422, "logps/rejected": -270.5189514160156, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -0.30936262011528015, "rewards/margins": 8.135075569152832, "rewards/rejected": -8.444437026977539, "step": 3950 }, { "epoch": 2.04, "learning_rate": 1.769936890418818e-07, "logits/chosen": -2.5391926765441895, "logits/rejected": -2.5172486305236816, "logps/chosen": -303.0284729003906, "logps/rejected": -303.38739013671875, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": 0.04590020328760147, "rewards/margins": 7.713925361633301, "rewards/rejected": -7.668023586273193, "step": 3960 }, { "epoch": 2.05, "learning_rate": 1.760374832663989e-07, "logits/chosen": -2.785437822341919, "logits/rejected": -2.667668104171753, "logps/chosen": -374.7364807128906, "logps/rejected": -371.83050537109375, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 1.3061269521713257, "rewards/margins": 10.878585815429688, "rewards/rejected": -9.572458267211914, "step": 3970 }, { "epoch": 2.05, "learning_rate": 1.7508127749091603e-07, "logits/chosen": -2.6636507511138916, "logits/rejected": -2.622056007385254, "logps/chosen": -272.5489196777344, "logps/rejected": -296.45025634765625, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.6961295008659363, "rewards/margins": 7.3043532371521, "rewards/rejected": -8.000483512878418, "step": 3980 }, { "epoch": 2.06, "learning_rate": 1.7412507171543314e-07, "logits/chosen": -2.6111302375793457, "logits/rejected": -2.7141504287719727, "logps/chosen": -306.14471435546875, "logps/rejected": -277.4181213378906, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -0.2339712679386139, "rewards/margins": 8.951885223388672, "rewards/rejected": -9.185856819152832, "step": 3990 }, { "epoch": 2.07, "learning_rate": 1.7316886593995028e-07, "logits/chosen": -2.770508289337158, "logits/rejected": -2.7339038848876953, "logps/chosen": -301.5724182128906, "logps/rejected": -351.9184875488281, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -0.5096687078475952, "rewards/margins": 8.371912956237793, "rewards/rejected": -8.881582260131836, "step": 4000 }, { "epoch": 2.07, "eval_logits/chosen": -2.454496383666992, "eval_logits/rejected": -2.4104785919189453, "eval_logps/chosen": -284.124755859375, "eval_logps/rejected": -317.1334228515625, "eval_loss": 0.6521932482719421, "eval_rewards/accuracies": 0.828125, "eval_rewards/chosen": -2.988966464996338, "eval_rewards/margins": 3.0273852348327637, "eval_rewards/rejected": -6.016351222991943, "eval_runtime": 61.6079, "eval_samples_per_second": 16.232, "eval_steps_per_second": 0.26, "step": 4000 }, { "epoch": 2.07, "learning_rate": 1.722126601644674e-07, "logits/chosen": -2.7440953254699707, "logits/rejected": -2.736643075942993, "logps/chosen": -323.197998046875, "logps/rejected": -265.2000732421875, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.9906817674636841, "rewards/margins": 7.5294013023376465, "rewards/rejected": -8.520084381103516, "step": 4010 }, { "epoch": 2.08, "learning_rate": 1.7125645438898452e-07, "logits/chosen": -2.7173900604248047, "logits/rejected": -2.676675319671631, "logps/chosen": -314.3374938964844, "logps/rejected": -355.52618408203125, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.3634423315525055, "rewards/margins": 9.33686637878418, "rewards/rejected": -9.700309753417969, "step": 4020 }, { "epoch": 2.08, "learning_rate": 1.703002486135016e-07, "logits/chosen": -2.6162686347961426, "logits/rejected": -2.603562593460083, "logps/chosen": -248.433837890625, "logps/rejected": -305.6585388183594, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.43873992562294006, "rewards/margins": 10.635756492614746, "rewards/rejected": -11.07449722290039, "step": 4030 }, { "epoch": 2.09, "learning_rate": 1.6934404283801872e-07, "logits/chosen": -2.52081036567688, "logits/rejected": -2.3590970039367676, "logps/chosen": -352.1839904785156, "logps/rejected": -366.2679138183594, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 0.43957453966140747, "rewards/margins": 11.327077865600586, "rewards/rejected": -10.887503623962402, "step": 4040 }, { "epoch": 2.09, "learning_rate": 1.6838783706253584e-07, "logits/chosen": -2.640784502029419, "logits/rejected": -2.524874687194824, "logps/chosen": -167.76235961914062, "logps/rejected": -211.56985473632812, "loss": 0.0156, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1560419499874115, "rewards/margins": 8.205093383789062, "rewards/rejected": -8.049051284790039, "step": 4050 }, { "epoch": 2.1, "learning_rate": 1.6743163128705295e-07, "logits/chosen": -2.604750156402588, "logits/rejected": -2.5685653686523438, "logps/chosen": -289.0841064453125, "logps/rejected": -324.72552490234375, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -2.2151424884796143, "rewards/margins": 8.238618850708008, "rewards/rejected": -10.453761100769043, "step": 4060 }, { "epoch": 2.1, "learning_rate": 1.664754255115701e-07, "logits/chosen": -2.4264097213745117, "logits/rejected": -2.375046730041504, "logps/chosen": -224.1468505859375, "logps/rejected": -290.3971862792969, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": 0.14240024983882904, "rewards/margins": 9.841353416442871, "rewards/rejected": -9.698953628540039, "step": 4070 }, { "epoch": 2.11, "learning_rate": 1.655192197360872e-07, "logits/chosen": -2.753242254257202, "logits/rejected": -2.6922965049743652, "logps/chosen": -274.47601318359375, "logps/rejected": -324.0868835449219, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.3948326110839844, "rewards/margins": 8.828141212463379, "rewards/rejected": -9.22297477722168, "step": 4080 }, { "epoch": 2.11, "learning_rate": 1.6456301396060433e-07, "logits/chosen": -2.554525375366211, "logits/rejected": -2.6398258209228516, "logps/chosen": -365.6826477050781, "logps/rejected": -360.66107177734375, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.2733768820762634, "rewards/margins": 8.528050422668457, "rewards/rejected": -8.801426887512207, "step": 4090 }, { "epoch": 2.12, "learning_rate": 1.6360680818512144e-07, "logits/chosen": -2.7123589515686035, "logits/rejected": -2.61602783203125, "logps/chosen": -368.64544677734375, "logps/rejected": -432.6624450683594, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.055361270904541, "rewards/margins": 8.865598678588867, "rewards/rejected": -9.920958518981934, "step": 4100 }, { "epoch": 2.12, "eval_logits/chosen": -2.4698657989501953, "eval_logits/rejected": -2.4272119998931885, "eval_logps/chosen": -289.0121154785156, "eval_logps/rejected": -323.7186584472656, "eval_loss": 0.692164421081543, "eval_rewards/accuracies": 0.796875, "eval_rewards/chosen": -3.477701187133789, "eval_rewards/margins": 3.197173595428467, "eval_rewards/rejected": -6.674875259399414, "eval_runtime": 57.1311, "eval_samples_per_second": 17.504, "eval_steps_per_second": 0.28, "step": 4100 }, { "epoch": 2.12, "learning_rate": 1.6265060240963853e-07, "logits/chosen": -2.6370177268981934, "logits/rejected": -2.5220537185668945, "logps/chosen": -334.99066162109375, "logps/rejected": -290.169189453125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 0.933539867401123, "rewards/margins": 8.522821426391602, "rewards/rejected": -7.589282989501953, "step": 4110 }, { "epoch": 2.13, "learning_rate": 1.6169439663415565e-07, "logits/chosen": -2.6449599266052246, "logits/rejected": -2.6207022666931152, "logps/chosen": -269.48529052734375, "logps/rejected": -324.10418701171875, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.8957691192626953, "rewards/margins": 9.796669960021973, "rewards/rejected": -10.692439079284668, "step": 4120 }, { "epoch": 2.13, "learning_rate": 1.6073819085867276e-07, "logits/chosen": -2.797229290008545, "logits/rejected": -2.7991158962249756, "logps/chosen": -309.7330627441406, "logps/rejected": -439.6482849121094, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": 1.3518083095550537, "rewards/margins": 11.625936508178711, "rewards/rejected": -10.274128913879395, "step": 4130 }, { "epoch": 2.14, "learning_rate": 1.597819850831899e-07, "logits/chosen": -2.752419948577881, "logits/rejected": -2.6186330318450928, "logps/chosen": -208.90380859375, "logps/rejected": -247.1297149658203, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 1.2973028421401978, "rewards/margins": 9.918791770935059, "rewards/rejected": -8.621490478515625, "step": 4140 }, { "epoch": 2.14, "learning_rate": 1.5882577930770702e-07, "logits/chosen": -2.5983939170837402, "logits/rejected": -2.551213502883911, "logps/chosen": -321.56195068359375, "logps/rejected": -328.3628234863281, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 0.38840678334236145, "rewards/margins": 10.420036315917969, "rewards/rejected": -10.03162956237793, "step": 4150 }, { "epoch": 2.15, "learning_rate": 1.5786957353222414e-07, "logits/chosen": -2.5684826374053955, "logits/rejected": -2.608212471008301, "logps/chosen": -272.9964599609375, "logps/rejected": -265.1176452636719, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.0111887454986572, "rewards/margins": 7.318711280822754, "rewards/rejected": -8.329900741577148, "step": 4160 }, { "epoch": 2.15, "learning_rate": 1.5691336775674125e-07, "logits/chosen": -2.4619576930999756, "logits/rejected": -2.555619716644287, "logps/chosen": -268.24859619140625, "logps/rejected": -298.4876403808594, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.17980532348155975, "rewards/margins": 9.836331367492676, "rewards/rejected": -10.01613712310791, "step": 4170 }, { "epoch": 2.16, "learning_rate": 1.5595716198125837e-07, "logits/chosen": -2.7515110969543457, "logits/rejected": -2.7178173065185547, "logps/chosen": -400.4342346191406, "logps/rejected": -458.4161682128906, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.9931119084358215, "rewards/margins": 10.737370491027832, "rewards/rejected": -11.73048210144043, "step": 4180 }, { "epoch": 2.16, "learning_rate": 1.5500095620577546e-07, "logits/chosen": -2.6951303482055664, "logits/rejected": -2.748305559158325, "logps/chosen": -240.6981201171875, "logps/rejected": -257.77752685546875, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.5856528282165527, "rewards/margins": 7.461671352386475, "rewards/rejected": -9.047324180603027, "step": 4190 }, { "epoch": 2.17, "learning_rate": 1.5404475043029257e-07, "logits/chosen": -2.5231451988220215, "logits/rejected": -2.5645296573638916, "logps/chosen": -191.24134826660156, "logps/rejected": -325.54949951171875, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -1.5546247959136963, "rewards/margins": 8.656599998474121, "rewards/rejected": -10.211225509643555, "step": 4200 }, { "epoch": 2.17, "eval_logits/chosen": -2.4464974403381348, "eval_logits/rejected": -2.4046523571014404, "eval_logps/chosen": -286.6412658691406, "eval_logps/rejected": -323.7452697753906, "eval_loss": 0.6993398666381836, "eval_rewards/accuracies": 0.796875, "eval_rewards/chosen": -3.2406165599823, "eval_rewards/margins": 3.4369187355041504, "eval_rewards/rejected": -6.677535533905029, "eval_runtime": 54.7971, "eval_samples_per_second": 18.249, "eval_steps_per_second": 0.292, "step": 4200 }, { "epoch": 2.17, "learning_rate": 1.5308854465480971e-07, "logits/chosen": -2.6298282146453857, "logits/rejected": -2.6375985145568848, "logps/chosen": -293.63629150390625, "logps/rejected": -297.9925537109375, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.0975180864334106, "rewards/margins": 8.3226318359375, "rewards/rejected": -9.420149803161621, "step": 4210 }, { "epoch": 2.18, "learning_rate": 1.5213233887932683e-07, "logits/chosen": -2.5850603580474854, "logits/rejected": -2.606503963470459, "logps/chosen": -316.19854736328125, "logps/rejected": -332.8941955566406, "loss": 0.0088, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9063823819160461, "rewards/margins": 7.739400386810303, "rewards/rejected": -8.645783424377441, "step": 4220 }, { "epoch": 2.18, "learning_rate": 1.5117613310384395e-07, "logits/chosen": -2.5701706409454346, "logits/rejected": -2.5911612510681152, "logps/chosen": -269.97894287109375, "logps/rejected": -319.3363342285156, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.6825813055038452, "rewards/margins": 10.70821762084961, "rewards/rejected": -11.390798568725586, "step": 4230 }, { "epoch": 2.19, "learning_rate": 1.5021992732836106e-07, "logits/chosen": -2.4653377532958984, "logits/rejected": -2.5559732913970947, "logps/chosen": -279.4239196777344, "logps/rejected": -356.7681884765625, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.9881469011306763, "rewards/margins": 9.201104164123535, "rewards/rejected": -10.189250946044922, "step": 4240 }, { "epoch": 2.19, "learning_rate": 1.4926372155287818e-07, "logits/chosen": -2.7210116386413574, "logits/rejected": -2.593418836593628, "logps/chosen": -228.53121948242188, "logps/rejected": -271.88787841796875, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -0.9970628619194031, "rewards/margins": 8.71868896484375, "rewards/rejected": -9.715751647949219, "step": 4250 }, { "epoch": 2.2, "learning_rate": 1.483075157773953e-07, "logits/chosen": -2.5943050384521484, "logits/rejected": -2.673746347427368, "logps/chosen": -251.91336059570312, "logps/rejected": -270.3241271972656, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.251340627670288, "rewards/margins": 8.234782218933105, "rewards/rejected": -9.486123085021973, "step": 4260 }, { "epoch": 2.2, "learning_rate": 1.4735131000191238e-07, "logits/chosen": -2.6009936332702637, "logits/rejected": -2.607675313949585, "logps/chosen": -309.9886169433594, "logps/rejected": -332.39801025390625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.0659501552581787, "rewards/margins": 9.141355514526367, "rewards/rejected": -10.207304954528809, "step": 4270 }, { "epoch": 2.21, "learning_rate": 1.4639510422642952e-07, "logits/chosen": -2.7442212104797363, "logits/rejected": -2.6310532093048096, "logps/chosen": -345.13616943359375, "logps/rejected": -405.30755615234375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.7909820079803467, "rewards/margins": 9.60711669921875, "rewards/rejected": -10.398099899291992, "step": 4280 }, { "epoch": 2.21, "learning_rate": 1.4543889845094664e-07, "logits/chosen": -2.4257078170776367, "logits/rejected": -2.461683750152588, "logps/chosen": -375.21478271484375, "logps/rejected": -433.16973876953125, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": -0.8098801374435425, "rewards/margins": 11.403145790100098, "rewards/rejected": -12.21302604675293, "step": 4290 }, { "epoch": 2.22, "learning_rate": 1.4448269267546376e-07, "logits/chosen": -2.7228384017944336, "logits/rejected": -2.763788938522339, "logps/chosen": -330.9010314941406, "logps/rejected": -367.5445861816406, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 0.8657931089401245, "rewards/margins": 10.422277450561523, "rewards/rejected": -9.55648422241211, "step": 4300 }, { "epoch": 2.22, "eval_logits/chosen": -2.428981304168701, "eval_logits/rejected": -2.3842594623565674, "eval_logps/chosen": -292.2260437011719, "eval_logps/rejected": -331.3666687011719, "eval_loss": 0.7177846431732178, "eval_rewards/accuracies": 0.765625, "eval_rewards/chosen": -3.7990951538085938, "eval_rewards/margins": 3.6405770778656006, "eval_rewards/rejected": -7.439671993255615, "eval_runtime": 57.5668, "eval_samples_per_second": 17.371, "eval_steps_per_second": 0.278, "step": 4300 }, { "epoch": 2.23, "learning_rate": 1.4352648689998087e-07, "logits/chosen": -2.6788887977600098, "logits/rejected": -2.659087657928467, "logps/chosen": -255.2762908935547, "logps/rejected": -230.3298797607422, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.5762121081352234, "rewards/margins": 9.638313293457031, "rewards/rejected": -10.21452522277832, "step": 4310 }, { "epoch": 2.23, "learning_rate": 1.42570281124498e-07, "logits/chosen": -2.5874216556549072, "logits/rejected": -2.647291898727417, "logps/chosen": -264.53802490234375, "logps/rejected": -368.0313415527344, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.9759609699249268, "rewards/margins": 9.195481300354004, "rewards/rejected": -11.171442031860352, "step": 4320 }, { "epoch": 2.24, "learning_rate": 1.416140753490151e-07, "logits/chosen": -2.6484475135803223, "logits/rejected": -2.7253453731536865, "logps/chosen": -338.7431640625, "logps/rejected": -423.6756896972656, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.1497945338487625, "rewards/margins": 11.79082202911377, "rewards/rejected": -11.940614700317383, "step": 4330 }, { "epoch": 2.24, "learning_rate": 1.4065786957353222e-07, "logits/chosen": -2.5038111209869385, "logits/rejected": -2.5019071102142334, "logps/chosen": -315.7591552734375, "logps/rejected": -336.18963623046875, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": 0.016095232218503952, "rewards/margins": 8.441411018371582, "rewards/rejected": -8.425315856933594, "step": 4340 }, { "epoch": 2.25, "learning_rate": 1.3970166379804933e-07, "logits/chosen": -2.49928879737854, "logits/rejected": -2.376461982727051, "logps/chosen": -380.0243225097656, "logps/rejected": -395.73077392578125, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -0.1554236114025116, "rewards/margins": 9.892860412597656, "rewards/rejected": -10.048284530639648, "step": 4350 }, { "epoch": 2.25, "learning_rate": 1.3874545802256645e-07, "logits/chosen": -2.758044481277466, "logits/rejected": -2.6601271629333496, "logps/chosen": -311.03436279296875, "logps/rejected": -419.60418701171875, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -1.6286399364471436, "rewards/margins": 10.120224952697754, "rewards/rejected": -11.748865127563477, "step": 4360 }, { "epoch": 2.26, "learning_rate": 1.3778925224708357e-07, "logits/chosen": -2.516096830368042, "logits/rejected": -2.5368704795837402, "logps/chosen": -253.93722534179688, "logps/rejected": -275.40423583984375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.05337781831622124, "rewards/margins": 8.323257446289062, "rewards/rejected": -8.376635551452637, "step": 4370 }, { "epoch": 2.26, "learning_rate": 1.3683304647160068e-07, "logits/chosen": -2.6350722312927246, "logits/rejected": -2.5284571647644043, "logps/chosen": -279.087158203125, "logps/rejected": -357.74542236328125, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.0737884044647217, "rewards/margins": 10.794805526733398, "rewards/rejected": -11.868593215942383, "step": 4380 }, { "epoch": 2.27, "learning_rate": 1.358768406961178e-07, "logits/chosen": -2.6859638690948486, "logits/rejected": -2.735161066055298, "logps/chosen": -295.9905700683594, "logps/rejected": -444.8924255371094, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 0.7544025778770447, "rewards/margins": 10.69865608215332, "rewards/rejected": -9.944252967834473, "step": 4390 }, { "epoch": 2.27, "learning_rate": 1.349206349206349e-07, "logits/chosen": -2.757059335708618, "logits/rejected": -2.771275520324707, "logps/chosen": -266.63800048828125, "logps/rejected": -281.4782409667969, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.0992039442062378, "rewards/margins": 8.593305587768555, "rewards/rejected": -9.692508697509766, "step": 4400 }, { "epoch": 2.27, "eval_logits/chosen": -2.4535796642303467, "eval_logits/rejected": -2.4095299243927, "eval_logps/chosen": -287.504150390625, "eval_logps/rejected": -324.9907531738281, "eval_loss": 0.6839932203292847, "eval_rewards/accuracies": 0.8125, "eval_rewards/chosen": -3.326904535293579, "eval_rewards/margins": 3.47517728805542, "eval_rewards/rejected": -6.802082061767578, "eval_runtime": 58.0489, "eval_samples_per_second": 17.227, "eval_steps_per_second": 0.276, "step": 4400 }, { "epoch": 2.28, "learning_rate": 1.3396442914515203e-07, "logits/chosen": -2.330714464187622, "logits/rejected": -2.469642400741577, "logps/chosen": -260.82843017578125, "logps/rejected": -299.21343994140625, "loss": 0.0115, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7311785221099854, "rewards/margins": 8.269608497619629, "rewards/rejected": -10.000787734985352, "step": 4410 }, { "epoch": 2.28, "learning_rate": 1.3300822336966917e-07, "logits/chosen": -2.5285234451293945, "logits/rejected": -2.3487613201141357, "logps/chosen": -334.66229248046875, "logps/rejected": -329.3540954589844, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 0.163953959941864, "rewards/margins": 9.816844940185547, "rewards/rejected": -9.652891159057617, "step": 4420 }, { "epoch": 2.29, "learning_rate": 1.3205201759418626e-07, "logits/chosen": -2.400176525115967, "logits/rejected": -2.173835277557373, "logps/chosen": -355.26043701171875, "logps/rejected": -349.78851318359375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 0.45656710863113403, "rewards/margins": 10.023509979248047, "rewards/rejected": -9.56694221496582, "step": 4430 }, { "epoch": 2.29, "learning_rate": 1.3109581181870338e-07, "logits/chosen": -2.611816883087158, "logits/rejected": -2.6642374992370605, "logps/chosen": -294.43756103515625, "logps/rejected": -321.86846923828125, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 0.08868559449911118, "rewards/margins": 9.15350341796875, "rewards/rejected": -9.064818382263184, "step": 4440 }, { "epoch": 2.3, "learning_rate": 1.301396060432205e-07, "logits/chosen": -2.6899092197418213, "logits/rejected": -2.6209728717803955, "logps/chosen": -340.12030029296875, "logps/rejected": -341.85638427734375, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.5528199672698975, "rewards/margins": 10.257894515991211, "rewards/rejected": -10.810712814331055, "step": 4450 }, { "epoch": 2.3, "learning_rate": 1.291834002677376e-07, "logits/chosen": -2.5746819972991943, "logits/rejected": -2.4712207317352295, "logps/chosen": -340.21661376953125, "logps/rejected": -348.29376220703125, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.839795470237732, "rewards/margins": 9.294793128967285, "rewards/rejected": -11.134590148925781, "step": 4460 }, { "epoch": 2.31, "learning_rate": 1.2822719449225472e-07, "logits/chosen": -2.4497411251068115, "logits/rejected": -2.6023406982421875, "logps/chosen": -258.5740966796875, "logps/rejected": -322.1835021972656, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.4901916980743408, "rewards/margins": 8.80390739440918, "rewards/rejected": -10.294098854064941, "step": 4470 }, { "epoch": 2.31, "learning_rate": 1.2727098871677184e-07, "logits/chosen": -2.679898262023926, "logits/rejected": -2.6797006130218506, "logps/chosen": -358.4029235839844, "logps/rejected": -349.04119873046875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.31357401609420776, "rewards/margins": 9.342456817626953, "rewards/rejected": -9.656030654907227, "step": 4480 }, { "epoch": 2.32, "learning_rate": 1.2631478294128898e-07, "logits/chosen": -2.6513264179229736, "logits/rejected": -2.6451632976531982, "logps/chosen": -398.11871337890625, "logps/rejected": -359.9664611816406, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 0.07513128221035004, "rewards/margins": 10.039201736450195, "rewards/rejected": -9.964070320129395, "step": 4490 }, { "epoch": 2.32, "learning_rate": 1.253585771658061e-07, "logits/chosen": -2.587759017944336, "logits/rejected": -2.633078098297119, "logps/chosen": -251.6234588623047, "logps/rejected": -377.10443115234375, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -1.1120997667312622, "rewards/margins": 9.92921257019043, "rewards/rejected": -11.041314125061035, "step": 4500 }, { "epoch": 2.32, "eval_logits/chosen": -2.4542932510375977, "eval_logits/rejected": -2.411810874938965, "eval_logps/chosen": -291.1250305175781, "eval_logps/rejected": -329.98406982421875, "eval_loss": 0.7013015151023865, "eval_rewards/accuracies": 0.8125, "eval_rewards/chosen": -3.688992738723755, "eval_rewards/margins": 3.612422466278076, "eval_rewards/rejected": -7.301414966583252, "eval_runtime": 56.7399, "eval_samples_per_second": 17.624, "eval_steps_per_second": 0.282, "step": 4500 }, { "epoch": 2.33, "learning_rate": 1.2440237139032319e-07, "logits/chosen": -2.7155184745788574, "logits/rejected": -2.7012360095977783, "logps/chosen": -270.6969909667969, "logps/rejected": -277.15362548828125, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.8242677450180054, "rewards/margins": 10.182249069213867, "rewards/rejected": -12.006516456604004, "step": 4510 }, { "epoch": 2.33, "learning_rate": 1.234461656148403e-07, "logits/chosen": -2.7778592109680176, "logits/rejected": -2.6845195293426514, "logps/chosen": -406.66497802734375, "logps/rejected": -398.89044189453125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.5758999586105347, "rewards/margins": 11.570829391479492, "rewards/rejected": -13.146730422973633, "step": 4520 }, { "epoch": 2.34, "learning_rate": 1.2248995983935742e-07, "logits/chosen": -2.7265734672546387, "logits/rejected": -2.6226305961608887, "logps/chosen": -284.33843994140625, "logps/rejected": -360.93121337890625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.6043723821640015, "rewards/margins": 10.222024917602539, "rewards/rejected": -10.826397895812988, "step": 4530 }, { "epoch": 2.34, "learning_rate": 1.2153375406387456e-07, "logits/chosen": -2.724083185195923, "logits/rejected": -2.75142765045166, "logps/chosen": -355.35504150390625, "logps/rejected": -472.2686462402344, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3634461164474487, "rewards/margins": 11.717732429504395, "rewards/rejected": -13.081178665161133, "step": 4540 }, { "epoch": 2.35, "learning_rate": 1.2057754828839165e-07, "logits/chosen": -2.666905641555786, "logits/rejected": -2.737536907196045, "logps/chosen": -310.6121520996094, "logps/rejected": -350.0155029296875, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.12724515795707703, "rewards/margins": 9.271829605102539, "rewards/rejected": -9.39907455444336, "step": 4550 }, { "epoch": 2.35, "learning_rate": 1.1962134251290876e-07, "logits/chosen": -2.6463513374328613, "logits/rejected": -2.6242516040802, "logps/chosen": -268.8026123046875, "logps/rejected": -253.2088623046875, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.7786948680877686, "rewards/margins": 8.877888679504395, "rewards/rejected": -10.656583786010742, "step": 4560 }, { "epoch": 2.36, "learning_rate": 1.1866513673742588e-07, "logits/chosen": -2.6526236534118652, "logits/rejected": -2.5933640003204346, "logps/chosen": -244.880615234375, "logps/rejected": -330.068603515625, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -1.4325166940689087, "rewards/margins": 8.305582046508789, "rewards/rejected": -9.73809814453125, "step": 4570 }, { "epoch": 2.36, "learning_rate": 1.1770893096194301e-07, "logits/chosen": -2.6464786529541016, "logits/rejected": -2.621084451675415, "logps/chosen": -358.1322326660156, "logps/rejected": -398.2645568847656, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.9040740728378296, "rewards/margins": 10.22703742980957, "rewards/rejected": -12.131113052368164, "step": 4580 }, { "epoch": 2.37, "learning_rate": 1.1675272518646012e-07, "logits/chosen": -2.704784631729126, "logits/rejected": -2.6682817935943604, "logps/chosen": -297.62274169921875, "logps/rejected": -330.6324462890625, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -0.19843515753746033, "rewards/margins": 9.420347213745117, "rewards/rejected": -9.618782997131348, "step": 4590 }, { "epoch": 2.37, "learning_rate": 1.1579651941097724e-07, "logits/chosen": -2.6055915355682373, "logits/rejected": -2.6153995990753174, "logps/chosen": -305.62933349609375, "logps/rejected": -291.2359924316406, "loss": 0.0182, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7859185934066772, "rewards/margins": 8.813383102416992, "rewards/rejected": -10.599302291870117, "step": 4600 }, { "epoch": 2.37, "eval_logits/chosen": -2.4565374851226807, "eval_logits/rejected": -2.416307210922241, "eval_logps/chosen": -293.22906494140625, "eval_logps/rejected": -332.3355712890625, "eval_loss": 0.7476168870925903, "eval_rewards/accuracies": 0.828125, "eval_rewards/chosen": -3.8993959426879883, "eval_rewards/margins": 3.637169361114502, "eval_rewards/rejected": -7.536564826965332, "eval_runtime": 57.2122, "eval_samples_per_second": 17.479, "eval_steps_per_second": 0.28, "step": 4600 }, { "epoch": 2.38, "learning_rate": 1.1484031363549436e-07, "logits/chosen": -2.5126757621765137, "logits/rejected": -2.449023962020874, "logps/chosen": -327.66717529296875, "logps/rejected": -361.0265808105469, "loss": 0.0268, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0840609073638916, "rewards/margins": 10.741630554199219, "rewards/rejected": -11.825691223144531, "step": 4610 }, { "epoch": 2.39, "learning_rate": 1.1388410786001147e-07, "logits/chosen": -2.6590983867645264, "logits/rejected": -2.688147783279419, "logps/chosen": -304.8904113769531, "logps/rejected": -383.8213195800781, "loss": 0.0162, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.8201377391815186, "rewards/margins": 9.453073501586914, "rewards/rejected": -11.273211479187012, "step": 4620 }, { "epoch": 2.39, "learning_rate": 1.1292790208452859e-07, "logits/chosen": -2.6834404468536377, "logits/rejected": -2.6824703216552734, "logps/chosen": -271.0035400390625, "logps/rejected": -379.20989990234375, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -1.0902073383331299, "rewards/margins": 11.127284049987793, "rewards/rejected": -12.21749210357666, "step": 4630 }, { "epoch": 2.4, "learning_rate": 1.119716963090457e-07, "logits/chosen": -2.3537399768829346, "logits/rejected": -2.4233551025390625, "logps/chosen": -216.2086944580078, "logps/rejected": -297.00640869140625, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -2.2917330265045166, "rewards/margins": 7.949918270111084, "rewards/rejected": -10.24165153503418, "step": 4640 }, { "epoch": 2.4, "learning_rate": 1.1101549053356282e-07, "logits/chosen": -2.7646780014038086, "logits/rejected": -2.6880381107330322, "logps/chosen": -306.4629821777344, "logps/rejected": -336.5583190917969, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -1.0529628992080688, "rewards/margins": 8.59121036529541, "rewards/rejected": -9.644172668457031, "step": 4650 }, { "epoch": 2.41, "learning_rate": 1.1005928475807993e-07, "logits/chosen": -2.550281286239624, "logits/rejected": -2.499551296234131, "logps/chosen": -284.04730224609375, "logps/rejected": -312.99896240234375, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.5507326126098633, "rewards/margins": 9.462206840515137, "rewards/rejected": -11.012939453125, "step": 4660 }, { "epoch": 2.41, "learning_rate": 1.0910307898259705e-07, "logits/chosen": -2.3352179527282715, "logits/rejected": -2.438673973083496, "logps/chosen": -236.6370849609375, "logps/rejected": -284.3169250488281, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.611262559890747, "rewards/margins": 8.59797477722168, "rewards/rejected": -10.209238052368164, "step": 4670 }, { "epoch": 2.42, "learning_rate": 1.0814687320711418e-07, "logits/chosen": -2.474139928817749, "logits/rejected": -2.377544641494751, "logps/chosen": -238.1358184814453, "logps/rejected": -414.88720703125, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.9006067514419556, "rewards/margins": 9.972890853881836, "rewards/rejected": -10.873498916625977, "step": 4680 }, { "epoch": 2.42, "learning_rate": 1.0719066743163128e-07, "logits/chosen": -2.493590831756592, "logits/rejected": -2.6044669151306152, "logps/chosen": -323.1622009277344, "logps/rejected": -323.79510498046875, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -1.587964415550232, "rewards/margins": 9.504448890686035, "rewards/rejected": -11.092413902282715, "step": 4690 }, { "epoch": 2.43, "learning_rate": 1.062344616561484e-07, "logits/chosen": -2.638388156890869, "logits/rejected": -2.634883403778076, "logps/chosen": -368.1080627441406, "logps/rejected": -507.3169860839844, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.11677348613739014, "rewards/margins": 13.523852348327637, "rewards/rejected": -13.640626907348633, "step": 4700 }, { "epoch": 2.43, "eval_logits/chosen": -2.4100139141082764, "eval_logits/rejected": -2.369899272918701, "eval_logps/chosen": -294.79522705078125, "eval_logps/rejected": -332.7344665527344, "eval_loss": 0.7198817133903503, "eval_rewards/accuracies": 0.84375, "eval_rewards/chosen": -4.056015968322754, "eval_rewards/margins": 3.5204358100891113, "eval_rewards/rejected": -7.576451778411865, "eval_runtime": 55.0706, "eval_samples_per_second": 18.158, "eval_steps_per_second": 0.291, "step": 4700 }, { "epoch": 2.43, "learning_rate": 1.0527825588066551e-07, "logits/chosen": -2.615658760070801, "logits/rejected": -2.48193097114563, "logps/chosen": -350.5819091796875, "logps/rejected": -332.139892578125, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.4044158458709717, "rewards/margins": 10.358014106750488, "rewards/rejected": -11.762430191040039, "step": 4710 }, { "epoch": 2.44, "learning_rate": 1.0432205010518264e-07, "logits/chosen": -2.6633851528167725, "logits/rejected": -2.6755900382995605, "logps/chosen": -244.67703247070312, "logps/rejected": -381.3924865722656, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.5906885266304016, "rewards/margins": 11.672990798950195, "rewards/rejected": -12.263678550720215, "step": 4720 }, { "epoch": 2.44, "learning_rate": 1.0336584432969974e-07, "logits/chosen": -2.4058127403259277, "logits/rejected": -2.398548126220703, "logps/chosen": -268.20660400390625, "logps/rejected": -309.49078369140625, "loss": 0.0141, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.11153459548950195, "rewards/margins": 9.980080604553223, "rewards/rejected": -9.868546485900879, "step": 4730 }, { "epoch": 2.45, "learning_rate": 1.0240963855421686e-07, "logits/chosen": -2.542297601699829, "logits/rejected": -2.5843400955200195, "logps/chosen": -385.5765686035156, "logps/rejected": -344.6966552734375, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.7057178020477295, "rewards/margins": 9.097609519958496, "rewards/rejected": -10.803327560424805, "step": 4740 }, { "epoch": 2.45, "learning_rate": 1.0145343277873399e-07, "logits/chosen": -2.505624771118164, "logits/rejected": -2.4930660724639893, "logps/chosen": -330.05987548828125, "logps/rejected": -383.5957336425781, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.8719791173934937, "rewards/margins": 10.555585861206055, "rewards/rejected": -11.427566528320312, "step": 4750 }, { "epoch": 2.46, "learning_rate": 1.004972270032511e-07, "logits/chosen": -2.2423624992370605, "logits/rejected": -2.250560760498047, "logps/chosen": -287.89349365234375, "logps/rejected": -298.4164123535156, "loss": 0.0167, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.064730167388916, "rewards/margins": 7.943607330322266, "rewards/rejected": -9.00833797454834, "step": 4760 }, { "epoch": 2.46, "learning_rate": 9.95410212277682e-08, "logits/chosen": -2.6729438304901123, "logits/rejected": -2.5839288234710693, "logps/chosen": -304.6081237792969, "logps/rejected": -281.4034423828125, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -0.613656759262085, "rewards/margins": 8.870094299316406, "rewards/rejected": -9.483750343322754, "step": 4770 }, { "epoch": 2.47, "learning_rate": 9.858481545228532e-08, "logits/chosen": -2.431548833847046, "logits/rejected": -2.5211846828460693, "logps/chosen": -185.5460205078125, "logps/rejected": -266.6904602050781, "loss": 0.0164, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3175272941589355, "rewards/margins": 8.721592903137207, "rewards/rejected": -10.039118766784668, "step": 4780 }, { "epoch": 2.47, "learning_rate": 9.762860967680245e-08, "logits/chosen": -2.5315418243408203, "logits/rejected": -2.6745972633361816, "logps/chosen": -215.60311889648438, "logps/rejected": -252.6163330078125, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.5650713443756104, "rewards/margins": 8.301115036010742, "rewards/rejected": -9.866186141967773, "step": 4790 }, { "epoch": 2.48, "learning_rate": 9.667240390131957e-08, "logits/chosen": -2.6866960525512695, "logits/rejected": -2.6582419872283936, "logps/chosen": -263.9376220703125, "logps/rejected": -329.9527587890625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.7252375483512878, "rewards/margins": 12.293913841247559, "rewards/rejected": -13.01915168762207, "step": 4800 }, { "epoch": 2.48, "eval_logits/chosen": -2.430349826812744, "eval_logits/rejected": -2.3925321102142334, "eval_logps/chosen": -290.84771728515625, "eval_logps/rejected": -328.32550048828125, "eval_loss": 0.7047879695892334, "eval_rewards/accuracies": 0.875, "eval_rewards/chosen": -3.6612637042999268, "eval_rewards/margins": 3.474294662475586, "eval_rewards/rejected": -7.135558605194092, "eval_runtime": 56.5008, "eval_samples_per_second": 17.699, "eval_steps_per_second": 0.283, "step": 4800 }, { "epoch": 2.48, "learning_rate": 9.571619812583667e-08, "logits/chosen": -2.3522887229919434, "logits/rejected": -2.5020272731781006, "logps/chosen": -404.24993896484375, "logps/rejected": -365.1546936035156, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.5152179598808289, "rewards/margins": 10.364774703979492, "rewards/rejected": -10.879993438720703, "step": 4810 }, { "epoch": 2.49, "learning_rate": 9.47599923503538e-08, "logits/chosen": -2.634892225265503, "logits/rejected": -2.660521984100342, "logps/chosen": -330.85308837890625, "logps/rejected": -385.0195617675781, "loss": 0.0189, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.17577147483825684, "rewards/margins": 10.900343894958496, "rewards/rejected": -11.076115608215332, "step": 4820 }, { "epoch": 2.49, "learning_rate": 9.380378657487091e-08, "logits/chosen": -2.514988422393799, "logits/rejected": -2.510554790496826, "logps/chosen": -250.59939575195312, "logps/rejected": -327.1246643066406, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.6027127504348755, "rewards/margins": 11.022318840026855, "rewards/rejected": -11.625032424926758, "step": 4830 }, { "epoch": 2.5, "learning_rate": 9.284758079938803e-08, "logits/chosen": -2.7160019874572754, "logits/rejected": -2.725782632827759, "logps/chosen": -366.26788330078125, "logps/rejected": -313.48223876953125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.3688530921936035, "rewards/margins": 8.293670654296875, "rewards/rejected": -9.66252326965332, "step": 4840 }, { "epoch": 2.5, "learning_rate": 9.189137502390513e-08, "logits/chosen": -2.5986154079437256, "logits/rejected": -2.60760760307312, "logps/chosen": -338.04925537109375, "logps/rejected": -425.7908630371094, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.2331068515777588, "rewards/margins": 9.989707946777344, "rewards/rejected": -11.222814559936523, "step": 4850 }, { "epoch": 2.51, "learning_rate": 9.093516924842226e-08, "logits/chosen": -2.5680298805236816, "logits/rejected": -2.603311061859131, "logps/chosen": -270.52349853515625, "logps/rejected": -418.3185119628906, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.3546164631843567, "rewards/margins": 11.326202392578125, "rewards/rejected": -11.680818557739258, "step": 4860 }, { "epoch": 2.51, "learning_rate": 8.997896347293938e-08, "logits/chosen": -2.529101610183716, "logits/rejected": -2.4874515533447266, "logps/chosen": -205.5690460205078, "logps/rejected": -333.98065185546875, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.0936403274536133, "rewards/margins": 9.565814018249512, "rewards/rejected": -10.659454345703125, "step": 4870 }, { "epoch": 2.52, "learning_rate": 8.902275769745648e-08, "logits/chosen": -2.4107840061187744, "logits/rejected": -2.529804229736328, "logps/chosen": -229.46145629882812, "logps/rejected": -267.4582214355469, "loss": 0.0123, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.5835365056991577, "rewards/margins": 7.870436668395996, "rewards/rejected": -9.453973770141602, "step": 4880 }, { "epoch": 2.52, "learning_rate": 8.806655192197361e-08, "logits/chosen": -2.4289088249206543, "logits/rejected": -2.549330949783325, "logps/chosen": -171.3069610595703, "logps/rejected": -321.93853759765625, "loss": 0.0065, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.041638135910034, "rewards/margins": 10.020352363586426, "rewards/rejected": -12.061990737915039, "step": 4890 }, { "epoch": 2.53, "learning_rate": 8.711034614649072e-08, "logits/chosen": -2.7017006874084473, "logits/rejected": -2.7009201049804688, "logps/chosen": -279.64984130859375, "logps/rejected": -352.21160888671875, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.1117719411849976, "rewards/margins": 10.062509536743164, "rewards/rejected": -11.17428207397461, "step": 4900 }, { "epoch": 2.53, "eval_logits/chosen": -2.4046812057495117, "eval_logits/rejected": -2.36327862739563, "eval_logps/chosen": -292.14312744140625, "eval_logps/rejected": -330.12237548828125, "eval_loss": 0.6975539326667786, "eval_rewards/accuracies": 0.8125, "eval_rewards/chosen": -3.7908036708831787, "eval_rewards/margins": 3.524440288543701, "eval_rewards/rejected": -7.315243721008301, "eval_runtime": 53.2942, "eval_samples_per_second": 18.764, "eval_steps_per_second": 0.3, "step": 4900 }, { "epoch": 2.53, "learning_rate": 8.615414037100784e-08, "logits/chosen": -2.5984580516815186, "logits/rejected": -2.746319532394409, "logps/chosen": -321.95367431640625, "logps/rejected": -298.1436767578125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -2.020658016204834, "rewards/margins": 8.369918823242188, "rewards/rejected": -10.390576362609863, "step": 4910 }, { "epoch": 2.54, "learning_rate": 8.519793459552494e-08, "logits/chosen": -2.359086513519287, "logits/rejected": -2.3888332843780518, "logps/chosen": -395.9248962402344, "logps/rejected": -374.02069091796875, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.04106631129980087, "rewards/margins": 11.742452621459961, "rewards/rejected": -11.783517837524414, "step": 4920 }, { "epoch": 2.55, "learning_rate": 8.424172882004207e-08, "logits/chosen": -2.661177158355713, "logits/rejected": -2.6514670848846436, "logps/chosen": -387.62054443359375, "logps/rejected": -339.0218505859375, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": 0.0007514476892538369, "rewards/margins": 9.993762016296387, "rewards/rejected": -9.993009567260742, "step": 4930 }, { "epoch": 2.55, "learning_rate": 8.328552304455919e-08, "logits/chosen": -2.335365056991577, "logits/rejected": -2.317937135696411, "logps/chosen": -231.7373504638672, "logps/rejected": -296.01287841796875, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.7403205633163452, "rewards/margins": 9.786886215209961, "rewards/rejected": -10.527207374572754, "step": 4940 }, { "epoch": 2.56, "learning_rate": 8.23293172690763e-08, "logits/chosen": -2.5740818977355957, "logits/rejected": -2.612046718597412, "logps/chosen": -265.88116455078125, "logps/rejected": -311.5575256347656, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.1604253053665161, "rewards/margins": 10.83531665802002, "rewards/rejected": -11.995742797851562, "step": 4950 }, { "epoch": 2.56, "learning_rate": 8.137311149359343e-08, "logits/chosen": -2.7012178897857666, "logits/rejected": -2.6206467151641846, "logps/chosen": -434.08843994140625, "logps/rejected": -364.0971984863281, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.09630658477544785, "rewards/margins": 11.247058868408203, "rewards/rejected": -11.343365669250488, "step": 4960 }, { "epoch": 2.57, "learning_rate": 8.041690571811053e-08, "logits/chosen": -2.614105463027954, "logits/rejected": -2.5202865600585938, "logps/chosen": -219.88876342773438, "logps/rejected": -269.26568603515625, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -2.203838348388672, "rewards/margins": 8.655978202819824, "rewards/rejected": -10.859817504882812, "step": 4970 }, { "epoch": 2.57, "learning_rate": 7.946069994262765e-08, "logits/chosen": -2.546452045440674, "logits/rejected": -2.6220192909240723, "logps/chosen": -294.5769958496094, "logps/rejected": -284.33343505859375, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0741980075836182, "rewards/margins": 9.485953330993652, "rewards/rejected": -10.560152053833008, "step": 4980 }, { "epoch": 2.58, "learning_rate": 7.850449416714476e-08, "logits/chosen": -2.7545557022094727, "logits/rejected": -2.676429033279419, "logps/chosen": -480.96600341796875, "logps/rejected": -401.0008850097656, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.3216051459312439, "rewards/margins": 9.37825870513916, "rewards/rejected": -9.699864387512207, "step": 4990 }, { "epoch": 2.58, "learning_rate": 7.754828839166188e-08, "logits/chosen": -2.48799467086792, "logits/rejected": -2.4741270542144775, "logps/chosen": -251.6031036376953, "logps/rejected": -321.9014587402344, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.3492779731750488, "rewards/margins": 8.896702766418457, "rewards/rejected": -10.245981216430664, "step": 5000 }, { "epoch": 2.58, "eval_logits/chosen": -2.4194068908691406, "eval_logits/rejected": -2.3763530254364014, "eval_logps/chosen": -293.284423828125, "eval_logps/rejected": -332.5270690917969, "eval_loss": 0.7198395133018494, "eval_rewards/accuracies": 0.828125, "eval_rewards/chosen": -3.9049317836761475, "eval_rewards/margins": 3.650782823562622, "eval_rewards/rejected": -7.555714130401611, "eval_runtime": 56.8998, "eval_samples_per_second": 17.575, "eval_steps_per_second": 0.281, "step": 5000 }, { "epoch": 2.59, "learning_rate": 7.6592082616179e-08, "logits/chosen": -2.4661271572113037, "logits/rejected": -2.477613687515259, "logps/chosen": -245.18594360351562, "logps/rejected": -335.5259094238281, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.542055368423462, "rewards/margins": 10.06078052520752, "rewards/rejected": -11.602836608886719, "step": 5010 }, { "epoch": 2.59, "learning_rate": 7.563587684069611e-08, "logits/chosen": -2.5083346366882324, "logits/rejected": -2.643256187438965, "logps/chosen": -207.7921600341797, "logps/rejected": -385.1307678222656, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.6641393899917603, "rewards/margins": 10.65031909942627, "rewards/rejected": -12.314460754394531, "step": 5020 }, { "epoch": 2.6, "learning_rate": 7.467967106521324e-08, "logits/chosen": -2.588287830352783, "logits/rejected": -2.5413451194763184, "logps/chosen": -273.2277526855469, "logps/rejected": -238.3046875, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.6997830867767334, "rewards/margins": 7.600827217102051, "rewards/rejected": -8.300610542297363, "step": 5030 }, { "epoch": 2.6, "learning_rate": 7.372346528973034e-08, "logits/chosen": -2.5987842082977295, "logits/rejected": -2.5648391246795654, "logps/chosen": -217.76416015625, "logps/rejected": -320.9278259277344, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.9506279230117798, "rewards/margins": 10.49673080444336, "rewards/rejected": -12.447359085083008, "step": 5040 }, { "epoch": 2.61, "learning_rate": 7.276725951424746e-08, "logits/chosen": -2.5334415435791016, "logits/rejected": -2.48858642578125, "logps/chosen": -171.40257263183594, "logps/rejected": -390.48590087890625, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.9724035263061523, "rewards/margins": 8.248844146728516, "rewards/rejected": -10.2212495803833, "step": 5050 }, { "epoch": 2.61, "learning_rate": 7.181105373876457e-08, "logits/chosen": -2.483840227127075, "logits/rejected": -2.437764883041382, "logps/chosen": -204.07522583007812, "logps/rejected": -299.15594482421875, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.3759775161743164, "rewards/margins": 9.727631568908691, "rewards/rejected": -11.103609085083008, "step": 5060 }, { "epoch": 2.62, "learning_rate": 7.08548479632817e-08, "logits/chosen": -2.751817226409912, "logits/rejected": -2.6693196296691895, "logps/chosen": -372.95458984375, "logps/rejected": -369.3866271972656, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.938228964805603, "rewards/margins": 9.311058044433594, "rewards/rejected": -10.249287605285645, "step": 5070 }, { "epoch": 2.62, "learning_rate": 6.98986421877988e-08, "logits/chosen": -2.541592836380005, "logits/rejected": -2.455427646636963, "logps/chosen": -295.2919006347656, "logps/rejected": -412.5565490722656, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.2343528270721436, "rewards/margins": 10.338408470153809, "rewards/rejected": -11.572762489318848, "step": 5080 }, { "epoch": 2.63, "learning_rate": 6.894243641231592e-08, "logits/chosen": -2.578338623046875, "logits/rejected": -2.542959690093994, "logps/chosen": -227.2720947265625, "logps/rejected": -329.40032958984375, "loss": 0.0778, "rewards/accuracies": 1.0, "rewards/chosen": -0.6857062578201294, "rewards/margins": 11.191483497619629, "rewards/rejected": -11.877190589904785, "step": 5090 }, { "epoch": 2.63, "learning_rate": 6.798623063683305e-08, "logits/chosen": -2.287254810333252, "logits/rejected": -2.432054281234741, "logps/chosen": -312.0555114746094, "logps/rejected": -452.99169921875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.9427648782730103, "rewards/margins": 15.484460830688477, "rewards/rejected": -16.42722511291504, "step": 5100 }, { "epoch": 2.63, "eval_logits/chosen": -2.3859879970550537, "eval_logits/rejected": -2.340737819671631, "eval_logps/chosen": -296.35302734375, "eval_logps/rejected": -336.11944580078125, "eval_loss": 0.7505870461463928, "eval_rewards/accuracies": 0.8125, "eval_rewards/chosen": -4.211794376373291, "eval_rewards/margins": 3.703155040740967, "eval_rewards/rejected": -7.914949893951416, "eval_runtime": 56.2566, "eval_samples_per_second": 17.776, "eval_steps_per_second": 0.284, "step": 5100 }, { "epoch": 2.64, "learning_rate": 6.703002486135017e-08, "logits/chosen": -2.3773114681243896, "logits/rejected": -2.5287060737609863, "logps/chosen": -236.22640991210938, "logps/rejected": -360.97784423828125, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.9979360699653625, "rewards/margins": 10.704690933227539, "rewards/rejected": -11.702627182006836, "step": 5110 }, { "epoch": 2.64, "learning_rate": 6.607381908586727e-08, "logits/chosen": -2.642033338546753, "logits/rejected": -2.6108345985412598, "logps/chosen": -317.5076599121094, "logps/rejected": -348.7528076171875, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.6177361011505127, "rewards/margins": 9.611312866210938, "rewards/rejected": -11.229048728942871, "step": 5120 }, { "epoch": 2.65, "learning_rate": 6.511761331038438e-08, "logits/chosen": -2.6155383586883545, "logits/rejected": -2.6100358963012695, "logps/chosen": -281.2548522949219, "logps/rejected": -298.05865478515625, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -2.588085651397705, "rewards/margins": 7.020742893218994, "rewards/rejected": -9.608829498291016, "step": 5130 }, { "epoch": 2.65, "learning_rate": 6.416140753490151e-08, "logits/chosen": -2.627002239227295, "logits/rejected": -2.6328094005584717, "logps/chosen": -421.49774169921875, "logps/rejected": -432.20098876953125, "loss": 0.0179, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4822900295257568, "rewards/margins": 8.786565780639648, "rewards/rejected": -10.268855094909668, "step": 5140 }, { "epoch": 2.66, "learning_rate": 6.320520175941863e-08, "logits/chosen": -2.4586381912231445, "logits/rejected": -2.452455997467041, "logps/chosen": -246.73715209960938, "logps/rejected": -327.2841491699219, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -2.41286563873291, "rewards/margins": 9.417495727539062, "rewards/rejected": -11.830362319946289, "step": 5150 }, { "epoch": 2.66, "learning_rate": 6.224899598393573e-08, "logits/chosen": -2.557018756866455, "logits/rejected": -2.4926464557647705, "logps/chosen": -285.82635498046875, "logps/rejected": -348.3973693847656, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.4971519708633423, "rewards/margins": 9.434330940246582, "rewards/rejected": -10.931482315063477, "step": 5160 }, { "epoch": 2.67, "learning_rate": 6.129279020845286e-08, "logits/chosen": -2.4606575965881348, "logits/rejected": -2.5436136722564697, "logps/chosen": -265.47454833984375, "logps/rejected": -310.14862060546875, "loss": 0.0113, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3885912597179413, "rewards/margins": 10.448331832885742, "rewards/rejected": -10.836923599243164, "step": 5170 }, { "epoch": 2.67, "learning_rate": 6.033658443296998e-08, "logits/chosen": -2.5347704887390137, "logits/rejected": -2.484384059906006, "logps/chosen": -266.8102111816406, "logps/rejected": -340.2280578613281, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.881771445274353, "rewards/margins": 11.114812850952148, "rewards/rejected": -11.99658489227295, "step": 5180 }, { "epoch": 2.68, "learning_rate": 5.9380378657487085e-08, "logits/chosen": -2.5079243183135986, "logits/rejected": -2.5110316276550293, "logps/chosen": -455.3853454589844, "logps/rejected": -375.2730407714844, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.6811244487762451, "rewards/margins": 11.579205513000488, "rewards/rejected": -12.26032829284668, "step": 5190 }, { "epoch": 2.68, "learning_rate": 5.842417288200421e-08, "logits/chosen": -2.665579080581665, "logits/rejected": -2.835705280303955, "logps/chosen": -388.7041320800781, "logps/rejected": -376.1544494628906, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.7552551627159119, "rewards/margins": 9.90630054473877, "rewards/rejected": -10.6615571975708, "step": 5200 }, { "epoch": 2.68, "eval_logits/chosen": -2.394641160964966, "eval_logits/rejected": -2.350865125656128, "eval_logps/chosen": -296.6682434082031, "eval_logps/rejected": -336.7720642089844, "eval_loss": 0.7407526969909668, "eval_rewards/accuracies": 0.8125, "eval_rewards/chosen": -4.243312835693359, "eval_rewards/margins": 3.7369019985198975, "eval_rewards/rejected": -7.980215549468994, "eval_runtime": 55.9932, "eval_samples_per_second": 17.859, "eval_steps_per_second": 0.286, "step": 5200 }, { "epoch": 2.69, "learning_rate": 5.7467967106521317e-08, "logits/chosen": -2.518009901046753, "logits/rejected": -2.5615527629852295, "logps/chosen": -296.2488098144531, "logps/rejected": -408.9478454589844, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.9118080139160156, "rewards/margins": 10.68850326538086, "rewards/rejected": -12.600311279296875, "step": 5210 }, { "epoch": 2.69, "learning_rate": 5.651176133103844e-08, "logits/chosen": -2.740626096725464, "logits/rejected": -2.676818370819092, "logps/chosen": -306.84588623046875, "logps/rejected": -342.53240966796875, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.46114450693130493, "rewards/margins": 10.382904052734375, "rewards/rejected": -10.844049453735352, "step": 5220 }, { "epoch": 2.7, "learning_rate": 5.555555555555555e-08, "logits/chosen": -2.5409655570983887, "logits/rejected": -2.4781863689422607, "logps/chosen": -215.8829345703125, "logps/rejected": -301.06756591796875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.1217892169952393, "rewards/margins": 8.91219711303711, "rewards/rejected": -10.033987045288086, "step": 5230 }, { "epoch": 2.71, "learning_rate": 5.459934978007267e-08, "logits/chosen": -2.6471657752990723, "logits/rejected": -2.611330509185791, "logps/chosen": -273.4901123046875, "logps/rejected": -403.7444763183594, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.45464539527893066, "rewards/margins": 11.401620864868164, "rewards/rejected": -11.856266021728516, "step": 5240 }, { "epoch": 2.71, "learning_rate": 5.3643144004589786e-08, "logits/chosen": -2.542269706726074, "logits/rejected": -2.433465003967285, "logps/chosen": -301.2662048339844, "logps/rejected": -399.2783203125, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 0.4464475214481354, "rewards/margins": 13.448400497436523, "rewards/rejected": -13.001953125, "step": 5250 }, { "epoch": 2.72, "learning_rate": 5.26869382291069e-08, "logits/chosen": -2.6752572059631348, "logits/rejected": -2.7158637046813965, "logps/chosen": -226.4488983154297, "logps/rejected": -335.8851623535156, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.019395578652620316, "rewards/margins": 10.634721755981445, "rewards/rejected": -10.654115676879883, "step": 5260 }, { "epoch": 2.72, "learning_rate": 5.173073245362402e-08, "logits/chosen": -2.265803337097168, "logits/rejected": -2.495293617248535, "logps/chosen": -273.8394470214844, "logps/rejected": -262.0378112792969, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.907080888748169, "rewards/margins": 9.230062484741211, "rewards/rejected": -11.1371431350708, "step": 5270 }, { "epoch": 2.73, "learning_rate": 5.077452667814113e-08, "logits/chosen": -2.645397186279297, "logits/rejected": -2.6353235244750977, "logps/chosen": -234.93240356445312, "logps/rejected": -313.4653015136719, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.619875192642212, "rewards/margins": 9.85165786743164, "rewards/rejected": -11.47153377532959, "step": 5280 }, { "epoch": 2.73, "learning_rate": 4.981832090265825e-08, "logits/chosen": -2.5697460174560547, "logits/rejected": -2.524587631225586, "logps/chosen": -278.901123046875, "logps/rejected": -400.72540283203125, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 0.3195740878582001, "rewards/margins": 12.423995018005371, "rewards/rejected": -12.104421615600586, "step": 5290 }, { "epoch": 2.74, "learning_rate": 4.8862115127175364e-08, "logits/chosen": -2.6613426208496094, "logits/rejected": -2.5382397174835205, "logps/chosen": -298.51617431640625, "logps/rejected": -405.8147277832031, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.6868101358413696, "rewards/margins": 10.438592910766602, "rewards/rejected": -12.125402450561523, "step": 5300 }, { "epoch": 2.74, "eval_logits/chosen": -2.384242057800293, "eval_logits/rejected": -2.33884596824646, "eval_logps/chosen": -297.62750244140625, "eval_logps/rejected": -337.80126953125, "eval_loss": 0.7552159428596497, "eval_rewards/accuracies": 0.796875, "eval_rewards/chosen": -4.339241981506348, "eval_rewards/margins": 3.743894100189209, "eval_rewards/rejected": -8.083136558532715, "eval_runtime": 59.8742, "eval_samples_per_second": 16.702, "eval_steps_per_second": 0.267, "step": 5300 }, { "epoch": 2.74, "learning_rate": 4.790590935169248e-08, "logits/chosen": -2.6838698387145996, "logits/rejected": -2.574967384338379, "logps/chosen": -274.59368896484375, "logps/rejected": -469.4027404785156, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4363610744476318, "rewards/margins": 10.517416000366211, "rewards/rejected": -11.953778266906738, "step": 5310 }, { "epoch": 2.75, "learning_rate": 4.69497035762096e-08, "logits/chosen": -2.537161350250244, "logits/rejected": -2.4791531562805176, "logps/chosen": -344.87347412109375, "logps/rejected": -444.57366943359375, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -2.297786235809326, "rewards/margins": 12.291933059692383, "rewards/rejected": -14.589719772338867, "step": 5320 }, { "epoch": 2.75, "learning_rate": 4.599349780072671e-08, "logits/chosen": -2.5215706825256348, "logits/rejected": -2.408939838409424, "logps/chosen": -344.95184326171875, "logps/rejected": -272.84417724609375, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -2.2328124046325684, "rewards/margins": 9.55348014831543, "rewards/rejected": -11.78629207611084, "step": 5330 }, { "epoch": 2.76, "learning_rate": 4.5037292025243834e-08, "logits/chosen": -2.4096181392669678, "logits/rejected": -2.3585500717163086, "logps/chosen": -231.6038055419922, "logps/rejected": -414.8946838378906, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -2.199920892715454, "rewards/margins": 9.544143676757812, "rewards/rejected": -11.744064331054688, "step": 5340 }, { "epoch": 2.76, "learning_rate": 4.408108624976094e-08, "logits/chosen": -2.6306357383728027, "logits/rejected": -2.373485565185547, "logps/chosen": -273.1640625, "logps/rejected": -362.6429138183594, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.7220970392227173, "rewards/margins": 11.861469268798828, "rewards/rejected": -12.583566665649414, "step": 5350 }, { "epoch": 2.77, "learning_rate": 4.3124880474278065e-08, "logits/chosen": -2.6821742057800293, "logits/rejected": -2.5935044288635254, "logps/chosen": -330.2795715332031, "logps/rejected": -295.5904541015625, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -0.4781021177768707, "rewards/margins": 9.99770450592041, "rewards/rejected": -10.475805282592773, "step": 5360 }, { "epoch": 2.77, "learning_rate": 4.2168674698795174e-08, "logits/chosen": -2.668886184692383, "logits/rejected": -2.7140769958496094, "logps/chosen": -188.55136108398438, "logps/rejected": -355.8598327636719, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.314338207244873, "rewards/margins": 8.853775978088379, "rewards/rejected": -11.168115615844727, "step": 5370 }, { "epoch": 2.78, "learning_rate": 4.1212468923312296e-08, "logits/chosen": -2.5291595458984375, "logits/rejected": -2.4308247566223145, "logps/chosen": -271.3199768066406, "logps/rejected": -382.0475158691406, "loss": 0.0167, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.8199418783187866, "rewards/margins": 9.165318489074707, "rewards/rejected": -10.985260009765625, "step": 5380 }, { "epoch": 2.78, "learning_rate": 4.025626314782941e-08, "logits/chosen": -2.578953266143799, "logits/rejected": -2.5158464908599854, "logps/chosen": -277.73052978515625, "logps/rejected": -347.6210632324219, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.26607850193977356, "rewards/margins": 11.397039413452148, "rewards/rejected": -11.663119316101074, "step": 5390 }, { "epoch": 2.79, "learning_rate": 3.930005737234653e-08, "logits/chosen": -2.543391704559326, "logits/rejected": -2.6446430683135986, "logps/chosen": -216.94741821289062, "logps/rejected": -307.4268493652344, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.3559612035751343, "rewards/margins": 9.72540283203125, "rewards/rejected": -11.0813627243042, "step": 5400 }, { "epoch": 2.79, "eval_logits/chosen": -2.3737339973449707, "eval_logits/rejected": -2.3286330699920654, "eval_logps/chosen": -296.6304016113281, "eval_logps/rejected": -336.73223876953125, "eval_loss": 0.7403773069381714, "eval_rewards/accuracies": 0.8125, "eval_rewards/chosen": -4.239532470703125, "eval_rewards/margins": 3.7366957664489746, "eval_rewards/rejected": -7.9762282371521, "eval_runtime": 58.906, "eval_samples_per_second": 16.976, "eval_steps_per_second": 0.272, "step": 5400 }, { "epoch": 2.79, "learning_rate": 3.8343851596863644e-08, "logits/chosen": -2.68801212310791, "logits/rejected": -2.5317561626434326, "logps/chosen": -327.53106689453125, "logps/rejected": -318.7012939453125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.34214359521865845, "rewards/margins": 12.371678352355957, "rewards/rejected": -12.713821411132812, "step": 5410 }, { "epoch": 2.8, "learning_rate": 3.738764582138076e-08, "logits/chosen": -2.63051700592041, "logits/rejected": -2.5712480545043945, "logps/chosen": -292.55035400390625, "logps/rejected": -406.0823059082031, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -2.4555866718292236, "rewards/margins": 9.967303276062012, "rewards/rejected": -12.422890663146973, "step": 5420 }, { "epoch": 2.8, "learning_rate": 3.6431440045897875e-08, "logits/chosen": -2.558973550796509, "logits/rejected": -2.5760269165039062, "logps/chosen": -288.33062744140625, "logps/rejected": -438.51007080078125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.0792489051818848, "rewards/margins": 10.678377151489258, "rewards/rejected": -11.7576265335083, "step": 5430 }, { "epoch": 2.81, "learning_rate": 3.547523427041499e-08, "logits/chosen": -2.607342481613159, "logits/rejected": -2.609557628631592, "logps/chosen": -283.79608154296875, "logps/rejected": -293.2716369628906, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.1565558910369873, "rewards/margins": 10.7040433883667, "rewards/rejected": -11.860601425170898, "step": 5440 }, { "epoch": 2.81, "learning_rate": 3.4519028494932106e-08, "logits/chosen": -2.703679084777832, "logits/rejected": -2.5151591300964355, "logps/chosen": -353.95758056640625, "logps/rejected": -433.54766845703125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.7000893354415894, "rewards/margins": 11.649955749511719, "rewards/rejected": -12.350044250488281, "step": 5450 }, { "epoch": 2.82, "learning_rate": 3.356282271944923e-08, "logits/chosen": -2.603567600250244, "logits/rejected": -2.502267360687256, "logps/chosen": -226.2731170654297, "logps/rejected": -341.97320556640625, "loss": 0.0146, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9284445643424988, "rewards/margins": 11.389801979064941, "rewards/rejected": -12.318245887756348, "step": 5460 }, { "epoch": 2.82, "learning_rate": 3.260661694396634e-08, "logits/chosen": -2.5252528190612793, "logits/rejected": -2.5249342918395996, "logps/chosen": -263.4516906738281, "logps/rejected": -313.29998779296875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.3164348602294922, "rewards/margins": 11.594769477844238, "rewards/rejected": -11.911203384399414, "step": 5470 }, { "epoch": 2.83, "learning_rate": 3.165041116848346e-08, "logits/chosen": -2.660788059234619, "logits/rejected": -2.5421648025512695, "logps/chosen": -253.57839965820312, "logps/rejected": -402.5025329589844, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.6556440591812134, "rewards/margins": 12.180809020996094, "rewards/rejected": -13.836453437805176, "step": 5480 }, { "epoch": 2.83, "learning_rate": 3.0694205393000576e-08, "logits/chosen": -2.5150065422058105, "logits/rejected": -2.4512484073638916, "logps/chosen": -212.173828125, "logps/rejected": -332.0416564941406, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 0.4317614436149597, "rewards/margins": 11.226727485656738, "rewards/rejected": -10.794965744018555, "step": 5490 }, { "epoch": 2.84, "learning_rate": 2.9737999617517688e-08, "logits/chosen": -2.5827393531799316, "logits/rejected": -2.5749595165252686, "logps/chosen": -308.6050109863281, "logps/rejected": -365.627197265625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.4415368139743805, "rewards/margins": 10.942670822143555, "rewards/rejected": -11.384206771850586, "step": 5500 }, { "epoch": 2.84, "eval_logits/chosen": -2.364140272140503, "eval_logits/rejected": -2.319963216781616, "eval_logps/chosen": -298.70074462890625, "eval_logps/rejected": -339.1661682128906, "eval_loss": 0.7524814605712891, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -4.44656229019165, "eval_rewards/margins": 3.773061752319336, "eval_rewards/rejected": -8.219624519348145, "eval_runtime": 58.809, "eval_samples_per_second": 17.004, "eval_steps_per_second": 0.272, "step": 5500 }, { "epoch": 2.84, "learning_rate": 2.8781793842034804e-08, "logits/chosen": -2.426349401473999, "logits/rejected": -2.384749174118042, "logps/chosen": -259.9743347167969, "logps/rejected": -300.3887634277344, "loss": 0.015, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.3269202709198, "rewards/margins": 8.843810081481934, "rewards/rejected": -11.170731544494629, "step": 5510 }, { "epoch": 2.85, "learning_rate": 2.782558806655192e-08, "logits/chosen": -2.5341413021087646, "logits/rejected": -2.5924274921417236, "logps/chosen": -302.21563720703125, "logps/rejected": -409.6150817871094, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -1.3284962177276611, "rewards/margins": 9.481134414672852, "rewards/rejected": -10.809629440307617, "step": 5520 }, { "epoch": 2.85, "learning_rate": 2.6869382291069035e-08, "logits/chosen": -2.4547677040100098, "logits/rejected": -2.457869052886963, "logps/chosen": -229.97561645507812, "logps/rejected": -327.1774597167969, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.2366197109222412, "rewards/margins": 10.809396743774414, "rewards/rejected": -12.04601764678955, "step": 5530 }, { "epoch": 2.86, "learning_rate": 2.591317651558615e-08, "logits/chosen": -2.308411121368408, "logits/rejected": -2.4219300746917725, "logps/chosen": -283.2604675292969, "logps/rejected": -351.8711242675781, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.108804225921631, "rewards/margins": 10.290410995483398, "rewards/rejected": -12.399213790893555, "step": 5540 }, { "epoch": 2.87, "learning_rate": 2.4956970740103267e-08, "logits/chosen": -2.5385169982910156, "logits/rejected": -2.4845941066741943, "logps/chosen": -319.16473388671875, "logps/rejected": -403.1438293457031, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.6357721090316772, "rewards/margins": 10.850339889526367, "rewards/rejected": -12.486112594604492, "step": 5550 }, { "epoch": 2.87, "learning_rate": 2.4000764964620386e-08, "logits/chosen": -2.6393580436706543, "logits/rejected": -2.731678009033203, "logps/chosen": -312.54034423828125, "logps/rejected": -417.93658447265625, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.7174618244171143, "rewards/margins": 9.234588623046875, "rewards/rejected": -10.952049255371094, "step": 5560 }, { "epoch": 2.88, "learning_rate": 2.30445591891375e-08, "logits/chosen": -2.5211081504821777, "logits/rejected": -2.4729580879211426, "logps/chosen": -307.0782775878906, "logps/rejected": -450.36962890625, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 0.05847327783703804, "rewards/margins": 13.286686897277832, "rewards/rejected": -13.228212356567383, "step": 5570 }, { "epoch": 2.88, "learning_rate": 2.2088353413654617e-08, "logits/chosen": -2.313760757446289, "logits/rejected": -2.362217664718628, "logps/chosen": -294.3525390625, "logps/rejected": -331.1678771972656, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 1.203790307044983, "rewards/margins": 14.620699882507324, "rewards/rejected": -13.416910171508789, "step": 5580 }, { "epoch": 2.89, "learning_rate": 2.1132147638171733e-08, "logits/chosen": -2.0504655838012695, "logits/rejected": -2.1224112510681152, "logps/chosen": -262.3179626464844, "logps/rejected": -329.19732666015625, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.7382757067680359, "rewards/margins": 10.974761962890625, "rewards/rejected": -11.713037490844727, "step": 5590 }, { "epoch": 2.89, "learning_rate": 2.0175941862688848e-08, "logits/chosen": -2.4296658039093018, "logits/rejected": -2.384312391281128, "logps/chosen": -263.25592041015625, "logps/rejected": -385.36688232421875, "loss": 0.0077, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0216162204742432, "rewards/margins": 11.878069877624512, "rewards/rejected": -12.899686813354492, "step": 5600 }, { "epoch": 2.89, "eval_logits/chosen": -2.351677894592285, "eval_logits/rejected": -2.3077552318573, "eval_logps/chosen": -299.8206481933594, "eval_logps/rejected": -340.4544677734375, "eval_loss": 0.7519664168357849, "eval_rewards/accuracies": 0.796875, "eval_rewards/chosen": -4.5585551261901855, "eval_rewards/margins": 3.7899010181427, "eval_rewards/rejected": -8.348456382751465, "eval_runtime": 57.0149, "eval_samples_per_second": 17.539, "eval_steps_per_second": 0.281, "step": 5600 }, { "epoch": 2.9, "learning_rate": 1.9219736087205964e-08, "logits/chosen": -2.4466593265533447, "logits/rejected": -2.5641415119171143, "logps/chosen": -313.849609375, "logps/rejected": -355.71954345703125, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 0.058788836002349854, "rewards/margins": 9.841516494750977, "rewards/rejected": -9.782726287841797, "step": 5610 }, { "epoch": 2.9, "learning_rate": 1.826353031172308e-08, "logits/chosen": -2.6215555667877197, "logits/rejected": -2.596318244934082, "logps/chosen": -334.0271911621094, "logps/rejected": -374.85211181640625, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.0268580913543701, "rewards/margins": 10.430700302124023, "rewards/rejected": -11.45755672454834, "step": 5620 }, { "epoch": 2.91, "learning_rate": 1.73073245362402e-08, "logits/chosen": -2.3654887676239014, "logits/rejected": -2.3259222507476807, "logps/chosen": -418.31524658203125, "logps/rejected": -296.1111145019531, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -2.1241583824157715, "rewards/margins": 9.49864673614502, "rewards/rejected": -11.622804641723633, "step": 5630 }, { "epoch": 2.91, "learning_rate": 1.6351118760757314e-08, "logits/chosen": -2.4572885036468506, "logits/rejected": -2.4687421321868896, "logps/chosen": -250.23764038085938, "logps/rejected": -250.4460906982422, "loss": 0.0111, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8683540225028992, "rewards/margins": 8.310081481933594, "rewards/rejected": -9.178436279296875, "step": 5640 }, { "epoch": 2.92, "learning_rate": 1.539491298527443e-08, "logits/chosen": -2.6408703327178955, "logits/rejected": -2.641308307647705, "logps/chosen": -283.0168151855469, "logps/rejected": -405.4056701660156, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.7451741099357605, "rewards/margins": 11.91575813293457, "rewards/rejected": -12.660932540893555, "step": 5650 }, { "epoch": 2.92, "learning_rate": 1.4438707209791546e-08, "logits/chosen": -2.531616687774658, "logits/rejected": -2.6468756198883057, "logps/chosen": -414.9684143066406, "logps/rejected": -356.252685546875, "loss": 0.0157, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21153855323791504, "rewards/margins": 10.415987014770508, "rewards/rejected": -10.627525329589844, "step": 5660 }, { "epoch": 2.93, "learning_rate": 1.3482501434308661e-08, "logits/chosen": -2.300788402557373, "logits/rejected": -2.308450937271118, "logps/chosen": -356.91632080078125, "logps/rejected": -350.33892822265625, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -1.6116657257080078, "rewards/margins": 11.264276504516602, "rewards/rejected": -12.875943183898926, "step": 5670 }, { "epoch": 2.93, "learning_rate": 1.2526295658825777e-08, "logits/chosen": -2.603456497192383, "logits/rejected": -2.6166439056396484, "logps/chosen": -311.9185791015625, "logps/rejected": -450.9242248535156, "loss": 0.0147, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.2669516801834106, "rewards/margins": 11.346095085144043, "rewards/rejected": -12.613046646118164, "step": 5680 }, { "epoch": 2.94, "learning_rate": 1.1570089883342895e-08, "logits/chosen": -2.5220370292663574, "logits/rejected": -2.4531850814819336, "logps/chosen": -311.7686462402344, "logps/rejected": -400.8462829589844, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.0794099569320679, "rewards/margins": 12.467567443847656, "rewards/rejected": -13.546978950500488, "step": 5690 }, { "epoch": 2.94, "learning_rate": 1.061388410786001e-08, "logits/chosen": -2.4289803504943848, "logits/rejected": -2.4906742572784424, "logps/chosen": -285.3009338378906, "logps/rejected": -263.3441467285156, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -2.07789945602417, "rewards/margins": 7.724274635314941, "rewards/rejected": -9.80217456817627, "step": 5700 }, { "epoch": 2.94, "eval_logits/chosen": -2.3509910106658936, "eval_logits/rejected": -2.3062477111816406, "eval_logps/chosen": -299.77734375, "eval_logps/rejected": -340.47900390625, "eval_loss": 0.7527089715003967, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -4.5542216300964355, "eval_rewards/margins": 3.7966880798339844, "eval_rewards/rejected": -8.350910186767578, "eval_runtime": 55.9629, "eval_samples_per_second": 17.869, "eval_steps_per_second": 0.286, "step": 5700 }, { "epoch": 2.95, "learning_rate": 9.657678332377126e-09, "logits/chosen": -2.4378364086151123, "logits/rejected": -2.5011210441589355, "logps/chosen": -278.77166748046875, "logps/rejected": -327.8222351074219, "loss": 0.0145, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6567916870117188, "rewards/margins": 9.313983917236328, "rewards/rejected": -10.970773696899414, "step": 5710 }, { "epoch": 2.95, "learning_rate": 8.701472556894243e-09, "logits/chosen": -2.4347808361053467, "logits/rejected": -2.4027464389801025, "logps/chosen": -301.68988037109375, "logps/rejected": -355.3216247558594, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.6911423206329346, "rewards/margins": 10.72395133972168, "rewards/rejected": -13.415092468261719, "step": 5720 }, { "epoch": 2.96, "learning_rate": 7.745266781411359e-09, "logits/chosen": -2.4534902572631836, "logits/rejected": -2.554394006729126, "logps/chosen": -260.3663635253906, "logps/rejected": -410.23223876953125, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -2.0108587741851807, "rewards/margins": 9.392390251159668, "rewards/rejected": -11.403249740600586, "step": 5730 }, { "epoch": 2.96, "learning_rate": 6.7890610059284754e-09, "logits/chosen": -2.566368579864502, "logits/rejected": -2.585576057434082, "logps/chosen": -261.0205993652344, "logps/rejected": -348.109619140625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.3029209077358246, "rewards/margins": 10.11386775970459, "rewards/rejected": -10.416789054870605, "step": 5740 }, { "epoch": 2.97, "learning_rate": 5.832855230445592e-09, "logits/chosen": -2.5495338439941406, "logits/rejected": -2.4890074729919434, "logps/chosen": -247.47286987304688, "logps/rejected": -320.06011962890625, "loss": 0.0072, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9899286031723022, "rewards/margins": 10.633166313171387, "rewards/rejected": -11.62309455871582, "step": 5750 }, { "epoch": 2.97, "learning_rate": 4.8766494549627085e-09, "logits/chosen": -2.60798978805542, "logits/rejected": -2.477149486541748, "logps/chosen": -305.1927795410156, "logps/rejected": -318.5039978027344, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -1.8671627044677734, "rewards/margins": 9.939409255981445, "rewards/rejected": -11.806573867797852, "step": 5760 }, { "epoch": 2.98, "learning_rate": 3.920443679479824e-09, "logits/chosen": -2.545316219329834, "logits/rejected": -2.5275652408599854, "logps/chosen": -292.89263916015625, "logps/rejected": -323.63385009765625, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -2.3659417629241943, "rewards/margins": 9.62957763671875, "rewards/rejected": -11.995519638061523, "step": 5770 }, { "epoch": 2.98, "learning_rate": 2.96423790399694e-09, "logits/chosen": -2.48178768157959, "logits/rejected": -2.6639437675476074, "logps/chosen": -219.31777954101562, "logps/rejected": -324.5710754394531, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.672484040260315, "rewards/margins": 8.533833503723145, "rewards/rejected": -10.206315994262695, "step": 5780 }, { "epoch": 2.99, "learning_rate": 2.008032128514056e-09, "logits/chosen": -2.54166841506958, "logits/rejected": -2.5910754203796387, "logps/chosen": -343.8594665527344, "logps/rejected": -451.935791015625, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.7438589334487915, "rewards/margins": 10.097026824951172, "rewards/rejected": -11.840886116027832, "step": 5790 }, { "epoch": 2.99, "learning_rate": 1.0518263530311723e-09, "logits/chosen": -2.5881881713867188, "logits/rejected": -2.5880398750305176, "logps/chosen": -201.51014709472656, "logps/rejected": -326.72650146484375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.4769667983055115, "rewards/margins": 9.810731887817383, "rewards/rejected": -10.287699699401855, "step": 5800 }, { "epoch": 2.99, "eval_logits/chosen": -2.353024482727051, "eval_logits/rejected": -2.308088779449463, "eval_logps/chosen": -299.4037780761719, "eval_logps/rejected": -340.0493469238281, "eval_loss": 0.7519845962524414, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -4.5168681144714355, "eval_rewards/margins": 3.791072130203247, "eval_rewards/rejected": -8.307940483093262, "eval_runtime": 55.3708, "eval_samples_per_second": 18.06, "eval_steps_per_second": 0.289, "step": 5800 }, { "epoch": 3.0, "learning_rate": 9.562057754828839e-11, "logits/chosen": -2.476783514022827, "logits/rejected": -2.4620718955993652, "logps/chosen": -259.29327392578125, "logps/rejected": -435.9239196777344, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 0.16803565621376038, "rewards/margins": 8.936319351196289, "rewards/rejected": -8.768282890319824, "step": 5810 }, { "epoch": 3.0, "step": 5811, "total_flos": 0.0, "train_loss": 0.2172969928600547, "train_runtime": 23865.9828, "train_samples_per_second": 7.789, "train_steps_per_second": 0.243 } ], "logging_steps": 10, "max_steps": 5811, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }