{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6001919846412287, "eval_steps": 500, "global_step": 2501, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9920318725099604e-08, "logits/chosen": -1.8077198266983032, "logits/rejected": -1.711557388305664, "logps/chosen": -187.02471923828125, "logps/rejected": -122.2266616821289, "loss": 0.4697, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.9920318725099604e-07, "logits/chosen": -1.91868257522583, "logits/rejected": -1.382498860359192, "logps/chosen": -176.21807861328125, "logps/rejected": -120.50502014160156, "loss": 0.4051, "rewards/accuracies": 0.2777777910232544, "rewards/chosen": -4.647710011340678e-05, "rewards/margins": -6.936895078979433e-05, "rewards/rejected": 2.2891843400429934e-05, "step": 10 }, { "epoch": 0.0, "learning_rate": 3.9840637450199207e-07, "logits/chosen": -1.933166265487671, "logits/rejected": -1.376651406288147, "logps/chosen": -183.34518432617188, "logps/rejected": -109.8729476928711, "loss": 0.3902, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 5.835342381033115e-05, "rewards/margins": 5.425453491625376e-05, "rewards/rejected": 4.098887529835338e-06, "step": 20 }, { "epoch": 0.01, "learning_rate": 5.976095617529881e-07, "logits/chosen": -1.8762012720108032, "logits/rejected": -1.4956092834472656, "logps/chosen": -173.73521423339844, "logps/rejected": -129.2222137451172, "loss": 0.3999, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.000283640343695879, "rewards/margins": 0.0003057140565942973, "rewards/rejected": -2.2073701984481886e-05, "step": 30 }, { "epoch": 0.01, "learning_rate": 7.968127490039841e-07, "logits/chosen": -2.074092388153076, "logits/rejected": -1.6071268320083618, "logps/chosen": -128.0385284423828, "logps/rejected": -99.65340423583984, "loss": 0.3735, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0004021901695523411, "rewards/margins": 0.0006805313169024885, "rewards/rejected": -0.0002783412055578083, "step": 40 }, { "epoch": 0.01, "learning_rate": 9.9601593625498e-07, "logits/chosen": -2.022669792175293, "logits/rejected": -1.4573835134506226, "logps/chosen": -151.2292022705078, "logps/rejected": -106.66209411621094, "loss": 0.383, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0010000061010941863, "rewards/margins": 0.0013790394878014922, "rewards/rejected": -0.00037903329939581454, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.1952191235059762e-06, "logits/chosen": -1.9887306690216064, "logits/rejected": -1.5061299800872803, "logps/chosen": -206.5648193359375, "logps/rejected": -130.1720733642578, "loss": 0.4111, "rewards/accuracies": 0.625, "rewards/chosen": 0.0012637332547456026, "rewards/margins": 0.0010847109369933605, "rewards/rejected": 0.00017902204126585275, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.3944223107569721e-06, "logits/chosen": -2.064723253250122, "logits/rejected": -1.4734152555465698, "logps/chosen": -200.28292846679688, "logps/rejected": -123.68900299072266, "loss": 0.3915, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.00973748043179512, "rewards/margins": 0.007851692847907543, "rewards/rejected": -0.017589174211025238, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.5936254980079683e-06, "logits/chosen": -1.9502429962158203, "logits/rejected": -1.3335387706756592, "logps/chosen": -223.71151733398438, "logps/rejected": -220.52392578125, "loss": 0.3923, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.063636913895607, "rewards/margins": 0.03966347128152847, "rewards/rejected": -0.10330037772655487, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.7928286852589644e-06, "logits/chosen": -1.8217380046844482, "logits/rejected": -1.2573997974395752, "logps/chosen": -345.07183837890625, "logps/rejected": -365.8221740722656, "loss": 0.2872, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15853700041770935, "rewards/margins": 0.059460896998643875, "rewards/rejected": -0.2179979383945465, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.99203187250996e-06, "logits/chosen": -1.9064254760742188, "logits/rejected": -1.2212843894958496, "logps/chosen": -434.5439453125, "logps/rejected": -479.3141174316406, "loss": 0.279, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2425861656665802, "rewards/margins": 0.1260627657175064, "rewards/rejected": -0.3686489164829254, "step": 100 }, { "epoch": 0.03, "learning_rate": 2.1912350597609563e-06, "logits/chosen": -1.9829845428466797, "logits/rejected": -1.3048018217086792, "logps/chosen": -418.07952880859375, "logps/rejected": -516.9057006835938, "loss": 0.2641, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2715674340724945, "rewards/margins": 0.14631584286689758, "rewards/rejected": -0.4178832471370697, "step": 110 }, { "epoch": 0.03, "learning_rate": 2.3904382470119524e-06, "logits/chosen": -1.9264233112335205, "logits/rejected": -1.4916644096374512, "logps/chosen": -516.511474609375, "logps/rejected": -570.0408325195312, "loss": 0.3119, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3569049537181854, "rewards/margins": 0.09669794887304306, "rewards/rejected": -0.4536028802394867, "step": 120 }, { "epoch": 0.03, "learning_rate": 2.589641434262948e-06, "logits/chosen": -2.02325177192688, "logits/rejected": -1.567561388015747, "logps/chosen": -412.42327880859375, "logps/rejected": -471.11962890625, "loss": 0.2955, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23843295872211456, "rewards/margins": 0.11186476051807404, "rewards/rejected": -0.350297749042511, "step": 130 }, { "epoch": 0.03, "learning_rate": 2.7888446215139443e-06, "logits/chosen": -2.2830417156219482, "logits/rejected": -1.7622215747833252, "logps/chosen": -320.74603271484375, "logps/rejected": -390.9023742675781, "loss": 0.3239, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15108834207057953, "rewards/margins": 0.13633789122104645, "rewards/rejected": -0.2874262034893036, "step": 140 }, { "epoch": 0.04, "learning_rate": 2.9880478087649404e-06, "logits/chosen": -2.1028332710266113, "logits/rejected": -1.609794020652771, "logps/chosen": -509.71051025390625, "logps/rejected": -607.0804443359375, "loss": 0.2944, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3575127124786377, "rewards/margins": 0.14793363213539124, "rewards/rejected": -0.5054463148117065, "step": 150 }, { "epoch": 0.04, "learning_rate": 3.1872509960159366e-06, "logits/chosen": -2.0964839458465576, "logits/rejected": -1.6341785192489624, "logps/chosen": -634.71337890625, "logps/rejected": -707.5382080078125, "loss": 0.275, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.45250073075294495, "rewards/margins": 0.1270226687192917, "rewards/rejected": -0.5795234441757202, "step": 160 }, { "epoch": 0.04, "learning_rate": 3.3864541832669323e-06, "logits/chosen": -1.9589221477508545, "logits/rejected": -1.5987298488616943, "logps/chosen": -550.2916870117188, "logps/rejected": -589.5011596679688, "loss": 0.3143, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.42545145750045776, "rewards/margins": 0.08496164530515671, "rewards/rejected": -0.5104131102561951, "step": 170 }, { "epoch": 0.04, "learning_rate": 3.585657370517929e-06, "logits/chosen": -2.2417616844177246, "logits/rejected": -1.566329836845398, "logps/chosen": -465.1465759277344, "logps/rejected": -618.5088500976562, "loss": 0.2378, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.28945469856262207, "rewards/margins": 0.20885801315307617, "rewards/rejected": -0.49831271171569824, "step": 180 }, { "epoch": 0.05, "learning_rate": 3.7848605577689246e-06, "logits/chosen": -2.0054221153259277, "logits/rejected": -1.608533263206482, "logps/chosen": -474.73468017578125, "logps/rejected": -527.5888061523438, "loss": 0.3241, "rewards/accuracies": 0.5, "rewards/chosen": -0.3396565020084381, "rewards/margins": 0.0798039585351944, "rewards/rejected": -0.4194604754447937, "step": 190 }, { "epoch": 0.05, "learning_rate": 3.98406374501992e-06, "logits/chosen": -2.1822266578674316, "logits/rejected": -1.7768001556396484, "logps/chosen": -381.39166259765625, "logps/rejected": -489.23114013671875, "loss": 0.2525, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2405729591846466, "rewards/margins": 0.13961976766586304, "rewards/rejected": -0.38019272685050964, "step": 200 }, { "epoch": 0.05, "learning_rate": 4.183266932270917e-06, "logits/chosen": -2.0433428287506104, "logits/rejected": -1.155823826789856, "logps/chosen": -570.4938354492188, "logps/rejected": -718.9401245117188, "loss": 0.2815, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4118042588233948, "rewards/margins": 0.2083522379398346, "rewards/rejected": -0.620156466960907, "step": 210 }, { "epoch": 0.05, "learning_rate": 4.382470119521913e-06, "logits/chosen": -2.0706868171691895, "logits/rejected": -1.3509011268615723, "logps/chosen": -564.2838745117188, "logps/rejected": -744.248779296875, "loss": 0.261, "rewards/accuracies": 0.75, "rewards/chosen": -0.40354689955711365, "rewards/margins": 0.22655579447746277, "rewards/rejected": -0.6301027536392212, "step": 220 }, { "epoch": 0.06, "learning_rate": 4.581673306772908e-06, "logits/chosen": -2.1797804832458496, "logits/rejected": -1.5733859539031982, "logps/chosen": -329.7751159667969, "logps/rejected": -467.71807861328125, "loss": 0.2674, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1671646535396576, "rewards/margins": 0.19905708730220795, "rewards/rejected": -0.36622172594070435, "step": 230 }, { "epoch": 0.06, "learning_rate": 4.780876494023905e-06, "logits/chosen": -2.288848876953125, "logits/rejected": -1.6748888492584229, "logps/chosen": -571.8269653320312, "logps/rejected": -707.6842041015625, "loss": 0.2649, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4129388928413391, "rewards/margins": 0.19371375441551208, "rewards/rejected": -0.6066526174545288, "step": 240 }, { "epoch": 0.06, "learning_rate": 4.980079681274901e-06, "logits/chosen": -2.222977876663208, "logits/rejected": -1.6339671611785889, "logps/chosen": -457.69000244140625, "logps/rejected": -580.9161987304688, "loss": 0.2866, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3078981339931488, "rewards/margins": 0.1683819591999054, "rewards/rejected": -0.4762801229953766, "step": 250 }, { "epoch": 0.06, "learning_rate": 4.999802610509541e-06, "logits/chosen": -2.17555570602417, "logits/rejected": -1.641728401184082, "logps/chosen": -571.9801025390625, "logps/rejected": -752.2948608398438, "loss": 0.2416, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.40048933029174805, "rewards/margins": 0.21740679442882538, "rewards/rejected": -0.6178960800170898, "step": 260 }, { "epoch": 0.06, "learning_rate": 4.9991203164860365e-06, "logits/chosen": -2.3382885456085205, "logits/rejected": -1.802610158920288, "logps/chosen": -461.989013671875, "logps/rejected": -589.45166015625, "loss": 0.2291, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.31522998213768005, "rewards/margins": 0.16316869854927063, "rewards/rejected": -0.4783986508846283, "step": 270 }, { "epoch": 0.07, "learning_rate": 4.997950814005098e-06, "logits/chosen": -2.5300729274749756, "logits/rejected": -1.6759153604507446, "logps/chosen": -527.8553466796875, "logps/rejected": -689.076904296875, "loss": 0.2481, "rewards/accuracies": 0.75, "rewards/chosen": -0.318958580493927, "rewards/margins": 0.24714262783527374, "rewards/rejected": -0.5661011934280396, "step": 280 }, { "epoch": 0.07, "learning_rate": 4.99629433106355e-06, "logits/chosen": -2.233731746673584, "logits/rejected": -1.6700172424316406, "logps/chosen": -671.8790283203125, "logps/rejected": -758.0708618164062, "loss": 0.2699, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4771367013454437, "rewards/margins": 0.15160521864891052, "rewards/rejected": -0.628741979598999, "step": 290 }, { "epoch": 0.07, "learning_rate": 4.994151190596025e-06, "logits/chosen": -2.2926723957061768, "logits/rejected": -1.6014070510864258, "logps/chosen": -357.16583251953125, "logps/rejected": -530.6362915039062, "loss": 0.2773, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20203897356987, "rewards/margins": 0.20360472798347473, "rewards/rejected": -0.4056437015533447, "step": 300 }, { "epoch": 0.07, "learning_rate": 4.9915218104120024e-06, "logits/chosen": -2.1675281524658203, "logits/rejected": -1.540875792503357, "logps/chosen": -517.7607421875, "logps/rejected": -669.5948486328125, "loss": 0.2334, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3637324273586273, "rewards/margins": 0.20930609107017517, "rewards/rejected": -0.5730385780334473, "step": 310 }, { "epoch": 0.08, "learning_rate": 4.98840670311436e-06, "logits/chosen": -2.3536510467529297, "logits/rejected": -1.794704794883728, "logps/chosen": -533.6524658203125, "logps/rejected": -621.9884643554688, "loss": 0.2721, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3811626136302948, "rewards/margins": 0.13614344596862793, "rewards/rejected": -0.5173059701919556, "step": 320 }, { "epoch": 0.08, "learning_rate": 4.984806475999437e-06, "logits/chosen": -2.2430691719055176, "logits/rejected": -1.409332036972046, "logps/chosen": -616.1778564453125, "logps/rejected": -776.3978271484375, "loss": 0.2444, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.42925572395324707, "rewards/margins": 0.21815767884254456, "rewards/rejected": -0.647413432598114, "step": 330 }, { "epoch": 0.08, "learning_rate": 4.980721830938645e-06, "logits/chosen": -2.1990833282470703, "logits/rejected": -1.544798493385315, "logps/chosen": -605.9384765625, "logps/rejected": -732.4371337890625, "loss": 0.2803, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.439105361700058, "rewards/margins": 0.172859787940979, "rewards/rejected": -0.6119651198387146, "step": 340 }, { "epoch": 0.08, "learning_rate": 4.9761535642416284e-06, "logits/chosen": -2.2408649921417236, "logits/rejected": -1.799768090248108, "logps/chosen": -489.03533935546875, "logps/rejected": -665.1949462890625, "loss": 0.2405, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.37104225158691406, "rewards/margins": 0.19040416181087494, "rewards/rejected": -0.5614464282989502, "step": 350 }, { "epoch": 0.09, "learning_rate": 4.9711025665010335e-06, "logits/chosen": -2.2574844360351562, "logits/rejected": -1.8442182540893555, "logps/chosen": -408.14654541015625, "logps/rejected": -561.5911865234375, "loss": 0.2777, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.269236296415329, "rewards/margins": 0.16201291978359222, "rewards/rejected": -0.4312492311000824, "step": 360 }, { "epoch": 0.09, "learning_rate": 4.965569822418878e-06, "logits/chosen": -2.0681309700012207, "logits/rejected": -1.4381722211837769, "logps/chosen": -559.8223876953125, "logps/rejected": -734.8892211914062, "loss": 0.2241, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.43346816301345825, "rewards/margins": 0.1917620450258255, "rewards/rejected": -0.6252301931381226, "step": 370 }, { "epoch": 0.09, "learning_rate": 4.9595564106145825e-06, "logits/chosen": -2.303969383239746, "logits/rejected": -1.733120322227478, "logps/chosen": -418.6346740722656, "logps/rejected": -560.189208984375, "loss": 0.2358, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2271452397108078, "rewards/margins": 0.19738546013832092, "rewards/rejected": -0.42453068494796753, "step": 380 }, { "epoch": 0.09, "learning_rate": 4.953063503414692e-06, "logits/chosen": -2.2448253631591797, "logits/rejected": -1.8518972396850586, "logps/chosen": -494.76055908203125, "logps/rejected": -628.48388671875, "loss": 0.2679, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.335178941488266, "rewards/margins": 0.17853963375091553, "rewards/rejected": -0.5137186050415039, "step": 390 }, { "epoch": 0.1, "learning_rate": 4.946092366624333e-06, "logits/chosen": -2.2507317066192627, "logits/rejected": -1.619484305381775, "logps/chosen": -526.4837646484375, "logps/rejected": -697.7025146484375, "loss": 0.2529, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3731151223182678, "rewards/margins": 0.21801939606666565, "rewards/rejected": -0.5911344885826111, "step": 400 }, { "epoch": 0.1, "learning_rate": 4.938644359280433e-06, "logits/chosen": -2.375333070755005, "logits/rejected": -1.6055479049682617, "logps/chosen": -563.16552734375, "logps/rejected": -741.4131469726562, "loss": 0.212, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.36431047320365906, "rewards/margins": 0.2534615397453308, "rewards/rejected": -0.6177719831466675, "step": 410 }, { "epoch": 0.1, "learning_rate": 4.930720933386782e-06, "logits/chosen": -2.233098268508911, "logits/rejected": -1.6022119522094727, "logps/chosen": -426.771728515625, "logps/rejected": -549.2544555664062, "loss": 0.2503, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2603939175605774, "rewards/margins": 0.18492767214775085, "rewards/rejected": -0.445321649312973, "step": 420 }, { "epoch": 0.1, "learning_rate": 4.922323633630957e-06, "logits/chosen": -2.1745898723602295, "logits/rejected": -1.5291332006454468, "logps/chosen": -516.7222290039062, "logps/rejected": -726.829345703125, "loss": 0.2005, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3355748653411865, "rewards/margins": 0.27592363953590393, "rewards/rejected": -0.6114985346794128, "step": 430 }, { "epoch": 0.11, "learning_rate": 4.913454097083185e-06, "logits/chosen": -2.2320826053619385, "logits/rejected": -1.747312307357788, "logps/chosen": -674.8052368164062, "logps/rejected": -752.8465576171875, "loss": 0.2891, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5011820793151855, "rewards/margins": 0.12799863517284393, "rewards/rejected": -0.6291807889938354, "step": 440 }, { "epoch": 0.11, "learning_rate": 4.904114052877189e-06, "logits/chosen": -2.121434211730957, "logits/rejected": -1.4196488857269287, "logps/chosen": -554.5970458984375, "logps/rejected": -728.1629028320312, "loss": 0.2619, "rewards/accuracies": 0.75, "rewards/chosen": -0.39592963457107544, "rewards/margins": 0.23849153518676758, "rewards/rejected": -0.6344212293624878, "step": 450 }, { "epoch": 0.11, "learning_rate": 4.894305321873092e-06, "logits/chosen": -2.1138224601745605, "logits/rejected": -1.6369972229003906, "logps/chosen": -714.6239624023438, "logps/rejected": -836.7276611328125, "loss": 0.2451, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5520918965339661, "rewards/margins": 0.17846426367759705, "rewards/rejected": -0.7305561304092407, "step": 460 }, { "epoch": 0.11, "learning_rate": 4.884029816302441e-06, "logits/chosen": -2.2457010746002197, "logits/rejected": -1.7463334798812866, "logps/chosen": -603.1580810546875, "logps/rejected": -735.0208740234375, "loss": 0.2874, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4632132947444916, "rewards/margins": 0.17302510142326355, "rewards/rejected": -0.6362384557723999, "step": 470 }, { "epoch": 0.12, "learning_rate": 4.873289539395404e-06, "logits/chosen": -2.1492977142333984, "logits/rejected": -1.6171681880950928, "logps/chosen": -582.4373779296875, "logps/rejected": -740.5318603515625, "loss": 0.2167, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.42827337980270386, "rewards/margins": 0.2127343863248825, "rewards/rejected": -0.6410078406333923, "step": 480 }, { "epoch": 0.12, "learning_rate": 4.862086584990246e-06, "logits/chosen": -2.2339184284210205, "logits/rejected": -1.7799112796783447, "logps/chosen": -582.9187622070312, "logps/rejected": -674.3992309570312, "loss": 0.2649, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3971683382987976, "rewards/margins": 0.1706150472164154, "rewards/rejected": -0.5677834153175354, "step": 490 }, { "epoch": 0.12, "learning_rate": 4.850423137125126e-06, "logits/chosen": -1.98675537109375, "logits/rejected": -1.467492699623108, "logps/chosen": -641.2584228515625, "logps/rejected": -804.790771484375, "loss": 0.2572, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4882410168647766, "rewards/margins": 0.21470656991004944, "rewards/rejected": -0.7029476165771484, "step": 500 }, { "epoch": 0.12, "learning_rate": 4.838301469612315e-06, "logits/chosen": -2.24493408203125, "logits/rejected": -1.498622179031372, "logps/chosen": -557.9942626953125, "logps/rejected": -705.88623046875, "loss": 0.2415, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3977532982826233, "rewards/margins": 0.20435115694999695, "rewards/rejected": -0.6021044850349426, "step": 510 }, { "epoch": 0.12, "learning_rate": 4.825723945594912e-06, "logits/chosen": -2.1898727416992188, "logits/rejected": -1.737510323524475, "logps/chosen": -415.2431640625, "logps/rejected": -563.3753051757812, "loss": 0.285, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.27554893493652344, "rewards/margins": 0.1814078390598297, "rewards/rejected": -0.4569567143917084, "step": 520 }, { "epoch": 0.13, "learning_rate": 4.812693017086145e-06, "logits/chosen": -2.2709362506866455, "logits/rejected": -1.7534446716308594, "logps/chosen": -521.100341796875, "logps/rejected": -715.1611938476562, "loss": 0.2564, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3482024073600769, "rewards/margins": 0.23160001635551453, "rewards/rejected": -0.5798024535179138, "step": 530 }, { "epoch": 0.13, "learning_rate": 4.799211224491348e-06, "logits/chosen": -2.000866651535034, "logits/rejected": -1.438919186592102, "logps/chosen": -606.0504760742188, "logps/rejected": -769.3602294921875, "loss": 0.2518, "rewards/accuracies": 0.75, "rewards/chosen": -0.5148371458053589, "rewards/margins": 0.1749372035264969, "rewards/rejected": -0.6897743344306946, "step": 540 }, { "epoch": 0.13, "learning_rate": 4.7852811961126974e-06, "logits/chosen": -2.2116503715515137, "logits/rejected": -1.419684648513794, "logps/chosen": -566.8517456054688, "logps/rejected": -797.03076171875, "loss": 0.2348, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.42510828375816345, "rewards/margins": 0.28039130568504333, "rewards/rejected": -0.7054997086524963, "step": 550 }, { "epoch": 0.13, "learning_rate": 4.770905647636828e-06, "logits/chosen": -2.19708514213562, "logits/rejected": -1.514418601989746, "logps/chosen": -372.2975769042969, "logps/rejected": -572.015625, "loss": 0.2529, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22482657432556152, "rewards/margins": 0.2627830505371094, "rewards/rejected": -0.4876096844673157, "step": 560 }, { "epoch": 0.14, "learning_rate": 4.756087381605399e-06, "logits/chosen": -2.212829113006592, "logits/rejected": -1.5567867755889893, "logps/chosen": -451.62091064453125, "logps/rejected": -675.35498046875, "loss": 0.2243, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.28890347480773926, "rewards/margins": 0.2642940580844879, "rewards/rejected": -0.5531975030899048, "step": 570 }, { "epoch": 0.14, "learning_rate": 4.740829286868732e-06, "logits/chosen": -2.0651936531066895, "logits/rejected": -1.282173991203308, "logps/chosen": -608.8034057617188, "logps/rejected": -814.4363403320312, "loss": 0.2309, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4570806920528412, "rewards/margins": 0.2720951437950134, "rewards/rejected": -0.729175865650177, "step": 580 }, { "epoch": 0.14, "learning_rate": 4.725134338022631e-06, "logits/chosen": -1.976910948753357, "logits/rejected": -1.3392863273620605, "logps/chosen": -673.251220703125, "logps/rejected": -844.18408203125, "loss": 0.2425, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4962734580039978, "rewards/margins": 0.2366379052400589, "rewards/rejected": -0.7329114675521851, "step": 590 }, { "epoch": 0.14, "learning_rate": 4.709005594828471e-06, "logits/chosen": -2.177516460418701, "logits/rejected": -1.4188392162322998, "logps/chosen": -349.5711669921875, "logps/rejected": -489.1685485839844, "loss": 0.268, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18546082079410553, "rewards/margins": 0.21840114891529083, "rewards/rejected": -0.4038619101047516, "step": 600 }, { "epoch": 0.15, "learning_rate": 4.692446201616692e-06, "logits/chosen": -2.171480894088745, "logits/rejected": -1.486372470855713, "logps/chosen": -489.7513732910156, "logps/rejected": -671.7752685546875, "loss": 0.2265, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3359985947608948, "rewards/margins": 0.2379292994737625, "rewards/rejected": -0.5739278197288513, "step": 610 }, { "epoch": 0.15, "learning_rate": 4.675459386673815e-06, "logits/chosen": -2.188599109649658, "logits/rejected": -1.417234182357788, "logps/chosen": -669.4722290039062, "logps/rejected": -851.4942626953125, "loss": 0.2386, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4645751118659973, "rewards/margins": 0.2636135220527649, "rewards/rejected": -0.7281886339187622, "step": 620 }, { "epoch": 0.15, "learning_rate": 4.658048461613068e-06, "logits/chosen": -2.186264753341675, "logits/rejected": -1.3317312002182007, "logps/chosen": -558.1051635742188, "logps/rejected": -807.9237060546875, "loss": 0.2096, "rewards/accuracies": 0.875, "rewards/chosen": -0.4056881070137024, "rewards/margins": 0.2980197072029114, "rewards/rejected": -0.7037078738212585, "step": 630 }, { "epoch": 0.15, "learning_rate": 4.640216820728791e-06, "logits/chosen": -2.0650200843811035, "logits/rejected": -1.5434041023254395, "logps/chosen": -591.6378173828125, "logps/rejected": -726.2674560546875, "loss": 0.3041, "rewards/accuracies": 0.75, "rewards/chosen": -0.440895140171051, "rewards/margins": 0.15948018431663513, "rewards/rejected": -0.6003752946853638, "step": 640 }, { "epoch": 0.16, "learning_rate": 4.621967940334705e-06, "logits/chosen": -2.1776533126831055, "logits/rejected": -1.505947470664978, "logps/chosen": -533.233154296875, "logps/rejected": -758.1294555664062, "loss": 0.2585, "rewards/accuracies": 0.875, "rewards/chosen": -0.34916678071022034, "rewards/margins": 0.2798806130886078, "rewards/rejected": -0.6290473341941833, "step": 650 }, { "epoch": 0.16, "learning_rate": 4.603305378086201e-06, "logits/chosen": -1.815281629562378, "logits/rejected": -1.1680887937545776, "logps/chosen": -623.07275390625, "logps/rejected": -776.311279296875, "loss": 0.2435, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4895709156990051, "rewards/margins": 0.18619278073310852, "rewards/rejected": -0.675763726234436, "step": 660 }, { "epoch": 0.16, "learning_rate": 4.584232772286769e-06, "logits/chosen": -1.9976768493652344, "logits/rejected": -1.2251824140548706, "logps/chosen": -644.8419189453125, "logps/rejected": -831.3948364257812, "loss": 0.2896, "rewards/accuracies": 0.75, "rewards/chosen": -0.4800949990749359, "rewards/margins": 0.2344970703125, "rewards/rejected": -0.7145919799804688, "step": 670 }, { "epoch": 0.16, "learning_rate": 4.5647538411786965e-06, "logits/chosen": -2.1248276233673096, "logits/rejected": -1.5544915199279785, "logps/chosen": -530.0264282226562, "logps/rejected": -631.1822509765625, "loss": 0.2546, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.36536192893981934, "rewards/margins": 0.15929196774959564, "rewards/rejected": -0.5246539115905762, "step": 680 }, { "epoch": 0.17, "learning_rate": 4.544872382218202e-06, "logits/chosen": -2.3693809509277344, "logits/rejected": -1.6609468460083008, "logps/chosen": -424.1357421875, "logps/rejected": -568.0796508789062, "loss": 0.2299, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.257046639919281, "rewards/margins": 0.19673588871955872, "rewards/rejected": -0.45378249883651733, "step": 690 }, { "epoch": 0.17, "learning_rate": 4.5245922713351e-06, "logits/chosen": -2.135702610015869, "logits/rejected": -1.4124939441680908, "logps/chosen": -638.5548095703125, "logps/rejected": -831.3570556640625, "loss": 0.2649, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.46429723501205444, "rewards/margins": 0.2442464828491211, "rewards/rejected": -0.7085437774658203, "step": 700 }, { "epoch": 0.17, "learning_rate": 4.503917462177192e-06, "logits/chosen": -2.142075300216675, "logits/rejected": -1.5568357706069946, "logps/chosen": -572.1000366210938, "logps/rejected": -753.2780151367188, "loss": 0.292, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.44795316457748413, "rewards/margins": 0.20818133652210236, "rewards/rejected": -0.6561344861984253, "step": 710 }, { "epoch": 0.17, "learning_rate": 4.482851985339487e-06, "logits/chosen": -2.1371376514434814, "logits/rejected": -1.4664158821105957, "logps/chosen": -545.5193481445312, "logps/rejected": -744.4154052734375, "loss": 0.239, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3687899112701416, "rewards/margins": 0.2610931992530823, "rewards/rejected": -0.6298831701278687, "step": 720 }, { "epoch": 0.18, "learning_rate": 4.461399947578434e-06, "logits/chosen": -2.133291721343994, "logits/rejected": -1.604524850845337, "logps/chosen": -517.1326293945312, "logps/rejected": -649.8372802734375, "loss": 0.231, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3689945638179779, "rewards/margins": 0.1885087788105011, "rewards/rejected": -0.5575034022331238, "step": 730 }, { "epoch": 0.18, "learning_rate": 4.439565531011299e-06, "logits/chosen": -1.8887875080108643, "logits/rejected": -1.270674467086792, "logps/chosen": -613.1268310546875, "logps/rejected": -801.4429321289062, "loss": 0.2473, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4629307687282562, "rewards/margins": 0.22329919040203094, "rewards/rejected": -0.6862298846244812, "step": 740 }, { "epoch": 0.18, "learning_rate": 4.417352992300854e-06, "logits/chosen": -2.3292813301086426, "logits/rejected": -1.6490875482559204, "logps/chosen": -425.7843322753906, "logps/rejected": -597.09814453125, "loss": 0.2591, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2748299241065979, "rewards/margins": 0.2304181158542633, "rewards/rejected": -0.5052480101585388, "step": 750 }, { "epoch": 0.18, "learning_rate": 4.3947666618255335e-06, "logits/chosen": -2.14275860786438, "logits/rejected": -1.552585244178772, "logps/chosen": -411.56024169921875, "logps/rejected": -597.5908813476562, "loss": 0.2509, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2517482042312622, "rewards/margins": 0.2175951898097992, "rewards/rejected": -0.4693434238433838, "step": 760 }, { "epoch": 0.18, "learning_rate": 4.3718109428352155e-06, "logits/chosen": -2.094531536102295, "logits/rejected": -1.4484500885009766, "logps/chosen": -561.7736206054688, "logps/rejected": -734.1261596679688, "loss": 0.2477, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4113832116127014, "rewards/margins": 0.2228744924068451, "rewards/rejected": -0.6342577338218689, "step": 770 }, { "epoch": 0.19, "learning_rate": 4.348490310592801e-06, "logits/chosen": -1.8707062005996704, "logits/rejected": -1.506484866142273, "logps/chosen": -714.3576049804688, "logps/rejected": -866.392578125, "loss": 0.2457, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5910379886627197, "rewards/margins": 0.16303986310958862, "rewards/rejected": -0.7540777921676636, "step": 780 }, { "epoch": 0.19, "learning_rate": 4.3248093115017544e-06, "logits/chosen": -2.125237464904785, "logits/rejected": -1.4492751359939575, "logps/chosen": -582.9632568359375, "logps/rejected": -715.1638793945312, "loss": 0.2496, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3824686110019684, "rewards/margins": 0.22166451811790466, "rewards/rejected": -0.6041331887245178, "step": 790 }, { "epoch": 0.19, "learning_rate": 4.3007725622197675e-06, "logits/chosen": -2.2058329582214355, "logits/rejected": -1.5816318988800049, "logps/chosen": -570.4851684570312, "logps/rejected": -719.0206909179688, "loss": 0.227, "rewards/accuracies": 0.75, "rewards/chosen": -0.38881629705429077, "rewards/margins": 0.21982832252979279, "rewards/rejected": -0.6086446046829224, "step": 800 }, { "epoch": 0.19, "learning_rate": 4.27638474875874e-06, "logits/chosen": -2.1510353088378906, "logits/rejected": -1.3720935583114624, "logps/chosen": -377.4300537109375, "logps/rejected": -588.8758544921875, "loss": 0.2153, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23323938250541687, "rewards/margins": 0.264992356300354, "rewards/rejected": -0.4982317090034485, "step": 810 }, { "epoch": 0.2, "learning_rate": 4.25165062557123e-06, "logits/chosen": -2.1816329956054688, "logits/rejected": -1.862624168395996, "logps/chosen": -489.541015625, "logps/rejected": -573.84814453125, "loss": 0.2876, "rewards/accuracies": 0.625, "rewards/chosen": -0.33602797985076904, "rewards/margins": 0.1257353574037552, "rewards/rejected": -0.46176332235336304, "step": 820 }, { "epoch": 0.2, "learning_rate": 4.226575014623557e-06, "logits/chosen": -2.121556520462036, "logits/rejected": -1.500382661819458, "logps/chosen": -569.2263793945312, "logps/rejected": -745.4718017578125, "loss": 0.2127, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.37383171916007996, "rewards/margins": 0.24332182109355927, "rewards/rejected": -0.617153525352478, "step": 830 }, { "epoch": 0.2, "learning_rate": 4.201162804455764e-06, "logits/chosen": -2.158127784729004, "logits/rejected": -1.5402179956436157, "logps/chosen": -477.3233337402344, "logps/rejected": -729.618408203125, "loss": 0.2182, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.336378812789917, "rewards/margins": 0.27327030897140503, "rewards/rejected": -0.609649121761322, "step": 840 }, { "epoch": 0.2, "learning_rate": 4.175418949228571e-06, "logits/chosen": -2.0661299228668213, "logits/rejected": -1.3702499866485596, "logps/chosen": -421.61676025390625, "logps/rejected": -608.9027709960938, "loss": 0.2314, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.28428909182548523, "rewards/margins": 0.23253202438354492, "rewards/rejected": -0.5168210864067078, "step": 850 }, { "epoch": 0.21, "learning_rate": 4.149348467757566e-06, "logits/chosen": -2.2505486011505127, "logits/rejected": -1.6682631969451904, "logps/chosen": -519.7850341796875, "logps/rejected": -675.0706787109375, "loss": 0.2582, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.40990179777145386, "rewards/margins": 0.17124588787555695, "rewards/rejected": -0.5811477303504944, "step": 860 }, { "epoch": 0.21, "learning_rate": 4.122956442534765e-06, "logits/chosen": -2.070268154144287, "logits/rejected": -1.589167833328247, "logps/chosen": -569.9495239257812, "logps/rejected": -677.8106689453125, "loss": 0.2566, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4036158621311188, "rewards/margins": 0.1584577113389969, "rewards/rejected": -0.5620735883712769, "step": 870 }, { "epoch": 0.21, "learning_rate": 4.096248018737781e-06, "logits/chosen": -1.9538896083831787, "logits/rejected": -1.506744623184204, "logps/chosen": -598.489990234375, "logps/rejected": -718.8297729492188, "loss": 0.2701, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4573654234409332, "rewards/margins": 0.15690357983112335, "rewards/rejected": -0.614268958568573, "step": 880 }, { "epoch": 0.21, "learning_rate": 4.069228403226751e-06, "logits/chosen": -2.2838289737701416, "logits/rejected": -1.5676209926605225, "logps/chosen": -536.0494384765625, "logps/rejected": -721.7430419921875, "loss": 0.2196, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.37890490889549255, "rewards/margins": 0.2322990447282791, "rewards/rejected": -0.6112040281295776, "step": 890 }, { "epoch": 0.22, "learning_rate": 4.041902863529257e-06, "logits/chosen": -2.2310588359832764, "logits/rejected": -1.7672712802886963, "logps/chosen": -479.8710021972656, "logps/rejected": -604.5220947265625, "loss": 0.2552, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3082687258720398, "rewards/margins": 0.17145316302776337, "rewards/rejected": -0.47972187399864197, "step": 900 }, { "epoch": 0.22, "learning_rate": 4.014276726813404e-06, "logits/chosen": -2.1795742511749268, "logits/rejected": -1.4126112461090088, "logps/chosen": -480.098876953125, "logps/rejected": -701.5238037109375, "loss": 0.2253, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.32372376322746277, "rewards/margins": 0.2762491703033447, "rewards/rejected": -0.5999729037284851, "step": 910 }, { "epoch": 0.22, "learning_rate": 3.986355378849284e-06, "logits/chosen": -2.3270981311798096, "logits/rejected": -2.0575973987579346, "logps/chosen": -562.2911376953125, "logps/rejected": -678.2664794921875, "loss": 0.255, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.39332714676856995, "rewards/margins": 0.1472180336713791, "rewards/rejected": -0.5405451655387878, "step": 920 }, { "epoch": 0.22, "learning_rate": 3.958144262959004e-06, "logits/chosen": -2.204744815826416, "logits/rejected": -1.6555640697479248, "logps/chosen": -619.665283203125, "logps/rejected": -708.1978759765625, "loss": 0.4841, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.48071688413619995, "rewards/margins": 0.11553032696247101, "rewards/rejected": -0.5962471961975098, "step": 930 }, { "epoch": 0.23, "learning_rate": 3.929648878955507e-06, "logits/chosen": -2.1643776893615723, "logits/rejected": -1.7133562564849854, "logps/chosen": -498.3556213378906, "logps/rejected": -624.0385131835938, "loss": 0.2892, "rewards/accuracies": 0.75, "rewards/chosen": -0.3023318946361542, "rewards/margins": 0.18938633799552917, "rewards/rejected": -0.49171820282936096, "step": 940 }, { "epoch": 0.23, "learning_rate": 3.900874782070362e-06, "logits/chosen": -2.2862777709960938, "logits/rejected": -1.5697494745254517, "logps/chosen": -523.8767700195312, "logps/rejected": -712.3031005859375, "loss": 0.2906, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35215550661087036, "rewards/margins": 0.26922211050987244, "rewards/rejected": -0.6213775873184204, "step": 950 }, { "epoch": 0.23, "learning_rate": 3.871827581870772e-06, "logits/chosen": -1.9373928308486938, "logits/rejected": -1.2519636154174805, "logps/chosen": -612.5736083984375, "logps/rejected": -769.370361328125, "loss": 0.2162, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.44734010100364685, "rewards/margins": 0.20580251514911652, "rewards/rejected": -0.6531426906585693, "step": 960 }, { "epoch": 0.23, "learning_rate": 3.842512941165968e-06, "logits/chosen": -1.9552310705184937, "logits/rejected": -1.2225010395050049, "logps/chosen": -595.6505126953125, "logps/rejected": -807.1351318359375, "loss": 0.2339, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4424334168434143, "rewards/margins": 0.2755126357078552, "rewards/rejected": -0.7179459929466248, "step": 970 }, { "epoch": 0.24, "learning_rate": 3.8129365749032398e-06, "logits/chosen": -2.179598331451416, "logits/rejected": -1.5930414199829102, "logps/chosen": -470.39666748046875, "logps/rejected": -708.7127075195312, "loss": 0.2389, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3327002227306366, "rewards/margins": 0.247235506772995, "rewards/rejected": -0.5799357295036316, "step": 980 }, { "epoch": 0.24, "learning_rate": 3.783104249053793e-06, "logits/chosen": -1.876232385635376, "logits/rejected": -1.1954014301300049, "logps/chosen": -482.48809814453125, "logps/rejected": -653.3897705078125, "loss": 0.2342, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3456036448478699, "rewards/margins": 0.20723696053028107, "rewards/rejected": -0.5528405904769897, "step": 990 }, { "epoch": 0.24, "learning_rate": 3.7530217794886607e-06, "logits/chosen": -2.168401002883911, "logits/rejected": -1.362849473953247, "logps/chosen": -565.8056030273438, "logps/rejected": -753.4979248046875, "loss": 0.2181, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3963169455528259, "rewards/margins": 0.2578599452972412, "rewards/rejected": -0.6541768908500671, "step": 1000 }, { "epoch": 0.24, "learning_rate": 3.722695030844891e-06, "logits/chosen": -1.9112541675567627, "logits/rejected": -1.4224046468734741, "logps/chosen": -536.9829711914062, "logps/rejected": -687.1131591796875, "loss": 0.2746, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.41709670424461365, "rewards/margins": 0.18000951409339905, "rewards/rejected": -0.5971062183380127, "step": 1010 }, { "epoch": 0.24, "learning_rate": 3.6921299153822198e-06, "logits/chosen": -2.2641046047210693, "logits/rejected": -1.6229751110076904, "logps/chosen": -529.2923583984375, "logps/rejected": -726.6102905273438, "loss": 0.228, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.34389495849609375, "rewards/margins": 0.2508707046508789, "rewards/rejected": -0.5947656035423279, "step": 1020 }, { "epoch": 0.25, "learning_rate": 3.66133239183047e-06, "logits/chosen": -2.1011762619018555, "logits/rejected": -1.4804975986480713, "logps/chosen": -495.906494140625, "logps/rejected": -683.6201171875, "loss": 0.2441, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3397464454174042, "rewards/margins": 0.23895248770713806, "rewards/rejected": -0.5786989331245422, "step": 1030 }, { "epoch": 0.25, "learning_rate": 3.630308464227877e-06, "logits/chosen": -2.093890428543091, "logits/rejected": -1.4402543306350708, "logps/chosen": -405.9877014160156, "logps/rejected": -528.1480712890625, "loss": 0.2503, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20128004252910614, "rewards/margins": 0.19632327556610107, "rewards/rejected": -0.3976033329963684, "step": 1040 }, { "epoch": 0.25, "learning_rate": 3.5990641807506e-06, "logits/chosen": -2.3380658626556396, "logits/rejected": -1.6520893573760986, "logps/chosen": -430.703369140625, "logps/rejected": -646.5703735351562, "loss": 0.2577, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.27441808581352234, "rewards/margins": 0.26531141996383667, "rewards/rejected": -0.5397294759750366, "step": 1050 }, { "epoch": 0.25, "learning_rate": 3.5676056325336084e-06, "logits/chosen": -2.1818904876708984, "logits/rejected": -1.784393310546875, "logps/chosen": -520.1884155273438, "logps/rejected": -674.11962890625, "loss": 0.2316, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3729000687599182, "rewards/margins": 0.16966374218463898, "rewards/rejected": -0.5425638556480408, "step": 1060 }, { "epoch": 0.26, "learning_rate": 3.535938952483211e-06, "logits/chosen": -2.2213704586029053, "logits/rejected": -1.418710708618164, "logps/chosen": -560.0448608398438, "logps/rejected": -815.6527709960938, "loss": 0.2124, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40334635972976685, "rewards/margins": 0.2945135533809662, "rewards/rejected": -0.6978598833084106, "step": 1070 }, { "epoch": 0.26, "learning_rate": 3.5040703140814254e-06, "logits/chosen": -2.221543788909912, "logits/rejected": -1.6487659215927124, "logps/chosen": -634.6618041992188, "logps/rejected": -839.3564453125, "loss": 0.2324, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.4737940728664398, "rewards/margins": 0.24561241269111633, "rewards/rejected": -0.7194064855575562, "step": 1080 }, { "epoch": 0.26, "learning_rate": 3.4720059301824527e-06, "logits/chosen": -2.189147710800171, "logits/rejected": -1.7664272785186768, "logps/chosen": -526.344970703125, "logps/rejected": -650.4627075195312, "loss": 0.2229, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.36051493883132935, "rewards/margins": 0.19068099558353424, "rewards/rejected": -0.5511959791183472, "step": 1090 }, { "epoch": 0.26, "learning_rate": 3.439752051801467e-06, "logits/chosen": -2.276291608810425, "logits/rejected": -1.5065466165542603, "logps/chosen": -466.7132263183594, "logps/rejected": -688.0767822265625, "loss": 0.2285, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31362882256507874, "rewards/margins": 0.2555429935455322, "rewards/rejected": -0.5691717863082886, "step": 1100 }, { "epoch": 0.27, "learning_rate": 3.407314966895966e-06, "logits/chosen": -2.2229745388031006, "logits/rejected": -1.6873805522918701, "logps/chosen": -499.5079650878906, "logps/rejected": -678.7784423828125, "loss": 0.248, "rewards/accuracies": 0.75, "rewards/chosen": -0.34916746616363525, "rewards/margins": 0.20373527705669403, "rewards/rejected": -0.5529027581214905, "step": 1110 }, { "epoch": 0.27, "learning_rate": 3.3747009991399226e-06, "logits/chosen": -2.0915369987487793, "logits/rejected": -1.515972375869751, "logps/chosen": -462.2139587402344, "logps/rejected": -621.6764526367188, "loss": 0.2303, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.34132781624794006, "rewards/margins": 0.19509322941303253, "rewards/rejected": -0.536421000957489, "step": 1120 }, { "epoch": 0.27, "learning_rate": 3.341916506690971e-06, "logits/chosen": -2.1414732933044434, "logits/rejected": -1.5188627243041992, "logps/chosen": -487.9198303222656, "logps/rejected": -658.6336059570312, "loss": 0.2358, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3202676773071289, "rewards/margins": 0.2106558084487915, "rewards/rejected": -0.5309234857559204, "step": 1130 }, { "epoch": 0.27, "learning_rate": 3.308967880950874e-06, "logits/chosen": -2.0529282093048096, "logits/rejected": -1.4871912002563477, "logps/chosen": -542.2448120117188, "logps/rejected": -726.5451049804688, "loss": 0.2379, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.41947245597839355, "rewards/margins": 0.209178164601326, "rewards/rejected": -0.6286506056785583, "step": 1140 }, { "epoch": 0.28, "learning_rate": 3.275861545319504e-06, "logits/chosen": -2.241400718688965, "logits/rejected": -1.7179415225982666, "logps/chosen": -475.1095275878906, "logps/rejected": -658.5631103515625, "loss": 0.254, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31501632928848267, "rewards/margins": 0.21485765278339386, "rewards/rejected": -0.5298739671707153, "step": 1150 }, { "epoch": 0.28, "learning_rate": 3.2426039539425875e-06, "logits/chosen": -2.236320972442627, "logits/rejected": -1.3771488666534424, "logps/chosen": -501.77960205078125, "logps/rejected": -705.9559326171875, "loss": 0.2485, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.33146998286247253, "rewards/margins": 0.2820337116718292, "rewards/rejected": -0.613503634929657, "step": 1160 }, { "epoch": 0.28, "learning_rate": 3.2092015904534614e-06, "logits/chosen": -2.1324212551116943, "logits/rejected": -1.3361310958862305, "logps/chosen": -465.21234130859375, "logps/rejected": -626.5828857421875, "loss": 0.222, "rewards/accuracies": 0.75, "rewards/chosen": -0.30111175775527954, "rewards/margins": 0.2380938082933426, "rewards/rejected": -0.5392054915428162, "step": 1170 }, { "epoch": 0.28, "learning_rate": 3.17566096670907e-06, "logits/chosen": -2.1174681186676025, "logits/rejected": -1.8057496547698975, "logps/chosen": -514.4780883789062, "logps/rejected": -612.9591064453125, "loss": 0.2773, "rewards/accuracies": 0.625, "rewards/chosen": -0.3825177252292633, "rewards/margins": 0.13299870491027832, "rewards/rejected": -0.5155164003372192, "step": 1180 }, { "epoch": 0.29, "learning_rate": 3.14198862152047e-06, "logits/chosen": -2.3264975547790527, "logits/rejected": -1.8200445175170898, "logps/chosen": -509.34332275390625, "logps/rejected": -662.3193359375, "loss": 0.2653, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3371141850948334, "rewards/margins": 0.19747108221054077, "rewards/rejected": -0.5345852971076965, "step": 1190 }, { "epoch": 0.29, "learning_rate": 3.1081911193780734e-06, "logits/chosen": -2.1914255619049072, "logits/rejected": -1.6171748638153076, "logps/chosen": -607.6774291992188, "logps/rejected": -785.1390991210938, "loss": 0.2038, "rewards/accuracies": 0.875, "rewards/chosen": -0.4608604907989502, "rewards/margins": 0.20961742103099823, "rewards/rejected": -0.6704779267311096, "step": 1200 }, { "epoch": 0.29, "learning_rate": 3.074275049171889e-06, "logits/chosen": -2.2372031211853027, "logits/rejected": -1.4800232648849487, "logps/chosen": -483.6255798339844, "logps/rejected": -684.0525512695312, "loss": 0.2085, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3340616226196289, "rewards/margins": 0.26013877987861633, "rewards/rejected": -0.5942003726959229, "step": 1210 }, { "epoch": 0.29, "learning_rate": 3.0402470229070057e-06, "logits/chosen": -2.0542514324188232, "logits/rejected": -1.2669802904129028, "logps/chosen": -501.363037109375, "logps/rejected": -668.2828979492188, "loss": 0.2264, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32397252321243286, "rewards/margins": 0.25711455941200256, "rewards/rejected": -0.5810869932174683, "step": 1220 }, { "epoch": 0.3, "learning_rate": 3.006113674414565e-06, "logits/chosen": -2.124558925628662, "logits/rejected": -1.5115940570831299, "logps/chosen": -526.3839721679688, "logps/rejected": -727.1844482421875, "loss": 0.2239, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3876585364341736, "rewards/margins": 0.2264205515384674, "rewards/rejected": -0.6140791177749634, "step": 1230 }, { "epoch": 0.3, "learning_rate": 2.9718816580584885e-06, "logits/chosen": -2.330988645553589, "logits/rejected": -1.7894699573516846, "logps/chosen": -450.5931701660156, "logps/rejected": -639.8587036132812, "loss": 0.2195, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.300833135843277, "rewards/margins": 0.2428937703371048, "rewards/rejected": -0.5437268614768982, "step": 1240 }, { "epoch": 0.3, "learning_rate": 2.9375576474381907e-06, "logits/chosen": -2.09294056892395, "logits/rejected": -1.5990426540374756, "logps/chosen": -523.2689208984375, "logps/rejected": -714.5638427734375, "loss": 0.2505, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.36846724152565, "rewards/margins": 0.24714866280555725, "rewards/rejected": -0.6156159043312073, "step": 1250 }, { "epoch": 0.3, "learning_rate": 2.9031483340875523e-06, "logits/chosen": -2.161059617996216, "logits/rejected": -1.649753212928772, "logps/chosen": -595.0687255859375, "logps/rejected": -741.806396484375, "loss": 0.2356, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.42591404914855957, "rewards/margins": 0.19824771583080292, "rewards/rejected": -0.6241617202758789, "step": 1260 }, { "epoch": 0.3, "learning_rate": 2.868660426170388e-06, "logits/chosen": -2.0810506343841553, "logits/rejected": -1.3984508514404297, "logps/chosen": -519.595458984375, "logps/rejected": -709.3812255859375, "loss": 0.2182, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.37194812297821045, "rewards/margins": 0.21791160106658936, "rewards/rejected": -0.5898597836494446, "step": 1270 }, { "epoch": 0.31, "learning_rate": 2.8341006471726817e-06, "logits/chosen": -1.937578558921814, "logits/rejected": -1.3747153282165527, "logps/chosen": -516.6207885742188, "logps/rejected": -695.8414306640625, "loss": 0.2206, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3487696647644043, "rewards/margins": 0.2523016929626465, "rewards/rejected": -0.6010713577270508, "step": 1280 }, { "epoch": 0.31, "learning_rate": 2.7994757345918244e-06, "logits/chosen": -2.414841890335083, "logits/rejected": -1.7823702096939087, "logps/chosen": -453.10357666015625, "logps/rejected": -609.0830078125, "loss": 0.2223, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29341357946395874, "rewards/margins": 0.1968296319246292, "rewards/rejected": -0.49024319648742676, "step": 1290 }, { "epoch": 0.31, "learning_rate": 2.76479243862313e-06, "logits/chosen": -2.3372766971588135, "logits/rejected": -1.365880012512207, "logps/chosen": -455.2708435058594, "logps/rejected": -712.4964599609375, "loss": 0.2234, "rewards/accuracies": 0.875, "rewards/chosen": -0.29985883831977844, "rewards/margins": 0.31078290939331055, "rewards/rejected": -0.6106417775154114, "step": 1300 }, { "epoch": 0.31, "learning_rate": 2.7300575208438684e-06, "logits/chosen": -2.193761110305786, "logits/rejected": -1.7236446142196655, "logps/chosen": -438.6455078125, "logps/rejected": -565.2931518554688, "loss": 0.2211, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.28494128584861755, "rewards/margins": 0.18237076699733734, "rewards/rejected": -0.4673120379447937, "step": 1310 }, { "epoch": 0.32, "learning_rate": 2.695277752895084e-06, "logits/chosen": -2.1613688468933105, "logits/rejected": -1.6745634078979492, "logps/chosen": -445.5487365722656, "logps/rejected": -583.8851318359375, "loss": 0.2158, "rewards/accuracies": 0.75, "rewards/chosen": -0.30961447954177856, "rewards/margins": 0.17475193738937378, "rewards/rejected": -0.48436641693115234, "step": 1320 }, { "epoch": 0.32, "learning_rate": 2.6604599151614514e-06, "logits/chosen": -2.3241307735443115, "logits/rejected": -1.718146562576294, "logps/chosen": -472.390380859375, "logps/rejected": -670.6461791992188, "loss": 0.2484, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.30457672476768494, "rewards/margins": 0.2522759437561035, "rewards/rejected": -0.5568526983261108, "step": 1330 }, { "epoch": 0.32, "learning_rate": 2.625610795449424e-06, "logits/chosen": -2.118846893310547, "logits/rejected": -1.4407285451889038, "logps/chosen": -462.6249084472656, "logps/rejected": -711.793212890625, "loss": 0.2517, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.32376161217689514, "rewards/margins": 0.27978068590164185, "rewards/rejected": -0.6035423278808594, "step": 1340 }, { "epoch": 0.32, "learning_rate": 2.59073718766394e-06, "logits/chosen": -2.1999683380126953, "logits/rejected": -1.4900109767913818, "logps/chosen": -476.880859375, "logps/rejected": -675.8462524414062, "loss": 0.2466, "rewards/accuracies": 0.875, "rewards/chosen": -0.3132093548774719, "rewards/margins": 0.25780683755874634, "rewards/rejected": -0.571016252040863, "step": 1350 }, { "epoch": 0.33, "learning_rate": 2.5558458904839345e-06, "logits/chosen": -2.192030191421509, "logits/rejected": -1.7015453577041626, "logps/chosen": -497.59429931640625, "logps/rejected": -690.6241455078125, "loss": 0.2367, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3215945065021515, "rewards/margins": 0.2261447012424469, "rewards/rejected": -0.5477392673492432, "step": 1360 }, { "epoch": 0.33, "learning_rate": 2.5209437060369266e-06, "logits/chosen": -2.1700994968414307, "logits/rejected": -1.501390814781189, "logps/chosen": -551.3470458984375, "logps/rejected": -779.6153564453125, "loss": 0.2324, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.39861801266670227, "rewards/margins": 0.272796094417572, "rewards/rejected": -0.6714141368865967, "step": 1370 }, { "epoch": 0.33, "learning_rate": 2.4860374385729298e-06, "logits/chosen": -2.3210222721099854, "logits/rejected": -1.647146224975586, "logps/chosen": -461.9217224121094, "logps/rejected": -660.2238159179688, "loss": 0.2164, "rewards/accuracies": 0.75, "rewards/chosen": -0.30266645550727844, "rewards/margins": 0.2639373540878296, "rewards/rejected": -0.5666038990020752, "step": 1380 }, { "epoch": 0.33, "learning_rate": 2.4511338931379475e-06, "logits/chosen": -2.270345687866211, "logits/rejected": -1.7757459878921509, "logps/chosen": -534.710693359375, "logps/rejected": -685.7799072265625, "loss": 0.1999, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3926665186882019, "rewards/margins": 0.20033708214759827, "rewards/rejected": -0.5930036306381226, "step": 1390 }, { "epoch": 0.34, "learning_rate": 2.4162398742473216e-06, "logits/chosen": -2.436246395111084, "logits/rejected": -1.9106261730194092, "logps/chosen": -395.49920654296875, "logps/rejected": -564.9219970703125, "loss": 0.2506, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19471552968025208, "rewards/margins": 0.22500737011432648, "rewards/rejected": -0.41972288489341736, "step": 1400 }, { "epoch": 0.34, "learning_rate": 2.381362184559173e-06, "logits/chosen": -2.3186497688293457, "logits/rejected": -1.7353124618530273, "logps/chosen": -545.6030883789062, "logps/rejected": -732.5777587890625, "loss": 0.2682, "rewards/accuracies": 0.875, "rewards/chosen": -0.37879544496536255, "rewards/margins": 0.24574975669384003, "rewards/rejected": -0.6245452165603638, "step": 1410 }, { "epoch": 0.34, "learning_rate": 2.3465076235482117e-06, "logits/chosen": -2.099091053009033, "logits/rejected": -1.251534104347229, "logps/chosen": -591.8385009765625, "logps/rejected": -823.31201171875, "loss": 0.2263, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3871614336967468, "rewards/margins": 0.3113124966621399, "rewards/rejected": -0.6984738707542419, "step": 1420 }, { "epoch": 0.34, "learning_rate": 2.3116829861801687e-06, "logits/chosen": -2.2782912254333496, "logits/rejected": -1.6930453777313232, "logps/chosen": -549.5288696289062, "logps/rejected": -727.2321166992188, "loss": 0.244, "rewards/accuracies": 0.75, "rewards/chosen": -0.4014182984828949, "rewards/margins": 0.21473488211631775, "rewards/rejected": -0.6161531209945679, "step": 1430 }, { "epoch": 0.35, "learning_rate": 2.276895061587099e-06, "logits/chosen": -2.3278706073760986, "logits/rejected": -1.6391801834106445, "logps/chosen": -570.4884643554688, "logps/rejected": -818.1508178710938, "loss": 0.1976, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.40508347749710083, "rewards/margins": 0.28309813141822815, "rewards/rejected": -0.6881815791130066, "step": 1440 }, { "epoch": 0.35, "learning_rate": 2.242150631743832e-06, "logits/chosen": -2.3615341186523438, "logits/rejected": -1.725358009338379, "logps/chosen": -478.43603515625, "logps/rejected": -650.2882080078125, "loss": 0.2222, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31791216135025024, "rewards/margins": 0.2205561101436615, "rewards/rejected": -0.5384682416915894, "step": 1450 }, { "epoch": 0.35, "learning_rate": 2.207456470145807e-06, "logits/chosen": -2.35868239402771, "logits/rejected": -1.982661247253418, "logps/chosen": -456.50439453125, "logps/rejected": -552.9801635742188, "loss": 0.291, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29267337918281555, "rewards/margins": 0.14309945702552795, "rewards/rejected": -0.4357728064060211, "step": 1460 }, { "epoch": 0.35, "learning_rate": 2.17281934048857e-06, "logits/chosen": -2.1577529907226562, "logits/rejected": -1.2531640529632568, "logps/chosen": -470.220947265625, "logps/rejected": -691.4623413085938, "loss": 0.2374, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2939762473106384, "rewards/margins": 0.2891116142272949, "rewards/rejected": -0.5830878615379333, "step": 1470 }, { "epoch": 0.36, "learning_rate": 2.1382459953491773e-06, "logits/chosen": -2.153989315032959, "logits/rejected": -1.712892770767212, "logps/chosen": -509.06103515625, "logps/rejected": -677.8247680664062, "loss": 0.2359, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.3281121850013733, "rewards/margins": 0.20805349946022034, "rewards/rejected": -0.5361656546592712, "step": 1480 }, { "epoch": 0.36, "learning_rate": 2.103743174869769e-06, "logits/chosen": -1.9424854516983032, "logits/rejected": -1.422446370124817, "logps/chosen": -697.8045654296875, "logps/rejected": -815.5999755859375, "loss": 0.2559, "rewards/accuracies": 0.75, "rewards/chosen": -0.5232383012771606, "rewards/margins": 0.18479886651039124, "rewards/rejected": -0.7080371379852295, "step": 1490 }, { "epoch": 0.36, "learning_rate": 2.0693176054435586e-06, "logits/chosen": -2.266700506210327, "logits/rejected": -1.5664805173873901, "logps/chosen": -565.6947021484375, "logps/rejected": -702.6282348632812, "loss": 0.2324, "rewards/accuracies": 0.875, "rewards/chosen": -0.3801000118255615, "rewards/margins": 0.20919163525104523, "rewards/rejected": -0.5892916321754456, "step": 1500 }, { "epoch": 0.36, "learning_rate": 2.034975998403517e-06, "logits/chosen": -2.202317953109741, "logits/rejected": -1.178363561630249, "logps/chosen": -471.96514892578125, "logps/rejected": -716.80322265625, "loss": 0.2517, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.29619717597961426, "rewards/margins": 0.32016804814338684, "rewards/rejected": -0.6163652539253235, "step": 1510 }, { "epoch": 0.36, "learning_rate": 2.0007250487139827e-06, "logits/chosen": -2.1976194381713867, "logits/rejected": -1.7854173183441162, "logps/chosen": -409.1556396484375, "logps/rejected": -538.7450561523438, "loss": 0.2324, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2653912305831909, "rewards/margins": 0.16031914949417114, "rewards/rejected": -0.42571038007736206, "step": 1520 }, { "epoch": 0.37, "learning_rate": 1.9665714336654604e-06, "logits/chosen": -2.2585816383361816, "logits/rejected": -1.4220424890518188, "logps/chosen": -477.648681640625, "logps/rejected": -702.324462890625, "loss": 0.2246, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31048738956451416, "rewards/margins": 0.29150474071502686, "rewards/rejected": -0.6019921898841858, "step": 1530 }, { "epoch": 0.37, "learning_rate": 1.9325218115728756e-06, "logits/chosen": -2.259849786758423, "logits/rejected": -1.4995836019515991, "logps/chosen": -520.2024536132812, "logps/rejected": -719.3076782226562, "loss": 0.2124, "rewards/accuracies": 0.875, "rewards/chosen": -0.3540114760398865, "rewards/margins": 0.2679198682308197, "rewards/rejected": -0.6219313740730286, "step": 1540 }, { "epoch": 0.37, "learning_rate": 1.8985828204775206e-06, "logits/chosen": -2.2383124828338623, "logits/rejected": -1.5292937755584717, "logps/chosen": -460.6908264160156, "logps/rejected": -634.41552734375, "loss": 0.1988, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31010597944259644, "rewards/margins": 0.2322625368833542, "rewards/rejected": -0.5423685312271118, "step": 1550 }, { "epoch": 0.37, "learning_rate": 1.8647610768529581e-06, "logits/chosen": -2.3701467514038086, "logits/rejected": -1.669344186782837, "logps/chosen": -515.9610595703125, "logps/rejected": -702.8071899414062, "loss": 0.2465, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3375261425971985, "rewards/margins": 0.22942647337913513, "rewards/rejected": -0.566952645778656, "step": 1560 }, { "epoch": 0.38, "learning_rate": 1.8310631743151187e-06, "logits/chosen": -2.3348028659820557, "logits/rejected": -1.6338012218475342, "logps/chosen": -478.33428955078125, "logps/rejected": -730.6288452148438, "loss": 0.2324, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.33109721541404724, "rewards/margins": 0.2853388786315918, "rewards/rejected": -0.6164361238479614, "step": 1570 }, { "epoch": 0.38, "learning_rate": 1.7974956823368728e-06, "logits/chosen": -2.156118631362915, "logits/rejected": -1.6093488931655884, "logps/chosen": -582.0015258789062, "logps/rejected": -735.3878784179688, "loss": 0.2101, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4120170474052429, "rewards/margins": 0.207681804895401, "rewards/rejected": -0.6196987628936768, "step": 1580 }, { "epoch": 0.38, "learning_rate": 1.7640651449672913e-06, "logits/chosen": -2.3183345794677734, "logits/rejected": -1.6045589447021484, "logps/chosen": -474.8514709472656, "logps/rejected": -684.0841064453125, "loss": 0.2061, "rewards/accuracies": 0.875, "rewards/chosen": -0.28908270597457886, "rewards/margins": 0.27025845646858215, "rewards/rejected": -0.5593411326408386, "step": 1590 }, { "epoch": 0.38, "learning_rate": 1.7307780795558743e-06, "logits/chosen": -2.1290550231933594, "logits/rejected": -1.5637315511703491, "logps/chosen": -484.809814453125, "logps/rejected": -644.54052734375, "loss": 0.2349, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3229495882987976, "rewards/margins": 0.21575050055980682, "rewards/rejected": -0.5387001633644104, "step": 1600 }, { "epoch": 0.39, "learning_rate": 1.6976409754819767e-06, "logits/chosen": -2.2557132244110107, "logits/rejected": -1.787325143814087, "logps/chosen": -505.43115234375, "logps/rejected": -684.2761840820312, "loss": 0.2373, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.3303944170475006, "rewards/margins": 0.22589488327503204, "rewards/rejected": -0.5562892556190491, "step": 1610 }, { "epoch": 0.39, "learning_rate": 1.6646602928896962e-06, "logits/chosen": -2.1125569343566895, "logits/rejected": -1.5178847312927246, "logps/chosen": -604.7052612304688, "logps/rejected": -748.1799926757812, "loss": 0.2035, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4366299510002136, "rewards/margins": 0.2077966034412384, "rewards/rejected": -0.6444265246391296, "step": 1620 }, { "epoch": 0.39, "learning_rate": 1.6318424614284525e-06, "logits/chosen": -2.0228872299194336, "logits/rejected": -1.6469089984893799, "logps/chosen": -619.0574951171875, "logps/rejected": -747.111328125, "loss": 0.2531, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4670810103416443, "rewards/margins": 0.15774844586849213, "rewards/rejected": -0.6248295307159424, "step": 1630 }, { "epoch": 0.39, "learning_rate": 1.5991938789995138e-06, "logits/chosen": -2.178020477294922, "logits/rejected": -1.616796851158142, "logps/chosen": -625.3834838867188, "logps/rejected": -798.6873168945312, "loss": 0.2581, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.46300649642944336, "rewards/margins": 0.23273572325706482, "rewards/rejected": -0.6957422494888306, "step": 1640 }, { "epoch": 0.4, "learning_rate": 1.5667209105087134e-06, "logits/chosen": -2.1452012062072754, "logits/rejected": -1.4882639646530151, "logps/chosen": -637.6602783203125, "logps/rejected": -835.0618896484375, "loss": 0.2314, "rewards/accuracies": 0.75, "rewards/chosen": -0.4847962260246277, "rewards/margins": 0.23071709275245667, "rewards/rejected": -0.7155133485794067, "step": 1650 }, { "epoch": 0.4, "learning_rate": 1.5344298866256002e-06, "logits/chosen": -2.1361632347106934, "logits/rejected": -1.393336296081543, "logps/chosen": -598.9241333007812, "logps/rejected": -825.7525634765625, "loss": 0.2541, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4569925367832184, "rewards/margins": 0.26417863368988037, "rewards/rejected": -0.7211712002754211, "step": 1660 }, { "epoch": 0.4, "learning_rate": 1.502327102549262e-06, "logits/chosen": -2.1626908779144287, "logits/rejected": -1.562538743019104, "logps/chosen": -468.4820251464844, "logps/rejected": -660.3966064453125, "loss": 0.204, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3353859782218933, "rewards/margins": 0.2233034074306488, "rewards/rejected": -0.5586894154548645, "step": 1670 }, { "epoch": 0.4, "learning_rate": 1.4704188167810635e-06, "logits/chosen": -2.2278897762298584, "logits/rejected": -1.6712696552276611, "logps/chosen": -513.513916015625, "logps/rejected": -695.208984375, "loss": 0.2159, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.362549364566803, "rewards/margins": 0.23459453880786896, "rewards/rejected": -0.5971439480781555, "step": 1680 }, { "epoch": 0.41, "learning_rate": 1.438711249904536e-06, "logits/chosen": -2.2296204566955566, "logits/rejected": -1.5326087474822998, "logps/chosen": -495.2484436035156, "logps/rejected": -728.9078369140625, "loss": 0.2267, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3783426582813263, "rewards/margins": 0.26764681935310364, "rewards/rejected": -0.6459894180297852, "step": 1690 }, { "epoch": 0.41, "learning_rate": 1.4072105833726685e-06, "logits/chosen": -2.308741569519043, "logits/rejected": -1.57771897315979, "logps/chosen": -545.0296630859375, "logps/rejected": -739.1373291015625, "loss": 0.2682, "rewards/accuracies": 0.875, "rewards/chosen": -0.3485683798789978, "rewards/margins": 0.25783371925354004, "rewards/rejected": -0.6064020991325378, "step": 1700 }, { "epoch": 0.41, "learning_rate": 1.375922958302815e-06, "logits/chosen": -2.177499532699585, "logits/rejected": -1.6153056621551514, "logps/chosen": -575.4890747070312, "logps/rejected": -719.1101684570312, "loss": 0.257, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4291258454322815, "rewards/margins": 0.1913391649723053, "rewards/rejected": -0.6204649209976196, "step": 1710 }, { "epoch": 0.41, "learning_rate": 1.3448544742794792e-06, "logits/chosen": -2.359710931777954, "logits/rejected": -1.8718398809432983, "logps/chosen": -518.01318359375, "logps/rejected": -663.861083984375, "loss": 0.2177, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.39127427339553833, "rewards/margins": 0.15787231922149658, "rewards/rejected": -0.5491466522216797, "step": 1720 }, { "epoch": 0.42, "learning_rate": 1.3140111881651773e-06, "logits/chosen": -1.9541170597076416, "logits/rejected": -1.3082023859024048, "logps/chosen": -529.1683349609375, "logps/rejected": -741.0734252929688, "loss": 0.219, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3982730805873871, "rewards/margins": 0.24865877628326416, "rewards/rejected": -0.6469318866729736, "step": 1730 }, { "epoch": 0.42, "learning_rate": 1.2833991129196508e-06, "logits/chosen": -2.241741418838501, "logits/rejected": -1.4685299396514893, "logps/chosen": -470.96209716796875, "logps/rejected": -709.0288696289062, "loss": 0.216, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3294115662574768, "rewards/margins": 0.2778538763523102, "rewards/rejected": -0.6072654724121094, "step": 1740 }, { "epoch": 0.42, "learning_rate": 1.2530242164276236e-06, "logits/chosen": -2.0970499515533447, "logits/rejected": -1.3942879438400269, "logps/chosen": -486.5403747558594, "logps/rejected": -718.138671875, "loss": 0.2207, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.31325727701187134, "rewards/margins": 0.3117991089820862, "rewards/rejected": -0.6250563859939575, "step": 1750 }, { "epoch": 0.42, "learning_rate": 1.2228924203353507e-06, "logits/chosen": -2.068192720413208, "logits/rejected": -1.585180401802063, "logps/chosen": -533.9257202148438, "logps/rejected": -647.5416870117188, "loss": 0.2262, "rewards/accuracies": 0.75, "rewards/chosen": -0.390627920627594, "rewards/margins": 0.16129513084888458, "rewards/rejected": -0.5519230365753174, "step": 1760 }, { "epoch": 0.42, "learning_rate": 1.1930095988961837e-06, "logits/chosen": -2.2689132690429688, "logits/rejected": -1.6284101009368896, "logps/chosen": -489.23468017578125, "logps/rejected": -698.836181640625, "loss": 0.2103, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31348201632499695, "rewards/margins": 0.2569728493690491, "rewards/rejected": -0.5704549551010132, "step": 1770 }, { "epoch": 0.43, "learning_rate": 1.1633815778253721e-06, "logits/chosen": -2.223635673522949, "logits/rejected": -1.5692812204360962, "logps/chosen": -564.4153442382812, "logps/rejected": -758.5940551757812, "loss": 0.2683, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3882458806037903, "rewards/margins": 0.24454763531684875, "rewards/rejected": -0.6327935457229614, "step": 1780 }, { "epoch": 0.43, "learning_rate": 1.1340141331643276e-06, "logits/chosen": -2.226630926132202, "logits/rejected": -1.6583023071289062, "logps/chosen": -499.40155029296875, "logps/rejected": -738.2061767578125, "loss": 0.2232, "rewards/accuracies": 0.875, "rewards/chosen": -0.3272217810153961, "rewards/margins": 0.2891238331794739, "rewards/rejected": -0.6163456439971924, "step": 1790 }, { "epoch": 0.43, "learning_rate": 1.1049129901545756e-06, "logits/chosen": -2.193066358566284, "logits/rejected": -1.6954580545425415, "logps/chosen": -497.5428161621094, "logps/rejected": -683.050048828125, "loss": 0.2054, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3609580397605896, "rewards/margins": 0.21754273772239685, "rewards/rejected": -0.5785007476806641, "step": 1800 }, { "epoch": 0.43, "learning_rate": 1.0760838221216065e-06, "logits/chosen": -2.311552047729492, "logits/rejected": -1.7207205295562744, "logps/chosen": -450.60504150390625, "logps/rejected": -628.2011108398438, "loss": 0.2457, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25561460852622986, "rewards/margins": 0.24946312606334686, "rewards/rejected": -0.5050776600837708, "step": 1810 }, { "epoch": 0.44, "learning_rate": 1.0475322493688506e-06, "logits/chosen": -2.1966607570648193, "logits/rejected": -1.5143920183181763, "logps/chosen": -420.51885986328125, "logps/rejected": -698.9932250976562, "loss": 0.2089, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.26709431409835815, "rewards/margins": 0.31104663014411926, "rewards/rejected": -0.578140914440155, "step": 1820 }, { "epoch": 0.44, "learning_rate": 1.0192638380819884e-06, "logits/chosen": -2.3707401752471924, "logits/rejected": -1.5773122310638428, "logps/chosen": -453.8907165527344, "logps/rejected": -664.8104248046875, "loss": 0.2373, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2851904630661011, "rewards/margins": 0.2743096947669983, "rewards/rejected": -0.5595001578330994, "step": 1830 }, { "epoch": 0.44, "learning_rate": 9.912840992438087e-07, "logits/chosen": -2.176928758621216, "logits/rejected": -1.5890274047851562, "logps/chosen": -573.779541015625, "logps/rejected": -796.2474975585938, "loss": 0.2305, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4253336787223816, "rewards/margins": 0.26153475046157837, "rewards/rejected": -0.6868684887886047, "step": 1840 }, { "epoch": 0.44, "learning_rate": 9.63598487559839e-07, "logits/chosen": -2.2372374534606934, "logits/rejected": -1.546007752418518, "logps/chosen": -464.493408203125, "logps/rejected": -669.3597412109375, "loss": 0.208, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.30372193455696106, "rewards/margins": 0.278639018535614, "rewards/rejected": -0.5823609828948975, "step": 1850 }, { "epoch": 0.45, "learning_rate": 9.362124003949324e-07, "logits/chosen": -2.1051459312438965, "logits/rejected": -1.6941722631454468, "logps/chosen": -516.5181884765625, "logps/rejected": -681.1585693359375, "loss": 0.2474, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.36375877261161804, "rewards/margins": 0.18948772549629211, "rewards/rejected": -0.5532464981079102, "step": 1860 }, { "epoch": 0.45, "learning_rate": 9.091311767210453e-07, "logits/chosen": -2.1879124641418457, "logits/rejected": -1.6842693090438843, "logps/chosen": -510.8837890625, "logps/rejected": -665.6920166015625, "loss": 0.2358, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36585649847984314, "rewards/margins": 0.18732208013534546, "rewards/rejected": -0.553178608417511, "step": 1870 }, { "epoch": 0.45, "learning_rate": 8.823600960763901e-07, "logits/chosen": -2.130765199661255, "logits/rejected": -1.5883699655532837, "logps/chosen": -534.0338745117188, "logps/rejected": -741.09423828125, "loss": 0.2077, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38128194212913513, "rewards/margins": 0.25697097182273865, "rewards/rejected": -0.6382529139518738, "step": 1880 }, { "epoch": 0.45, "learning_rate": 8.559043775361816e-07, "logits/chosen": -2.1295900344848633, "logits/rejected": -1.5722177028656006, "logps/chosen": -551.2114868164062, "logps/rejected": -690.095947265625, "loss": 0.2276, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3628248870372772, "rewards/margins": 0.2208392173051834, "rewards/rejected": -0.5836641192436218, "step": 1890 }, { "epoch": 0.46, "learning_rate": 8.297691786951706e-07, "logits/chosen": -2.266829013824463, "logits/rejected": -1.4247468709945679, "logps/chosen": -547.1033935546875, "logps/rejected": -776.6325073242188, "loss": 0.2145, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.35183387994766235, "rewards/margins": 0.31444767117500305, "rewards/rejected": -0.666281521320343, "step": 1900 }, { "epoch": 0.46, "learning_rate": 8.039595946621551e-07, "logits/chosen": -2.2304577827453613, "logits/rejected": -1.3978160619735718, "logps/chosen": -538.714599609375, "logps/rejected": -793.1671142578125, "loss": 0.2318, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.39072176814079285, "rewards/margins": 0.2959004342556, "rewards/rejected": -0.6866222620010376, "step": 1910 }, { "epoch": 0.46, "learning_rate": 7.784806570666795e-07, "logits/chosen": -2.147185802459717, "logits/rejected": -1.5393598079681396, "logps/chosen": -481.76373291015625, "logps/rejected": -670.560791015625, "loss": 0.2545, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3331444263458252, "rewards/margins": 0.2202250212430954, "rewards/rejected": -0.553369402885437, "step": 1920 }, { "epoch": 0.46, "learning_rate": 7.533373330781127e-07, "logits/chosen": -2.355670928955078, "logits/rejected": -1.5707635879516602, "logps/chosen": -561.0064697265625, "logps/rejected": -760.4944458007812, "loss": 0.2233, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3677171766757965, "rewards/margins": 0.2857670485973358, "rewards/rejected": -0.6534842252731323, "step": 1930 }, { "epoch": 0.47, "learning_rate": 7.285345244372843e-07, "logits/chosen": -2.2503583431243896, "logits/rejected": -1.3964335918426514, "logps/chosen": -503.65777587890625, "logps/rejected": -756.98828125, "loss": 0.1685, "rewards/accuracies": 0.875, "rewards/chosen": -0.33578115701675415, "rewards/margins": 0.312565416097641, "rewards/rejected": -0.6483466029167175, "step": 1940 }, { "epoch": 0.47, "learning_rate": 7.040770665008853e-07, "logits/chosen": -2.2794625759124756, "logits/rejected": -1.6694520711898804, "logps/chosen": -546.9520263671875, "logps/rejected": -695.8685302734375, "loss": 0.2262, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3904178738594055, "rewards/margins": 0.2141040861606598, "rewards/rejected": -0.6045219302177429, "step": 1950 }, { "epoch": 0.47, "learning_rate": 6.799697272987976e-07, "logits/chosen": -2.1750683784484863, "logits/rejected": -1.3738349676132202, "logps/chosen": -540.0596923828125, "logps/rejected": -722.267578125, "loss": 0.2455, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3699941039085388, "rewards/margins": 0.2555733621120453, "rewards/rejected": -0.6255674958229065, "step": 1960 }, { "epoch": 0.47, "learning_rate": 6.562172066045655e-07, "logits/chosen": -2.167599678039551, "logits/rejected": -1.6329807043075562, "logps/chosen": -441.4002380371094, "logps/rejected": -602.337646484375, "loss": 0.2318, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.31705886125564575, "rewards/margins": 0.21183231472969055, "rewards/rejected": -0.5288912057876587, "step": 1970 }, { "epoch": 0.48, "learning_rate": 6.328241350191619e-07, "logits/chosen": -2.2226500511169434, "logits/rejected": -1.537512183189392, "logps/chosen": -480.624755859375, "logps/rejected": -686.3212890625, "loss": 0.2092, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30479687452316284, "rewards/margins": 0.2621714472770691, "rewards/rejected": -0.5669684410095215, "step": 1980 }, { "epoch": 0.48, "learning_rate": 6.097950730682426e-07, "logits/chosen": -2.176600456237793, "logits/rejected": -1.5454185009002686, "logps/chosen": -503.0887756347656, "logps/rejected": -687.1947021484375, "loss": 0.2479, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3592723608016968, "rewards/margins": 0.23496529459953308, "rewards/rejected": -0.5942376255989075, "step": 1990 }, { "epoch": 0.48, "learning_rate": 5.871345103130646e-07, "logits/chosen": -2.087590217590332, "logits/rejected": -1.4282548427581787, "logps/chosen": -597.8372802734375, "logps/rejected": -783.7901611328125, "loss": 0.2177, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.44616231322288513, "rewards/margins": 0.21901002526283264, "rewards/rejected": -0.665172278881073, "step": 2000 }, { "epoch": 0.48, "learning_rate": 5.64846864475237e-07, "logits/chosen": -2.1877074241638184, "logits/rejected": -1.913442611694336, "logps/chosen": -512.3074951171875, "logps/rejected": -631.2486572265625, "loss": 0.2673, "rewards/accuracies": 0.75, "rewards/chosen": -0.35744622349739075, "rewards/margins": 0.16726166009902954, "rewards/rejected": -0.5247078537940979, "step": 2010 }, { "epoch": 0.48, "learning_rate": 5.429364805754758e-07, "logits/chosen": -2.0919928550720215, "logits/rejected": -1.5754165649414062, "logps/chosen": -502.09405517578125, "logps/rejected": -650.5390014648438, "loss": 0.2226, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3595220446586609, "rewards/margins": 0.19984325766563416, "rewards/rejected": -0.5593653321266174, "step": 2020 }, { "epoch": 0.49, "learning_rate": 5.214076300865359e-07, "logits/chosen": -1.9238027334213257, "logits/rejected": -1.2277315855026245, "logps/chosen": -580.2691650390625, "logps/rejected": -834.8424072265625, "loss": 0.2114, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.4341210722923279, "rewards/margins": 0.30377626419067383, "rewards/rejected": -0.7378972768783569, "step": 2030 }, { "epoch": 0.49, "learning_rate": 5.002645101004766e-07, "logits/chosen": -2.335980176925659, "logits/rejected": -1.380081057548523, "logps/chosen": -504.81024169921875, "logps/rejected": -794.1038208007812, "loss": 0.1986, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3392624855041504, "rewards/margins": 0.3661222457885742, "rewards/rejected": -0.7053847908973694, "step": 2040 }, { "epoch": 0.49, "learning_rate": 4.795112425104323e-07, "logits/chosen": -2.2008166313171387, "logits/rejected": -1.5865943431854248, "logps/chosen": -581.0762939453125, "logps/rejected": -769.32177734375, "loss": 0.2619, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4084292948246002, "rewards/margins": 0.25177350640296936, "rewards/rejected": -0.6602028012275696, "step": 2050 }, { "epoch": 0.49, "learning_rate": 4.591518732070402e-07, "logits/chosen": -1.9570486545562744, "logits/rejected": -1.4179704189300537, "logps/chosen": -534.230224609375, "logps/rejected": -729.9030151367188, "loss": 0.2396, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3860064148902893, "rewards/margins": 0.24045896530151367, "rewards/rejected": -0.6264654397964478, "step": 2060 }, { "epoch": 0.5, "learning_rate": 4.391903712896861e-07, "logits/chosen": -2.1633055210113525, "logits/rejected": -1.5320649147033691, "logps/chosen": -577.50439453125, "logps/rejected": -789.090087890625, "loss": 0.242, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.4344731271266937, "rewards/margins": 0.2326418161392212, "rewards/rejected": -0.6671148538589478, "step": 2070 }, { "epoch": 0.5, "learning_rate": 4.196306282927187e-07, "logits/chosen": -2.198305130004883, "logits/rejected": -1.6743977069854736, "logps/chosen": -516.3706665039062, "logps/rejected": -713.3460693359375, "loss": 0.2211, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3524255156517029, "rewards/margins": 0.25238287448883057, "rewards/rejected": -0.6048084497451782, "step": 2080 }, { "epoch": 0.5, "learning_rate": 4.0047645742679275e-07, "logits/chosen": -2.15580415725708, "logits/rejected": -1.6634889841079712, "logps/chosen": -539.8309326171875, "logps/rejected": -740.0956420898438, "loss": 0.218, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.36082297563552856, "rewards/margins": 0.23072440922260284, "rewards/rejected": -0.5915473103523254, "step": 2090 }, { "epoch": 0.5, "learning_rate": 3.817315928354695e-07, "logits/chosen": -2.187629222869873, "logits/rejected": -1.617875099182129, "logps/chosen": -528.2720336914062, "logps/rejected": -703.1224365234375, "loss": 0.2308, "rewards/accuracies": 0.75, "rewards/chosen": -0.36435794830322266, "rewards/margins": 0.23637576401233673, "rewards/rejected": -0.6007336378097534, "step": 2100 }, { "epoch": 0.51, "learning_rate": 3.633996888672428e-07, "logits/chosen": -2.1398205757141113, "logits/rejected": -1.48526132106781, "logps/chosen": -582.8170166015625, "logps/rejected": -740.50390625, "loss": 0.206, "rewards/accuracies": 0.75, "rewards/chosen": -0.4385349154472351, "rewards/margins": 0.2139698714017868, "rewards/rejected": -0.6525048017501831, "step": 2110 }, { "epoch": 0.51, "learning_rate": 3.4548431936311275e-07, "logits/chosen": -2.3196969032287598, "logits/rejected": -1.9413297176361084, "logps/chosen": -515.4064331054688, "logps/rejected": -631.7554931640625, "loss": 0.24, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3508912920951843, "rewards/margins": 0.1308077871799469, "rewards/rejected": -0.4816990792751312, "step": 2120 }, { "epoch": 0.51, "learning_rate": 3.2798897695986155e-07, "logits/chosen": -2.225336790084839, "logits/rejected": -1.4455909729003906, "logps/chosen": -562.00439453125, "logps/rejected": -792.2058715820312, "loss": 0.2268, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.3739345073699951, "rewards/margins": 0.29291829466819763, "rewards/rejected": -0.6668527722358704, "step": 2130 }, { "epoch": 0.51, "learning_rate": 3.1091707240915704e-07, "logits/chosen": -2.318101167678833, "logits/rejected": -1.5058709383010864, "logps/chosen": -528.3710327148438, "logps/rejected": -773.945068359375, "loss": 0.2322, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3821602761745453, "rewards/margins": 0.2865941524505615, "rewards/rejected": -0.6687543988227844, "step": 2140 }, { "epoch": 0.52, "learning_rate": 2.942719339126171e-07, "logits/chosen": -2.114053249359131, "logits/rejected": -1.4541417360305786, "logps/chosen": -575.40625, "logps/rejected": -743.8985595703125, "loss": 0.2631, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4056618809700012, "rewards/margins": 0.24168558418750763, "rewards/rejected": -0.6473473906517029, "step": 2150 }, { "epoch": 0.52, "learning_rate": 2.780568064729716e-07, "logits/chosen": -2.1276280879974365, "logits/rejected": -1.5844396352767944, "logps/chosen": -556.7276000976562, "logps/rejected": -722.495849609375, "loss": 0.232, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40463319420814514, "rewards/margins": 0.21296298503875732, "rewards/rejected": -0.6175961494445801, "step": 2160 }, { "epoch": 0.52, "learning_rate": 2.622748512614437e-07, "logits/chosen": -2.328648328781128, "logits/rejected": -1.7766300439834595, "logps/chosen": -508.13250732421875, "logps/rejected": -652.4500122070312, "loss": 0.2499, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.35677647590637207, "rewards/margins": 0.18978366255760193, "rewards/rejected": -0.5465601682662964, "step": 2170 }, { "epoch": 0.52, "learning_rate": 2.4692914500147185e-07, "logits/chosen": -2.1308367252349854, "logits/rejected": -1.7297455072402954, "logps/chosen": -539.8509521484375, "logps/rejected": -701.5316162109375, "loss": 0.2199, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.37622612714767456, "rewards/margins": 0.193180650472641, "rewards/rejected": -0.5694067478179932, "step": 2180 }, { "epoch": 0.53, "learning_rate": 2.320226793688979e-07, "logits/chosen": -2.176285982131958, "logits/rejected": -1.585010290145874, "logps/chosen": -597.8599853515625, "logps/rejected": -753.0974731445312, "loss": 0.2317, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4297494888305664, "rewards/margins": 0.20314817130565643, "rewards/rejected": -0.632897675037384, "step": 2190 }, { "epoch": 0.53, "learning_rate": 2.1755836040873197e-07, "logits/chosen": -2.09965181350708, "logits/rejected": -1.5018677711486816, "logps/chosen": -551.79541015625, "logps/rejected": -750.493408203125, "loss": 0.2168, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4179263114929199, "rewards/margins": 0.248566672205925, "rewards/rejected": -0.6664929389953613, "step": 2200 }, { "epoch": 0.53, "learning_rate": 2.0353900796861503e-07, "logits/chosen": -2.2555556297302246, "logits/rejected": -1.8257776498794556, "logps/chosen": -505.36663818359375, "logps/rejected": -631.3004760742188, "loss": 0.2511, "rewards/accuracies": 0.75, "rewards/chosen": -0.35016122460365295, "rewards/margins": 0.18649618327617645, "rewards/rejected": -0.5366573929786682, "step": 2210 }, { "epoch": 0.53, "learning_rate": 1.8996735514908327e-07, "logits/chosen": -2.083270311355591, "logits/rejected": -1.2722394466400146, "logps/chosen": -549.7798461914062, "logps/rejected": -800.9820556640625, "loss": 0.2384, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3795488774776459, "rewards/margins": 0.324424147605896, "rewards/rejected": -0.7039730548858643, "step": 2220 }, { "epoch": 0.54, "learning_rate": 1.7684604777074427e-07, "logits/chosen": -2.2368104457855225, "logits/rejected": -1.5557681322097778, "logps/chosen": -581.6041259765625, "logps/rejected": -770.4801635742188, "loss": 0.2392, "rewards/accuracies": 0.75, "rewards/chosen": -0.4050823152065277, "rewards/margins": 0.2399863749742508, "rewards/rejected": -0.6450687646865845, "step": 2230 }, { "epoch": 0.54, "learning_rate": 1.6417764385846996e-07, "logits/chosen": -2.2670297622680664, "logits/rejected": -1.6125434637069702, "logps/chosen": -544.0134887695312, "logps/rejected": -734.3864135742188, "loss": 0.2371, "rewards/accuracies": 0.75, "rewards/chosen": -0.38986533880233765, "rewards/margins": 0.23339907824993134, "rewards/rejected": -0.6232645511627197, "step": 2240 }, { "epoch": 0.54, "learning_rate": 1.5196461314270438e-07, "logits/chosen": -2.25602650642395, "logits/rejected": -1.743583083152771, "logps/chosen": -564.7320556640625, "logps/rejected": -716.3707275390625, "loss": 0.2454, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.42164283990859985, "rewards/margins": 0.19018793106079102, "rewards/rejected": -0.6118307709693909, "step": 2250 }, { "epoch": 0.54, "learning_rate": 1.4020933657798385e-07, "logits/chosen": -2.139263868331909, "logits/rejected": -1.414298415184021, "logps/chosen": -474.62567138671875, "logps/rejected": -724.90234375, "loss": 0.2306, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3453265130519867, "rewards/margins": 0.2758641839027405, "rewards/rejected": -0.6211907267570496, "step": 2260 }, { "epoch": 0.54, "learning_rate": 1.2891410587876714e-07, "logits/chosen": -2.2542724609375, "logits/rejected": -1.5322940349578857, "logps/chosen": -551.5656127929688, "logps/rejected": -730.9732055664062, "loss": 0.2194, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.37710070610046387, "rewards/margins": 0.2374318391084671, "rewards/rejected": -0.614532470703125, "step": 2270 }, { "epoch": 0.55, "learning_rate": 1.180811230726589e-07, "logits/chosen": -2.2804083824157715, "logits/rejected": -1.5980250835418701, "logps/chosen": -603.2000122070312, "logps/rejected": -805.7347412109375, "loss": 0.2254, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.42967596650123596, "rewards/margins": 0.24143275618553162, "rewards/rejected": -0.6711087226867676, "step": 2280 }, { "epoch": 0.55, "learning_rate": 1.0771250007112155e-07, "logits/chosen": -1.982797384262085, "logits/rejected": -1.3462848663330078, "logps/chosen": -626.7324829101562, "logps/rejected": -776.6309814453125, "loss": 0.2424, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.46235284209251404, "rewards/margins": 0.2094097137451172, "rewards/rejected": -0.6717625856399536, "step": 2290 }, { "epoch": 0.55, "learning_rate": 9.781025825775392e-08, "logits/chosen": -2.231228828430176, "logits/rejected": -1.511236310005188, "logps/chosen": -614.570068359375, "logps/rejected": -825.9508666992188, "loss": 0.2251, "rewards/accuracies": 0.875, "rewards/chosen": -0.45310935378074646, "rewards/margins": 0.26085203886032104, "rewards/rejected": -0.7139613628387451, "step": 2300 }, { "epoch": 0.55, "learning_rate": 8.837632809421681e-08, "logits/chosen": -2.0968010425567627, "logits/rejected": -1.3801376819610596, "logps/chosen": -569.6008911132812, "logps/rejected": -795.4178466796875, "loss": 0.2706, "rewards/accuracies": 0.75, "rewards/chosen": -0.4094913601875305, "rewards/margins": 0.26325908303260803, "rewards/rejected": -0.6727504134178162, "step": 2310 }, { "epoch": 0.56, "learning_rate": 7.941254874388904e-08, "logits/chosen": -2.363359212875366, "logits/rejected": -1.888514757156372, "logps/chosen": -576.6600341796875, "logps/rejected": -702.62060546875, "loss": 0.2567, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.43606749176979065, "rewards/margins": 0.16073410212993622, "rewards/rejected": -0.5968016386032104, "step": 2320 }, { "epoch": 0.56, "learning_rate": 7.092066771331507e-08, "logits/chosen": -2.1661031246185303, "logits/rejected": -1.3824275732040405, "logps/chosen": -557.127685546875, "logps/rejected": -710.2548828125, "loss": 0.2119, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.38099274039268494, "rewards/margins": 0.22092202305793762, "rewards/rejected": -0.6019147634506226, "step": 2330 }, { "epoch": 0.56, "learning_rate": 6.29023405115281e-08, "logits/chosen": -2.1738715171813965, "logits/rejected": -1.355930209159851, "logps/chosen": -592.2274169921875, "logps/rejected": -788.6678466796875, "loss": 0.2163, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.40638837218284607, "rewards/margins": 0.2868625521659851, "rewards/rejected": -0.6932509541511536, "step": 2340 }, { "epoch": 0.56, "learning_rate": 5.535913032730295e-08, "logits/chosen": -2.406480312347412, "logits/rejected": -1.549068570137024, "logps/chosen": -548.6936645507812, "logps/rejected": -776.6002807617188, "loss": 0.1999, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3767138421535492, "rewards/margins": 0.2962132692337036, "rewards/rejected": -0.6729270815849304, "step": 2350 }, { "epoch": 0.57, "learning_rate": 4.829250772441091e-08, "logits/chosen": -2.178439140319824, "logits/rejected": -1.8580322265625, "logps/chosen": -604.1091918945312, "logps/rejected": -721.529541015625, "loss": 0.2414, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.45432454347610474, "rewards/margins": 0.1475105583667755, "rewards/rejected": -0.6018351316452026, "step": 2360 }, { "epoch": 0.57, "learning_rate": 4.170385035493108e-08, "logits/chosen": -2.346930503845215, "logits/rejected": -1.8971471786499023, "logps/chosen": -603.0961303710938, "logps/rejected": -768.2886352539062, "loss": 0.2649, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.43998581171035767, "rewards/margins": 0.20180657505989075, "rewards/rejected": -0.6417924165725708, "step": 2370 }, { "epoch": 0.57, "learning_rate": 3.5594442690671806e-08, "logits/chosen": -2.013385772705078, "logits/rejected": -1.435459852218628, "logps/chosen": -645.1131591796875, "logps/rejected": -806.417724609375, "loss": 0.2246, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.48335081338882446, "rewards/margins": 0.2265961617231369, "rewards/rejected": -0.7099469900131226, "step": 2380 }, { "epoch": 0.57, "learning_rate": 2.9965475772762154e-08, "logits/chosen": -2.311368465423584, "logits/rejected": -1.5600301027297974, "logps/chosen": -517.8285522460938, "logps/rejected": -704.8583374023438, "loss": 0.2205, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3128499984741211, "rewards/margins": 0.2409205138683319, "rewards/rejected": -0.5537704825401306, "step": 2390 }, { "epoch": 0.58, "learning_rate": 2.48180469794565e-08, "logits/chosen": -2.281245470046997, "logits/rejected": -1.7727069854736328, "logps/chosen": -474.0367126464844, "logps/rejected": -638.2462768554688, "loss": 0.2309, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.33747315406799316, "rewards/margins": 0.18603594601154327, "rewards/rejected": -0.5235090851783752, "step": 2400 }, { "epoch": 0.58, "learning_rate": 2.015315981219651e-08, "logits/chosen": -2.094449281692505, "logits/rejected": -1.5543745756149292, "logps/chosen": -585.0030517578125, "logps/rejected": -759.0601806640625, "loss": 0.2485, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4290609359741211, "rewards/margins": 0.21084634959697723, "rewards/rejected": -0.6399072408676147, "step": 2410 }, { "epoch": 0.58, "learning_rate": 1.5971723699979015e-08, "logits/chosen": -2.2383382320404053, "logits/rejected": -1.490106463432312, "logps/chosen": -590.9389038085938, "logps/rejected": -763.5640869140625, "loss": 0.2426, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4045529365539551, "rewards/margins": 0.24901354312896729, "rewards/rejected": -0.6535664796829224, "step": 2420 }, { "epoch": 0.58, "learning_rate": 1.2274553822058944e-08, "logits/chosen": -2.32003116607666, "logits/rejected": -1.4864509105682373, "logps/chosen": -516.053955078125, "logps/rejected": -737.1478271484375, "loss": 0.2119, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3795922100543976, "rewards/margins": 0.26055005192756653, "rewards/rejected": -0.6401422619819641, "step": 2430 }, { "epoch": 0.59, "learning_rate": 9.062370949029231e-09, "logits/chosen": -2.3300509452819824, "logits/rejected": -1.7241268157958984, "logps/chosen": -578.6694946289062, "logps/rejected": -768.1864013671875, "loss": 0.2388, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.40254122018814087, "rewards/margins": 0.2296265810728073, "rewards/rejected": -0.6321677565574646, "step": 2440 }, { "epoch": 0.59, "learning_rate": 6.3358013023062656e-09, "logits/chosen": -2.0109941959381104, "logits/rejected": -1.2714914083480835, "logps/chosen": -602.9906005859375, "logps/rejected": -774.6351318359375, "loss": 0.2411, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4527587294578552, "rewards/margins": 0.22364509105682373, "rewards/rejected": -0.676403820514679, "step": 2450 }, { "epoch": 0.59, "learning_rate": 4.095376432044218e-09, "logits/chosen": -2.153900623321533, "logits/rejected": -1.431398630142212, "logps/chosen": -501.6558532714844, "logps/rejected": -703.2398681640625, "loss": 0.2246, "rewards/accuracies": 0.75, "rewards/chosen": -0.3516218066215515, "rewards/margins": 0.26861336827278137, "rewards/rejected": -0.6202351450920105, "step": 2460 }, { "epoch": 0.59, "learning_rate": 2.3415331135115404e-09, "logits/chosen": -2.1722538471221924, "logits/rejected": -1.4859097003936768, "logps/chosen": -573.3174438476562, "logps/rejected": -785.3301391601562, "loss": 0.2381, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.41808199882507324, "rewards/margins": 0.2717761993408203, "rewards/rejected": -0.6898581981658936, "step": 2470 }, { "epoch": 0.6, "learning_rate": 1.0746132619374184e-09, "logits/chosen": -2.1704633235931396, "logits/rejected": -1.4999759197235107, "logps/chosen": -571.4072265625, "logps/rejected": -791.2116088867188, "loss": 0.2218, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.41102513670921326, "rewards/margins": 0.2840521037578583, "rewards/rejected": -0.6950772404670715, "step": 2480 }, { "epoch": 0.6, "learning_rate": 2.9486386585786395e-10, "logits/chosen": -2.255235433578491, "logits/rejected": -1.5667657852172852, "logps/chosen": -458.48248291015625, "logps/rejected": -655.1217651367188, "loss": 0.2506, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.30341845750808716, "rewards/margins": 0.2525646388530731, "rewards/rejected": -0.5559830665588379, "step": 2490 }, { "epoch": 0.6, "learning_rate": 2.4369389622913575e-12, "logits/chosen": -2.230522394180298, "logits/rejected": -1.6053917407989502, "logps/chosen": -498.0244140625, "logps/rejected": -680.3582763671875, "loss": 0.2225, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.36517006158828735, "rewards/margins": 0.2226562201976776, "rewards/rejected": -0.5878263115882874, "step": 2500 }, { "epoch": 0.6, "step": 2501, "total_flos": 0.0, "train_loss": 0.2318923625944615, "train_runtime": 76628.4869, "train_samples_per_second": 0.391, "train_steps_per_second": 0.033 } ], "logging_steps": 10, "max_steps": 2501, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }