{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.48420708268585916, "eval_steps": 100, "global_step": 14801, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.270111183780249e-09, "logits/chosen": -2.634561777114868, "logits/rejected": -2.673060417175293, "logps/chosen": -207.5323944091797, "logps/rejected": -286.9266052246094, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.270111183780249e-08, "logits/chosen": -2.217600107192993, "logits/rejected": -1.9652072191238403, "logps/chosen": -185.94793701171875, "logps/rejected": -165.36378479003906, "loss": 0.6931, "rewards/accuracies": 0.2777777910232544, "rewards/chosen": -2.7529633371159434e-05, "rewards/margins": -9.719059016788378e-05, "rewards/rejected": 6.966096407268196e-05, "step": 10 }, { "epoch": 0.0, "learning_rate": 6.540222367560497e-08, "logits/chosen": -2.43184232711792, "logits/rejected": -2.223078489303589, "logps/chosen": -232.47348022460938, "logps/rejected": -231.3294219970703, "loss": 0.6931, "rewards/accuracies": 0.375, "rewards/chosen": -2.5338464183732867e-05, "rewards/margins": -0.00011894272029167041, "rewards/rejected": 9.360425610793754e-05, "step": 20 }, { "epoch": 0.0, "learning_rate": 9.810333551340746e-08, "logits/chosen": -2.258497953414917, "logits/rejected": -2.1628453731536865, "logps/chosen": -197.47084045410156, "logps/rejected": -219.11550903320312, "loss": 0.6931, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -4.670469934353605e-05, "rewards/margins": -2.2600890588364564e-05, "rewards/rejected": -2.4103814212139696e-05, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.3080444735120995e-07, "logits/chosen": -2.211336135864258, "logits/rejected": -2.251044750213623, "logps/chosen": -276.04290771484375, "logps/rejected": -265.7278137207031, "loss": 0.6931, "rewards/accuracies": 0.32499998807907104, "rewards/chosen": -5.52958736079745e-05, "rewards/margins": -8.68914503371343e-05, "rewards/rejected": 3.159556581522338e-05, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.6350555918901243e-07, "logits/chosen": -2.349104642868042, "logits/rejected": -2.1418638229370117, "logps/chosen": -204.79901123046875, "logps/rejected": -184.77700805664062, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 6.549026693392079e-06, "rewards/margins": 8.850884478306398e-05, "rewards/rejected": -8.195983537007123e-05, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.9620667102681492e-07, "logits/chosen": -2.3065123558044434, "logits/rejected": -2.0677669048309326, "logps/chosen": -209.77523803710938, "logps/rejected": -185.83193969726562, "loss": 0.6932, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": 3.0680701456731185e-05, "rewards/margins": -0.00012086327478755265, "rewards/rejected": 0.00015154397988226265, "step": 60 }, { "epoch": 0.0, "learning_rate": 2.289077828646174e-07, "logits/chosen": -2.2705559730529785, "logits/rejected": -2.1576623916625977, "logps/chosen": -218.01708984375, "logps/rejected": -207.9579620361328, "loss": 0.6931, "rewards/accuracies": 0.375, "rewards/chosen": 9.244588727597147e-05, "rewards/margins": 9.720615707919933e-06, "rewards/rejected": 8.272529521491379e-05, "step": 70 }, { "epoch": 0.01, "learning_rate": 2.616088947024199e-07, "logits/chosen": -2.5082621574401855, "logits/rejected": -2.2270846366882324, "logps/chosen": -258.78070068359375, "logps/rejected": -213.70150756835938, "loss": 0.6931, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.00016971012519206852, "rewards/margins": 8.799631905276328e-05, "rewards/rejected": 8.171383524313569e-05, "step": 80 }, { "epoch": 0.01, "learning_rate": 2.943100065402224e-07, "logits/chosen": -2.257884979248047, "logits/rejected": -2.173158645629883, "logps/chosen": -184.643798828125, "logps/rejected": -165.50064086914062, "loss": 0.6931, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.00014099081454332918, "rewards/margins": 8.532649371773005e-05, "rewards/rejected": 5.566431718762033e-05, "step": 90 }, { "epoch": 0.01, "learning_rate": 3.2701111837802487e-07, "logits/chosen": -2.432077646255493, "logits/rejected": -2.426736354827881, "logps/chosen": -168.7462921142578, "logps/rejected": -183.92845153808594, "loss": 0.6931, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0001885929814307019, "rewards/margins": 2.7986563509330153e-05, "rewards/rejected": 0.00016060643247328699, "step": 100 }, { "epoch": 0.01, "eval_logits/chosen": -2.348320245742798, "eval_logits/rejected": -2.159881830215454, "eval_logps/chosen": -231.77328491210938, "eval_logps/rejected": -211.4641571044922, "eval_loss": 0.6931429505348206, "eval_rewards/accuracies": 0.484499990940094, "eval_rewards/chosen": 0.00023166697064880282, "eval_rewards/margins": 8.390223229071125e-05, "eval_rewards/rejected": 0.00014776474563404918, "eval_runtime": 711.5877, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 100 }, { "epoch": 0.01, "learning_rate": 3.5971223021582736e-07, "logits/chosen": -2.3472111225128174, "logits/rejected": -1.9997737407684326, "logps/chosen": -222.4732208251953, "logps/rejected": -167.11593627929688, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.00032228889176622033, "rewards/margins": 0.00020361851784400642, "rewards/rejected": 0.00011867038119817153, "step": 110 }, { "epoch": 0.01, "learning_rate": 3.9241334205362984e-07, "logits/chosen": -2.3396477699279785, "logits/rejected": -2.2444756031036377, "logps/chosen": -223.9533233642578, "logps/rejected": -234.1103057861328, "loss": 0.6931, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.00033879600232467055, "rewards/margins": 7.787643698975444e-05, "rewards/rejected": 0.00026091962354257703, "step": 120 }, { "epoch": 0.01, "learning_rate": 4.251144538914324e-07, "logits/chosen": -2.262049913406372, "logits/rejected": -2.2183430194854736, "logps/chosen": -149.32823181152344, "logps/rejected": -148.27149963378906, "loss": 0.6931, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0003176346654072404, "rewards/margins": 0.00017084801220335066, "rewards/rejected": 0.00014678671141155064, "step": 130 }, { "epoch": 0.01, "learning_rate": 4.578155657292348e-07, "logits/chosen": -2.3226771354675293, "logits/rejected": -2.2234673500061035, "logps/chosen": -225.57870483398438, "logps/rejected": -159.41448974609375, "loss": 0.6931, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.00038025499088689685, "rewards/margins": 0.00010361654858570546, "rewards/rejected": 0.00027663842774927616, "step": 140 }, { "epoch": 0.01, "learning_rate": 4.905166775670374e-07, "logits/chosen": -2.3669283390045166, "logits/rejected": -2.1579947471618652, "logps/chosen": -230.99099731445312, "logps/rejected": -229.1034393310547, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": 0.00036465103039518, "rewards/margins": 0.0003103634517174214, "rewards/rejected": 5.428760778158903e-05, "step": 150 }, { "epoch": 0.01, "learning_rate": 5.232177894048398e-07, "logits/chosen": -2.2156825065612793, "logits/rejected": -2.2284252643585205, "logps/chosen": -260.325927734375, "logps/rejected": -224.88876342773438, "loss": 0.6931, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.0005425811395980418, "rewards/margins": 0.0004318637656979263, "rewards/rejected": 0.00011071735207224265, "step": 160 }, { "epoch": 0.01, "learning_rate": 5.559189012426422e-07, "logits/chosen": -2.3160367012023926, "logits/rejected": -2.03806209564209, "logps/chosen": -180.5178680419922, "logps/rejected": -156.7935791015625, "loss": 0.6931, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.00033474405063316226, "rewards/margins": 0.0002378027857048437, "rewards/rejected": 9.694129403214902e-05, "step": 170 }, { "epoch": 0.01, "learning_rate": 5.886200130804448e-07, "logits/chosen": -2.398749828338623, "logits/rejected": -2.338441848754883, "logps/chosen": -217.6501007080078, "logps/rejected": -198.72491455078125, "loss": 0.6931, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0005784613895229995, "rewards/margins": 0.0004989482113160193, "rewards/rejected": 7.951319275889546e-05, "step": 180 }, { "epoch": 0.01, "learning_rate": 6.213211249182473e-07, "logits/chosen": -2.0694451332092285, "logits/rejected": -2.1747512817382812, "logps/chosen": -191.06724548339844, "logps/rejected": -208.7252960205078, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": 0.000429000414442271, "rewards/margins": 0.000501616217661649, "rewards/rejected": -7.261570863192901e-05, "step": 190 }, { "epoch": 0.01, "learning_rate": 6.540222367560497e-07, "logits/chosen": -2.2755157947540283, "logits/rejected": -2.2391417026519775, "logps/chosen": -146.77293395996094, "logps/rejected": -177.70639038085938, "loss": 0.6931, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0004391101247165352, "rewards/margins": 0.0006169243133626878, "rewards/rejected": -0.0001778141740942374, "step": 200 }, { "epoch": 0.01, "eval_logits/chosen": -2.348745822906494, "eval_logits/rejected": -2.160346269607544, "eval_logps/chosen": -231.43870544433594, "eval_logps/rejected": -211.644287109375, "eval_loss": 0.6931134462356567, "eval_rewards/accuracies": 0.5799999833106995, "eval_rewards/chosen": 0.0005662592011503875, "eval_rewards/margins": 0.0005986409960314631, "eval_rewards/rejected": -3.2381776691181585e-05, "eval_runtime": 712.2312, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 200 }, { "epoch": 0.01, "learning_rate": 6.867233485938523e-07, "logits/chosen": -2.4266586303710938, "logits/rejected": -2.202392339706421, "logps/chosen": -218.6244659423828, "logps/rejected": -188.30389404296875, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": 0.0009090522071346641, "rewards/margins": 0.0010470406850799918, "rewards/rejected": -0.0001379884488414973, "step": 210 }, { "epoch": 0.01, "learning_rate": 7.194244604316547e-07, "logits/chosen": -2.2122180461883545, "logits/rejected": -2.040276050567627, "logps/chosen": -182.87673950195312, "logps/rejected": -174.85745239257812, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.0004596250946633518, "rewards/margins": 0.00037378407432697713, "rewards/rejected": 8.584104216424748e-05, "step": 220 }, { "epoch": 0.02, "learning_rate": 7.521255722694571e-07, "logits/chosen": -2.4205055236816406, "logits/rejected": -2.0249924659729004, "logps/chosen": -279.0646667480469, "logps/rejected": -184.08184814453125, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.0007147075375542045, "rewards/margins": 0.00047254477976821363, "rewards/rejected": 0.0002421626850264147, "step": 230 }, { "epoch": 0.02, "learning_rate": 7.848266841072597e-07, "logits/chosen": -2.200258731842041, "logits/rejected": -2.1651644706726074, "logps/chosen": -214.66073608398438, "logps/rejected": -206.3551025390625, "loss": 0.6931, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0011901266407221556, "rewards/margins": 0.0012617750326171517, "rewards/rejected": -7.16482536518015e-05, "step": 240 }, { "epoch": 0.02, "learning_rate": 8.175277959450622e-07, "logits/chosen": -2.1673741340637207, "logits/rejected": -2.3264830112457275, "logps/chosen": -218.3331756591797, "logps/rejected": -220.8343963623047, "loss": 0.6931, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0020930031314492226, "rewards/margins": 0.0009613169240765274, "rewards/rejected": 0.0011316860327497125, "step": 250 }, { "epoch": 0.02, "learning_rate": 8.502289077828648e-07, "logits/chosen": -2.505481004714966, "logits/rejected": -2.143810272216797, "logps/chosen": -254.39773559570312, "logps/rejected": -189.3335418701172, "loss": 0.6931, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0027572843246161938, "rewards/margins": 0.0012943788897246122, "rewards/rejected": 0.0014629056677222252, "step": 260 }, { "epoch": 0.02, "learning_rate": 8.829300196206672e-07, "logits/chosen": -2.4226317405700684, "logits/rejected": -2.127683401107788, "logps/chosen": -246.3780975341797, "logps/rejected": -230.76318359375, "loss": 0.693, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.002432792680338025, "rewards/margins": 0.0016146197449415922, "rewards/rejected": 0.0008181730518117547, "step": 270 }, { "epoch": 0.02, "learning_rate": 9.156311314584696e-07, "logits/chosen": -2.3078713417053223, "logits/rejected": -2.1943423748016357, "logps/chosen": -159.91470336914062, "logps/rejected": -146.4855194091797, "loss": 0.6931, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.0022737388499081135, "rewards/margins": 0.001040069735608995, "rewards/rejected": 0.0012336688814684749, "step": 280 }, { "epoch": 0.02, "learning_rate": 9.483322432962722e-07, "logits/chosen": -2.5546011924743652, "logits/rejected": -2.160224199295044, "logps/chosen": -282.1301574707031, "logps/rejected": -225.6984100341797, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": 0.0030486714094877243, "rewards/margins": 0.0010619161184877157, "rewards/rejected": 0.0019867552910000086, "step": 290 }, { "epoch": 0.02, "learning_rate": 9.810333551340747e-07, "logits/chosen": -2.3406269550323486, "logits/rejected": -2.196272611618042, "logps/chosen": -264.9823913574219, "logps/rejected": -238.93710327148438, "loss": 0.6929, "rewards/accuracies": 0.625, "rewards/chosen": 0.0036772601306438446, "rewards/margins": 0.0026040554512292147, "rewards/rejected": 0.0010732045629993081, "step": 300 }, { "epoch": 0.02, "eval_logits/chosen": -2.3525564670562744, "eval_logits/rejected": -2.1640830039978027, "eval_logps/chosen": -228.56967163085938, "eval_logps/rejected": -209.95509338378906, "eval_loss": 0.6930469870567322, "eval_rewards/accuracies": 0.5809999704360962, "eval_rewards/chosen": 0.0034352699294686317, "eval_rewards/margins": 0.0017784537049010396, "eval_rewards/rejected": 0.0016568164573982358, "eval_runtime": 715.1216, "eval_samples_per_second": 2.797, "eval_steps_per_second": 1.398, "step": 300 }, { "epoch": 0.02, "learning_rate": 1.0137344669718771e-06, "logits/chosen": -2.3511688709259033, "logits/rejected": -2.3660411834716797, "logps/chosen": -166.951171875, "logps/rejected": -156.00137329101562, "loss": 0.6931, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0025391627568751574, "rewards/margins": 0.0007007948006503284, "rewards/rejected": 0.0018383677816018462, "step": 310 }, { "epoch": 0.02, "learning_rate": 1.0464355788096796e-06, "logits/chosen": -2.454301357269287, "logits/rejected": -2.059354782104492, "logps/chosen": -221.11233520507812, "logps/rejected": -192.06488037109375, "loss": 0.693, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.004052783362567425, "rewards/margins": 0.001703445566818118, "rewards/rejected": 0.002349337562918663, "step": 320 }, { "epoch": 0.02, "learning_rate": 1.079136690647482e-06, "logits/chosen": -2.4363083839416504, "logits/rejected": -2.2041573524475098, "logps/chosen": -203.31265258789062, "logps/rejected": -175.83804321289062, "loss": 0.693, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.005098625086247921, "rewards/margins": 0.002672579139471054, "rewards/rejected": 0.0024260464124381542, "step": 330 }, { "epoch": 0.02, "learning_rate": 1.1118378024852844e-06, "logits/chosen": -2.1864969730377197, "logits/rejected": -2.3531885147094727, "logps/chosen": -150.56375122070312, "logps/rejected": -177.64382934570312, "loss": 0.6931, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.0031506356317549944, "rewards/margins": -0.0003627676342148334, "rewards/rejected": 0.00351340277120471, "step": 340 }, { "epoch": 0.02, "learning_rate": 1.144538914323087e-06, "logits/chosen": -2.4262728691101074, "logits/rejected": -1.988315224647522, "logps/chosen": -317.635009765625, "logps/rejected": -248.2708740234375, "loss": 0.693, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.004475675523281097, "rewards/margins": 0.0034824323374778032, "rewards/rejected": 0.0009932438842952251, "step": 350 }, { "epoch": 0.02, "learning_rate": 1.1772400261608895e-06, "logits/chosen": -2.4900474548339844, "logits/rejected": -2.1962995529174805, "logps/chosen": -219.6117401123047, "logps/rejected": -192.6786346435547, "loss": 0.6929, "rewards/accuracies": 0.625, "rewards/chosen": 0.005868288688361645, "rewards/margins": 0.003231339855119586, "rewards/rejected": 0.0026369483675807714, "step": 360 }, { "epoch": 0.02, "learning_rate": 1.2099411379986922e-06, "logits/chosen": -2.169987440109253, "logits/rejected": -2.2462120056152344, "logps/chosen": -191.8892822265625, "logps/rejected": -206.02182006835938, "loss": 0.6931, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.005607450846582651, "rewards/margins": 0.0013648418243974447, "rewards/rejected": 0.004242608789354563, "step": 370 }, { "epoch": 0.02, "learning_rate": 1.2426422498364946e-06, "logits/chosen": -2.330944538116455, "logits/rejected": -2.042590379714966, "logps/chosen": -215.90518188476562, "logps/rejected": -162.1782989501953, "loss": 0.693, "rewards/accuracies": 0.625, "rewards/chosen": 0.005743044428527355, "rewards/margins": 0.0038504921831190586, "rewards/rejected": 0.0018925521289929748, "step": 380 }, { "epoch": 0.03, "learning_rate": 1.2753433616742968e-06, "logits/chosen": -2.329392910003662, "logits/rejected": -2.248897075653076, "logps/chosen": -181.150634765625, "logps/rejected": -244.83511352539062, "loss": 0.6929, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.004150368273258209, "rewards/margins": 0.003002329496666789, "rewards/rejected": 0.001148038893006742, "step": 390 }, { "epoch": 0.03, "learning_rate": 1.3080444735120995e-06, "logits/chosen": -2.488135576248169, "logits/rejected": -2.11959171295166, "logps/chosen": -219.4470672607422, "logps/rejected": -179.94998168945312, "loss": 0.6929, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.006045623682439327, "rewards/margins": 0.005753137171268463, "rewards/rejected": 0.00029248674400150776, "step": 400 }, { "epoch": 0.03, "eval_logits/chosen": -2.3535032272338867, "eval_logits/rejected": -2.1649465560913086, "eval_logps/chosen": -226.90525817871094, "eval_logps/rejected": -210.06015014648438, "eval_loss": 0.6929447054862976, "eval_rewards/accuracies": 0.5950000286102295, "eval_rewards/chosen": 0.0050996895879507065, "eval_rewards/margins": 0.0035479466896504164, "eval_rewards/rejected": 0.001551742316223681, "eval_runtime": 713.0255, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 400 }, { "epoch": 0.03, "learning_rate": 1.3407455853499021e-06, "logits/chosen": -2.4732093811035156, "logits/rejected": -2.3230807781219482, "logps/chosen": -256.2766418457031, "logps/rejected": -224.01925659179688, "loss": 0.6929, "rewards/accuracies": 0.625, "rewards/chosen": 0.006076619029045105, "rewards/margins": 0.004139014054089785, "rewards/rejected": 0.0019376047421246767, "step": 410 }, { "epoch": 0.03, "learning_rate": 1.3734466971877046e-06, "logits/chosen": -2.2999634742736816, "logits/rejected": -2.199373960494995, "logps/chosen": -176.3344268798828, "logps/rejected": -173.9598388671875, "loss": 0.6929, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.00558522017672658, "rewards/margins": 0.005094148684293032, "rewards/rejected": 0.000491071492433548, "step": 420 }, { "epoch": 0.03, "learning_rate": 1.406147809025507e-06, "logits/chosen": -2.291569709777832, "logits/rejected": -2.1040902137756348, "logps/chosen": -209.72128295898438, "logps/rejected": -182.81837463378906, "loss": 0.6927, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.005425966810435057, "rewards/margins": 0.005596494302153587, "rewards/rejected": -0.00017052698240149766, "step": 430 }, { "epoch": 0.03, "learning_rate": 1.4388489208633094e-06, "logits/chosen": -2.3915810585021973, "logits/rejected": -2.1051740646362305, "logps/chosen": -250.69534301757812, "logps/rejected": -223.9762725830078, "loss": 0.693, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0038019181229174137, "rewards/margins": 0.003192658070474863, "rewards/rejected": 0.0006092601688578725, "step": 440 }, { "epoch": 0.03, "learning_rate": 1.471550032701112e-06, "logits/chosen": -2.408799648284912, "logits/rejected": -2.205817937850952, "logps/chosen": -185.5984344482422, "logps/rejected": -198.32369995117188, "loss": 0.6926, "rewards/accuracies": 0.5, "rewards/chosen": 0.004049423150718212, "rewards/margins": 0.006923851557075977, "rewards/rejected": -0.002874427940696478, "step": 450 }, { "epoch": 0.03, "learning_rate": 1.5042511445389143e-06, "logits/chosen": -2.153529644012451, "logits/rejected": -2.2015368938446045, "logps/chosen": -147.46295166015625, "logps/rejected": -214.2803955078125, "loss": 0.6926, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0034770288038998842, "rewards/margins": 0.0033914081286638975, "rewards/rejected": 8.562016591895372e-05, "step": 460 }, { "epoch": 0.03, "learning_rate": 1.536952256376717e-06, "logits/chosen": -2.0425426959991455, "logits/rejected": -2.104628324508667, "logps/chosen": -199.62979125976562, "logps/rejected": -254.56204223632812, "loss": 0.6925, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.001300838659517467, "rewards/margins": 0.009368222206830978, "rewards/rejected": -0.008067382499575615, "step": 470 }, { "epoch": 0.03, "learning_rate": 1.5696533682145194e-06, "logits/chosen": -2.432100296020508, "logits/rejected": -2.2004401683807373, "logps/chosen": -197.1683349609375, "logps/rejected": -171.0287322998047, "loss": 0.6928, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.012921568937599659, "rewards/margins": 0.004919148050248623, "rewards/rejected": -0.017840716987848282, "step": 480 }, { "epoch": 0.03, "learning_rate": 1.602354480052322e-06, "logits/chosen": -2.4052891731262207, "logits/rejected": -2.315910577774048, "logps/chosen": -282.2193603515625, "logps/rejected": -234.4072265625, "loss": 0.6927, "rewards/accuracies": 0.625, "rewards/chosen": -0.015579608269035816, "rewards/margins": 0.007139542605727911, "rewards/rejected": -0.02271914854645729, "step": 490 }, { "epoch": 0.03, "learning_rate": 1.6350555918901245e-06, "logits/chosen": -2.205134630203247, "logits/rejected": -1.87628173828125, "logps/chosen": -231.80007934570312, "logps/rejected": -226.1451416015625, "loss": 0.6927, "rewards/accuracies": 0.625, "rewards/chosen": -0.013730937615036964, "rewards/margins": 0.008888588286936283, "rewards/rejected": -0.022619523108005524, "step": 500 }, { "epoch": 0.03, "eval_logits/chosen": -2.345468044281006, "eval_logits/rejected": -2.1572470664978027, "eval_logps/chosen": -243.40798950195312, "eval_logps/rejected": -230.73947143554688, "eval_loss": 0.6927248239517212, "eval_rewards/accuracies": 0.6065000295639038, "eval_rewards/chosen": -0.011403032578527927, "eval_rewards/margins": 0.007724526803940535, "eval_rewards/rejected": -0.019127558916807175, "eval_runtime": 712.1411, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 500 }, { "epoch": 0.03, "learning_rate": 1.6677567037279269e-06, "logits/chosen": -2.485083818435669, "logits/rejected": -2.1355221271514893, "logps/chosen": -302.66400146484375, "logps/rejected": -273.6127014160156, "loss": 0.6929, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.006400348152965307, "rewards/margins": 0.011333698406815529, "rewards/rejected": -0.017734047025442123, "step": 510 }, { "epoch": 0.03, "learning_rate": 1.7004578155657295e-06, "logits/chosen": -2.294008493423462, "logits/rejected": -2.3222975730895996, "logps/chosen": -209.5379180908203, "logps/rejected": -202.2738037109375, "loss": 0.6925, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0028185707051306963, "rewards/margins": 0.013156527653336525, "rewards/rejected": -0.015975097194314003, "step": 520 }, { "epoch": 0.03, "learning_rate": 1.7331589274035318e-06, "logits/chosen": -2.1821720600128174, "logits/rejected": -2.061917781829834, "logps/chosen": -188.24545288085938, "logps/rejected": -200.89163208007812, "loss": 0.6927, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.00431888597086072, "rewards/margins": 0.013200417160987854, "rewards/rejected": -0.017519304528832436, "step": 530 }, { "epoch": 0.04, "learning_rate": 1.7658600392413344e-06, "logits/chosen": -2.4073173999786377, "logits/rejected": -2.246474504470825, "logps/chosen": -201.97555541992188, "logps/rejected": -198.34249877929688, "loss": 0.6926, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0008798660710453987, "rewards/margins": 0.013736550696194172, "rewards/rejected": -0.012856684625148773, "step": 540 }, { "epoch": 0.04, "learning_rate": 1.7985611510791368e-06, "logits/chosen": -2.404886245727539, "logits/rejected": -1.9447847604751587, "logps/chosen": -262.2571716308594, "logps/rejected": -263.9870910644531, "loss": 0.6925, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0012924910988658667, "rewards/margins": 0.018118366599082947, "rewards/rejected": -0.016825873404741287, "step": 550 }, { "epoch": 0.04, "learning_rate": 1.8312622629169393e-06, "logits/chosen": -2.3182451725006104, "logits/rejected": -2.1790592670440674, "logps/chosen": -268.5740661621094, "logps/rejected": -243.84616088867188, "loss": 0.6925, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.00511964363977313, "rewards/margins": 0.009792742319405079, "rewards/rejected": -0.014912387356162071, "step": 560 }, { "epoch": 0.04, "learning_rate": 1.8639633747547417e-06, "logits/chosen": -2.3926608562469482, "logits/rejected": -2.2314422130584717, "logps/chosen": -234.6009979248047, "logps/rejected": -213.1090545654297, "loss": 0.6923, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.005455879960209131, "rewards/margins": 0.0141448974609375, "rewards/rejected": -0.019600778818130493, "step": 570 }, { "epoch": 0.04, "learning_rate": 1.8966644865925443e-06, "logits/chosen": -2.227658748626709, "logits/rejected": -2.256025791168213, "logps/chosen": -261.0607604980469, "logps/rejected": -281.42657470703125, "loss": 0.6928, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.02185194566845894, "rewards/margins": 0.008971361443400383, "rewards/rejected": -0.03082330897450447, "step": 580 }, { "epoch": 0.04, "learning_rate": 1.9293655984303466e-06, "logits/chosen": -2.6404712200164795, "logits/rejected": -2.2230751514434814, "logps/chosen": -301.16851806640625, "logps/rejected": -236.12405395507812, "loss": 0.6924, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.01820230856537819, "rewards/margins": 0.01796017773449421, "rewards/rejected": -0.03616248816251755, "step": 590 }, { "epoch": 0.04, "learning_rate": 1.9620667102681494e-06, "logits/chosen": -2.3834471702575684, "logits/rejected": -2.3926730155944824, "logps/chosen": -196.3507080078125, "logps/rejected": -190.89453125, "loss": 0.6924, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0070982701145112514, "rewards/margins": 0.008864415809512138, "rewards/rejected": -0.015962684527039528, "step": 600 }, { "epoch": 0.04, "eval_logits/chosen": -2.34063458442688, "eval_logits/rejected": -2.152348279953003, "eval_logps/chosen": -240.43104553222656, "eval_logps/rejected": -234.4803924560547, "eval_loss": 0.6923562288284302, "eval_rewards/accuracies": 0.6110000014305115, "eval_rewards/chosen": -0.008426105603575706, "eval_rewards/margins": 0.014442377723753452, "eval_rewards/rejected": -0.022868484258651733, "eval_runtime": 713.9601, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.401, "step": 600 }, { "epoch": 0.04, "learning_rate": 1.994767822105952e-06, "logits/chosen": -2.2903950214385986, "logits/rejected": -2.190324306488037, "logps/chosen": -187.91201782226562, "logps/rejected": -209.1902618408203, "loss": 0.6917, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.012708373367786407, "rewards/margins": 0.014843207783997059, "rewards/rejected": -0.02755158208310604, "step": 610 }, { "epoch": 0.04, "learning_rate": 2.0274689339437543e-06, "logits/chosen": -2.1997437477111816, "logits/rejected": -1.9912179708480835, "logps/chosen": -294.8075256347656, "logps/rejected": -275.75335693359375, "loss": 0.6924, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0161177609115839, "rewards/margins": 0.01630011573433876, "rewards/rejected": -0.03241787850856781, "step": 620 }, { "epoch": 0.04, "learning_rate": 2.0601700457815567e-06, "logits/chosen": -2.3434805870056152, "logits/rejected": -1.981257677078247, "logps/chosen": -297.05657958984375, "logps/rejected": -263.90301513671875, "loss": 0.6926, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03770185634493828, "rewards/margins": 0.010359613224864006, "rewards/rejected": -0.048061467707157135, "step": 630 }, { "epoch": 0.04, "learning_rate": 2.092871157619359e-06, "logits/chosen": -2.390469789505005, "logits/rejected": -2.209237575531006, "logps/chosen": -203.32533264160156, "logps/rejected": -230.1988983154297, "loss": 0.6927, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.026663165539503098, "rewards/margins": 0.013162101618945599, "rewards/rejected": -0.039825260639190674, "step": 640 }, { "epoch": 0.04, "learning_rate": 2.1255722694571616e-06, "logits/chosen": -2.4085137844085693, "logits/rejected": -2.0080697536468506, "logps/chosen": -306.5794982910156, "logps/rejected": -235.6829071044922, "loss": 0.6925, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.027684833854436874, "rewards/margins": 0.022071022540330887, "rewards/rejected": -0.04975585266947746, "step": 650 }, { "epoch": 0.04, "learning_rate": 2.158273381294964e-06, "logits/chosen": -2.317622184753418, "logits/rejected": -2.2132556438446045, "logps/chosen": -252.6199188232422, "logps/rejected": -264.3005065917969, "loss": 0.6931, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.04019295051693916, "rewards/margins": 0.006169079802930355, "rewards/rejected": -0.046362027525901794, "step": 660 }, { "epoch": 0.04, "learning_rate": 2.190974493132767e-06, "logits/chosen": -2.3021767139434814, "logits/rejected": -1.9944665431976318, "logps/chosen": -284.7574157714844, "logps/rejected": -243.4713897705078, "loss": 0.6927, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.047078341245651245, "rewards/margins": 0.014970744028687477, "rewards/rejected": -0.06204908341169357, "step": 670 }, { "epoch": 0.04, "learning_rate": 2.223675604970569e-06, "logits/chosen": -2.3567278385162354, "logits/rejected": -2.1560585498809814, "logps/chosen": -256.2400817871094, "logps/rejected": -254.849609375, "loss": 0.6923, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0455058254301548, "rewards/margins": 0.015939798206090927, "rewards/rejected": -0.06144562363624573, "step": 680 }, { "epoch": 0.05, "learning_rate": 2.2563767168083718e-06, "logits/chosen": -2.4585578441619873, "logits/rejected": -1.9678945541381836, "logps/chosen": -281.6275634765625, "logps/rejected": -244.1110076904297, "loss": 0.6919, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.024375787004828453, "rewards/margins": 0.02551574632525444, "rewards/rejected": -0.04989153519272804, "step": 690 }, { "epoch": 0.05, "learning_rate": 2.289077828646174e-06, "logits/chosen": -2.2911694049835205, "logits/rejected": -2.062880039215088, "logps/chosen": -273.7863464355469, "logps/rejected": -258.31988525390625, "loss": 0.6928, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.034725360572338104, "rewards/margins": 0.009239157661795616, "rewards/rejected": -0.04396451264619827, "step": 700 }, { "epoch": 0.05, "eval_logits/chosen": -2.32228422164917, "eval_logits/rejected": -2.1351630687713623, "eval_logps/chosen": -255.19515991210938, "eval_logps/rejected": -253.87286376953125, "eval_loss": 0.6921648979187012, "eval_rewards/accuracies": 0.609000027179718, "eval_rewards/chosen": -0.023190179839730263, "eval_rewards/margins": 0.01907077431678772, "eval_rewards/rejected": -0.04226095601916313, "eval_runtime": 712.4113, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 700 }, { "epoch": 0.05, "learning_rate": 2.3217789404839766e-06, "logits/chosen": -2.1814942359924316, "logits/rejected": -2.2622792720794678, "logps/chosen": -181.82546997070312, "logps/rejected": -238.5260772705078, "loss": 0.6924, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.024994343519210815, "rewards/margins": 0.00975788477808237, "rewards/rejected": -0.03475222736597061, "step": 710 }, { "epoch": 0.05, "learning_rate": 2.354480052321779e-06, "logits/chosen": -2.5221304893493652, "logits/rejected": -2.090299129486084, "logps/chosen": -269.60504150390625, "logps/rejected": -239.1893310546875, "loss": 0.6912, "rewards/accuracies": 0.625, "rewards/chosen": -0.011215592734515667, "rewards/margins": 0.029225636273622513, "rewards/rejected": -0.040441226214170456, "step": 720 }, { "epoch": 0.05, "learning_rate": 2.3871811641595815e-06, "logits/chosen": -2.3538801670074463, "logits/rejected": -2.1702470779418945, "logps/chosen": -261.742431640625, "logps/rejected": -221.083740234375, "loss": 0.6924, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.014950007200241089, "rewards/margins": 0.01799718663096428, "rewards/rejected": -0.03294719010591507, "step": 730 }, { "epoch": 0.05, "learning_rate": 2.4198822759973843e-06, "logits/chosen": -2.1946702003479004, "logits/rejected": -2.2114098072052, "logps/chosen": -207.88638305664062, "logps/rejected": -237.3756103515625, "loss": 0.6921, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.017280325293540955, "rewards/margins": 0.015556697733700275, "rewards/rejected": -0.032837022095918655, "step": 740 }, { "epoch": 0.05, "learning_rate": 2.4525833878351864e-06, "logits/chosen": -2.463273763656616, "logits/rejected": -2.261465311050415, "logps/chosen": -264.2398376464844, "logps/rejected": -214.43508911132812, "loss": 0.6923, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.011438943445682526, "rewards/margins": 0.016475234180688858, "rewards/rejected": -0.027914175763726234, "step": 750 }, { "epoch": 0.05, "learning_rate": 2.4852844996729892e-06, "logits/chosen": -2.2060532569885254, "logits/rejected": -2.109431743621826, "logps/chosen": -258.8271789550781, "logps/rejected": -292.1606140136719, "loss": 0.691, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.019702842459082603, "rewards/margins": 0.031875334680080414, "rewards/rejected": -0.05157817527651787, "step": 760 }, { "epoch": 0.05, "learning_rate": 2.5179856115107916e-06, "logits/chosen": -2.3242106437683105, "logits/rejected": -2.00541090965271, "logps/chosen": -298.7543640136719, "logps/rejected": -269.37701416015625, "loss": 0.6914, "rewards/accuracies": 0.625, "rewards/chosen": -0.025258159264922142, "rewards/margins": 0.024570953100919724, "rewards/rejected": -0.049829110503196716, "step": 770 }, { "epoch": 0.05, "learning_rate": 2.5506867233485937e-06, "logits/chosen": -2.4023263454437256, "logits/rejected": -1.9835220575332642, "logps/chosen": -288.435546875, "logps/rejected": -286.0493469238281, "loss": 0.6913, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.022739596664905548, "rewards/margins": 0.05511244013905525, "rewards/rejected": -0.0778520405292511, "step": 780 }, { "epoch": 0.05, "learning_rate": 2.5833878351863965e-06, "logits/chosen": -2.462794780731201, "logits/rejected": -2.393866777420044, "logps/chosen": -284.49871826171875, "logps/rejected": -318.21240234375, "loss": 0.6913, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.032500773668289185, "rewards/margins": 0.0363340824842453, "rewards/rejected": -0.06883485615253448, "step": 790 }, { "epoch": 0.05, "learning_rate": 2.616088947024199e-06, "logits/chosen": -2.1470065116882324, "logits/rejected": -2.3199901580810547, "logps/chosen": -244.1119842529297, "logps/rejected": -297.1820068359375, "loss": 0.6918, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.04762252792716026, "rewards/margins": 0.02854365110397339, "rewards/rejected": -0.07616618275642395, "step": 800 }, { "epoch": 0.05, "eval_logits/chosen": -2.32397723197937, "eval_logits/rejected": -2.135841131210327, "eval_logps/chosen": -289.2698669433594, "eval_logps/rejected": -303.1002502441406, "eval_loss": 0.6920146346092224, "eval_rewards/accuracies": 0.597000002861023, "eval_rewards/chosen": -0.05726493149995804, "eval_rewards/margins": 0.03422345593571663, "eval_rewards/rejected": -0.09148839116096497, "eval_runtime": 711.5428, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 800 }, { "epoch": 0.05, "learning_rate": 2.6487900588620014e-06, "logits/chosen": -2.1001205444335938, "logits/rejected": -1.8540256023406982, "logps/chosen": -248.87161254882812, "logps/rejected": -222.15377807617188, "loss": 0.6933, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04875599965453148, "rewards/margins": 0.015534071251749992, "rewards/rejected": -0.06429006904363632, "step": 810 }, { "epoch": 0.05, "learning_rate": 2.6814911706998042e-06, "logits/chosen": -2.2964351177215576, "logits/rejected": -2.1176650524139404, "logps/chosen": -270.25018310546875, "logps/rejected": -257.04241943359375, "loss": 0.6931, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.044910985976457596, "rewards/margins": 0.017546221613883972, "rewards/rejected": -0.06245720386505127, "step": 820 }, { "epoch": 0.05, "learning_rate": 2.7141922825376067e-06, "logits/chosen": -2.1903445720672607, "logits/rejected": -2.1868643760681152, "logps/chosen": -296.24957275390625, "logps/rejected": -342.7115173339844, "loss": 0.6921, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.038325462490320206, "rewards/margins": 0.03653097525238991, "rewards/rejected": -0.07485643774271011, "step": 830 }, { "epoch": 0.05, "learning_rate": 2.746893394375409e-06, "logits/chosen": -2.328739881515503, "logits/rejected": -2.20552659034729, "logps/chosen": -264.1017150878906, "logps/rejected": -289.73760986328125, "loss": 0.692, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.029707293957471848, "rewards/margins": 0.02741720899939537, "rewards/rejected": -0.05712450295686722, "step": 840 }, { "epoch": 0.06, "learning_rate": 2.779594506213211e-06, "logits/chosen": -2.208885669708252, "logits/rejected": -1.9660180807113647, "logps/chosen": -225.4889373779297, "logps/rejected": -240.0717010498047, "loss": 0.6926, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.03563004359602928, "rewards/margins": 0.014896047301590443, "rewards/rejected": -0.05052609369158745, "step": 850 }, { "epoch": 0.06, "learning_rate": 2.812295618051014e-06, "logits/chosen": -2.3775460720062256, "logits/rejected": -2.2183637619018555, "logps/chosen": -306.49853515625, "logps/rejected": -272.1231994628906, "loss": 0.692, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.02553737163543701, "rewards/margins": 0.027351820841431618, "rewards/rejected": -0.05288919061422348, "step": 860 }, { "epoch": 0.06, "learning_rate": 2.8449967298888164e-06, "logits/chosen": -2.2363040447235107, "logits/rejected": -2.031376361846924, "logps/chosen": -247.669189453125, "logps/rejected": -234.5207977294922, "loss": 0.6928, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.07084518671035767, "rewards/margins": 0.014910000376403332, "rewards/rejected": -0.08575518429279327, "step": 870 }, { "epoch": 0.06, "learning_rate": 2.877697841726619e-06, "logits/chosen": -2.305286407470703, "logits/rejected": -2.2096505165100098, "logps/chosen": -266.10638427734375, "logps/rejected": -259.68817138671875, "loss": 0.6929, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.04727055877447128, "rewards/margins": 0.01039662305265665, "rewards/rejected": -0.05766718462109566, "step": 880 }, { "epoch": 0.06, "learning_rate": 2.9103989535644217e-06, "logits/chosen": -2.241441488265991, "logits/rejected": -2.3640246391296387, "logps/chosen": -255.78543090820312, "logps/rejected": -301.43780517578125, "loss": 0.6927, "rewards/accuracies": 0.625, "rewards/chosen": -0.04030177742242813, "rewards/margins": 0.015121949836611748, "rewards/rejected": -0.05542372912168503, "step": 890 }, { "epoch": 0.06, "learning_rate": 2.943100065402224e-06, "logits/chosen": -2.3066906929016113, "logits/rejected": -2.156431198120117, "logps/chosen": -330.0362243652344, "logps/rejected": -328.26934814453125, "loss": 0.6928, "rewards/accuracies": 0.625, "rewards/chosen": -0.03739301115274429, "rewards/margins": 0.013549859635531902, "rewards/rejected": -0.05094286799430847, "step": 900 }, { "epoch": 0.06, "eval_logits/chosen": -2.281608819961548, "eval_logits/rejected": -2.0968432426452637, "eval_logps/chosen": -253.80685424804688, "eval_logps/rejected": -254.93124389648438, "eval_loss": 0.6919333934783936, "eval_rewards/accuracies": 0.6050000190734863, "eval_rewards/chosen": -0.021801894530653954, "eval_rewards/margins": 0.02151745744049549, "eval_rewards/rejected": -0.04331935569643974, "eval_runtime": 713.366, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 900 }, { "epoch": 0.06, "learning_rate": 2.9758011772400266e-06, "logits/chosen": -2.219975471496582, "logits/rejected": -2.2243573665618896, "logps/chosen": -280.44561767578125, "logps/rejected": -302.7583923339844, "loss": 0.6926, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.023136485368013382, "rewards/margins": 0.01493816263973713, "rewards/rejected": -0.03807464614510536, "step": 910 }, { "epoch": 0.06, "learning_rate": 3.0085022890778286e-06, "logits/chosen": -2.263059139251709, "logits/rejected": -2.013516902923584, "logps/chosen": -195.3144989013672, "logps/rejected": -194.3135986328125, "loss": 0.6914, "rewards/accuracies": 0.625, "rewards/chosen": -0.01957494579255581, "rewards/margins": 0.025922566652297974, "rewards/rejected": -0.04549751058220863, "step": 920 }, { "epoch": 0.06, "learning_rate": 3.0412034009156314e-06, "logits/chosen": -2.2070722579956055, "logits/rejected": -2.362426280975342, "logps/chosen": -268.68743896484375, "logps/rejected": -294.49395751953125, "loss": 0.6918, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03300117701292038, "rewards/margins": 0.030068224295973778, "rewards/rejected": -0.0630694031715393, "step": 930 }, { "epoch": 0.06, "learning_rate": 3.073904512753434e-06, "logits/chosen": -2.342238426208496, "logits/rejected": -1.9886445999145508, "logps/chosen": -268.29779052734375, "logps/rejected": -282.3440856933594, "loss": 0.6907, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.038649190217256546, "rewards/margins": 0.04897458851337433, "rewards/rejected": -0.08762378245592117, "step": 940 }, { "epoch": 0.06, "learning_rate": 3.1066056245912363e-06, "logits/chosen": -2.201458692550659, "logits/rejected": -2.262294292449951, "logps/chosen": -284.0932312011719, "logps/rejected": -297.143310546875, "loss": 0.6918, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05310920998454094, "rewards/margins": 0.04500911384820938, "rewards/rejected": -0.09811832755804062, "step": 950 }, { "epoch": 0.06, "learning_rate": 3.1393067364290387e-06, "logits/chosen": -2.1673567295074463, "logits/rejected": -1.9466733932495117, "logps/chosen": -276.6808166503906, "logps/rejected": -281.02276611328125, "loss": 0.6926, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05155404657125473, "rewards/margins": 0.0429275706410408, "rewards/rejected": -0.09448162466287613, "step": 960 }, { "epoch": 0.06, "learning_rate": 3.1720078482668416e-06, "logits/chosen": -2.082993984222412, "logits/rejected": -1.858926773071289, "logps/chosen": -253.553466796875, "logps/rejected": -248.26535034179688, "loss": 0.6915, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04198281094431877, "rewards/margins": 0.0418187752366066, "rewards/rejected": -0.08380158245563507, "step": 970 }, { "epoch": 0.06, "learning_rate": 3.204708960104644e-06, "logits/chosen": -2.074291706085205, "logits/rejected": -1.7586740255355835, "logps/chosen": -243.01431274414062, "logps/rejected": -252.8297882080078, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": -0.025611573830246925, "rewards/margins": 0.042403195053339005, "rewards/rejected": -0.06801476329565048, "step": 980 }, { "epoch": 0.06, "learning_rate": 3.237410071942446e-06, "logits/chosen": -2.0311131477355957, "logits/rejected": -1.7820078134536743, "logps/chosen": -258.5697326660156, "logps/rejected": -232.48477172851562, "loss": 0.693, "rewards/accuracies": 0.5, "rewards/chosen": -0.05220867320895195, "rewards/margins": 0.007478843443095684, "rewards/rejected": -0.05968751758337021, "step": 990 }, { "epoch": 0.07, "learning_rate": 3.270111183780249e-06, "logits/chosen": -1.8629153966903687, "logits/rejected": -1.6702207326889038, "logps/chosen": -262.003662109375, "logps/rejected": -276.80450439453125, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": -0.061980120837688446, "rewards/margins": 0.03280384838581085, "rewards/rejected": -0.0947839766740799, "step": 1000 }, { "epoch": 0.07, "eval_logits/chosen": -1.7917609214782715, "eval_logits/rejected": -1.6315906047821045, "eval_logps/chosen": -308.50732421875, "eval_logps/rejected": -327.3461608886719, "eval_loss": 0.6914514899253845, "eval_rewards/accuracies": 0.6019999980926514, "eval_rewards/chosen": -0.07650233805179596, "eval_rewards/margins": 0.03923192247748375, "eval_rewards/rejected": -0.1157342717051506, "eval_runtime": 713.4479, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 1000 }, { "epoch": 0.07, "learning_rate": 3.3028122956180513e-06, "logits/chosen": -1.580582857131958, "logits/rejected": -1.674863576889038, "logps/chosen": -324.74774169921875, "logps/rejected": -371.5903625488281, "loss": 0.692, "rewards/accuracies": 0.5, "rewards/chosen": -0.1126498356461525, "rewards/margins": 0.01937369629740715, "rewards/rejected": -0.13202352821826935, "step": 1010 }, { "epoch": 0.07, "learning_rate": 3.3355134074558538e-06, "logits/chosen": -1.5238564014434814, "logits/rejected": -1.4265750646591187, "logps/chosen": -362.0272521972656, "logps/rejected": -369.1707458496094, "loss": 0.6928, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14129449427127838, "rewards/margins": 0.032731056213378906, "rewards/rejected": -0.1740255504846573, "step": 1020 }, { "epoch": 0.07, "learning_rate": 3.368214519293656e-06, "logits/chosen": -1.5193853378295898, "logits/rejected": -1.2484958171844482, "logps/chosen": -309.3878173828125, "logps/rejected": -320.7041931152344, "loss": 0.6917, "rewards/accuracies": 0.625, "rewards/chosen": -0.11041752249002457, "rewards/margins": 0.03221488744020462, "rewards/rejected": -0.1426324099302292, "step": 1030 }, { "epoch": 0.07, "learning_rate": 3.400915631131459e-06, "logits/chosen": -1.4544426202774048, "logits/rejected": -1.4647828340530396, "logps/chosen": -321.44378662109375, "logps/rejected": -404.62298583984375, "loss": 0.6901, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12347130477428436, "rewards/margins": 0.0545278862118721, "rewards/rejected": -0.17799919843673706, "step": 1040 }, { "epoch": 0.07, "learning_rate": 3.4336167429692615e-06, "logits/chosen": -1.6467136144638062, "logits/rejected": -1.4343664646148682, "logps/chosen": -333.1053771972656, "logps/rejected": -353.6955261230469, "loss": 0.6907, "rewards/accuracies": 0.75, "rewards/chosen": -0.1185549646615982, "rewards/margins": 0.03684395179152489, "rewards/rejected": -0.1553989052772522, "step": 1050 }, { "epoch": 0.07, "learning_rate": 3.4663178548070635e-06, "logits/chosen": -1.5289983749389648, "logits/rejected": -1.5519436597824097, "logps/chosen": -329.55096435546875, "logps/rejected": -367.48284912109375, "loss": 0.6907, "rewards/accuracies": 0.5, "rewards/chosen": -0.11459700018167496, "rewards/margins": 0.05437355488538742, "rewards/rejected": -0.16897056996822357, "step": 1060 }, { "epoch": 0.07, "learning_rate": 3.499018966644866e-06, "logits/chosen": -1.6656230688095093, "logits/rejected": -1.5398905277252197, "logps/chosen": -341.64501953125, "logps/rejected": -362.2322998046875, "loss": 0.692, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1498199999332428, "rewards/margins": 0.027399664744734764, "rewards/rejected": -0.17721965909004211, "step": 1070 }, { "epoch": 0.07, "learning_rate": 3.531720078482669e-06, "logits/chosen": -1.6908804178237915, "logits/rejected": -1.3607423305511475, "logps/chosen": -335.6886901855469, "logps/rejected": -371.15911865234375, "loss": 0.6918, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1098189726471901, "rewards/margins": 0.046701718121767044, "rewards/rejected": -0.15652066469192505, "step": 1080 }, { "epoch": 0.07, "learning_rate": 3.5644211903204712e-06, "logits/chosen": -1.6549320220947266, "logits/rejected": -1.6474714279174805, "logps/chosen": -311.31427001953125, "logps/rejected": -344.72930908203125, "loss": 0.6921, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1124318465590477, "rewards/margins": 0.03060152195394039, "rewards/rejected": -0.14303335547447205, "step": 1090 }, { "epoch": 0.07, "learning_rate": 3.5971223021582737e-06, "logits/chosen": -1.7676842212677002, "logits/rejected": -1.6186103820800781, "logps/chosen": -357.2384948730469, "logps/rejected": -325.09490966796875, "loss": 0.6925, "rewards/accuracies": 0.625, "rewards/chosen": -0.10143768787384033, "rewards/margins": 0.024971742182970047, "rewards/rejected": -0.12640944123268127, "step": 1100 }, { "epoch": 0.07, "eval_logits/chosen": -1.6588186025619507, "eval_logits/rejected": -1.505243182182312, "eval_logps/chosen": -304.1779479980469, "eval_logps/rejected": -315.8557434082031, "eval_loss": 0.691482424736023, "eval_rewards/accuracies": 0.6169999837875366, "eval_rewards/chosen": -0.07217301428318024, "eval_rewards/margins": 0.03207085281610489, "eval_rewards/rejected": -0.10424386709928513, "eval_runtime": 713.1225, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 1100 }, { "epoch": 0.07, "learning_rate": 3.6298234139960765e-06, "logits/chosen": -1.7195770740509033, "logits/rejected": -1.4194886684417725, "logps/chosen": -287.1241760253906, "logps/rejected": -260.1143798828125, "loss": 0.6911, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0693550780415535, "rewards/margins": 0.022661572322249413, "rewards/rejected": -0.09201665967702866, "step": 1110 }, { "epoch": 0.07, "learning_rate": 3.6625245258338785e-06, "logits/chosen": -1.38588547706604, "logits/rejected": -1.2366743087768555, "logps/chosen": -342.43560791015625, "logps/rejected": -496.285400390625, "loss": 0.6867, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10365718603134155, "rewards/margins": 0.07738915830850601, "rewards/rejected": -0.18104635179042816, "step": 1120 }, { "epoch": 0.07, "learning_rate": 3.695225637671681e-06, "logits/chosen": -1.1003705263137817, "logits/rejected": -0.993769645690918, "logps/chosen": -531.3511962890625, "logps/rejected": -548.9378662109375, "loss": 0.6899, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.32407456636428833, "rewards/margins": 0.05066270753741264, "rewards/rejected": -0.37473729252815247, "step": 1130 }, { "epoch": 0.07, "learning_rate": 3.7279267495094834e-06, "logits/chosen": -1.1593372821807861, "logits/rejected": -0.9515183568000793, "logps/chosen": -496.2530822753906, "logps/rejected": -600.5367431640625, "loss": 0.6903, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.33582615852355957, "rewards/margins": 0.09415847808122635, "rewards/rejected": -0.4299846291542053, "step": 1140 }, { "epoch": 0.08, "learning_rate": 3.7606278613472863e-06, "logits/chosen": -1.6236388683319092, "logits/rejected": -1.340681791305542, "logps/chosen": -434.57965087890625, "logps/rejected": -395.1595764160156, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": -0.15230834484100342, "rewards/margins": 0.03814804553985596, "rewards/rejected": -0.19045640528202057, "step": 1150 }, { "epoch": 0.08, "learning_rate": 3.7933289731850887e-06, "logits/chosen": -1.626577615737915, "logits/rejected": -1.435408115386963, "logps/chosen": -276.0029602050781, "logps/rejected": -292.4320068359375, "loss": 0.6902, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04744229465723038, "rewards/margins": 0.04468740522861481, "rewards/rejected": -0.09212969243526459, "step": 1160 }, { "epoch": 0.08, "learning_rate": 3.826030085022891e-06, "logits/chosen": -1.7114953994750977, "logits/rejected": -1.4889827966690063, "logps/chosen": -336.4309997558594, "logps/rejected": -342.95623779296875, "loss": 0.6915, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08878123760223389, "rewards/margins": 0.042677246034145355, "rewards/rejected": -0.13145849108695984, "step": 1170 }, { "epoch": 0.08, "learning_rate": 3.858731196860693e-06, "logits/chosen": -1.5804107189178467, "logits/rejected": -1.2577301263809204, "logps/chosen": -375.09869384765625, "logps/rejected": -363.1798095703125, "loss": 0.6918, "rewards/accuracies": 0.625, "rewards/chosen": -0.12426527589559555, "rewards/margins": 0.030316686257719994, "rewards/rejected": -0.1545819640159607, "step": 1180 }, { "epoch": 0.08, "learning_rate": 3.891432308698496e-06, "logits/chosen": -1.211471438407898, "logits/rejected": -1.1756963729858398, "logps/chosen": -323.86114501953125, "logps/rejected": -392.4009094238281, "loss": 0.689, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.12863077223300934, "rewards/margins": 0.04329446703195572, "rewards/rejected": -0.17192521691322327, "step": 1190 }, { "epoch": 0.08, "learning_rate": 3.924133420536299e-06, "logits/chosen": -1.2308224439620972, "logits/rejected": -0.9529396295547485, "logps/chosen": -383.98529052734375, "logps/rejected": -426.99755859375, "loss": 0.6906, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20184054970741272, "rewards/margins": 0.07641489803791046, "rewards/rejected": -0.278255432844162, "step": 1200 }, { "epoch": 0.08, "eval_logits/chosen": -1.248422384262085, "eval_logits/rejected": -1.1078686714172363, "eval_logps/chosen": -475.6541748046875, "eval_logps/rejected": -521.3765258789062, "eval_loss": 0.6910752058029175, "eval_rewards/accuracies": 0.6154999732971191, "eval_rewards/chosen": -0.2436492145061493, "eval_rewards/margins": 0.06611540168523788, "eval_rewards/rejected": -0.3097646236419678, "eval_runtime": 712.8179, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 1200 }, { "epoch": 0.08, "learning_rate": 3.956834532374101e-06, "logits/chosen": -1.3935415744781494, "logits/rejected": -1.0047038793563843, "logps/chosen": -454.3526916503906, "logps/rejected": -516.9708251953125, "loss": 0.6873, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2490900456905365, "rewards/margins": 0.10656942427158356, "rewards/rejected": -0.35565948486328125, "step": 1210 }, { "epoch": 0.08, "learning_rate": 3.989535644211904e-06, "logits/chosen": -1.3957096338272095, "logits/rejected": -1.147242546081543, "logps/chosen": -396.89385986328125, "logps/rejected": -460.62542724609375, "loss": 0.6902, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17976659536361694, "rewards/margins": 0.0863526314496994, "rewards/rejected": -0.26611918210983276, "step": 1220 }, { "epoch": 0.08, "learning_rate": 4.022236756049706e-06, "logits/chosen": -1.575761079788208, "logits/rejected": -1.3331005573272705, "logps/chosen": -450.54547119140625, "logps/rejected": -464.44378662109375, "loss": 0.691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1734083592891693, "rewards/margins": 0.04637376219034195, "rewards/rejected": -0.21978211402893066, "step": 1230 }, { "epoch": 0.08, "learning_rate": 4.054937867887509e-06, "logits/chosen": -1.4819010496139526, "logits/rejected": -1.2105497121810913, "logps/chosen": -371.9152526855469, "logps/rejected": -419.6205139160156, "loss": 0.691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14764845371246338, "rewards/margins": 0.05849369615316391, "rewards/rejected": -0.2061421424150467, "step": 1240 }, { "epoch": 0.08, "learning_rate": 4.087638979725311e-06, "logits/chosen": -1.3985395431518555, "logits/rejected": -1.4597551822662354, "logps/chosen": -394.02911376953125, "logps/rejected": -422.7919006347656, "loss": 0.6925, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16618932783603668, "rewards/margins": 0.02730773761868477, "rewards/rejected": -0.19349706172943115, "step": 1250 }, { "epoch": 0.08, "learning_rate": 4.1203400915631135e-06, "logits/chosen": -1.326311707496643, "logits/rejected": -1.1958644390106201, "logps/chosen": -424.20330810546875, "logps/rejected": -432.0690002441406, "loss": 0.6919, "rewards/accuracies": 0.625, "rewards/chosen": -0.18687233328819275, "rewards/margins": 0.036146439611911774, "rewards/rejected": -0.2230187952518463, "step": 1260 }, { "epoch": 0.08, "learning_rate": 4.153041203400916e-06, "logits/chosen": -1.758768081665039, "logits/rejected": -1.6330296993255615, "logps/chosen": -425.97698974609375, "logps/rejected": -421.9137268066406, "loss": 0.6925, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16155469417572021, "rewards/margins": 0.020571332424879074, "rewards/rejected": -0.1821260154247284, "step": 1270 }, { "epoch": 0.08, "learning_rate": 4.185742315238718e-06, "logits/chosen": -1.6385771036148071, "logits/rejected": -1.3802834749221802, "logps/chosen": -348.26763916015625, "logps/rejected": -387.1038818359375, "loss": 0.6899, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15016606450080872, "rewards/margins": 0.05070319026708603, "rewards/rejected": -0.20086923241615295, "step": 1280 }, { "epoch": 0.08, "learning_rate": 4.218443427076521e-06, "logits/chosen": -1.485682725906372, "logits/rejected": -1.4408928155899048, "logps/chosen": -411.95843505859375, "logps/rejected": -446.6746520996094, "loss": 0.6922, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.22919993102550507, "rewards/margins": 0.033245109021663666, "rewards/rejected": -0.26244503259658813, "step": 1290 }, { "epoch": 0.09, "learning_rate": 4.251144538914323e-06, "logits/chosen": -1.6215198040008545, "logits/rejected": -1.503710150718689, "logps/chosen": -448.3627014160156, "logps/rejected": -474.224853515625, "loss": 0.6901, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.22777239978313446, "rewards/margins": 0.039057932794094086, "rewards/rejected": -0.26683029532432556, "step": 1300 }, { "epoch": 0.09, "eval_logits/chosen": -1.7383815050125122, "eval_logits/rejected": -1.5767197608947754, "eval_logps/chosen": -401.8933410644531, "eval_logps/rejected": -423.51837158203125, "eval_loss": 0.6908622980117798, "eval_rewards/accuracies": 0.6169999837875366, "eval_rewards/chosen": -0.16988839209079742, "eval_rewards/margins": 0.04201807454228401, "eval_rewards/rejected": -0.21190647780895233, "eval_runtime": 713.8861, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 1300 }, { "epoch": 0.09, "learning_rate": 4.283845650752126e-06, "logits/chosen": -1.7140634059906006, "logits/rejected": -1.4950907230377197, "logps/chosen": -477.6435546875, "logps/rejected": -462.10394287109375, "loss": 0.6909, "rewards/accuracies": 0.625, "rewards/chosen": -0.19143159687519073, "rewards/margins": 0.04865190014243126, "rewards/rejected": -0.2400834858417511, "step": 1310 }, { "epoch": 0.09, "learning_rate": 4.316546762589928e-06, "logits/chosen": -1.6233209371566772, "logits/rejected": -1.5323542356491089, "logps/chosen": -424.8470153808594, "logps/rejected": -447.05987548828125, "loss": 0.6904, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.21691949665546417, "rewards/margins": 0.05651101469993591, "rewards/rejected": -0.2734305262565613, "step": 1320 }, { "epoch": 0.09, "learning_rate": 4.349247874427731e-06, "logits/chosen": -1.5113164186477661, "logits/rejected": -1.2201378345489502, "logps/chosen": -510.0796813964844, "logps/rejected": -651.254638671875, "loss": 0.6919, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2835456430912018, "rewards/margins": 0.10109242051839828, "rewards/rejected": -0.38463807106018066, "step": 1330 }, { "epoch": 0.09, "learning_rate": 4.381948986265534e-06, "logits/chosen": -1.8363587856292725, "logits/rejected": -1.625396490097046, "logps/chosen": -422.99383544921875, "logps/rejected": -457.910888671875, "loss": 0.6917, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16014917194843292, "rewards/margins": 0.0479239895939827, "rewards/rejected": -0.20807316899299622, "step": 1340 }, { "epoch": 0.09, "learning_rate": 4.414650098103336e-06, "logits/chosen": -2.073012590408325, "logits/rejected": -1.8140983581542969, "logps/chosen": -354.55499267578125, "logps/rejected": -365.2477111816406, "loss": 0.6911, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09260248392820358, "rewards/margins": 0.03311682492494583, "rewards/rejected": -0.12571930885314941, "step": 1350 }, { "epoch": 0.09, "learning_rate": 4.447351209941138e-06, "logits/chosen": -1.6008856296539307, "logits/rejected": -1.4585543870925903, "logps/chosen": -376.71673583984375, "logps/rejected": -375.45831298828125, "loss": 0.6921, "rewards/accuracies": 0.5, "rewards/chosen": -0.16705211997032166, "rewards/margins": 0.016883080825209618, "rewards/rejected": -0.18393521010875702, "step": 1360 }, { "epoch": 0.09, "learning_rate": 4.480052321778941e-06, "logits/chosen": -1.5430247783660889, "logits/rejected": -1.509578824043274, "logps/chosen": -386.68414306640625, "logps/rejected": -411.19598388671875, "loss": 0.6913, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15977385640144348, "rewards/margins": 0.04520783945918083, "rewards/rejected": -0.20498168468475342, "step": 1370 }, { "epoch": 0.09, "learning_rate": 4.5127534336167435e-06, "logits/chosen": -1.5650596618652344, "logits/rejected": -1.3618630170822144, "logps/chosen": -393.9236145019531, "logps/rejected": -384.3157043457031, "loss": 0.6917, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.14674964547157288, "rewards/margins": 0.034052155911922455, "rewards/rejected": -0.18080179393291473, "step": 1380 }, { "epoch": 0.09, "learning_rate": 4.5454545454545455e-06, "logits/chosen": -1.5686426162719727, "logits/rejected": -1.5197176933288574, "logps/chosen": -309.3796691894531, "logps/rejected": -324.637451171875, "loss": 0.692, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.13992100954055786, "rewards/margins": 0.025466833263635635, "rewards/rejected": -0.165387824177742, "step": 1390 }, { "epoch": 0.09, "learning_rate": 4.578155657292348e-06, "logits/chosen": -1.6215531826019287, "logits/rejected": -1.5280569791793823, "logps/chosen": -397.262939453125, "logps/rejected": -486.24444580078125, "loss": 0.6887, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1418572962284088, "rewards/margins": 0.08005955070257187, "rewards/rejected": -0.22191686928272247, "step": 1400 }, { "epoch": 0.09, "eval_logits/chosen": -1.4353091716766357, "eval_logits/rejected": -1.2833685874938965, "eval_logps/chosen": -424.39398193359375, "eval_logps/rejected": -463.2532043457031, "eval_loss": 0.6905860304832458, "eval_rewards/accuracies": 0.6420000195503235, "eval_rewards/chosen": -0.19238901138305664, "eval_rewards/margins": 0.05925232544541359, "eval_rewards/rejected": -0.25164133310317993, "eval_runtime": 714.6679, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 1400 }, { "epoch": 0.09, "learning_rate": 4.610856769130151e-06, "logits/chosen": -1.557550072669983, "logits/rejected": -1.5161031484603882, "logps/chosen": -425.23309326171875, "logps/rejected": -470.5696716308594, "loss": 0.692, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18608888983726501, "rewards/margins": 0.07483814656734467, "rewards/rejected": -0.2609270215034485, "step": 1410 }, { "epoch": 0.09, "learning_rate": 4.643557880967953e-06, "logits/chosen": -1.56943678855896, "logits/rejected": -1.4001885652542114, "logps/chosen": -350.5581970214844, "logps/rejected": -405.6707458496094, "loss": 0.69, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15175940096378326, "rewards/margins": 0.05761238932609558, "rewards/rejected": -0.20937177538871765, "step": 1420 }, { "epoch": 0.09, "learning_rate": 4.676258992805755e-06, "logits/chosen": -1.5054957866668701, "logits/rejected": -1.2771097421646118, "logps/chosen": -451.77630615234375, "logps/rejected": -502.28948974609375, "loss": 0.6907, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18215635418891907, "rewards/margins": 0.0926615446805954, "rewards/rejected": -0.27481788396835327, "step": 1430 }, { "epoch": 0.09, "learning_rate": 4.708960104643558e-06, "logits/chosen": -1.6860449314117432, "logits/rejected": -1.6177040338516235, "logps/chosen": -447.60247802734375, "logps/rejected": -456.874267578125, "loss": 0.691, "rewards/accuracies": 0.625, "rewards/chosen": -0.141392782330513, "rewards/margins": 0.046031877398490906, "rewards/rejected": -0.18742462992668152, "step": 1440 }, { "epoch": 0.09, "learning_rate": 4.741661216481361e-06, "logits/chosen": -1.6008634567260742, "logits/rejected": -1.583705186843872, "logps/chosen": -372.9961242675781, "logps/rejected": -434.13079833984375, "loss": 0.6898, "rewards/accuracies": 0.625, "rewards/chosen": -0.13665054738521576, "rewards/margins": 0.04988841712474823, "rewards/rejected": -0.186538964509964, "step": 1450 }, { "epoch": 0.1, "learning_rate": 4.774362328319163e-06, "logits/chosen": -1.4470480680465698, "logits/rejected": -1.274153470993042, "logps/chosen": -381.94073486328125, "logps/rejected": -374.5335998535156, "loss": 0.6933, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.16615068912506104, "rewards/margins": 0.022542305290699005, "rewards/rejected": -0.18869297206401825, "step": 1460 }, { "epoch": 0.1, "learning_rate": 4.807063440156966e-06, "logits/chosen": -1.431060791015625, "logits/rejected": -1.1888434886932373, "logps/chosen": -480.28546142578125, "logps/rejected": -508.6800842285156, "loss": 0.6882, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21697595715522766, "rewards/margins": 0.08976506441831589, "rewards/rejected": -0.30674102902412415, "step": 1470 }, { "epoch": 0.1, "learning_rate": 4.839764551994769e-06, "logits/chosen": -1.338303565979004, "logits/rejected": -1.0660719871520996, "logps/chosen": -683.1473999023438, "logps/rejected": -721.8735961914062, "loss": 0.6927, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.43443894386291504, "rewards/margins": 0.07645972073078156, "rewards/rejected": -0.5108987092971802, "step": 1480 }, { "epoch": 0.1, "learning_rate": 4.872465663832571e-06, "logits/chosen": -1.2499490976333618, "logits/rejected": -1.1854174137115479, "logps/chosen": -591.4575805664062, "logps/rejected": -592.4474487304688, "loss": 0.6924, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.38369911909103394, "rewards/margins": 0.021449998021125793, "rewards/rejected": -0.4051491320133209, "step": 1490 }, { "epoch": 0.1, "learning_rate": 4.905166775670373e-06, "logits/chosen": -1.4410350322723389, "logits/rejected": -1.2265688180923462, "logps/chosen": -486.8252868652344, "logps/rejected": -504.9773864746094, "loss": 0.69, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2650652825832367, "rewards/margins": 0.04554576426744461, "rewards/rejected": -0.3106110692024231, "step": 1500 }, { "epoch": 0.1, "eval_logits/chosen": -1.483712077140808, "eval_logits/rejected": -1.3358349800109863, "eval_logps/chosen": -428.61151123046875, "eval_logps/rejected": -450.56256103515625, "eval_loss": 0.6911273002624512, "eval_rewards/accuracies": 0.6230000257492065, "eval_rewards/chosen": -0.19660654664039612, "eval_rewards/margins": 0.042344145476818085, "eval_rewards/rejected": -0.2389506846666336, "eval_runtime": 715.7281, "eval_samples_per_second": 2.794, "eval_steps_per_second": 1.397, "step": 1500 }, { "epoch": 0.1, "learning_rate": 4.9378678875081756e-06, "logits/chosen": -1.7270991802215576, "logits/rejected": -1.4997197389602661, "logps/chosen": -360.70330810546875, "logps/rejected": -364.0918884277344, "loss": 0.6913, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.14595970511436462, "rewards/margins": 0.03439674526453018, "rewards/rejected": -0.1803564578294754, "step": 1510 }, { "epoch": 0.1, "learning_rate": 4.9705689993459784e-06, "logits/chosen": -1.6035175323486328, "logits/rejected": -1.2797515392303467, "logps/chosen": -346.11956787109375, "logps/rejected": -347.30596923828125, "loss": 0.6893, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14119425415992737, "rewards/margins": 0.05889949947595596, "rewards/rejected": -0.20009374618530273, "step": 1520 }, { "epoch": 0.1, "learning_rate": 4.999999934793849e-06, "logits/chosen": -1.4298112392425537, "logits/rejected": -1.3906731605529785, "logps/chosen": -448.69049072265625, "logps/rejected": -460.69403076171875, "loss": 0.6915, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20073723793029785, "rewards/margins": 0.04141997918486595, "rewards/rejected": -0.2421572208404541, "step": 1530 }, { "epoch": 0.1, "learning_rate": 4.999992110059814e-06, "logits/chosen": -1.2659540176391602, "logits/rejected": -1.233628511428833, "logps/chosen": -469.53607177734375, "logps/rejected": -498.04742431640625, "loss": 0.6919, "rewards/accuracies": 0.625, "rewards/chosen": -0.19184866547584534, "rewards/margins": 0.0486246719956398, "rewards/rejected": -0.24047331511974335, "step": 1540 }, { "epoch": 0.1, "learning_rate": 4.999971244142299e-06, "logits/chosen": -1.3973344564437866, "logits/rejected": -1.0956056118011475, "logps/chosen": -451.66668701171875, "logps/rejected": -487.6893615722656, "loss": 0.6923, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17821362614631653, "rewards/margins": 0.07526890188455582, "rewards/rejected": -0.25348252058029175, "step": 1550 }, { "epoch": 0.1, "learning_rate": 4.999937337150149e-06, "logits/chosen": -1.2560392618179321, "logits/rejected": -1.0587233304977417, "logps/chosen": -322.35064697265625, "logps/rejected": -345.85009765625, "loss": 0.6913, "rewards/accuracies": 0.625, "rewards/chosen": -0.08783578872680664, "rewards/margins": 0.038880202919244766, "rewards/rejected": -0.1267159879207611, "step": 1560 }, { "epoch": 0.1, "learning_rate": 4.99989038926024e-06, "logits/chosen": -1.1934096813201904, "logits/rejected": -1.1851760149002075, "logps/chosen": -282.2227478027344, "logps/rejected": -342.9458312988281, "loss": 0.6892, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08379466831684113, "rewards/margins": 0.058521050959825516, "rewards/rejected": -0.14231571555137634, "step": 1570 }, { "epoch": 0.1, "learning_rate": 4.999830400717476e-06, "logits/chosen": -1.2530221939086914, "logits/rejected": -1.1964467763900757, "logps/chosen": -362.67901611328125, "logps/rejected": -389.4422302246094, "loss": 0.6911, "rewards/accuracies": 0.625, "rewards/chosen": -0.07529197633266449, "rewards/margins": 0.04303868114948273, "rewards/rejected": -0.11833065748214722, "step": 1580 }, { "epoch": 0.1, "learning_rate": 4.999757371834787e-06, "logits/chosen": -0.992030918598175, "logits/rejected": -1.1213949918746948, "logps/chosen": -344.0542297363281, "logps/rejected": -430.8173828125, "loss": 0.6882, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10423535108566284, "rewards/margins": 0.10227863490581512, "rewards/rejected": -0.20651397109031677, "step": 1590 }, { "epoch": 0.1, "learning_rate": 4.999671302993125e-06, "logits/chosen": -0.7400294542312622, "logits/rejected": -0.7438528537750244, "logps/chosen": -378.1280822753906, "logps/rejected": -455.39154052734375, "loss": 0.6896, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12954765558242798, "rewards/margins": 0.0618753544986248, "rewards/rejected": -0.19142302870750427, "step": 1600 }, { "epoch": 0.1, "eval_logits/chosen": -0.9061187505722046, "eval_logits/rejected": -0.792312741279602, "eval_logps/chosen": -381.1222229003906, "eval_logps/rejected": -427.01788330078125, "eval_loss": 0.6905171871185303, "eval_rewards/accuracies": 0.6349999904632568, "eval_rewards/chosen": -0.14911724627017975, "eval_rewards/margins": 0.06628872454166412, "eval_rewards/rejected": -0.21540597081184387, "eval_runtime": 712.34, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 1600 }, { "epoch": 0.11, "learning_rate": 4.999572194641471e-06, "logits/chosen": -1.0279515981674194, "logits/rejected": -0.7255226969718933, "logps/chosen": -444.42486572265625, "logps/rejected": -483.9266662597656, "loss": 0.688, "rewards/accuracies": 0.75, "rewards/chosen": -0.16890659928321838, "rewards/margins": 0.0919717401266098, "rewards/rejected": -0.260878324508667, "step": 1610 }, { "epoch": 0.11, "learning_rate": 4.999460047296819e-06, "logits/chosen": -0.7330793142318726, "logits/rejected": -0.6069439053535461, "logps/chosen": -463.1800842285156, "logps/rejected": -511.03997802734375, "loss": 0.6915, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24757592380046844, "rewards/margins": 0.06876906007528305, "rewards/rejected": -0.3163450062274933, "step": 1620 }, { "epoch": 0.11, "learning_rate": 4.999334861544186e-06, "logits/chosen": -0.7602131962776184, "logits/rejected": -0.7034991979598999, "logps/chosen": -490.57562255859375, "logps/rejected": -494.21197509765625, "loss": 0.6906, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2611224055290222, "rewards/margins": 0.05963968485593796, "rewards/rejected": -0.3207620680332184, "step": 1630 }, { "epoch": 0.11, "learning_rate": 4.999196638036604e-06, "logits/chosen": -0.8633691668510437, "logits/rejected": -0.657917320728302, "logps/chosen": -595.30029296875, "logps/rejected": -597.7689819335938, "loss": 0.6924, "rewards/accuracies": 0.625, "rewards/chosen": -0.3078852891921997, "rewards/margins": 0.03770507127046585, "rewards/rejected": -0.34559035301208496, "step": 1640 }, { "epoch": 0.11, "learning_rate": 4.999045377495111e-06, "logits/chosen": -0.49410897493362427, "logits/rejected": -0.6826598048210144, "logps/chosen": -511.33984375, "logps/rejected": -666.5721435546875, "loss": 0.6906, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.33700522780418396, "rewards/margins": 0.06744986027479172, "rewards/rejected": -0.4044550359249115, "step": 1650 }, { "epoch": 0.11, "learning_rate": 4.998881080708759e-06, "logits/chosen": -0.972652792930603, "logits/rejected": -0.9712132215499878, "logps/chosen": -429.01959228515625, "logps/rejected": -423.8321838378906, "loss": 0.6931, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18869802355766296, "rewards/margins": 0.02575744315981865, "rewards/rejected": -0.2144554853439331, "step": 1660 }, { "epoch": 0.11, "learning_rate": 4.998703748534599e-06, "logits/chosen": -0.9454406499862671, "logits/rejected": -0.8232323527336121, "logps/chosen": -364.2972106933594, "logps/rejected": -374.5244140625, "loss": 0.6915, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12366746366024017, "rewards/margins": 0.060917746275663376, "rewards/rejected": -0.18458519876003265, "step": 1670 }, { "epoch": 0.11, "learning_rate": 4.998513381897683e-06, "logits/chosen": -1.4703190326690674, "logits/rejected": -1.2022755146026611, "logps/chosen": -322.563720703125, "logps/rejected": -298.5383605957031, "loss": 0.6911, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.08331999182701111, "rewards/margins": 0.03296893090009689, "rewards/rejected": -0.116288922727108, "step": 1680 }, { "epoch": 0.11, "learning_rate": 4.9983099817910565e-06, "logits/chosen": -1.4085242748260498, "logits/rejected": -1.3252454996109009, "logps/chosen": -338.73529052734375, "logps/rejected": -386.0179138183594, "loss": 0.6907, "rewards/accuracies": 0.625, "rewards/chosen": -0.09347338229417801, "rewards/margins": 0.05176064372062683, "rewards/rejected": -0.14523401856422424, "step": 1690 }, { "epoch": 0.11, "learning_rate": 4.998093549275754e-06, "logits/chosen": -1.4733991622924805, "logits/rejected": -1.5043426752090454, "logps/chosen": -324.3277282714844, "logps/rejected": -409.79693603515625, "loss": 0.6896, "rewards/accuracies": 0.75, "rewards/chosen": -0.06399369984865189, "rewards/margins": 0.06367628276348114, "rewards/rejected": -0.12766997516155243, "step": 1700 }, { "epoch": 0.11, "eval_logits/chosen": -1.480065107345581, "eval_logits/rejected": -1.3317376375198364, "eval_logps/chosen": -311.48211669921875, "eval_logps/rejected": -351.01513671875, "eval_loss": 0.6903863549232483, "eval_rewards/accuracies": 0.6255000233650208, "eval_rewards/chosen": -0.07947719097137451, "eval_rewards/margins": 0.059926047921180725, "eval_rewards/rejected": -0.13940322399139404, "eval_runtime": 714.0547, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.4, "step": 1700 }, { "epoch": 0.11, "learning_rate": 4.997864085480794e-06, "logits/chosen": -1.6208089590072632, "logits/rejected": -1.4398419857025146, "logps/chosen": -340.4198913574219, "logps/rejected": -412.36572265625, "loss": 0.6913, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07407370954751968, "rewards/margins": 0.09048272669315338, "rewards/rejected": -0.16455642879009247, "step": 1710 }, { "epoch": 0.11, "learning_rate": 4.997621591603171e-06, "logits/chosen": -1.4687002897262573, "logits/rejected": -1.359674096107483, "logps/chosen": -256.868896484375, "logps/rejected": -314.82879638671875, "loss": 0.6916, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09883610904216766, "rewards/margins": 0.05864466354250908, "rewards/rejected": -0.15748076140880585, "step": 1720 }, { "epoch": 0.11, "learning_rate": 4.997366068907853e-06, "logits/chosen": -1.5249855518341064, "logits/rejected": -1.4639043807983398, "logps/chosen": -335.9312744140625, "logps/rejected": -353.7437438964844, "loss": 0.6906, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07752153277397156, "rewards/margins": 0.037687577307224274, "rewards/rejected": -0.11520912498235703, "step": 1730 }, { "epoch": 0.11, "learning_rate": 4.997097518727771e-06, "logits/chosen": -1.6601321697235107, "logits/rejected": -1.3635004758834839, "logps/chosen": -308.7919616699219, "logps/rejected": -330.8180236816406, "loss": 0.6899, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0858355313539505, "rewards/margins": 0.05514850094914436, "rewards/rejected": -0.14098402857780457, "step": 1740 }, { "epoch": 0.11, "learning_rate": 4.9968159424638155e-06, "logits/chosen": -1.537062644958496, "logits/rejected": -1.6434835195541382, "logps/chosen": -314.24200439453125, "logps/rejected": -400.8989562988281, "loss": 0.691, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0966355949640274, "rewards/margins": 0.0348118357360363, "rewards/rejected": -0.131447434425354, "step": 1750 }, { "epoch": 0.12, "learning_rate": 4.9965213415848235e-06, "logits/chosen": -1.3464255332946777, "logits/rejected": -1.0850141048431396, "logps/chosen": -371.60968017578125, "logps/rejected": -398.5357360839844, "loss": 0.6915, "rewards/accuracies": 0.625, "rewards/chosen": -0.14320513606071472, "rewards/margins": 0.06378600746393204, "rewards/rejected": -0.20699115097522736, "step": 1760 }, { "epoch": 0.12, "learning_rate": 4.9962137176275805e-06, "logits/chosen": -1.5550998449325562, "logits/rejected": -1.376962423324585, "logps/chosen": -311.85333251953125, "logps/rejected": -352.107177734375, "loss": 0.6925, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07755790650844574, "rewards/margins": 0.04081185907125473, "rewards/rejected": -0.11836977303028107, "step": 1770 }, { "epoch": 0.12, "learning_rate": 4.9958930721968015e-06, "logits/chosen": -1.5606944561004639, "logits/rejected": -1.6893508434295654, "logps/chosen": -301.8111572265625, "logps/rejected": -350.09307861328125, "loss": 0.6917, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08977598696947098, "rewards/margins": 0.03678018972277641, "rewards/rejected": -0.1265561580657959, "step": 1780 }, { "epoch": 0.12, "learning_rate": 4.995559406965132e-06, "logits/chosen": -1.7458531856536865, "logits/rejected": -1.5053004026412964, "logps/chosen": -324.5157775878906, "logps/rejected": -351.40771484375, "loss": 0.6905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09260044991970062, "rewards/margins": 0.056507617235183716, "rewards/rejected": -0.14910808205604553, "step": 1790 }, { "epoch": 0.12, "learning_rate": 4.995212723673131e-06, "logits/chosen": -1.6229438781738281, "logits/rejected": -1.3902084827423096, "logps/chosen": -318.61859130859375, "logps/rejected": -324.81866455078125, "loss": 0.6907, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08841551840305328, "rewards/margins": 0.05618295073509216, "rewards/rejected": -0.14459846913814545, "step": 1800 }, { "epoch": 0.12, "eval_logits/chosen": -1.5447196960449219, "eval_logits/rejected": -1.3930197954177856, "eval_logps/chosen": -338.71893310546875, "eval_logps/rejected": -368.562744140625, "eval_loss": 0.6905578970909119, "eval_rewards/accuracies": 0.6294999718666077, "eval_rewards/chosen": -0.10671400278806686, "eval_rewards/margins": 0.05023682862520218, "eval_rewards/rejected": -0.15695083141326904, "eval_runtime": 713.2978, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 1800 }, { "epoch": 0.12, "learning_rate": 4.99485302412927e-06, "logits/chosen": -1.2425215244293213, "logits/rejected": -1.2682154178619385, "logps/chosen": -321.0690002441406, "logps/rejected": -383.86932373046875, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1130579486489296, "rewards/margins": 0.062107671052217484, "rewards/rejected": -0.17516560852527618, "step": 1810 }, { "epoch": 0.12, "learning_rate": 4.994480310209918e-06, "logits/chosen": -1.6616127490997314, "logits/rejected": -1.7467199563980103, "logps/chosen": -315.12713623046875, "logps/rejected": -386.51824951171875, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07314414530992508, "rewards/margins": 0.057636868208646774, "rewards/rejected": -0.13078099489212036, "step": 1820 }, { "epoch": 0.12, "learning_rate": 4.994094583859332e-06, "logits/chosen": -1.650968313217163, "logits/rejected": -1.4825265407562256, "logps/chosen": -240.92605590820312, "logps/rejected": -324.64337158203125, "loss": 0.6895, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07601331919431686, "rewards/margins": 0.051687806844711304, "rewards/rejected": -0.12770113348960876, "step": 1830 }, { "epoch": 0.12, "learning_rate": 4.9936958470896525e-06, "logits/chosen": -1.4662501811981201, "logits/rejected": -1.2516024112701416, "logps/chosen": -343.7925109863281, "logps/rejected": -392.44610595703125, "loss": 0.6896, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12004733085632324, "rewards/margins": 0.09155675023794174, "rewards/rejected": -0.2116040736436844, "step": 1840 }, { "epoch": 0.12, "learning_rate": 4.993284101980883e-06, "logits/chosen": -1.4967622756958008, "logits/rejected": -1.344617247581482, "logps/chosen": -351.4579162597656, "logps/rejected": -427.65966796875, "loss": 0.6849, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1089625209569931, "rewards/margins": 0.11583471298217773, "rewards/rejected": -0.22479721903800964, "step": 1850 }, { "epoch": 0.12, "learning_rate": 4.9928593506808885e-06, "logits/chosen": -1.5015496015548706, "logits/rejected": -1.3130124807357788, "logps/chosen": -399.886474609375, "logps/rejected": -407.5121765136719, "loss": 0.6932, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.14307209849357605, "rewards/margins": 0.04460294917225838, "rewards/rejected": -0.18767504394054413, "step": 1860 }, { "epoch": 0.12, "learning_rate": 4.992421595405381e-06, "logits/chosen": -1.4745498895645142, "logits/rejected": -1.1933575868606567, "logps/chosen": -357.6884765625, "logps/rejected": -320.2742614746094, "loss": 0.6926, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12949608266353607, "rewards/margins": 0.03343268856406212, "rewards/rejected": -0.16292878985404968, "step": 1870 }, { "epoch": 0.12, "learning_rate": 4.991970838437905e-06, "logits/chosen": -1.453904628753662, "logits/rejected": -1.405748963356018, "logps/chosen": -353.97015380859375, "logps/rejected": -452.02789306640625, "loss": 0.6906, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12864060699939728, "rewards/margins": 0.06995304673910141, "rewards/rejected": -0.1985936164855957, "step": 1880 }, { "epoch": 0.12, "learning_rate": 4.9915070821298294e-06, "logits/chosen": -1.5130014419555664, "logits/rejected": -1.3204481601715088, "logps/chosen": -277.0668640136719, "logps/rejected": -301.8009338378906, "loss": 0.6916, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11009702831506729, "rewards/margins": 0.03382255882024765, "rewards/rejected": -0.14391960203647614, "step": 1890 }, { "epoch": 0.12, "learning_rate": 4.991030328900336e-06, "logits/chosen": -1.4666025638580322, "logits/rejected": -1.2133960723876953, "logps/chosen": -396.7815856933594, "logps/rejected": -397.72039794921875, "loss": 0.6903, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11221235990524292, "rewards/margins": 0.07097505033016205, "rewards/rejected": -0.18318741023540497, "step": 1900 }, { "epoch": 0.12, "eval_logits/chosen": -1.503396987915039, "eval_logits/rejected": -1.3577290773391724, "eval_logps/chosen": -351.95111083984375, "eval_logps/rejected": -381.6029357910156, "eval_loss": 0.6906358003616333, "eval_rewards/accuracies": 0.6510000228881836, "eval_rewards/chosen": -0.11994615197181702, "eval_rewards/margins": 0.050044890493154526, "eval_rewards/rejected": -0.16999106109142303, "eval_runtime": 711.5755, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 1900 }, { "epoch": 0.12, "learning_rate": 4.9905405812364014e-06, "logits/chosen": -1.5339515209197998, "logits/rejected": -1.5163123607635498, "logps/chosen": -315.5993957519531, "logps/rejected": -368.30645751953125, "loss": 0.6911, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11330322176218033, "rewards/margins": 0.050241757184267044, "rewards/rejected": -0.16354498267173767, "step": 1910 }, { "epoch": 0.13, "learning_rate": 4.990037841692791e-06, "logits/chosen": -1.4599535465240479, "logits/rejected": -1.3372766971588135, "logps/chosen": -335.96630859375, "logps/rejected": -352.112060546875, "loss": 0.6899, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1299254149198532, "rewards/margins": 0.05943036824464798, "rewards/rejected": -0.1893557757139206, "step": 1920 }, { "epoch": 0.13, "learning_rate": 4.989522112892039e-06, "logits/chosen": -1.4036133289337158, "logits/rejected": -1.385619878768921, "logps/chosen": -369.69952392578125, "logps/rejected": -417.546630859375, "loss": 0.6908, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.17207011580467224, "rewards/margins": 0.04873714596033096, "rewards/rejected": -0.220807284116745, "step": 1930 }, { "epoch": 0.13, "learning_rate": 4.98899339752444e-06, "logits/chosen": -1.4383190870285034, "logits/rejected": -1.197371006011963, "logps/chosen": -363.4906311035156, "logps/rejected": -421.6170959472656, "loss": 0.6884, "rewards/accuracies": 0.625, "rewards/chosen": -0.13388827443122864, "rewards/margins": 0.08706989139318466, "rewards/rejected": -0.2209581583738327, "step": 1940 }, { "epoch": 0.13, "learning_rate": 4.988451698348033e-06, "logits/chosen": -1.4711196422576904, "logits/rejected": -1.5271979570388794, "logps/chosen": -294.31884765625, "logps/rejected": -352.82635498046875, "loss": 0.6931, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11599130928516388, "rewards/margins": 0.03823809325695038, "rewards/rejected": -0.15422940254211426, "step": 1950 }, { "epoch": 0.13, "learning_rate": 4.987897018188585e-06, "logits/chosen": -1.5414386987686157, "logits/rejected": -1.3048475980758667, "logps/chosen": -337.1903991699219, "logps/rejected": -325.1073913574219, "loss": 0.6926, "rewards/accuracies": 0.625, "rewards/chosen": -0.11457107961177826, "rewards/margins": 0.04459307715296745, "rewards/rejected": -0.159164160490036, "step": 1960 }, { "epoch": 0.13, "learning_rate": 4.9873293599395814e-06, "logits/chosen": -1.6896965503692627, "logits/rejected": -1.5547794103622437, "logps/chosen": -283.25335693359375, "logps/rejected": -343.1858825683594, "loss": 0.6885, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08982162177562714, "rewards/margins": 0.06391600519418716, "rewards/rejected": -0.1537376344203949, "step": 1970 }, { "epoch": 0.13, "learning_rate": 4.986748726562203e-06, "logits/chosen": -1.6674537658691406, "logits/rejected": -1.5472710132598877, "logps/chosen": -302.44647216796875, "logps/rejected": -321.59149169921875, "loss": 0.6913, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08659198135137558, "rewards/margins": 0.044037431478500366, "rewards/rejected": -0.13062942028045654, "step": 1980 }, { "epoch": 0.13, "learning_rate": 4.98615512108532e-06, "logits/chosen": -1.6517025232315063, "logits/rejected": -1.6035429239273071, "logps/chosen": -308.13177490234375, "logps/rejected": -352.00897216796875, "loss": 0.6908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08662106841802597, "rewards/margins": 0.04209376126527786, "rewards/rejected": -0.12871482968330383, "step": 1990 }, { "epoch": 0.13, "learning_rate": 4.985548546605469e-06, "logits/chosen": -1.2778985500335693, "logits/rejected": -1.4368833303451538, "logps/chosen": -372.85302734375, "logps/rejected": -435.37042236328125, "loss": 0.6907, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15974724292755127, "rewards/margins": 0.04487691447138786, "rewards/rejected": -0.20462414622306824, "step": 2000 }, { "epoch": 0.13, "eval_logits/chosen": -1.3523448705673218, "eval_logits/rejected": -1.2126736640930176, "eval_logps/chosen": -394.4134521484375, "eval_logps/rejected": -441.41143798828125, "eval_loss": 0.6901971697807312, "eval_rewards/accuracies": 0.6430000066757202, "eval_rewards/chosen": -0.16240845620632172, "eval_rewards/margins": 0.06739108264446259, "eval_rewards/rejected": -0.2297995388507843, "eval_runtime": 713.6841, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 2000 }, { "epoch": 0.13, "learning_rate": 4.984929006286838e-06, "logits/chosen": -1.148874044418335, "logits/rejected": -1.0555192232131958, "logps/chosen": -372.86138916015625, "logps/rejected": -383.76776123046875, "loss": 0.6952, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.1635473668575287, "rewards/margins": 0.007450105156749487, "rewards/rejected": -0.1709974706172943, "step": 2010 }, { "epoch": 0.13, "learning_rate": 4.984296503361256e-06, "logits/chosen": -1.4644495248794556, "logits/rejected": -1.2704464197158813, "logps/chosen": -321.61212158203125, "logps/rejected": -318.55328369140625, "loss": 0.6919, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11969226598739624, "rewards/margins": 0.03894500806927681, "rewards/rejected": -0.15863725543022156, "step": 2020 }, { "epoch": 0.13, "learning_rate": 4.9836510411281645e-06, "logits/chosen": -1.4437366724014282, "logits/rejected": -1.3444958925247192, "logps/chosen": -381.04412841796875, "logps/rejected": -431.23260498046875, "loss": 0.6872, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10251230001449585, "rewards/margins": 0.08959086239337921, "rewards/rejected": -0.19210317730903625, "step": 2030 }, { "epoch": 0.13, "learning_rate": 4.982992622954613e-06, "logits/chosen": -1.6072601079940796, "logits/rejected": -1.3610594272613525, "logps/chosen": -382.94049072265625, "logps/rejected": -339.80303955078125, "loss": 0.6885, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09941227734088898, "rewards/margins": 0.07103113830089569, "rewards/rejected": -0.17044341564178467, "step": 2040 }, { "epoch": 0.13, "learning_rate": 4.9823212522752325e-06, "logits/chosen": -1.636498212814331, "logits/rejected": -1.4854671955108643, "logps/chosen": -410.88037109375, "logps/rejected": -476.26806640625, "loss": 0.6876, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13306666910648346, "rewards/margins": 0.09942369163036346, "rewards/rejected": -0.23249034583568573, "step": 2050 }, { "epoch": 0.13, "learning_rate": 4.981636932592222e-06, "logits/chosen": -1.3983194828033447, "logits/rejected": -1.2955951690673828, "logps/chosen": -323.72686767578125, "logps/rejected": -399.1975402832031, "loss": 0.6908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11081385612487793, "rewards/margins": 0.07949165254831314, "rewards/rejected": -0.19030550122261047, "step": 2060 }, { "epoch": 0.14, "learning_rate": 4.980939667475328e-06, "logits/chosen": -1.643601417541504, "logits/rejected": -1.3366024494171143, "logps/chosen": -408.3066101074219, "logps/rejected": -416.02716064453125, "loss": 0.6905, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13413146138191223, "rewards/margins": 0.06831257790327072, "rewards/rejected": -0.20244404673576355, "step": 2070 }, { "epoch": 0.14, "learning_rate": 4.980229460561826e-06, "logits/chosen": -1.4907690286636353, "logits/rejected": -1.3280975818634033, "logps/chosen": -370.56280517578125, "logps/rejected": -468.95697021484375, "loss": 0.6893, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15248145163059235, "rewards/margins": 0.11370065063238144, "rewards/rejected": -0.2661820948123932, "step": 2080 }, { "epoch": 0.14, "learning_rate": 4.979506315556503e-06, "logits/chosen": -1.379464864730835, "logits/rejected": -1.1236019134521484, "logps/chosen": -457.6728515625, "logps/rejected": -503.1261291503906, "loss": 0.6905, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17118071019649506, "rewards/margins": 0.09644129872322083, "rewards/rejected": -0.2676219940185547, "step": 2090 }, { "epoch": 0.14, "learning_rate": 4.9787702362316395e-06, "logits/chosen": -1.5906661748886108, "logits/rejected": -1.7217735052108765, "logps/chosen": -322.1238098144531, "logps/rejected": -401.7293701171875, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13129660487174988, "rewards/margins": 0.05409153550863266, "rewards/rejected": -0.18538813292980194, "step": 2100 }, { "epoch": 0.14, "eval_logits/chosen": -1.4994711875915527, "eval_logits/rejected": -1.348573923110962, "eval_logps/chosen": -364.04266357421875, "eval_logps/rejected": -413.4233093261719, "eval_loss": 0.6901757717132568, "eval_rewards/accuracies": 0.6455000042915344, "eval_rewards/chosen": -0.13203772902488708, "eval_rewards/margins": 0.06977371871471405, "eval_rewards/rejected": -0.20181144773960114, "eval_runtime": 714.4369, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.4, "step": 2100 }, { "epoch": 0.14, "learning_rate": 4.9780212264269835e-06, "logits/chosen": -1.3951307535171509, "logits/rejected": -1.1739161014556885, "logps/chosen": -337.2302551269531, "logps/rejected": -362.6296081542969, "loss": 0.6915, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.15241114795207977, "rewards/margins": 0.042535070329904556, "rewards/rejected": -0.19494622945785522, "step": 2110 }, { "epoch": 0.14, "learning_rate": 4.977259290049739e-06, "logits/chosen": -1.7050163745880127, "logits/rejected": -1.2494385242462158, "logps/chosen": -407.9380187988281, "logps/rejected": -450.7481384277344, "loss": 0.6869, "rewards/accuracies": 0.75, "rewards/chosen": -0.12099560350179672, "rewards/margins": 0.11232854425907135, "rewards/rejected": -0.23332414031028748, "step": 2120 }, { "epoch": 0.14, "learning_rate": 4.976484431074538e-06, "logits/chosen": -1.445926308631897, "logits/rejected": -1.4503229856491089, "logps/chosen": -310.28448486328125, "logps/rejected": -348.43402099609375, "loss": 0.6915, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11647901684045792, "rewards/margins": 0.055751871317625046, "rewards/rejected": -0.17223089933395386, "step": 2130 }, { "epoch": 0.14, "learning_rate": 4.975696653543425e-06, "logits/chosen": -1.506824016571045, "logits/rejected": -1.312302827835083, "logps/chosen": -383.40704345703125, "logps/rejected": -453.06707763671875, "loss": 0.6894, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13375821709632874, "rewards/margins": 0.08222929388284683, "rewards/rejected": -0.21598748862743378, "step": 2140 }, { "epoch": 0.14, "learning_rate": 4.974895961565835e-06, "logits/chosen": -1.295353651046753, "logits/rejected": -1.1237144470214844, "logps/chosen": -343.5455627441406, "logps/rejected": -430.0581970214844, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16829460859298706, "rewards/margins": 0.08128471672534943, "rewards/rejected": -0.2495793104171753, "step": 2150 }, { "epoch": 0.14, "learning_rate": 4.974082359318566e-06, "logits/chosen": -1.3289740085601807, "logits/rejected": -1.2230969667434692, "logps/chosen": -412.52789306640625, "logps/rejected": -453.0899353027344, "loss": 0.6903, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1588778793811798, "rewards/margins": 0.08604113012552261, "rewards/rejected": -0.24491901695728302, "step": 2160 }, { "epoch": 0.14, "learning_rate": 4.973255851045769e-06, "logits/chosen": -1.4549330472946167, "logits/rejected": -1.424309253692627, "logps/chosen": -342.51593017578125, "logps/rejected": -382.6673583984375, "loss": 0.6903, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12515771389007568, "rewards/margins": 0.07689893245697021, "rewards/rejected": -0.2020566463470459, "step": 2170 }, { "epoch": 0.14, "learning_rate": 4.972416441058915e-06, "logits/chosen": -1.2769469022750854, "logits/rejected": -1.1573234796524048, "logps/chosen": -402.8035583496094, "logps/rejected": -485.8418884277344, "loss": 0.6891, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1730477660894394, "rewards/margins": 0.10968021303415298, "rewards/rejected": -0.2827279269695282, "step": 2180 }, { "epoch": 0.14, "learning_rate": 4.971564133736777e-06, "logits/chosen": -1.1722831726074219, "logits/rejected": -0.994546115398407, "logps/chosen": -317.39727783203125, "logps/rejected": -428.52752685546875, "loss": 0.688, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13939666748046875, "rewards/margins": 0.10614917427301407, "rewards/rejected": -0.24554581940174103, "step": 2190 }, { "epoch": 0.14, "learning_rate": 4.970698933525409e-06, "logits/chosen": -1.7047713994979858, "logits/rejected": -1.4181147813796997, "logps/chosen": -431.55206298828125, "logps/rejected": -438.75457763671875, "loss": 0.6914, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15220515429973602, "rewards/margins": 0.046874094754457474, "rewards/rejected": -0.1990792602300644, "step": 2200 }, { "epoch": 0.14, "eval_logits/chosen": -1.6063042879104614, "eval_logits/rejected": -1.4533063173294067, "eval_logps/chosen": -326.1747741699219, "eval_logps/rejected": -362.9125061035156, "eval_loss": 0.6902684569358826, "eval_rewards/accuracies": 0.6345000267028809, "eval_rewards/chosen": -0.09416984766721725, "eval_rewards/margins": 0.05713077262043953, "eval_rewards/rejected": -0.1513006091117859, "eval_runtime": 713.1171, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 2200 }, { "epoch": 0.14, "learning_rate": 4.969820844938118e-06, "logits/chosen": -1.692323088645935, "logits/rejected": -1.4323906898498535, "logps/chosen": -313.9912414550781, "logps/rejected": -315.52801513671875, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09138797223567963, "rewards/margins": 0.05543946474790573, "rewards/rejected": -0.14682744443416595, "step": 2210 }, { "epoch": 0.15, "learning_rate": 4.968929872555444e-06, "logits/chosen": -1.148425817489624, "logits/rejected": -1.1049809455871582, "logps/chosen": -421.64678955078125, "logps/rejected": -522.7020263671875, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20451240241527557, "rewards/margins": 0.06539227813482285, "rewards/rejected": -0.2699046730995178, "step": 2220 }, { "epoch": 0.15, "learning_rate": 4.968026021025137e-06, "logits/chosen": -1.4740171432495117, "logits/rejected": -1.275011658668518, "logps/chosen": -370.6834411621094, "logps/rejected": -409.69671630859375, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1691185086965561, "rewards/margins": 0.07907870411872864, "rewards/rejected": -0.24819722771644592, "step": 2230 }, { "epoch": 0.15, "learning_rate": 4.967109295062128e-06, "logits/chosen": -1.2822327613830566, "logits/rejected": -1.1900814771652222, "logps/chosen": -387.3942565917969, "logps/rejected": -473.9903869628906, "loss": 0.6914, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16239280998706818, "rewards/margins": 0.06567515432834625, "rewards/rejected": -0.22806794941425323, "step": 2240 }, { "epoch": 0.15, "learning_rate": 4.966179699448509e-06, "logits/chosen": -1.2386096715927124, "logits/rejected": -1.0959924459457397, "logps/chosen": -346.8693542480469, "logps/rejected": -363.8343811035156, "loss": 0.6912, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.156114399433136, "rewards/margins": 0.034791115671396255, "rewards/rejected": -0.19090552628040314, "step": 2250 }, { "epoch": 0.15, "learning_rate": 4.965237239033506e-06, "logits/chosen": -1.4767388105392456, "logits/rejected": -1.2675386667251587, "logps/chosen": -446.92071533203125, "logps/rejected": -516.0152587890625, "loss": 0.6863, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15726196765899658, "rewards/margins": 0.11387642472982407, "rewards/rejected": -0.27113842964172363, "step": 2260 }, { "epoch": 0.15, "learning_rate": 4.964281918733453e-06, "logits/chosen": -1.2942843437194824, "logits/rejected": -1.1992994546890259, "logps/chosen": -358.36810302734375, "logps/rejected": -457.5331115722656, "loss": 0.6884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17525368928909302, "rewards/margins": 0.10362176597118378, "rewards/rejected": -0.2788754105567932, "step": 2270 }, { "epoch": 0.15, "learning_rate": 4.9633137435317715e-06, "logits/chosen": -1.1768795251846313, "logits/rejected": -0.8136765360832214, "logps/chosen": -484.44561767578125, "logps/rejected": -525.266357421875, "loss": 0.6878, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2615506649017334, "rewards/margins": 0.10860586166381836, "rewards/rejected": -0.370156466960907, "step": 2280 }, { "epoch": 0.15, "learning_rate": 4.9623327184789355e-06, "logits/chosen": -1.1933273077011108, "logits/rejected": -1.1837798357009888, "logps/chosen": -521.596435546875, "logps/rejected": -575.0322875976562, "loss": 0.6926, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3038559854030609, "rewards/margins": 0.058262161910533905, "rewards/rejected": -0.3621181547641754, "step": 2290 }, { "epoch": 0.15, "learning_rate": 4.9613388486924525e-06, "logits/chosen": -0.7321104407310486, "logits/rejected": -0.9151169657707214, "logps/chosen": -538.6513671875, "logps/rejected": -635.89453125, "loss": 0.6916, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.35851725935935974, "rewards/margins": 0.07986314594745636, "rewards/rejected": -0.4383804202079773, "step": 2300 }, { "epoch": 0.15, "eval_logits/chosen": -1.193457007408142, "eval_logits/rejected": -1.0524342060089111, "eval_logps/chosen": -545.2796020507812, "eval_logps/rejected": -613.0293579101562, "eval_loss": 0.6903690099716187, "eval_rewards/accuracies": 0.6449999809265137, "eval_rewards/chosen": -0.3132747411727905, "eval_rewards/margins": 0.08814278990030289, "eval_rewards/rejected": -0.4014175534248352, "eval_runtime": 714.3536, "eval_samples_per_second": 2.8, "eval_steps_per_second": 1.4, "step": 2300 }, { "epoch": 0.15, "learning_rate": 4.960332139356834e-06, "logits/chosen": -1.2785427570343018, "logits/rejected": -1.092272400856018, "logps/chosen": -472.427978515625, "logps/rejected": -542.86181640625, "loss": 0.6883, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2636111080646515, "rewards/margins": 0.09687371551990509, "rewards/rejected": -0.360484778881073, "step": 2310 }, { "epoch": 0.15, "learning_rate": 4.95931259572357e-06, "logits/chosen": -1.3524049520492554, "logits/rejected": -1.1524009704589844, "logps/chosen": -462.6844787597656, "logps/rejected": -562.32470703125, "loss": 0.6898, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22844454646110535, "rewards/margins": 0.07396665960550308, "rewards/rejected": -0.30241116881370544, "step": 2320 }, { "epoch": 0.15, "learning_rate": 4.9582802231111e-06, "logits/chosen": -1.349764108657837, "logits/rejected": -1.385392189025879, "logps/chosen": -358.3040771484375, "logps/rejected": -393.629638671875, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": -0.14642992615699768, "rewards/margins": 0.06220410391688347, "rewards/rejected": -0.20863404870033264, "step": 2330 }, { "epoch": 0.15, "learning_rate": 4.957235026904782e-06, "logits/chosen": -1.4866364002227783, "logits/rejected": -1.245184063911438, "logps/chosen": -387.78863525390625, "logps/rejected": -390.4792175292969, "loss": 0.6903, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1256362497806549, "rewards/margins": 0.05380575731396675, "rewards/rejected": -0.17944203317165375, "step": 2340 }, { "epoch": 0.15, "learning_rate": 4.956177012556875e-06, "logits/chosen": -1.5071487426757812, "logits/rejected": -1.2931318283081055, "logps/chosen": -406.36260986328125, "logps/rejected": -397.49951171875, "loss": 0.6889, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16216978430747986, "rewards/margins": 0.06270495802164078, "rewards/rejected": -0.22487470507621765, "step": 2350 }, { "epoch": 0.15, "learning_rate": 4.9551061855864976e-06, "logits/chosen": -0.8723462224006653, "logits/rejected": -0.9245679974555969, "logps/chosen": -391.17108154296875, "logps/rejected": -441.70599365234375, "loss": 0.6905, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1983429491519928, "rewards/margins": 0.04632434993982315, "rewards/rejected": -0.24466726183891296, "step": 2360 }, { "epoch": 0.16, "learning_rate": 4.95402255157961e-06, "logits/chosen": -0.9798853993415833, "logits/rejected": -0.9244794845581055, "logps/chosen": -366.5437927246094, "logps/rejected": -537.5667114257812, "loss": 0.689, "rewards/accuracies": 0.625, "rewards/chosen": -0.1814875304698944, "rewards/margins": 0.1064767986536026, "rewards/rejected": -0.2879643142223358, "step": 2370 }, { "epoch": 0.16, "learning_rate": 4.952926116188977e-06, "logits/chosen": -1.305229902267456, "logits/rejected": -1.3175709247589111, "logps/chosen": -363.5146484375, "logps/rejected": -447.1356506347656, "loss": 0.6929, "rewards/accuracies": 0.5, "rewards/chosen": -0.18013733625411987, "rewards/margins": 0.04441644623875618, "rewards/rejected": -0.22455377876758575, "step": 2380 }, { "epoch": 0.16, "learning_rate": 4.951816885134143e-06, "logits/chosen": -1.3067344427108765, "logits/rejected": -1.383279800415039, "logps/chosen": -364.8487243652344, "logps/rejected": -418.524169921875, "loss": 0.6909, "rewards/accuracies": 0.625, "rewards/chosen": -0.16329577565193176, "rewards/margins": 0.054536230862140656, "rewards/rejected": -0.21783199906349182, "step": 2390 }, { "epoch": 0.16, "learning_rate": 4.950694864201399e-06, "logits/chosen": -1.350503921508789, "logits/rejected": -1.2483956813812256, "logps/chosen": -370.8079833984375, "logps/rejected": -446.61199951171875, "loss": 0.6897, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13212604820728302, "rewards/margins": 0.0736747682094574, "rewards/rejected": -0.20580080151557922, "step": 2400 }, { "epoch": 0.16, "eval_logits/chosen": -1.379401683807373, "eval_logits/rejected": -1.235855221748352, "eval_logps/chosen": -354.2447509765625, "eval_logps/rejected": -401.984375, "eval_loss": 0.6901373863220215, "eval_rewards/accuracies": 0.6389999985694885, "eval_rewards/chosen": -0.12223977595567703, "eval_rewards/margins": 0.06813269108533859, "eval_rewards/rejected": -0.19037246704101562, "eval_runtime": 714.0833, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.4, "step": 2400 }, { "epoch": 0.16, "learning_rate": 4.9495600592437575e-06, "logits/chosen": -1.3811991214752197, "logits/rejected": -1.3576008081436157, "logps/chosen": -401.98553466796875, "logps/rejected": -444.0850524902344, "loss": 0.6912, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17509104311466217, "rewards/margins": 0.03891799598932266, "rewards/rejected": -0.21400907635688782, "step": 2410 }, { "epoch": 0.16, "learning_rate": 4.948412476180917e-06, "logits/chosen": -1.100079894065857, "logits/rejected": -0.9340648651123047, "logps/chosen": -325.56146240234375, "logps/rejected": -389.710693359375, "loss": 0.6902, "rewards/accuracies": 0.625, "rewards/chosen": -0.14148560166358948, "rewards/margins": 0.086885966360569, "rewards/rejected": -0.22837157547473907, "step": 2420 }, { "epoch": 0.16, "learning_rate": 4.947252120999232e-06, "logits/chosen": -1.1523898839950562, "logits/rejected": -0.9136794805526733, "logps/chosen": -423.8316345214844, "logps/rejected": -403.52197265625, "loss": 0.6924, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.15672679245471954, "rewards/margins": 0.03846416622400284, "rewards/rejected": -0.19519095122814178, "step": 2430 }, { "epoch": 0.16, "learning_rate": 4.946078999751683e-06, "logits/chosen": -1.1010525226593018, "logits/rejected": -0.948320209980011, "logps/chosen": -287.8900451660156, "logps/rejected": -328.5050048828125, "loss": 0.6904, "rewards/accuracies": 0.625, "rewards/chosen": -0.10826291143894196, "rewards/margins": 0.06217117980122566, "rewards/rejected": -0.1704341024160385, "step": 2440 }, { "epoch": 0.16, "learning_rate": 4.944893118557847e-06, "logits/chosen": -1.130669355392456, "logits/rejected": -1.0464719533920288, "logps/chosen": -333.6479187011719, "logps/rejected": -340.75628662109375, "loss": 0.69, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12716332077980042, "rewards/margins": 0.060798801481723785, "rewards/rejected": -0.187962144613266, "step": 2450 }, { "epoch": 0.16, "learning_rate": 4.943694483603861e-06, "logits/chosen": -1.472876787185669, "logits/rejected": -1.113930344581604, "logps/chosen": -323.41058349609375, "logps/rejected": -335.5633850097656, "loss": 0.6899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0974065512418747, "rewards/margins": 0.06313179433345795, "rewards/rejected": -0.16053833067417145, "step": 2460 }, { "epoch": 0.16, "learning_rate": 4.9424831011423914e-06, "logits/chosen": -1.4405043125152588, "logits/rejected": -1.3846049308776855, "logps/chosen": -403.95758056640625, "logps/rejected": -391.89056396484375, "loss": 0.6924, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11553101241588593, "rewards/margins": 0.024947669357061386, "rewards/rejected": -0.1404787003993988, "step": 2470 }, { "epoch": 0.16, "learning_rate": 4.9412589774926015e-06, "logits/chosen": -1.3660147190093994, "logits/rejected": -1.1039983034133911, "logps/chosen": -406.1762390136719, "logps/rejected": -425.1890563964844, "loss": 0.6907, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13198652863502502, "rewards/margins": 0.06963631510734558, "rewards/rejected": -0.2016228437423706, "step": 2480 }, { "epoch": 0.16, "learning_rate": 4.940022119040121e-06, "logits/chosen": -1.2710121870040894, "logits/rejected": -1.1066303253173828, "logps/chosen": -425.4140625, "logps/rejected": -424.8721618652344, "loss": 0.6923, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13349811732769012, "rewards/margins": 0.027276337146759033, "rewards/rejected": -0.16077445447444916, "step": 2490 }, { "epoch": 0.16, "learning_rate": 4.93877253223701e-06, "logits/chosen": -1.2585488557815552, "logits/rejected": -1.220293402671814, "logps/chosen": -403.06866455078125, "logps/rejected": -416.2520446777344, "loss": 0.6921, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1107964739203453, "rewards/margins": 0.04475604370236397, "rewards/rejected": -0.15555252134799957, "step": 2500 }, { "epoch": 0.16, "eval_logits/chosen": -1.2731248140335083, "eval_logits/rejected": -1.1392936706542969, "eval_logps/chosen": -346.2454833984375, "eval_logps/rejected": -378.1649169921875, "eval_loss": 0.6903291940689087, "eval_rewards/accuracies": 0.6485000252723694, "eval_rewards/chosen": -0.11424053460359573, "eval_rewards/margins": 0.05231250822544098, "eval_rewards/rejected": -0.16655302047729492, "eval_runtime": 715.4866, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.398, "step": 2500 }, { "epoch": 0.16, "learning_rate": 4.937510223601725e-06, "logits/chosen": -1.5598738193511963, "logits/rejected": -1.5040004253387451, "logps/chosen": -357.2165222167969, "logps/rejected": -335.8756103515625, "loss": 0.6926, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09469057619571686, "rewards/margins": 0.020870482549071312, "rewards/rejected": -0.11556105315685272, "step": 2510 }, { "epoch": 0.16, "learning_rate": 4.936235199719085e-06, "logits/chosen": -1.27707040309906, "logits/rejected": -1.1760832071304321, "logps/chosen": -290.1016845703125, "logps/rejected": -318.79022216796875, "loss": 0.6906, "rewards/accuracies": 0.625, "rewards/chosen": -0.11779968440532684, "rewards/margins": 0.05886771157383919, "rewards/rejected": -0.17666740715503693, "step": 2520 }, { "epoch": 0.17, "learning_rate": 4.93494746724024e-06, "logits/chosen": -1.3201732635498047, "logits/rejected": -1.1975219249725342, "logps/chosen": -349.1531677246094, "logps/rejected": -434.265380859375, "loss": 0.6897, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12534867227077484, "rewards/margins": 0.06765227019786835, "rewards/rejected": -0.1930009424686432, "step": 2530 }, { "epoch": 0.17, "learning_rate": 4.933647032882635e-06, "logits/chosen": -1.3505040407180786, "logits/rejected": -1.165984869003296, "logps/chosen": -397.2581481933594, "logps/rejected": -416.6473083496094, "loss": 0.6898, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1523962914943695, "rewards/margins": 0.06380870193243027, "rewards/rejected": -0.21620500087738037, "step": 2540 }, { "epoch": 0.17, "learning_rate": 4.932333903429969e-06, "logits/chosen": -0.8627212643623352, "logits/rejected": -0.7106344699859619, "logps/chosen": -363.2044982910156, "logps/rejected": -337.69976806640625, "loss": 0.6945, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.17012201249599457, "rewards/margins": 0.00030131227686069906, "rewards/rejected": -0.17042334377765656, "step": 2550 }, { "epoch": 0.17, "learning_rate": 4.931008085732172e-06, "logits/chosen": -0.9541371464729309, "logits/rejected": -0.5360308289527893, "logps/chosen": -377.06036376953125, "logps/rejected": -384.6097412109375, "loss": 0.691, "rewards/accuracies": 0.75, "rewards/chosen": -0.174237459897995, "rewards/margins": 0.05755072832107544, "rewards/rejected": -0.23178818821907043, "step": 2560 }, { "epoch": 0.17, "learning_rate": 4.9296695867053565e-06, "logits/chosen": -0.9381176233291626, "logits/rejected": -0.6313947439193726, "logps/chosen": -505.18646240234375, "logps/rejected": -498.3179626464844, "loss": 0.6912, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2045850306749344, "rewards/margins": 0.059091318398714066, "rewards/rejected": -0.26367634534835815, "step": 2570 }, { "epoch": 0.17, "learning_rate": 4.928318413331791e-06, "logits/chosen": -0.7713817358016968, "logits/rejected": -0.7979531288146973, "logps/chosen": -411.1729431152344, "logps/rejected": -445.4847106933594, "loss": 0.6919, "rewards/accuracies": 0.5, "rewards/chosen": -0.1999458223581314, "rewards/margins": 0.0502970889210701, "rewards/rejected": -0.2502428889274597, "step": 2580 }, { "epoch": 0.17, "learning_rate": 4.926954572659855e-06, "logits/chosen": -0.6779652833938599, "logits/rejected": -0.5368167161941528, "logps/chosen": -440.5191955566406, "logps/rejected": -534.2384033203125, "loss": 0.6897, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19832351803779602, "rewards/margins": 0.07653336226940155, "rewards/rejected": -0.27485689520835876, "step": 2590 }, { "epoch": 0.17, "learning_rate": 4.925578071804013e-06, "logits/chosen": -0.46047964692115784, "logits/rejected": -0.48922720551490784, "logps/chosen": -441.5943908691406, "logps/rejected": -573.5369262695312, "loss": 0.6893, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2118886262178421, "rewards/margins": 0.07453066110610962, "rewards/rejected": -0.2864193022251129, "step": 2600 }, { "epoch": 0.17, "eval_logits/chosen": -0.7405468225479126, "eval_logits/rejected": -0.6333872079849243, "eval_logps/chosen": -444.1413879394531, "eval_logps/rejected": -493.07635498046875, "eval_loss": 0.6899484992027283, "eval_rewards/accuracies": 0.6545000076293945, "eval_rewards/chosen": -0.2121364325284958, "eval_rewards/margins": 0.06932804733514786, "eval_rewards/rejected": -0.28146445751190186, "eval_runtime": 712.5767, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 2600 }, { "epoch": 0.17, "learning_rate": 4.924188917944763e-06, "logits/chosen": -0.9295086860656738, "logits/rejected": -0.6540385484695435, "logps/chosen": -414.5174865722656, "logps/rejected": -501.05517578125, "loss": 0.6867, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19451238214969635, "rewards/margins": 0.12005816400051117, "rewards/rejected": -0.3145705461502075, "step": 2610 }, { "epoch": 0.17, "learning_rate": 4.922787118328617e-06, "logits/chosen": -0.9196515083312988, "logits/rejected": -0.4873427748680115, "logps/chosen": -459.8988342285156, "logps/rejected": -429.284912109375, "loss": 0.6927, "rewards/accuracies": 0.5, "rewards/chosen": -0.22988121211528778, "rewards/margins": 0.04569891467690468, "rewards/rejected": -0.27558010816574097, "step": 2620 }, { "epoch": 0.17, "learning_rate": 4.921372680268045e-06, "logits/chosen": -0.7027789354324341, "logits/rejected": -0.7827448844909668, "logps/chosen": -487.5341796875, "logps/rejected": -478.0096740722656, "loss": 0.6937, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.25752943754196167, "rewards/margins": 0.020844275131821632, "rewards/rejected": -0.27837374806404114, "step": 2630 }, { "epoch": 0.17, "learning_rate": 4.919945611141451e-06, "logits/chosen": -1.108783483505249, "logits/rejected": -0.8095542788505554, "logps/chosen": -407.37945556640625, "logps/rejected": -403.690185546875, "loss": 0.6892, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18214483559131622, "rewards/margins": 0.05483005568385124, "rewards/rejected": -0.23697488009929657, "step": 2640 }, { "epoch": 0.17, "learning_rate": 4.918505918393125e-06, "logits/chosen": -0.8918964266777039, "logits/rejected": -0.7974181175231934, "logps/chosen": -347.4504699707031, "logps/rejected": -446.9676208496094, "loss": 0.6887, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17841976881027222, "rewards/margins": 0.07614380121231079, "rewards/rejected": -0.254563570022583, "step": 2650 }, { "epoch": 0.17, "learning_rate": 4.91705360953321e-06, "logits/chosen": -0.8860540390014648, "logits/rejected": -0.8829668164253235, "logps/chosen": -486.8497009277344, "logps/rejected": -531.5885009765625, "loss": 0.6889, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24136213958263397, "rewards/margins": 0.07739405333995819, "rewards/rejected": -0.31875619292259216, "step": 2660 }, { "epoch": 0.17, "learning_rate": 4.9155886921376615e-06, "logits/chosen": -0.9232280850410461, "logits/rejected": -0.8939113616943359, "logps/chosen": -443.81170654296875, "logps/rejected": -532.4825439453125, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": -0.23784330487251282, "rewards/margins": 0.06700251996517181, "rewards/rejected": -0.3048458397388458, "step": 2670 }, { "epoch": 0.18, "learning_rate": 4.914111173848205e-06, "logits/chosen": -1.0852937698364258, "logits/rejected": -1.0277903079986572, "logps/chosen": -487.4720153808594, "logps/rejected": -509.03466796875, "loss": 0.6915, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.25444039702415466, "rewards/margins": 0.03698063641786575, "rewards/rejected": -0.2914210259914398, "step": 2680 }, { "epoch": 0.18, "learning_rate": 4.9126210623723e-06, "logits/chosen": -0.9503974914550781, "logits/rejected": -0.9779285192489624, "logps/chosen": -425.9971618652344, "logps/rejected": -529.239990234375, "loss": 0.6896, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22562380135059357, "rewards/margins": 0.06947082281112671, "rewards/rejected": -0.2950945794582367, "step": 2690 }, { "epoch": 0.18, "learning_rate": 4.911118365483098e-06, "logits/chosen": -1.1807284355163574, "logits/rejected": -1.0885039567947388, "logps/chosen": -439.03369140625, "logps/rejected": -527.4857177734375, "loss": 0.6898, "rewards/accuracies": 0.75, "rewards/chosen": -0.2372794896364212, "rewards/margins": 0.07902587950229645, "rewards/rejected": -0.31630533933639526, "step": 2700 }, { "epoch": 0.18, "eval_logits/chosen": -1.12832510471344, "eval_logits/rejected": -0.998919665813446, "eval_logps/chosen": -451.3296203613281, "eval_logps/rejected": -502.5095520019531, "eval_loss": 0.6901895403862, "eval_rewards/accuracies": 0.6480000019073486, "eval_rewards/chosen": -0.21932466328144073, "eval_rewards/margins": 0.07157304137945175, "eval_rewards/rejected": -0.2908977270126343, "eval_runtime": 713.232, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 2700 }, { "epoch": 0.18, "learning_rate": 4.909603091019403e-06, "logits/chosen": -1.3059003353118896, "logits/rejected": -1.127820372581482, "logps/chosen": -425.28912353515625, "logps/rejected": -467.7945861816406, "loss": 0.692, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18891176581382751, "rewards/margins": 0.07912580668926239, "rewards/rejected": -0.2680375576019287, "step": 2710 }, { "epoch": 0.18, "learning_rate": 4.908075246885626e-06, "logits/chosen": -1.0648815631866455, "logits/rejected": -1.033276915550232, "logps/chosen": -354.4007263183594, "logps/rejected": -371.9431457519531, "loss": 0.6921, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20367324352264404, "rewards/margins": 0.04493376240134239, "rewards/rejected": -0.24860699474811554, "step": 2720 }, { "epoch": 0.18, "learning_rate": 4.906534841051755e-06, "logits/chosen": -0.9984892010688782, "logits/rejected": -0.9872691035270691, "logps/chosen": -434.988037109375, "logps/rejected": -493.0547790527344, "loss": 0.6901, "rewards/accuracies": 0.625, "rewards/chosen": -0.19158455729484558, "rewards/margins": 0.04703119397163391, "rewards/rejected": -0.2386157512664795, "step": 2730 }, { "epoch": 0.18, "learning_rate": 4.904981881553297e-06, "logits/chosen": -1.2664055824279785, "logits/rejected": -1.0330970287322998, "logps/chosen": -406.26007080078125, "logps/rejected": -387.2786560058594, "loss": 0.6909, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1820637732744217, "rewards/margins": 0.04513677582144737, "rewards/rejected": -0.22720055282115936, "step": 2740 }, { "epoch": 0.18, "learning_rate": 4.903416376491252e-06, "logits/chosen": -1.3078968524932861, "logits/rejected": -1.1394526958465576, "logps/chosen": -448.9022521972656, "logps/rejected": -497.947265625, "loss": 0.6888, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17061004042625427, "rewards/margins": 0.08963707834482193, "rewards/rejected": -0.260247141122818, "step": 2750 }, { "epoch": 0.18, "learning_rate": 4.90183833403206e-06, "logits/chosen": -1.508120059967041, "logits/rejected": -1.4299445152282715, "logps/chosen": -431.62591552734375, "logps/rejected": -478.345458984375, "loss": 0.6893, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16741284728050232, "rewards/margins": 0.08297250419855118, "rewards/rejected": -0.2503853440284729, "step": 2760 }, { "epoch": 0.18, "learning_rate": 4.900247762407564e-06, "logits/chosen": -1.1569961309432983, "logits/rejected": -1.1519848108291626, "logps/chosen": -337.0670471191406, "logps/rejected": -452.12872314453125, "loss": 0.6879, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1647750735282898, "rewards/margins": 0.09301907569169998, "rewards/rejected": -0.2577941417694092, "step": 2770 }, { "epoch": 0.18, "learning_rate": 4.898644669914965e-06, "logits/chosen": -1.2003084421157837, "logits/rejected": -1.125847578048706, "logps/chosen": -412.99810791015625, "logps/rejected": -477.783203125, "loss": 0.6907, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18892566859722137, "rewards/margins": 0.07861106842756271, "rewards/rejected": -0.2675367295742035, "step": 2780 }, { "epoch": 0.18, "learning_rate": 4.897029064916778e-06, "logits/chosen": -0.9239814877510071, "logits/rejected": -0.8800607919692993, "logps/chosen": -387.64642333984375, "logps/rejected": -427.87005615234375, "loss": 0.6918, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.17949643731117249, "rewards/margins": 0.05261252075433731, "rewards/rejected": -0.2321089804172516, "step": 2790 }, { "epoch": 0.18, "learning_rate": 4.895400955840791e-06, "logits/chosen": -1.4370605945587158, "logits/rejected": -0.869090735912323, "logps/chosen": -342.58966064453125, "logps/rejected": -373.0824279785156, "loss": 0.6905, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.12223835289478302, "rewards/margins": 0.07219056785106659, "rewards/rejected": -0.1944289207458496, "step": 2800 }, { "epoch": 0.18, "eval_logits/chosen": -1.2887914180755615, "eval_logits/rejected": -1.1516715288162231, "eval_logps/chosen": -361.0450134277344, "eval_logps/rejected": -403.0553894042969, "eval_loss": 0.6901711225509644, "eval_rewards/accuracies": 0.6460000276565552, "eval_rewards/chosen": -0.12904000282287598, "eval_rewards/margins": 0.06240350008010864, "eval_rewards/rejected": -0.19144350290298462, "eval_runtime": 712.0065, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.404, "step": 2800 }, { "epoch": 0.18, "learning_rate": 4.893760351180018e-06, "logits/chosen": -1.2118985652923584, "logits/rejected": -1.2449872493743896, "logps/chosen": -342.95660400390625, "logps/rejected": -396.58294677734375, "loss": 0.6902, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14441534876823425, "rewards/margins": 0.0485253669321537, "rewards/rejected": -0.19294071197509766, "step": 2810 }, { "epoch": 0.18, "learning_rate": 4.892107259492657e-06, "logits/chosen": -1.1593921184539795, "logits/rejected": -1.0327198505401611, "logps/chosen": -364.427001953125, "logps/rejected": -413.22467041015625, "loss": 0.6917, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1245676726102829, "rewards/margins": 0.04277877137064934, "rewards/rejected": -0.16734644770622253, "step": 2820 }, { "epoch": 0.19, "learning_rate": 4.890441689402042e-06, "logits/chosen": -1.522952914237976, "logits/rejected": -1.2996022701263428, "logps/chosen": -446.1918029785156, "logps/rejected": -485.85943603515625, "loss": 0.6898, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11068687587976456, "rewards/margins": 0.08766763657331467, "rewards/rejected": -0.19835449755191803, "step": 2830 }, { "epoch": 0.19, "learning_rate": 4.888763649596606e-06, "logits/chosen": -1.4260321855545044, "logits/rejected": -1.3132002353668213, "logps/chosen": -320.7117919921875, "logps/rejected": -362.90374755859375, "loss": 0.6906, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.11055009067058563, "rewards/margins": 0.050993360579013824, "rewards/rejected": -0.16154345870018005, "step": 2840 }, { "epoch": 0.19, "learning_rate": 4.887073148829824e-06, "logits/chosen": -1.2011626958847046, "logits/rejected": -1.1460561752319336, "logps/chosen": -384.56658935546875, "logps/rejected": -437.68212890625, "loss": 0.6896, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11766302585601807, "rewards/margins": 0.07605113089084625, "rewards/rejected": -0.1937141716480255, "step": 2850 }, { "epoch": 0.19, "learning_rate": 4.885370195920177e-06, "logits/chosen": -0.9891164898872375, "logits/rejected": -0.9282897710800171, "logps/chosen": -330.49169921875, "logps/rejected": -383.62677001953125, "loss": 0.6918, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1451679766178131, "rewards/margins": 0.06170827895402908, "rewards/rejected": -0.20687627792358398, "step": 2860 }, { "epoch": 0.19, "learning_rate": 4.883654799751101e-06, "logits/chosen": -1.0076591968536377, "logits/rejected": -0.9268460273742676, "logps/chosen": -313.0103759765625, "logps/rejected": -402.40716552734375, "loss": 0.6913, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09730122238397598, "rewards/margins": 0.05465664714574814, "rewards/rejected": -0.15195786952972412, "step": 2870 }, { "epoch": 0.19, "learning_rate": 4.8819269692709435e-06, "logits/chosen": -1.1856772899627686, "logits/rejected": -0.9097310304641724, "logps/chosen": -355.238525390625, "logps/rejected": -365.5335693359375, "loss": 0.6899, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08819227665662766, "rewards/margins": 0.08265722543001175, "rewards/rejected": -0.1708495169878006, "step": 2880 }, { "epoch": 0.19, "learning_rate": 4.880186713492915e-06, "logits/chosen": -1.0931060314178467, "logits/rejected": -0.9322888255119324, "logps/chosen": -365.28948974609375, "logps/rejected": -362.249755859375, "loss": 0.6904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13757213950157166, "rewards/margins": 0.050582222640514374, "rewards/rejected": -0.18815436959266663, "step": 2890 }, { "epoch": 0.19, "learning_rate": 4.878434041495041e-06, "logits/chosen": -1.1420027017593384, "logits/rejected": -1.2647812366485596, "logps/chosen": -358.3078308105469, "logps/rejected": -443.99798583984375, "loss": 0.6888, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.12114207446575165, "rewards/margins": 0.0838278979063034, "rewards/rejected": -0.20496997237205505, "step": 2900 }, { "epoch": 0.19, "eval_logits/chosen": -0.9719996452331543, "eval_logits/rejected": -0.8515611886978149, "eval_logps/chosen": -368.5532531738281, "eval_logps/rejected": -425.2482604980469, "eval_loss": 0.6900655627250671, "eval_rewards/accuracies": 0.6520000100135803, "eval_rewards/chosen": -0.13654829561710358, "eval_rewards/margins": 0.07708805054426193, "eval_rewards/rejected": -0.2136363536119461, "eval_runtime": 711.4859, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.406, "step": 2900 }, { "epoch": 0.19, "learning_rate": 4.876668962420117e-06, "logits/chosen": -0.9826984405517578, "logits/rejected": -0.8262590169906616, "logps/chosen": -399.0771484375, "logps/rejected": -415.10009765625, "loss": 0.6914, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1094260960817337, "rewards/margins": 0.07563906162977219, "rewards/rejected": -0.1850651651620865, "step": 2910 }, { "epoch": 0.19, "learning_rate": 4.87489148547566e-06, "logits/chosen": -1.1586592197418213, "logits/rejected": -1.0520836114883423, "logps/chosen": -397.3544616699219, "logps/rejected": -410.04541015625, "loss": 0.6926, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1374959796667099, "rewards/margins": 0.04586387053132057, "rewards/rejected": -0.18335983157157898, "step": 2920 }, { "epoch": 0.19, "learning_rate": 4.873101619933862e-06, "logits/chosen": -1.4020602703094482, "logits/rejected": -1.0666966438293457, "logps/chosen": -361.32073974609375, "logps/rejected": -391.4079284667969, "loss": 0.689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09679357707500458, "rewards/margins": 0.07940280437469482, "rewards/rejected": -0.1761963665485382, "step": 2930 }, { "epoch": 0.19, "learning_rate": 4.8712993751315385e-06, "logits/chosen": -1.1208691596984863, "logits/rejected": -1.032873272895813, "logps/chosen": -208.27444458007812, "logps/rejected": -259.03436279296875, "loss": 0.691, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08761651068925858, "rewards/margins": 0.0468580424785614, "rewards/rejected": -0.13447454571723938, "step": 2940 }, { "epoch": 0.19, "learning_rate": 4.869484760470079e-06, "logits/chosen": -1.0466829538345337, "logits/rejected": -0.8319103121757507, "logps/chosen": -310.87298583984375, "logps/rejected": -347.3447265625, "loss": 0.6885, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12221217155456543, "rewards/margins": 0.0727708712220192, "rewards/rejected": -0.19498305022716522, "step": 2950 }, { "epoch": 0.19, "learning_rate": 4.867657785415404e-06, "logits/chosen": -0.8036662936210632, "logits/rejected": -0.6130042672157288, "logps/chosen": -431.5562438964844, "logps/rejected": -459.91583251953125, "loss": 0.6894, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18245726823806763, "rewards/margins": 0.0701041966676712, "rewards/rejected": -0.25256145000457764, "step": 2960 }, { "epoch": 0.19, "learning_rate": 4.865818459497911e-06, "logits/chosen": -0.666034996509552, "logits/rejected": -0.7290282249450684, "logps/chosen": -470.75860595703125, "logps/rejected": -461.13311767578125, "loss": 0.6888, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18657991290092468, "rewards/margins": 0.07517173141241074, "rewards/rejected": -0.261751651763916, "step": 2970 }, { "epoch": 0.19, "learning_rate": 4.863966792312423e-06, "logits/chosen": -0.9364269375801086, "logits/rejected": -0.42835959792137146, "logps/chosen": -459.9546813964844, "logps/rejected": -544.4393920898438, "loss": 0.6891, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21849580109119415, "rewards/margins": 0.12553612887859344, "rewards/rejected": -0.34403195977211, "step": 2980 }, { "epoch": 0.2, "learning_rate": 4.862102793518145e-06, "logits/chosen": -0.8742693662643433, "logits/rejected": -0.9743921160697937, "logps/chosen": -412.43212890625, "logps/rejected": -491.0814514160156, "loss": 0.6886, "rewards/accuracies": 0.625, "rewards/chosen": -0.2181502878665924, "rewards/margins": 0.0717243179678917, "rewards/rejected": -0.2898745834827423, "step": 2990 }, { "epoch": 0.2, "learning_rate": 4.8602264728386075e-06, "logits/chosen": -1.0399912595748901, "logits/rejected": -0.9057199358940125, "logps/chosen": -414.02679443359375, "logps/rejected": -484.69976806640625, "loss": 0.6906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16152727603912354, "rewards/margins": 0.07409163564443588, "rewards/rejected": -0.23561891913414001, "step": 3000 }, { "epoch": 0.2, "eval_logits/chosen": -0.8704602122306824, "eval_logits/rejected": -0.7523858547210693, "eval_logps/chosen": -415.2958984375, "eval_logps/rejected": -477.5717468261719, "eval_loss": 0.6900380849838257, "eval_rewards/accuracies": 0.6449999809265137, "eval_rewards/chosen": -0.1832909733057022, "eval_rewards/margins": 0.08266889303922653, "eval_rewards/rejected": -0.26595985889434814, "eval_runtime": 711.8755, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.405, "step": 3000 }, { "epoch": 0.2, "learning_rate": 4.858337840061616e-06, "logits/chosen": -0.7448334693908691, "logits/rejected": -0.8301981687545776, "logps/chosen": -357.12591552734375, "logps/rejected": -477.7318420410156, "loss": 0.6911, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.174045592546463, "rewards/margins": 0.06695671379566193, "rewards/rejected": -0.24100229144096375, "step": 3010 }, { "epoch": 0.2, "learning_rate": 4.856436905039208e-06, "logits/chosen": -1.0125486850738525, "logits/rejected": -0.8003193140029907, "logps/chosen": -378.47552490234375, "logps/rejected": -423.501220703125, "loss": 0.6892, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1672704517841339, "rewards/margins": 0.07941305637359619, "rewards/rejected": -0.2466835230588913, "step": 3020 }, { "epoch": 0.2, "learning_rate": 4.854523677687588e-06, "logits/chosen": -0.8735455274581909, "logits/rejected": -1.0956079959869385, "logps/chosen": -328.8739318847656, "logps/rejected": -404.0391540527344, "loss": 0.6891, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14762839674949646, "rewards/margins": 0.05676870793104172, "rewards/rejected": -0.2043970823287964, "step": 3030 }, { "epoch": 0.2, "learning_rate": 4.85259816798709e-06, "logits/chosen": -1.2449532747268677, "logits/rejected": -0.9089319109916687, "logps/chosen": -387.965576171875, "logps/rejected": -398.33770751953125, "loss": 0.6902, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10020353645086288, "rewards/margins": 0.09092805534601212, "rewards/rejected": -0.191131591796875, "step": 3040 }, { "epoch": 0.2, "learning_rate": 4.850660385982114e-06, "logits/chosen": -1.067596197128296, "logits/rejected": -0.9061563611030579, "logps/chosen": -339.7103576660156, "logps/rejected": -340.0997009277344, "loss": 0.6885, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.09199124574661255, "rewards/margins": 0.055670641362667084, "rewards/rejected": -0.14766189455986023, "step": 3050 }, { "epoch": 0.2, "learning_rate": 4.848710341781081e-06, "logits/chosen": -0.37353700399398804, "logits/rejected": -0.4667787551879883, "logps/chosen": -460.0509338378906, "logps/rejected": -516.2349853515625, "loss": 0.6922, "rewards/accuracies": 0.625, "rewards/chosen": -0.28614938259124756, "rewards/margins": 0.06191081553697586, "rewards/rejected": -0.3480601906776428, "step": 3060 }, { "epoch": 0.2, "learning_rate": 4.846748045556377e-06, "logits/chosen": -0.03157268464565277, "logits/rejected": 0.024191658943891525, "logps/chosen": -531.4178466796875, "logps/rejected": -538.5341186523438, "loss": 0.6913, "rewards/accuracies": 0.625, "rewards/chosen": -0.29193076491355896, "rewards/margins": 0.06762724369764328, "rewards/rejected": -0.35955798625946045, "step": 3070 }, { "epoch": 0.2, "learning_rate": 4.8447735075442995e-06, "logits/chosen": -0.2942689061164856, "logits/rejected": 0.010750794783234596, "logps/chosen": -462.2196350097656, "logps/rejected": -552.3308715820312, "loss": 0.6905, "rewards/accuracies": 0.75, "rewards/chosen": -0.2646574079990387, "rewards/margins": 0.09351170063018799, "rewards/rejected": -0.3581691086292267, "step": 3080 }, { "epoch": 0.2, "learning_rate": 4.8427867380450075e-06, "logits/chosen": -0.5210511684417725, "logits/rejected": -0.16920626163482666, "logps/chosen": -449.54473876953125, "logps/rejected": -469.4061584472656, "loss": 0.6892, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21920542418956757, "rewards/margins": 0.06611128151416779, "rewards/rejected": -0.28531667590141296, "step": 3090 }, { "epoch": 0.2, "learning_rate": 4.840787747422462e-06, "logits/chosen": -0.5311521291732788, "logits/rejected": -0.4888841211795807, "logps/chosen": -417.81982421875, "logps/rejected": -453.7882385253906, "loss": 0.6921, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22216370701789856, "rewards/margins": 0.06622599065303802, "rewards/rejected": -0.2883896827697754, "step": 3100 }, { "epoch": 0.2, "eval_logits/chosen": -0.49691787362098694, "eval_logits/rejected": -0.4020865261554718, "eval_logps/chosen": -462.8433837890625, "eval_logps/rejected": -519.7990112304688, "eval_loss": 0.6900349855422974, "eval_rewards/accuracies": 0.6549999713897705, "eval_rewards/chosen": -0.23083838820457458, "eval_rewards/margins": 0.0773487463593483, "eval_rewards/rejected": -0.3081871569156647, "eval_runtime": 714.5458, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 3100 }, { "epoch": 0.2, "learning_rate": 4.838776546104378e-06, "logits/chosen": -0.7088804841041565, "logits/rejected": -0.4187033772468567, "logps/chosen": -511.6211853027344, "logps/rejected": -560.3056640625, "loss": 0.6886, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23066799342632294, "rewards/margins": 0.0890863910317421, "rewards/rejected": -0.31975439190864563, "step": 3110 }, { "epoch": 0.2, "learning_rate": 4.836753144582168e-06, "logits/chosen": -0.5761700868606567, "logits/rejected": -0.046261269599199295, "logps/chosen": -481.9817810058594, "logps/rejected": -563.0371704101562, "loss": 0.6887, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2397502362728119, "rewards/margins": 0.10516796261072159, "rewards/rejected": -0.34491822123527527, "step": 3120 }, { "epoch": 0.2, "learning_rate": 4.834717553410884e-06, "logits/chosen": -0.7033271789550781, "logits/rejected": -0.7667158842086792, "logps/chosen": -377.99761962890625, "logps/rejected": -490.375, "loss": 0.6898, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1887216866016388, "rewards/margins": 0.0987081527709961, "rewards/rejected": -0.2874298691749573, "step": 3130 }, { "epoch": 0.21, "learning_rate": 4.832669783209167e-06, "logits/chosen": -0.44873374700546265, "logits/rejected": -0.6905600428581238, "logps/chosen": -442.02532958984375, "logps/rejected": -466.65301513671875, "loss": 0.693, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19759933650493622, "rewards/margins": 0.02407877705991268, "rewards/rejected": -0.22167813777923584, "step": 3140 }, { "epoch": 0.21, "learning_rate": 4.8306098446591895e-06, "logits/chosen": -0.1389220654964447, "logits/rejected": -0.14139506220817566, "logps/chosen": -340.37078857421875, "logps/rejected": -415.58685302734375, "loss": 0.6906, "rewards/accuracies": 0.75, "rewards/chosen": -0.16410192847251892, "rewards/margins": 0.05028475075960159, "rewards/rejected": -0.21438665688037872, "step": 3150 }, { "epoch": 0.21, "learning_rate": 4.828537748506601e-06, "logits/chosen": -1.0547568798065186, "logits/rejected": -0.8391033411026001, "logps/chosen": -417.04150390625, "logps/rejected": -402.0226745605469, "loss": 0.6925, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1436433047056198, "rewards/margins": 0.041227348148822784, "rewards/rejected": -0.184870645403862, "step": 3160 }, { "epoch": 0.21, "learning_rate": 4.826453505560469e-06, "logits/chosen": -0.6873368620872498, "logits/rejected": -0.6330714225769043, "logps/chosen": -317.03424072265625, "logps/rejected": -344.03424072265625, "loss": 0.6913, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.12605510652065277, "rewards/margins": 0.040696583688259125, "rewards/rejected": -0.1667516976594925, "step": 3170 }, { "epoch": 0.21, "learning_rate": 4.824357126693226e-06, "logits/chosen": -0.6209930777549744, "logits/rejected": -0.6906192898750305, "logps/chosen": -366.21697998046875, "logps/rejected": -356.8144836425781, "loss": 0.6915, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10747381299734116, "rewards/margins": 0.039324913173913956, "rewards/rejected": -0.1467987298965454, "step": 3180 }, { "epoch": 0.21, "learning_rate": 4.8222486228406105e-06, "logits/chosen": -1.205514669418335, "logits/rejected": -0.9653299450874329, "logps/chosen": -301.0349426269531, "logps/rejected": -328.4139404296875, "loss": 0.6907, "rewards/accuracies": 0.75, "rewards/chosen": -0.0897945761680603, "rewards/margins": 0.062060046941041946, "rewards/rejected": -0.15185460448265076, "step": 3190 }, { "epoch": 0.21, "learning_rate": 4.820128005001612e-06, "logits/chosen": -0.8773876428604126, "logits/rejected": -0.741000771522522, "logps/chosen": -280.5494384765625, "logps/rejected": -370.9437561035156, "loss": 0.6867, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06155586987733841, "rewards/margins": 0.11193714290857315, "rewards/rejected": -0.17349299788475037, "step": 3200 }, { "epoch": 0.21, "eval_logits/chosen": -1.090376615524292, "eval_logits/rejected": -0.9624568819999695, "eval_logps/chosen": -319.77587890625, "eval_logps/rejected": -371.4648132324219, "eval_loss": 0.6899227499961853, "eval_rewards/accuracies": 0.6620000004768372, "eval_rewards/chosen": -0.08777090907096863, "eval_rewards/margins": 0.07208200544118881, "eval_rewards/rejected": -0.15985292196273804, "eval_runtime": 711.7487, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 3200 }, { "epoch": 0.21, "learning_rate": 4.817995284238412e-06, "logits/chosen": -1.0149776935577393, "logits/rejected": -1.03653883934021, "logps/chosen": -287.68914794921875, "logps/rejected": -399.0724182128906, "loss": 0.689, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09116478264331818, "rewards/margins": 0.08389847725629807, "rewards/rejected": -0.17506323754787445, "step": 3210 }, { "epoch": 0.21, "learning_rate": 4.815850471676327e-06, "logits/chosen": -1.289499044418335, "logits/rejected": -0.8683086633682251, "logps/chosen": -321.6259460449219, "logps/rejected": -422.97540283203125, "loss": 0.6879, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08321196585893631, "rewards/margins": 0.10559669882059097, "rewards/rejected": -0.1888086497783661, "step": 3220 }, { "epoch": 0.21, "learning_rate": 4.813693578503751e-06, "logits/chosen": -0.9852520823478699, "logits/rejected": -0.8063098192214966, "logps/chosen": -381.07916259765625, "logps/rejected": -408.11138916015625, "loss": 0.6905, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08457235991954803, "rewards/margins": 0.08248989284038544, "rewards/rejected": -0.16706225275993347, "step": 3230 }, { "epoch": 0.21, "learning_rate": 4.811524615972093e-06, "logits/chosen": -0.9069948196411133, "logits/rejected": -1.0173349380493164, "logps/chosen": -335.2598571777344, "logps/rejected": -427.4632873535156, "loss": 0.6888, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10532426834106445, "rewards/margins": 0.08400104939937592, "rewards/rejected": -0.18932530283927917, "step": 3240 }, { "epoch": 0.21, "learning_rate": 4.809343595395724e-06, "logits/chosen": -1.669086217880249, "logits/rejected": -1.3308725357055664, "logps/chosen": -278.1128845214844, "logps/rejected": -286.84002685546875, "loss": 0.6909, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08905723690986633, "rewards/margins": 0.039315879344940186, "rewards/rejected": -0.12837311625480652, "step": 3250 }, { "epoch": 0.21, "learning_rate": 4.807150528151918e-06, "logits/chosen": -1.1300930976867676, "logits/rejected": -1.0958597660064697, "logps/chosen": -238.3439178466797, "logps/rejected": -335.4567565917969, "loss": 0.6892, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07165135443210602, "rewards/margins": 0.07924596965312958, "rewards/rejected": -0.1508973240852356, "step": 3260 }, { "epoch": 0.21, "learning_rate": 4.804945425680787e-06, "logits/chosen": -1.1141166687011719, "logits/rejected": -1.0306416749954224, "logps/chosen": -273.11968994140625, "logps/rejected": -295.0377502441406, "loss": 0.6921, "rewards/accuracies": 0.625, "rewards/chosen": -0.08467648923397064, "rewards/margins": 0.04199628531932831, "rewards/rejected": -0.12667277455329895, "step": 3270 }, { "epoch": 0.21, "learning_rate": 4.802728299485225e-06, "logits/chosen": -0.7322720289230347, "logits/rejected": -0.6539539098739624, "logps/chosen": -266.72052001953125, "logps/rejected": -341.3778381347656, "loss": 0.6899, "rewards/accuracies": 0.5, "rewards/chosen": -0.11611612141132355, "rewards/margins": 0.053125642240047455, "rewards/rejected": -0.1692417562007904, "step": 3280 }, { "epoch": 0.22, "learning_rate": 4.8004991611308495e-06, "logits/chosen": -0.854507565498352, "logits/rejected": -0.7687502503395081, "logps/chosen": -337.65826416015625, "logps/rejected": -403.5023193359375, "loss": 0.6889, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09610452502965927, "rewards/margins": 0.08189422637224197, "rewards/rejected": -0.17799875140190125, "step": 3290 }, { "epoch": 0.22, "learning_rate": 4.798258022245937e-06, "logits/chosen": -0.7789617776870728, "logits/rejected": -0.6801769733428955, "logps/chosen": -334.1788024902344, "logps/rejected": -364.3035888671875, "loss": 0.691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11657233536243439, "rewards/margins": 0.06858987361192703, "rewards/rejected": -0.18516221642494202, "step": 3300 }, { "epoch": 0.22, "eval_logits/chosen": -0.7451701164245605, "eval_logits/rejected": -0.6384189128875732, "eval_logps/chosen": -352.8772888183594, "eval_logps/rejected": -411.245361328125, "eval_loss": 0.6898645162582397, "eval_rewards/accuracies": 0.6470000147819519, "eval_rewards/chosen": -0.12087231874465942, "eval_rewards/margins": 0.07876119017601013, "eval_rewards/rejected": -0.19963350892066956, "eval_runtime": 711.1738, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 3300 }, { "epoch": 0.22, "learning_rate": 4.796004894521365e-06, "logits/chosen": -0.9636430740356445, "logits/rejected": -0.5158362984657288, "logps/chosen": -345.48443603515625, "logps/rejected": -448.40533447265625, "loss": 0.6899, "rewards/accuracies": 0.5, "rewards/chosen": -0.11691470444202423, "rewards/margins": 0.08166440576314926, "rewards/rejected": -0.1985791027545929, "step": 3310 }, { "epoch": 0.22, "learning_rate": 4.7937397897105545e-06, "logits/chosen": -0.7896434664726257, "logits/rejected": -0.6752771139144897, "logps/chosen": -333.14385986328125, "logps/rejected": -341.8313293457031, "loss": 0.6926, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1279110163450241, "rewards/margins": 0.03390089422464371, "rewards/rejected": -0.1618119180202484, "step": 3320 }, { "epoch": 0.22, "learning_rate": 4.791462719629399e-06, "logits/chosen": -0.6939797401428223, "logits/rejected": -0.6913006901741028, "logps/chosen": -299.4644470214844, "logps/rejected": -361.3075256347656, "loss": 0.6889, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11373928934335709, "rewards/margins": 0.08425451815128326, "rewards/rejected": -0.19799381494522095, "step": 3330 }, { "epoch": 0.22, "learning_rate": 4.789173696156212e-06, "logits/chosen": -1.0129543542861938, "logits/rejected": -0.723541259765625, "logps/chosen": -400.4210510253906, "logps/rejected": -522.765869140625, "loss": 0.687, "rewards/accuracies": 0.875, "rewards/chosen": -0.12398044764995575, "rewards/margins": 0.14099428057670593, "rewards/rejected": -0.26497477293014526, "step": 3340 }, { "epoch": 0.22, "learning_rate": 4.786872731231662e-06, "logits/chosen": -1.2101221084594727, "logits/rejected": -1.0943197011947632, "logps/chosen": -357.49139404296875, "logps/rejected": -421.72308349609375, "loss": 0.6911, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14060047268867493, "rewards/margins": 0.07560074329376221, "rewards/rejected": -0.21620123088359833, "step": 3350 }, { "epoch": 0.22, "learning_rate": 4.784559836858709e-06, "logits/chosen": -0.8804407119750977, "logits/rejected": -0.7715723514556885, "logps/chosen": -346.12615966796875, "logps/rejected": -393.340087890625, "loss": 0.6904, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11220908164978027, "rewards/margins": 0.07763482630252838, "rewards/rejected": -0.18984392285346985, "step": 3360 }, { "epoch": 0.22, "learning_rate": 4.782235025102542e-06, "logits/chosen": -0.9902753829956055, "logits/rejected": -0.9379558563232422, "logps/chosen": -355.44134521484375, "logps/rejected": -412.8868103027344, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1291220337152481, "rewards/margins": 0.07492605596780777, "rewards/rejected": -0.20404811203479767, "step": 3370 }, { "epoch": 0.22, "learning_rate": 4.779898308090519e-06, "logits/chosen": -1.0589594841003418, "logits/rejected": -0.8376196622848511, "logps/chosen": -397.018310546875, "logps/rejected": -441.5438537597656, "loss": 0.6879, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12234105169773102, "rewards/margins": 0.08135628700256348, "rewards/rejected": -0.2036973237991333, "step": 3380 }, { "epoch": 0.22, "learning_rate": 4.777549698012101e-06, "logits/chosen": -0.8556244969367981, "logits/rejected": -0.8381510972976685, "logps/chosen": -415.40826416015625, "logps/rejected": -501.6957092285156, "loss": 0.6898, "rewards/accuracies": 0.75, "rewards/chosen": -0.17219164967536926, "rewards/margins": 0.10544709861278534, "rewards/rejected": -0.2776387333869934, "step": 3390 }, { "epoch": 0.22, "learning_rate": 4.775189207118787e-06, "logits/chosen": -0.8066427111625671, "logits/rejected": -0.7407088875770569, "logps/chosen": -424.0018615722656, "logps/rejected": -482.9774475097656, "loss": 0.6903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15218417346477509, "rewards/margins": 0.08131326735019684, "rewards/rejected": -0.2334974706172943, "step": 3400 }, { "epoch": 0.22, "eval_logits/chosen": -0.9076970219612122, "eval_logits/rejected": -0.7890629768371582, "eval_logps/chosen": -387.86065673828125, "eval_logps/rejected": -448.2270812988281, "eval_loss": 0.6898848414421082, "eval_rewards/accuracies": 0.652999997138977, "eval_rewards/chosen": -0.1558557152748108, "eval_rewards/margins": 0.08075951784849167, "eval_rewards/rejected": -0.23661524057388306, "eval_runtime": 710.9356, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.407, "step": 3400 }, { "epoch": 0.22, "learning_rate": 4.772816847724054e-06, "logits/chosen": -0.9111618995666504, "logits/rejected": -1.0753004550933838, "logps/chosen": -363.90765380859375, "logps/rejected": -413.2244567871094, "loss": 0.6912, "rewards/accuracies": 0.5, "rewards/chosen": -0.14135311543941498, "rewards/margins": 0.04981910064816475, "rewards/rejected": -0.19117221236228943, "step": 3410 }, { "epoch": 0.22, "learning_rate": 4.770432632203294e-06, "logits/chosen": -0.5220621824264526, "logits/rejected": -0.48986703157424927, "logps/chosen": -373.3628845214844, "logps/rejected": -369.2757873535156, "loss": 0.6916, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12971577048301697, "rewards/margins": 0.046231161803007126, "rewards/rejected": -0.17594695091247559, "step": 3420 }, { "epoch": 0.22, "learning_rate": 4.768036572993738e-06, "logits/chosen": -0.9792502522468567, "logits/rejected": -0.7328594923019409, "logps/chosen": -421.8783264160156, "logps/rejected": -474.849609375, "loss": 0.6893, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14185933768749237, "rewards/margins": 0.06857124716043472, "rewards/rejected": -0.2104305773973465, "step": 3430 }, { "epoch": 0.23, "learning_rate": 4.765628682594409e-06, "logits/chosen": -0.96644127368927, "logits/rejected": -0.868308424949646, "logps/chosen": -350.9528503417969, "logps/rejected": -403.69879150390625, "loss": 0.6884, "rewards/accuracies": 0.625, "rewards/chosen": -0.10719581693410873, "rewards/margins": 0.07664835453033447, "rewards/rejected": -0.1838441640138626, "step": 3440 }, { "epoch": 0.23, "learning_rate": 4.763208973566041e-06, "logits/chosen": -0.9829349517822266, "logits/rejected": -0.7497692704200745, "logps/chosen": -293.51007080078125, "logps/rejected": -391.0221252441406, "loss": 0.69, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10938040167093277, "rewards/margins": 0.07879676669836044, "rewards/rejected": -0.1881771832704544, "step": 3450 }, { "epoch": 0.23, "learning_rate": 4.76077745853102e-06, "logits/chosen": -1.1481579542160034, "logits/rejected": -1.1825287342071533, "logps/chosen": -374.2550048828125, "logps/rejected": -447.8692932128906, "loss": 0.6915, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12420084327459335, "rewards/margins": 0.0724891722202301, "rewards/rejected": -0.19669003784656525, "step": 3460 }, { "epoch": 0.23, "learning_rate": 4.758334150173322e-06, "logits/chosen": -0.9127210378646851, "logits/rejected": -0.8635384440422058, "logps/chosen": -348.4943542480469, "logps/rejected": -384.9615173339844, "loss": 0.6919, "rewards/accuracies": 0.75, "rewards/chosen": -0.08541040867567062, "rewards/margins": 0.061520766466856, "rewards/rejected": -0.14693120121955872, "step": 3470 }, { "epoch": 0.23, "learning_rate": 4.755879061238439e-06, "logits/chosen": -1.1692310571670532, "logits/rejected": -0.9778891801834106, "logps/chosen": -360.33612060546875, "logps/rejected": -395.7692565917969, "loss": 0.6908, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1041627898812294, "rewards/margins": 0.04807025566697121, "rewards/rejected": -0.1522330492734909, "step": 3480 }, { "epoch": 0.23, "learning_rate": 4.753412204533317e-06, "logits/chosen": -1.30173921585083, "logits/rejected": -0.767593502998352, "logps/chosen": -347.20074462890625, "logps/rejected": -391.4964904785156, "loss": 0.6887, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08434270322322845, "rewards/margins": 0.08968711644411087, "rewards/rejected": -0.17402981221675873, "step": 3490 }, { "epoch": 0.23, "learning_rate": 4.750933592926292e-06, "logits/chosen": -1.0767269134521484, "logits/rejected": -0.7587383389472961, "logps/chosen": -338.02410888671875, "logps/rejected": -391.2127990722656, "loss": 0.6899, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11929644644260406, "rewards/margins": 0.0804051011800766, "rewards/rejected": -0.19970154762268066, "step": 3500 }, { "epoch": 0.23, "eval_logits/chosen": -0.7943944334983826, "eval_logits/rejected": -0.6854715943336487, "eval_logps/chosen": -357.3798828125, "eval_logps/rejected": -413.9255676269531, "eval_loss": 0.6898289918899536, "eval_rewards/accuracies": 0.6514999866485596, "eval_rewards/chosen": -0.125374898314476, "eval_rewards/margins": 0.07693876326084137, "eval_rewards/rejected": -0.20231369137763977, "eval_runtime": 711.4904, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.406, "step": 3500 }, { "epoch": 0.23, "learning_rate": 4.7484432393470124e-06, "logits/chosen": -0.9757513999938965, "logits/rejected": -0.49566301703453064, "logps/chosen": -328.42205810546875, "logps/rejected": -394.03460693359375, "loss": 0.6838, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1263401061296463, "rewards/margins": 0.12100942432880402, "rewards/rejected": -0.2473495453596115, "step": 3510 }, { "epoch": 0.23, "learning_rate": 4.745941156786385e-06, "logits/chosen": -0.3304889500141144, "logits/rejected": -0.5262193083763123, "logps/chosen": -322.601806640625, "logps/rejected": -498.60833740234375, "loss": 0.6845, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1695626974105835, "rewards/margins": 0.1451653391122818, "rewards/rejected": -0.3147280812263489, "step": 3520 }, { "epoch": 0.23, "learning_rate": 4.743427358296497e-06, "logits/chosen": -0.7082148790359497, "logits/rejected": -0.5598667860031128, "logps/chosen": -357.0320739746094, "logps/rejected": -537.770263671875, "loss": 0.686, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16732384264469147, "rewards/margins": 0.16943085193634033, "rewards/rejected": -0.3367546796798706, "step": 3530 }, { "epoch": 0.23, "learning_rate": 4.740901856990553e-06, "logits/chosen": -0.903288722038269, "logits/rejected": -0.7495170831680298, "logps/chosen": -363.7324523925781, "logps/rejected": -382.14910888671875, "loss": 0.6909, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10704050213098526, "rewards/margins": 0.062489282339811325, "rewards/rejected": -0.16952979564666748, "step": 3540 }, { "epoch": 0.23, "learning_rate": 4.738364666042804e-06, "logits/chosen": -1.2949492931365967, "logits/rejected": -0.9672085642814636, "logps/chosen": -367.40777587890625, "logps/rejected": -361.1066589355469, "loss": 0.6916, "rewards/accuracies": 0.625, "rewards/chosen": -0.07566282153129578, "rewards/margins": 0.05733874440193176, "rewards/rejected": -0.13300158083438873, "step": 3550 }, { "epoch": 0.23, "learning_rate": 4.735815798688483e-06, "logits/chosen": -1.204940676689148, "logits/rejected": -0.9728586077690125, "logps/chosen": -284.4416809082031, "logps/rejected": -391.280029296875, "loss": 0.6874, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0861096978187561, "rewards/margins": 0.0866992324590683, "rewards/rejected": -0.1728089153766632, "step": 3560 }, { "epoch": 0.23, "learning_rate": 4.7332552682237285e-06, "logits/chosen": -0.8906214833259583, "logits/rejected": -0.621708869934082, "logps/chosen": -277.2056579589844, "logps/rejected": -358.495849609375, "loss": 0.6884, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10344570875167847, "rewards/margins": 0.09706269949674606, "rewards/rejected": -0.20050843060016632, "step": 3570 }, { "epoch": 0.23, "learning_rate": 4.7306830880055234e-06, "logits/chosen": -0.8558648228645325, "logits/rejected": -0.8252252340316772, "logps/chosen": -372.6454162597656, "logps/rejected": -463.3543395996094, "loss": 0.6895, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18191158771514893, "rewards/margins": 0.08237184584140778, "rewards/rejected": -0.2642834186553955, "step": 3580 }, { "epoch": 0.23, "learning_rate": 4.728099271451619e-06, "logits/chosen": -0.7084445953369141, "logits/rejected": -0.8211199045181274, "logps/chosen": -350.562744140625, "logps/rejected": -437.9385681152344, "loss": 0.6888, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15955469012260437, "rewards/margins": 0.09432835131883621, "rewards/rejected": -0.2538830637931824, "step": 3590 }, { "epoch": 0.24, "learning_rate": 4.725503832040466e-06, "logits/chosen": -0.5046578049659729, "logits/rejected": -0.22391347587108612, "logps/chosen": -291.08856201171875, "logps/rejected": -397.1253967285156, "loss": 0.6911, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1419307142496109, "rewards/margins": 0.07994942367076874, "rewards/rejected": -0.22188015282154083, "step": 3600 }, { "epoch": 0.24, "eval_logits/chosen": -0.6121751666069031, "eval_logits/rejected": -0.513374924659729, "eval_logps/chosen": -367.09832763671875, "eval_logps/rejected": -434.4856872558594, "eval_loss": 0.6898602843284607, "eval_rewards/accuracies": 0.6614999771118164, "eval_rewards/chosen": -0.1350933313369751, "eval_rewards/margins": 0.0877804234623909, "eval_rewards/rejected": -0.2228737622499466, "eval_runtime": 712.2305, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 3600 }, { "epoch": 0.24, "learning_rate": 4.722896783311152e-06, "logits/chosen": -0.7387981414794922, "logits/rejected": -0.5789721608161926, "logps/chosen": -409.89227294921875, "logps/rejected": -529.3055419921875, "loss": 0.6903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1508433073759079, "rewards/margins": 0.06827931106090546, "rewards/rejected": -0.21912261843681335, "step": 3610 }, { "epoch": 0.24, "learning_rate": 4.720278138863318e-06, "logits/chosen": -0.7191423177719116, "logits/rejected": -0.8290618658065796, "logps/chosen": -327.733642578125, "logps/rejected": -356.4250793457031, "loss": 0.692, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.13761194050312042, "rewards/margins": 0.06049853563308716, "rewards/rejected": -0.19811047613620758, "step": 3620 }, { "epoch": 0.24, "learning_rate": 4.717647912357095e-06, "logits/chosen": -1.1926231384277344, "logits/rejected": -1.2323840856552124, "logps/chosen": -389.05596923828125, "logps/rejected": -417.18768310546875, "loss": 0.6921, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.11657879501581192, "rewards/margins": 0.015199318528175354, "rewards/rejected": -0.13177812099456787, "step": 3630 }, { "epoch": 0.24, "learning_rate": 4.715006117513035e-06, "logits/chosen": -1.3196468353271484, "logits/rejected": -1.197674036026001, "logps/chosen": -379.3778381347656, "logps/rejected": -398.1944274902344, "loss": 0.6904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.055252205580472946, "rewards/margins": 0.0739966481924057, "rewards/rejected": -0.12924885749816895, "step": 3640 }, { "epoch": 0.24, "learning_rate": 4.7123527681120326e-06, "logits/chosen": -1.1466625928878784, "logits/rejected": -0.9486912488937378, "logps/chosen": -327.9669494628906, "logps/rejected": -364.90716552734375, "loss": 0.6893, "rewards/accuracies": 0.625, "rewards/chosen": -0.08079581707715988, "rewards/margins": 0.06509184092283249, "rewards/rejected": -0.14588764309883118, "step": 3650 }, { "epoch": 0.24, "learning_rate": 4.7096878779952594e-06, "logits/chosen": -1.180060863494873, "logits/rejected": -1.1230758428573608, "logps/chosen": -391.13262939453125, "logps/rejected": -451.53204345703125, "loss": 0.6915, "rewards/accuracies": 0.75, "rewards/chosen": -0.11571931838989258, "rewards/margins": 0.06298209726810455, "rewards/rejected": -0.17870138585567474, "step": 3660 }, { "epoch": 0.24, "learning_rate": 4.707011461064086e-06, "logits/chosen": -0.8022671937942505, "logits/rejected": -0.5046889185905457, "logps/chosen": -414.7627868652344, "logps/rejected": -456.50311279296875, "loss": 0.6904, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1059037446975708, "rewards/margins": 0.08593104034662247, "rewards/rejected": -0.19183477759361267, "step": 3670 }, { "epoch": 0.24, "learning_rate": 4.704323531280016e-06, "logits/chosen": -0.5141183137893677, "logits/rejected": -0.4972083568572998, "logps/chosen": -426.3848571777344, "logps/rejected": -414.1753845214844, "loss": 0.6893, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09917335212230682, "rewards/margins": 0.07098612189292908, "rewards/rejected": -0.1701594740152359, "step": 3680 }, { "epoch": 0.24, "learning_rate": 4.701624102664606e-06, "logits/chosen": -0.9006779789924622, "logits/rejected": -0.8451553583145142, "logps/chosen": -396.20513916015625, "logps/rejected": -416.31158447265625, "loss": 0.6891, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13508927822113037, "rewards/margins": 0.0742066353559494, "rewards/rejected": -0.20929589867591858, "step": 3690 }, { "epoch": 0.24, "learning_rate": 4.698913189299399e-06, "logits/chosen": -0.8794091939926147, "logits/rejected": -0.6929253339767456, "logps/chosen": -316.33880615234375, "logps/rejected": -401.0997619628906, "loss": 0.6938, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13018698990345, "rewards/margins": 0.052254289388656616, "rewards/rejected": -0.18244127929210663, "step": 3700 }, { "epoch": 0.24, "eval_logits/chosen": -0.799005925655365, "eval_logits/rejected": -0.6917127370834351, "eval_logps/chosen": -355.70672607421875, "eval_logps/rejected": -411.4952087402344, "eval_loss": 0.6898983120918274, "eval_rewards/accuracies": 0.6545000076293945, "eval_rewards/chosen": -0.12370176613330841, "eval_rewards/margins": 0.07618161290884018, "eval_rewards/rejected": -0.199883371591568, "eval_runtime": 711.6062, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 3700 }, { "epoch": 0.24, "learning_rate": 4.696190805325847e-06, "logits/chosen": -0.9073203206062317, "logits/rejected": -0.7957035303115845, "logps/chosen": -319.0508117675781, "logps/rejected": -375.68402099609375, "loss": 0.6893, "rewards/accuracies": 0.625, "rewards/chosen": -0.11267198622226715, "rewards/margins": 0.0837518647313118, "rewards/rejected": -0.19642382860183716, "step": 3710 }, { "epoch": 0.24, "learning_rate": 4.693456964945239e-06, "logits/chosen": -1.1013727188110352, "logits/rejected": -0.7734390497207642, "logps/chosen": -399.56768798828125, "logps/rejected": -381.8271484375, "loss": 0.6887, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10146210342645645, "rewards/margins": 0.0833517462015152, "rewards/rejected": -0.18481382727622986, "step": 3720 }, { "epoch": 0.24, "learning_rate": 4.6907116824186245e-06, "logits/chosen": -1.0235180854797363, "logits/rejected": -0.9562314748764038, "logps/chosen": -324.9698791503906, "logps/rejected": -381.88751220703125, "loss": 0.6886, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09764232486486435, "rewards/margins": 0.05711390823125839, "rewards/rejected": -0.15475623309612274, "step": 3730 }, { "epoch": 0.24, "learning_rate": 4.687954972066742e-06, "logits/chosen": -0.7998303771018982, "logits/rejected": -0.7993026971817017, "logps/chosen": -362.21221923828125, "logps/rejected": -482.2862243652344, "loss": 0.6851, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13239601254463196, "rewards/margins": 0.1409449279308319, "rewards/rejected": -0.2733409106731415, "step": 3740 }, { "epoch": 0.25, "learning_rate": 4.685186848269944e-06, "logits/chosen": -0.7055032849311829, "logits/rejected": -0.4607125222682953, "logps/chosen": -343.53765869140625, "logps/rejected": -379.7117919921875, "loss": 0.6925, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13166530430316925, "rewards/margins": 0.07355310767889023, "rewards/rejected": -0.2052183896303177, "step": 3750 }, { "epoch": 0.25, "learning_rate": 4.682407325468119e-06, "logits/chosen": -0.8808666467666626, "logits/rejected": -0.6429244875907898, "logps/chosen": -321.9494323730469, "logps/rejected": -385.5625305175781, "loss": 0.6882, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.104530468583107, "rewards/margins": 0.0972428172826767, "rewards/rejected": -0.2017732560634613, "step": 3760 }, { "epoch": 0.25, "learning_rate": 4.67961641816062e-06, "logits/chosen": -0.9445309638977051, "logits/rejected": -0.8120043873786926, "logps/chosen": -366.25946044921875, "logps/rejected": -386.9581604003906, "loss": 0.6911, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09046272188425064, "rewards/margins": 0.06202878803014755, "rewards/rejected": -0.1524915248155594, "step": 3770 }, { "epoch": 0.25, "learning_rate": 4.676814140906188e-06, "logits/chosen": -0.7849982380867004, "logits/rejected": -0.767396092414856, "logps/chosen": -365.19769287109375, "logps/rejected": -399.0850830078125, "loss": 0.689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1256614476442337, "rewards/margins": 0.06449311971664429, "rewards/rejected": -0.19015458226203918, "step": 3780 }, { "epoch": 0.25, "learning_rate": 4.674000508322872e-06, "logits/chosen": -0.5533128976821899, "logits/rejected": -0.7101965546607971, "logps/chosen": -325.06658935546875, "logps/rejected": -406.6145324707031, "loss": 0.6909, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10753166675567627, "rewards/margins": 0.07236433029174805, "rewards/rejected": -0.17989598214626312, "step": 3790 }, { "epoch": 0.25, "learning_rate": 4.671175535087959e-06, "logits/chosen": -0.9753785133361816, "logits/rejected": -1.0549020767211914, "logps/chosen": -404.6114807128906, "logps/rejected": -506.9873962402344, "loss": 0.6892, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11328382790088654, "rewards/margins": 0.1056075319647789, "rewards/rejected": -0.21889135241508484, "step": 3800 }, { "epoch": 0.25, "eval_logits/chosen": -0.9947918057441711, "eval_logits/rejected": -0.8767626285552979, "eval_logps/chosen": -332.9628601074219, "eval_logps/rejected": -383.22491455078125, "eval_loss": 0.6898708343505859, "eval_rewards/accuracies": 0.6570000052452087, "eval_rewards/chosen": -0.10095791518688202, "eval_rewards/margins": 0.07065509259700775, "eval_rewards/rejected": -0.17161302268505096, "eval_runtime": 709.3129, "eval_samples_per_second": 2.82, "eval_steps_per_second": 1.41, "step": 3800 }, { "epoch": 0.25, "learning_rate": 4.6683392359378924e-06, "logits/chosen": -0.9528951644897461, "logits/rejected": -0.8176212310791016, "logps/chosen": -328.28814697265625, "logps/rejected": -373.052734375, "loss": 0.6911, "rewards/accuracies": 0.625, "rewards/chosen": -0.09313914179801941, "rewards/margins": 0.07028108835220337, "rewards/rejected": -0.16342023015022278, "step": 3810 }, { "epoch": 0.25, "learning_rate": 4.665491625668198e-06, "logits/chosen": -0.7815275192260742, "logits/rejected": -0.8232892751693726, "logps/chosen": -307.3453674316406, "logps/rejected": -411.70855712890625, "loss": 0.687, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1510981172323227, "rewards/margins": 0.0808081179857254, "rewards/rejected": -0.2319062203168869, "step": 3820 }, { "epoch": 0.25, "learning_rate": 4.662632719133407e-06, "logits/chosen": -0.9967552423477173, "logits/rejected": -0.7885714769363403, "logps/chosen": -321.8594055175781, "logps/rejected": -320.747314453125, "loss": 0.6908, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09106185287237167, "rewards/margins": 0.06589539349079132, "rewards/rejected": -0.1569572389125824, "step": 3830 }, { "epoch": 0.25, "learning_rate": 4.659762531246974e-06, "logits/chosen": -0.7940338253974915, "logits/rejected": -0.8099050521850586, "logps/chosen": -354.4307556152344, "logps/rejected": -366.3971252441406, "loss": 0.6905, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.13985204696655273, "rewards/margins": 0.04592302441596985, "rewards/rejected": -0.18577507138252258, "step": 3840 }, { "epoch": 0.25, "learning_rate": 4.656881076981207e-06, "logits/chosen": -1.061415433883667, "logits/rejected": -0.9680493474006653, "logps/chosen": -340.7538146972656, "logps/rejected": -374.0954895019531, "loss": 0.6914, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12757045030593872, "rewards/margins": 0.051240403205156326, "rewards/rejected": -0.17881086468696594, "step": 3850 }, { "epoch": 0.25, "learning_rate": 4.653988371367183e-06, "logits/chosen": -1.040121078491211, "logits/rejected": -0.7497084140777588, "logps/chosen": -384.7109375, "logps/rejected": -378.0744934082031, "loss": 0.6911, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14390695095062256, "rewards/margins": 0.05457156151533127, "rewards/rejected": -0.1984785497188568, "step": 3860 }, { "epoch": 0.25, "learning_rate": 4.651084429494671e-06, "logits/chosen": -1.0395872592926025, "logits/rejected": -0.7707028388977051, "logps/chosen": -477.115966796875, "logps/rejected": -465.1861267089844, "loss": 0.6904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20176498591899872, "rewards/margins": 0.067594014108181, "rewards/rejected": -0.26935896277427673, "step": 3870 }, { "epoch": 0.25, "learning_rate": 4.648169266512053e-06, "logits/chosen": -1.1651791334152222, "logits/rejected": -0.908767819404602, "logps/chosen": -450.6615295410156, "logps/rejected": -484.35955810546875, "loss": 0.6894, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22533340752124786, "rewards/margins": 0.07865221053361893, "rewards/rejected": -0.3039856255054474, "step": 3880 }, { "epoch": 0.25, "learning_rate": 4.6452428976262505e-06, "logits/chosen": -0.9482473134994507, "logits/rejected": -0.659041166305542, "logps/chosen": -400.8704528808594, "logps/rejected": -511.81109619140625, "loss": 0.6889, "rewards/accuracies": 0.75, "rewards/chosen": -0.19707906246185303, "rewards/margins": 0.15526339411735535, "rewards/rejected": -0.352342426776886, "step": 3890 }, { "epoch": 0.26, "learning_rate": 4.642305338102633e-06, "logits/chosen": -0.8177778124809265, "logits/rejected": -1.0210679769515991, "logps/chosen": -383.3443908691406, "logps/rejected": -484.0030212402344, "loss": 0.688, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22159484028816223, "rewards/margins": 0.08251297473907471, "rewards/rejected": -0.3041078448295593, "step": 3900 }, { "epoch": 0.26, "eval_logits/chosen": -0.8940591812133789, "eval_logits/rejected": -0.7769768238067627, "eval_logps/chosen": -480.23040771484375, "eval_logps/rejected": -543.86767578125, "eval_loss": 0.6898152828216553, "eval_rewards/accuracies": 0.6495000123977661, "eval_rewards/chosen": -0.24822546541690826, "eval_rewards/margins": 0.08403035253286362, "eval_rewards/rejected": -0.3322558104991913, "eval_runtime": 711.6846, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 3900 }, { "epoch": 0.26, "learning_rate": 4.639356603264953e-06, "logits/chosen": -0.9487046003341675, "logits/rejected": -0.9224430918693542, "logps/chosen": -470.205322265625, "logps/rejected": -499.61126708984375, "loss": 0.692, "rewards/accuracies": 0.625, "rewards/chosen": -0.22837357223033905, "rewards/margins": 0.055136702954769135, "rewards/rejected": -0.2835102677345276, "step": 3910 }, { "epoch": 0.26, "learning_rate": 4.636396708495255e-06, "logits/chosen": -0.718294084072113, "logits/rejected": -0.6768798828125, "logps/chosen": -443.03515625, "logps/rejected": -492.17315673828125, "loss": 0.6909, "rewards/accuracies": 0.625, "rewards/chosen": -0.21384009718894958, "rewards/margins": 0.07398195564746857, "rewards/rejected": -0.28782206773757935, "step": 3920 }, { "epoch": 0.26, "learning_rate": 4.633425669233799e-06, "logits/chosen": -1.1752598285675049, "logits/rejected": -1.0773875713348389, "logps/chosen": -427.16912841796875, "logps/rejected": -501.4466857910156, "loss": 0.6888, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19234254956245422, "rewards/margins": 0.07872482389211655, "rewards/rejected": -0.27106738090515137, "step": 3930 }, { "epoch": 0.26, "learning_rate": 4.6304435009789825e-06, "logits/chosen": -1.1653227806091309, "logits/rejected": -0.8779115676879883, "logps/chosen": -412.82037353515625, "logps/rejected": -444.9153747558594, "loss": 0.6887, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17531313002109528, "rewards/margins": 0.10300495475530624, "rewards/rejected": -0.2783181071281433, "step": 3940 }, { "epoch": 0.26, "learning_rate": 4.627450219287256e-06, "logits/chosen": -1.1403902769088745, "logits/rejected": -1.0868604183197021, "logps/chosen": -369.7454833984375, "logps/rejected": -423.36761474609375, "loss": 0.6897, "rewards/accuracies": 0.625, "rewards/chosen": -0.1881864070892334, "rewards/margins": 0.07613326609134674, "rewards/rejected": -0.26431962847709656, "step": 3950 }, { "epoch": 0.26, "learning_rate": 4.624445839773042e-06, "logits/chosen": -0.8833427429199219, "logits/rejected": -0.8696644902229309, "logps/chosen": -369.5878601074219, "logps/rejected": -404.59051513671875, "loss": 0.6925, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19957657158374786, "rewards/margins": 0.036066509783267975, "rewards/rejected": -0.23564307391643524, "step": 3960 }, { "epoch": 0.26, "learning_rate": 4.621430378108656e-06, "logits/chosen": -0.9928571581840515, "logits/rejected": -0.78460294008255, "logps/chosen": -513.399658203125, "logps/rejected": -612.7415771484375, "loss": 0.6888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25564131140708923, "rewards/margins": 0.10610628128051758, "rewards/rejected": -0.3617476224899292, "step": 3970 }, { "epoch": 0.26, "learning_rate": 4.618403850024223e-06, "logits/chosen": -0.7883467078208923, "logits/rejected": -0.7039044499397278, "logps/chosen": -475.11004638671875, "logps/rejected": -494.36761474609375, "loss": 0.6908, "rewards/accuracies": 0.5, "rewards/chosen": -0.21961987018585205, "rewards/margins": 0.06591992825269699, "rewards/rejected": -0.28553980588912964, "step": 3980 }, { "epoch": 0.26, "learning_rate": 4.615366271307598e-06, "logits/chosen": -1.0304720401763916, "logits/rejected": -0.8591554760932922, "logps/chosen": -409.8172302246094, "logps/rejected": -460.185302734375, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.217874214053154, "rewards/margins": 0.06226044148206711, "rewards/rejected": -0.2801347076892853, "step": 3990 }, { "epoch": 0.26, "learning_rate": 4.612317657804277e-06, "logits/chosen": -1.009948492050171, "logits/rejected": -0.9941838979721069, "logps/chosen": -388.17620849609375, "logps/rejected": -533.5516357421875, "loss": 0.6879, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.24094316363334656, "rewards/margins": 0.09402754157781601, "rewards/rejected": -0.33497071266174316, "step": 4000 }, { "epoch": 0.26, "eval_logits/chosen": -1.0340783596038818, "eval_logits/rejected": -0.9098215103149414, "eval_logps/chosen": -458.0283203125, "eval_logps/rejected": -518.2861328125, "eval_loss": 0.6896993517875671, "eval_rewards/accuracies": 0.6539999842643738, "eval_rewards/chosen": -0.2260233461856842, "eval_rewards/margins": 0.08065088838338852, "eval_rewards/rejected": -0.3066742420196533, "eval_runtime": 713.6006, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.401, "step": 4000 }, { "epoch": 0.26, "learning_rate": 4.6092580254173236e-06, "logits/chosen": -0.9095097780227661, "logits/rejected": -0.8598026037216187, "logps/chosen": -497.9371643066406, "logps/rejected": -583.9747314453125, "loss": 0.6896, "rewards/accuracies": 0.75, "rewards/chosen": -0.2406143844127655, "rewards/margins": 0.10470570623874664, "rewards/rejected": -0.34532010555267334, "step": 4010 }, { "epoch": 0.26, "learning_rate": 4.606187390107277e-06, "logits/chosen": -0.9289296865463257, "logits/rejected": -0.8710733652114868, "logps/chosen": -481.1529235839844, "logps/rejected": -516.4467163085938, "loss": 0.6907, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.25627756118774414, "rewards/margins": 0.07428644597530365, "rewards/rejected": -0.3305639922618866, "step": 4020 }, { "epoch": 0.26, "learning_rate": 4.603105767892077e-06, "logits/chosen": -1.19761323928833, "logits/rejected": -1.1186004877090454, "logps/chosen": -411.83660888671875, "logps/rejected": -505.84063720703125, "loss": 0.6904, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21714666485786438, "rewards/margins": 0.07605074346065521, "rewards/rejected": -0.2931973934173584, "step": 4030 }, { "epoch": 0.26, "learning_rate": 4.6000131748469725e-06, "logits/chosen": -1.1575210094451904, "logits/rejected": -0.9933841824531555, "logps/chosen": -445.5464782714844, "logps/rejected": -440.16754150390625, "loss": 0.6901, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1954062283039093, "rewards/margins": 0.06893941015005112, "rewards/rejected": -0.264345645904541, "step": 4040 }, { "epoch": 0.26, "learning_rate": 4.596909627104445e-06, "logits/chosen": -1.3777754306793213, "logits/rejected": -1.2198288440704346, "logps/chosen": -518.49365234375, "logps/rejected": -564.2063598632812, "loss": 0.6885, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.26975539326667786, "rewards/margins": 0.07834690809249878, "rewards/rejected": -0.34810227155685425, "step": 4050 }, { "epoch": 0.27, "learning_rate": 4.5937951408541215e-06, "logits/chosen": -1.1894450187683105, "logits/rejected": -0.7648533582687378, "logps/chosen": -523.9410400390625, "logps/rejected": -590.8986206054688, "loss": 0.6903, "rewards/accuracies": 0.75, "rewards/chosen": -0.27130264043807983, "rewards/margins": 0.11233459413051605, "rewards/rejected": -0.3836372494697571, "step": 4060 }, { "epoch": 0.27, "learning_rate": 4.590669732342685e-06, "logits/chosen": -0.9605843424797058, "logits/rejected": -0.7980761528015137, "logps/chosen": -453.78070068359375, "logps/rejected": -545.031982421875, "loss": 0.6909, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24163314700126648, "rewards/margins": 0.08935852348804474, "rewards/rejected": -0.3309916853904724, "step": 4070 }, { "epoch": 0.27, "learning_rate": 4.587533417873799e-06, "logits/chosen": -1.107875108718872, "logits/rejected": -1.0243065357208252, "logps/chosen": -502.90362548828125, "logps/rejected": -633.7393188476562, "loss": 0.6915, "rewards/accuracies": 0.75, "rewards/chosen": -0.3097396492958069, "rewards/margins": 0.07113198935985565, "rewards/rejected": -0.38087162375450134, "step": 4080 }, { "epoch": 0.27, "learning_rate": 4.584386213808016e-06, "logits/chosen": -1.0260846614837646, "logits/rejected": -0.8974090814590454, "logps/chosen": -467.3468322753906, "logps/rejected": -475.55755615234375, "loss": 0.6898, "rewards/accuracies": 0.625, "rewards/chosen": -0.24566781520843506, "rewards/margins": 0.05398694425821304, "rewards/rejected": -0.2996547818183899, "step": 4090 }, { "epoch": 0.27, "learning_rate": 4.581228136562693e-06, "logits/chosen": -1.1336443424224854, "logits/rejected": -1.0851821899414062, "logps/chosen": -446.1910705566406, "logps/rejected": -451.4580078125, "loss": 0.6933, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20801062881946564, "rewards/margins": 0.030951568856835365, "rewards/rejected": -0.23896221816539764, "step": 4100 }, { "epoch": 0.27, "eval_logits/chosen": -1.1499063968658447, "eval_logits/rejected": -1.0199224948883057, "eval_logps/chosen": -460.51519775390625, "eval_logps/rejected": -502.691162109375, "eval_loss": 0.6899304389953613, "eval_rewards/accuracies": 0.6520000100135803, "eval_rewards/chosen": -0.22851026058197021, "eval_rewards/margins": 0.0625690221786499, "eval_rewards/rejected": -0.2910792827606201, "eval_runtime": 712.7574, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 4100 }, { "epoch": 0.27, "learning_rate": 4.578059202611909e-06, "logits/chosen": -1.1208741664886475, "logits/rejected": -1.065028429031372, "logps/chosen": -480.1509704589844, "logps/rejected": -500.01336669921875, "loss": 0.6914, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22299210727214813, "rewards/margins": 0.034618355333805084, "rewards/rejected": -0.2576104402542114, "step": 4110 }, { "epoch": 0.27, "learning_rate": 4.574879428486376e-06, "logits/chosen": -1.1323649883270264, "logits/rejected": -1.178371787071228, "logps/chosen": -445.16302490234375, "logps/rejected": -493.7518615722656, "loss": 0.6914, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2322465479373932, "rewards/margins": 0.05151096731424332, "rewards/rejected": -0.2837575376033783, "step": 4120 }, { "epoch": 0.27, "learning_rate": 4.571688830773352e-06, "logits/chosen": -1.2729997634887695, "logits/rejected": -1.1686136722564697, "logps/chosen": -399.8155517578125, "logps/rejected": -415.05084228515625, "loss": 0.6915, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.17774733901023865, "rewards/margins": 0.03511429950594902, "rewards/rejected": -0.21286162734031677, "step": 4130 }, { "epoch": 0.27, "learning_rate": 4.568487426116559e-06, "logits/chosen": -1.0722920894622803, "logits/rejected": -0.9740289449691772, "logps/chosen": -349.8589782714844, "logps/rejected": -381.6366271972656, "loss": 0.6927, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1778871715068817, "rewards/margins": 0.039310991764068604, "rewards/rejected": -0.21719813346862793, "step": 4140 }, { "epoch": 0.27, "learning_rate": 4.565275231216092e-06, "logits/chosen": -0.8283463716506958, "logits/rejected": -0.8446325063705444, "logps/chosen": -313.578369140625, "logps/rejected": -403.86944580078125, "loss": 0.6907, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.16223487257957458, "rewards/margins": 0.04430120810866356, "rewards/rejected": -0.20653608441352844, "step": 4150 }, { "epoch": 0.27, "learning_rate": 4.562052262828331e-06, "logits/chosen": -1.0895860195159912, "logits/rejected": -1.0035459995269775, "logps/chosen": -389.712890625, "logps/rejected": -451.557373046875, "loss": 0.6906, "rewards/accuracies": 0.625, "rewards/chosen": -0.19090190529823303, "rewards/margins": 0.06712041050195694, "rewards/rejected": -0.2580223083496094, "step": 4160 }, { "epoch": 0.27, "learning_rate": 4.558818537765861e-06, "logits/chosen": -1.3722314834594727, "logits/rejected": -0.8719658851623535, "logps/chosen": -417.94097900390625, "logps/rejected": -441.64080810546875, "loss": 0.6923, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18192659318447113, "rewards/margins": 0.05856790393590927, "rewards/rejected": -0.2404944896697998, "step": 4170 }, { "epoch": 0.27, "learning_rate": 4.555574072897374e-06, "logits/chosen": -1.08540940284729, "logits/rejected": -1.1781179904937744, "logps/chosen": -367.94091796875, "logps/rejected": -442.3624572753906, "loss": 0.6888, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16597968339920044, "rewards/margins": 0.07669314742088318, "rewards/rejected": -0.24267283082008362, "step": 4180 }, { "epoch": 0.27, "learning_rate": 4.552318885147589e-06, "logits/chosen": -1.2220853567123413, "logits/rejected": -0.9189378619194031, "logps/chosen": -418.6746520996094, "logps/rejected": -436.156005859375, "loss": 0.691, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17775699496269226, "rewards/margins": 0.07687093317508698, "rewards/rejected": -0.25462794303894043, "step": 4190 }, { "epoch": 0.27, "learning_rate": 4.549052991497159e-06, "logits/chosen": -0.9784964323043823, "logits/rejected": -0.9273381233215332, "logps/chosen": -364.02545166015625, "logps/rejected": -428.04571533203125, "loss": 0.6908, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18385827541351318, "rewards/margins": 0.06360138952732086, "rewards/rejected": -0.24745967984199524, "step": 4200 }, { "epoch": 0.27, "eval_logits/chosen": -0.8099203109741211, "eval_logits/rejected": -0.7000288367271423, "eval_logps/chosen": -446.6075134277344, "eval_logps/rejected": -499.034912109375, "eval_loss": 0.6899096965789795, "eval_rewards/accuracies": 0.6489999890327454, "eval_rewards/chosen": -0.21460255980491638, "eval_rewards/margins": 0.07282048463821411, "eval_rewards/rejected": -0.2874230444431305, "eval_runtime": 714.9068, "eval_samples_per_second": 2.798, "eval_steps_per_second": 1.399, "step": 4200 }, { "epoch": 0.28, "learning_rate": 4.545776408982585e-06, "logits/chosen": -0.8513118624687195, "logits/rejected": -0.7860215306282043, "logps/chosen": -444.19915771484375, "logps/rejected": -514.2364501953125, "loss": 0.689, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.212455153465271, "rewards/margins": 0.07905051857233047, "rewards/rejected": -0.2915056347846985, "step": 4210 }, { "epoch": 0.28, "learning_rate": 4.542489154696128e-06, "logits/chosen": -1.0325562953948975, "logits/rejected": -0.7060797810554504, "logps/chosen": -458.957275390625, "logps/rejected": -462.31268310546875, "loss": 0.6914, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1912369430065155, "rewards/margins": 0.06460615247488022, "rewards/rejected": -0.25584307312965393, "step": 4220 }, { "epoch": 0.28, "learning_rate": 4.5391912457857145e-06, "logits/chosen": -0.9764394760131836, "logits/rejected": -0.842827320098877, "logps/chosen": -466.9427795410156, "logps/rejected": -508.9811096191406, "loss": 0.6893, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20197315514087677, "rewards/margins": 0.08914804458618164, "rewards/rejected": -0.2911211848258972, "step": 4230 }, { "epoch": 0.28, "learning_rate": 4.535882699454854e-06, "logits/chosen": -1.0353846549987793, "logits/rejected": -0.9514248967170715, "logps/chosen": -486.9790954589844, "logps/rejected": -598.344482421875, "loss": 0.6888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21592804789543152, "rewards/margins": 0.08970265835523605, "rewards/rejected": -0.30563071370124817, "step": 4240 }, { "epoch": 0.28, "learning_rate": 4.532563532962546e-06, "logits/chosen": -1.288633108139038, "logits/rejected": -1.264217734336853, "logps/chosen": -425.7466735839844, "logps/rejected": -523.7071533203125, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": -0.2358524352312088, "rewards/margins": 0.07636555284261703, "rewards/rejected": -0.3122180104255676, "step": 4250 }, { "epoch": 0.28, "learning_rate": 4.529233763623187e-06, "logits/chosen": -1.0135489702224731, "logits/rejected": -0.7392680644989014, "logps/chosen": -440.1654357910156, "logps/rejected": -471.6558532714844, "loss": 0.6884, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2380649596452713, "rewards/margins": 0.07945012301206589, "rewards/rejected": -0.3175150752067566, "step": 4260 }, { "epoch": 0.28, "learning_rate": 4.5258934088064854e-06, "logits/chosen": -0.8823081254959106, "logits/rejected": -0.6247905492782593, "logps/chosen": -530.675048828125, "logps/rejected": -584.1381225585938, "loss": 0.6872, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.31069430708885193, "rewards/margins": 0.10816062986850739, "rewards/rejected": -0.4188549518585205, "step": 4270 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -0.7952845692634583, "logits/rejected": -0.6527966260910034, "logps/chosen": -636.1341552734375, "logps/rejected": -646.477294921875, "loss": 0.6893, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3454816937446594, "rewards/margins": 0.10698393732309341, "rewards/rejected": -0.45246559381484985, "step": 4280 }, { "epoch": 0.28, "learning_rate": 4.519181012495892e-06, "logits/chosen": -1.004214882850647, "logits/rejected": -0.6648763418197632, "logps/chosen": -556.7357788085938, "logps/rejected": -614.4586791992188, "loss": 0.6912, "rewards/accuracies": 0.625, "rewards/chosen": -0.3145165741443634, "rewards/margins": 0.08679699152708054, "rewards/rejected": -0.4013136029243469, "step": 4290 }, { "epoch": 0.28, "learning_rate": 4.515809006017147e-06, "logits/chosen": -0.7630017995834351, "logits/rejected": -0.6918413639068604, "logps/chosen": -509.41656494140625, "logps/rejected": -562.0657958984375, "loss": 0.6902, "rewards/accuracies": 0.625, "rewards/chosen": -0.2760070264339447, "rewards/margins": 0.0881584882736206, "rewards/rejected": -0.3641654849052429, "step": 4300 }, { "epoch": 0.28, "eval_logits/chosen": -0.8444295525550842, "eval_logits/rejected": -0.7308560013771057, "eval_logps/chosen": -529.0877685546875, "eval_logps/rejected": -583.29833984375, "eval_loss": 0.6897502541542053, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.2970828413963318, "eval_rewards/margins": 0.07460356503725052, "eval_rewards/rejected": -0.3716863989830017, "eval_runtime": 710.9846, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.407, "step": 4300 }, { "epoch": 0.28, "learning_rate": 4.512426484091171e-06, "logits/chosen": -1.0411491394042969, "logits/rejected": -0.8547149896621704, "logps/chosen": -571.1553955078125, "logps/rejected": -588.7039184570312, "loss": 0.6915, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2935742437839508, "rewards/margins": 0.053841590881347656, "rewards/rejected": -0.3474158048629761, "step": 4310 }, { "epoch": 0.28, "learning_rate": 4.509033464362858e-06, "logits/chosen": -0.6955040693283081, "logits/rejected": -0.6403937339782715, "logps/chosen": -528.7620849609375, "logps/rejected": -617.499755859375, "loss": 0.6899, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2874962389469147, "rewards/margins": 0.0745067298412323, "rewards/rejected": -0.3620029389858246, "step": 4320 }, { "epoch": 0.28, "learning_rate": 4.505629964531857e-06, "logits/chosen": -0.9003432393074036, "logits/rejected": -0.7642195224761963, "logps/chosen": -521.0284423828125, "logps/rejected": -599.5103149414062, "loss": 0.6878, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2978239953517914, "rewards/margins": 0.10929515212774277, "rewards/rejected": -0.40711918473243713, "step": 4330 }, { "epoch": 0.28, "learning_rate": 4.502216002352492e-06, "logits/chosen": -0.8330503702163696, "logits/rejected": -0.6456397771835327, "logps/chosen": -487.53094482421875, "logps/rejected": -522.7586669921875, "loss": 0.6924, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.3244900703430176, "rewards/margins": 0.054641880095005035, "rewards/rejected": -0.3791319727897644, "step": 4340 }, { "epoch": 0.28, "learning_rate": 4.498791595633663e-06, "logits/chosen": -0.7700721025466919, "logits/rejected": -0.5906479358673096, "logps/chosen": -466.475341796875, "logps/rejected": -426.53497314453125, "loss": 0.6911, "rewards/accuracies": 0.625, "rewards/chosen": -0.20409110188484192, "rewards/margins": 0.04726005345582962, "rewards/rejected": -0.25135114789009094, "step": 4350 }, { "epoch": 0.29, "learning_rate": 4.495356762238751e-06, "logits/chosen": -1.251646637916565, "logits/rejected": -0.8721704483032227, "logps/chosen": -453.76861572265625, "logps/rejected": -424.33721923828125, "loss": 0.6906, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1705707162618637, "rewards/margins": 0.06802061945199966, "rewards/rejected": -0.23859134316444397, "step": 4360 }, { "epoch": 0.29, "learning_rate": 4.491911520085532e-06, "logits/chosen": -0.7782236933708191, "logits/rejected": -0.7609604001045227, "logps/chosen": -363.4910583496094, "logps/rejected": -446.3992614746094, "loss": 0.6897, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1630832999944687, "rewards/margins": 0.07213564217090607, "rewards/rejected": -0.23521895706653595, "step": 4370 }, { "epoch": 0.29, "learning_rate": 4.488455887146075e-06, "logits/chosen": -1.0101211071014404, "logits/rejected": -0.898006796836853, "logps/chosen": -346.0672607421875, "logps/rejected": -476.0105895996094, "loss": 0.687, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17595519125461578, "rewards/margins": 0.1167796403169632, "rewards/rejected": -0.2927348017692566, "step": 4380 }, { "epoch": 0.29, "learning_rate": 4.484989881446654e-06, "logits/chosen": -1.0024950504302979, "logits/rejected": -0.9726330041885376, "logps/chosen": -420.73260498046875, "logps/rejected": -436.6083068847656, "loss": 0.6917, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2182566374540329, "rewards/margins": 0.033820368349552155, "rewards/rejected": -0.25207701325416565, "step": 4390 }, { "epoch": 0.29, "learning_rate": 4.481513521067654e-06, "logits/chosen": -0.8407198190689087, "logits/rejected": -0.7638456225395203, "logps/chosen": -533.3350830078125, "logps/rejected": -602.2070922851562, "loss": 0.6895, "rewards/accuracies": 0.75, "rewards/chosen": -0.3085605204105377, "rewards/margins": 0.10428784042596817, "rewards/rejected": -0.4128483235836029, "step": 4400 }, { "epoch": 0.29, "eval_logits/chosen": -0.7359596490859985, "eval_logits/rejected": -0.6256921291351318, "eval_logps/chosen": -519.137451171875, "eval_logps/rejected": -587.6251831054688, "eval_loss": 0.6898573040962219, "eval_rewards/accuracies": 0.6504999995231628, "eval_rewards/chosen": -0.28713250160217285, "eval_rewards/margins": 0.0888807401061058, "eval_rewards/rejected": -0.37601324915885925, "eval_runtime": 712.7139, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 4400 }, { "epoch": 0.29, "learning_rate": 4.478026824143473e-06, "logits/chosen": -0.8550176620483398, "logits/rejected": -0.8174192309379578, "logps/chosen": -546.6198120117188, "logps/rejected": -618.2431640625, "loss": 0.6856, "rewards/accuracies": 0.75, "rewards/chosen": -0.28055816888809204, "rewards/margins": 0.12819083034992218, "rewards/rejected": -0.4087490141391754, "step": 4410 }, { "epoch": 0.29, "learning_rate": 4.474529808862429e-06, "logits/chosen": -0.7030132412910461, "logits/rejected": -0.7720840573310852, "logps/chosen": -430.73333740234375, "logps/rejected": -544.6876831054688, "loss": 0.6906, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2419130504131317, "rewards/margins": 0.09633009135723114, "rewards/rejected": -0.3382430970668793, "step": 4420 }, { "epoch": 0.29, "learning_rate": 4.471022493466669e-06, "logits/chosen": -0.8346714973449707, "logits/rejected": -0.5992153882980347, "logps/chosen": -546.3121948242188, "logps/rejected": -537.5162353515625, "loss": 0.6904, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24437315762043, "rewards/margins": 0.06821531802415848, "rewards/rejected": -0.31258848309516907, "step": 4430 }, { "epoch": 0.29, "learning_rate": 4.467504896252066e-06, "logits/chosen": -1.0627632141113281, "logits/rejected": -1.0447062253952026, "logps/chosen": -483.2215270996094, "logps/rejected": -545.9163818359375, "loss": 0.6894, "rewards/accuracies": 0.625, "rewards/chosen": -0.23352019488811493, "rewards/margins": 0.09143707901239395, "rewards/rejected": -0.32495731115341187, "step": 4440 }, { "epoch": 0.29, "learning_rate": 4.463977035568132e-06, "logits/chosen": -0.9531109929084778, "logits/rejected": -1.0412567853927612, "logps/chosen": -412.849853515625, "logps/rejected": -510.6705017089844, "loss": 0.691, "rewards/accuracies": 0.5, "rewards/chosen": -0.2004670351743698, "rewards/margins": 0.046115074306726456, "rewards/rejected": -0.24658215045928955, "step": 4450 }, { "epoch": 0.29, "learning_rate": 4.460438929817914e-06, "logits/chosen": -1.0476336479187012, "logits/rejected": -0.8358996510505676, "logps/chosen": -396.8749084472656, "logps/rejected": -445.3506774902344, "loss": 0.6895, "rewards/accuracies": 0.5, "rewards/chosen": -0.1904551386833191, "rewards/margins": 0.052764374762773514, "rewards/rejected": -0.2432195246219635, "step": 4460 }, { "epoch": 0.29, "learning_rate": 4.456890597457907e-06, "logits/chosen": -0.9712381362915039, "logits/rejected": -0.9402221441268921, "logps/chosen": -406.9786682128906, "logps/rejected": -508.30078125, "loss": 0.6899, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19484297931194305, "rewards/margins": 0.08311771601438522, "rewards/rejected": -0.2779606878757477, "step": 4470 }, { "epoch": 0.29, "learning_rate": 4.453332056997951e-06, "logits/chosen": -0.9786797761917114, "logits/rejected": -0.9453691244125366, "logps/chosen": -319.5592041015625, "logps/rejected": -416.96270751953125, "loss": 0.6879, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13959971070289612, "rewards/margins": 0.10172855854034424, "rewards/rejected": -0.24132826924324036, "step": 4480 }, { "epoch": 0.29, "learning_rate": 4.449763327001134e-06, "logits/chosen": -1.1327717304229736, "logits/rejected": -1.1574513912200928, "logps/chosen": -324.18731689453125, "logps/rejected": -421.53173828125, "loss": 0.6904, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13542507588863373, "rewards/margins": 0.0689290389418602, "rewards/rejected": -0.20435413718223572, "step": 4490 }, { "epoch": 0.29, "learning_rate": 4.446184426083702e-06, "logits/chosen": -1.1786584854125977, "logits/rejected": -1.0005810260772705, "logps/chosen": -348.32135009765625, "logps/rejected": -479.8023376464844, "loss": 0.6864, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15475699305534363, "rewards/margins": 0.12311340868473053, "rewards/rejected": -0.27787038683891296, "step": 4500 }, { "epoch": 0.29, "eval_logits/chosen": -1.1400220394134521, "eval_logits/rejected": -1.0094726085662842, "eval_logps/chosen": -381.37152099609375, "eval_logps/rejected": -431.19793701171875, "eval_loss": 0.6898050308227539, "eval_rewards/accuracies": 0.6589999794960022, "eval_rewards/chosen": -0.1493665874004364, "eval_rewards/margins": 0.07021944224834442, "eval_rewards/rejected": -0.21958602964878082, "eval_runtime": 711.7175, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 4500 }, { "epoch": 0.3, "learning_rate": 4.442595372914954e-06, "logits/chosen": -1.1510266065597534, "logits/rejected": -1.1077659130096436, "logps/chosen": -369.70440673828125, "logps/rejected": -363.3657531738281, "loss": 0.6884, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13272404670715332, "rewards/margins": 0.07893560826778412, "rewards/rejected": -0.21165966987609863, "step": 4510 }, { "epoch": 0.3, "learning_rate": 4.43899618621715e-06, "logits/chosen": -1.1567199230194092, "logits/rejected": -0.9014299511909485, "logps/chosen": -433.1683044433594, "logps/rejected": -536.3552856445312, "loss": 0.6892, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1821144074201584, "rewards/margins": 0.09999031573534012, "rewards/rejected": -0.2821047008037567, "step": 4520 }, { "epoch": 0.3, "learning_rate": 4.4353868847654105e-06, "logits/chosen": -1.2916271686553955, "logits/rejected": -1.0142287015914917, "logps/chosen": -434.13055419921875, "logps/rejected": -476.5389709472656, "loss": 0.6863, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.18827231228351593, "rewards/margins": 0.07247602939605713, "rewards/rejected": -0.26074832677841187, "step": 4530 }, { "epoch": 0.3, "learning_rate": 4.43176748738762e-06, "logits/chosen": -0.826651394367218, "logits/rejected": -0.7963089942932129, "logps/chosen": -510.57464599609375, "logps/rejected": -617.2264404296875, "loss": 0.6899, "rewards/accuracies": 0.625, "rewards/chosen": -0.2786267399787903, "rewards/margins": 0.10226340591907501, "rewards/rejected": -0.3808901906013489, "step": 4540 }, { "epoch": 0.3, "learning_rate": 4.4281380129643295e-06, "logits/chosen": -0.8452268838882446, "logits/rejected": -0.6725960969924927, "logps/chosen": -487.48443603515625, "logps/rejected": -596.5521240234375, "loss": 0.689, "rewards/accuracies": 0.625, "rewards/chosen": -0.2577342391014099, "rewards/margins": 0.11954182386398315, "rewards/rejected": -0.37727606296539307, "step": 4550 }, { "epoch": 0.3, "learning_rate": 4.424498480428654e-06, "logits/chosen": -1.1150968074798584, "logits/rejected": -0.9688647389411926, "logps/chosen": -450.00439453125, "logps/rejected": -448.45977783203125, "loss": 0.692, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.20273244380950928, "rewards/margins": 0.038480862975120544, "rewards/rejected": -0.24121332168579102, "step": 4560 }, { "epoch": 0.3, "learning_rate": 4.420848908766178e-06, "logits/chosen": -1.285456657409668, "logits/rejected": -1.2038942575454712, "logps/chosen": -403.0211486816406, "logps/rejected": -477.5533142089844, "loss": 0.6894, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.19579431414604187, "rewards/margins": 0.06565193831920624, "rewards/rejected": -0.2614462375640869, "step": 4570 }, { "epoch": 0.3, "learning_rate": 4.417189317014855e-06, "logits/chosen": -1.1039775609970093, "logits/rejected": -1.2309410572052002, "logps/chosen": -409.04632568359375, "logps/rejected": -487.999755859375, "loss": 0.6902, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.20825043320655823, "rewards/margins": 0.04825657233595848, "rewards/rejected": -0.2565069794654846, "step": 4580 }, { "epoch": 0.3, "learning_rate": 4.41351972426491e-06, "logits/chosen": -0.9162073135375977, "logits/rejected": -0.947158932685852, "logps/chosen": -478.1299743652344, "logps/rejected": -601.3800048828125, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": -0.2313905954360962, "rewards/margins": 0.06919713318347931, "rewards/rejected": -0.3005877435207367, "step": 4590 }, { "epoch": 0.3, "learning_rate": 4.409840149658735e-06, "logits/chosen": -1.0446149110794067, "logits/rejected": -0.9430927038192749, "logps/chosen": -484.2718811035156, "logps/rejected": -492.84539794921875, "loss": 0.6903, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19975461065769196, "rewards/margins": 0.05872530862689018, "rewards/rejected": -0.25847989320755005, "step": 4600 }, { "epoch": 0.3, "eval_logits/chosen": -1.1054784059524536, "eval_logits/rejected": -0.9768812656402588, "eval_logps/chosen": -445.2477111816406, "eval_logps/rejected": -493.52667236328125, "eval_loss": 0.6898093223571777, "eval_rewards/accuracies": 0.6499999761581421, "eval_rewards/chosen": -0.2132427990436554, "eval_rewards/margins": 0.0686720460653305, "eval_rewards/rejected": -0.2819148600101471, "eval_runtime": 712.2205, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 4600 }, { "epoch": 0.3, "learning_rate": 4.4061506123907925e-06, "logits/chosen": -1.0503458976745605, "logits/rejected": -0.9003597497940063, "logps/chosen": -485.533447265625, "logps/rejected": -500.1853942871094, "loss": 0.691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2214416265487671, "rewards/margins": 0.05495814234018326, "rewards/rejected": -0.27639979124069214, "step": 4610 }, { "epoch": 0.3, "learning_rate": 4.402451131707519e-06, "logits/chosen": -1.256821632385254, "logits/rejected": -0.9206531643867493, "logps/chosen": -440.94482421875, "logps/rejected": -454.66729736328125, "loss": 0.6894, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2314794510602951, "rewards/margins": 0.0892038494348526, "rewards/rejected": -0.3206833302974701, "step": 4620 }, { "epoch": 0.3, "learning_rate": 4.398741726907215e-06, "logits/chosen": -1.3970555067062378, "logits/rejected": -1.088208556175232, "logps/chosen": -512.00732421875, "logps/rejected": -548.9866333007812, "loss": 0.6885, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23274996876716614, "rewards/margins": 0.07730601727962494, "rewards/rejected": -0.31005600094795227, "step": 4630 }, { "epoch": 0.3, "learning_rate": 4.395022417339955e-06, "logits/chosen": -0.9462388753890991, "logits/rejected": -0.8615191578865051, "logps/chosen": -532.3143310546875, "logps/rejected": -612.8482055664062, "loss": 0.6913, "rewards/accuracies": 0.625, "rewards/chosen": -0.32568931579589844, "rewards/margins": 0.07204465568065643, "rewards/rejected": -0.3977339565753937, "step": 4640 }, { "epoch": 0.3, "learning_rate": 4.391293222407479e-06, "logits/chosen": -0.9256173968315125, "logits/rejected": -0.9904786944389343, "logps/chosen": -343.5927734375, "logps/rejected": -415.95867919921875, "loss": 0.6897, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2070355862379074, "rewards/margins": 0.05413592979311943, "rewards/rejected": -0.26117151975631714, "step": 4650 }, { "epoch": 0.3, "learning_rate": 4.387554161563094e-06, "logits/chosen": -1.1515638828277588, "logits/rejected": -1.107965111732483, "logps/chosen": -439.2413635253906, "logps/rejected": -527.4022216796875, "loss": 0.686, "rewards/accuracies": 0.625, "rewards/chosen": -0.24037718772888184, "rewards/margins": 0.09998045116662979, "rewards/rejected": -0.34035763144493103, "step": 4660 }, { "epoch": 0.31, "learning_rate": 4.383805254311575e-06, "logits/chosen": -1.1828508377075195, "logits/rejected": -0.8469891548156738, "logps/chosen": -516.6743774414062, "logps/rejected": -550.5076293945312, "loss": 0.6886, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2605333924293518, "rewards/margins": 0.07939942926168442, "rewards/rejected": -0.33993279933929443, "step": 4670 }, { "epoch": 0.31, "learning_rate": 4.380046520209056e-06, "logits/chosen": -0.9766277074813843, "logits/rejected": -0.7371279001235962, "logps/chosen": -435.83544921875, "logps/rejected": -506.59954833984375, "loss": 0.6916, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23556911945343018, "rewards/margins": 0.09460324048995972, "rewards/rejected": -0.3301723599433899, "step": 4680 }, { "epoch": 0.31, "learning_rate": 4.376277978862936e-06, "logits/chosen": -0.708415150642395, "logits/rejected": -0.6641728281974792, "logps/chosen": -454.5379943847656, "logps/rejected": -470.12359619140625, "loss": 0.6909, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22854939103126526, "rewards/margins": 0.056117068976163864, "rewards/rejected": -0.2846664488315582, "step": 4690 }, { "epoch": 0.31, "learning_rate": 4.372499649931774e-06, "logits/chosen": -0.9270528554916382, "logits/rejected": -0.7678083777427673, "logps/chosen": -500.28973388671875, "logps/rejected": -631.7337646484375, "loss": 0.6849, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.291198194026947, "rewards/margins": 0.12348760664463043, "rewards/rejected": -0.41468581557273865, "step": 4700 }, { "epoch": 0.31, "eval_logits/chosen": -0.826753556728363, "eval_logits/rejected": -0.7087231874465942, "eval_logps/chosen": -512.8201904296875, "eval_logps/rejected": -581.3583374023438, "eval_loss": 0.689807116985321, "eval_rewards/accuracies": 0.6545000076293945, "eval_rewards/chosen": -0.2808152437210083, "eval_rewards/margins": 0.0889311358332634, "eval_rewards/rejected": -0.3697463870048523, "eval_runtime": 711.8212, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 4700 }, { "epoch": 0.31, "learning_rate": 4.368711553125185e-06, "logits/chosen": -1.0156619548797607, "logits/rejected": -1.0053759813308716, "logps/chosen": -545.8829345703125, "logps/rejected": -550.94580078125, "loss": 0.6927, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.26869291067123413, "rewards/margins": 0.06350628286600113, "rewards/rejected": -0.3321991562843323, "step": 4710 }, { "epoch": 0.31, "learning_rate": 4.364913708203734e-06, "logits/chosen": -1.0214194059371948, "logits/rejected": -0.8588595390319824, "logps/chosen": -553.0416259765625, "logps/rejected": -555.3973388671875, "loss": 0.69, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.26814529299736023, "rewards/margins": 0.07777298986911774, "rewards/rejected": -0.34591832756996155, "step": 4720 }, { "epoch": 0.31, "learning_rate": 4.361106134978844e-06, "logits/chosen": -0.8875927925109863, "logits/rejected": -0.6843006014823914, "logps/chosen": -505.05859375, "logps/rejected": -550.0360107421875, "loss": 0.6923, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23462817072868347, "rewards/margins": 0.05792809650301933, "rewards/rejected": -0.2925562858581543, "step": 4730 }, { "epoch": 0.31, "learning_rate": 4.357288853312681e-06, "logits/chosen": -0.9217559695243835, "logits/rejected": -0.9300669431686401, "logps/chosen": -516.9627685546875, "logps/rejected": -553.6403198242188, "loss": 0.6914, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23348495364189148, "rewards/margins": 0.041414495557546616, "rewards/rejected": -0.2748994827270508, "step": 4740 }, { "epoch": 0.31, "learning_rate": 4.353461883118056e-06, "logits/chosen": -0.8890771865844727, "logits/rejected": -0.7557088732719421, "logps/chosen": -484.44598388671875, "logps/rejected": -505.0452575683594, "loss": 0.692, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.25515538454055786, "rewards/margins": 0.04175041243433952, "rewards/rejected": -0.2969058156013489, "step": 4750 }, { "epoch": 0.31, "learning_rate": 4.34962524435832e-06, "logits/chosen": -0.9046379327774048, "logits/rejected": -0.8349242210388184, "logps/chosen": -413.257568359375, "logps/rejected": -458.12005615234375, "loss": 0.6918, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19357720017433167, "rewards/margins": 0.08169585466384888, "rewards/rejected": -0.27527305483818054, "step": 4760 }, { "epoch": 0.31, "learning_rate": 4.34577895704726e-06, "logits/chosen": -1.387485384941101, "logits/rejected": -1.1890177726745605, "logps/chosen": -425.19097900390625, "logps/rejected": -457.5673828125, "loss": 0.6898, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16357699036598206, "rewards/margins": 0.05810046195983887, "rewards/rejected": -0.22167746722698212, "step": 4770 }, { "epoch": 0.31, "learning_rate": 4.3419230412489954e-06, "logits/chosen": -1.3138645887374878, "logits/rejected": -1.057762861251831, "logps/chosen": -456.3876953125, "logps/rejected": -419.55078125, "loss": 0.6919, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16701987385749817, "rewards/margins": 0.0392269529402256, "rewards/rejected": -0.20624682307243347, "step": 4780 }, { "epoch": 0.31, "learning_rate": 4.338057517077872e-06, "logits/chosen": -1.07856023311615, "logits/rejected": -0.9140844345092773, "logps/chosen": -367.5122375488281, "logps/rejected": -492.6651306152344, "loss": 0.6811, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17347107827663422, "rewards/margins": 0.16850519180297852, "rewards/rejected": -0.34197625517845154, "step": 4790 }, { "epoch": 0.31, "learning_rate": 4.334182404698356e-06, "logits/chosen": -0.8965989947319031, "logits/rejected": -0.6839637756347656, "logps/chosen": -468.75067138671875, "logps/rejected": -453.801513671875, "loss": 0.6902, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23864367604255676, "rewards/margins": 0.06345070898532867, "rewards/rejected": -0.30209439992904663, "step": 4800 }, { "epoch": 0.31, "eval_logits/chosen": -0.7168214321136475, "eval_logits/rejected": -0.6072134375572205, "eval_logps/chosen": -480.07293701171875, "eval_logps/rejected": -542.0419921875, "eval_loss": 0.6897538304328918, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": -0.24806798994541168, "eval_rewards/margins": 0.08236212283372879, "eval_rewards/rejected": -0.33043012022972107, "eval_runtime": 712.7229, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 4800 }, { "epoch": 0.31, "learning_rate": 4.330297724324933e-06, "logits/chosen": -1.0477492809295654, "logits/rejected": -0.5886969566345215, "logps/chosen": -563.2034912109375, "logps/rejected": -546.0494995117188, "loss": 0.6884, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25479209423065186, "rewards/margins": 0.08404627442359924, "rewards/rejected": -0.3388383388519287, "step": 4810 }, { "epoch": 0.32, "learning_rate": 4.326403496221999e-06, "logits/chosen": -0.6333009004592896, "logits/rejected": -0.6737180948257446, "logps/chosen": -387.6405334472656, "logps/rejected": -422.314697265625, "loss": 0.6925, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.22621150314807892, "rewards/margins": 0.06145411729812622, "rewards/rejected": -0.28766560554504395, "step": 4820 }, { "epoch": 0.32, "learning_rate": 4.322499740703755e-06, "logits/chosen": -0.7092992067337036, "logits/rejected": -0.7767950892448425, "logps/chosen": -394.92071533203125, "logps/rejected": -468.0594787597656, "loss": 0.6901, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2028215229511261, "rewards/margins": 0.04861544445157051, "rewards/rejected": -0.2514369785785675, "step": 4830 }, { "epoch": 0.32, "learning_rate": 4.318586478134101e-06, "logits/chosen": -0.8985971212387085, "logits/rejected": -0.44110220670700073, "logps/chosen": -401.379150390625, "logps/rejected": -434.3460388183594, "loss": 0.6888, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2087116688489914, "rewards/margins": 0.07348736375570297, "rewards/rejected": -0.28219905495643616, "step": 4840 }, { "epoch": 0.32, "learning_rate": 4.314663728926534e-06, "logits/chosen": -1.0497792959213257, "logits/rejected": -0.6469130516052246, "logps/chosen": -533.2115478515625, "logps/rejected": -588.5062255859375, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27633586525917053, "rewards/margins": 0.065872922539711, "rewards/rejected": -0.34220877289772034, "step": 4850 }, { "epoch": 0.32, "learning_rate": 4.310731513544033e-06, "logits/chosen": -0.7769347429275513, "logits/rejected": -0.449188232421875, "logps/chosen": -508.60333251953125, "logps/rejected": -552.8875732421875, "loss": 0.6898, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2657455503940582, "rewards/margins": 0.09036391228437424, "rewards/rejected": -0.35610947012901306, "step": 4860 }, { "epoch": 0.32, "learning_rate": 4.30678985249896e-06, "logits/chosen": -0.8284038305282593, "logits/rejected": -0.8399826288223267, "logps/chosen": -380.78289794921875, "logps/rejected": -508.3587951660156, "loss": 0.6905, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22363290190696716, "rewards/margins": 0.10931231826543808, "rewards/rejected": -0.33294519782066345, "step": 4870 }, { "epoch": 0.32, "learning_rate": 4.302838766352952e-06, "logits/chosen": -0.8813980221748352, "logits/rejected": -0.7245379686355591, "logps/chosen": -453.521728515625, "logps/rejected": -508.83062744140625, "loss": 0.6888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1979287564754486, "rewards/margins": 0.09110042452812195, "rewards/rejected": -0.28902921080589294, "step": 4880 }, { "epoch": 0.32, "learning_rate": 4.298878275716806e-06, "logits/chosen": -0.8569731712341309, "logits/rejected": -0.7990323305130005, "logps/chosen": -425.7196350097656, "logps/rejected": -530.5278930664062, "loss": 0.6869, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2338402271270752, "rewards/margins": 0.10760519653558731, "rewards/rejected": -0.3414453864097595, "step": 4890 }, { "epoch": 0.32, "learning_rate": 4.294908401250386e-06, "logits/chosen": -0.950922966003418, "logits/rejected": -0.7585622072219849, "logps/chosen": -464.732421875, "logps/rejected": -525.8693237304688, "loss": 0.6888, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24998939037322998, "rewards/margins": 0.10566927492618561, "rewards/rejected": -0.355658620595932, "step": 4900 }, { "epoch": 0.32, "eval_logits/chosen": -0.9126221537590027, "eval_logits/rejected": -0.7924286127090454, "eval_logps/chosen": -470.6658020019531, "eval_logps/rejected": -534.4230346679688, "eval_loss": 0.6897168159484863, "eval_rewards/accuracies": 0.6545000076293945, "eval_rewards/chosen": -0.23866090178489685, "eval_rewards/margins": 0.0841502919793129, "eval_rewards/rejected": -0.32281118631362915, "eval_runtime": 714.6649, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 4900 }, { "epoch": 0.32, "learning_rate": 4.290929163662498e-06, "logits/chosen": -0.6042841672897339, "logits/rejected": -0.6136349439620972, "logps/chosen": -481.3103942871094, "logps/rejected": -511.7767028808594, "loss": 0.688, "rewards/accuracies": 0.625, "rewards/chosen": -0.21157515048980713, "rewards/margins": 0.08818281441926956, "rewards/rejected": -0.2997579276561737, "step": 4910 }, { "epoch": 0.32, "learning_rate": 4.286940583710796e-06, "logits/chosen": -1.1480525732040405, "logits/rejected": -0.9411023259162903, "logps/chosen": -555.0806884765625, "logps/rejected": -602.908447265625, "loss": 0.6909, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.26233208179473877, "rewards/margins": 0.10445760190486908, "rewards/rejected": -0.36678972840309143, "step": 4920 }, { "epoch": 0.32, "learning_rate": 4.282942682201667e-06, "logits/chosen": -0.9523464441299438, "logits/rejected": -0.7611700296401978, "logps/chosen": -507.7801818847656, "logps/rejected": -540.0282592773438, "loss": 0.6916, "rewards/accuracies": 0.625, "rewards/chosen": -0.2556723952293396, "rewards/margins": 0.07183460146188736, "rewards/rejected": -0.327506959438324, "step": 4930 }, { "epoch": 0.32, "learning_rate": 4.278935479990123e-06, "logits/chosen": -1.2261298894882202, "logits/rejected": -0.8556804656982422, "logps/chosen": -438.10955810546875, "logps/rejected": -459.77496337890625, "loss": 0.6889, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.23344619572162628, "rewards/margins": 0.06463684141635895, "rewards/rejected": -0.29808303713798523, "step": 4940 }, { "epoch": 0.32, "learning_rate": 4.274918997979695e-06, "logits/chosen": -1.0745717287063599, "logits/rejected": -1.095640778541565, "logps/chosen": -417.48089599609375, "logps/rejected": -482.902099609375, "loss": 0.6912, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22384591400623322, "rewards/margins": 0.0636858120560646, "rewards/rejected": -0.28753170371055603, "step": 4950 }, { "epoch": 0.32, "learning_rate": 4.270893257122319e-06, "logits/chosen": -0.9423874020576477, "logits/rejected": -0.796244740486145, "logps/chosen": -431.6581115722656, "logps/rejected": -574.3453369140625, "loss": 0.6867, "rewards/accuracies": 0.75, "rewards/chosen": -0.20293612778186798, "rewards/margins": 0.11317793279886246, "rewards/rejected": -0.3161140978336334, "step": 4960 }, { "epoch": 0.33, "learning_rate": 4.266858278418232e-06, "logits/chosen": -0.7076646685600281, "logits/rejected": -0.749599277973175, "logps/chosen": -414.93841552734375, "logps/rejected": -453.1956481933594, "loss": 0.6886, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.17833426594734192, "rewards/margins": 0.058608364313840866, "rewards/rejected": -0.23694264888763428, "step": 4970 }, { "epoch": 0.33, "learning_rate": 4.26281408291586e-06, "logits/chosen": -1.0780795812606812, "logits/rejected": -0.8412311673164368, "logps/chosen": -421.68280029296875, "logps/rejected": -495.57171630859375, "loss": 0.6892, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17934545874595642, "rewards/margins": 0.10342135280370712, "rewards/rejected": -0.28276684880256653, "step": 4980 }, { "epoch": 0.33, "learning_rate": 4.258760691711706e-06, "logits/chosen": -1.0563790798187256, "logits/rejected": -0.9225692749023438, "logps/chosen": -394.8101501464844, "logps/rejected": -468.16937255859375, "loss": 0.6888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19734862446784973, "rewards/margins": 0.07795204222202301, "rewards/rejected": -0.27530068159103394, "step": 4990 }, { "epoch": 0.33, "learning_rate": 4.254698125950247e-06, "logits/chosen": -1.2388908863067627, "logits/rejected": -1.021576166152954, "logps/chosen": -485.2925720214844, "logps/rejected": -496.947265625, "loss": 0.6915, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18427804112434387, "rewards/margins": 0.05832035094499588, "rewards/rejected": -0.24259838461875916, "step": 5000 }, { "epoch": 0.33, "eval_logits/chosen": -0.9265555143356323, "eval_logits/rejected": -0.8070250153541565, "eval_logps/chosen": -433.9764404296875, "eval_logps/rejected": -492.5659484863281, "eval_loss": 0.6898908615112305, "eval_rewards/accuracies": 0.6504999995231628, "eval_rewards/chosen": -0.20197151601314545, "eval_rewards/margins": 0.07898253947496414, "eval_rewards/rejected": -0.2809540629386902, "eval_runtime": 712.1506, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 5000 }, { "epoch": 0.33, "learning_rate": 4.250626406823815e-06, "logits/chosen": -1.112958312034607, "logits/rejected": -0.8601986765861511, "logps/chosen": -413.8302307128906, "logps/rejected": -578.3088989257812, "loss": 0.6883, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19696754217147827, "rewards/margins": 0.1483006477355957, "rewards/rejected": -0.345268189907074, "step": 5010 }, { "epoch": 0.33, "learning_rate": 4.246545555572489e-06, "logits/chosen": -0.9461210370063782, "logits/rejected": -0.9753166437149048, "logps/chosen": -341.7334289550781, "logps/rejected": -459.89013671875, "loss": 0.688, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1875373125076294, "rewards/margins": 0.09320464730262756, "rewards/rejected": -0.28074198961257935, "step": 5020 }, { "epoch": 0.33, "learning_rate": 4.242455593483992e-06, "logits/chosen": -1.0627763271331787, "logits/rejected": -0.887707531452179, "logps/chosen": -415.20526123046875, "logps/rejected": -419.7691345214844, "loss": 0.6918, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19605137407779694, "rewards/margins": 0.05427330732345581, "rewards/rejected": -0.25032466650009155, "step": 5030 }, { "epoch": 0.33, "learning_rate": 4.238356541893567e-06, "logits/chosen": -0.9641706347465515, "logits/rejected": -0.9521792531013489, "logps/chosen": -398.6933288574219, "logps/rejected": -471.68597412109375, "loss": 0.6893, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2085602581501007, "rewards/margins": 0.08853522688150406, "rewards/rejected": -0.29709547758102417, "step": 5040 }, { "epoch": 0.33, "learning_rate": 4.234248422183876e-06, "logits/chosen": -1.157314658164978, "logits/rejected": -1.307806134223938, "logps/chosen": -382.83392333984375, "logps/rejected": -443.8204650878906, "loss": 0.6927, "rewards/accuracies": 0.625, "rewards/chosen": -0.13989922404289246, "rewards/margins": 0.06554291397333145, "rewards/rejected": -0.2054421454668045, "step": 5050 }, { "epoch": 0.33, "learning_rate": 4.230131255784884e-06, "logits/chosen": -1.608673095703125, "logits/rejected": -1.3411251306533813, "logps/chosen": -397.7110290527344, "logps/rejected": -447.5704040527344, "loss": 0.6883, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1398279219865799, "rewards/margins": 0.0686558336019516, "rewards/rejected": -0.2084837704896927, "step": 5060 }, { "epoch": 0.33, "learning_rate": 4.226005064173748e-06, "logits/chosen": -1.2943363189697266, "logits/rejected": -1.1875030994415283, "logps/chosen": -416.7265625, "logps/rejected": -486.3279724121094, "loss": 0.6899, "rewards/accuracies": 0.625, "rewards/chosen": -0.15208227932453156, "rewards/margins": 0.05114439129829407, "rewards/rejected": -0.20322665572166443, "step": 5070 }, { "epoch": 0.33, "learning_rate": 4.2218698688747035e-06, "logits/chosen": -0.8786938786506653, "logits/rejected": -0.7380436658859253, "logps/chosen": -452.21990966796875, "logps/rejected": -479.0074157714844, "loss": 0.6905, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21951058506965637, "rewards/margins": 0.07191769778728485, "rewards/rejected": -0.29142826795578003, "step": 5080 }, { "epoch": 0.33, "learning_rate": 4.217725691458957e-06, "logits/chosen": -1.255380630493164, "logits/rejected": -1.0743123292922974, "logps/chosen": -422.770263671875, "logps/rejected": -541.0545654296875, "loss": 0.6877, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23854613304138184, "rewards/margins": 0.09133829176425934, "rewards/rejected": -0.32988446950912476, "step": 5090 }, { "epoch": 0.33, "learning_rate": 4.213572553544565e-06, "logits/chosen": -0.7588680386543274, "logits/rejected": -0.7646905183792114, "logps/chosen": -560.6681518554688, "logps/rejected": -648.8077392578125, "loss": 0.6903, "rewards/accuracies": 0.75, "rewards/chosen": -0.3196962773799896, "rewards/margins": 0.09550414234399796, "rewards/rejected": -0.41520047187805176, "step": 5100 }, { "epoch": 0.33, "eval_logits/chosen": -0.8067855834960938, "eval_logits/rejected": -0.6929395794868469, "eval_logps/chosen": -539.7930908203125, "eval_logps/rejected": -603.40966796875, "eval_loss": 0.6898962259292603, "eval_rewards/accuracies": 0.6539999842643738, "eval_rewards/chosen": -0.3077881336212158, "eval_rewards/margins": 0.08400966227054596, "eval_rewards/rejected": -0.391797810792923, "eval_runtime": 711.9273, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.405, "step": 5100 }, { "epoch": 0.33, "learning_rate": 4.209410476796331e-06, "logits/chosen": -0.6897755861282349, "logits/rejected": -0.7003772258758545, "logps/chosen": -501.85760498046875, "logps/rejected": -574.7218017578125, "loss": 0.6883, "rewards/accuracies": 0.625, "rewards/chosen": -0.3307662010192871, "rewards/margins": 0.08285339176654816, "rewards/rejected": -0.41361960768699646, "step": 5110 }, { "epoch": 0.33, "learning_rate": 4.205239482925686e-06, "logits/chosen": -0.8003614544868469, "logits/rejected": -0.7187783718109131, "logps/chosen": -451.90130615234375, "logps/rejected": -535.4197998046875, "loss": 0.6902, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.264085978269577, "rewards/margins": 0.06139212101697922, "rewards/rejected": -0.32547810673713684, "step": 5120 }, { "epoch": 0.34, "learning_rate": 4.201059593690577e-06, "logits/chosen": -1.1568797826766968, "logits/rejected": -1.0991196632385254, "logps/chosen": -490.71771240234375, "logps/rejected": -528.8479614257812, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2651299834251404, "rewards/margins": 0.06355933845043182, "rewards/rejected": -0.328689306974411, "step": 5130 }, { "epoch": 0.34, "learning_rate": 4.196870830895354e-06, "logits/chosen": -0.9000298380851746, "logits/rejected": -0.8038280606269836, "logps/chosen": -490.931884765625, "logps/rejected": -603.1361694335938, "loss": 0.6909, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2331843078136444, "rewards/margins": 0.06269694119691849, "rewards/rejected": -0.2958812415599823, "step": 5140 }, { "epoch": 0.34, "learning_rate": 4.192673216390657e-06, "logits/chosen": -0.9874833822250366, "logits/rejected": -0.7820544242858887, "logps/chosen": -462.16741943359375, "logps/rejected": -508.98223876953125, "loss": 0.6882, "rewards/accuracies": 0.75, "rewards/chosen": -0.2291944921016693, "rewards/margins": 0.08514241129159927, "rewards/rejected": -0.3143369257450104, "step": 5150 }, { "epoch": 0.34, "learning_rate": 4.188466772073296e-06, "logits/chosen": -0.9633606672286987, "logits/rejected": -0.9231443405151367, "logps/chosen": -481.64093017578125, "logps/rejected": -504.0276794433594, "loss": 0.6904, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2611164152622223, "rewards/margins": 0.04314836859703064, "rewards/rejected": -0.30426478385925293, "step": 5160 }, { "epoch": 0.34, "learning_rate": 4.184251519886148e-06, "logits/chosen": -0.7404162287712097, "logits/rejected": -0.5939242243766785, "logps/chosen": -542.5810546875, "logps/rejected": -650.72705078125, "loss": 0.689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3464735448360443, "rewards/margins": 0.07949092984199524, "rewards/rejected": -0.42596450448036194, "step": 5170 }, { "epoch": 0.34, "learning_rate": 4.180027481818033e-06, "logits/chosen": -0.7048271894454956, "logits/rejected": -0.8187531232833862, "logps/chosen": -608.216552734375, "logps/rejected": -635.06640625, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.34151846170425415, "rewards/margins": 0.0646834746003151, "rewards/rejected": -0.40620189905166626, "step": 5180 }, { "epoch": 0.34, "learning_rate": 4.175794679903602e-06, "logits/chosen": -0.46474045515060425, "logits/rejected": -0.3354906439781189, "logps/chosen": -589.6353759765625, "logps/rejected": -601.679443359375, "loss": 0.6924, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.36199450492858887, "rewards/margins": 0.08440116792917252, "rewards/rejected": -0.4463956952095032, "step": 5190 }, { "epoch": 0.34, "learning_rate": 4.171553136223222e-06, "logits/chosen": -0.686112642288208, "logits/rejected": -0.4601070284843445, "logps/chosen": -673.9461669921875, "logps/rejected": -799.9554443359375, "loss": 0.6889, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.406625360250473, "rewards/margins": 0.11529938876628876, "rewards/rejected": -0.5219247341156006, "step": 5200 }, { "epoch": 0.34, "eval_logits/chosen": -0.33751627802848816, "eval_logits/rejected": -0.25623294711112976, "eval_logps/chosen": -600.0008544921875, "eval_logps/rejected": -653.9317626953125, "eval_loss": 0.6899478435516357, "eval_rewards/accuracies": 0.6549999713897705, "eval_rewards/chosen": -0.3679959177970886, "eval_rewards/margins": 0.07432392239570618, "eval_rewards/rejected": -0.4423198103904724, "eval_runtime": 709.8614, "eval_samples_per_second": 2.817, "eval_steps_per_second": 1.409, "step": 5200 }, { "epoch": 0.34, "learning_rate": 4.167302872902865e-06, "logits/chosen": -0.4962449073791504, "logits/rejected": -0.06477615237236023, "logps/chosen": -634.7276000976562, "logps/rejected": -722.2471923828125, "loss": 0.6884, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3832593560218811, "rewards/margins": 0.0994577631354332, "rewards/rejected": -0.4827171862125397, "step": 5210 }, { "epoch": 0.34, "learning_rate": 4.163043912113985e-06, "logits/chosen": -0.5165904760360718, "logits/rejected": -0.3449627161026001, "logps/chosen": -581.4984741210938, "logps/rejected": -603.272216796875, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.32245832681655884, "rewards/margins": 0.055768370628356934, "rewards/rejected": -0.37822669744491577, "step": 5220 }, { "epoch": 0.34, "learning_rate": 4.15877627607341e-06, "logits/chosen": -0.4022120535373688, "logits/rejected": -0.07796867191791534, "logps/chosen": -487.5970153808594, "logps/rejected": -519.3673095703125, "loss": 0.6923, "rewards/accuracies": 0.625, "rewards/chosen": -0.2710022032260895, "rewards/margins": 0.05599268153309822, "rewards/rejected": -0.3269948959350586, "step": 5230 }, { "epoch": 0.34, "learning_rate": 4.154499987043217e-06, "logits/chosen": -0.6665756106376648, "logits/rejected": -0.5083428621292114, "logps/chosen": -463.33721923828125, "logps/rejected": -551.2430419921875, "loss": 0.6886, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23913367092609406, "rewards/margins": 0.11008558422327042, "rewards/rejected": -0.34921926259994507, "step": 5240 }, { "epoch": 0.34, "learning_rate": 4.150215067330625e-06, "logits/chosen": -0.5539819598197937, "logits/rejected": -0.2385900914669037, "logps/chosen": -438.7664489746094, "logps/rejected": -541.8282470703125, "loss": 0.6892, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22971764206886292, "rewards/margins": 0.08902446925640106, "rewards/rejected": -0.3187420964241028, "step": 5250 }, { "epoch": 0.34, "learning_rate": 4.145921539287876e-06, "logits/chosen": -0.4234296679496765, "logits/rejected": -0.4965476989746094, "logps/chosen": -424.6368713378906, "logps/rejected": -521.614013671875, "loss": 0.6893, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23404428362846375, "rewards/margins": 0.11698039621114731, "rewards/rejected": -0.35102465748786926, "step": 5260 }, { "epoch": 0.34, "learning_rate": 4.141619425312115e-06, "logits/chosen": -0.5069370269775391, "logits/rejected": -0.24971413612365723, "logps/chosen": -435.09051513671875, "logps/rejected": -454.61328125, "loss": 0.6914, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.22643926739692688, "rewards/margins": 0.043964650481939316, "rewards/rejected": -0.2704039216041565, "step": 5270 }, { "epoch": 0.35, "learning_rate": 4.1373087478452735e-06, "logits/chosen": -0.49892836809158325, "logits/rejected": -0.5289067625999451, "logps/chosen": -421.7427673339844, "logps/rejected": -494.67437744140625, "loss": 0.6857, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1965021938085556, "rewards/margins": 0.12754546105861664, "rewards/rejected": -0.32404765486717224, "step": 5280 }, { "epoch": 0.35, "learning_rate": 4.132989529373959e-06, "logits/chosen": -0.6782785058021545, "logits/rejected": -0.5537897944450378, "logps/chosen": -493.59088134765625, "logps/rejected": -491.10858154296875, "loss": 0.6892, "rewards/accuracies": 0.625, "rewards/chosen": -0.23767805099487305, "rewards/margins": 0.07651616632938385, "rewards/rejected": -0.3141942322254181, "step": 5290 }, { "epoch": 0.35, "learning_rate": 4.128661792429331e-06, "logits/chosen": -0.6442875266075134, "logits/rejected": -0.5757584571838379, "logps/chosen": -471.60137939453125, "logps/rejected": -537.8292846679688, "loss": 0.6925, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2178308069705963, "rewards/margins": 0.06133151799440384, "rewards/rejected": -0.27916234731674194, "step": 5300 }, { "epoch": 0.35, "eval_logits/chosen": -0.8017680048942566, "eval_logits/rejected": -0.6898171305656433, "eval_logps/chosen": -452.794677734375, "eval_logps/rejected": -513.436767578125, "eval_loss": 0.6897254586219788, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.22078973054885864, "eval_rewards/margins": 0.08103515952825546, "eval_rewards/rejected": -0.3018249273300171, "eval_runtime": 709.4677, "eval_samples_per_second": 2.819, "eval_steps_per_second": 1.41, "step": 5300 }, { "epoch": 0.35, "learning_rate": 4.124325559586985e-06, "logits/chosen": -0.8358646631240845, "logits/rejected": -0.6630481481552124, "logps/chosen": -414.3356018066406, "logps/rejected": -449.273193359375, "loss": 0.6926, "rewards/accuracies": 0.5, "rewards/chosen": -0.2262789011001587, "rewards/margins": 0.022013569250702858, "rewards/rejected": -0.2482924908399582, "step": 5310 }, { "epoch": 0.35, "learning_rate": 4.119980853466835e-06, "logits/chosen": -0.6544975638389587, "logits/rejected": -0.30424556136131287, "logps/chosen": -454.3627014160156, "logps/rejected": -526.572265625, "loss": 0.6887, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24520829319953918, "rewards/margins": 0.0993238165974617, "rewards/rejected": -0.3445320725440979, "step": 5320 }, { "epoch": 0.35, "learning_rate": 4.115627696732997e-06, "logits/chosen": -0.5331336259841919, "logits/rejected": -0.47379574179649353, "logps/chosen": -413.2391662597656, "logps/rejected": -478.57818603515625, "loss": 0.6905, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22182488441467285, "rewards/margins": 0.08465997874736786, "rewards/rejected": -0.3064848780632019, "step": 5330 }, { "epoch": 0.35, "learning_rate": 4.111266112093668e-06, "logits/chosen": -0.7296448349952698, "logits/rejected": -0.6237698793411255, "logps/chosen": -479.72686767578125, "logps/rejected": -602.6467895507812, "loss": 0.6895, "rewards/accuracies": 0.75, "rewards/chosen": -0.27625924348831177, "rewards/margins": 0.10452202707529068, "rewards/rejected": -0.38078123331069946, "step": 5340 }, { "epoch": 0.35, "learning_rate": 4.1068961223010115e-06, "logits/chosen": -0.9738900065422058, "logits/rejected": -0.5101861953735352, "logps/chosen": -525.4327392578125, "logps/rejected": -602.5348510742188, "loss": 0.6873, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.25473204255104065, "rewards/margins": 0.09977851063013077, "rewards/rejected": -0.3545105457305908, "step": 5350 }, { "epoch": 0.35, "learning_rate": 4.102517750151034e-06, "logits/chosen": -1.0094826221466064, "logits/rejected": -0.7825483083724976, "logps/chosen": -503.8395080566406, "logps/rejected": -478.79833984375, "loss": 0.6911, "rewards/accuracies": 0.625, "rewards/chosen": -0.20879462361335754, "rewards/margins": 0.04627186059951782, "rewards/rejected": -0.25506648421287537, "step": 5360 }, { "epoch": 0.35, "learning_rate": 4.09813101848347e-06, "logits/chosen": -1.1168756484985352, "logits/rejected": -0.8798100352287292, "logps/chosen": -410.56915283203125, "logps/rejected": -495.3880310058594, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": -0.19158323109149933, "rewards/margins": 0.0676647424697876, "rewards/rejected": -0.25924795866012573, "step": 5370 }, { "epoch": 0.35, "learning_rate": 4.093735950181659e-06, "logits/chosen": -1.0186705589294434, "logits/rejected": -0.8906763792037964, "logps/chosen": -394.7282409667969, "logps/rejected": -509.34344482421875, "loss": 0.6883, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.169254869222641, "rewards/margins": 0.09007195383310318, "rewards/rejected": -0.2593268156051636, "step": 5380 }, { "epoch": 0.35, "learning_rate": 4.0893325681724326e-06, "logits/chosen": -1.2295897006988525, "logits/rejected": -1.1489160060882568, "logps/chosen": -473.47247314453125, "logps/rejected": -537.0003051757812, "loss": 0.6906, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2150508463382721, "rewards/margins": 0.07580719143152237, "rewards/rejected": -0.2908580005168915, "step": 5390 }, { "epoch": 0.35, "learning_rate": 4.084920895425988e-06, "logits/chosen": -1.0479966402053833, "logits/rejected": -0.8871825337409973, "logps/chosen": -483.7577209472656, "logps/rejected": -567.1939697265625, "loss": 0.6883, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2463931292295456, "rewards/margins": 0.06979132443666458, "rewards/rejected": -0.31618446111679077, "step": 5400 }, { "epoch": 0.35, "eval_logits/chosen": -0.9780447483062744, "eval_logits/rejected": -0.8575934171676636, "eval_logps/chosen": -446.1243896484375, "eval_logps/rejected": -496.64959716796875, "eval_loss": 0.6898158192634583, "eval_rewards/accuracies": 0.6589999794960022, "eval_rewards/chosen": -0.21411941945552826, "eval_rewards/margins": 0.07091830670833588, "eval_rewards/rejected": -0.28503772616386414, "eval_runtime": 713.6779, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 5400 }, { "epoch": 0.35, "learning_rate": 4.080500954955769e-06, "logits/chosen": -0.8831078410148621, "logits/rejected": -0.783626139163971, "logps/chosen": -485.46875, "logps/rejected": -540.5457763671875, "loss": 0.6903, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2295968234539032, "rewards/margins": 0.07336324453353882, "rewards/rejected": -0.302960067987442, "step": 5410 }, { "epoch": 0.35, "learning_rate": 4.076072769818354e-06, "logits/chosen": -1.2416795492172241, "logits/rejected": -0.9977342486381531, "logps/chosen": -439.8389587402344, "logps/rejected": -449.51629638671875, "loss": 0.6904, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19611665606498718, "rewards/margins": 0.07164300978183746, "rewards/rejected": -0.26775965094566345, "step": 5420 }, { "epoch": 0.36, "learning_rate": 4.071636363113323e-06, "logits/chosen": -0.6604939103126526, "logits/rejected": -0.5699220895767212, "logps/chosen": -478.573974609375, "logps/rejected": -478.38104248046875, "loss": 0.69, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2104438990354538, "rewards/margins": 0.053849607706069946, "rewards/rejected": -0.26429352164268494, "step": 5430 }, { "epoch": 0.36, "learning_rate": 4.067191757983146e-06, "logits/chosen": -0.6711565852165222, "logits/rejected": -0.5477440357208252, "logps/chosen": -479.72869873046875, "logps/rejected": -588.8829956054688, "loss": 0.6879, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2501530349254608, "rewards/margins": 0.11236833035945892, "rewards/rejected": -0.36252138018608093, "step": 5440 }, { "epoch": 0.36, "learning_rate": 4.062738977613063e-06, "logits/chosen": -0.45283088088035583, "logits/rejected": -0.5415789484977722, "logps/chosen": -490.650634765625, "logps/rejected": -506.84930419921875, "loss": 0.6897, "rewards/accuracies": 0.5, "rewards/chosen": -0.25617435574531555, "rewards/margins": 0.06459192931652069, "rewards/rejected": -0.32076629996299744, "step": 5450 }, { "epoch": 0.36, "learning_rate": 4.058278045230957e-06, "logits/chosen": -0.9490770101547241, "logits/rejected": -0.9275020360946655, "logps/chosen": -513.1905517578125, "logps/rejected": -554.4321899414062, "loss": 0.6926, "rewards/accuracies": 0.625, "rewards/chosen": -0.29482370615005493, "rewards/margins": 0.03965403884649277, "rewards/rejected": -0.3344777524471283, "step": 5460 }, { "epoch": 0.36, "learning_rate": 4.053808984107235e-06, "logits/chosen": -0.8835921287536621, "logits/rejected": -0.7733657360076904, "logps/chosen": -487.41632080078125, "logps/rejected": -488.1482849121094, "loss": 0.6923, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.25454801321029663, "rewards/margins": 0.04102517291903496, "rewards/rejected": -0.2955731749534607, "step": 5470 }, { "epoch": 0.36, "learning_rate": 4.04933181755471e-06, "logits/chosen": -0.8765344619750977, "logits/rejected": -0.8720951080322266, "logps/chosen": -470.0048828125, "logps/rejected": -541.0588989257812, "loss": 0.6898, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2634243369102478, "rewards/margins": 0.07826650887727737, "rewards/rejected": -0.3416908383369446, "step": 5480 }, { "epoch": 0.36, "learning_rate": 4.044846568928477e-06, "logits/chosen": -1.1567533016204834, "logits/rejected": -1.0573335886001587, "logps/chosen": -502.82281494140625, "logps/rejected": -555.1135864257812, "loss": 0.691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2412986308336258, "rewards/margins": 0.06114867329597473, "rewards/rejected": -0.30244728922843933, "step": 5490 }, { "epoch": 0.36, "learning_rate": 4.040353261625788e-06, "logits/chosen": -1.226211667060852, "logits/rejected": -0.725497305393219, "logps/chosen": -487.32177734375, "logps/rejected": -539.41943359375, "loss": 0.6885, "rewards/accuracies": 0.625, "rewards/chosen": -0.2107764482498169, "rewards/margins": 0.090728260576725, "rewards/rejected": -0.3015047311782837, "step": 5500 }, { "epoch": 0.36, "eval_logits/chosen": -0.9162071943283081, "eval_logits/rejected": -0.8003301024436951, "eval_logps/chosen": -444.0844421386719, "eval_logps/rejected": -493.766357421875, "eval_loss": 0.6899505257606506, "eval_rewards/accuracies": 0.6549999713897705, "eval_rewards/chosen": -0.2120795100927353, "eval_rewards/margins": 0.07007495313882828, "eval_rewards/rejected": -0.2821544110774994, "eval_runtime": 710.8775, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.407, "step": 5500 }, { "epoch": 0.36, "learning_rate": 4.035851919085936e-06, "logits/chosen": -1.0080921649932861, "logits/rejected": -0.7228280305862427, "logps/chosen": -507.87493896484375, "logps/rejected": -510.32391357421875, "loss": 0.6879, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.24048081040382385, "rewards/margins": 0.08116090297698975, "rewards/rejected": -0.3216416835784912, "step": 5510 }, { "epoch": 0.36, "learning_rate": 4.031342564790128e-06, "logits/chosen": -0.9639323353767395, "logits/rejected": -0.7297991514205933, "logps/chosen": -411.42010498046875, "logps/rejected": -510.60272216796875, "loss": 0.6873, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2054484337568283, "rewards/margins": 0.10395534336566925, "rewards/rejected": -0.30940383672714233, "step": 5520 }, { "epoch": 0.36, "learning_rate": 4.026825222261367e-06, "logits/chosen": -0.5591684579849243, "logits/rejected": -0.3978855013847351, "logps/chosen": -484.2080993652344, "logps/rejected": -536.934326171875, "loss": 0.6915, "rewards/accuracies": 0.625, "rewards/chosen": -0.31113308668136597, "rewards/margins": 0.059869349002838135, "rewards/rejected": -0.3710024058818817, "step": 5530 }, { "epoch": 0.36, "learning_rate": 4.022299915064321e-06, "logits/chosen": -0.7978732585906982, "logits/rejected": -0.5802344679832458, "logps/chosen": -589.16455078125, "logps/rejected": -607.4036865234375, "loss": 0.6912, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.28331655263900757, "rewards/margins": 0.07134465873241425, "rewards/rejected": -0.354661226272583, "step": 5540 }, { "epoch": 0.36, "learning_rate": 4.017766666805213e-06, "logits/chosen": -0.6884918808937073, "logits/rejected": -0.4019128382205963, "logps/chosen": -525.5811767578125, "logps/rejected": -562.6389770507812, "loss": 0.6887, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.31206080317497253, "rewards/margins": 0.07241939008235931, "rewards/rejected": -0.38448017835617065, "step": 5550 }, { "epoch": 0.36, "learning_rate": 4.013225501131684e-06, "logits/chosen": -0.6882126927375793, "logits/rejected": -0.44790735840797424, "logps/chosen": -526.3818969726562, "logps/rejected": -556.5396728515625, "loss": 0.6907, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3090043067932129, "rewards/margins": 0.05713053420186043, "rewards/rejected": -0.3661348521709442, "step": 5560 }, { "epoch": 0.36, "learning_rate": 4.008676441732679e-06, "logits/chosen": -0.35928666591644287, "logits/rejected": -0.21741943061351776, "logps/chosen": -535.5043334960938, "logps/rejected": -553.3346557617188, "loss": 0.6908, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.32920995354652405, "rewards/margins": 0.06583123654127121, "rewards/rejected": -0.39504122734069824, "step": 5570 }, { "epoch": 0.37, "learning_rate": 4.00411951233832e-06, "logits/chosen": -0.6145802736282349, "logits/rejected": -0.5013601183891296, "logps/chosen": -569.8782958984375, "logps/rejected": -626.9734497070312, "loss": 0.6878, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3501531183719635, "rewards/margins": 0.10176874697208405, "rewards/rejected": -0.45192185044288635, "step": 5580 }, { "epoch": 0.37, "learning_rate": 3.999554736719785e-06, "logits/chosen": -0.47634387016296387, "logits/rejected": -0.47885531187057495, "logps/chosen": -616.468017578125, "logps/rejected": -658.5233154296875, "loss": 0.6899, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3244911730289459, "rewards/margins": 0.08557327091693878, "rewards/rejected": -0.4100644588470459, "step": 5590 }, { "epoch": 0.37, "learning_rate": 3.994982138689177e-06, "logits/chosen": -1.1665931940078735, "logits/rejected": -0.8447322845458984, "logps/chosen": -524.7276000976562, "logps/rejected": -590.0232543945312, "loss": 0.6896, "rewards/accuracies": 0.625, "rewards/chosen": -0.28732746839523315, "rewards/margins": 0.06714684516191483, "rewards/rejected": -0.35447433590888977, "step": 5600 }, { "epoch": 0.37, "eval_logits/chosen": -0.7256837487220764, "eval_logits/rejected": -0.6177822351455688, "eval_logps/chosen": -536.2576904296875, "eval_logps/rejected": -598.6423950195312, "eval_loss": 0.6898110508918762, "eval_rewards/accuracies": 0.6570000052452087, "eval_rewards/chosen": -0.3042526841163635, "eval_rewards/margins": 0.08277777582406998, "eval_rewards/rejected": -0.3870304524898529, "eval_runtime": 711.2848, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 5600 }, { "epoch": 0.37, "learning_rate": 3.990401742099408e-06, "logits/chosen": -0.596218466758728, "logits/rejected": -0.6072180867195129, "logps/chosen": -435.23553466796875, "logps/rejected": -483.957763671875, "loss": 0.6912, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.25637245178222656, "rewards/margins": 0.05607098340988159, "rewards/rejected": -0.31244343519210815, "step": 5610 }, { "epoch": 0.37, "learning_rate": 3.985813570844072e-06, "logits/chosen": -0.9559303522109985, "logits/rejected": -0.8587062954902649, "logps/chosen": -574.4474487304688, "logps/rejected": -632.5167236328125, "loss": 0.6893, "rewards/accuracies": 0.75, "rewards/chosen": -0.280060738325119, "rewards/margins": 0.08562088757753372, "rewards/rejected": -0.36568158864974976, "step": 5620 }, { "epoch": 0.37, "learning_rate": 3.981217648857316e-06, "logits/chosen": -0.8603259325027466, "logits/rejected": -0.8051680326461792, "logps/chosen": -411.08636474609375, "logps/rejected": -505.117431640625, "loss": 0.6892, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23854593932628632, "rewards/margins": 0.09224925190210342, "rewards/rejected": -0.33079519867897034, "step": 5630 }, { "epoch": 0.37, "learning_rate": 3.97661400011372e-06, "logits/chosen": -0.8633445501327515, "logits/rejected": -0.9625928997993469, "logps/chosen": -488.6290588378906, "logps/rejected": -523.1886596679688, "loss": 0.6925, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24560177326202393, "rewards/margins": 0.04598253220319748, "rewards/rejected": -0.291584312915802, "step": 5640 }, { "epoch": 0.37, "learning_rate": 3.972002648628174e-06, "logits/chosen": -0.9786394834518433, "logits/rejected": -0.8444933891296387, "logps/chosen": -515.1180419921875, "logps/rejected": -510.667724609375, "loss": 0.6921, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23808720707893372, "rewards/margins": 0.04324156790971756, "rewards/rejected": -0.2813287377357483, "step": 5650 }, { "epoch": 0.37, "learning_rate": 3.967383618455743e-06, "logits/chosen": -0.9421932101249695, "logits/rejected": -1.0383517742156982, "logps/chosen": -491.4378356933594, "logps/rejected": -569.2808837890625, "loss": 0.6897, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2643181085586548, "rewards/margins": 0.05965255945920944, "rewards/rejected": -0.3239706754684448, "step": 5660 }, { "epoch": 0.37, "learning_rate": 3.9627569336915515e-06, "logits/chosen": -1.2176904678344727, "logits/rejected": -1.0065767765045166, "logps/chosen": -496.88671875, "logps/rejected": -538.030029296875, "loss": 0.6881, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24942393600940704, "rewards/margins": 0.0978199765086174, "rewards/rejected": -0.34724390506744385, "step": 5670 }, { "epoch": 0.37, "learning_rate": 3.9581226184706555e-06, "logits/chosen": -1.0491501092910767, "logits/rejected": -1.2479287385940552, "logps/chosen": -477.5037536621094, "logps/rejected": -624.1370849609375, "loss": 0.6893, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2828384041786194, "rewards/margins": 0.07593565434217453, "rewards/rejected": -0.3587740659713745, "step": 5680 }, { "epoch": 0.37, "learning_rate": 3.953480696967912e-06, "logits/chosen": -0.6281255483627319, "logits/rejected": -0.7220240831375122, "logps/chosen": -541.6065063476562, "logps/rejected": -634.47216796875, "loss": 0.6914, "rewards/accuracies": 0.625, "rewards/chosen": -0.3322349190711975, "rewards/margins": 0.0564199797809124, "rewards/rejected": -0.388654887676239, "step": 5690 }, { "epoch": 0.37, "learning_rate": 3.948831193397857e-06, "logits/chosen": -0.6184535622596741, "logits/rejected": -0.6359222531318665, "logps/chosen": -447.3412170410156, "logps/rejected": -531.6007080078125, "loss": 0.6898, "rewards/accuracies": 0.5, "rewards/chosen": -0.2813258171081543, "rewards/margins": 0.0784335732460022, "rewards/rejected": -0.3597593903541565, "step": 5700 }, { "epoch": 0.37, "eval_logits/chosen": -0.7838326692581177, "eval_logits/rejected": -0.6697111129760742, "eval_logps/chosen": -537.5149536132812, "eval_logps/rejected": -606.8261108398438, "eval_loss": 0.6897205710411072, "eval_rewards/accuracies": 0.6549999713897705, "eval_rewards/chosen": -0.305510014295578, "eval_rewards/margins": 0.08970417827367783, "eval_rewards/rejected": -0.39521417021751404, "eval_runtime": 712.2826, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 5700 }, { "epoch": 0.37, "learning_rate": 3.94417413201458e-06, "logits/chosen": -1.0492521524429321, "logits/rejected": -0.7466319799423218, "logps/chosen": -454.7074279785156, "logps/rejected": -513.38134765625, "loss": 0.69, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2431359589099884, "rewards/margins": 0.07644067704677582, "rewards/rejected": -0.3195766806602478, "step": 5710 }, { "epoch": 0.37, "learning_rate": 3.9395095371115935e-06, "logits/chosen": -1.0224096775054932, "logits/rejected": -0.9557268023490906, "logps/chosen": -423.9097595214844, "logps/rejected": -488.0465393066406, "loss": 0.6883, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.21125265955924988, "rewards/margins": 0.07474798709154129, "rewards/rejected": -0.28600066900253296, "step": 5720 }, { "epoch": 0.37, "learning_rate": 3.93483743302171e-06, "logits/chosen": -1.046775221824646, "logits/rejected": -0.7583211064338684, "logps/chosen": -422.81829833984375, "logps/rejected": -471.67572021484375, "loss": 0.6892, "rewards/accuracies": 0.625, "rewards/chosen": -0.21002662181854248, "rewards/margins": 0.07259279489517212, "rewards/rejected": -0.282619446516037, "step": 5730 }, { "epoch": 0.38, "learning_rate": 3.930157844116913e-06, "logits/chosen": -0.9095266461372375, "logits/rejected": -0.6122316122055054, "logps/chosen": -429.01556396484375, "logps/rejected": -483.1982421875, "loss": 0.6897, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21925830841064453, "rewards/margins": 0.06890133768320084, "rewards/rejected": -0.28815966844558716, "step": 5740 }, { "epoch": 0.38, "learning_rate": 3.925470794808229e-06, "logits/chosen": -0.7558301091194153, "logits/rejected": -0.6763758659362793, "logps/chosen": -495.16259765625, "logps/rejected": -552.1781005859375, "loss": 0.6896, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25617489218711853, "rewards/margins": 0.08983282744884491, "rewards/rejected": -0.346007764339447, "step": 5750 }, { "epoch": 0.38, "learning_rate": 3.920776309545606e-06, "logits/chosen": -1.1489700078964233, "logits/rejected": -0.9279036521911621, "logps/chosen": -312.0481872558594, "logps/rejected": -378.68585205078125, "loss": 0.69, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.15934118628501892, "rewards/margins": 0.07347016036510468, "rewards/rejected": -0.2328113615512848, "step": 5760 }, { "epoch": 0.38, "learning_rate": 3.916074412817778e-06, "logits/chosen": -0.9631127119064331, "logits/rejected": -0.6907010078430176, "logps/chosen": -439.86724853515625, "logps/rejected": -540.8177490234375, "loss": 0.6878, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1965332329273224, "rewards/margins": 0.10042830556631088, "rewards/rejected": -0.29696157574653625, "step": 5770 }, { "epoch": 0.38, "learning_rate": 3.911365129152139e-06, "logits/chosen": -0.8396314382553101, "logits/rejected": -0.7624481916427612, "logps/chosen": -462.0514221191406, "logps/rejected": -539.0438232421875, "loss": 0.6897, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22961759567260742, "rewards/margins": 0.08253227919340134, "rewards/rejected": -0.31214988231658936, "step": 5780 }, { "epoch": 0.38, "learning_rate": 3.906648483114623e-06, "logits/chosen": -0.6250481605529785, "logits/rejected": -0.40239137411117554, "logps/chosen": -438.18963623046875, "logps/rejected": -515.53662109375, "loss": 0.687, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2408660352230072, "rewards/margins": 0.10993202030658722, "rewards/rejected": -0.3507980704307556, "step": 5790 }, { "epoch": 0.38, "learning_rate": 3.901924499309564e-06, "logits/chosen": -0.30131223797798157, "logits/rejected": -0.2699768841266632, "logps/chosen": -511.6910095214844, "logps/rejected": -577.87060546875, "loss": 0.6894, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2829861044883728, "rewards/margins": 0.0987856313586235, "rewards/rejected": -0.3817717134952545, "step": 5800 }, { "epoch": 0.38, "eval_logits/chosen": -0.3526945114135742, "eval_logits/rejected": -0.2645464539527893, "eval_logps/chosen": -553.59912109375, "eval_logps/rejected": -630.135498046875, "eval_loss": 0.6897721290588379, "eval_rewards/accuracies": 0.6524999737739563, "eval_rewards/chosen": -0.3215942084789276, "eval_rewards/margins": 0.09692942351102829, "eval_rewards/rejected": -0.4185236096382141, "eval_runtime": 709.8582, "eval_samples_per_second": 2.817, "eval_steps_per_second": 1.409, "step": 5800 }, { "epoch": 0.38, "learning_rate": 3.897193202379575e-06, "logits/chosen": -0.511224091053009, "logits/rejected": -0.2985347509384155, "logps/chosen": -489.2774353027344, "logps/rejected": -567.6716918945312, "loss": 0.6891, "rewards/accuracies": 0.625, "rewards/chosen": -0.2887997329235077, "rewards/margins": 0.09536701440811157, "rewards/rejected": -0.3841667175292969, "step": 5810 }, { "epoch": 0.38, "learning_rate": 3.8924546170054215e-06, "logits/chosen": -0.6443430185317993, "logits/rejected": -0.4279851019382477, "logps/chosen": -457.66064453125, "logps/rejected": -509.69818115234375, "loss": 0.6913, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2417530119419098, "rewards/margins": 0.07151090353727341, "rewards/rejected": -0.3132639229297638, "step": 5820 }, { "epoch": 0.38, "learning_rate": 3.887708767905883e-06, "logits/chosen": -0.9861922264099121, "logits/rejected": -0.7905910015106201, "logps/chosen": -462.68408203125, "logps/rejected": -462.4227600097656, "loss": 0.6915, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2189168483018875, "rewards/margins": 0.06558167934417725, "rewards/rejected": -0.28449851274490356, "step": 5830 }, { "epoch": 0.38, "learning_rate": 3.882955679837636e-06, "logits/chosen": -0.8170859217643738, "logits/rejected": -0.9146119356155396, "logps/chosen": -450.39617919921875, "logps/rejected": -506.41033935546875, "loss": 0.6914, "rewards/accuracies": 0.75, "rewards/chosen": -0.21101923286914825, "rewards/margins": 0.05363879352807999, "rewards/rejected": -0.26465800404548645, "step": 5840 }, { "epoch": 0.38, "learning_rate": 3.878195377595113e-06, "logits/chosen": -0.7225337028503418, "logits/rejected": -0.6112938523292542, "logps/chosen": -427.2122497558594, "logps/rejected": -513.8952026367188, "loss": 0.69, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19130650162696838, "rewards/margins": 0.09224653244018555, "rewards/rejected": -0.28355303406715393, "step": 5850 }, { "epoch": 0.38, "learning_rate": 3.873427886010384e-06, "logits/chosen": -0.8298704028129578, "logits/rejected": -0.5080928206443787, "logps/chosen": -387.6654357910156, "logps/rejected": -454.1085510253906, "loss": 0.6886, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19721348583698273, "rewards/margins": 0.08553223311901093, "rewards/rejected": -0.28274568915367126, "step": 5860 }, { "epoch": 0.38, "learning_rate": 3.868653229953021e-06, "logits/chosen": -0.8196694254875183, "logits/rejected": -0.6232892274856567, "logps/chosen": -457.27911376953125, "logps/rejected": -562.1744384765625, "loss": 0.6879, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22006423771381378, "rewards/margins": 0.10695929825305939, "rewards/rejected": -0.3270235061645508, "step": 5870 }, { "epoch": 0.38, "learning_rate": 3.8638714343299675e-06, "logits/chosen": -0.8010458946228027, "logits/rejected": -0.6639386415481567, "logps/chosen": -433.7034606933594, "logps/rejected": -544.5938720703125, "loss": 0.6878, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.21536073088645935, "rewards/margins": 0.09577467292547226, "rewards/rejected": -0.3111354112625122, "step": 5880 }, { "epoch": 0.39, "learning_rate": 3.859082524085414e-06, "logits/chosen": -0.6064971685409546, "logits/rejected": -0.4940188527107239, "logps/chosen": -530.43115234375, "logps/rejected": -537.8308715820312, "loss": 0.6898, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.25808802247047424, "rewards/margins": 0.06763879209756851, "rewards/rejected": -0.32572680711746216, "step": 5890 }, { "epoch": 0.39, "learning_rate": 3.854286524200659e-06, "logits/chosen": -1.008453130722046, "logits/rejected": -0.5371363162994385, "logps/chosen": -514.41748046875, "logps/rejected": -526.7296752929688, "loss": 0.6899, "rewards/accuracies": 0.625, "rewards/chosen": -0.2380000799894333, "rewards/margins": 0.06294857710599899, "rewards/rejected": -0.3009486794471741, "step": 5900 }, { "epoch": 0.39, "eval_logits/chosen": -0.5847914218902588, "eval_logits/rejected": -0.4853719472885132, "eval_logps/chosen": -493.8011779785156, "eval_logps/rejected": -557.822265625, "eval_loss": 0.6896816492080688, "eval_rewards/accuracies": 0.6535000205039978, "eval_rewards/chosen": -0.2617962658405304, "eval_rewards/margins": 0.08441410213708878, "eval_rewards/rejected": -0.34621039032936096, "eval_runtime": 709.3468, "eval_samples_per_second": 2.819, "eval_steps_per_second": 1.41, "step": 5900 }, { "epoch": 0.39, "learning_rate": 3.849483459693991e-06, "logits/chosen": -0.7097777128219604, "logits/rejected": -0.4602317214012146, "logps/chosen": -467.04962158203125, "logps/rejected": -541.8302001953125, "loss": 0.6856, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.25737297534942627, "rewards/margins": 0.11344969272613525, "rewards/rejected": -0.3708226680755615, "step": 5910 }, { "epoch": 0.39, "learning_rate": 3.844673355620544e-06, "logits/chosen": -0.7129204273223877, "logits/rejected": -0.409584105014801, "logps/chosen": -538.8270263671875, "logps/rejected": -606.05322265625, "loss": 0.6905, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29225417971611023, "rewards/margins": 0.1055116280913353, "rewards/rejected": -0.3977658152580261, "step": 5920 }, { "epoch": 0.39, "learning_rate": 3.839856237072178e-06, "logits/chosen": -0.5346713066101074, "logits/rejected": -0.4406895637512207, "logps/chosen": -470.629638671875, "logps/rejected": -605.9718017578125, "loss": 0.6883, "rewards/accuracies": 0.625, "rewards/chosen": -0.28805553913116455, "rewards/margins": 0.11815004050731659, "rewards/rejected": -0.40620556473731995, "step": 5930 }, { "epoch": 0.39, "learning_rate": 3.8350321291773455e-06, "logits/chosen": -0.7473275661468506, "logits/rejected": -0.5856843590736389, "logps/chosen": -414.0442810058594, "logps/rejected": -446.9261169433594, "loss": 0.6912, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.21152237057685852, "rewards/margins": 0.07707812637090683, "rewards/rejected": -0.28860050439834595, "step": 5940 }, { "epoch": 0.39, "learning_rate": 3.830201057100953e-06, "logits/chosen": -1.1814167499542236, "logits/rejected": -1.068490743637085, "logps/chosen": -404.77191162109375, "logps/rejected": -512.7886352539062, "loss": 0.6883, "rewards/accuracies": 0.75, "rewards/chosen": -0.21299895644187927, "rewards/margins": 0.09252388775348663, "rewards/rejected": -0.3055228590965271, "step": 5950 }, { "epoch": 0.39, "learning_rate": 3.82536304604424e-06, "logits/chosen": -0.8924945592880249, "logits/rejected": -0.7654592394828796, "logps/chosen": -418.2235412597656, "logps/rejected": -458.09149169921875, "loss": 0.6913, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18205930292606354, "rewards/margins": 0.07160413265228271, "rewards/rejected": -0.25366342067718506, "step": 5960 }, { "epoch": 0.39, "learning_rate": 3.8205181212446435e-06, "logits/chosen": -1.131145715713501, "logits/rejected": -0.971734881401062, "logps/chosen": -474.43389892578125, "logps/rejected": -510.0201110839844, "loss": 0.6899, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20387499034404755, "rewards/margins": 0.0759897381067276, "rewards/rejected": -0.27986472845077515, "step": 5970 }, { "epoch": 0.39, "learning_rate": 3.815666307975664e-06, "logits/chosen": -1.009289026260376, "logits/rejected": -1.0255839824676514, "logps/chosen": -456.5907287597656, "logps/rejected": -486.37744140625, "loss": 0.6921, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2192729264497757, "rewards/margins": 0.04896865412592888, "rewards/rejected": -0.2682415843009949, "step": 5980 }, { "epoch": 0.39, "learning_rate": 3.8108076315467346e-06, "logits/chosen": -1.2979350090026855, "logits/rejected": -1.1967132091522217, "logps/chosen": -456.06988525390625, "logps/rejected": -439.7726135253906, "loss": 0.6911, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19878706336021423, "rewards/margins": 0.05842934176325798, "rewards/rejected": -0.2572163939476013, "step": 5990 }, { "epoch": 0.39, "learning_rate": 3.805942117303093e-06, "logits/chosen": -1.3287451267242432, "logits/rejected": -1.1833655834197998, "logps/chosen": -516.2380981445312, "logps/rejected": -527.54638671875, "loss": 0.6896, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2002025842666626, "rewards/margins": 0.05783183500170708, "rewards/rejected": -0.2580344080924988, "step": 6000 }, { "epoch": 0.39, "eval_logits/chosen": -0.9796077013015747, "eval_logits/rejected": -0.8594146966934204, "eval_logps/chosen": -429.3099060058594, "eval_logps/rejected": -477.4434509277344, "eval_loss": 0.6897467970848083, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -0.19730499386787415, "eval_rewards/margins": 0.06852658092975616, "eval_rewards/rejected": -0.2658315598964691, "eval_runtime": 712.3923, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.404, "step": 6000 }, { "epoch": 0.39, "learning_rate": 3.8010697906256446e-06, "logits/chosen": -1.0929971933364868, "logits/rejected": -0.8249849081039429, "logps/chosen": -452.6302795410156, "logps/rejected": -492.43951416015625, "loss": 0.6908, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24593381583690643, "rewards/margins": 0.06763944774866104, "rewards/rejected": -0.31357327103614807, "step": 6010 }, { "epoch": 0.39, "learning_rate": 3.7961906769308323e-06, "logits/chosen": -0.6220898628234863, "logits/rejected": -0.6787351369857788, "logps/chosen": -449.19256591796875, "logps/rejected": -520.2421875, "loss": 0.6902, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24295739829540253, "rewards/margins": 0.06097465008497238, "rewards/rejected": -0.3039320409297943, "step": 6020 }, { "epoch": 0.39, "learning_rate": 3.7913048016705028e-06, "logits/chosen": -1.0191593170166016, "logits/rejected": -0.8695308566093445, "logps/chosen": -485.6734924316406, "logps/rejected": -543.5657958984375, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22573387622833252, "rewards/margins": 0.05994036793708801, "rewards/rejected": -0.2856742739677429, "step": 6030 }, { "epoch": 0.4, "learning_rate": 3.786412190331775e-06, "logits/chosen": -0.974204421043396, "logits/rejected": -0.49800848960876465, "logps/chosen": -398.828125, "logps/rejected": -441.0577087402344, "loss": 0.6902, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19596761465072632, "rewards/margins": 0.07932895421981812, "rewards/rejected": -0.2752965986728668, "step": 6040 }, { "epoch": 0.4, "learning_rate": 3.781512868436906e-06, "logits/chosen": -0.9405696988105774, "logits/rejected": -0.9803541898727417, "logps/chosen": -335.2028503417969, "logps/rejected": -394.23785400390625, "loss": 0.6901, "rewards/accuracies": 0.625, "rewards/chosen": -0.20259208977222443, "rewards/margins": 0.051246047019958496, "rewards/rejected": -0.25383812189102173, "step": 6050 }, { "epoch": 0.4, "learning_rate": 3.7766068615431605e-06, "logits/chosen": -0.8138763308525085, "logits/rejected": -0.7067015767097473, "logps/chosen": -491.45428466796875, "logps/rejected": -501.21636962890625, "loss": 0.6913, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23436717689037323, "rewards/margins": 0.058397091925144196, "rewards/rejected": -0.292764276266098, "step": 6060 }, { "epoch": 0.4, "learning_rate": 3.771694195242671e-06, "logits/chosen": -1.1549289226531982, "logits/rejected": -0.5855950713157654, "logps/chosen": -562.266845703125, "logps/rejected": -520.1834716796875, "loss": 0.6907, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2630881071090698, "rewards/margins": 0.06964752078056335, "rewards/rejected": -0.33273565769195557, "step": 6070 }, { "epoch": 0.4, "learning_rate": 3.766774895162314e-06, "logits/chosen": -0.8373686075210571, "logits/rejected": -0.8374196290969849, "logps/chosen": -541.1320190429688, "logps/rejected": -534.19580078125, "loss": 0.6907, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.29459238052368164, "rewards/margins": 0.049199365079402924, "rewards/rejected": -0.34379175305366516, "step": 6080 }, { "epoch": 0.4, "learning_rate": 3.7618489869635666e-06, "logits/chosen": -0.8949093818664551, "logits/rejected": -0.7191926836967468, "logps/chosen": -550.8589477539062, "logps/rejected": -578.9771728515625, "loss": 0.692, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3038319945335388, "rewards/margins": 0.047861166298389435, "rewards/rejected": -0.35169318318367004, "step": 6090 }, { "epoch": 0.4, "learning_rate": 3.756916496342379e-06, "logits/chosen": -1.0668814182281494, "logits/rejected": -1.097706913948059, "logps/chosen": -453.369873046875, "logps/rejected": -540.5994873046875, "loss": 0.6885, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.266497939825058, "rewards/margins": 0.0787767842411995, "rewards/rejected": -0.34527474641799927, "step": 6100 }, { "epoch": 0.4, "eval_logits/chosen": -0.9699369072914124, "eval_logits/rejected": -0.8472295999526978, "eval_logps/chosen": -491.1470031738281, "eval_logps/rejected": -547.7916870117188, "eval_loss": 0.6896329522132874, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": -0.25914210081100464, "eval_rewards/margins": 0.0770377516746521, "eval_rewards/rejected": -0.33617985248565674, "eval_runtime": 711.0016, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.406, "step": 6100 }, { "epoch": 0.4, "learning_rate": 3.751977449029039e-06, "logits/chosen": -0.7568556070327759, "logits/rejected": -0.6889457702636719, "logps/chosen": -549.9276123046875, "logps/rejected": -603.6575317382812, "loss": 0.6898, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2898511290550232, "rewards/margins": 0.09112556278705597, "rewards/rejected": -0.38097670674324036, "step": 6110 }, { "epoch": 0.4, "learning_rate": 3.747031870788037e-06, "logits/chosen": -1.1127030849456787, "logits/rejected": -0.9563379287719727, "logps/chosen": -552.7032470703125, "logps/rejected": -554.0695190429688, "loss": 0.6896, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23594991862773895, "rewards/margins": 0.08378570526838303, "rewards/rejected": -0.3197356164455414, "step": 6120 }, { "epoch": 0.4, "learning_rate": 3.7420797874179326e-06, "logits/chosen": -0.8063844442367554, "logits/rejected": -0.7351914644241333, "logps/chosen": -502.19293212890625, "logps/rejected": -521.9371337890625, "loss": 0.6895, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2650834321975708, "rewards/margins": 0.07863331586122513, "rewards/rejected": -0.34371674060821533, "step": 6130 }, { "epoch": 0.4, "learning_rate": 3.7371212247512167e-06, "logits/chosen": -1.356092095375061, "logits/rejected": -1.071885347366333, "logps/chosen": -530.7686157226562, "logps/rejected": -548.25927734375, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20437414944171906, "rewards/margins": 0.08259885013103485, "rewards/rejected": -0.2869729995727539, "step": 6140 }, { "epoch": 0.4, "learning_rate": 3.7321562086541817e-06, "logits/chosen": -1.1088091135025024, "logits/rejected": -1.0533579587936401, "logps/chosen": -501.5927734375, "logps/rejected": -564.1729736328125, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24861261248588562, "rewards/margins": 0.06178991124033928, "rewards/rejected": -0.3104025721549988, "step": 6150 }, { "epoch": 0.4, "learning_rate": 3.7271847650267834e-06, "logits/chosen": -0.9327411651611328, "logits/rejected": -0.7886485457420349, "logps/chosen": -478.2225036621094, "logps/rejected": -538.6837158203125, "loss": 0.6895, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2782878577709198, "rewards/margins": 0.0613514706492424, "rewards/rejected": -0.3396393656730652, "step": 6160 }, { "epoch": 0.4, "learning_rate": 3.7222069198025086e-06, "logits/chosen": -0.8222224116325378, "logits/rejected": -0.6890066862106323, "logps/chosen": -570.3583984375, "logps/rejected": -650.7569580078125, "loss": 0.6893, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3618159890174866, "rewards/margins": 0.10099077224731445, "rewards/rejected": -0.462806761264801, "step": 6170 }, { "epoch": 0.4, "learning_rate": 3.7172226989482353e-06, "logits/chosen": -0.9450828433036804, "logits/rejected": -0.867464542388916, "logps/chosen": -548.2552490234375, "logps/rejected": -607.8885498046875, "loss": 0.6908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.33828240633010864, "rewards/margins": 0.06177844852209091, "rewards/rejected": -0.40006089210510254, "step": 6180 }, { "epoch": 0.4, "learning_rate": 3.7122321284641007e-06, "logits/chosen": -1.3301527500152588, "logits/rejected": -1.1098195314407349, "logps/chosen": -669.6278076171875, "logps/rejected": -665.1828002929688, "loss": 0.6877, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3068411350250244, "rewards/margins": 0.1060449630022049, "rewards/rejected": -0.4128860831260681, "step": 6190 }, { "epoch": 0.41, "learning_rate": 3.707235234383365e-06, "logits/chosen": -1.1410796642303467, "logits/rejected": -1.0234348773956299, "logps/chosen": -490.13421630859375, "logps/rejected": -463.69854736328125, "loss": 0.6905, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22888025641441345, "rewards/margins": 0.06034322455525398, "rewards/rejected": -0.28922349214553833, "step": 6200 }, { "epoch": 0.41, "eval_logits/chosen": -1.252907395362854, "eval_logits/rejected": -1.114737629890442, "eval_logps/chosen": -485.2598876953125, "eval_logps/rejected": -540.0977783203125, "eval_loss": 0.6896072030067444, "eval_rewards/accuracies": 0.6610000133514404, "eval_rewards/chosen": -0.2532549202442169, "eval_rewards/margins": 0.07523093372583389, "eval_rewards/rejected": -0.3284858763217926, "eval_runtime": 711.0801, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.406, "step": 6200 }, { "epoch": 0.41, "learning_rate": 3.702232042772277e-06, "logits/chosen": -1.2290165424346924, "logits/rejected": -1.167140007019043, "logps/chosen": -497.998291015625, "logps/rejected": -577.1484985351562, "loss": 0.6885, "rewards/accuracies": 0.75, "rewards/chosen": -0.29545214772224426, "rewards/margins": 0.101504847407341, "rewards/rejected": -0.3969569802284241, "step": 6210 }, { "epoch": 0.41, "learning_rate": 3.6972225797299325e-06, "logits/chosen": -1.2075756788253784, "logits/rejected": -1.2317383289337158, "logps/chosen": -572.742431640625, "logps/rejected": -648.7164916992188, "loss": 0.6871, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3222338557243347, "rewards/margins": 0.09190957248210907, "rewards/rejected": -0.4141434133052826, "step": 6220 }, { "epoch": 0.41, "learning_rate": 3.692206871388147e-06, "logits/chosen": -1.2632339000701904, "logits/rejected": -0.8921878933906555, "logps/chosen": -526.5006713867188, "logps/rejected": -603.5263061523438, "loss": 0.6893, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2941119372844696, "rewards/margins": 0.11825486272573471, "rewards/rejected": -0.4123667776584625, "step": 6230 }, { "epoch": 0.41, "learning_rate": 3.6871849439113115e-06, "logits/chosen": -0.7660868167877197, "logits/rejected": -0.9121445417404175, "logps/chosen": -515.5945434570312, "logps/rejected": -584.4954223632812, "loss": 0.6892, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.28568369150161743, "rewards/margins": 0.07844862341880798, "rewards/rejected": -0.3641323447227478, "step": 6240 }, { "epoch": 0.41, "learning_rate": 3.682156823496259e-06, "logits/chosen": -1.1769955158233643, "logits/rejected": -0.8457037806510925, "logps/chosen": -528.9486694335938, "logps/rejected": -607.7376708984375, "loss": 0.6914, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3150347173213959, "rewards/margins": 0.11115459352731705, "rewards/rejected": -0.4261893332004547, "step": 6250 }, { "epoch": 0.41, "learning_rate": 3.67712253637213e-06, "logits/chosen": -1.2539669275283813, "logits/rejected": -1.0763676166534424, "logps/chosen": -583.54296875, "logps/rejected": -560.4552001953125, "loss": 0.689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.29124850034713745, "rewards/margins": 0.06682470440864563, "rewards/rejected": -0.35807323455810547, "step": 6260 }, { "epoch": 0.41, "learning_rate": 3.672082108800231e-06, "logits/chosen": -1.0638225078582764, "logits/rejected": -1.035290002822876, "logps/chosen": -571.1708374023438, "logps/rejected": -620.5901489257812, "loss": 0.6893, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3573550581932068, "rewards/margins": 0.08345872163772583, "rewards/rejected": -0.4408137798309326, "step": 6270 }, { "epoch": 0.41, "learning_rate": 3.6670355670739012e-06, "logits/chosen": -1.1504619121551514, "logits/rejected": -0.996173083782196, "logps/chosen": -468.37017822265625, "logps/rejected": -558.45263671875, "loss": 0.6883, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.30641672015190125, "rewards/margins": 0.09272117912769318, "rewards/rejected": -0.3991378843784332, "step": 6280 }, { "epoch": 0.41, "learning_rate": 3.6619829375183745e-06, "logits/chosen": -1.3015220165252686, "logits/rejected": -1.161517858505249, "logps/chosen": -546.1529541015625, "logps/rejected": -649.0066528320312, "loss": 0.6889, "rewards/accuracies": 0.75, "rewards/chosen": -0.3201504349708557, "rewards/margins": 0.11892461776733398, "rewards/rejected": -0.4390750527381897, "step": 6290 }, { "epoch": 0.41, "learning_rate": 3.6569242464906427e-06, "logits/chosen": -1.3468759059906006, "logits/rejected": -1.2353953123092651, "logps/chosen": -450.9453125, "logps/rejected": -544.6844482421875, "loss": 0.6885, "rewards/accuracies": 0.625, "rewards/chosen": -0.24321627616882324, "rewards/margins": 0.0726899802684784, "rewards/rejected": -0.31590625643730164, "step": 6300 }, { "epoch": 0.41, "eval_logits/chosen": -1.4056432247161865, "eval_logits/rejected": -1.2599819898605347, "eval_logps/chosen": -469.2735595703125, "eval_logps/rejected": -521.1331176757812, "eval_loss": 0.6896970272064209, "eval_rewards/accuracies": 0.6635000109672546, "eval_rewards/chosen": -0.23726864159107208, "eval_rewards/margins": 0.07225258648395538, "eval_rewards/rejected": -0.30952122807502747, "eval_runtime": 711.0613, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.406, "step": 6300 }, { "epoch": 0.41, "learning_rate": 3.6518595203793156e-06, "logits/chosen": -1.3377609252929688, "logits/rejected": -1.2768046855926514, "logps/chosen": -483.3002014160156, "logps/rejected": -601.4906616210938, "loss": 0.6901, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22458593547344208, "rewards/margins": 0.1047157272696495, "rewards/rejected": -0.329301655292511, "step": 6310 }, { "epoch": 0.41, "learning_rate": 3.646788785604485e-06, "logits/chosen": -1.5382457971572876, "logits/rejected": -1.4908673763275146, "logps/chosen": -389.2879943847656, "logps/rejected": -431.7403869628906, "loss": 0.6918, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1836296021938324, "rewards/margins": 0.044697392731904984, "rewards/rejected": -0.22832700610160828, "step": 6320 }, { "epoch": 0.41, "learning_rate": 3.641712068617588e-06, "logits/chosen": -1.4615576267242432, "logits/rejected": -1.3482048511505127, "logps/chosen": -468.55291748046875, "logps/rejected": -462.7494201660156, "loss": 0.6925, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20719289779663086, "rewards/margins": 0.04774096980690956, "rewards/rejected": -0.2549338936805725, "step": 6330 }, { "epoch": 0.41, "learning_rate": 3.6366293959012673e-06, "logits/chosen": -1.3200759887695312, "logits/rejected": -1.13698410987854, "logps/chosen": -369.1453857421875, "logps/rejected": -432.5205078125, "loss": 0.6885, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18794342875480652, "rewards/margins": 0.08289827406406403, "rewards/rejected": -0.27084171772003174, "step": 6340 }, { "epoch": 0.42, "learning_rate": 3.631540793969233e-06, "logits/chosen": -1.6114717721939087, "logits/rejected": -1.4890989065170288, "logps/chosen": -392.2261047363281, "logps/rejected": -440.48553466796875, "loss": 0.6906, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.20360836386680603, "rewards/margins": 0.046581171452999115, "rewards/rejected": -0.25018954277038574, "step": 6350 }, { "epoch": 0.42, "learning_rate": 3.626446289366127e-06, "logits/chosen": -1.4099271297454834, "logits/rejected": -1.1388559341430664, "logps/chosen": -523.2926025390625, "logps/rejected": -492.5755920410156, "loss": 0.692, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.30314338207244873, "rewards/margins": 0.04080190882086754, "rewards/rejected": -0.34394532442092896, "step": 6360 }, { "epoch": 0.42, "learning_rate": 3.6213459086673786e-06, "logits/chosen": -1.2427856922149658, "logits/rejected": -1.2967463731765747, "logps/chosen": -508.787841796875, "logps/rejected": -602.8619384765625, "loss": 0.6879, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3378502130508423, "rewards/margins": 0.084700807929039, "rewards/rejected": -0.42255106568336487, "step": 6370 }, { "epoch": 0.42, "learning_rate": 3.6162396784790737e-06, "logits/chosen": -0.9532783627510071, "logits/rejected": -0.8490549921989441, "logps/chosen": -580.7579956054688, "logps/rejected": -648.3948974609375, "loss": 0.6895, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35775092244148254, "rewards/margins": 0.06779655069112778, "rewards/rejected": -0.4255475103855133, "step": 6380 }, { "epoch": 0.42, "learning_rate": 3.6111276254378095e-06, "logits/chosen": -1.2318470478057861, "logits/rejected": -1.1283295154571533, "logps/chosen": -554.7551879882812, "logps/rejected": -658.4044189453125, "loss": 0.6885, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3267982602119446, "rewards/margins": 0.10806284844875336, "rewards/rejected": -0.43486112356185913, "step": 6390 }, { "epoch": 0.42, "learning_rate": 3.606009776210559e-06, "logits/chosen": -1.2396069765090942, "logits/rejected": -1.1743987798690796, "logps/chosen": -625.6569213867188, "logps/rejected": -670.0760498046875, "loss": 0.689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3850333094596863, "rewards/margins": 0.0880778431892395, "rewards/rejected": -0.4731111526489258, "step": 6400 }, { "epoch": 0.42, "eval_logits/chosen": -1.2236859798431396, "eval_logits/rejected": -1.084417700767517, "eval_logps/chosen": -566.3375854492188, "eval_logps/rejected": -629.9546508789062, "eval_loss": 0.6897481083869934, "eval_rewards/accuracies": 0.652999997138977, "eval_rewards/chosen": -0.3343326151371002, "eval_rewards/margins": 0.08401010930538177, "eval_rewards/rejected": -0.4183427393436432, "eval_runtime": 712.7818, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 6400 }, { "epoch": 0.42, "learning_rate": 3.600886157494531e-06, "logits/chosen": -1.4335150718688965, "logits/rejected": -1.3242355585098267, "logps/chosen": -562.1947021484375, "logps/rejected": -640.3161010742188, "loss": 0.6887, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2993382513523102, "rewards/margins": 0.08990345895290375, "rewards/rejected": -0.38924169540405273, "step": 6410 }, { "epoch": 0.42, "learning_rate": 3.5957567960170304e-06, "logits/chosen": -1.4665154218673706, "logits/rejected": -0.9679352045059204, "logps/chosen": -610.47607421875, "logps/rejected": -579.2901611328125, "loss": 0.6891, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.31929099559783936, "rewards/margins": 0.0874718576669693, "rewards/rejected": -0.40676283836364746, "step": 6420 }, { "epoch": 0.42, "learning_rate": 3.590621718535319e-06, "logits/chosen": -1.0034302473068237, "logits/rejected": -0.9371808171272278, "logps/chosen": -572.9190673828125, "logps/rejected": -681.28857421875, "loss": 0.6876, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3761967122554779, "rewards/margins": 0.1092621460556984, "rewards/rejected": -0.4854588508605957, "step": 6430 }, { "epoch": 0.42, "learning_rate": 3.5854809518364775e-06, "logits/chosen": -1.368154525756836, "logits/rejected": -1.2136409282684326, "logps/chosen": -512.4900512695312, "logps/rejected": -557.9346923828125, "loss": 0.6879, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.26634320616722107, "rewards/margins": 0.08995746076107025, "rewards/rejected": -0.3563006818294525, "step": 6440 }, { "epoch": 0.42, "learning_rate": 3.580334522737262e-06, "logits/chosen": -1.1654552221298218, "logits/rejected": -1.0585110187530518, "logps/chosen": -509.4706115722656, "logps/rejected": -566.9360961914062, "loss": 0.6896, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.30752623081207275, "rewards/margins": 0.09111505001783371, "rewards/rejected": -0.39864128828048706, "step": 6450 }, { "epoch": 0.42, "learning_rate": 3.575182458083968e-06, "logits/chosen": -1.1148736476898193, "logits/rejected": -1.0673105716705322, "logps/chosen": -555.1051025390625, "logps/rejected": -637.6865234375, "loss": 0.6894, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3143001198768616, "rewards/margins": 0.11058640480041504, "rewards/rejected": -0.4248865246772766, "step": 6460 }, { "epoch": 0.42, "learning_rate": 3.5700247847522883e-06, "logits/chosen": -1.364552617073059, "logits/rejected": -1.2792692184448242, "logps/chosen": -456.8177795410156, "logps/rejected": -548.9254760742188, "loss": 0.6884, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2540197968482971, "rewards/margins": 0.09212598204612732, "rewards/rejected": -0.34614577889442444, "step": 6470 }, { "epoch": 0.42, "learning_rate": 3.5648615296471743e-06, "logits/chosen": -1.1691317558288574, "logits/rejected": -1.1110568046569824, "logps/chosen": -531.5234985351562, "logps/rejected": -679.093017578125, "loss": 0.6901, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3341607451438904, "rewards/margins": 0.10949563980102539, "rewards/rejected": -0.4436563551425934, "step": 6480 }, { "epoch": 0.42, "learning_rate": 3.559692719702693e-06, "logits/chosen": -1.022430181503296, "logits/rejected": -0.8281211853027344, "logps/chosen": -661.8057861328125, "logps/rejected": -702.7855834960938, "loss": 0.6886, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.37566065788269043, "rewards/margins": 0.0977093055844307, "rewards/rejected": -0.4733699858188629, "step": 6490 }, { "epoch": 0.43, "learning_rate": 3.55451838188189e-06, "logits/chosen": -1.336548089981079, "logits/rejected": -1.3366944789886475, "logps/chosen": -549.550048828125, "logps/rejected": -651.333984375, "loss": 0.6913, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2882632613182068, "rewards/margins": 0.07985077798366547, "rewards/rejected": -0.36811405420303345, "step": 6500 }, { "epoch": 0.43, "eval_logits/chosen": -1.3350025415420532, "eval_logits/rejected": -1.1925042867660522, "eval_logps/chosen": -548.86865234375, "eval_logps/rejected": -604.5547485351562, "eval_loss": 0.6896828413009644, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -0.3168637156486511, "eval_rewards/margins": 0.0760791078209877, "eval_rewards/rejected": -0.392942875623703, "eval_runtime": 712.0531, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.404, "step": 6500 }, { "epoch": 0.43, "learning_rate": 3.549338543176645e-06, "logits/chosen": -1.4357526302337646, "logits/rejected": -1.2661854028701782, "logps/chosen": -615.6642456054688, "logps/rejected": -650.0222778320312, "loss": 0.6903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3091757595539093, "rewards/margins": 0.06671115756034851, "rewards/rejected": -0.3758869469165802, "step": 6510 }, { "epoch": 0.43, "learning_rate": 3.5441532306075342e-06, "logits/chosen": -1.4813920259475708, "logits/rejected": -1.4216079711914062, "logps/chosen": -537.5429077148438, "logps/rejected": -614.580322265625, "loss": 0.6922, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3061237633228302, "rewards/margins": 0.030472075566649437, "rewards/rejected": -0.3365958333015442, "step": 6520 }, { "epoch": 0.43, "learning_rate": 3.5389624712236894e-06, "logits/chosen": -1.4329283237457275, "logits/rejected": -1.2437920570373535, "logps/chosen": -460.76629638671875, "logps/rejected": -458.86273193359375, "loss": 0.6929, "rewards/accuracies": 0.5, "rewards/chosen": -0.2467305213212967, "rewards/margins": 0.02502426877617836, "rewards/rejected": -0.27175474166870117, "step": 6530 }, { "epoch": 0.43, "learning_rate": 3.533766292102653e-06, "logits/chosen": -1.3346855640411377, "logits/rejected": -1.3417136669158936, "logps/chosen": -468.8275451660156, "logps/rejected": -511.7674255371094, "loss": 0.6898, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.25898295640945435, "rewards/margins": 0.054628659039735794, "rewards/rejected": -0.31361156702041626, "step": 6540 }, { "epoch": 0.43, "learning_rate": 3.5285647203502404e-06, "logits/chosen": -1.6727336645126343, "logits/rejected": -1.5055919885635376, "logps/chosen": -507.2945251464844, "logps/rejected": -517.0139770507812, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2500235438346863, "rewards/margins": 0.04748542234301567, "rewards/rejected": -0.29750901460647583, "step": 6550 }, { "epoch": 0.43, "learning_rate": 3.5233577831003983e-06, "logits/chosen": -1.3712760210037231, "logits/rejected": -1.2215116024017334, "logps/chosen": -527.0596923828125, "logps/rejected": -569.3143920898438, "loss": 0.6888, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.27488380670547485, "rewards/margins": 0.06592334061861038, "rewards/rejected": -0.34080708026885986, "step": 6560 }, { "epoch": 0.43, "learning_rate": 3.5181455075150628e-06, "logits/chosen": -1.2225806713104248, "logits/rejected": -0.9543337821960449, "logps/chosen": -461.31195068359375, "logps/rejected": -474.75469970703125, "loss": 0.6918, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2793658375740051, "rewards/margins": 0.060065627098083496, "rewards/rejected": -0.33943140506744385, "step": 6570 }, { "epoch": 0.43, "learning_rate": 3.512927920784016e-06, "logits/chosen": -1.384887456893921, "logits/rejected": -1.2461804151535034, "logps/chosen": -491.25933837890625, "logps/rejected": -593.423095703125, "loss": 0.6872, "rewards/accuracies": 0.625, "rewards/chosen": -0.2655852437019348, "rewards/margins": 0.12214380502700806, "rewards/rejected": -0.38772904872894287, "step": 6580 }, { "epoch": 0.43, "learning_rate": 3.5077050501247457e-06, "logits/chosen": -1.5173418521881104, "logits/rejected": -1.079252004623413, "logps/chosen": -513.4905395507812, "logps/rejected": -533.9172973632812, "loss": 0.6885, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23247560858726501, "rewards/margins": 0.08781534433364868, "rewards/rejected": -0.3202909529209137, "step": 6590 }, { "epoch": 0.43, "learning_rate": 3.5024769227823042e-06, "logits/chosen": -1.4403669834136963, "logits/rejected": -1.2567652463912964, "logps/chosen": -453.385009765625, "logps/rejected": -498.5462951660156, "loss": 0.6896, "rewards/accuracies": 0.625, "rewards/chosen": -0.2905137538909912, "rewards/margins": 0.08595889806747437, "rewards/rejected": -0.3764726221561432, "step": 6600 }, { "epoch": 0.43, "eval_logits/chosen": -1.3852163553237915, "eval_logits/rejected": -1.2377227544784546, "eval_logps/chosen": -535.6201171875, "eval_logps/rejected": -595.6209716796875, "eval_loss": 0.6897550225257874, "eval_rewards/accuracies": 0.6570000052452087, "eval_rewards/chosen": -0.30361512303352356, "eval_rewards/margins": 0.08039402216672897, "eval_rewards/rejected": -0.38400915265083313, "eval_runtime": 711.5795, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 6600 }, { "epoch": 0.43, "learning_rate": 3.4972435660291646e-06, "logits/chosen": -1.5519202947616577, "logits/rejected": -1.4770103693008423, "logps/chosen": -551.5953979492188, "logps/rejected": -601.3228759765625, "loss": 0.6898, "rewards/accuracies": 0.75, "rewards/chosen": -0.3084511160850525, "rewards/margins": 0.07466720044612885, "rewards/rejected": -0.38311833143234253, "step": 6610 }, { "epoch": 0.43, "learning_rate": 3.492005007165079e-06, "logits/chosen": -1.3561322689056396, "logits/rejected": -1.2405471801757812, "logps/chosen": -475.966064453125, "logps/rejected": -540.77490234375, "loss": 0.6894, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24642908573150635, "rewards/margins": 0.05865221098065376, "rewards/rejected": -0.305081307888031, "step": 6620 }, { "epoch": 0.43, "learning_rate": 3.4867612735169377e-06, "logits/chosen": -1.5976279973983765, "logits/rejected": -1.2298024892807007, "logps/chosen": -501.75970458984375, "logps/rejected": -508.467041015625, "loss": 0.6887, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27672532200813293, "rewards/margins": 0.08493559062480927, "rewards/rejected": -0.361660897731781, "step": 6630 }, { "epoch": 0.43, "learning_rate": 3.4815123924386226e-06, "logits/chosen": -1.727242112159729, "logits/rejected": -1.4164018630981445, "logps/chosen": -563.4137573242188, "logps/rejected": -560.3493041992188, "loss": 0.6905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.25687241554260254, "rewards/margins": 0.06660804897546768, "rewards/rejected": -0.3234805166721344, "step": 6640 }, { "epoch": 0.44, "learning_rate": 3.4762583913108696e-06, "logits/chosen": -1.1681668758392334, "logits/rejected": -1.0194041728973389, "logps/chosen": -583.726318359375, "logps/rejected": -609.8234252929688, "loss": 0.6911, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3139679729938507, "rewards/margins": 0.06701229512691498, "rewards/rejected": -0.3809802830219269, "step": 6650 }, { "epoch": 0.44, "learning_rate": 3.4709992975411217e-06, "logits/chosen": -1.322264313697815, "logits/rejected": -1.0045913457870483, "logps/chosen": -579.4341430664062, "logps/rejected": -617.7483520507812, "loss": 0.6879, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3244747221469879, "rewards/margins": 0.09058120846748352, "rewards/rejected": -0.41505590081214905, "step": 6660 }, { "epoch": 0.44, "learning_rate": 3.4657351385633886e-06, "logits/chosen": -1.29805326461792, "logits/rejected": -1.1522525548934937, "logps/chosen": -485.3212890625, "logps/rejected": -585.7586669921875, "loss": 0.6853, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3000306189060211, "rewards/margins": 0.11263756453990936, "rewards/rejected": -0.4126681685447693, "step": 6670 }, { "epoch": 0.44, "learning_rate": 3.4604659418381024e-06, "logits/chosen": -1.2660630941390991, "logits/rejected": -0.9279665946960449, "logps/chosen": -625.5154418945312, "logps/rejected": -693.3626708984375, "loss": 0.6894, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4074612259864807, "rewards/margins": 0.09737777709960938, "rewards/rejected": -0.5048390030860901, "step": 6680 }, { "epoch": 0.44, "learning_rate": 3.4551917348519744e-06, "logits/chosen": -1.2196203470230103, "logits/rejected": -1.0248401165008545, "logps/chosen": -648.0506591796875, "logps/rejected": -681.2324829101562, "loss": 0.689, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3665519952774048, "rewards/margins": 0.08038230240345001, "rewards/rejected": -0.446934312582016, "step": 6690 }, { "epoch": 0.44, "learning_rate": 3.4499125451178505e-06, "logits/chosen": -0.7199057340621948, "logits/rejected": -0.7669742703437805, "logps/chosen": -606.9825439453125, "logps/rejected": -685.78515625, "loss": 0.6913, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4026912748813629, "rewards/margins": 0.05418992042541504, "rewards/rejected": -0.45688119530677795, "step": 6700 }, { "epoch": 0.44, "eval_logits/chosen": -1.0456030368804932, "eval_logits/rejected": -0.9157735705375671, "eval_logps/chosen": -599.052001953125, "eval_logps/rejected": -667.5596313476562, "eval_loss": 0.6897513270378113, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -0.36704710125923157, "eval_rewards/margins": 0.0889006108045578, "eval_rewards/rejected": -0.45594772696495056, "eval_runtime": 713.5868, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.401, "step": 6700 }, { "epoch": 0.44, "learning_rate": 3.4446284001745723e-06, "logits/chosen": -0.6961051225662231, "logits/rejected": -0.6591076254844666, "logps/chosen": -626.5772705078125, "logps/rejected": -720.2598876953125, "loss": 0.6901, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.42034369707107544, "rewards/margins": 0.07693564146757126, "rewards/rejected": -0.4972793459892273, "step": 6710 }, { "epoch": 0.44, "learning_rate": 3.439339327586827e-06, "logits/chosen": -1.069394826889038, "logits/rejected": -1.0984818935394287, "logps/chosen": -469.156494140625, "logps/rejected": -561.3721923828125, "loss": 0.6881, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2791387736797333, "rewards/margins": 0.09757934510707855, "rewards/rejected": -0.37671810388565063, "step": 6720 }, { "epoch": 0.44, "learning_rate": 3.434045354945008e-06, "logits/chosen": -1.1852693557739258, "logits/rejected": -1.0654106140136719, "logps/chosen": -654.6146240234375, "logps/rejected": -706.553466796875, "loss": 0.6905, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38059002161026, "rewards/margins": 0.05667469650506973, "rewards/rejected": -0.43726474046707153, "step": 6730 }, { "epoch": 0.44, "learning_rate": 3.4287465098650713e-06, "logits/chosen": -1.4549082517623901, "logits/rejected": -1.2506242990493774, "logps/chosen": -577.9610595703125, "logps/rejected": -617.8677978515625, "loss": 0.6922, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3299856185913086, "rewards/margins": 0.054748255759477615, "rewards/rejected": -0.3847338557243347, "step": 6740 }, { "epoch": 0.44, "learning_rate": 3.423442819988387e-06, "logits/chosen": -1.1500657796859741, "logits/rejected": -1.0138304233551025, "logps/chosen": -514.3472900390625, "logps/rejected": -589.5288696289062, "loss": 0.6899, "rewards/accuracies": 0.75, "rewards/chosen": -0.3309600353240967, "rewards/margins": 0.08237095177173615, "rewards/rejected": -0.41333094239234924, "step": 6750 }, { "epoch": 0.44, "learning_rate": 3.4181343129816e-06, "logits/chosen": -1.1339448690414429, "logits/rejected": -1.0129796266555786, "logps/chosen": -467.60845947265625, "logps/rejected": -527.6707763671875, "loss": 0.6886, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.29124677181243896, "rewards/margins": 0.07435248792171478, "rewards/rejected": -0.36559924483299255, "step": 6760 }, { "epoch": 0.44, "learning_rate": 3.4128210165364837e-06, "logits/chosen": -1.1104885339736938, "logits/rejected": -0.9423076510429382, "logps/chosen": -488.9002990722656, "logps/rejected": -624.3773803710938, "loss": 0.6875, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.29749101400375366, "rewards/margins": 0.12695378065109253, "rewards/rejected": -0.4244448244571686, "step": 6770 }, { "epoch": 0.44, "learning_rate": 3.407502958369795e-06, "logits/chosen": -1.258338212966919, "logits/rejected": -1.0859405994415283, "logps/chosen": -544.0638427734375, "logps/rejected": -629.9528198242188, "loss": 0.6869, "rewards/accuracies": 0.625, "rewards/chosen": -0.3016160726547241, "rewards/margins": 0.11510130017995834, "rewards/rejected": -0.41671738028526306, "step": 6780 }, { "epoch": 0.44, "learning_rate": 3.4021801662231297e-06, "logits/chosen": -1.2023009061813354, "logits/rejected": -1.0326160192489624, "logps/chosen": -596.251708984375, "logps/rejected": -640.0447998046875, "loss": 0.6929, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3473750352859497, "rewards/margins": 0.06606845557689667, "rewards/rejected": -0.4134434759616852, "step": 6790 }, { "epoch": 0.44, "learning_rate": 3.3968526678627793e-06, "logits/chosen": -0.9796515703201294, "logits/rejected": -0.7606045007705688, "logps/chosen": -580.5537109375, "logps/rejected": -609.6514892578125, "loss": 0.6913, "rewards/accuracies": 0.625, "rewards/chosen": -0.315523236989975, "rewards/margins": 0.07320135086774826, "rewards/rejected": -0.38872459530830383, "step": 6800 }, { "epoch": 0.44, "eval_logits/chosen": -1.1365275382995605, "eval_logits/rejected": -1.0038988590240479, "eval_logps/chosen": -551.1534423828125, "eval_logps/rejected": -605.5634765625, "eval_loss": 0.6897253394126892, "eval_rewards/accuracies": 0.6604999899864197, "eval_rewards/chosen": -0.3191484808921814, "eval_rewards/margins": 0.07480315864086151, "eval_rewards/rejected": -0.3939516544342041, "eval_runtime": 712.3322, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 6800 }, { "epoch": 0.45, "learning_rate": 3.391520491079586e-06, "logits/chosen": -1.4882662296295166, "logits/rejected": -1.206194519996643, "logps/chosen": -492.03875732421875, "logps/rejected": -509.9769592285156, "loss": 0.6922, "rewards/accuracies": 0.625, "rewards/chosen": -0.2960761487483978, "rewards/margins": 0.04846780747175217, "rewards/rejected": -0.344543993473053, "step": 6810 }, { "epoch": 0.45, "learning_rate": 3.3861836636887936e-06, "logits/chosen": -1.2734750509262085, "logits/rejected": -1.0188143253326416, "logps/chosen": -568.8356323242188, "logps/rejected": -597.8465576171875, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.30201685428619385, "rewards/margins": 0.0754893496632576, "rewards/rejected": -0.37750619649887085, "step": 6820 }, { "epoch": 0.45, "learning_rate": 3.3808422135299106e-06, "logits/chosen": -1.2087388038635254, "logits/rejected": -1.1798069477081299, "logps/chosen": -597.0426025390625, "logps/rejected": -713.0640258789062, "loss": 0.6915, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3113647699356079, "rewards/margins": 0.0629459097981453, "rewards/rejected": -0.3743106722831726, "step": 6830 }, { "epoch": 0.45, "learning_rate": 3.375496168466556e-06, "logits/chosen": -1.3035211563110352, "logits/rejected": -1.0096242427825928, "logps/chosen": -470.8985900878906, "logps/rejected": -466.59674072265625, "loss": 0.6912, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25596046447753906, "rewards/margins": 0.051624737679958344, "rewards/rejected": -0.307585209608078, "step": 6840 }, { "epoch": 0.45, "learning_rate": 3.3701455563863205e-06, "logits/chosen": -1.547277808189392, "logits/rejected": -1.3091676235198975, "logps/chosen": -585.567138671875, "logps/rejected": -648.2615966796875, "loss": 0.6866, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.29369252920150757, "rewards/margins": 0.09901341050863266, "rewards/rejected": -0.39270591735839844, "step": 6850 }, { "epoch": 0.45, "learning_rate": 3.3647904052006174e-06, "logits/chosen": -1.3013098239898682, "logits/rejected": -1.2090144157409668, "logps/chosen": -570.9986572265625, "logps/rejected": -649.5106201171875, "loss": 0.6898, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3057003915309906, "rewards/margins": 0.07550543546676636, "rewards/rejected": -0.38120585680007935, "step": 6860 }, { "epoch": 0.45, "learning_rate": 3.3594307428445383e-06, "logits/chosen": -1.5023900270462036, "logits/rejected": -1.1324571371078491, "logps/chosen": -625.2291259765625, "logps/rejected": -659.7060546875, "loss": 0.6912, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2919783592224121, "rewards/margins": 0.06589344888925552, "rewards/rejected": -0.35787174105644226, "step": 6870 }, { "epoch": 0.45, "learning_rate": 3.354066597276707e-06, "logits/chosen": -1.0524461269378662, "logits/rejected": -1.0098249912261963, "logps/chosen": -505.1480407714844, "logps/rejected": -606.7689819335938, "loss": 0.6897, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2815837860107422, "rewards/margins": 0.0662604421377182, "rewards/rejected": -0.3478442430496216, "step": 6880 }, { "epoch": 0.45, "learning_rate": 3.348697996479136e-06, "logits/chosen": -1.2552497386932373, "logits/rejected": -1.074495553970337, "logps/chosen": -531.0899658203125, "logps/rejected": -532.2979736328125, "loss": 0.6907, "rewards/accuracies": 0.625, "rewards/chosen": -0.3032810389995575, "rewards/margins": 0.054679740220308304, "rewards/rejected": -0.3579607605934143, "step": 6890 }, { "epoch": 0.45, "learning_rate": 3.3433249684570757e-06, "logits/chosen": -0.9574271440505981, "logits/rejected": -0.7741330862045288, "logps/chosen": -471.801025390625, "logps/rejected": -523.0010986328125, "loss": 0.6876, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2939714789390564, "rewards/margins": 0.09691840410232544, "rewards/rejected": -0.3908899128437042, "step": 6900 }, { "epoch": 0.45, "eval_logits/chosen": -1.0858708620071411, "eval_logits/rejected": -0.9569290280342102, "eval_logps/chosen": -539.8351440429688, "eval_logps/rejected": -594.458984375, "eval_loss": 0.689683735370636, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -0.30783024430274963, "eval_rewards/margins": 0.07501688599586487, "eval_rewards/rejected": -0.3828471302986145, "eval_runtime": 712.1685, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 6900 }, { "epoch": 0.45, "learning_rate": 3.3379475412388724e-06, "logits/chosen": -1.131521463394165, "logits/rejected": -0.975692868232727, "logps/chosen": -551.9547729492188, "logps/rejected": -620.2999877929688, "loss": 0.6881, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3099997341632843, "rewards/margins": 0.10204390436410904, "rewards/rejected": -0.41204363107681274, "step": 6910 }, { "epoch": 0.45, "learning_rate": 3.3325657428758207e-06, "logits/chosen": -0.8180997967720032, "logits/rejected": -0.8396323323249817, "logps/chosen": -594.6802978515625, "logps/rejected": -686.7999877929688, "loss": 0.6878, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35100477933883667, "rewards/margins": 0.09129307419061661, "rewards/rejected": -0.4422978460788727, "step": 6920 }, { "epoch": 0.45, "learning_rate": 3.3271796014420175e-06, "logits/chosen": -0.9754294157028198, "logits/rejected": -0.6773120760917664, "logps/chosen": -604.6719970703125, "logps/rejected": -706.2049560546875, "loss": 0.6881, "rewards/accuracies": 0.75, "rewards/chosen": -0.39222168922424316, "rewards/margins": 0.11924131214618683, "rewards/rejected": -0.5114629864692688, "step": 6930 }, { "epoch": 0.45, "learning_rate": 3.3217891450342142e-06, "logits/chosen": -0.8988175392150879, "logits/rejected": -0.7741016149520874, "logps/chosen": -586.4400024414062, "logps/rejected": -626.4324951171875, "loss": 0.6893, "rewards/accuracies": 0.75, "rewards/chosen": -0.3278416097164154, "rewards/margins": 0.1089852824807167, "rewards/rejected": -0.4368268847465515, "step": 6940 }, { "epoch": 0.45, "learning_rate": 3.3163944017716733e-06, "logits/chosen": -1.3334197998046875, "logits/rejected": -1.1372065544128418, "logps/chosen": -505.1522521972656, "logps/rejected": -549.527587890625, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.28878146409988403, "rewards/margins": 0.07747305184602737, "rewards/rejected": -0.36625441908836365, "step": 6950 }, { "epoch": 0.46, "learning_rate": 3.310995399796017e-06, "logits/chosen": -1.424912691116333, "logits/rejected": -1.3498830795288086, "logps/chosen": -537.4611206054688, "logps/rejected": -573.2302856445312, "loss": 0.6916, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2634749114513397, "rewards/margins": 0.042571231722831726, "rewards/rejected": -0.30604615807533264, "step": 6960 }, { "epoch": 0.46, "learning_rate": 3.305592167271085e-06, "logits/chosen": -1.2780954837799072, "logits/rejected": -1.0659189224243164, "logps/chosen": -450.2206115722656, "logps/rejected": -520.7966918945312, "loss": 0.689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25297442078590393, "rewards/margins": 0.08148813247680664, "rewards/rejected": -0.33446258306503296, "step": 6970 }, { "epoch": 0.46, "learning_rate": 3.3001847323827846e-06, "logits/chosen": -1.1722548007965088, "logits/rejected": -1.2698702812194824, "logps/chosen": -590.0223388671875, "logps/rejected": -674.6921997070312, "loss": 0.6897, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32192471623420715, "rewards/margins": 0.08808865398168564, "rewards/rejected": -0.410013347864151, "step": 6980 }, { "epoch": 0.46, "learning_rate": 3.2947731233389447e-06, "logits/chosen": -0.9997597932815552, "logits/rejected": -0.8057335019111633, "logps/chosen": -589.4088745117188, "logps/rejected": -651.3054809570312, "loss": 0.6872, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3419021964073181, "rewards/margins": 0.11732400953769684, "rewards/rejected": -0.45922619104385376, "step": 6990 }, { "epoch": 0.46, "learning_rate": 3.2893573683691706e-06, "logits/chosen": -0.9610411524772644, "logits/rejected": -0.8816120028495789, "logps/chosen": -530.5889892578125, "logps/rejected": -620.2159423828125, "loss": 0.6878, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3258955478668213, "rewards/margins": 0.1075226441025734, "rewards/rejected": -0.4334181845188141, "step": 7000 }, { "epoch": 0.46, "eval_logits/chosen": -1.064884066581726, "eval_logits/rejected": -0.9341198205947876, "eval_logps/chosen": -569.5941162109375, "eval_logps/rejected": -639.7523803710938, "eval_loss": 0.6896393895149231, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -0.33758923411369324, "eval_rewards/margins": 0.09055128693580627, "eval_rewards/rejected": -0.4281404912471771, "eval_runtime": 712.6501, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 7000 }, { "epoch": 0.46, "learning_rate": 3.2839374957246915e-06, "logits/chosen": -1.1829535961151123, "logits/rejected": -0.9157463312149048, "logps/chosen": -620.6079711914062, "logps/rejected": -588.7822265625, "loss": 0.6915, "rewards/accuracies": 0.625, "rewards/chosen": -0.3545176684856415, "rewards/margins": 0.05971246212720871, "rewards/rejected": -0.4142301678657532, "step": 7010 }, { "epoch": 0.46, "learning_rate": 3.2785135336782187e-06, "logits/chosen": -1.0277976989746094, "logits/rejected": -0.9129239916801453, "logps/chosen": -609.3104248046875, "logps/rejected": -723.1586303710938, "loss": 0.6886, "rewards/accuracies": 0.75, "rewards/chosen": -0.3753950893878937, "rewards/margins": 0.09153584390878677, "rewards/rejected": -0.46693092584609985, "step": 7020 }, { "epoch": 0.46, "learning_rate": 3.2730855105237952e-06, "logits/chosen": -1.1726951599121094, "logits/rejected": -1.0626368522644043, "logps/chosen": -561.26904296875, "logps/rejected": -700.5789184570312, "loss": 0.6907, "rewards/accuracies": 0.75, "rewards/chosen": -0.3409925401210785, "rewards/margins": 0.09512937068939209, "rewards/rejected": -0.43612194061279297, "step": 7030 }, { "epoch": 0.46, "learning_rate": 3.2676534545766486e-06, "logits/chosen": -1.134777307510376, "logits/rejected": -1.0207163095474243, "logps/chosen": -526.9071044921875, "logps/rejected": -582.7493896484375, "loss": 0.6905, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3144376277923584, "rewards/margins": 0.06573508679866791, "rewards/rejected": -0.3801727890968323, "step": 7040 }, { "epoch": 0.46, "learning_rate": 3.262217394173043e-06, "logits/chosen": -1.130771517753601, "logits/rejected": -0.9906819462776184, "logps/chosen": -521.9103393554688, "logps/rejected": -615.9810791015625, "loss": 0.6894, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.28105971217155457, "rewards/margins": 0.10015375912189484, "rewards/rejected": -0.3812134861946106, "step": 7050 }, { "epoch": 0.46, "learning_rate": 3.2567773576701333e-06, "logits/chosen": -1.1347095966339111, "logits/rejected": -0.9777033925056458, "logps/chosen": -511.82757568359375, "logps/rejected": -625.614501953125, "loss": 0.6845, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25607019662857056, "rewards/margins": 0.14410245418548584, "rewards/rejected": -0.40017271041870117, "step": 7060 }, { "epoch": 0.46, "learning_rate": 3.2513333734458154e-06, "logits/chosen": -1.1313894987106323, "logits/rejected": -0.9789068102836609, "logps/chosen": -475.10919189453125, "logps/rejected": -516.0199584960938, "loss": 0.6902, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2682678997516632, "rewards/margins": 0.057518370449543, "rewards/rejected": -0.325786292552948, "step": 7070 }, { "epoch": 0.46, "learning_rate": 3.245885469898576e-06, "logits/chosen": -0.9258352518081665, "logits/rejected": -0.9314023852348328, "logps/chosen": -608.5376586914062, "logps/rejected": -644.3211669921875, "loss": 0.6892, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30798250436782837, "rewards/margins": 0.09947158396244049, "rewards/rejected": -0.40745407342910767, "step": 7080 }, { "epoch": 0.46, "learning_rate": 3.2404336754473497e-06, "logits/chosen": -0.9622844457626343, "logits/rejected": -0.8000280261039734, "logps/chosen": -542.6066284179688, "logps/rejected": -540.3517456054688, "loss": 0.6925, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2750997245311737, "rewards/margins": 0.06251533329486847, "rewards/rejected": -0.3376150131225586, "step": 7090 }, { "epoch": 0.46, "learning_rate": 3.234978018531367e-06, "logits/chosen": -1.5038312673568726, "logits/rejected": -1.0402882099151611, "logps/chosen": -514.2202758789062, "logps/rejected": -525.5071411132812, "loss": 0.6889, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.25572001934051514, "rewards/margins": 0.07608649134635925, "rewards/rejected": -0.3318065106868744, "step": 7100 }, { "epoch": 0.46, "eval_logits/chosen": -1.0207056999206543, "eval_logits/rejected": -0.8951786160469055, "eval_logps/chosen": -503.5751647949219, "eval_logps/rejected": -564.20068359375, "eval_loss": 0.6896175742149353, "eval_rewards/accuracies": 0.6600000262260437, "eval_rewards/chosen": -0.2715701758861542, "eval_rewards/margins": 0.0810185894370079, "eval_rewards/rejected": -0.35258880257606506, "eval_runtime": 712.637, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 7100 }, { "epoch": 0.47, "learning_rate": 3.229518527610006e-06, "logits/chosen": -1.2460079193115234, "logits/rejected": -1.0841786861419678, "logps/chosen": -569.5468139648438, "logps/rejected": -593.5337524414062, "loss": 0.6909, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27675676345825195, "rewards/margins": 0.06916297227144241, "rewards/rejected": -0.3459196984767914, "step": 7110 }, { "epoch": 0.47, "learning_rate": 3.2240552311626465e-06, "logits/chosen": -1.044368028640747, "logits/rejected": -0.8659143447875977, "logps/chosen": -494.69952392578125, "logps/rejected": -543.4378662109375, "loss": 0.6912, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2534857988357544, "rewards/margins": 0.06368336081504822, "rewards/rejected": -0.3171691298484802, "step": 7120 }, { "epoch": 0.47, "learning_rate": 3.2185881576885193e-06, "logits/chosen": -1.044262170791626, "logits/rejected": -0.8921842575073242, "logps/chosen": -525.1110229492188, "logps/rejected": -555.7213134765625, "loss": 0.6911, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31338828802108765, "rewards/margins": 0.06927771121263504, "rewards/rejected": -0.3826659321784973, "step": 7130 }, { "epoch": 0.47, "learning_rate": 3.213117335706557e-06, "logits/chosen": -1.098400354385376, "logits/rejected": -1.0391719341278076, "logps/chosen": -560.6351928710938, "logps/rejected": -638.1511840820312, "loss": 0.6917, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.30459028482437134, "rewards/margins": 0.06830967217683792, "rewards/rejected": -0.37289994955062866, "step": 7140 }, { "epoch": 0.47, "learning_rate": 3.2076427937552473e-06, "logits/chosen": -1.081235647201538, "logits/rejected": -0.7705933451652527, "logps/chosen": -514.1536254882812, "logps/rejected": -602.1751098632812, "loss": 0.6869, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.26866382360458374, "rewards/margins": 0.10714519023895264, "rewards/rejected": -0.37580904364585876, "step": 7150 }, { "epoch": 0.47, "learning_rate": 3.2021645603924827e-06, "logits/chosen": -0.8100953102111816, "logits/rejected": -0.822729766368866, "logps/chosen": -427.54852294921875, "logps/rejected": -539.2092895507812, "loss": 0.6887, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.28909438848495483, "rewards/margins": 0.10229630768299103, "rewards/rejected": -0.39139071106910706, "step": 7160 }, { "epoch": 0.47, "learning_rate": 3.196682664195412e-06, "logits/chosen": -0.9460660815238953, "logits/rejected": -0.8326196670532227, "logps/chosen": -485.0545959472656, "logps/rejected": -488.9381408691406, "loss": 0.6925, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.28100645542144775, "rewards/margins": 0.038774557411670685, "rewards/rejected": -0.31978100538253784, "step": 7170 }, { "epoch": 0.47, "learning_rate": 3.191197133760291e-06, "logits/chosen": -1.653093934059143, "logits/rejected": -1.1370487213134766, "logps/chosen": -529.1414184570312, "logps/rejected": -552.0350341796875, "loss": 0.6874, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.26173800230026245, "rewards/margins": 0.09714240580797195, "rewards/rejected": -0.35888034105300903, "step": 7180 }, { "epoch": 0.47, "learning_rate": 3.185707997702334e-06, "logits/chosen": -1.275370717048645, "logits/rejected": -0.9616245031356812, "logps/chosen": -505.08514404296875, "logps/rejected": -542.3439331054688, "loss": 0.6893, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2643203139305115, "rewards/margins": 0.07511034607887268, "rewards/rejected": -0.33943066000938416, "step": 7190 }, { "epoch": 0.47, "learning_rate": 3.1802152846555624e-06, "logits/chosen": -1.2004523277282715, "logits/rejected": -1.0477879047393799, "logps/chosen": -486.0389709472656, "logps/rejected": -555.9532470703125, "loss": 0.6887, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2617960274219513, "rewards/margins": 0.08543354272842407, "rewards/rejected": -0.347229540348053, "step": 7200 }, { "epoch": 0.47, "eval_logits/chosen": -1.1860355138778687, "eval_logits/rejected": -1.0530312061309814, "eval_logps/chosen": -497.23974609375, "eval_logps/rejected": -547.0663452148438, "eval_loss": 0.6897386312484741, "eval_rewards/accuracies": 0.6625000238418579, "eval_rewards/chosen": -0.2652347981929779, "eval_rewards/margins": 0.07021969556808472, "eval_rewards/rejected": -0.335454523563385, "eval_runtime": 712.9853, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 7200 }, { "epoch": 0.47, "learning_rate": 3.174719023272659e-06, "logits/chosen": -1.4130502939224243, "logits/rejected": -1.3417845964431763, "logps/chosen": -477.9608459472656, "logps/rejected": -615.33056640625, "loss": 0.6874, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2642667293548584, "rewards/margins": 0.09102049469947815, "rewards/rejected": -0.35528722405433655, "step": 7210 }, { "epoch": 0.47, "learning_rate": 3.169219242224816e-06, "logits/chosen": -1.2868996858596802, "logits/rejected": -1.0738362073898315, "logps/chosen": -539.2318115234375, "logps/rejected": -595.610595703125, "loss": 0.6906, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2993045747280121, "rewards/margins": 0.06247404217720032, "rewards/rejected": -0.3617786765098572, "step": 7220 }, { "epoch": 0.47, "learning_rate": 3.1637159702015837e-06, "logits/chosen": -1.0716454982757568, "logits/rejected": -0.9601171612739563, "logps/chosen": -474.85205078125, "logps/rejected": -549.5961303710938, "loss": 0.6888, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2689599096775055, "rewards/margins": 0.09406690299510956, "rewards/rejected": -0.36302685737609863, "step": 7230 }, { "epoch": 0.47, "learning_rate": 3.1582092359107263e-06, "logits/chosen": -1.0168414115905762, "logits/rejected": -0.7780404090881348, "logps/chosen": -583.4414672851562, "logps/rejected": -625.4762573242188, "loss": 0.6925, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.30516377091407776, "rewards/margins": 0.08786555379629135, "rewards/rejected": -0.3930293023586273, "step": 7240 }, { "epoch": 0.47, "learning_rate": 3.152699068078067e-06, "logits/chosen": -0.9496833682060242, "logits/rejected": -0.8669363260269165, "logps/chosen": -580.4671630859375, "logps/rejected": -680.8070678710938, "loss": 0.6871, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.30017679929733276, "rewards/margins": 0.12088117748498917, "rewards/rejected": -0.4210579991340637, "step": 7250 }, { "epoch": 0.48, "learning_rate": 3.1471854954473415e-06, "logits/chosen": -1.3222639560699463, "logits/rejected": -1.259479284286499, "logps/chosen": -438.91619873046875, "logps/rejected": -524.3823852539062, "loss": 0.689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1881495714187622, "rewards/margins": 0.08923407644033432, "rewards/rejected": -0.2773836553096771, "step": 7260 }, { "epoch": 0.48, "learning_rate": 3.1416685467800436e-06, "logits/chosen": -1.019896149635315, "logits/rejected": -0.694831907749176, "logps/chosen": -449.73675537109375, "logps/rejected": -520.8202514648438, "loss": 0.69, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.26415398716926575, "rewards/margins": 0.09361319243907928, "rewards/rejected": -0.35776716470718384, "step": 7270 }, { "epoch": 0.48, "learning_rate": 3.1361482508552803e-06, "logits/chosen": -1.1474792957305908, "logits/rejected": -0.9644671678543091, "logps/chosen": -501.0804138183594, "logps/rejected": -530.1576538085938, "loss": 0.6903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.25912952423095703, "rewards/margins": 0.07246353477239609, "rewards/rejected": -0.3315930664539337, "step": 7280 }, { "epoch": 0.48, "learning_rate": 3.1306246364696198e-06, "logits/chosen": -1.4395965337753296, "logits/rejected": -1.1949145793914795, "logps/chosen": -488.775146484375, "logps/rejected": -538.5449829101562, "loss": 0.69, "rewards/accuracies": 0.625, "rewards/chosen": -0.23630082607269287, "rewards/margins": 0.06727494299411774, "rewards/rejected": -0.3035758137702942, "step": 7290 }, { "epoch": 0.48, "learning_rate": 3.1250977324369413e-06, "logits/chosen": -1.0189143419265747, "logits/rejected": -0.9124029874801636, "logps/chosen": -389.81689453125, "logps/rejected": -490.656494140625, "loss": 0.6905, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23086443543434143, "rewards/margins": 0.09412762522697449, "rewards/rejected": -0.32499203085899353, "step": 7300 }, { "epoch": 0.48, "eval_logits/chosen": -1.0693286657333374, "eval_logits/rejected": -0.9412211775779724, "eval_logps/chosen": -495.2763366699219, "eval_logps/rejected": -555.8788452148438, "eval_loss": 0.6896201372146606, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -0.2632714509963989, "eval_rewards/margins": 0.08099555969238281, "eval_rewards/rejected": -0.3442670404911041, "eval_runtime": 713.0344, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 7300 }, { "epoch": 0.48, "learning_rate": 3.1195675675882825e-06, "logits/chosen": -1.1212948560714722, "logits/rejected": -0.8987517356872559, "logps/chosen": -525.4148559570312, "logps/rejected": -579.3656005859375, "loss": 0.69, "rewards/accuracies": 0.625, "rewards/chosen": -0.2902238070964813, "rewards/margins": 0.08455558121204376, "rewards/rejected": -0.3747794032096863, "step": 7310 }, { "epoch": 0.48, "learning_rate": 3.1140341707716926e-06, "logits/chosen": -0.747536301612854, "logits/rejected": -0.7013038992881775, "logps/chosen": -444.2554626464844, "logps/rejected": -501.15228271484375, "loss": 0.6867, "rewards/accuracies": 0.625, "rewards/chosen": -0.2484477311372757, "rewards/margins": 0.0999131128191948, "rewards/rejected": -0.3483608365058899, "step": 7320 }, { "epoch": 0.48, "learning_rate": 3.1084975708520803e-06, "logits/chosen": -1.2651160955429077, "logits/rejected": -0.9623421430587769, "logps/chosen": -483.85772705078125, "logps/rejected": -513.4347534179688, "loss": 0.6899, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22213390469551086, "rewards/margins": 0.10032358020544052, "rewards/rejected": -0.3224574625492096, "step": 7330 }, { "epoch": 0.48, "learning_rate": 3.1029577967110625e-06, "logits/chosen": -1.2189199924468994, "logits/rejected": -0.9497553110122681, "logps/chosen": -417.85028076171875, "logps/rejected": -415.18450927734375, "loss": 0.6914, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2014559805393219, "rewards/margins": 0.046392567455768585, "rewards/rejected": -0.24784855544567108, "step": 7340 }, { "epoch": 0.48, "learning_rate": 3.097414877246814e-06, "logits/chosen": -1.115504264831543, "logits/rejected": -0.8397358655929565, "logps/chosen": -399.6795654296875, "logps/rejected": -476.15484619140625, "loss": 0.6857, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20103612542152405, "rewards/margins": 0.10598810762166977, "rewards/rejected": -0.3070242404937744, "step": 7350 }, { "epoch": 0.48, "learning_rate": 3.0918688413739197e-06, "logits/chosen": -0.9786425828933716, "logits/rejected": -0.6896412968635559, "logps/chosen": -420.2076110839844, "logps/rejected": -461.7596130371094, "loss": 0.6871, "rewards/accuracies": 0.5, "rewards/chosen": -0.1875246912240982, "rewards/margins": 0.10582470893859863, "rewards/rejected": -0.29334941506385803, "step": 7360 }, { "epoch": 0.48, "learning_rate": 3.0863197180232178e-06, "logits/chosen": -0.9284044504165649, "logits/rejected": -0.8172693252563477, "logps/chosen": -454.2342224121094, "logps/rejected": -525.4736938476562, "loss": 0.689, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25449004769325256, "rewards/margins": 0.08346012979745865, "rewards/rejected": -0.3379501700401306, "step": 7370 }, { "epoch": 0.48, "learning_rate": 3.0807675361416554e-06, "logits/chosen": -0.7114228010177612, "logits/rejected": -0.6189266443252563, "logps/chosen": -422.91912841796875, "logps/rejected": -438.93487548828125, "loss": 0.6895, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23403885960578918, "rewards/margins": 0.09505019336938858, "rewards/rejected": -0.32908907532691956, "step": 7380 }, { "epoch": 0.48, "learning_rate": 3.0752123246921327e-06, "logits/chosen": -0.8941015005111694, "logits/rejected": -0.5493389368057251, "logps/chosen": -558.5103149414062, "logps/rejected": -587.1238403320312, "loss": 0.6894, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.280597060918808, "rewards/margins": 0.09554499387741089, "rewards/rejected": -0.37614205479621887, "step": 7390 }, { "epoch": 0.48, "learning_rate": 3.069654112653353e-06, "logits/chosen": -0.9813539385795593, "logits/rejected": -0.7545603513717651, "logps/chosen": -551.376953125, "logps/rejected": -564.7586669921875, "loss": 0.6933, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3348926901817322, "rewards/margins": 0.0459338054060936, "rewards/rejected": -0.38082653284072876, "step": 7400 }, { "epoch": 0.48, "eval_logits/chosen": -0.8035207986831665, "eval_logits/rejected": -0.6899078488349915, "eval_logps/chosen": -545.2488403320312, "eval_logps/rejected": -615.5215454101562, "eval_loss": 0.6896498799324036, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -0.3132438361644745, "eval_rewards/margins": 0.0906657725572586, "eval_rewards/rejected": -0.4039096534252167, "eval_runtime": 712.1104, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.404, "step": 7400 }, { "epoch": 0.48, "learning_rate": 3.064092929019673e-06, "logits/chosen": -0.7480974197387695, "logits/rejected": -0.9871516227722168, "logps/chosen": -570.5189819335938, "logps/rejected": -646.7600708007812, "loss": 0.6916, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3129754960536957, "rewards/margins": 0.05636630207300186, "rewards/rejected": -0.36934179067611694, "step": 7410 }, { "epoch": 0.49, "learning_rate": 3.058528802800952e-06, "logits/chosen": -1.0600616931915283, "logits/rejected": -0.8567035794258118, "logps/chosen": -565.076171875, "logps/rejected": -612.2608032226562, "loss": 0.6909, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27244246006011963, "rewards/margins": 0.08375748246908188, "rewards/rejected": -0.3561999201774597, "step": 7420 }, { "epoch": 0.49, "learning_rate": 3.052961763022397e-06, "logits/chosen": -1.265679121017456, "logits/rejected": -0.6907863616943359, "logps/chosen": -431.6497497558594, "logps/rejected": -513.7686767578125, "loss": 0.6877, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24876856803894043, "rewards/margins": 0.1208425760269165, "rewards/rejected": -0.3696111738681793, "step": 7430 }, { "epoch": 0.49, "learning_rate": 3.047391838724415e-06, "logits/chosen": -1.1518621444702148, "logits/rejected": -1.00303053855896, "logps/chosen": -498.6324157714844, "logps/rejected": -584.2637939453125, "loss": 0.6891, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.263113796710968, "rewards/margins": 0.1016978994011879, "rewards/rejected": -0.3648116886615753, "step": 7440 }, { "epoch": 0.49, "learning_rate": 3.0418190589624587e-06, "logits/chosen": -0.8011142611503601, "logits/rejected": -0.6937967538833618, "logps/chosen": -416.416015625, "logps/rejected": -479.81103515625, "loss": 0.692, "rewards/accuracies": 0.625, "rewards/chosen": -0.23879393935203552, "rewards/margins": 0.055042654275894165, "rewards/rejected": -0.2938365936279297, "step": 7450 }, { "epoch": 0.49, "learning_rate": 3.0362434528068784e-06, "logits/chosen": -0.749529242515564, "logits/rejected": -0.5978935956954956, "logps/chosen": -543.2520751953125, "logps/rejected": -544.2491455078125, "loss": 0.688, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2739289402961731, "rewards/margins": 0.08649125695228577, "rewards/rejected": -0.3604201376438141, "step": 7460 }, { "epoch": 0.49, "learning_rate": 3.0306650493427657e-06, "logits/chosen": -0.901824951171875, "logits/rejected": -0.7608638405799866, "logps/chosen": -480.89569091796875, "logps/rejected": -546.8543090820312, "loss": 0.6896, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24927644431591034, "rewards/margins": 0.07492052763700485, "rewards/rejected": -0.3241969645023346, "step": 7470 }, { "epoch": 0.49, "learning_rate": 3.0250838776698077e-06, "logits/chosen": -1.011126160621643, "logits/rejected": -0.7641777992248535, "logps/chosen": -426.700439453125, "logps/rejected": -525.545166015625, "loss": 0.6859, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2427811324596405, "rewards/margins": 0.10159105062484741, "rewards/rejected": -0.3443722128868103, "step": 7480 }, { "epoch": 0.49, "learning_rate": 3.0194999669021275e-06, "logits/chosen": -0.6938611268997192, "logits/rejected": -0.42879757285118103, "logps/chosen": -464.25030517578125, "logps/rejected": -515.6192626953125, "loss": 0.689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23738184571266174, "rewards/margins": 0.09842096269130707, "rewards/rejected": -0.33580282330513, "step": 7490 }, { "epoch": 0.49, "learning_rate": 3.0139133461681403e-06, "logits/chosen": -1.1078835725784302, "logits/rejected": -0.9325857162475586, "logps/chosen": -487.8389587402344, "logps/rejected": -546.95361328125, "loss": 0.6885, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22344477474689484, "rewards/margins": 0.11525474488735199, "rewards/rejected": -0.3386995196342468, "step": 7500 }, { "epoch": 0.49, "eval_logits/chosen": -0.9689752459526062, "eval_logits/rejected": -0.8481876254081726, "eval_logps/chosen": -479.9737548828125, "eval_logps/rejected": -539.5113525390625, "eval_loss": 0.6896456480026245, "eval_rewards/accuracies": 0.6614999771118164, "eval_rewards/chosen": -0.24796883761882782, "eval_rewards/margins": 0.0799306184053421, "eval_rewards/rejected": -0.3278994560241699, "eval_runtime": 712.6198, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 7500 }, { "epoch": 0.49, "learning_rate": 3.0083240446103965e-06, "logits/chosen": -0.7331717610359192, "logits/rejected": -0.4825916290283203, "logps/chosen": -436.3609313964844, "logps/rejected": -545.4282836914062, "loss": 0.6881, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2523263394832611, "rewards/margins": 0.10139818489551544, "rewards/rejected": -0.35372450947761536, "step": 7510 }, { "epoch": 0.49, "learning_rate": 3.0027320913854306e-06, "logits/chosen": -1.3195829391479492, "logits/rejected": -1.117133617401123, "logps/chosen": -533.9688720703125, "logps/rejected": -565.7762451171875, "loss": 0.6909, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2415073662996292, "rewards/margins": 0.09451662003993988, "rewards/rejected": -0.3360239863395691, "step": 7520 }, { "epoch": 0.49, "learning_rate": 2.997137515663609e-06, "logits/chosen": -1.0817979574203491, "logits/rejected": -0.9075329899787903, "logps/chosen": -417.1910705566406, "logps/rejected": -472.84747314453125, "loss": 0.6893, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1918136477470398, "rewards/margins": 0.09335563331842422, "rewards/rejected": -0.2851692736148834, "step": 7530 }, { "epoch": 0.49, "learning_rate": 2.991540346628981e-06, "logits/chosen": -0.9919770956039429, "logits/rejected": -1.0041069984436035, "logps/chosen": -508.04888916015625, "logps/rejected": -528.6693725585938, "loss": 0.6902, "rewards/accuracies": 0.625, "rewards/chosen": -0.2684485614299774, "rewards/margins": 0.04672873765230179, "rewards/rejected": -0.3151772916316986, "step": 7540 }, { "epoch": 0.49, "learning_rate": 2.985940613479121e-06, "logits/chosen": -1.1806385517120361, "logits/rejected": -1.057145357131958, "logps/chosen": -549.0791625976562, "logps/rejected": -559.5987548828125, "loss": 0.6895, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25522947311401367, "rewards/margins": 0.06989149749279022, "rewards/rejected": -0.3251209855079651, "step": 7550 }, { "epoch": 0.49, "learning_rate": 2.980338345424981e-06, "logits/chosen": -0.8516530990600586, "logits/rejected": -0.7801792025566101, "logps/chosen": -522.4078979492188, "logps/rejected": -543.2108154296875, "loss": 0.6907, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2731880843639374, "rewards/margins": 0.06907974183559418, "rewards/rejected": -0.34226787090301514, "step": 7560 }, { "epoch": 0.5, "learning_rate": 2.974733571690735e-06, "logits/chosen": -0.9777799844741821, "logits/rejected": -0.5789401531219482, "logps/chosen": -561.6461791992188, "logps/rejected": -590.323486328125, "loss": 0.69, "rewards/accuracies": 0.75, "rewards/chosen": -0.3242393732070923, "rewards/margins": 0.08796980232000351, "rewards/rejected": -0.4122091233730316, "step": 7570 }, { "epoch": 0.5, "learning_rate": 2.9691263215136274e-06, "logits/chosen": -1.1575870513916016, "logits/rejected": -1.032047986984253, "logps/chosen": -534.2648315429688, "logps/rejected": -575.9432373046875, "loss": 0.6911, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2672644555568695, "rewards/margins": 0.07333298027515411, "rewards/rejected": -0.3405974507331848, "step": 7580 }, { "epoch": 0.5, "learning_rate": 2.963516624143823e-06, "logits/chosen": -0.8190193176269531, "logits/rejected": -1.0621607303619385, "logps/chosen": -522.3610229492188, "logps/rejected": -564.7208251953125, "loss": 0.6884, "rewards/accuracies": 0.625, "rewards/chosen": -0.3097943663597107, "rewards/margins": 0.07989050447940826, "rewards/rejected": -0.38968485593795776, "step": 7590 }, { "epoch": 0.5, "learning_rate": 2.9579045088442504e-06, "logits/chosen": -0.9224055409431458, "logits/rejected": -0.7572005987167358, "logps/chosen": -440.4720153808594, "logps/rejected": -552.9095458984375, "loss": 0.6873, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.25196707248687744, "rewards/margins": 0.09268857538700104, "rewards/rejected": -0.3446556627750397, "step": 7600 }, { "epoch": 0.5, "eval_logits/chosen": -1.0028795003890991, "eval_logits/rejected": -0.8792377710342407, "eval_logps/chosen": -477.3995666503906, "eval_logps/rejected": -540.662353515625, "eval_loss": 0.689660906791687, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -0.24539463222026825, "eval_rewards/margins": 0.08365590125322342, "eval_rewards/rejected": -0.3290505111217499, "eval_runtime": 714.1393, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.4, "step": 7600 }, { "epoch": 0.5, "learning_rate": 2.9522900048904534e-06, "logits/chosen": -1.1636269092559814, "logits/rejected": -0.9207497835159302, "logps/chosen": -517.7084350585938, "logps/rejected": -542.8178100585938, "loss": 0.6924, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2779703140258789, "rewards/margins": 0.05513089895248413, "rewards/rejected": -0.33310121297836304, "step": 7610 }, { "epoch": 0.5, "learning_rate": 2.9466731415704343e-06, "logits/chosen": -1.0667656660079956, "logits/rejected": -0.9428873062133789, "logps/chosen": -439.12677001953125, "logps/rejected": -518.74853515625, "loss": 0.6909, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21534006297588348, "rewards/margins": 0.08398912847042084, "rewards/rejected": -0.2993291914463043, "step": 7620 }, { "epoch": 0.5, "learning_rate": 2.941053948184503e-06, "logits/chosen": -1.16767156124115, "logits/rejected": -0.817290186882019, "logps/chosen": -478.71978759765625, "logps/rejected": -502.38909912109375, "loss": 0.6902, "rewards/accuracies": 0.625, "rewards/chosen": -0.19878068566322327, "rewards/margins": 0.05858156830072403, "rewards/rejected": -0.2573622763156891, "step": 7630 }, { "epoch": 0.5, "learning_rate": 2.935432454045125e-06, "logits/chosen": -0.7946759462356567, "logits/rejected": -0.8207203149795532, "logps/chosen": -449.922607421875, "logps/rejected": -463.6610412597656, "loss": 0.6913, "rewards/accuracies": 0.625, "rewards/chosen": -0.21795828640460968, "rewards/margins": 0.034945905208587646, "rewards/rejected": -0.2529042065143585, "step": 7640 }, { "epoch": 0.5, "learning_rate": 2.929808688476768e-06, "logits/chosen": -1.0972411632537842, "logits/rejected": -1.0782297849655151, "logps/chosen": -453.37548828125, "logps/rejected": -522.5484008789062, "loss": 0.689, "rewards/accuracies": 0.625, "rewards/chosen": -0.21401312947273254, "rewards/margins": 0.08677531778812408, "rewards/rejected": -0.3007884621620178, "step": 7650 }, { "epoch": 0.5, "learning_rate": 2.924182680815748e-06, "logits/chosen": -0.9795387387275696, "logits/rejected": -0.9352675676345825, "logps/chosen": -444.06121826171875, "logps/rejected": -548.3419799804688, "loss": 0.6878, "rewards/accuracies": 0.75, "rewards/chosen": -0.20943062007427216, "rewards/margins": 0.12698841094970703, "rewards/rejected": -0.336419016122818, "step": 7660 }, { "epoch": 0.5, "learning_rate": 2.9185544604100765e-06, "logits/chosen": -0.669420599937439, "logits/rejected": -0.5942717790603638, "logps/chosen": -396.4417419433594, "logps/rejected": -474.1393127441406, "loss": 0.6887, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.20053164660930634, "rewards/margins": 0.08195292949676514, "rewards/rejected": -0.28248459100723267, "step": 7670 }, { "epoch": 0.5, "learning_rate": 2.9129240566193083e-06, "logits/chosen": -1.212862253189087, "logits/rejected": -0.8634244799613953, "logps/chosen": -411.9769592285156, "logps/rejected": -494.0874938964844, "loss": 0.6881, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20989665389060974, "rewards/margins": 0.09539847820997238, "rewards/rejected": -0.30529510974884033, "step": 7680 }, { "epoch": 0.5, "learning_rate": 2.9072914988143874e-06, "logits/chosen": -0.9252969026565552, "logits/rejected": -0.7269047498703003, "logps/chosen": -435.27142333984375, "logps/rejected": -558.121826171875, "loss": 0.6887, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23384162783622742, "rewards/margins": 0.13132184743881226, "rewards/rejected": -0.36516350507736206, "step": 7690 }, { "epoch": 0.5, "learning_rate": 2.9016568163774956e-06, "logits/chosen": -0.9825423359870911, "logits/rejected": -0.8417407870292664, "logps/chosen": -364.4339599609375, "logps/rejected": -400.3785400390625, "loss": 0.6896, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19220469892024994, "rewards/margins": 0.06999743729829788, "rewards/rejected": -0.2622021436691284, "step": 7700 }, { "epoch": 0.5, "eval_logits/chosen": -1.0249030590057373, "eval_logits/rejected": -0.9031400084495544, "eval_logps/chosen": -435.697998046875, "eval_logps/rejected": -492.8592529296875, "eval_loss": 0.6896706223487854, "eval_rewards/accuracies": 0.6539999842643738, "eval_rewards/chosen": -0.20369309186935425, "eval_rewards/margins": 0.07755427807569504, "eval_rewards/rejected": -0.2812473475933075, "eval_runtime": 711.5485, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 7700 }, { "epoch": 0.5, "learning_rate": 2.8960200387018942e-06, "logits/chosen": -1.09833562374115, "logits/rejected": -0.9112803339958191, "logps/chosen": -508.74224853515625, "logps/rejected": -526.496337890625, "loss": 0.6911, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.20100924372673035, "rewards/margins": 0.06658849865198135, "rewards/rejected": -0.2675977349281311, "step": 7710 }, { "epoch": 0.51, "learning_rate": 2.8903811951917792e-06, "logits/chosen": -1.0677770376205444, "logits/rejected": -1.047732949256897, "logps/chosen": -385.08856201171875, "logps/rejected": -401.1496276855469, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18680894374847412, "rewards/margins": 0.06320323050022125, "rewards/rejected": -0.25001221895217896, "step": 7720 }, { "epoch": 0.51, "learning_rate": 2.88474031526212e-06, "logits/chosen": -1.150158166885376, "logits/rejected": -1.023202896118164, "logps/chosen": -423.855712890625, "logps/rejected": -499.7687072753906, "loss": 0.6909, "rewards/accuracies": 0.625, "rewards/chosen": -0.22101597487926483, "rewards/margins": 0.06248442456126213, "rewards/rejected": -0.2835003733634949, "step": 7730 }, { "epoch": 0.51, "learning_rate": 2.879097428338509e-06, "logits/chosen": -0.9290957450866699, "logits/rejected": -0.6512723565101624, "logps/chosen": -430.292236328125, "logps/rejected": -476.34912109375, "loss": 0.6895, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21439728140830994, "rewards/margins": 0.06838522851467133, "rewards/rejected": -0.28278249502182007, "step": 7740 }, { "epoch": 0.51, "learning_rate": 2.8734525638570094e-06, "logits/chosen": -0.9917430877685547, "logits/rejected": -0.9474889636039734, "logps/chosen": -447.1341247558594, "logps/rejected": -494.3531799316406, "loss": 0.692, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21293988823890686, "rewards/margins": 0.059592366218566895, "rewards/rejected": -0.27253225445747375, "step": 7750 }, { "epoch": 0.51, "learning_rate": 2.8678057512639982e-06, "logits/chosen": -1.0115077495574951, "logits/rejected": -0.9387832880020142, "logps/chosen": -477.07373046875, "logps/rejected": -581.225830078125, "loss": 0.6873, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1910073161125183, "rewards/margins": 0.1262114942073822, "rewards/rejected": -0.3172187805175781, "step": 7760 }, { "epoch": 0.51, "learning_rate": 2.8621570200160172e-06, "logits/chosen": -0.4895518720149994, "logits/rejected": -0.45712098479270935, "logps/chosen": -366.78436279296875, "logps/rejected": -456.4737243652344, "loss": 0.6898, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1982152760028839, "rewards/margins": 0.09786740690469742, "rewards/rejected": -0.29608267545700073, "step": 7770 }, { "epoch": 0.51, "learning_rate": 2.856506399579615e-06, "logits/chosen": -0.9345094561576843, "logits/rejected": -0.994672954082489, "logps/chosen": -507.7185974121094, "logps/rejected": -560.7838134765625, "loss": 0.6891, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2893551290035248, "rewards/margins": 0.07255448400974274, "rewards/rejected": -0.3619096279144287, "step": 7780 }, { "epoch": 0.51, "learning_rate": 2.8508539194311964e-06, "logits/chosen": -0.7989007234573364, "logits/rejected": -0.9685953855514526, "logps/chosen": -515.143798828125, "logps/rejected": -594.9364013671875, "loss": 0.6908, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2614103853702545, "rewards/margins": 0.06696247309446335, "rewards/rejected": -0.32837286591529846, "step": 7790 }, { "epoch": 0.51, "learning_rate": 2.8451996090568656e-06, "logits/chosen": -0.7361730337142944, "logits/rejected": -0.4997798502445221, "logps/chosen": -490.7713317871094, "logps/rejected": -557.9491577148438, "loss": 0.6893, "rewards/accuracies": 0.625, "rewards/chosen": -0.3046317994594574, "rewards/margins": 0.08513940870761871, "rewards/rejected": -0.3897712230682373, "step": 7800 }, { "epoch": 0.51, "eval_logits/chosen": -0.7930099368095398, "eval_logits/rejected": -0.6832570433616638, "eval_logps/chosen": -514.2276000976562, "eval_logps/rejected": -576.969970703125, "eval_loss": 0.6896798610687256, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -0.2822226583957672, "eval_rewards/margins": 0.08313547074794769, "eval_rewards/rejected": -0.3653581142425537, "eval_runtime": 713.829, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 7800 }, { "epoch": 0.51, "learning_rate": 2.839543497952276e-06, "logits/chosen": -0.7512328028678894, "logits/rejected": -0.7098406553268433, "logps/chosen": -445.0135192871094, "logps/rejected": -524.4622192382812, "loss": 0.6903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.26040345430374146, "rewards/margins": 0.08815246820449829, "rewards/rejected": -0.34855595231056213, "step": 7810 }, { "epoch": 0.51, "learning_rate": 2.833885615622474e-06, "logits/chosen": -0.7569184899330139, "logits/rejected": -0.6354383826255798, "logps/chosen": -483.88323974609375, "logps/rejected": -554.6300659179688, "loss": 0.6924, "rewards/accuracies": 0.625, "rewards/chosen": -0.2813073992729187, "rewards/margins": 0.06127142161130905, "rewards/rejected": -0.34257885813713074, "step": 7820 }, { "epoch": 0.51, "learning_rate": 2.8282259915817454e-06, "logits/chosen": -0.5648205876350403, "logits/rejected": -0.5008620619773865, "logps/chosen": -384.44256591796875, "logps/rejected": -520.5614013671875, "loss": 0.6877, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.24349789321422577, "rewards/margins": 0.09590722620487213, "rewards/rejected": -0.3394051194190979, "step": 7830 }, { "epoch": 0.51, "learning_rate": 2.8225646553534614e-06, "logits/chosen": -0.5399858951568604, "logits/rejected": -0.48315078020095825, "logps/chosen": -408.0323181152344, "logps/rejected": -473.1873474121094, "loss": 0.6909, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20871634781360626, "rewards/margins": 0.0684463381767273, "rewards/rejected": -0.27716267108917236, "step": 7840 }, { "epoch": 0.51, "learning_rate": 2.8169016364699255e-06, "logits/chosen": -0.8743473291397095, "logits/rejected": -0.7486833333969116, "logps/chosen": -465.1512145996094, "logps/rejected": -522.1315307617188, "loss": 0.6922, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.25326332449913025, "rewards/margins": 0.05523008853197098, "rewards/rejected": -0.308493435382843, "step": 7850 }, { "epoch": 0.51, "learning_rate": 2.811236964472217e-06, "logits/chosen": -1.0294277667999268, "logits/rejected": -0.9134146571159363, "logps/chosen": -556.4703369140625, "logps/rejected": -564.4383544921875, "loss": 0.6898, "rewards/accuracies": 0.625, "rewards/chosen": -0.24606367945671082, "rewards/margins": 0.06808798015117645, "rewards/rejected": -0.31415167450904846, "step": 7860 }, { "epoch": 0.51, "learning_rate": 2.805570668910041e-06, "logits/chosen": -0.6756094098091125, "logits/rejected": -0.7497716546058655, "logps/chosen": -456.1519470214844, "logps/rejected": -607.6312255859375, "loss": 0.6894, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.28651267290115356, "rewards/margins": 0.08797899633646011, "rewards/rejected": -0.3744916319847107, "step": 7870 }, { "epoch": 0.52, "learning_rate": 2.7999027793415695e-06, "logits/chosen": -1.1828815937042236, "logits/rejected": -0.7200473546981812, "logps/chosen": -459.301513671875, "logps/rejected": -471.305419921875, "loss": 0.6912, "rewards/accuracies": 0.75, "rewards/chosen": -0.21438555419445038, "rewards/margins": 0.0569295659661293, "rewards/rejected": -0.2713150680065155, "step": 7880 }, { "epoch": 0.52, "learning_rate": 2.794233325333293e-06, "logits/chosen": -0.9350395202636719, "logits/rejected": -0.7725561857223511, "logps/chosen": -467.2784118652344, "logps/rejected": -540.7667846679688, "loss": 0.6889, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2089969664812088, "rewards/margins": 0.09716568142175674, "rewards/rejected": -0.30616268515586853, "step": 7890 }, { "epoch": 0.52, "learning_rate": 2.7885623364598597e-06, "logits/chosen": -1.1787984371185303, "logits/rejected": -0.8761898875236511, "logps/chosen": -518.7886352539062, "logps/rejected": -571.7611083984375, "loss": 0.6896, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25509604811668396, "rewards/margins": 0.09489138424396515, "rewards/rejected": -0.3499874174594879, "step": 7900 }, { "epoch": 0.52, "eval_logits/chosen": -0.9352292418479919, "eval_logits/rejected": -0.8164902925491333, "eval_logps/chosen": -467.99554443359375, "eval_logps/rejected": -532.0469970703125, "eval_loss": 0.68949294090271, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -0.23599061369895935, "eval_rewards/margins": 0.08444450050592422, "eval_rewards/rejected": -0.32043513655662537, "eval_runtime": 713.5958, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.401, "step": 7900 }, { "epoch": 0.52, "learning_rate": 2.782889842303926e-06, "logits/chosen": -0.9351937174797058, "logits/rejected": -0.8792891502380371, "logps/chosen": -421.2020568847656, "logps/rejected": -468.4584045410156, "loss": 0.6924, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.26097291707992554, "rewards/margins": 0.05275429040193558, "rewards/rejected": -0.3137272298336029, "step": 7910 }, { "epoch": 0.52, "learning_rate": 2.7772158724559987e-06, "logits/chosen": -0.956935703754425, "logits/rejected": -0.681462824344635, "logps/chosen": -415.68115234375, "logps/rejected": -615.3704833984375, "loss": 0.6835, "rewards/accuracies": 0.75, "rewards/chosen": -0.19857212901115417, "rewards/margins": 0.1622186303138733, "rewards/rejected": -0.3607906997203827, "step": 7920 }, { "epoch": 0.52, "learning_rate": 2.7715404565142856e-06, "logits/chosen": -0.7236738204956055, "logits/rejected": -0.8611618280410767, "logps/chosen": -417.05206298828125, "logps/rejected": -469.84051513671875, "loss": 0.6907, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21708783507347107, "rewards/margins": 0.05901111289858818, "rewards/rejected": -0.27609896659851074, "step": 7930 }, { "epoch": 0.52, "learning_rate": 2.7658636240845354e-06, "logits/chosen": -1.2458736896514893, "logits/rejected": -1.17683744430542, "logps/chosen": -462.50439453125, "logps/rejected": -576.8449096679688, "loss": 0.6897, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23905611038208008, "rewards/margins": 0.09972243756055832, "rewards/rejected": -0.338778555393219, "step": 7940 }, { "epoch": 0.52, "learning_rate": 2.7601854047798872e-06, "logits/chosen": -0.7429434657096863, "logits/rejected": -0.7902488112449646, "logps/chosen": -438.64617919921875, "logps/rejected": -532.12255859375, "loss": 0.6895, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21472282707691193, "rewards/margins": 0.07406110316514969, "rewards/rejected": -0.2887839078903198, "step": 7950 }, { "epoch": 0.52, "learning_rate": 2.7545058282207148e-06, "logits/chosen": -0.7798100113868713, "logits/rejected": -0.6986501812934875, "logps/chosen": -432.69061279296875, "logps/rejected": -461.27197265625, "loss": 0.6909, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22114884853363037, "rewards/margins": 0.059160299599170685, "rewards/rejected": -0.28030914068222046, "step": 7960 }, { "epoch": 0.52, "learning_rate": 2.748824924034471e-06, "logits/chosen": -1.0739469528198242, "logits/rejected": -0.9439611434936523, "logps/chosen": -469.6466369628906, "logps/rejected": -524.2724609375, "loss": 0.6893, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2502364218235016, "rewards/margins": 0.07352254539728165, "rewards/rejected": -0.32375895977020264, "step": 7970 }, { "epoch": 0.52, "learning_rate": 2.743142721855536e-06, "logits/chosen": -0.69549560546875, "logits/rejected": -0.8490379452705383, "logps/chosen": -343.1895446777344, "logps/rejected": -391.9134826660156, "loss": 0.6904, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19132943451404572, "rewards/margins": 0.057072412222623825, "rewards/rejected": -0.24840185046195984, "step": 7980 }, { "epoch": 0.52, "learning_rate": 2.737459251325058e-06, "logits/chosen": -1.0810500383377075, "logits/rejected": -0.9517616033554077, "logps/chosen": -445.8333435058594, "logps/rejected": -476.86529541015625, "loss": 0.691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17585977911949158, "rewards/margins": 0.05356324836611748, "rewards/rejected": -0.22942304611206055, "step": 7990 }, { "epoch": 0.52, "learning_rate": 2.731774542090804e-06, "logits/chosen": -0.80693519115448, "logits/rejected": -0.7859255075454712, "logps/chosen": -362.6540222167969, "logps/rejected": -392.09698486328125, "loss": 0.6909, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.16987797617912292, "rewards/margins": 0.04847797378897667, "rewards/rejected": -0.2183559387922287, "step": 8000 }, { "epoch": 0.52, "eval_logits/chosen": -1.0597189664840698, "eval_logits/rejected": -0.9344833493232727, "eval_logps/chosen": -404.4119873046875, "eval_logps/rejected": -465.3135681152344, "eval_loss": 0.6895393133163452, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.17240706086158752, "eval_rewards/margins": 0.08129459619522095, "eval_rewards/rejected": -0.25370165705680847, "eval_runtime": 711.0651, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.406, "step": 8000 }, { "epoch": 0.52, "learning_rate": 2.7260886238070034e-06, "logits/chosen": -1.1494150161743164, "logits/rejected": -1.0372936725616455, "logps/chosen": -357.6014709472656, "logps/rejected": -431.92059326171875, "loss": 0.6908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16113150119781494, "rewards/margins": 0.07902725785970688, "rewards/rejected": -0.24015876650810242, "step": 8010 }, { "epoch": 0.52, "learning_rate": 2.72040152613419e-06, "logits/chosen": -0.9957448244094849, "logits/rejected": -0.8766078948974609, "logps/chosen": -375.31829833984375, "logps/rejected": -415.14068603515625, "loss": 0.6853, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15844318270683289, "rewards/margins": 0.12304798513650894, "rewards/rejected": -0.2814911901950836, "step": 8020 }, { "epoch": 0.53, "learning_rate": 2.7147132787390516e-06, "logits/chosen": -1.0838630199432373, "logits/rejected": -0.817136287689209, "logps/chosen": -400.7259521484375, "logps/rejected": -463.8260192871094, "loss": 0.6913, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17324785888195038, "rewards/margins": 0.0800856351852417, "rewards/rejected": -0.25333350896835327, "step": 8030 }, { "epoch": 0.53, "learning_rate": 2.709023911294273e-06, "logits/chosen": -1.1286613941192627, "logits/rejected": -0.913567066192627, "logps/chosen": -388.92364501953125, "logps/rejected": -496.14349365234375, "loss": 0.6862, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1466047465801239, "rewards/margins": 0.1381194144487381, "rewards/rejected": -0.2847242057323456, "step": 8040 }, { "epoch": 0.53, "learning_rate": 2.7033334534783806e-06, "logits/chosen": -1.009552240371704, "logits/rejected": -1.1506755352020264, "logps/chosen": -369.41131591796875, "logps/rejected": -469.4898986816406, "loss": 0.6886, "rewards/accuracies": 0.625, "rewards/chosen": -0.17026540637016296, "rewards/margins": 0.08387952297925949, "rewards/rejected": -0.25414493680000305, "step": 8050 }, { "epoch": 0.53, "learning_rate": 2.697641934975592e-06, "logits/chosen": -1.0629603862762451, "logits/rejected": -0.896773636341095, "logps/chosen": -427.0707092285156, "logps/rejected": -479.0992736816406, "loss": 0.6882, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19928427040576935, "rewards/margins": 0.0845823660492897, "rewards/rejected": -0.28386664390563965, "step": 8060 }, { "epoch": 0.53, "learning_rate": 2.691949385475654e-06, "logits/chosen": -1.056208610534668, "logits/rejected": -0.8916531801223755, "logps/chosen": -451.06201171875, "logps/rejected": -505.90771484375, "loss": 0.6893, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20717112720012665, "rewards/margins": 0.08041705191135406, "rewards/rejected": -0.2875882089138031, "step": 8070 }, { "epoch": 0.53, "learning_rate": 2.6862558346736937e-06, "logits/chosen": -0.9724780917167664, "logits/rejected": -0.7502321600914001, "logps/chosen": -438.94268798828125, "logps/rejected": -590.4031982421875, "loss": 0.6851, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20059040188789368, "rewards/margins": 0.15666231513023376, "rewards/rejected": -0.35725271701812744, "step": 8080 }, { "epoch": 0.53, "learning_rate": 2.6805613122700617e-06, "logits/chosen": -0.7160404920578003, "logits/rejected": -0.7909665107727051, "logps/chosen": -459.51922607421875, "logps/rejected": -552.0443725585938, "loss": 0.6877, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23594674468040466, "rewards/margins": 0.09158305823802948, "rewards/rejected": -0.32752981781959534, "step": 8090 }, { "epoch": 0.53, "learning_rate": 2.674865847970176e-06, "logits/chosen": -0.8122416734695435, "logits/rejected": -0.6669374704360962, "logps/chosen": -430.8330078125, "logps/rejected": -524.0494384765625, "loss": 0.6908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2232166826725006, "rewards/margins": 0.07076805830001831, "rewards/rejected": -0.2939847409725189, "step": 8100 }, { "epoch": 0.53, "eval_logits/chosen": -0.7900794148445129, "eval_logits/rejected": -0.6794930100440979, "eval_logps/chosen": -433.87579345703125, "eval_logps/rejected": -497.8059387207031, "eval_loss": 0.6894783973693848, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.2018708437681198, "eval_rewards/margins": 0.0843232199549675, "eval_rewards/rejected": -0.2861940860748291, "eval_runtime": 711.208, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 8100 }, { "epoch": 0.53, "learning_rate": 2.669169471484368e-06, "logits/chosen": -0.5697265267372131, "logits/rejected": -0.6256684064865112, "logps/chosen": -370.5645446777344, "logps/rejected": -415.7994079589844, "loss": 0.6906, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.20550651848316193, "rewards/margins": 0.0478750616312027, "rewards/rejected": -0.25338155031204224, "step": 8110 }, { "epoch": 0.53, "learning_rate": 2.6634722125277278e-06, "logits/chosen": -0.9089109301567078, "logits/rejected": -0.6783393621444702, "logps/chosen": -463.1395568847656, "logps/rejected": -536.8549194335938, "loss": 0.6893, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.23101966083049774, "rewards/margins": 0.06552055478096008, "rewards/rejected": -0.29654020071029663, "step": 8120 }, { "epoch": 0.53, "learning_rate": 2.6577741008199498e-06, "logits/chosen": -0.617620587348938, "logits/rejected": -0.5651569366455078, "logps/chosen": -502.4048767089844, "logps/rejected": -587.86767578125, "loss": 0.6865, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24624672532081604, "rewards/margins": 0.1455041915178299, "rewards/rejected": -0.39175087213516235, "step": 8130 }, { "epoch": 0.53, "learning_rate": 2.652075166085175e-06, "logits/chosen": -0.578894317150116, "logits/rejected": -0.6739285588264465, "logps/chosen": -505.9727478027344, "logps/rejected": -664.359375, "loss": 0.6873, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.27139347791671753, "rewards/margins": 0.1348414421081543, "rewards/rejected": -0.4062349200248718, "step": 8140 }, { "epoch": 0.53, "learning_rate": 2.6463754380518395e-06, "logits/chosen": -0.558585524559021, "logits/rejected": -0.4612973630428314, "logps/chosen": -513.1217651367188, "logps/rejected": -555.1633911132812, "loss": 0.6908, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.28216418623924255, "rewards/margins": 0.0968686193227768, "rewards/rejected": -0.37903279066085815, "step": 8150 }, { "epoch": 0.53, "learning_rate": 2.6406749464525167e-06, "logits/chosen": -1.1261868476867676, "logits/rejected": -0.7899680733680725, "logps/chosen": -406.1955871582031, "logps/rejected": -457.78369140625, "loss": 0.69, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.172636479139328, "rewards/margins": 0.09881995618343353, "rewards/rejected": -0.27145642042160034, "step": 8160 }, { "epoch": 0.53, "learning_rate": 2.634973721023762e-06, "logits/chosen": -1.2199698686599731, "logits/rejected": -1.0744082927703857, "logps/chosen": -463.51263427734375, "logps/rejected": -482.42181396484375, "loss": 0.6898, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20905041694641113, "rewards/margins": 0.06834776699542999, "rewards/rejected": -0.2773981988430023, "step": 8170 }, { "epoch": 0.54, "learning_rate": 2.6292717915059605e-06, "logits/chosen": -1.2687675952911377, "logits/rejected": -1.109535813331604, "logps/chosen": -468.4798889160156, "logps/rejected": -526.0375366210938, "loss": 0.6892, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.192849799990654, "rewards/margins": 0.11375071108341217, "rewards/rejected": -0.30660054087638855, "step": 8180 }, { "epoch": 0.54, "learning_rate": 2.6235691876431706e-06, "logits/chosen": -1.241114616394043, "logits/rejected": -1.140925645828247, "logps/chosen": -392.8011169433594, "logps/rejected": -462.74755859375, "loss": 0.6893, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17418552935123444, "rewards/margins": 0.06434730440378189, "rewards/rejected": -0.23853282630443573, "step": 8190 }, { "epoch": 0.54, "learning_rate": 2.6178659391829673e-06, "logits/chosen": -1.2482163906097412, "logits/rejected": -0.9498428106307983, "logps/chosen": -402.7013854980469, "logps/rejected": -434.3684997558594, "loss": 0.6904, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16522344946861267, "rewards/margins": 0.07251036912202835, "rewards/rejected": -0.23773381114006042, "step": 8200 }, { "epoch": 0.54, "eval_logits/chosen": -1.0770260095596313, "eval_logits/rejected": -0.9495123028755188, "eval_logps/chosen": -421.29119873046875, "eval_logps/rejected": -480.12030029296875, "eval_loss": 0.6895091533660889, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.1892862617969513, "eval_rewards/margins": 0.07922215014696121, "eval_rewards/rejected": -0.2685084342956543, "eval_runtime": 712.5679, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 8200 }, { "epoch": 0.54, "learning_rate": 2.6121620758762877e-06, "logits/chosen": -1.1685333251953125, "logits/rejected": -0.8667934536933899, "logps/chosen": -379.981201171875, "logps/rejected": -443.89166259765625, "loss": 0.6904, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18789689242839813, "rewards/margins": 0.06470952928066254, "rewards/rejected": -0.25260645151138306, "step": 8210 }, { "epoch": 0.54, "learning_rate": 2.606457627477277e-06, "logits/chosen": -0.8580241203308105, "logits/rejected": -0.7762119174003601, "logps/chosen": -338.51263427734375, "logps/rejected": -426.2496032714844, "loss": 0.6912, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16288332641124725, "rewards/margins": 0.08438492566347122, "rewards/rejected": -0.24726824462413788, "step": 8220 }, { "epoch": 0.54, "learning_rate": 2.6007526237431324e-06, "logits/chosen": -1.172778844833374, "logits/rejected": -1.0370298624038696, "logps/chosen": -346.90911865234375, "logps/rejected": -456.95361328125, "loss": 0.6886, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16487959027290344, "rewards/margins": 0.0959264412522316, "rewards/rejected": -0.26080602407455444, "step": 8230 }, { "epoch": 0.54, "learning_rate": 2.5950470944339478e-06, "logits/chosen": -1.0417600870132446, "logits/rejected": -1.0672943592071533, "logps/chosen": -371.01171875, "logps/rejected": -402.5978698730469, "loss": 0.6916, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.14920981228351593, "rewards/margins": 0.035081896930933, "rewards/rejected": -0.18429169058799744, "step": 8240 }, { "epoch": 0.54, "learning_rate": 2.58934106931256e-06, "logits/chosen": -1.0141005516052246, "logits/rejected": -0.886761486530304, "logps/chosen": -412.96478271484375, "logps/rejected": -452.2259826660156, "loss": 0.6907, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19219276309013367, "rewards/margins": 0.05843869969248772, "rewards/rejected": -0.2506314516067505, "step": 8250 }, { "epoch": 0.54, "learning_rate": 2.58363457814439e-06, "logits/chosen": -1.1281472444534302, "logits/rejected": -0.8433464169502258, "logps/chosen": -416.047119140625, "logps/rejected": -489.42401123046875, "loss": 0.6873, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20537519454956055, "rewards/margins": 0.08710043132305145, "rewards/rejected": -0.2924756109714508, "step": 8260 }, { "epoch": 0.54, "learning_rate": 2.5779276506972924e-06, "logits/chosen": -0.9526575803756714, "logits/rejected": -0.9733279943466187, "logps/chosen": -408.75347900390625, "logps/rejected": -421.7810974121094, "loss": 0.691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1768006533384323, "rewards/margins": 0.05240979790687561, "rewards/rejected": -0.22921045124530792, "step": 8270 }, { "epoch": 0.54, "learning_rate": 2.5722203167413945e-06, "logits/chosen": -1.0990087985992432, "logits/rejected": -0.9820888638496399, "logps/chosen": -455.8550720214844, "logps/rejected": -470.61651611328125, "loss": 0.6894, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17255744338035583, "rewards/margins": 0.09668220579624176, "rewards/rejected": -0.2692396342754364, "step": 8280 }, { "epoch": 0.54, "learning_rate": 2.5665126060489476e-06, "logits/chosen": -1.152363896369934, "logits/rejected": -1.097588300704956, "logps/chosen": -353.132080078125, "logps/rejected": -451.05560302734375, "loss": 0.6896, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1650300920009613, "rewards/margins": 0.06907693296670914, "rewards/rejected": -0.23410698771476746, "step": 8290 }, { "epoch": 0.54, "learning_rate": 2.560804548394165e-06, "logits/chosen": -0.9613862037658691, "logits/rejected": -0.5457326173782349, "logps/chosen": -443.27239990234375, "logps/rejected": -486.93218994140625, "loss": 0.6895, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19599205255508423, "rewards/margins": 0.08893202245235443, "rewards/rejected": -0.28492408990859985, "step": 8300 }, { "epoch": 0.54, "eval_logits/chosen": -0.9703200459480286, "eval_logits/rejected": -0.8503559231758118, "eval_logps/chosen": -410.2919921875, "eval_logps/rejected": -467.85498046875, "eval_loss": 0.6896072626113892, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -0.17828704416751862, "eval_rewards/margins": 0.07795605808496475, "eval_rewards/rejected": -0.25624310970306396, "eval_runtime": 714.4842, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.4, "step": 8300 }, { "epoch": 0.54, "learning_rate": 2.5550961735530734e-06, "logits/chosen": -0.6795090436935425, "logits/rejected": -0.7947781085968018, "logps/chosen": -300.90093994140625, "logps/rejected": -393.47308349609375, "loss": 0.6903, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1378651261329651, "rewards/margins": 0.061981432139873505, "rewards/rejected": -0.1998465359210968, "step": 8310 }, { "epoch": 0.54, "learning_rate": 2.549387511303351e-06, "logits/chosen": -0.9080276489257812, "logits/rejected": -1.0452932119369507, "logps/chosen": -340.3172302246094, "logps/rejected": -445.0389099121094, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1705804169178009, "rewards/margins": 0.06044108793139458, "rewards/rejected": -0.23102149367332458, "step": 8320 }, { "epoch": 0.55, "learning_rate": 2.5436785914241774e-06, "logits/chosen": -0.8201481103897095, "logits/rejected": -0.6838145852088928, "logps/chosen": -406.7204284667969, "logps/rejected": -494.86260986328125, "loss": 0.6871, "rewards/accuracies": 0.75, "rewards/chosen": -0.20776119828224182, "rewards/margins": 0.120111845433712, "rewards/rejected": -0.3278730511665344, "step": 8330 }, { "epoch": 0.55, "learning_rate": 2.5379694436960746e-06, "logits/chosen": -0.9807443618774414, "logits/rejected": -0.955755352973938, "logps/chosen": -435.4742126464844, "logps/rejected": -507.7806701660156, "loss": 0.691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18946322798728943, "rewards/margins": 0.06174159049987793, "rewards/rejected": -0.25120481848716736, "step": 8340 }, { "epoch": 0.55, "learning_rate": 2.5322600979007533e-06, "logits/chosen": -1.0486241579055786, "logits/rejected": -0.7849219441413879, "logps/chosen": -409.82379150390625, "logps/rejected": -457.51300048828125, "loss": 0.6899, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1973283886909485, "rewards/margins": 0.06838833540678024, "rewards/rejected": -0.2657167315483093, "step": 8350 }, { "epoch": 0.55, "learning_rate": 2.5265505838209592e-06, "logits/chosen": -0.9295563697814941, "logits/rejected": -0.830061137676239, "logps/chosen": -477.27569580078125, "logps/rejected": -481.6053161621094, "loss": 0.6919, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22031202912330627, "rewards/margins": 0.05074296146631241, "rewards/rejected": -0.2710549831390381, "step": 8360 }, { "epoch": 0.55, "learning_rate": 2.520840931240314e-06, "logits/chosen": -1.0167980194091797, "logits/rejected": -0.7042320966720581, "logps/chosen": -412.95123291015625, "logps/rejected": -432.8705139160156, "loss": 0.6911, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20314264297485352, "rewards/margins": 0.08364014327526093, "rewards/rejected": -0.28678280115127563, "step": 8370 }, { "epoch": 0.55, "learning_rate": 2.515131169943162e-06, "logits/chosen": -0.5957569479942322, "logits/rejected": -0.5955820083618164, "logps/chosen": -463.299072265625, "logps/rejected": -550.264892578125, "loss": 0.6897, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20520460605621338, "rewards/margins": 0.09393807500600815, "rewards/rejected": -0.2991426885128021, "step": 8380 }, { "epoch": 0.55, "learning_rate": 2.509421329714416e-06, "logits/chosen": -0.6069525480270386, "logits/rejected": -0.7541376352310181, "logps/chosen": -362.64208984375, "logps/rejected": -435.2427673339844, "loss": 0.6916, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.15650290250778198, "rewards/margins": 0.051408637315034866, "rewards/rejected": -0.20791153609752655, "step": 8390 }, { "epoch": 0.55, "learning_rate": 2.5037114403393987e-06, "logits/chosen": -0.810130774974823, "logits/rejected": -0.5814931392669678, "logps/chosen": -348.248046875, "logps/rejected": -374.8896179199219, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13726766407489777, "rewards/margins": 0.06042211130261421, "rewards/rejected": -0.19768977165222168, "step": 8400 }, { "epoch": 0.55, "eval_logits/chosen": -0.901390016078949, "eval_logits/rejected": -0.7864856719970703, "eval_logps/chosen": -381.29022216796875, "eval_logps/rejected": -431.9936828613281, "eval_loss": 0.689697265625, "eval_rewards/accuracies": 0.6679999828338623, "eval_rewards/chosen": -0.14928528666496277, "eval_rewards/margins": 0.07109646499156952, "eval_rewards/rejected": -0.22038176655769348, "eval_runtime": 711.3945, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.406, "step": 8400 }, { "epoch": 0.55, "learning_rate": 2.4980015316036908e-06, "logits/chosen": -0.7899686098098755, "logits/rejected": -0.7767657041549683, "logps/chosen": -312.8028564453125, "logps/rejected": -435.9971618652344, "loss": 0.6873, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1369931399822235, "rewards/margins": 0.1016085147857666, "rewards/rejected": -0.2386016547679901, "step": 8410 }, { "epoch": 0.55, "learning_rate": 2.4922916332929725e-06, "logits/chosen": -1.0517879724502563, "logits/rejected": -1.0950580835342407, "logps/chosen": -374.1047058105469, "logps/rejected": -374.71875, "loss": 0.6915, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13868047297000885, "rewards/margins": 0.040228597819805145, "rewards/rejected": -0.1789090633392334, "step": 8420 }, { "epoch": 0.55, "learning_rate": 2.4865817751928716e-06, "logits/chosen": -1.0115129947662354, "logits/rejected": -0.9112080335617065, "logps/chosen": -355.5943908691406, "logps/rejected": -507.1390686035156, "loss": 0.6861, "rewards/accuracies": 0.75, "rewards/chosen": -0.16010567545890808, "rewards/margins": 0.125122532248497, "rewards/rejected": -0.2852281928062439, "step": 8430 }, { "epoch": 0.55, "learning_rate": 2.4808719870888037e-06, "logits/chosen": -0.7915637493133545, "logits/rejected": -0.5190998315811157, "logps/chosen": -387.52020263671875, "logps/rejected": -465.8919982910156, "loss": 0.6892, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16984879970550537, "rewards/margins": 0.11164551973342896, "rewards/rejected": -0.2814943194389343, "step": 8440 }, { "epoch": 0.55, "learning_rate": 2.4751622987658206e-06, "logits/chosen": -0.9463188052177429, "logits/rejected": -0.8167473077774048, "logps/chosen": -460.9830627441406, "logps/rejected": -512.6512451171875, "loss": 0.6917, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22303274273872375, "rewards/margins": 0.0624089241027832, "rewards/rejected": -0.28544169664382935, "step": 8450 }, { "epoch": 0.55, "learning_rate": 2.4694527400084546e-06, "logits/chosen": -0.720486044883728, "logits/rejected": -0.5732995867729187, "logps/chosen": -403.9100036621094, "logps/rejected": -468.7867126464844, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1816653460264206, "rewards/margins": 0.0707213282585144, "rewards/rejected": -0.2523866891860962, "step": 8460 }, { "epoch": 0.55, "learning_rate": 2.4637433406005607e-06, "logits/chosen": -0.6172093152999878, "logits/rejected": -0.9131113290786743, "logps/chosen": -549.1463623046875, "logps/rejected": -563.6685180664062, "loss": 0.6913, "rewards/accuracies": 0.625, "rewards/chosen": -0.239420086145401, "rewards/margins": 0.0425652377307415, "rewards/rejected": -0.2819853127002716, "step": 8470 }, { "epoch": 0.55, "learning_rate": 2.4580341303251628e-06, "logits/chosen": -0.32746773958206177, "logits/rejected": -0.24373102188110352, "logps/chosen": -470.934814453125, "logps/rejected": -526.7442016601562, "loss": 0.689, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21098360419273376, "rewards/margins": 0.09220820665359497, "rewards/rejected": -0.30319178104400635, "step": 8480 }, { "epoch": 0.56, "learning_rate": 2.4523251389642984e-06, "logits/chosen": -0.7050188779830933, "logits/rejected": -0.28900861740112305, "logps/chosen": -509.91156005859375, "logps/rejected": -579.9166870117188, "loss": 0.6873, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25535228848457336, "rewards/margins": 0.1071435958147049, "rewards/rejected": -0.36249589920043945, "step": 8490 }, { "epoch": 0.56, "learning_rate": 2.4466163962988626e-06, "logits/chosen": -0.9161072969436646, "logits/rejected": -0.6629343628883362, "logps/chosen": -513.077392578125, "logps/rejected": -519.474609375, "loss": 0.6878, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23247647285461426, "rewards/margins": 0.10511653125286102, "rewards/rejected": -0.3375930190086365, "step": 8500 }, { "epoch": 0.56, "eval_logits/chosen": -0.5619562268257141, "eval_logits/rejected": -0.4634077250957489, "eval_logps/chosen": -475.04638671875, "eval_logps/rejected": -545.1040649414062, "eval_loss": 0.6894838213920593, "eval_rewards/accuracies": 0.6710000038146973, "eval_rewards/chosen": -0.24304144084453583, "eval_rewards/margins": 0.09045073390007019, "eval_rewards/rejected": -0.3334921598434448, "eval_runtime": 711.5654, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 8500 }, { "epoch": 0.56, "learning_rate": 2.4409079321084543e-06, "logits/chosen": -0.755149245262146, "logits/rejected": -0.960436224937439, "logps/chosen": -416.8389587402344, "logps/rejected": -544.0230712890625, "loss": 0.6908, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20171932876110077, "rewards/margins": 0.09724519401788712, "rewards/rejected": -0.2989645004272461, "step": 8510 }, { "epoch": 0.56, "learning_rate": 2.4351997761712184e-06, "logits/chosen": -1.0244697332382202, "logits/rejected": -0.45252904295921326, "logps/chosen": -469.56524658203125, "logps/rejected": -494.75933837890625, "loss": 0.6897, "rewards/accuracies": 0.75, "rewards/chosen": -0.22417688369750977, "rewards/margins": 0.09190000593662262, "rewards/rejected": -0.31607693433761597, "step": 8520 }, { "epoch": 0.56, "learning_rate": 2.4294919582636933e-06, "logits/chosen": -0.831392765045166, "logits/rejected": -0.5988849997520447, "logps/chosen": -412.9861755371094, "logps/rejected": -481.98699951171875, "loss": 0.691, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20188367366790771, "rewards/margins": 0.08084017038345337, "rewards/rejected": -0.2827238440513611, "step": 8530 }, { "epoch": 0.56, "learning_rate": 2.423784508160652e-06, "logits/chosen": -0.7932353019714355, "logits/rejected": -0.6916912198066711, "logps/chosen": -520.588623046875, "logps/rejected": -544.4139404296875, "loss": 0.6907, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2653577923774719, "rewards/margins": 0.07166271656751633, "rewards/rejected": -0.33702048659324646, "step": 8540 }, { "epoch": 0.56, "learning_rate": 2.418077455634951e-06, "logits/chosen": -0.6658292412757874, "logits/rejected": -0.5401934385299683, "logps/chosen": -470.5606384277344, "logps/rejected": -553.2406005859375, "loss": 0.691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25398963689804077, "rewards/margins": 0.05512278154492378, "rewards/rejected": -0.30911239981651306, "step": 8550 }, { "epoch": 0.56, "learning_rate": 2.4123708304573714e-06, "logits/chosen": -0.7948893308639526, "logits/rejected": -0.4241601526737213, "logps/chosen": -517.2756958007812, "logps/rejected": -576.2462158203125, "loss": 0.6894, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2275083363056183, "rewards/margins": 0.0756693035364151, "rewards/rejected": -0.3031776547431946, "step": 8560 }, { "epoch": 0.56, "learning_rate": 2.406664662396465e-06, "logits/chosen": -0.20848624408245087, "logits/rejected": -0.26154419779777527, "logps/chosen": -458.6073303222656, "logps/rejected": -498.234375, "loss": 0.691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2742254137992859, "rewards/margins": 0.055309224873781204, "rewards/rejected": -0.3295346796512604, "step": 8570 }, { "epoch": 0.56, "learning_rate": 2.4009589812184012e-06, "logits/chosen": -0.5426809191703796, "logits/rejected": -0.23366662859916687, "logps/chosen": -436.4287109375, "logps/rejected": -460.65057373046875, "loss": 0.6884, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2332087755203247, "rewards/margins": 0.07733286917209625, "rewards/rejected": -0.31054162979125977, "step": 8580 }, { "epoch": 0.56, "learning_rate": 2.3952538166868073e-06, "logits/chosen": -0.4449075162410736, "logits/rejected": -0.5031110048294067, "logps/chosen": -477.5169982910156, "logps/rejected": -586.2160034179688, "loss": 0.6874, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25997069478034973, "rewards/margins": 0.12529215216636658, "rewards/rejected": -0.38526278734207153, "step": 8590 }, { "epoch": 0.56, "learning_rate": 2.389549198562616e-06, "logits/chosen": -0.7170125246047974, "logits/rejected": -0.3539445996284485, "logps/chosen": -483.71112060546875, "logps/rejected": -566.279052734375, "loss": 0.6881, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.25762954354286194, "rewards/margins": 0.1125480905175209, "rewards/rejected": -0.37017759680747986, "step": 8600 }, { "epoch": 0.56, "eval_logits/chosen": -0.6347896456718445, "eval_logits/rejected": -0.5309445261955261, "eval_logps/chosen": -483.77215576171875, "eval_logps/rejected": -558.7501831054688, "eval_loss": 0.6894675493240356, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -0.25176724791526794, "eval_rewards/margins": 0.09537114202976227, "eval_rewards/rejected": -0.3471384048461914, "eval_runtime": 711.4841, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.406, "step": 8600 }, { "epoch": 0.56, "learning_rate": 2.3838451566039098e-06, "logits/chosen": -0.9295812845230103, "logits/rejected": -0.7526025176048279, "logps/chosen": -484.5731506347656, "logps/rejected": -528.5623779296875, "loss": 0.6923, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24606318771839142, "rewards/margins": 0.05412333458662033, "rewards/rejected": -0.30018651485443115, "step": 8610 }, { "epoch": 0.56, "learning_rate": 2.3781417205657662e-06, "logits/chosen": -0.779945433139801, "logits/rejected": -0.6768251657485962, "logps/chosen": -427.7649841308594, "logps/rejected": -465.7723083496094, "loss": 0.6904, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23000852763652802, "rewards/margins": 0.076214499771595, "rewards/rejected": -0.3062230050563812, "step": 8620 }, { "epoch": 0.56, "learning_rate": 2.3724389202001006e-06, "logits/chosen": -0.6447828412055969, "logits/rejected": -0.3995209336280823, "logps/chosen": -456.41064453125, "logps/rejected": -512.5110473632812, "loss": 0.6917, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2540872097015381, "rewards/margins": 0.07779712229967117, "rewards/rejected": -0.33188432455062866, "step": 8630 }, { "epoch": 0.57, "learning_rate": 2.366736785255514e-06, "logits/chosen": -0.7542355060577393, "logits/rejected": -0.869966983795166, "logps/chosen": -447.4637145996094, "logps/rejected": -502.99365234375, "loss": 0.6909, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24873492121696472, "rewards/margins": 0.0667499229311943, "rewards/rejected": -0.315484881401062, "step": 8640 }, { "epoch": 0.57, "learning_rate": 2.3610353454771355e-06, "logits/chosen": -0.5472957491874695, "logits/rejected": -0.3664063513278961, "logps/chosen": -403.11505126953125, "logps/rejected": -469.02313232421875, "loss": 0.6903, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.21316757798194885, "rewards/margins": 0.08220270276069641, "rewards/rejected": -0.29537031054496765, "step": 8650 }, { "epoch": 0.57, "learning_rate": 2.355334630606467e-06, "logits/chosen": -1.101891279220581, "logits/rejected": -0.8851909637451172, "logps/chosen": -476.35845947265625, "logps/rejected": -476.6539611816406, "loss": 0.6906, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2360682189464569, "rewards/margins": 0.06384102255105972, "rewards/rejected": -0.2999092638492584, "step": 8660 }, { "epoch": 0.57, "learning_rate": 2.349634670381231e-06, "logits/chosen": -0.5369294285774231, "logits/rejected": -0.39624541997909546, "logps/chosen": -454.20489501953125, "logps/rejected": -533.3726806640625, "loss": 0.6903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24700577557086945, "rewards/margins": 0.06936918944120407, "rewards/rejected": -0.3163749575614929, "step": 8670 }, { "epoch": 0.57, "learning_rate": 2.3439354945352104e-06, "logits/chosen": -0.8326675295829773, "logits/rejected": -0.682153582572937, "logps/chosen": -448.55816650390625, "logps/rejected": -442.1712951660156, "loss": 0.6912, "rewards/accuracies": 0.5, "rewards/chosen": -0.205034539103508, "rewards/margins": 0.03801094740629196, "rewards/rejected": -0.24304552376270294, "step": 8680 }, { "epoch": 0.57, "learning_rate": 2.3382371327981e-06, "logits/chosen": -0.8642188310623169, "logits/rejected": -0.7281670570373535, "logps/chosen": -430.5753479003906, "logps/rejected": -505.33294677734375, "loss": 0.6894, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1990990936756134, "rewards/margins": 0.08905245363712311, "rewards/rejected": -0.2881515622138977, "step": 8690 }, { "epoch": 0.57, "learning_rate": 2.3325396148953456e-06, "logits/chosen": -0.4669378697872162, "logits/rejected": -0.6072463989257812, "logps/chosen": -429.3271484375, "logps/rejected": -579.4981689453125, "loss": 0.6888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2580257058143616, "rewards/margins": 0.0960407704114914, "rewards/rejected": -0.35406649112701416, "step": 8700 }, { "epoch": 0.57, "eval_logits/chosen": -0.7447597980499268, "eval_logits/rejected": -0.6359822750091553, "eval_logps/chosen": -477.96826171875, "eval_logps/rejected": -541.177001953125, "eval_loss": 0.689468264579773, "eval_rewards/accuracies": 0.6765000224113464, "eval_rewards/chosen": -0.24596332013607025, "eval_rewards/margins": 0.08360182493925095, "eval_rewards/rejected": -0.329565167427063, "eval_runtime": 710.5611, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.407, "step": 8700 }, { "epoch": 0.57, "learning_rate": 2.3268429705479915e-06, "logits/chosen": -1.2217304706573486, "logits/rejected": -0.7867422103881836, "logps/chosen": -480.87493896484375, "logps/rejected": -520.9791259765625, "loss": 0.6906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2560623288154602, "rewards/margins": 0.08142323791980743, "rewards/rejected": -0.3374856114387512, "step": 8710 }, { "epoch": 0.57, "learning_rate": 2.3211472294725248e-06, "logits/chosen": -0.6829847693443298, "logits/rejected": -0.580116868019104, "logps/chosen": -448.57513427734375, "logps/rejected": -521.0882568359375, "loss": 0.6901, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23216000199317932, "rewards/margins": 0.08518026024103165, "rewards/rejected": -0.31734028458595276, "step": 8720 }, { "epoch": 0.57, "learning_rate": 2.315452421380721e-06, "logits/chosen": -0.5848901867866516, "logits/rejected": -0.49349188804626465, "logps/chosen": -485.47344970703125, "logps/rejected": -522.7279663085938, "loss": 0.688, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23052915930747986, "rewards/margins": 0.08524759113788605, "rewards/rejected": -0.3157767653465271, "step": 8730 }, { "epoch": 0.57, "learning_rate": 2.3097585759794886e-06, "logits/chosen": -0.7139642834663391, "logits/rejected": -0.6301506161689758, "logps/chosen": -469.02227783203125, "logps/rejected": -537.8853759765625, "loss": 0.6867, "rewards/accuracies": 0.75, "rewards/chosen": -0.21468093991279602, "rewards/margins": 0.13225510716438293, "rewards/rejected": -0.34693604707717896, "step": 8740 }, { "epoch": 0.57, "learning_rate": 2.3040657229707155e-06, "logits/chosen": -1.0114556550979614, "logits/rejected": -0.8004629015922546, "logps/chosen": -378.1190490722656, "logps/rejected": -497.8941345214844, "loss": 0.6893, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20607146620750427, "rewards/margins": 0.10821950435638428, "rewards/rejected": -0.3142909109592438, "step": 8750 }, { "epoch": 0.57, "learning_rate": 2.2983738920511104e-06, "logits/chosen": -0.8854055404663086, "logits/rejected": -0.7368249297142029, "logps/chosen": -463.99664306640625, "logps/rejected": -481.71868896484375, "loss": 0.6915, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1942036896944046, "rewards/margins": 0.06802419573068619, "rewards/rejected": -0.262227863073349, "step": 8760 }, { "epoch": 0.57, "learning_rate": 2.2926831129120523e-06, "logits/chosen": -0.5618699193000793, "logits/rejected": -0.4119883179664612, "logps/chosen": -441.3275451660156, "logps/rejected": -469.8602600097656, "loss": 0.6917, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2055359184741974, "rewards/margins": 0.0568884015083313, "rewards/rejected": -0.2624242901802063, "step": 8770 }, { "epoch": 0.57, "learning_rate": 2.2869934152394323e-06, "logits/chosen": -0.8328984379768372, "logits/rejected": -0.8400952219963074, "logps/chosen": -495.8323669433594, "logps/rejected": -512.2872314453125, "loss": 0.6887, "rewards/accuracies": 0.625, "rewards/chosen": -0.22686699032783508, "rewards/margins": 0.07205691188573837, "rewards/rejected": -0.29892387986183167, "step": 8780 }, { "epoch": 0.58, "learning_rate": 2.281304828713501e-06, "logits/chosen": -1.0833885669708252, "logits/rejected": -0.8467610478401184, "logps/chosen": -438.43768310546875, "logps/rejected": -495.0404357910156, "loss": 0.6897, "rewards/accuracies": 0.625, "rewards/chosen": -0.2054503858089447, "rewards/margins": 0.064105324447155, "rewards/rejected": -0.2695557177066803, "step": 8790 }, { "epoch": 0.58, "learning_rate": 2.275617383008711e-06, "logits/chosen": -0.9343339800834656, "logits/rejected": -0.8902202844619751, "logps/chosen": -425.01849365234375, "logps/rejected": -473.1581115722656, "loss": 0.6912, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18953107297420502, "rewards/margins": 0.05080736428499222, "rewards/rejected": -0.24033844470977783, "step": 8800 }, { "epoch": 0.58, "eval_logits/chosen": -0.8669275045394897, "eval_logits/rejected": -0.7510409951210022, "eval_logps/chosen": -423.5054626464844, "eval_logps/rejected": -484.2248840332031, "eval_loss": 0.6895233392715454, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -0.19150054454803467, "eval_rewards/margins": 0.08111248910427094, "eval_rewards/rejected": -0.2726130485534668, "eval_runtime": 713.1294, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 8800 }, { "epoch": 0.58, "learning_rate": 2.269931107793567e-06, "logits/chosen": -0.539323091506958, "logits/rejected": -0.5320712327957153, "logps/chosen": -374.38763427734375, "logps/rejected": -448.4871520996094, "loss": 0.6902, "rewards/accuracies": 0.75, "rewards/chosen": -0.16210730373859406, "rewards/margins": 0.06600706279277802, "rewards/rejected": -0.22811436653137207, "step": 8810 }, { "epoch": 0.58, "learning_rate": 2.2642460327304655e-06, "logits/chosen": -1.025653600692749, "logits/rejected": -0.9366380572319031, "logps/chosen": -453.3301696777344, "logps/rejected": -506.7493591308594, "loss": 0.69, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20877043902873993, "rewards/margins": 0.06926669180393219, "rewards/rejected": -0.2780371308326721, "step": 8820 }, { "epoch": 0.58, "learning_rate": 2.258562187475543e-06, "logits/chosen": -0.886056125164032, "logits/rejected": -0.4289335310459137, "logps/chosen": -431.5992126464844, "logps/rejected": -468.0819396972656, "loss": 0.6894, "rewards/accuracies": 0.625, "rewards/chosen": -0.2046523094177246, "rewards/margins": 0.07399457693099976, "rewards/rejected": -0.27864688634872437, "step": 8830 }, { "epoch": 0.58, "learning_rate": 2.2528796016785196e-06, "logits/chosen": -0.4931762218475342, "logits/rejected": -0.624854564666748, "logps/chosen": -372.86358642578125, "logps/rejected": -496.1195373535156, "loss": 0.6884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1829456388950348, "rewards/margins": 0.11358989775180817, "rewards/rejected": -0.29653555154800415, "step": 8840 }, { "epoch": 0.58, "learning_rate": 2.247198304982548e-06, "logits/chosen": -0.5007576942443848, "logits/rejected": -0.5184835195541382, "logps/chosen": -354.9455261230469, "logps/rejected": -422.990234375, "loss": 0.6899, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1929740458726883, "rewards/margins": 0.07199414074420929, "rewards/rejected": -0.2649681866168976, "step": 8850 }, { "epoch": 0.58, "learning_rate": 2.2415183270240533e-06, "logits/chosen": -1.1592953205108643, "logits/rejected": -1.062793493270874, "logps/chosen": -411.7584533691406, "logps/rejected": -514.5060424804688, "loss": 0.6887, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21242883801460266, "rewards/margins": 0.09844199568033218, "rewards/rejected": -0.31087082624435425, "step": 8860 }, { "epoch": 0.58, "learning_rate": 2.2358396974325837e-06, "logits/chosen": -0.6631832122802734, "logits/rejected": -0.6946643590927124, "logps/chosen": -466.4697265625, "logps/rejected": -542.7886962890625, "loss": 0.6877, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22420215606689453, "rewards/margins": 0.10258595645427704, "rewards/rejected": -0.32678812742233276, "step": 8870 }, { "epoch": 0.58, "learning_rate": 2.2301624458306525e-06, "logits/chosen": -0.7441704869270325, "logits/rejected": -0.8345525860786438, "logps/chosen": -523.4190063476562, "logps/rejected": -540.7713623046875, "loss": 0.6905, "rewards/accuracies": 0.75, "rewards/chosen": -0.26407089829444885, "rewards/margins": 0.06869350373744965, "rewards/rejected": -0.3327644467353821, "step": 8880 }, { "epoch": 0.58, "learning_rate": 2.2244866018335855e-06, "logits/chosen": -0.6869930028915405, "logits/rejected": -0.6048182845115662, "logps/chosen": -440.927978515625, "logps/rejected": -521.34912109375, "loss": 0.6903, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22387027740478516, "rewards/margins": 0.06601408869028091, "rewards/rejected": -0.28988438844680786, "step": 8890 }, { "epoch": 0.58, "learning_rate": 2.2188121950493648e-06, "logits/chosen": -0.844458281993866, "logits/rejected": -0.4750286638736725, "logps/chosen": -480.06488037109375, "logps/rejected": -465.815673828125, "loss": 0.6905, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2613215148448944, "rewards/margins": 0.06400308758020401, "rewards/rejected": -0.3253245949745178, "step": 8900 }, { "epoch": 0.58, "eval_logits/chosen": -0.6408426761627197, "eval_logits/rejected": -0.5339207649230957, "eval_logps/chosen": -507.0281677246094, "eval_logps/rejected": -584.7627563476562, "eval_loss": 0.6895042061805725, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -0.2750232219696045, "eval_rewards/margins": 0.09812760353088379, "eval_rewards/rejected": -0.37315088510513306, "eval_runtime": 711.495, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 8900 }, { "epoch": 0.58, "learning_rate": 2.2131392550784766e-06, "logits/chosen": -0.6945571303367615, "logits/rejected": -0.4500119686126709, "logps/chosen": -585.53955078125, "logps/rejected": -574.7061157226562, "loss": 0.6914, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3006609082221985, "rewards/margins": 0.08473079651594162, "rewards/rejected": -0.3853917121887207, "step": 8910 }, { "epoch": 0.58, "learning_rate": 2.2074678115137533e-06, "logits/chosen": -0.7692958116531372, "logits/rejected": -0.5215278267860413, "logps/chosen": -463.76641845703125, "logps/rejected": -587.1498413085938, "loss": 0.6867, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2690494656562805, "rewards/margins": 0.11846695095300674, "rewards/rejected": -0.38751640915870667, "step": 8920 }, { "epoch": 0.58, "learning_rate": 2.201797893940224e-06, "logits/chosen": -0.49802762269973755, "logits/rejected": -0.6506582498550415, "logps/chosen": -514.5814819335938, "logps/rejected": -604.6488037109375, "loss": 0.6896, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.28140413761138916, "rewards/margins": 0.06971672922372818, "rewards/rejected": -0.35112085938453674, "step": 8930 }, { "epoch": 0.58, "learning_rate": 2.196129531934956e-06, "logits/chosen": -0.5921011567115784, "logits/rejected": -0.6181560754776001, "logps/chosen": -492.1026306152344, "logps/rejected": -557.617919921875, "loss": 0.6909, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2541886866092682, "rewards/margins": 0.08357776701450348, "rewards/rejected": -0.33776646852493286, "step": 8940 }, { "epoch": 0.59, "learning_rate": 2.190462755066902e-06, "logits/chosen": -0.9346240758895874, "logits/rejected": -0.6352697610855103, "logps/chosen": -520.2872314453125, "logps/rejected": -543.5521240234375, "loss": 0.6925, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25788596272468567, "rewards/margins": 0.04875577986240387, "rewards/rejected": -0.30664172768592834, "step": 8950 }, { "epoch": 0.59, "learning_rate": 2.184797592896746e-06, "logits/chosen": -1.015408992767334, "logits/rejected": -0.8906301259994507, "logps/chosen": -459.4242248535156, "logps/rejected": -503.21258544921875, "loss": 0.6892, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.224988654255867, "rewards/margins": 0.0703214704990387, "rewards/rejected": -0.2953101396560669, "step": 8960 }, { "epoch": 0.59, "learning_rate": 2.17913407497675e-06, "logits/chosen": -0.9990288615226746, "logits/rejected": -0.9135416150093079, "logps/chosen": -332.30548095703125, "logps/rejected": -434.0537109375, "loss": 0.6906, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15392352640628815, "rewards/margins": 0.067796990275383, "rewards/rejected": -0.22172050178050995, "step": 8970 }, { "epoch": 0.59, "learning_rate": 2.173472230850596e-06, "logits/chosen": -1.244238257408142, "logits/rejected": -0.9179713129997253, "logps/chosen": -357.5025329589844, "logps/rejected": -372.48565673828125, "loss": 0.6904, "rewards/accuracies": 0.5, "rewards/chosen": -0.15723998844623566, "rewards/margins": 0.055426858365535736, "rewards/rejected": -0.2126668244600296, "step": 8980 }, { "epoch": 0.59, "learning_rate": 2.1678120900532375e-06, "logits/chosen": -1.006601095199585, "logits/rejected": -0.8393747210502625, "logps/chosen": -450.2059020996094, "logps/rejected": -521.0638427734375, "loss": 0.691, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2142910659313202, "rewards/margins": 0.10011821985244751, "rewards/rejected": -0.3144093155860901, "step": 8990 }, { "epoch": 0.59, "learning_rate": 2.1621536821107412e-06, "logits/chosen": -0.9647336006164551, "logits/rejected": -0.7863036394119263, "logps/chosen": -395.4327087402344, "logps/rejected": -435.06201171875, "loss": 0.6889, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19286289811134338, "rewards/margins": 0.08166161924600601, "rewards/rejected": -0.2745245099067688, "step": 9000 }, { "epoch": 0.59, "eval_logits/chosen": -0.9425244331359863, "eval_logits/rejected": -0.820991039276123, "eval_logps/chosen": -420.3014831542969, "eval_logps/rejected": -482.8201904296875, "eval_loss": 0.6894809603691101, "eval_rewards/accuracies": 0.671999990940094, "eval_rewards/chosen": -0.18829651176929474, "eval_rewards/margins": 0.08291179686784744, "eval_rewards/rejected": -0.2712083160877228, "eval_runtime": 711.4161, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.406, "step": 9000 }, { "epoch": 0.59, "learning_rate": 2.1564970365401346e-06, "logits/chosen": -1.1748039722442627, "logits/rejected": -0.8536974191665649, "logps/chosen": -374.5556945800781, "logps/rejected": -409.34722900390625, "loss": 0.6884, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.19040462374687195, "rewards/margins": 0.07279963046312332, "rewards/rejected": -0.26320427656173706, "step": 9010 }, { "epoch": 0.59, "learning_rate": 2.1508421828492527e-06, "logits/chosen": -1.266325831413269, "logits/rejected": -0.9437382817268372, "logps/chosen": -389.5987243652344, "logps/rejected": -388.35601806640625, "loss": 0.6921, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.16440604627132416, "rewards/margins": 0.06699763238430023, "rewards/rejected": -0.2314036786556244, "step": 9020 }, { "epoch": 0.59, "learning_rate": 2.145189150536582e-06, "logits/chosen": -1.0054259300231934, "logits/rejected": -0.9243372082710266, "logps/chosen": -381.30938720703125, "logps/rejected": -391.0116271972656, "loss": 0.6907, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.15913987159729004, "rewards/margins": 0.05910447984933853, "rewards/rejected": -0.21824435889720917, "step": 9030 }, { "epoch": 0.59, "learning_rate": 2.139537969091107e-06, "logits/chosen": -0.9480058550834656, "logits/rejected": -0.9302452206611633, "logps/chosen": -426.36700439453125, "logps/rejected": -399.29840087890625, "loss": 0.6926, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1607833206653595, "rewards/margins": 0.03510197624564171, "rewards/rejected": -0.1958852857351303, "step": 9040 }, { "epoch": 0.59, "learning_rate": 2.1338886679921603e-06, "logits/chosen": -1.1572840213775635, "logits/rejected": -1.0841923952102661, "logps/chosen": -390.0577697753906, "logps/rejected": -422.123291015625, "loss": 0.691, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1485147327184677, "rewards/margins": 0.05247587710618973, "rewards/rejected": -0.20099060237407684, "step": 9050 }, { "epoch": 0.59, "learning_rate": 2.128241276709263e-06, "logits/chosen": -1.3356164693832397, "logits/rejected": -1.3861857652664185, "logps/chosen": -322.12042236328125, "logps/rejected": -411.61212158203125, "loss": 0.6912, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11885868012905121, "rewards/margins": 0.06945428252220154, "rewards/rejected": -0.18831294775009155, "step": 9060 }, { "epoch": 0.59, "learning_rate": 2.1225958247019746e-06, "logits/chosen": -1.4204630851745605, "logits/rejected": -1.453250527381897, "logps/chosen": -330.41796875, "logps/rejected": -398.22210693359375, "loss": 0.6911, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1432761251926422, "rewards/margins": 0.04766453430056572, "rewards/rejected": -0.19094067811965942, "step": 9070 }, { "epoch": 0.59, "learning_rate": 2.1169523414197383e-06, "logits/chosen": -1.002547025680542, "logits/rejected": -0.9242392778396606, "logps/chosen": -345.39385986328125, "logps/rejected": -413.39593505859375, "loss": 0.6905, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.14658816158771515, "rewards/margins": 0.044680409133434296, "rewards/rejected": -0.19126857817173004, "step": 9080 }, { "epoch": 0.59, "learning_rate": 2.1113108563017267e-06, "logits/chosen": -0.8716124296188354, "logits/rejected": -0.8876460194587708, "logps/chosen": -437.42974853515625, "logps/rejected": -488.0655212402344, "loss": 0.6903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2246464490890503, "rewards/margins": 0.08126501739025116, "rewards/rejected": -0.30591148138046265, "step": 9090 }, { "epoch": 0.6, "learning_rate": 2.1056713987766905e-06, "logits/chosen": -1.1082594394683838, "logits/rejected": -0.9452310800552368, "logps/chosen": -418.8975524902344, "logps/rejected": -449.0021057128906, "loss": 0.6906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20369724929332733, "rewards/margins": 0.07881996780633926, "rewards/rejected": -0.2825172245502472, "step": 9100 }, { "epoch": 0.6, "eval_logits/chosen": -1.003013014793396, "eval_logits/rejected": -0.8787204623222351, "eval_logps/chosen": -430.4298095703125, "eval_logps/rejected": -486.5714416503906, "eval_loss": 0.6895273923873901, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -0.19842489063739777, "eval_rewards/margins": 0.07653466612100601, "eval_rewards/rejected": -0.2749595642089844, "eval_runtime": 712.7409, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 9100 }, { "epoch": 0.6, "learning_rate": 2.1000339982628022e-06, "logits/chosen": -0.8402494192123413, "logits/rejected": -0.7123501300811768, "logps/chosen": -485.10626220703125, "logps/rejected": -507.3006896972656, "loss": 0.6898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2338334023952484, "rewards/margins": 0.056514572352170944, "rewards/rejected": -0.29034799337387085, "step": 9110 }, { "epoch": 0.6, "learning_rate": 2.0943986841675043e-06, "logits/chosen": -1.1401221752166748, "logits/rejected": -0.8223906755447388, "logps/chosen": -401.43218994140625, "logps/rejected": -469.53082275390625, "loss": 0.6897, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19822123646736145, "rewards/margins": 0.0874384194612503, "rewards/rejected": -0.2856596112251282, "step": 9120 }, { "epoch": 0.6, "learning_rate": 2.088765485887356e-06, "logits/chosen": -1.15511155128479, "logits/rejected": -0.8909670114517212, "logps/chosen": -433.95330810546875, "logps/rejected": -433.62237548828125, "loss": 0.6917, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.18923839926719666, "rewards/margins": 0.041294485330581665, "rewards/rejected": -0.2305329144001007, "step": 9130 }, { "epoch": 0.6, "learning_rate": 2.083134432807879e-06, "logits/chosen": -1.2437583208084106, "logits/rejected": -1.0313438177108765, "logps/chosen": -401.11529541015625, "logps/rejected": -514.318603515625, "loss": 0.6891, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20684321224689484, "rewards/margins": 0.09168516844511032, "rewards/rejected": -0.29852837324142456, "step": 9140 }, { "epoch": 0.6, "learning_rate": 2.077505554303404e-06, "logits/chosen": -1.0879318714141846, "logits/rejected": -1.0778875350952148, "logps/chosen": -328.68927001953125, "logps/rejected": -402.76544189453125, "loss": 0.6899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1541372537612915, "rewards/margins": 0.06970960646867752, "rewards/rejected": -0.22384683787822723, "step": 9150 }, { "epoch": 0.6, "learning_rate": 2.071878879736918e-06, "logits/chosen": -1.2794255018234253, "logits/rejected": -1.1359623670578003, "logps/chosen": -445.3270568847656, "logps/rejected": -587.7103271484375, "loss": 0.6901, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19848747551441193, "rewards/margins": 0.0640995055437088, "rewards/rejected": -0.2625869810581207, "step": 9160 }, { "epoch": 0.6, "learning_rate": 2.0662544384599136e-06, "logits/chosen": -1.0890775918960571, "logits/rejected": -0.9574755430221558, "logps/chosen": -342.47711181640625, "logps/rejected": -402.0417785644531, "loss": 0.6893, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1403704583644867, "rewards/margins": 0.07605002820491791, "rewards/rejected": -0.2164204865694046, "step": 9170 }, { "epoch": 0.6, "learning_rate": 2.0606322598122314e-06, "logits/chosen": -1.0150939226150513, "logits/rejected": -1.157862901687622, "logps/chosen": -346.90814208984375, "logps/rejected": -394.8082580566406, "loss": 0.6924, "rewards/accuracies": 0.5, "rewards/chosen": -0.1603945940732956, "rewards/margins": 0.028239255771040916, "rewards/rejected": -0.18863385915756226, "step": 9180 }, { "epoch": 0.6, "learning_rate": 2.0550123731219085e-06, "logits/chosen": -1.5570096969604492, "logits/rejected": -1.143188714981079, "logps/chosen": -394.85992431640625, "logps/rejected": -420.784423828125, "loss": 0.6889, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14168012142181396, "rewards/margins": 0.06706938147544861, "rewards/rejected": -0.20874948799610138, "step": 9190 }, { "epoch": 0.6, "learning_rate": 2.0493948077050267e-06, "logits/chosen": -0.7615960240364075, "logits/rejected": -0.6373671293258667, "logps/chosen": -375.5524597167969, "logps/rejected": -436.08282470703125, "loss": 0.6893, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.182296484708786, "rewards/margins": 0.07861991226673126, "rewards/rejected": -0.26091641187667847, "step": 9200 }, { "epoch": 0.6, "eval_logits/chosen": -1.085320234298706, "eval_logits/rejected": -0.9545806646347046, "eval_logps/chosen": -403.3822326660156, "eval_logps/rejected": -463.1018981933594, "eval_loss": 0.6895037293434143, "eval_rewards/accuracies": 0.6729999780654907, "eval_rewards/chosen": -0.1713773012161255, "eval_rewards/margins": 0.08011273294687271, "eval_rewards/rejected": -0.2514900267124176, "eval_runtime": 711.4246, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.406, "step": 9200 }, { "epoch": 0.6, "learning_rate": 2.0437795928655596e-06, "logits/chosen": -1.1752150058746338, "logits/rejected": -1.2434207201004028, "logps/chosen": -457.6922912597656, "logps/rejected": -494.33966064453125, "loss": 0.6907, "rewards/accuracies": 0.625, "rewards/chosen": -0.17597965896129608, "rewards/margins": 0.054831117391586304, "rewards/rejected": -0.23081080615520477, "step": 9210 }, { "epoch": 0.6, "learning_rate": 2.0381667578952184e-06, "logits/chosen": -1.1544066667556763, "logits/rejected": -1.0161818265914917, "logps/chosen": -409.118408203125, "logps/rejected": -512.7477416992188, "loss": 0.6879, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1982455998659134, "rewards/margins": 0.0980122834444046, "rewards/rejected": -0.2962579131126404, "step": 9220 }, { "epoch": 0.6, "learning_rate": 2.0325563320732995e-06, "logits/chosen": -1.1459739208221436, "logits/rejected": -1.059901475906372, "logps/chosen": -457.919921875, "logps/rejected": -496.1634216308594, "loss": 0.6902, "rewards/accuracies": 0.625, "rewards/chosen": -0.19458158314228058, "rewards/margins": 0.08226142078638077, "rewards/rejected": -0.27684301137924194, "step": 9230 }, { "epoch": 0.6, "learning_rate": 2.026948344666532e-06, "logits/chosen": -0.7194372415542603, "logits/rejected": -0.8435705304145813, "logps/chosen": -418.29852294921875, "logps/rejected": -508.6609802246094, "loss": 0.6885, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21917256712913513, "rewards/margins": 0.09080708771944046, "rewards/rejected": -0.30997970700263977, "step": 9240 }, { "epoch": 0.61, "learning_rate": 2.0213428249289257e-06, "logits/chosen": -0.40980544686317444, "logits/rejected": -0.6821666955947876, "logps/chosen": -415.1513671875, "logps/rejected": -512.7869873046875, "loss": 0.6876, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2166108340024948, "rewards/margins": 0.0958281084895134, "rewards/rejected": -0.31243896484375, "step": 9250 }, { "epoch": 0.61, "learning_rate": 2.0157398021016175e-06, "logits/chosen": -0.7078922390937805, "logits/rejected": -0.7407819628715515, "logps/chosen": -340.1689758300781, "logps/rejected": -475.88323974609375, "loss": 0.691, "rewards/accuracies": 0.5, "rewards/chosen": -0.19263359904289246, "rewards/margins": 0.08950956165790558, "rewards/rejected": -0.28214317560195923, "step": 9260 }, { "epoch": 0.61, "learning_rate": 2.010139305412719e-06, "logits/chosen": -1.393827199935913, "logits/rejected": -1.0710934400558472, "logps/chosen": -490.1766662597656, "logps/rejected": -514.3074951171875, "loss": 0.6914, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21358105540275574, "rewards/margins": 0.06649493426084518, "rewards/rejected": -0.28007596731185913, "step": 9270 }, { "epoch": 0.61, "learning_rate": 2.0045413640771644e-06, "logits/chosen": -1.0676630735397339, "logits/rejected": -0.7858937978744507, "logps/chosen": -457.89892578125, "logps/rejected": -559.87353515625, "loss": 0.6878, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20273356139659882, "rewards/margins": 0.09871906787157059, "rewards/rejected": -0.3014525771141052, "step": 9280 }, { "epoch": 0.61, "learning_rate": 1.998946007296558e-06, "logits/chosen": -1.0419930219650269, "logits/rejected": -0.9732457995414734, "logps/chosen": -513.1734619140625, "logps/rejected": -541.3367309570312, "loss": 0.6887, "rewards/accuracies": 0.75, "rewards/chosen": -0.20457284152507782, "rewards/margins": 0.09160022437572479, "rewards/rejected": -0.2961730659008026, "step": 9290 }, { "epoch": 0.61, "learning_rate": 1.9933532642590215e-06, "logits/chosen": -0.5975018739700317, "logits/rejected": -0.3555176556110382, "logps/chosen": -357.795654296875, "logps/rejected": -402.771484375, "loss": 0.6902, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16863739490509033, "rewards/margins": 0.0951366201043129, "rewards/rejected": -0.26377400755882263, "step": 9300 }, { "epoch": 0.61, "eval_logits/chosen": -0.89243084192276, "eval_logits/rejected": -0.7708998322486877, "eval_logps/chosen": -436.14013671875, "eval_logps/rejected": -502.2715148925781, "eval_loss": 0.6894857287406921, "eval_rewards/accuracies": 0.6790000200271606, "eval_rewards/chosen": -0.20413516461849213, "eval_rewards/margins": 0.08652444928884506, "eval_rewards/rejected": -0.2906596064567566, "eval_runtime": 711.624, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 9300 }, { "epoch": 0.61, "learning_rate": 1.987763164139042e-06, "logits/chosen": -1.0309691429138184, "logits/rejected": -0.7755805253982544, "logps/chosen": -406.9186706542969, "logps/rejected": -505.7527770996094, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20166358351707458, "rewards/margins": 0.09657981246709824, "rewards/rejected": -0.29824337363243103, "step": 9310 }, { "epoch": 0.61, "learning_rate": 1.982175736097321e-06, "logits/chosen": -0.7986326217651367, "logits/rejected": -0.7574408650398254, "logps/chosen": -505.83282470703125, "logps/rejected": -606.5858154296875, "loss": 0.6905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23129181563854218, "rewards/margins": 0.08373314142227173, "rewards/rejected": -0.3150249421596527, "step": 9320 }, { "epoch": 0.61, "learning_rate": 1.9765910092806196e-06, "logits/chosen": -0.8272625803947449, "logits/rejected": -0.6810213327407837, "logps/chosen": -341.3271179199219, "logps/rejected": -382.89288330078125, "loss": 0.6895, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16325163841247559, "rewards/margins": 0.06122400239109993, "rewards/rejected": -0.2244756519794464, "step": 9330 }, { "epoch": 0.61, "learning_rate": 1.9710090128216083e-06, "logits/chosen": -0.9659280776977539, "logits/rejected": -0.7761660814285278, "logps/chosen": -455.28497314453125, "logps/rejected": -549.11181640625, "loss": 0.6884, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23255494236946106, "rewards/margins": 0.1099899634718895, "rewards/rejected": -0.34254494309425354, "step": 9340 }, { "epoch": 0.61, "learning_rate": 1.9654297758387155e-06, "logits/chosen": -0.8274409174919128, "logits/rejected": -0.6248763799667358, "logps/chosen": -396.43341064453125, "logps/rejected": -489.4363708496094, "loss": 0.6899, "rewards/accuracies": 0.625, "rewards/chosen": -0.23556342720985413, "rewards/margins": 0.07227747142314911, "rewards/rejected": -0.30784088373184204, "step": 9350 }, { "epoch": 0.61, "learning_rate": 1.9598533274359736e-06, "logits/chosen": -0.7758110761642456, "logits/rejected": -0.902672290802002, "logps/chosen": -464.6673278808594, "logps/rejected": -500.7264709472656, "loss": 0.6924, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.22498705983161926, "rewards/margins": 0.02869754657149315, "rewards/rejected": -0.2536846101284027, "step": 9360 }, { "epoch": 0.61, "learning_rate": 1.9542796967028697e-06, "logits/chosen": -1.1198780536651611, "logits/rejected": -0.8304969072341919, "logps/chosen": -425.0093688964844, "logps/rejected": -467.0662536621094, "loss": 0.6913, "rewards/accuracies": 0.75, "rewards/chosen": -0.20676879584789276, "rewards/margins": 0.060299746692180634, "rewards/rejected": -0.2670685648918152, "step": 9370 }, { "epoch": 0.61, "learning_rate": 1.948708912714192e-06, "logits/chosen": -0.4404030740261078, "logits/rejected": -0.6012068390846252, "logps/chosen": -505.26495361328125, "logps/rejected": -536.6444091796875, "loss": 0.6913, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2583196759223938, "rewards/margins": 0.06231521815061569, "rewards/rejected": -0.3206349015235901, "step": 9380 }, { "epoch": 0.61, "learning_rate": 1.9431410045298786e-06, "logits/chosen": -0.5475056767463684, "logits/rejected": -0.7080952525138855, "logps/chosen": -436.47412109375, "logps/rejected": -505.7579650878906, "loss": 0.6893, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21934881806373596, "rewards/margins": 0.07025954127311707, "rewards/rejected": -0.28960832953453064, "step": 9390 }, { "epoch": 0.62, "learning_rate": 1.9375760011948654e-06, "logits/chosen": -0.8554168939590454, "logits/rejected": -0.8827948570251465, "logps/chosen": -414.16375732421875, "logps/rejected": -530.1060791015625, "loss": 0.6885, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21448588371276855, "rewards/margins": 0.09362180531024933, "rewards/rejected": -0.3081076741218567, "step": 9400 }, { "epoch": 0.62, "eval_logits/chosen": -0.787796139717102, "eval_logits/rejected": -0.6740127205848694, "eval_logps/chosen": -455.37786865234375, "eval_logps/rejected": -517.357421875, "eval_loss": 0.6894824504852295, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -0.2233729362487793, "eval_rewards/margins": 0.08237263560295105, "eval_rewards/rejected": -0.30574557185173035, "eval_runtime": 711.2531, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 9400 }, { "epoch": 0.62, "learning_rate": 1.932013931738937e-06, "logits/chosen": -0.7979623675346375, "logits/rejected": -0.5755224227905273, "logps/chosen": -462.43115234375, "logps/rejected": -596.0765380859375, "loss": 0.685, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2588294446468353, "rewards/margins": 0.12076359987258911, "rewards/rejected": -0.37959304451942444, "step": 9410 }, { "epoch": 0.62, "learning_rate": 1.9264548251765717e-06, "logits/chosen": -0.9121201634407043, "logits/rejected": -0.862923800945282, "logps/chosen": -441.1590881347656, "logps/rejected": -514.7960205078125, "loss": 0.6907, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23735642433166504, "rewards/margins": 0.07674865424633026, "rewards/rejected": -0.3141050934791565, "step": 9420 }, { "epoch": 0.62, "learning_rate": 1.9208987105067924e-06, "logits/chosen": -0.6129502058029175, "logits/rejected": -0.43746525049209595, "logps/chosen": -465.37872314453125, "logps/rejected": -516.73974609375, "loss": 0.691, "rewards/accuracies": 0.625, "rewards/chosen": -0.25082165002822876, "rewards/margins": 0.07524871081113815, "rewards/rejected": -0.3260703682899475, "step": 9430 }, { "epoch": 0.62, "learning_rate": 1.9153456167130154e-06, "logits/chosen": -0.7086097598075867, "logits/rejected": -0.7439953088760376, "logps/chosen": -449.641845703125, "logps/rejected": -551.921142578125, "loss": 0.6906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24557125568389893, "rewards/margins": 0.07608067989349365, "rewards/rejected": -0.3216519057750702, "step": 9440 }, { "epoch": 0.62, "learning_rate": 1.9097955727628975e-06, "logits/chosen": -0.9949433207511902, "logits/rejected": -1.032881259918213, "logps/chosen": -380.15447998046875, "logps/rejected": -458.3058166503906, "loss": 0.6901, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1841690093278885, "rewards/margins": 0.06621630489826202, "rewards/rejected": -0.2503852844238281, "step": 9450 }, { "epoch": 0.62, "learning_rate": 1.904248607608187e-06, "logits/chosen": -0.6118771433830261, "logits/rejected": -0.8482357263565063, "logps/chosen": -460.8009338378906, "logps/rejected": -472.385498046875, "loss": 0.6913, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20358090102672577, "rewards/margins": 0.05869137495756149, "rewards/rejected": -0.26227226853370667, "step": 9460 }, { "epoch": 0.62, "learning_rate": 1.8987047501845714e-06, "logits/chosen": -0.9858795404434204, "logits/rejected": -0.7422040104866028, "logps/chosen": -359.5581359863281, "logps/rejected": -448.4444274902344, "loss": 0.6895, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19263368844985962, "rewards/margins": 0.09513186663389206, "rewards/rejected": -0.2877655625343323, "step": 9470 }, { "epoch": 0.62, "learning_rate": 1.8931640294115267e-06, "logits/chosen": -0.7054397463798523, "logits/rejected": -0.45379215478897095, "logps/chosen": -383.64605712890625, "logps/rejected": -470.921875, "loss": 0.6894, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1896400898694992, "rewards/margins": 0.10280604660511017, "rewards/rejected": -0.29244619607925415, "step": 9480 }, { "epoch": 0.62, "learning_rate": 1.8876264741921662e-06, "logits/chosen": -0.7664824724197388, "logits/rejected": -0.7304006814956665, "logps/chosen": -371.33160400390625, "logps/rejected": -480.13214111328125, "loss": 0.6863, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1812061369419098, "rewards/margins": 0.11423603445291519, "rewards/rejected": -0.2954421937465668, "step": 9490 }, { "epoch": 0.62, "learning_rate": 1.8820921134130912e-06, "logits/chosen": -0.9434563517570496, "logits/rejected": -0.5874304175376892, "logps/chosen": -430.4812927246094, "logps/rejected": -517.7457275390625, "loss": 0.6864, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19996920228004456, "rewards/margins": 0.13217367231845856, "rewards/rejected": -0.33214282989501953, "step": 9500 }, { "epoch": 0.62, "eval_logits/chosen": -0.7308316826820374, "eval_logits/rejected": -0.6198488473892212, "eval_logps/chosen": -440.1344909667969, "eval_logps/rejected": -503.9653625488281, "eval_loss": 0.6894819140434265, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -0.20812954008579254, "eval_rewards/margins": 0.08422394841909409, "eval_rewards/rejected": -0.2923535108566284, "eval_runtime": 713.9575, "eval_samples_per_second": 2.801, "eval_steps_per_second": 1.401, "step": 9500 }, { "epoch": 0.62, "learning_rate": 1.8765609759442378e-06, "logits/chosen": -0.2680138647556305, "logits/rejected": -0.4130997657775879, "logps/chosen": -458.49627685546875, "logps/rejected": -515.45458984375, "loss": 0.6896, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2171749621629715, "rewards/margins": 0.06777051836252213, "rewards/rejected": -0.2849455177783966, "step": 9510 }, { "epoch": 0.62, "learning_rate": 1.8710330906387288e-06, "logits/chosen": -1.2024356126785278, "logits/rejected": -1.1673656702041626, "logps/chosen": -469.227294921875, "logps/rejected": -582.1397705078125, "loss": 0.6886, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23073968291282654, "rewards/margins": 0.0893225371837616, "rewards/rejected": -0.32006222009658813, "step": 9520 }, { "epoch": 0.62, "learning_rate": 1.8655084863327222e-06, "logits/chosen": -0.6578270196914673, "logits/rejected": -0.5960260629653931, "logps/chosen": -350.1307678222656, "logps/rejected": -429.6395568847656, "loss": 0.6914, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16667279601097107, "rewards/margins": 0.07334667444229126, "rewards/rejected": -0.24001947045326233, "step": 9530 }, { "epoch": 0.62, "learning_rate": 1.8599871918452603e-06, "logits/chosen": -0.5374518036842346, "logits/rejected": -0.6281536817550659, "logps/chosen": -431.32977294921875, "logps/rejected": -529.6610107421875, "loss": 0.6904, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20823796093463898, "rewards/margins": 0.08508185297250748, "rewards/rejected": -0.29331979155540466, "step": 9540 }, { "epoch": 0.62, "learning_rate": 1.8544692359781192e-06, "logits/chosen": -0.5340497493743896, "logits/rejected": -0.5739427804946899, "logps/chosen": -366.29071044921875, "logps/rejected": -406.6949157714844, "loss": 0.691, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1787060797214508, "rewards/margins": 0.07135146111249924, "rewards/rejected": -0.250057578086853, "step": 9550 }, { "epoch": 0.63, "learning_rate": 1.8489546475156602e-06, "logits/chosen": -0.9955617189407349, "logits/rejected": -1.0135492086410522, "logps/chosen": -411.125, "logps/rejected": -467.4065856933594, "loss": 0.6909, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1916321963071823, "rewards/margins": 0.0744495689868927, "rewards/rejected": -0.2660817801952362, "step": 9560 }, { "epoch": 0.63, "learning_rate": 1.8434434552246778e-06, "logits/chosen": -0.7039095163345337, "logits/rejected": -0.6999632120132446, "logps/chosen": -402.59906005859375, "logps/rejected": -470.253173828125, "loss": 0.6899, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19296754896640778, "rewards/margins": 0.07891705632209778, "rewards/rejected": -0.27188462018966675, "step": 9570 }, { "epoch": 0.63, "learning_rate": 1.837935687854251e-06, "logits/chosen": -0.8548401594161987, "logits/rejected": -0.6474151015281677, "logps/chosen": -410.0220642089844, "logps/rejected": -464.441162109375, "loss": 0.6881, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1884753704071045, "rewards/margins": 0.08621923625469208, "rewards/rejected": -0.27469462156295776, "step": 9580 }, { "epoch": 0.63, "learning_rate": 1.832431374135592e-06, "logits/chosen": -0.8793581128120422, "logits/rejected": -0.9984419941902161, "logps/chosen": -440.98486328125, "logps/rejected": -550.4117431640625, "loss": 0.6869, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19254949688911438, "rewards/margins": 0.12829996645450592, "rewards/rejected": -0.3208494484424591, "step": 9590 }, { "epoch": 0.63, "learning_rate": 1.8269305427818977e-06, "logits/chosen": -0.9290812611579895, "logits/rejected": -0.8652921915054321, "logps/chosen": -393.28179931640625, "logps/rejected": -435.561279296875, "loss": 0.6896, "rewards/accuracies": 0.625, "rewards/chosen": -0.18466556072235107, "rewards/margins": 0.06994330883026123, "rewards/rejected": -0.2546088695526123, "step": 9600 }, { "epoch": 0.63, "eval_logits/chosen": -0.7276079058647156, "eval_logits/rejected": -0.6168313026428223, "eval_logps/chosen": -417.38580322265625, "eval_logps/rejected": -483.2872619628906, "eval_loss": 0.6895156502723694, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -0.18538087606430054, "eval_rewards/margins": 0.08629447966814041, "eval_rewards/rejected": -0.27167531847953796, "eval_runtime": 712.7219, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 9600 }, { "epoch": 0.63, "learning_rate": 1.821433222488199e-06, "logits/chosen": -0.357731431722641, "logits/rejected": -0.5084939002990723, "logps/chosen": -405.1507263183594, "logps/rejected": -457.59283447265625, "loss": 0.689, "rewards/accuracies": 0.625, "rewards/chosen": -0.17953599989414215, "rewards/margins": 0.08018968254327774, "rewards/rejected": -0.2597256600856781, "step": 9610 }, { "epoch": 0.63, "learning_rate": 1.8159394419312112e-06, "logits/chosen": -0.9117706418037415, "logits/rejected": -0.628116250038147, "logps/chosen": -446.15411376953125, "logps/rejected": -529.2586059570312, "loss": 0.6874, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18742407858371735, "rewards/margins": 0.1296432465314865, "rewards/rejected": -0.31706729531288147, "step": 9620 }, { "epoch": 0.63, "learning_rate": 1.8104492297691845e-06, "logits/chosen": -0.8102725744247437, "logits/rejected": -0.6468926668167114, "logps/chosen": -486.02703857421875, "logps/rejected": -541.3604125976562, "loss": 0.6913, "rewards/accuracies": 0.75, "rewards/chosen": -0.25950390100479126, "rewards/margins": 0.07944594323635101, "rewards/rejected": -0.33894985914230347, "step": 9630 }, { "epoch": 0.63, "learning_rate": 1.8049626146417562e-06, "logits/chosen": 0.026175355538725853, "logits/rejected": -0.2544723153114319, "logps/chosen": -342.4808349609375, "logps/rejected": -426.19879150390625, "loss": 0.6904, "rewards/accuracies": 0.625, "rewards/chosen": -0.18740496039390564, "rewards/margins": 0.08743083477020264, "rewards/rejected": -0.2748357951641083, "step": 9640 }, { "epoch": 0.63, "learning_rate": 1.7994796251697983e-06, "logits/chosen": -0.44325417280197144, "logits/rejected": -0.2331937998533249, "logps/chosen": -398.76373291015625, "logps/rejected": -543.6065063476562, "loss": 0.6888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2067248374223709, "rewards/margins": 0.10527799278497696, "rewards/rejected": -0.31200283765792847, "step": 9650 }, { "epoch": 0.63, "learning_rate": 1.794000289955269e-06, "logits/chosen": -0.5990532636642456, "logits/rejected": -0.7400835752487183, "logps/chosen": -462.24212646484375, "logps/rejected": -519.3464965820312, "loss": 0.6884, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1951819211244583, "rewards/margins": 0.08855116367340088, "rewards/rejected": -0.283733069896698, "step": 9660 }, { "epoch": 0.63, "learning_rate": 1.7885246375810646e-06, "logits/chosen": -0.23755809664726257, "logits/rejected": -0.2857111394405365, "logps/chosen": -383.4107360839844, "logps/rejected": -439.93896484375, "loss": 0.6909, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15913687646389008, "rewards/margins": 0.060290198773145676, "rewards/rejected": -0.21942707896232605, "step": 9670 }, { "epoch": 0.63, "learning_rate": 1.7830526966108713e-06, "logits/chosen": -0.5294663310050964, "logits/rejected": -0.3975537419319153, "logps/chosen": -385.46209716796875, "logps/rejected": -486.99285888671875, "loss": 0.6846, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19898918271064758, "rewards/margins": 0.13014227151870728, "rewards/rejected": -0.32913145422935486, "step": 9680 }, { "epoch": 0.63, "learning_rate": 1.7775844955890129e-06, "logits/chosen": -0.42855939269065857, "logits/rejected": -0.328756183385849, "logps/chosen": -384.8292541503906, "logps/rejected": -471.77593994140625, "loss": 0.6881, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1732829064130783, "rewards/margins": 0.09852338582277298, "rewards/rejected": -0.2718062996864319, "step": 9690 }, { "epoch": 0.63, "learning_rate": 1.7721200630403046e-06, "logits/chosen": -0.3515569567680359, "logits/rejected": -0.3742366433143616, "logps/chosen": -369.99420166015625, "logps/rejected": -472.16693115234375, "loss": 0.6884, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1742318570613861, "rewards/margins": 0.07662680745124817, "rewards/rejected": -0.2508586645126343, "step": 9700 }, { "epoch": 0.63, "eval_logits/chosen": -0.6133941411972046, "eval_logits/rejected": -0.5091242790222168, "eval_logps/chosen": -434.1581726074219, "eval_logps/rejected": -500.04058837890625, "eval_loss": 0.689453125, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -0.20215323567390442, "eval_rewards/margins": 0.08627549558877945, "eval_rewards/rejected": -0.28842872381210327, "eval_runtime": 712.5855, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 9700 }, { "epoch": 0.64, "learning_rate": 1.7666594274699037e-06, "logits/chosen": -0.5265650749206543, "logits/rejected": -0.5063202381134033, "logps/chosen": -477.45159912109375, "logps/rejected": -557.30419921875, "loss": 0.6884, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22476127743721008, "rewards/margins": 0.11805678904056549, "rewards/rejected": -0.3428180515766144, "step": 9710 }, { "epoch": 0.64, "learning_rate": 1.76120261736316e-06, "logits/chosen": -0.47983318567276, "logits/rejected": -0.20942839980125427, "logps/chosen": -440.2186584472656, "logps/rejected": -536.6563110351562, "loss": 0.6865, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21350359916687012, "rewards/margins": 0.12358088791370392, "rewards/rejected": -0.33708450198173523, "step": 9720 }, { "epoch": 0.64, "learning_rate": 1.755749661185468e-06, "logits/chosen": -0.7283905148506165, "logits/rejected": -0.5965417623519897, "logps/chosen": -487.33905029296875, "logps/rejected": -531.7615966796875, "loss": 0.6896, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19288946688175201, "rewards/margins": 0.10325628519058228, "rewards/rejected": -0.2961457371711731, "step": 9730 }, { "epoch": 0.64, "learning_rate": 1.7503005873821183e-06, "logits/chosen": -0.529441237449646, "logits/rejected": -0.7391183376312256, "logps/chosen": -351.6926574707031, "logps/rejected": -468.90545654296875, "loss": 0.6895, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.191264346241951, "rewards/margins": 0.09094887971878052, "rewards/rejected": -0.2822132408618927, "step": 9740 }, { "epoch": 0.64, "learning_rate": 1.744855424378148e-06, "logits/chosen": -0.3442351818084717, "logits/rejected": -0.7468951940536499, "logps/chosen": -372.4602355957031, "logps/rejected": -495.74658203125, "loss": 0.6909, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1838332712650299, "rewards/margins": 0.09984080493450165, "rewards/rejected": -0.28367406129837036, "step": 9750 }, { "epoch": 0.64, "learning_rate": 1.7394142005781973e-06, "logits/chosen": -0.7553730607032776, "logits/rejected": -0.6287229061126709, "logps/chosen": -458.0218200683594, "logps/rejected": -533.7131958007812, "loss": 0.6928, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19631078839302063, "rewards/margins": 0.07212035357952118, "rewards/rejected": -0.2684311270713806, "step": 9760 }, { "epoch": 0.64, "learning_rate": 1.7339769443663528e-06, "logits/chosen": -0.6719237565994263, "logits/rejected": -0.6959569454193115, "logps/chosen": -333.442626953125, "logps/rejected": -423.80023193359375, "loss": 0.6874, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1905004382133484, "rewards/margins": 0.089654341340065, "rewards/rejected": -0.2801547646522522, "step": 9770 }, { "epoch": 0.64, "learning_rate": 1.7285436841060078e-06, "logits/chosen": -0.7846375703811646, "logits/rejected": -0.6807979345321655, "logps/chosen": -460.4491271972656, "logps/rejected": -497.080322265625, "loss": 0.6898, "rewards/accuracies": 0.75, "rewards/chosen": -0.18432073295116425, "rewards/margins": 0.07958535850048065, "rewards/rejected": -0.2639060914516449, "step": 9780 }, { "epoch": 0.64, "learning_rate": 1.7231144481397083e-06, "logits/chosen": -0.8831745982170105, "logits/rejected": -0.7654666900634766, "logps/chosen": -393.7134094238281, "logps/rejected": -424.652587890625, "loss": 0.6905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17331509292125702, "rewards/margins": 0.05637786537408829, "rewards/rejected": -0.22969293594360352, "step": 9790 }, { "epoch": 0.64, "learning_rate": 1.7176892647890092e-06, "logits/chosen": -0.7050382494926453, "logits/rejected": -0.3511679768562317, "logps/chosen": -425.826416015625, "logps/rejected": -440.35748291015625, "loss": 0.6913, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19016043841838837, "rewards/margins": 0.05341249704360962, "rewards/rejected": -0.2435729205608368, "step": 9800 }, { "epoch": 0.64, "eval_logits/chosen": -0.6631197929382324, "eval_logits/rejected": -0.5572806000709534, "eval_logps/chosen": -428.2689514160156, "eval_logps/rejected": -488.5942077636719, "eval_loss": 0.6894755363464355, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -0.19626396894454956, "eval_rewards/margins": 0.08071837574243546, "eval_rewards/rejected": -0.2769823670387268, "eval_runtime": 709.5855, "eval_samples_per_second": 2.819, "eval_steps_per_second": 1.409, "step": 9800 }, { "epoch": 0.64, "learning_rate": 1.7122681623543239e-06, "logits/chosen": -0.8072730302810669, "logits/rejected": -0.9070757031440735, "logps/chosen": -437.646240234375, "logps/rejected": -526.9542846679688, "loss": 0.6896, "rewards/accuracies": 0.75, "rewards/chosen": -0.19065766036510468, "rewards/margins": 0.10052184760570526, "rewards/rejected": -0.29117950797080994, "step": 9810 }, { "epoch": 0.64, "learning_rate": 1.7068511691147788e-06, "logits/chosen": -0.5625017881393433, "logits/rejected": -0.4836824834346771, "logps/chosen": -361.634521484375, "logps/rejected": -447.08758544921875, "loss": 0.6911, "rewards/accuracies": 0.625, "rewards/chosen": -0.1675034761428833, "rewards/margins": 0.07708346098661423, "rewards/rejected": -0.24458694458007812, "step": 9820 }, { "epoch": 0.64, "learning_rate": 1.7014383133280636e-06, "logits/chosen": -0.7409011125564575, "logits/rejected": -0.38703542947769165, "logps/chosen": -476.31243896484375, "logps/rejected": -494.09637451171875, "loss": 0.6899, "rewards/accuracies": 0.625, "rewards/chosen": -0.22767803072929382, "rewards/margins": 0.07117791473865509, "rewards/rejected": -0.2988559305667877, "step": 9830 }, { "epoch": 0.64, "learning_rate": 1.696029623230286e-06, "logits/chosen": -0.6452018022537231, "logits/rejected": -0.7865114212036133, "logps/chosen": -469.92877197265625, "logps/rejected": -596.3943481445312, "loss": 0.689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2140064686536789, "rewards/margins": 0.10768643766641617, "rewards/rejected": -0.32169288396835327, "step": 9840 }, { "epoch": 0.64, "learning_rate": 1.6906251270358229e-06, "logits/chosen": -0.7774800062179565, "logits/rejected": -0.6933233141899109, "logps/chosen": -474.37335205078125, "logps/rejected": -498.35589599609375, "loss": 0.6903, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2115190029144287, "rewards/margins": 0.0737244039773941, "rewards/rejected": -0.2852434515953064, "step": 9850 }, { "epoch": 0.65, "learning_rate": 1.685224852937174e-06, "logits/chosen": -0.5178209543228149, "logits/rejected": -0.14217159152030945, "logps/chosen": -408.82708740234375, "logps/rejected": -625.0120849609375, "loss": 0.6826, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2104623019695282, "rewards/margins": 0.174326092004776, "rewards/rejected": -0.3847884237766266, "step": 9860 }, { "epoch": 0.65, "learning_rate": 1.6798288291048136e-06, "logits/chosen": -0.42471179366111755, "logits/rejected": -0.3782634139060974, "logps/chosen": -467.9342346191406, "logps/rejected": -558.4611206054688, "loss": 0.687, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24469268321990967, "rewards/margins": 0.11923189461231232, "rewards/rejected": -0.3639245331287384, "step": 9870 }, { "epoch": 0.65, "learning_rate": 1.6744370836870466e-06, "logits/chosen": -1.2039653062820435, "logits/rejected": -0.723548948764801, "logps/chosen": -551.3778076171875, "logps/rejected": -583.1861572265625, "loss": 0.6865, "rewards/accuracies": 0.625, "rewards/chosen": -0.21668429672718048, "rewards/margins": 0.11532609164714813, "rewards/rejected": -0.3320103585720062, "step": 9880 }, { "epoch": 0.65, "learning_rate": 1.6690496448098576e-06, "logits/chosen": -0.4742654860019684, "logits/rejected": -0.34280937910079956, "logps/chosen": -430.12713623046875, "logps/rejected": -484.00201416015625, "loss": 0.6895, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2010909616947174, "rewards/margins": 0.0747339203953743, "rewards/rejected": -0.2758248746395111, "step": 9890 }, { "epoch": 0.65, "learning_rate": 1.6636665405767666e-06, "logits/chosen": -0.3358609974384308, "logits/rejected": -0.2778807282447815, "logps/chosen": -421.57672119140625, "logps/rejected": -476.6004333496094, "loss": 0.6887, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18380577862262726, "rewards/margins": 0.07391373813152313, "rewards/rejected": -0.2577195465564728, "step": 9900 }, { "epoch": 0.65, "eval_logits/chosen": -0.5865435004234314, "eval_logits/rejected": -0.4826829731464386, "eval_logps/chosen": -436.94842529296875, "eval_logps/rejected": -508.5710754394531, "eval_loss": 0.689460039138794, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -0.20494350790977478, "eval_rewards/margins": 0.09201564639806747, "eval_rewards/rejected": -0.29695916175842285, "eval_runtime": 711.2802, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 9900 }, { "epoch": 0.65, "learning_rate": 1.6582877990686827e-06, "logits/chosen": -0.517175555229187, "logits/rejected": -0.7299954891204834, "logps/chosen": -289.72222900390625, "logps/rejected": -421.864990234375, "loss": 0.689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16842789947986603, "rewards/margins": 0.1098114401102066, "rewards/rejected": -0.27823930978775024, "step": 9910 }, { "epoch": 0.65, "learning_rate": 1.6529134483437562e-06, "logits/chosen": -0.4364466667175293, "logits/rejected": -0.7086406946182251, "logps/chosen": -417.412841796875, "logps/rejected": -474.3499450683594, "loss": 0.6889, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21183860301971436, "rewards/margins": 0.09639080613851547, "rewards/rejected": -0.30822938680648804, "step": 9920 }, { "epoch": 0.65, "learning_rate": 1.647543516437233e-06, "logits/chosen": -0.7980550527572632, "logits/rejected": -0.8304376602172852, "logps/chosen": -403.45947265625, "logps/rejected": -505.7483825683594, "loss": 0.6896, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20442131161689758, "rewards/margins": 0.08160404860973358, "rewards/rejected": -0.28602534532546997, "step": 9930 }, { "epoch": 0.65, "learning_rate": 1.6421780313613088e-06, "logits/chosen": -0.5756974220275879, "logits/rejected": -0.19046545028686523, "logps/chosen": -413.3451232910156, "logps/rejected": -482.7889099121094, "loss": 0.6912, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20860353112220764, "rewards/margins": 0.10263363271951675, "rewards/rejected": -0.3112371265888214, "step": 9940 }, { "epoch": 0.65, "learning_rate": 1.6368170211049816e-06, "logits/chosen": -0.2424355298280716, "logits/rejected": -0.3142424523830414, "logps/chosen": -515.1586303710938, "logps/rejected": -553.0162353515625, "loss": 0.6886, "rewards/accuracies": 0.625, "rewards/chosen": -0.23135843873023987, "rewards/margins": 0.09796912968158722, "rewards/rejected": -0.3293275833129883, "step": 9950 }, { "epoch": 0.65, "learning_rate": 1.6314605136339074e-06, "logits/chosen": -0.6656386256217957, "logits/rejected": -0.4989562928676605, "logps/chosen": -398.7882995605469, "logps/rejected": -458.17120361328125, "loss": 0.6909, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20320162177085876, "rewards/margins": 0.07728231698274612, "rewards/rejected": -0.2804839611053467, "step": 9960 }, { "epoch": 0.65, "learning_rate": 1.6261085368902526e-06, "logits/chosen": -1.0514520406723022, "logits/rejected": -0.9138363599777222, "logps/chosen": -455.0506286621094, "logps/rejected": -479.02337646484375, "loss": 0.6885, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1901097595691681, "rewards/margins": 0.0676022320985794, "rewards/rejected": -0.2577120065689087, "step": 9970 }, { "epoch": 0.65, "learning_rate": 1.6207611187925503e-06, "logits/chosen": -0.7324178814888, "logits/rejected": -0.6601067185401917, "logps/chosen": -412.18756103515625, "logps/rejected": -551.6229858398438, "loss": 0.6865, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2033606320619583, "rewards/margins": 0.09311771392822266, "rewards/rejected": -0.2964783310890198, "step": 9980 }, { "epoch": 0.65, "learning_rate": 1.6154182872355512e-06, "logits/chosen": -0.40643399953842163, "logits/rejected": -0.5821970105171204, "logps/chosen": -396.7106628417969, "logps/rejected": -487.38092041015625, "loss": 0.6909, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2328932285308838, "rewards/margins": 0.0762505978345871, "rewards/rejected": -0.3091438412666321, "step": 9990 }, { "epoch": 0.65, "learning_rate": 1.610080070090084e-06, "logits/chosen": -0.5443228483200073, "logits/rejected": -0.3826178312301636, "logps/chosen": -480.44256591796875, "logps/rejected": -582.5927124023438, "loss": 0.6886, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.299521267414093, "rewards/margins": 0.11117850244045258, "rewards/rejected": -0.4106997549533844, "step": 10000 }, { "epoch": 0.65, "eval_logits/chosen": -0.4209235906600952, "eval_logits/rejected": -0.3262253701686859, "eval_logps/chosen": -493.7507629394531, "eval_logps/rejected": -570.7291870117188, "eval_loss": 0.6895338296890259, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -0.2617458403110504, "eval_rewards/margins": 0.09737147390842438, "eval_rewards/rejected": -0.3591172993183136, "eval_runtime": 709.5084, "eval_samples_per_second": 2.819, "eval_steps_per_second": 1.409, "step": 10000 }, { "epoch": 0.65, "learning_rate": 1.6047464952029034e-06, "logits/chosen": -0.7706862092018127, "logits/rejected": -0.7439472675323486, "logps/chosen": -504.51043701171875, "logps/rejected": -627.5145263671875, "loss": 0.6887, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2502230405807495, "rewards/margins": 0.1147780567407608, "rewards/rejected": -0.3650010824203491, "step": 10010 }, { "epoch": 0.66, "learning_rate": 1.5994175903965486e-06, "logits/chosen": -0.2823607325553894, "logits/rejected": -0.013178685680031776, "logps/chosen": -538.0333251953125, "logps/rejected": -646.7186889648438, "loss": 0.6901, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2877029478549957, "rewards/margins": 0.10693083703517914, "rewards/rejected": -0.39463382959365845, "step": 10020 }, { "epoch": 0.66, "learning_rate": 1.5940933834691977e-06, "logits/chosen": -0.7852429747581482, "logits/rejected": -0.46865981817245483, "logps/chosen": -570.1761474609375, "logps/rejected": -542.385498046875, "loss": 0.6903, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2649102807044983, "rewards/margins": 0.08010663837194443, "rewards/rejected": -0.3450169265270233, "step": 10030 }, { "epoch": 0.66, "learning_rate": 1.588773902194522e-06, "logits/chosen": -0.39314407110214233, "logits/rejected": -0.09499244391918182, "logps/chosen": -512.2601318359375, "logps/rejected": -632.1806030273438, "loss": 0.6864, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3081805109977722, "rewards/margins": 0.12416081130504608, "rewards/rejected": -0.4323412775993347, "step": 10040 }, { "epoch": 0.66, "learning_rate": 1.583459174321541e-06, "logits/chosen": -0.08470626175403595, "logits/rejected": -0.16978123784065247, "logps/chosen": -516.5139770507812, "logps/rejected": -590.5281372070312, "loss": 0.6894, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3068966865539551, "rewards/margins": 0.10110817104578018, "rewards/rejected": -0.40800485014915466, "step": 10050 }, { "epoch": 0.66, "learning_rate": 1.5781492275744797e-06, "logits/chosen": -0.8645523190498352, "logits/rejected": -0.7959304451942444, "logps/chosen": -579.1370849609375, "logps/rejected": -673.6524658203125, "loss": 0.6913, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.28489264845848083, "rewards/margins": 0.12073332071304321, "rewards/rejected": -0.40562596917152405, "step": 10060 }, { "epoch": 0.66, "learning_rate": 1.5728440896526215e-06, "logits/chosen": -0.2425081729888916, "logits/rejected": -0.21429088711738586, "logps/chosen": -523.8734741210938, "logps/rejected": -572.2105712890625, "loss": 0.6891, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2504715323448181, "rewards/margins": 0.09453996270895004, "rewards/rejected": -0.34501153230667114, "step": 10070 }, { "epoch": 0.66, "learning_rate": 1.5675437882301633e-06, "logits/chosen": -0.5220723152160645, "logits/rejected": -0.5302685499191284, "logps/chosen": -468.63037109375, "logps/rejected": -466.52032470703125, "loss": 0.6921, "rewards/accuracies": 0.625, "rewards/chosen": -0.24958617985248566, "rewards/margins": 0.032685764133930206, "rewards/rejected": -0.2822719216346741, "step": 10080 }, { "epoch": 0.66, "learning_rate": 1.5622483509560748e-06, "logits/chosen": -0.31761685013771057, "logits/rejected": -0.4478934407234192, "logps/chosen": -406.1260681152344, "logps/rejected": -529.2910766601562, "loss": 0.6905, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23539690673351288, "rewards/margins": 0.0929826870560646, "rewards/rejected": -0.32837963104248047, "step": 10090 }, { "epoch": 0.66, "learning_rate": 1.5569578054539506e-06, "logits/chosen": -0.6085635423660278, "logits/rejected": -0.3032626509666443, "logps/chosen": -530.7781982421875, "logps/rejected": -589.9613037109375, "loss": 0.6858, "rewards/accuracies": 0.875, "rewards/chosen": -0.2515811324119568, "rewards/margins": 0.14304611086845398, "rewards/rejected": -0.3946272134780884, "step": 10100 }, { "epoch": 0.66, "eval_logits/chosen": -0.5254442691802979, "eval_logits/rejected": -0.42709270119667053, "eval_logps/chosen": -472.549072265625, "eval_logps/rejected": -537.1530151367188, "eval_loss": 0.6895158886909485, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -0.2405441403388977, "eval_rewards/margins": 0.08499700576066971, "eval_rewards/rejected": -0.3255411386489868, "eval_runtime": 712.2687, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 10100 }, { "epoch": 0.66, "learning_rate": 1.551672179321867e-06, "logits/chosen": -0.5627814531326294, "logits/rejected": -0.5385065078735352, "logps/chosen": -443.6526794433594, "logps/rejected": -504.775390625, "loss": 0.6902, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22779580950737, "rewards/margins": 0.0822938084602356, "rewards/rejected": -0.310089647769928, "step": 10110 }, { "epoch": 0.66, "learning_rate": 1.5463915001322398e-06, "logits/chosen": -0.5434405207633972, "logits/rejected": -0.3636297881603241, "logps/chosen": -501.84539794921875, "logps/rejected": -582.4625244140625, "loss": 0.687, "rewards/accuracies": 0.625, "rewards/chosen": -0.2561890482902527, "rewards/margins": 0.09498479962348938, "rewards/rejected": -0.35117384791374207, "step": 10120 }, { "epoch": 0.66, "learning_rate": 1.5411157954316784e-06, "logits/chosen": -0.8339093327522278, "logits/rejected": -0.37349334359169006, "logps/chosen": -435.55792236328125, "logps/rejected": -486.6434020996094, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": -0.23368044197559357, "rewards/margins": 0.06477528065443039, "rewards/rejected": -0.29845571517944336, "step": 10130 }, { "epoch": 0.66, "learning_rate": 1.535845092740843e-06, "logits/chosen": -0.5845457315444946, "logits/rejected": -0.6137182116508484, "logps/chosen": -437.1864318847656, "logps/rejected": -508.76416015625, "loss": 0.6902, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19986525177955627, "rewards/margins": 0.05918189138174057, "rewards/rejected": -0.25904718041419983, "step": 10140 }, { "epoch": 0.66, "learning_rate": 1.5305794195543005e-06, "logits/chosen": -0.8156601190567017, "logits/rejected": -0.7871606945991516, "logps/chosen": -426.68145751953125, "logps/rejected": -507.53546142578125, "loss": 0.688, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2195023000240326, "rewards/margins": 0.09778545051813126, "rewards/rejected": -0.31728774309158325, "step": 10150 }, { "epoch": 0.66, "learning_rate": 1.5253188033403816e-06, "logits/chosen": -0.9039441347122192, "logits/rejected": -0.7695221304893494, "logps/chosen": -358.87200927734375, "logps/rejected": -414.21600341796875, "loss": 0.6913, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.18813714385032654, "rewards/margins": 0.03868565335869789, "rewards/rejected": -0.22682280838489532, "step": 10160 }, { "epoch": 0.67, "learning_rate": 1.520063271541037e-06, "logits/chosen": -0.7685378193855286, "logits/rejected": -0.6929147839546204, "logps/chosen": -407.37103271484375, "logps/rejected": -530.870361328125, "loss": 0.6845, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22886629402637482, "rewards/margins": 0.13480234146118164, "rewards/rejected": -0.36366862058639526, "step": 10170 }, { "epoch": 0.67, "learning_rate": 1.5148128515716954e-06, "logits/chosen": -0.9215852618217468, "logits/rejected": -0.5616071820259094, "logps/chosen": -481.96209716796875, "logps/rejected": -521.5393676757812, "loss": 0.6867, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21543291211128235, "rewards/margins": 0.11195148527622223, "rewards/rejected": -0.32738441228866577, "step": 10180 }, { "epoch": 0.67, "learning_rate": 1.5095675708211197e-06, "logits/chosen": -0.8018338084220886, "logits/rejected": -0.7028027772903442, "logps/chosen": -449.2286682128906, "logps/rejected": -511.21270751953125, "loss": 0.6906, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.25806769728660583, "rewards/margins": 0.038640473037958145, "rewards/rejected": -0.29670819640159607, "step": 10190 }, { "epoch": 0.67, "learning_rate": 1.504327456651263e-06, "logits/chosen": -0.5294234156608582, "logits/rejected": -0.4144687056541443, "logps/chosen": -522.6754150390625, "logps/rejected": -584.6783447265625, "loss": 0.6902, "rewards/accuracies": 0.75, "rewards/chosen": -0.2612723708152771, "rewards/margins": 0.08896765112876892, "rewards/rejected": -0.3502400517463684, "step": 10200 }, { "epoch": 0.67, "eval_logits/chosen": -0.6712846159934998, "eval_logits/rejected": -0.5611080527305603, "eval_logps/chosen": -470.42236328125, "eval_logps/rejected": -546.7171630859375, "eval_loss": 0.6895034313201904, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -0.23841746151447296, "eval_rewards/margins": 0.09668787568807602, "eval_rewards/rejected": -0.33510535955429077, "eval_runtime": 714.6345, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 10200 }, { "epoch": 0.67, "learning_rate": 1.4990925363971284e-06, "logits/chosen": -0.7982445955276489, "logits/rejected": -0.2872164845466614, "logps/chosen": -553.0101318359375, "logps/rejected": -664.3682250976562, "loss": 0.6874, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2630870044231415, "rewards/margins": 0.17984716594219208, "rewards/rejected": -0.44293412566185, "step": 10210 }, { "epoch": 0.67, "learning_rate": 1.4938628373666236e-06, "logits/chosen": -0.6805712580680847, "logits/rejected": -0.5815011262893677, "logps/chosen": -409.97039794921875, "logps/rejected": -481.96588134765625, "loss": 0.6915, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2373911440372467, "rewards/margins": 0.07012283056974411, "rewards/rejected": -0.3075140118598938, "step": 10220 }, { "epoch": 0.67, "learning_rate": 1.4886383868404203e-06, "logits/chosen": -0.47447291016578674, "logits/rejected": -0.5905685424804688, "logps/chosen": -360.18585205078125, "logps/rejected": -445.53790283203125, "loss": 0.6882, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19896404445171356, "rewards/margins": 0.09538199752569199, "rewards/rejected": -0.29434603452682495, "step": 10230 }, { "epoch": 0.67, "learning_rate": 1.483419212071813e-06, "logits/chosen": -0.3768971562385559, "logits/rejected": -0.14265303313732147, "logps/chosen": -408.9107971191406, "logps/rejected": -469.2865295410156, "loss": 0.6894, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2204466611146927, "rewards/margins": 0.06755717098712921, "rewards/rejected": -0.2880038321018219, "step": 10240 }, { "epoch": 0.67, "learning_rate": 1.478205340286573e-06, "logits/chosen": -0.629867434501648, "logits/rejected": -0.7656704187393188, "logps/chosen": -462.8060607910156, "logps/rejected": -520.7987060546875, "loss": 0.6897, "rewards/accuracies": 0.5, "rewards/chosen": -0.2591971755027771, "rewards/margins": 0.0759974792599678, "rewards/rejected": -0.3351946771144867, "step": 10250 }, { "epoch": 0.67, "learning_rate": 1.4729967986828104e-06, "logits/chosen": -0.6806701421737671, "logits/rejected": -0.6869579553604126, "logps/chosen": -535.6580200195312, "logps/rejected": -573.3906860351562, "loss": 0.6902, "rewards/accuracies": 0.75, "rewards/chosen": -0.21367435157299042, "rewards/margins": 0.0857006385922432, "rewards/rejected": -0.2993749976158142, "step": 10260 }, { "epoch": 0.67, "learning_rate": 1.4677936144308286e-06, "logits/chosen": -0.8180096745491028, "logits/rejected": -0.5855274796485901, "logps/chosen": -411.0531311035156, "logps/rejected": -505.92669677734375, "loss": 0.6884, "rewards/accuracies": 0.75, "rewards/chosen": -0.18680839240550995, "rewards/margins": 0.12293653190135956, "rewards/rejected": -0.3097449541091919, "step": 10270 }, { "epoch": 0.67, "learning_rate": 1.4625958146729864e-06, "logits/chosen": -0.9897669553756714, "logits/rejected": -0.6518400311470032, "logps/chosen": -429.7906188964844, "logps/rejected": -494.58447265625, "loss": 0.6894, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20952677726745605, "rewards/margins": 0.08074188232421875, "rewards/rejected": -0.2902686595916748, "step": 10280 }, { "epoch": 0.67, "learning_rate": 1.4574034265235523e-06, "logits/chosen": -0.6724362373352051, "logits/rejected": -0.5424883961677551, "logps/chosen": -464.368408203125, "logps/rejected": -473.7183532714844, "loss": 0.69, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20853987336158752, "rewards/margins": 0.10711286962032318, "rewards/rejected": -0.3156526982784271, "step": 10290 }, { "epoch": 0.67, "learning_rate": 1.452216477068568e-06, "logits/chosen": -0.5634249448776245, "logits/rejected": -0.3757302165031433, "logps/chosen": -402.8147277832031, "logps/rejected": -414.9501953125, "loss": 0.6877, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1727697104215622, "rewards/margins": 0.10505032539367676, "rewards/rejected": -0.27782002091407776, "step": 10300 }, { "epoch": 0.67, "eval_logits/chosen": -0.8372055292129517, "eval_logits/rejected": -0.7203603386878967, "eval_logps/chosen": -434.0806579589844, "eval_logps/rejected": -497.1747131347656, "eval_loss": 0.6894459128379822, "eval_rewards/accuracies": 0.6635000109672546, "eval_rewards/chosen": -0.2020757496356964, "eval_rewards/margins": 0.08348707854747772, "eval_rewards/rejected": -0.28556281328201294, "eval_runtime": 711.9834, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.405, "step": 10300 }, { "epoch": 0.67, "learning_rate": 1.4470349933657004e-06, "logits/chosen": -1.407127022743225, "logits/rejected": -0.9657286405563354, "logps/chosen": -401.1145324707031, "logps/rejected": -463.8077087402344, "loss": 0.6893, "rewards/accuracies": 0.625, "rewards/chosen": -0.18038493394851685, "rewards/margins": 0.08667208254337311, "rewards/rejected": -0.26705700159072876, "step": 10310 }, { "epoch": 0.68, "learning_rate": 1.4418590024441096e-06, "logits/chosen": -1.1771811246871948, "logits/rejected": -0.6675149202346802, "logps/chosen": -440.747802734375, "logps/rejected": -459.735595703125, "loss": 0.6885, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18728753924369812, "rewards/margins": 0.08547311276197433, "rewards/rejected": -0.27276068925857544, "step": 10320 }, { "epoch": 0.68, "learning_rate": 1.436688531304297e-06, "logits/chosen": -1.0007431507110596, "logits/rejected": -0.8121240735054016, "logps/chosen": -394.47149658203125, "logps/rejected": -483.74847412109375, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17614403367042542, "rewards/margins": 0.09476612508296967, "rewards/rejected": -0.2709101438522339, "step": 10330 }, { "epoch": 0.68, "learning_rate": 1.431523606917974e-06, "logits/chosen": -0.9082180261611938, "logits/rejected": -0.850312352180481, "logps/chosen": -435.8108825683594, "logps/rejected": -534.7352294921875, "loss": 0.6863, "rewards/accuracies": 0.625, "rewards/chosen": -0.23071154952049255, "rewards/margins": 0.09230966120958328, "rewards/rejected": -0.3230212330818176, "step": 10340 }, { "epoch": 0.68, "learning_rate": 1.4263642562279162e-06, "logits/chosen": -0.6657235622406006, "logits/rejected": -0.5200079083442688, "logps/chosen": -468.4452209472656, "logps/rejected": -580.9529418945312, "loss": 0.6886, "rewards/accuracies": 0.75, "rewards/chosen": -0.21895787119865417, "rewards/margins": 0.10401761531829834, "rewards/rejected": -0.3229754567146301, "step": 10350 }, { "epoch": 0.68, "learning_rate": 1.4212105061478257e-06, "logits/chosen": -0.7509498000144958, "logits/rejected": -0.4829614758491516, "logps/chosen": -484.610107421875, "logps/rejected": -565.8938598632812, "loss": 0.6896, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2547195553779602, "rewards/margins": 0.07346032559871674, "rewards/rejected": -0.32817989587783813, "step": 10360 }, { "epoch": 0.68, "learning_rate": 1.4160623835621848e-06, "logits/chosen": -1.2137842178344727, "logits/rejected": -0.8153377771377563, "logps/chosen": -424.40850830078125, "logps/rejected": -510.79656982421875, "loss": 0.6893, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18709731101989746, "rewards/margins": 0.09786628186702728, "rewards/rejected": -0.28496360778808594, "step": 10370 }, { "epoch": 0.68, "learning_rate": 1.4109199153261249e-06, "logits/chosen": -0.8705890774726868, "logits/rejected": -0.6633458733558655, "logps/chosen": -490.9913635253906, "logps/rejected": -560.652099609375, "loss": 0.6887, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21499666571617126, "rewards/margins": 0.09997449815273285, "rewards/rejected": -0.3149711489677429, "step": 10380 }, { "epoch": 0.68, "learning_rate": 1.405783128265278e-06, "logits/chosen": -0.9433485269546509, "logits/rejected": -0.8324483633041382, "logps/chosen": -443.7115173339844, "logps/rejected": -508.8597717285156, "loss": 0.6899, "rewards/accuracies": 0.625, "rewards/chosen": -0.2395780384540558, "rewards/margins": 0.06841441243886948, "rewards/rejected": -0.30799245834350586, "step": 10390 }, { "epoch": 0.68, "learning_rate": 1.4006520491756427e-06, "logits/chosen": -0.784449577331543, "logits/rejected": -0.4519527554512024, "logps/chosen": -396.655029296875, "logps/rejected": -426.204833984375, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2004307508468628, "rewards/margins": 0.09419076144695282, "rewards/rejected": -0.2946215271949768, "step": 10400 }, { "epoch": 0.68, "eval_logits/chosen": -0.8564895987510681, "eval_logits/rejected": -0.737651526927948, "eval_logps/chosen": -437.34539794921875, "eval_logps/rejected": -502.5918884277344, "eval_loss": 0.6894581913948059, "eval_rewards/accuracies": 0.6610000133514404, "eval_rewards/chosen": -0.2053404450416565, "eval_rewards/margins": 0.08563953638076782, "eval_rewards/rejected": -0.2909799814224243, "eval_runtime": 711.3734, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.406, "step": 10400 }, { "epoch": 0.68, "learning_rate": 1.39552670482344e-06, "logits/chosen": -0.7949396371841431, "logits/rejected": -0.9270607233047485, "logps/chosen": -369.264892578125, "logps/rejected": -430.3038024902344, "loss": 0.6908, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1946653127670288, "rewards/margins": 0.06114745885133743, "rewards/rejected": -0.25581276416778564, "step": 10410 }, { "epoch": 0.68, "learning_rate": 1.3904071219449776e-06, "logits/chosen": -0.6988734006881714, "logits/rejected": -0.4601953625679016, "logps/chosen": -389.9330749511719, "logps/rejected": -380.8857727050781, "loss": 0.6893, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19003143906593323, "rewards/margins": 0.07561281323432922, "rewards/rejected": -0.26564425230026245, "step": 10420 }, { "epoch": 0.68, "learning_rate": 1.3852933272465068e-06, "logits/chosen": -0.8293148279190063, "logits/rejected": -0.7336186170578003, "logps/chosen": -386.4721374511719, "logps/rejected": -419.66241455078125, "loss": 0.6912, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.14861205220222473, "rewards/margins": 0.06841407716274261, "rewards/rejected": -0.21702614426612854, "step": 10430 }, { "epoch": 0.68, "learning_rate": 1.3801853474040873e-06, "logits/chosen": -0.7749053239822388, "logits/rejected": -0.702245831489563, "logps/chosen": -456.7560119628906, "logps/rejected": -536.7758178710938, "loss": 0.6887, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21823477745056152, "rewards/margins": 0.0964801087975502, "rewards/rejected": -0.31471487879753113, "step": 10440 }, { "epoch": 0.68, "learning_rate": 1.3750832090634417e-06, "logits/chosen": -0.9500045776367188, "logits/rejected": -0.7475318908691406, "logps/chosen": -374.98687744140625, "logps/rejected": -435.96630859375, "loss": 0.6894, "rewards/accuracies": 0.625, "rewards/chosen": -0.1888141632080078, "rewards/margins": 0.07563088089227676, "rewards/rejected": -0.264445036649704, "step": 10450 }, { "epoch": 0.68, "learning_rate": 1.3699869388398245e-06, "logits/chosen": -0.7953190803527832, "logits/rejected": -0.6879931092262268, "logps/chosen": -428.355712890625, "logps/rejected": -493.54345703125, "loss": 0.6899, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21533437073230743, "rewards/margins": 0.08364120870828629, "rewards/rejected": -0.2989755868911743, "step": 10460 }, { "epoch": 0.69, "learning_rate": 1.3648965633178772e-06, "logits/chosen": -0.9219416379928589, "logits/rejected": -0.820416271686554, "logps/chosen": -412.6971130371094, "logps/rejected": -518.1492919921875, "loss": 0.6909, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21196496486663818, "rewards/margins": 0.09404056519269943, "rewards/rejected": -0.3060055673122406, "step": 10470 }, { "epoch": 0.69, "learning_rate": 1.3598121090514938e-06, "logits/chosen": -0.6970809698104858, "logits/rejected": -0.8272876739501953, "logps/chosen": -389.41754150390625, "logps/rejected": -445.25836181640625, "loss": 0.6886, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20640969276428223, "rewards/margins": 0.0799640640616417, "rewards/rejected": -0.28637373447418213, "step": 10480 }, { "epoch": 0.69, "learning_rate": 1.3547336025636753e-06, "logits/chosen": -0.7740924954414368, "logits/rejected": -0.5170813798904419, "logps/chosen": -529.0531616210938, "logps/rejected": -559.5513305664062, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25375670194625854, "rewards/margins": 0.07202710956335068, "rewards/rejected": -0.3257838189601898, "step": 10490 }, { "epoch": 0.69, "learning_rate": 1.3496610703464022e-06, "logits/chosen": -0.993310272693634, "logits/rejected": -0.6279395818710327, "logps/chosen": -482.531005859375, "logps/rejected": -524.1070556640625, "loss": 0.6885, "rewards/accuracies": 0.625, "rewards/chosen": -0.2533164620399475, "rewards/margins": 0.0870974212884903, "rewards/rejected": -0.3404138684272766, "step": 10500 }, { "epoch": 0.69, "eval_logits/chosen": -0.812269389629364, "eval_logits/rejected": -0.6946424841880798, "eval_logps/chosen": -479.5160217285156, "eval_logps/rejected": -547.5474853515625, "eval_loss": 0.6894866824150085, "eval_rewards/accuracies": 0.6650000214576721, "eval_rewards/chosen": -0.24751105904579163, "eval_rewards/margins": 0.08842450380325317, "eval_rewards/rejected": -0.3359355330467224, "eval_runtime": 713.5643, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.401, "step": 10500 }, { "epoch": 0.69, "learning_rate": 1.3445945388604848e-06, "logits/chosen": -0.9369242787361145, "logits/rejected": -0.4151206910610199, "logps/chosen": -525.0983276367188, "logps/rejected": -590.3690185546875, "loss": 0.6899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.286874920129776, "rewards/margins": 0.1084313616156578, "rewards/rejected": -0.39530622959136963, "step": 10510 }, { "epoch": 0.69, "learning_rate": 1.3395340345354358e-06, "logits/chosen": -0.9469447135925293, "logits/rejected": -1.009319543838501, "logps/chosen": -480.42010498046875, "logps/rejected": -589.4058837890625, "loss": 0.6876, "rewards/accuracies": 0.625, "rewards/chosen": -0.2567223012447357, "rewards/margins": 0.091738261282444, "rewards/rejected": -0.3484605848789215, "step": 10520 }, { "epoch": 0.69, "learning_rate": 1.334479583769322e-06, "logits/chosen": -1.1004682779312134, "logits/rejected": -1.1085875034332275, "logps/chosen": -510.69415283203125, "logps/rejected": -516.8112182617188, "loss": 0.6908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25585538148880005, "rewards/margins": 0.056086838245391846, "rewards/rejected": -0.3119421899318695, "step": 10530 }, { "epoch": 0.69, "learning_rate": 1.3294312129286366e-06, "logits/chosen": -0.8170161247253418, "logits/rejected": -0.7216283082962036, "logps/chosen": -492.08306884765625, "logps/rejected": -539.3572998046875, "loss": 0.6905, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22535443305969238, "rewards/margins": 0.06094512343406677, "rewards/rejected": -0.28629955649375916, "step": 10540 }, { "epoch": 0.69, "learning_rate": 1.324388948348153e-06, "logits/chosen": -1.2214971780776978, "logits/rejected": -0.864739716053009, "logps/chosen": -502.04327392578125, "logps/rejected": -509.402587890625, "loss": 0.6881, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21680526435375214, "rewards/margins": 0.08872579038143158, "rewards/rejected": -0.3055310845375061, "step": 10550 }, { "epoch": 0.69, "learning_rate": 1.319352816330796e-06, "logits/chosen": -1.2389277219772339, "logits/rejected": -0.8248780369758606, "logps/chosen": -516.2705688476562, "logps/rejected": -500.32025146484375, "loss": 0.6897, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23307332396507263, "rewards/margins": 0.09050510078668594, "rewards/rejected": -0.323578417301178, "step": 10560 }, { "epoch": 0.69, "learning_rate": 1.314322843147494e-06, "logits/chosen": -0.8264732360839844, "logits/rejected": -0.9180240631103516, "logps/chosen": -447.1441955566406, "logps/rejected": -576.095458984375, "loss": 0.6896, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2768183648586273, "rewards/margins": 0.07408357411623001, "rewards/rejected": -0.3509019613265991, "step": 10570 }, { "epoch": 0.69, "learning_rate": 1.3092990550370526e-06, "logits/chosen": -0.9271947145462036, "logits/rejected": -0.8801366090774536, "logps/chosen": -596.9766235351562, "logps/rejected": -602.197998046875, "loss": 0.689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25121819972991943, "rewards/margins": 0.09057263284921646, "rewards/rejected": -0.3417908549308777, "step": 10580 }, { "epoch": 0.69, "learning_rate": 1.3042814782060131e-06, "logits/chosen": -0.531276524066925, "logits/rejected": -0.5136385560035706, "logps/chosen": -380.8265686035156, "logps/rejected": -469.82818603515625, "loss": 0.6881, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19932372868061066, "rewards/margins": 0.11291786283254623, "rewards/rejected": -0.3122416138648987, "step": 10590 }, { "epoch": 0.69, "learning_rate": 1.2992701388285112e-06, "logits/chosen": -0.6711562871932983, "logits/rejected": -0.576374888420105, "logps/chosen": -481.093017578125, "logps/rejected": -520.4686279296875, "loss": 0.6884, "rewards/accuracies": 0.625, "rewards/chosen": -0.21174398064613342, "rewards/margins": 0.08384759724140167, "rewards/rejected": -0.2955915331840515, "step": 10600 }, { "epoch": 0.69, "eval_logits/chosen": -0.8155879378318787, "eval_logits/rejected": -0.6967446208000183, "eval_logps/chosen": -486.49542236328125, "eval_logps/rejected": -556.6547241210938, "eval_loss": 0.6894893646240234, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -0.254490464925766, "eval_rewards/margins": 0.0905524417757988, "eval_rewards/rejected": -0.3450429141521454, "eval_runtime": 709.7403, "eval_samples_per_second": 2.818, "eval_steps_per_second": 1.409, "step": 10600 }, { "epoch": 0.69, "learning_rate": 1.29426506304615e-06, "logits/chosen": -0.749521017074585, "logits/rejected": -0.7463639974594116, "logps/chosen": -512.3372802734375, "logps/rejected": -551.0499877929688, "loss": 0.691, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.28961387276649475, "rewards/margins": 0.05819075182080269, "rewards/rejected": -0.34780463576316833, "step": 10610 }, { "epoch": 0.69, "learning_rate": 1.289266276967855e-06, "logits/chosen": -1.1036258935928345, "logits/rejected": -0.867837131023407, "logps/chosen": -578.3683471679688, "logps/rejected": -556.658935546875, "loss": 0.6907, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2397865355014801, "rewards/margins": 0.06844881922006607, "rewards/rejected": -0.3082353472709656, "step": 10620 }, { "epoch": 0.7, "learning_rate": 1.284273806669745e-06, "logits/chosen": -0.8476123809814453, "logits/rejected": -0.8278233408927917, "logps/chosen": -527.3364868164062, "logps/rejected": -636.2213134765625, "loss": 0.6881, "rewards/accuracies": 0.625, "rewards/chosen": -0.2880115211009979, "rewards/margins": 0.09134428203105927, "rewards/rejected": -0.3793558180332184, "step": 10630 }, { "epoch": 0.7, "learning_rate": 1.2792876781949884e-06, "logits/chosen": -0.48908740282058716, "logits/rejected": -0.4103211760520935, "logps/chosen": -421.69970703125, "logps/rejected": -510.91363525390625, "loss": 0.6872, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21562710404396057, "rewards/margins": 0.10853157937526703, "rewards/rejected": -0.3241586983203888, "step": 10640 }, { "epoch": 0.7, "learning_rate": 1.274307917553676e-06, "logits/chosen": -0.8523713946342468, "logits/rejected": -0.666365385055542, "logps/chosen": -444.986572265625, "logps/rejected": -592.508056640625, "loss": 0.6903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24932782351970673, "rewards/margins": 0.12292194366455078, "rewards/rejected": -0.3722497522830963, "step": 10650 }, { "epoch": 0.7, "learning_rate": 1.2693345507226767e-06, "logits/chosen": -0.8844467997550964, "logits/rejected": -0.6996780633926392, "logps/chosen": -486.7682189941406, "logps/rejected": -608.3629150390625, "loss": 0.6868, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25677794218063354, "rewards/margins": 0.11948125064373016, "rewards/rejected": -0.3762592077255249, "step": 10660 }, { "epoch": 0.7, "learning_rate": 1.2643676036455099e-06, "logits/chosen": -1.2263991832733154, "logits/rejected": -1.0565673112869263, "logps/chosen": -498.09698486328125, "logps/rejected": -500.1378479003906, "loss": 0.692, "rewards/accuracies": 0.625, "rewards/chosen": -0.21024446189403534, "rewards/margins": 0.05278193950653076, "rewards/rejected": -0.2630263864994049, "step": 10670 }, { "epoch": 0.7, "learning_rate": 1.259407102232203e-06, "logits/chosen": -1.1713817119598389, "logits/rejected": -0.7351571917533875, "logps/chosen": -511.2571716308594, "logps/rejected": -547.3772583007812, "loss": 0.6877, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23362283408641815, "rewards/margins": 0.10906408727169037, "rewards/rejected": -0.3426869511604309, "step": 10680 }, { "epoch": 0.7, "learning_rate": 1.254453072359163e-06, "logits/chosen": -0.7428683638572693, "logits/rejected": -0.7556576132774353, "logps/chosen": -448.65008544921875, "logps/rejected": -503.0829162597656, "loss": 0.689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2146300971508026, "rewards/margins": 0.07629900425672531, "rewards/rejected": -0.2909291386604309, "step": 10690 }, { "epoch": 0.7, "learning_rate": 1.2495055398690337e-06, "logits/chosen": -1.2361562252044678, "logits/rejected": -1.0086596012115479, "logps/chosen": -436.9013671875, "logps/rejected": -475.19317626953125, "loss": 0.6909, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2070470154285431, "rewards/margins": 0.0475749596953392, "rewards/rejected": -0.2546219527721405, "step": 10700 }, { "epoch": 0.7, "eval_logits/chosen": -0.8308368921279907, "eval_logits/rejected": -0.7124754786491394, "eval_logps/chosen": -463.0313415527344, "eval_logps/rejected": -527.8247680664062, "eval_loss": 0.6894720196723938, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -0.23102642595767975, "eval_rewards/margins": 0.0851864367723465, "eval_rewards/rejected": -0.31621286273002625, "eval_runtime": 712.5873, "eval_samples_per_second": 2.807, "eval_steps_per_second": 1.403, "step": 10700 }, { "epoch": 0.7, "learning_rate": 1.2445645305705718e-06, "logits/chosen": -0.9717991948127747, "logits/rejected": -0.9585930109024048, "logps/chosen": -448.2286071777344, "logps/rejected": -500.2596740722656, "loss": 0.6909, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.24190764129161835, "rewards/margins": 0.07267390191555023, "rewards/rejected": -0.31458157300949097, "step": 10710 }, { "epoch": 0.7, "learning_rate": 1.2396300702384995e-06, "logits/chosen": -0.9743059277534485, "logits/rejected": -0.8761451840400696, "logps/chosen": -494.4071350097656, "logps/rejected": -494.4388732910156, "loss": 0.6919, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23659905791282654, "rewards/margins": 0.03769702836871147, "rewards/rejected": -0.2742961049079895, "step": 10720 }, { "epoch": 0.7, "learning_rate": 1.234702184613381e-06, "logits/chosen": -0.8256417512893677, "logits/rejected": -0.5968748927116394, "logps/chosen": -424.6349182128906, "logps/rejected": -496.51983642578125, "loss": 0.6904, "rewards/accuracies": 0.625, "rewards/chosen": -0.20915257930755615, "rewards/margins": 0.07222042977809906, "rewards/rejected": -0.2813730239868164, "step": 10730 }, { "epoch": 0.7, "learning_rate": 1.2297808994014793e-06, "logits/chosen": -1.0779728889465332, "logits/rejected": -0.8660680055618286, "logps/chosen": -496.3164978027344, "logps/rejected": -523.3186645507812, "loss": 0.6898, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21038170158863068, "rewards/margins": 0.061894625425338745, "rewards/rejected": -0.27227628231048584, "step": 10740 }, { "epoch": 0.7, "learning_rate": 1.2248662402746314e-06, "logits/chosen": -0.7504767179489136, "logits/rejected": -0.931710422039032, "logps/chosen": -452.50958251953125, "logps/rejected": -514.7100219726562, "loss": 0.6908, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2625929117202759, "rewards/margins": 0.068316251039505, "rewards/rejected": -0.3309091627597809, "step": 10750 }, { "epoch": 0.7, "learning_rate": 1.2199582328701045e-06, "logits/chosen": -0.9508997797966003, "logits/rejected": -0.8796059489250183, "logps/chosen": -510.3228454589844, "logps/rejected": -566.1322021484375, "loss": 0.6876, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21968936920166016, "rewards/margins": 0.09610020369291306, "rewards/rejected": -0.3157895803451538, "step": 10760 }, { "epoch": 0.7, "learning_rate": 1.2150569027904712e-06, "logits/chosen": -0.9090763330459595, "logits/rejected": -0.8921510577201843, "logps/chosen": -478.50311279296875, "logps/rejected": -540.8385009765625, "loss": 0.69, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23251965641975403, "rewards/margins": 0.06863830238580704, "rewards/rejected": -0.30115798115730286, "step": 10770 }, { "epoch": 0.71, "learning_rate": 1.2101622756034688e-06, "logits/chosen": -0.9387717247009277, "logits/rejected": -0.887730598449707, "logps/chosen": -405.3013610839844, "logps/rejected": -457.65142822265625, "loss": 0.6888, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18254734575748444, "rewards/margins": 0.08271530270576477, "rewards/rejected": -0.2652626633644104, "step": 10780 }, { "epoch": 0.71, "learning_rate": 1.2052743768418715e-06, "logits/chosen": -0.9470183253288269, "logits/rejected": -0.7718468308448792, "logps/chosen": -441.03863525390625, "logps/rejected": -491.30267333984375, "loss": 0.6887, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.193229541182518, "rewards/margins": 0.08427095413208008, "rewards/rejected": -0.2775005102157593, "step": 10790 }, { "epoch": 0.71, "learning_rate": 1.2003932320033523e-06, "logits/chosen": -1.0047305822372437, "logits/rejected": -1.0284090042114258, "logps/chosen": -418.42095947265625, "logps/rejected": -513.3245849609375, "loss": 0.6877, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1919182986021042, "rewards/margins": 0.09427173435688019, "rewards/rejected": -0.286190003156662, "step": 10800 }, { "epoch": 0.71, "eval_logits/chosen": -0.9391505718231201, "eval_logits/rejected": -0.815139651298523, "eval_logps/chosen": -437.78961181640625, "eval_logps/rejected": -501.56768798828125, "eval_loss": 0.689470112323761, "eval_rewards/accuracies": 0.6644999980926514, "eval_rewards/chosen": -0.2057846486568451, "eval_rewards/margins": 0.08417114615440369, "eval_rewards/rejected": -0.2899557948112488, "eval_runtime": 714.604, "eval_samples_per_second": 2.799, "eval_steps_per_second": 1.399, "step": 10800 }, { "epoch": 0.71, "learning_rate": 1.1955188665503553e-06, "logits/chosen": -0.7959145903587341, "logits/rejected": -0.7350171804428101, "logps/chosen": -426.7283630371094, "logps/rejected": -471.46685791015625, "loss": 0.6917, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22160467505455017, "rewards/margins": 0.06552859395742416, "rewards/rejected": -0.28713327646255493, "step": 10810 }, { "epoch": 0.71, "learning_rate": 1.1906513059099566e-06, "logits/chosen": -1.0689995288848877, "logits/rejected": -0.7881597280502319, "logps/chosen": -464.06591796875, "logps/rejected": -566.7840576171875, "loss": 0.6877, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23257681727409363, "rewards/margins": 0.11145871877670288, "rewards/rejected": -0.3440355658531189, "step": 10820 }, { "epoch": 0.71, "learning_rate": 1.185790575473738e-06, "logits/chosen": -1.0131001472473145, "logits/rejected": -0.6255987286567688, "logps/chosen": -447.65093994140625, "logps/rejected": -485.94342041015625, "loss": 0.6916, "rewards/accuracies": 0.625, "rewards/chosen": -0.21851758658885956, "rewards/margins": 0.08556672930717468, "rewards/rejected": -0.30408430099487305, "step": 10830 }, { "epoch": 0.71, "learning_rate": 1.1809367005976516e-06, "logits/chosen": -0.995489776134491, "logits/rejected": -0.8367295265197754, "logps/chosen": -449.3817443847656, "logps/rejected": -429.474365234375, "loss": 0.6913, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.16688695549964905, "rewards/margins": 0.05114005133509636, "rewards/rejected": -0.2180270254611969, "step": 10840 }, { "epoch": 0.71, "learning_rate": 1.1760897066018842e-06, "logits/chosen": -0.9616080522537231, "logits/rejected": -0.8490033149719238, "logps/chosen": -380.6590576171875, "logps/rejected": -465.58599853515625, "loss": 0.6892, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1576179563999176, "rewards/margins": 0.09092387557029724, "rewards/rejected": -0.24854183197021484, "step": 10850 }, { "epoch": 0.71, "learning_rate": 1.1712496187707327e-06, "logits/chosen": -0.9444979429244995, "logits/rejected": -1.1311503648757935, "logps/chosen": -450.02069091796875, "logps/rejected": -567.10009765625, "loss": 0.6898, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20971646904945374, "rewards/margins": 0.1311451941728592, "rewards/rejected": -0.3408616781234741, "step": 10860 }, { "epoch": 0.71, "learning_rate": 1.1664164623524646e-06, "logits/chosen": -1.0897196531295776, "logits/rejected": -0.906332790851593, "logps/chosen": -367.5469970703125, "logps/rejected": -410.7867126464844, "loss": 0.6899, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.14917199313640594, "rewards/margins": 0.07149702310562134, "rewards/rejected": -0.2206690013408661, "step": 10870 }, { "epoch": 0.71, "learning_rate": 1.1615902625591926e-06, "logits/chosen": -1.2006911039352417, "logits/rejected": -0.863299548625946, "logps/chosen": -397.97344970703125, "logps/rejected": -452.78216552734375, "loss": 0.6901, "rewards/accuracies": 0.75, "rewards/chosen": -0.1727522760629654, "rewards/margins": 0.06422239542007446, "rewards/rejected": -0.23697467148303986, "step": 10880 }, { "epoch": 0.71, "learning_rate": 1.156771044566738e-06, "logits/chosen": -1.1622774600982666, "logits/rejected": -1.058846116065979, "logps/chosen": -425.43115234375, "logps/rejected": -453.4656677246094, "loss": 0.6896, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1608165055513382, "rewards/margins": 0.07125195860862732, "rewards/rejected": -0.23206846415996552, "step": 10890 }, { "epoch": 0.71, "learning_rate": 1.1519588335145037e-06, "logits/chosen": -1.2024497985839844, "logits/rejected": -1.3856556415557861, "logps/chosen": -353.27618408203125, "logps/rejected": -407.64788818359375, "loss": 0.6921, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14569738507270813, "rewards/margins": 0.03567231446504593, "rewards/rejected": -0.18136970698833466, "step": 10900 }, { "epoch": 0.71, "eval_logits/chosen": -1.0966393947601318, "eval_logits/rejected": -0.9659644961357117, "eval_logps/chosen": -388.65643310546875, "eval_logps/rejected": -443.52410888671875, "eval_loss": 0.6895360350608826, "eval_rewards/accuracies": 0.6570000052452087, "eval_rewards/chosen": -0.15665146708488464, "eval_rewards/margins": 0.07526073604822159, "eval_rewards/rejected": -0.23191221058368683, "eval_runtime": 711.6877, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 10900 }, { "epoch": 0.71, "learning_rate": 1.1471536545053382e-06, "logits/chosen": -1.0944491624832153, "logits/rejected": -1.0983108282089233, "logps/chosen": -351.8878479003906, "logps/rejected": -432.76995849609375, "loss": 0.6895, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14285293221473694, "rewards/margins": 0.07332994043827057, "rewards/rejected": -0.2161828726530075, "step": 10910 }, { "epoch": 0.71, "learning_rate": 1.1423555326054112e-06, "logits/chosen": -1.0121129751205444, "logits/rejected": -0.732758641242981, "logps/chosen": -467.73419189453125, "logps/rejected": -534.3839111328125, "loss": 0.6838, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18489964306354523, "rewards/margins": 0.14008580148220062, "rewards/rejected": -0.32498544454574585, "step": 10920 }, { "epoch": 0.72, "learning_rate": 1.1375644928440743e-06, "logits/chosen": -1.107006311416626, "logits/rejected": -0.8201066851615906, "logps/chosen": -414.925048828125, "logps/rejected": -444.3072204589844, "loss": 0.6883, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1782417744398117, "rewards/margins": 0.09688602387905121, "rewards/rejected": -0.2751278281211853, "step": 10930 }, { "epoch": 0.72, "learning_rate": 1.1327805602137396e-06, "logits/chosen": -1.1339380741119385, "logits/rejected": -0.9242345094680786, "logps/chosen": -462.92095947265625, "logps/rejected": -484.5670471191406, "loss": 0.6904, "rewards/accuracies": 0.75, "rewards/chosen": -0.20417681336402893, "rewards/margins": 0.07462415099143982, "rewards/rejected": -0.27880096435546875, "step": 10940 }, { "epoch": 0.72, "learning_rate": 1.1280037596697426e-06, "logits/chosen": -0.986310601234436, "logits/rejected": -0.737085223197937, "logps/chosen": -454.80084228515625, "logps/rejected": -629.8025512695312, "loss": 0.6838, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23391835391521454, "rewards/margins": 0.13807490468025208, "rewards/rejected": -0.3719932436943054, "step": 10950 }, { "epoch": 0.72, "learning_rate": 1.123234116130216e-06, "logits/chosen": -0.8748584985733032, "logits/rejected": -0.7704537510871887, "logps/chosen": -405.89532470703125, "logps/rejected": -528.647705078125, "loss": 0.6887, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22000694274902344, "rewards/margins": 0.11669802665710449, "rewards/rejected": -0.3367049992084503, "step": 10960 }, { "epoch": 0.72, "learning_rate": 1.1184716544759553e-06, "logits/chosen": -0.62715744972229, "logits/rejected": -0.5959141254425049, "logps/chosen": -356.5045471191406, "logps/rejected": -417.14794921875, "loss": 0.6907, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19114038348197937, "rewards/margins": 0.04822884127497673, "rewards/rejected": -0.2393692284822464, "step": 10970 }, { "epoch": 0.72, "learning_rate": 1.1137163995502948e-06, "logits/chosen": -1.4193073511123657, "logits/rejected": -1.2204351425170898, "logps/chosen": -414.5079040527344, "logps/rejected": -458.13067626953125, "loss": 0.6894, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19297286868095398, "rewards/margins": 0.07632466405630112, "rewards/rejected": -0.2692975401878357, "step": 10980 }, { "epoch": 0.72, "learning_rate": 1.1089683761589717e-06, "logits/chosen": -0.8085821866989136, "logits/rejected": -0.7703992128372192, "logps/chosen": -439.84722900390625, "logps/rejected": -536.17529296875, "loss": 0.6882, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1996408998966217, "rewards/margins": 0.11524226516485214, "rewards/rejected": -0.31488317251205444, "step": 10990 }, { "epoch": 0.72, "learning_rate": 1.1042276090700044e-06, "logits/chosen": -0.9247690439224243, "logits/rejected": -0.9810608625411987, "logps/chosen": -441.11273193359375, "logps/rejected": -537.4869384765625, "loss": 0.6906, "rewards/accuracies": 0.5, "rewards/chosen": -0.23081064224243164, "rewards/margins": 0.06620490550994873, "rewards/rejected": -0.297015517950058, "step": 11000 }, { "epoch": 0.72, "eval_logits/chosen": -1.0156525373458862, "eval_logits/rejected": -0.8884658217430115, "eval_logps/chosen": -420.56817626953125, "eval_logps/rejected": -481.687744140625, "eval_loss": 0.6894798874855042, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -0.18856322765350342, "eval_rewards/margins": 0.08151256293058395, "eval_rewards/rejected": -0.27007579803466797, "eval_runtime": 711.7879, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 11000 }, { "epoch": 0.72, "learning_rate": 1.0994941230135536e-06, "logits/chosen": -1.0726044178009033, "logits/rejected": -0.9668411016464233, "logps/chosen": -422.48162841796875, "logps/rejected": -508.0166931152344, "loss": 0.6873, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18963167071342468, "rewards/margins": 0.12159478664398193, "rewards/rejected": -0.3112264573574066, "step": 11010 }, { "epoch": 0.72, "learning_rate": 1.094767942681804e-06, "logits/chosen": -1.4820367097854614, "logits/rejected": -1.0809131860733032, "logps/chosen": -502.901611328125, "logps/rejected": -554.4119262695312, "loss": 0.6883, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25503259897232056, "rewards/margins": 0.09071089327335358, "rewards/rejected": -0.3457435369491577, "step": 11020 }, { "epoch": 0.72, "learning_rate": 1.0900490927288248e-06, "logits/chosen": -0.7793976068496704, "logits/rejected": -0.827666163444519, "logps/chosen": -469.4134826660156, "logps/rejected": -488.8060607910156, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20894518494606018, "rewards/margins": 0.06886889785528183, "rewards/rejected": -0.2778140902519226, "step": 11030 }, { "epoch": 0.72, "learning_rate": 1.0853375977704511e-06, "logits/chosen": -1.1008172035217285, "logits/rejected": -0.9268584251403809, "logps/chosen": -440.64056396484375, "logps/rejected": -460.67547607421875, "loss": 0.6903, "rewards/accuracies": 0.75, "rewards/chosen": -0.20021602511405945, "rewards/margins": 0.08404561877250671, "rewards/rejected": -0.28426164388656616, "step": 11040 }, { "epoch": 0.72, "learning_rate": 1.0806334823841466e-06, "logits/chosen": -1.024103045463562, "logits/rejected": -1.1570180654525757, "logps/chosen": -462.835693359375, "logps/rejected": -540.9618530273438, "loss": 0.69, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22321978211402893, "rewards/margins": 0.04872307926416397, "rewards/rejected": -0.2719428837299347, "step": 11050 }, { "epoch": 0.72, "learning_rate": 1.0759367711088825e-06, "logits/chosen": -0.7810468673706055, "logits/rejected": -1.0236024856567383, "logps/chosen": -382.93597412109375, "logps/rejected": -475.3539123535156, "loss": 0.6912, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1991034895181656, "rewards/margins": 0.058940671384334564, "rewards/rejected": -0.25804418325424194, "step": 11060 }, { "epoch": 0.72, "learning_rate": 1.0712474884450056e-06, "logits/chosen": -0.9814950823783875, "logits/rejected": -0.8216627240180969, "logps/chosen": -388.156982421875, "logps/rejected": -453.73712158203125, "loss": 0.6898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18532374501228333, "rewards/margins": 0.09436696022748947, "rewards/rejected": -0.279690682888031, "step": 11070 }, { "epoch": 0.72, "learning_rate": 1.066565658854112e-06, "logits/chosen": -0.7910436391830444, "logits/rejected": -0.8780626058578491, "logps/chosen": -318.71221923828125, "logps/rejected": -394.83660888671875, "loss": 0.6895, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19088035821914673, "rewards/margins": 0.0793568417429924, "rewards/rejected": -0.27023714780807495, "step": 11080 }, { "epoch": 0.73, "learning_rate": 1.0618913067589165e-06, "logits/chosen": -1.094334363937378, "logits/rejected": -0.7383924722671509, "logps/chosen": -391.47113037109375, "logps/rejected": -446.92816162109375, "loss": 0.688, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17565642297267914, "rewards/margins": 0.09291692823171616, "rewards/rejected": -0.2685733735561371, "step": 11090 }, { "epoch": 0.73, "learning_rate": 1.0572244565431313e-06, "logits/chosen": -0.9324450492858887, "logits/rejected": -0.9185946583747864, "logps/chosen": -366.2502746582031, "logps/rejected": -447.4685974121094, "loss": 0.6898, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22083349525928497, "rewards/margins": 0.07605709135532379, "rewards/rejected": -0.29689058661460876, "step": 11100 }, { "epoch": 0.73, "eval_logits/chosen": -1.029784083366394, "eval_logits/rejected": -0.9005224704742432, "eval_logps/chosen": -430.2828674316406, "eval_logps/rejected": -496.58447265625, "eval_loss": 0.6894676685333252, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -0.19827795028686523, "eval_rewards/margins": 0.08669465035200119, "eval_rewards/rejected": -0.284972608089447, "eval_runtime": 710.9479, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.407, "step": 11100 }, { "epoch": 0.73, "learning_rate": 1.0525651325513317e-06, "logits/chosen": -1.035304307937622, "logits/rejected": -1.05049729347229, "logps/chosen": -518.7673950195312, "logps/rejected": -558.48046875, "loss": 0.6895, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.18749776482582092, "rewards/margins": 0.06086844950914383, "rewards/rejected": -0.24836620688438416, "step": 11110 }, { "epoch": 0.73, "learning_rate": 1.0479133590888351e-06, "logits/chosen": -1.0515987873077393, "logits/rejected": -0.9245373606681824, "logps/chosen": -455.1729431152344, "logps/rejected": -532.0485229492188, "loss": 0.6884, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19971804320812225, "rewards/margins": 0.10384778678417206, "rewards/rejected": -0.3035658299922943, "step": 11120 }, { "epoch": 0.73, "learning_rate": 1.0432691604215695e-06, "logits/chosen": -1.10709547996521, "logits/rejected": -0.8915435075759888, "logps/chosen": -410.8944396972656, "logps/rejected": -440.82598876953125, "loss": 0.6912, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17584750056266785, "rewards/margins": 0.061644673347473145, "rewards/rejected": -0.2374921590089798, "step": 11130 }, { "epoch": 0.73, "learning_rate": 1.0386325607759515e-06, "logits/chosen": -1.0275440216064453, "logits/rejected": -0.8162240982055664, "logps/chosen": -342.28564453125, "logps/rejected": -422.4649353027344, "loss": 0.6888, "rewards/accuracies": 0.625, "rewards/chosen": -0.15089556574821472, "rewards/margins": 0.10024967044591904, "rewards/rejected": -0.25114524364471436, "step": 11140 }, { "epoch": 0.73, "learning_rate": 1.0340035843387544e-06, "logits/chosen": -0.9475866556167603, "logits/rejected": -0.8041400909423828, "logps/chosen": -361.7947998046875, "logps/rejected": -417.688720703125, "loss": 0.6897, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18205925822257996, "rewards/margins": 0.08347681164741516, "rewards/rejected": -0.26553604006767273, "step": 11150 }, { "epoch": 0.73, "learning_rate": 1.0293822552569887e-06, "logits/chosen": -1.2313281297683716, "logits/rejected": -1.0497896671295166, "logps/chosen": -428.425048828125, "logps/rejected": -475.63018798828125, "loss": 0.6898, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17654123902320862, "rewards/margins": 0.10303560644388199, "rewards/rejected": -0.27957683801651, "step": 11160 }, { "epoch": 0.73, "learning_rate": 1.0247685976377688e-06, "logits/chosen": -1.1329296827316284, "logits/rejected": -0.8583731651306152, "logps/chosen": -384.9459533691406, "logps/rejected": -419.2796325683594, "loss": 0.6907, "rewards/accuracies": 0.625, "rewards/chosen": -0.20326094329357147, "rewards/margins": 0.07316506654024124, "rewards/rejected": -0.2764259874820709, "step": 11170 }, { "epoch": 0.73, "learning_rate": 1.0201626355481939e-06, "logits/chosen": -1.3057048320770264, "logits/rejected": -1.1107124090194702, "logps/chosen": -395.8485412597656, "logps/rejected": -430.41162109375, "loss": 0.6878, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17972803115844727, "rewards/margins": 0.08812297135591507, "rewards/rejected": -0.2678510248661041, "step": 11180 }, { "epoch": 0.73, "learning_rate": 1.0155643930152192e-06, "logits/chosen": -1.33696448802948, "logits/rejected": -1.2072172164916992, "logps/chosen": -448.7349548339844, "logps/rejected": -438.774658203125, "loss": 0.6911, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.176127627491951, "rewards/margins": 0.044685568660497665, "rewards/rejected": -0.22081318497657776, "step": 11190 }, { "epoch": 0.73, "learning_rate": 1.0109738940255286e-06, "logits/chosen": -1.1134618520736694, "logits/rejected": -0.9695285558700562, "logps/chosen": -376.0254821777344, "logps/rejected": -419.3876953125, "loss": 0.6924, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.15904290974140167, "rewards/margins": 0.08064347505569458, "rewards/rejected": -0.23968639969825745, "step": 11200 }, { "epoch": 0.73, "eval_logits/chosen": -1.2116992473602295, "eval_logits/rejected": -1.0739349126815796, "eval_logps/chosen": -382.55328369140625, "eval_logps/rejected": -440.3695983886719, "eval_loss": 0.6895102262496948, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -0.15054833889007568, "eval_rewards/margins": 0.07820937782526016, "eval_rewards/rejected": -0.22875770926475525, "eval_runtime": 713.425, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 11200 }, { "epoch": 0.73, "learning_rate": 1.0063911625254155e-06, "logits/chosen": -1.3005118370056152, "logits/rejected": -1.296314001083374, "logps/chosen": -356.2990417480469, "logps/rejected": -432.0716247558594, "loss": 0.6897, "rewards/accuracies": 0.625, "rewards/chosen": -0.12037642300128937, "rewards/margins": 0.07514314353466034, "rewards/rejected": -0.1955195516347885, "step": 11210 }, { "epoch": 0.73, "learning_rate": 1.0018162224206502e-06, "logits/chosen": -1.1771663427352905, "logits/rejected": -1.177433967590332, "logps/chosen": -327.0699157714844, "logps/rejected": -436.6388244628906, "loss": 0.688, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1525951325893402, "rewards/margins": 0.10585884749889374, "rewards/rejected": -0.25845396518707275, "step": 11220 }, { "epoch": 0.73, "learning_rate": 9.97249097576363e-07, "logits/chosen": -1.6533960103988647, "logits/rejected": -1.2510101795196533, "logps/chosen": -379.99169921875, "logps/rejected": -440.383056640625, "loss": 0.6881, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1469351053237915, "rewards/margins": 0.10388322174549103, "rewards/rejected": -0.25081831216812134, "step": 11230 }, { "epoch": 0.74, "learning_rate": 9.92689811816913e-07, "logits/chosen": -1.237443208694458, "logits/rejected": -0.949772834777832, "logps/chosen": -375.50042724609375, "logps/rejected": -397.1983337402344, "loss": 0.6905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16297926008701324, "rewards/margins": 0.06910089403390884, "rewards/rejected": -0.23208017647266388, "step": 11240 }, { "epoch": 0.74, "learning_rate": 9.881383889257691e-07, "logits/chosen": -1.180985450744629, "logits/rejected": -1.2715930938720703, "logps/chosen": -327.3847961425781, "logps/rejected": -448.04351806640625, "loss": 0.6897, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15808793902397156, "rewards/margins": 0.05658021569252014, "rewards/rejected": -0.2146681249141693, "step": 11250 }, { "epoch": 0.74, "learning_rate": 9.835948526453817e-07, "logits/chosen": -0.9011642336845398, "logits/rejected": -1.2479734420776367, "logps/chosen": -360.42840576171875, "logps/rejected": -456.5986328125, "loss": 0.6905, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17999902367591858, "rewards/margins": 0.05249845236539841, "rewards/rejected": -0.2324974536895752, "step": 11260 }, { "epoch": 0.74, "learning_rate": 9.790592266770633e-07, "logits/chosen": -1.3939253091812134, "logits/rejected": -1.224125623703003, "logps/chosen": -446.0503845214844, "logps/rejected": -484.1180114746094, "loss": 0.6902, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17670570313930511, "rewards/margins": 0.06630632281303406, "rewards/rejected": -0.24301204085350037, "step": 11270 }, { "epoch": 0.74, "learning_rate": 9.745315346808584e-07, "logits/chosen": -0.9992599487304688, "logits/rejected": -1.0071967840194702, "logps/chosen": -369.0317077636719, "logps/rejected": -412.9254455566406, "loss": 0.6894, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14995627105236053, "rewards/margins": 0.06627680361270905, "rewards/rejected": -0.2162330597639084, "step": 11280 }, { "epoch": 0.74, "learning_rate": 9.70011800275428e-07, "logits/chosen": -1.0035455226898193, "logits/rejected": -0.9785023927688599, "logps/chosen": -427.5858459472656, "logps/rejected": -535.3264770507812, "loss": 0.6879, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19006092846393585, "rewards/margins": 0.09629470109939575, "rewards/rejected": -0.2863556444644928, "step": 11290 }, { "epoch": 0.74, "learning_rate": 9.655000470379206e-07, "logits/chosen": -1.0799880027770996, "logits/rejected": -0.8926523923873901, "logps/chosen": -401.95391845703125, "logps/rejected": -504.5462341308594, "loss": 0.6875, "rewards/accuracies": 0.75, "rewards/chosen": -0.19160649180412292, "rewards/margins": 0.11327487230300903, "rewards/rejected": -0.30488136410713196, "step": 11300 }, { "epoch": 0.74, "eval_logits/chosen": -1.12637197971344, "eval_logits/rejected": -0.9923059344291687, "eval_logps/chosen": -401.6754150390625, "eval_logps/rejected": -464.677490234375, "eval_loss": 0.6894660592079163, "eval_rewards/accuracies": 0.6610000133514404, "eval_rewards/chosen": -0.16967050731182098, "eval_rewards/margins": 0.08339511603116989, "eval_rewards/rejected": -0.2530656158924103, "eval_runtime": 713.4984, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 11300 }, { "epoch": 0.74, "learning_rate": 9.609962985038517e-07, "logits/chosen": -1.1582266092300415, "logits/rejected": -1.0454920530319214, "logps/chosen": -378.60369873046875, "logps/rejected": -485.1788024902344, "loss": 0.6891, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16324260830879211, "rewards/margins": 0.1317000836133957, "rewards/rejected": -0.2949426770210266, "step": 11310 }, { "epoch": 0.74, "learning_rate": 9.565005781669786e-07, "logits/chosen": -1.4098128080368042, "logits/rejected": -1.043099284172058, "logps/chosen": -432.05108642578125, "logps/rejected": -467.31146240234375, "loss": 0.6879, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17146047949790955, "rewards/margins": 0.08857326209545135, "rewards/rejected": -0.2600337564945221, "step": 11320 }, { "epoch": 0.74, "learning_rate": 9.520129094791822e-07, "logits/chosen": -0.9881695508956909, "logits/rejected": -0.7796091437339783, "logps/chosen": -361.5747985839844, "logps/rejected": -445.5174255371094, "loss": 0.6882, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18207907676696777, "rewards/margins": 0.09414590150117874, "rewards/rejected": -0.2762250006198883, "step": 11330 }, { "epoch": 0.74, "learning_rate": 9.475333158503389e-07, "logits/chosen": -1.012505054473877, "logits/rejected": -0.8788460493087769, "logps/chosen": -367.9222717285156, "logps/rejected": -380.99481201171875, "loss": 0.6906, "rewards/accuracies": 0.625, "rewards/chosen": -0.1500999927520752, "rewards/margins": 0.058231133967638016, "rewards/rejected": -0.20833110809326172, "step": 11340 }, { "epoch": 0.74, "learning_rate": 9.430618206482053e-07, "logits/chosen": -0.9951919317245483, "logits/rejected": -0.8707451820373535, "logps/chosen": -304.44342041015625, "logps/rejected": -351.59326171875, "loss": 0.6917, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.16402050852775574, "rewards/margins": 0.04655434936285019, "rewards/rejected": -0.21057486534118652, "step": 11350 }, { "epoch": 0.74, "learning_rate": 9.385984471982892e-07, "logits/chosen": -0.931800365447998, "logits/rejected": -0.7422152161598206, "logps/chosen": -387.48687744140625, "logps/rejected": -480.77734375, "loss": 0.6854, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17266109585762024, "rewards/margins": 0.14402495324611664, "rewards/rejected": -0.3166860342025757, "step": 11360 }, { "epoch": 0.74, "learning_rate": 9.341432187837343e-07, "logits/chosen": -1.2325918674468994, "logits/rejected": -1.0530160665512085, "logps/chosen": -355.48663330078125, "logps/rejected": -480.79327392578125, "loss": 0.6857, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1651829034090042, "rewards/margins": 0.110438272356987, "rewards/rejected": -0.2756211459636688, "step": 11370 }, { "epoch": 0.74, "learning_rate": 9.29696158645193e-07, "logits/chosen": -1.0486128330230713, "logits/rejected": -1.179518222808838, "logps/chosen": -401.9928283691406, "logps/rejected": -560.9896240234375, "loss": 0.6882, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18089056015014648, "rewards/margins": 0.13000695407390594, "rewards/rejected": -0.3108975291252136, "step": 11380 }, { "epoch": 0.75, "learning_rate": 9.252572899807111e-07, "logits/chosen": -1.0828187465667725, "logits/rejected": -0.9198848009109497, "logps/chosen": -479.14947509765625, "logps/rejected": -569.015625, "loss": 0.687, "rewards/accuracies": 0.75, "rewards/chosen": -0.20854994654655457, "rewards/margins": 0.11605298519134521, "rewards/rejected": -0.3246029317378998, "step": 11390 }, { "epoch": 0.75, "learning_rate": 9.208266359456003e-07, "logits/chosen": -1.1783645153045654, "logits/rejected": -1.0648075342178345, "logps/chosen": -355.53656005859375, "logps/rejected": -432.8563537597656, "loss": 0.6895, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.14973084628582, "rewards/margins": 0.06774896383285522, "rewards/rejected": -0.21747978031635284, "step": 11400 }, { "epoch": 0.75, "eval_logits/chosen": -1.054376482963562, "eval_logits/rejected": -0.9229505658149719, "eval_logps/chosen": -428.9089050292969, "eval_logps/rejected": -497.7010498046875, "eval_loss": 0.6894968748092651, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -0.19690395891666412, "eval_rewards/margins": 0.0891851931810379, "eval_rewards/rejected": -0.286089152097702, "eval_runtime": 711.3033, "eval_samples_per_second": 2.812, "eval_steps_per_second": 1.406, "step": 11400 }, { "epoch": 0.75, "learning_rate": 9.164042196523229e-07, "logits/chosen": -1.3578684329986572, "logits/rejected": -1.054465413093567, "logps/chosen": -382.7578430175781, "logps/rejected": -469.48394775390625, "loss": 0.6905, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19335679709911346, "rewards/margins": 0.10024379193782806, "rewards/rejected": -0.29360055923461914, "step": 11410 }, { "epoch": 0.75, "learning_rate": 9.119900641703696e-07, "logits/chosen": -1.283399224281311, "logits/rejected": -1.0210292339324951, "logps/chosen": -414.1683654785156, "logps/rejected": -439.62286376953125, "loss": 0.6907, "rewards/accuracies": 0.625, "rewards/chosen": -0.20072200894355774, "rewards/margins": 0.0717894434928894, "rewards/rejected": -0.27251142263412476, "step": 11420 }, { "epoch": 0.75, "learning_rate": 9.075841925261364e-07, "logits/chosen": -1.400123953819275, "logits/rejected": -1.266704797744751, "logps/chosen": -412.7691955566406, "logps/rejected": -482.8182678222656, "loss": 0.6916, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17485003173351288, "rewards/margins": 0.08447788655757904, "rewards/rejected": -0.2593279480934143, "step": 11430 }, { "epoch": 0.75, "learning_rate": 9.031866277028093e-07, "logits/chosen": -1.0190837383270264, "logits/rejected": -0.8646179437637329, "logps/chosen": -373.0989685058594, "logps/rejected": -495.5443420410156, "loss": 0.6887, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18202418088912964, "rewards/margins": 0.09563577175140381, "rewards/rejected": -0.27765995264053345, "step": 11440 }, { "epoch": 0.75, "learning_rate": 8.987973926402391e-07, "logits/chosen": -0.8709138035774231, "logits/rejected": -1.0138837099075317, "logps/chosen": -385.26495361328125, "logps/rejected": -469.19696044921875, "loss": 0.69, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17292535305023193, "rewards/margins": 0.08451946079730988, "rewards/rejected": -0.257444828748703, "step": 11450 }, { "epoch": 0.75, "learning_rate": 8.944165102348273e-07, "logits/chosen": -1.1840312480926514, "logits/rejected": -1.033864140510559, "logps/chosen": -287.67584228515625, "logps/rejected": -409.1636047363281, "loss": 0.6871, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13072873651981354, "rewards/margins": 0.11423041671514511, "rewards/rejected": -0.24495916068553925, "step": 11460 }, { "epoch": 0.75, "learning_rate": 8.900440033394018e-07, "logits/chosen": -0.9068467020988464, "logits/rejected": -0.953558623790741, "logps/chosen": -363.6817626953125, "logps/rejected": -424.71832275390625, "loss": 0.6902, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18116554617881775, "rewards/margins": 0.07032226771116257, "rewards/rejected": -0.2514878213405609, "step": 11470 }, { "epoch": 0.75, "learning_rate": 8.856798947631009e-07, "logits/chosen": -1.083222508430481, "logits/rejected": -1.029837965965271, "logps/chosen": -384.2429504394531, "logps/rejected": -513.6322021484375, "loss": 0.688, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19336727261543274, "rewards/margins": 0.11243722587823868, "rewards/rejected": -0.30580443143844604, "step": 11480 }, { "epoch": 0.75, "learning_rate": 8.813242072712519e-07, "logits/chosen": -0.4971315860748291, "logits/rejected": -0.38992008566856384, "logps/chosen": -384.61212158203125, "logps/rejected": -450.76025390625, "loss": 0.6906, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22274982929229736, "rewards/margins": 0.06501208990812302, "rewards/rejected": -0.28776195645332336, "step": 11490 }, { "epoch": 0.75, "learning_rate": 8.769769635852557e-07, "logits/chosen": -0.9182844161987305, "logits/rejected": -0.9621591567993164, "logps/chosen": -387.2684020996094, "logps/rejected": -422.38043212890625, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17196063697338104, "rewards/margins": 0.07186910510063171, "rewards/rejected": -0.24382975697517395, "step": 11500 }, { "epoch": 0.75, "eval_logits/chosen": -0.9567478895187378, "eval_logits/rejected": -0.8319298624992371, "eval_logps/chosen": -427.27978515625, "eval_logps/rejected": -491.3986511230469, "eval_loss": 0.6894726753234863, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -0.19527484476566315, "eval_rewards/margins": 0.08451192826032639, "eval_rewards/rejected": -0.27978676557540894, "eval_runtime": 713.116, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 11500 }, { "epoch": 0.75, "learning_rate": 8.726381863824635e-07, "logits/chosen": -1.3189074993133545, "logits/rejected": -1.1210556030273438, "logps/chosen": -484.98870849609375, "logps/rejected": -490.3335876464844, "loss": 0.69, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1999116688966751, "rewards/margins": 0.07275116443634033, "rewards/rejected": -0.27266281843185425, "step": 11510 }, { "epoch": 0.75, "learning_rate": 8.683078982960638e-07, "logits/chosen": -0.7536784410476685, "logits/rejected": -0.5630779266357422, "logps/chosen": -427.4227600097656, "logps/rejected": -500.4649963378906, "loss": 0.6866, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20273777842521667, "rewards/margins": 0.12313316762447357, "rewards/rejected": -0.32587096095085144, "step": 11520 }, { "epoch": 0.75, "learning_rate": 8.639861219149584e-07, "logits/chosen": -1.0072379112243652, "logits/rejected": -0.716784656047821, "logps/chosen": -491.2522888183594, "logps/rejected": -557.5786743164062, "loss": 0.6855, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22727027535438538, "rewards/margins": 0.10925090312957764, "rewards/rejected": -0.3365211486816406, "step": 11530 }, { "epoch": 0.76, "learning_rate": 8.596728797836532e-07, "logits/chosen": -0.9275296926498413, "logits/rejected": -0.8758915066719055, "logps/chosen": -419.6770935058594, "logps/rejected": -570.2562255859375, "loss": 0.6861, "rewards/accuracies": 0.75, "rewards/chosen": -0.20436839759349823, "rewards/margins": 0.12098614871501923, "rewards/rejected": -0.32535451650619507, "step": 11540 }, { "epoch": 0.76, "learning_rate": 8.553681944021294e-07, "logits/chosen": -1.092116355895996, "logits/rejected": -1.2861175537109375, "logps/chosen": -432.4119567871094, "logps/rejected": -507.87091064453125, "loss": 0.6879, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19332793354988098, "rewards/margins": 0.0948062315583229, "rewards/rejected": -0.2881341874599457, "step": 11550 }, { "epoch": 0.76, "learning_rate": 8.510720882257365e-07, "logits/chosen": -0.5880703926086426, "logits/rejected": -0.6488803625106812, "logps/chosen": -372.99285888671875, "logps/rejected": -544.4835205078125, "loss": 0.6861, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21169230341911316, "rewards/margins": 0.13025878369808197, "rewards/rejected": -0.3419511020183563, "step": 11560 }, { "epoch": 0.76, "learning_rate": 8.467845836650667e-07, "logits/chosen": -0.49036699533462524, "logits/rejected": -0.5513705015182495, "logps/chosen": -451.46856689453125, "logps/rejected": -561.2171630859375, "loss": 0.6865, "rewards/accuracies": 0.75, "rewards/chosen": -0.24320204555988312, "rewards/margins": 0.103193499147892, "rewards/rejected": -0.3463955521583557, "step": 11570 }, { "epoch": 0.76, "learning_rate": 8.425057030858461e-07, "logits/chosen": -0.6223480701446533, "logits/rejected": -0.6779859066009521, "logps/chosen": -356.28814697265625, "logps/rejected": -496.072998046875, "loss": 0.6887, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20023226737976074, "rewards/margins": 0.11076197773218155, "rewards/rejected": -0.3109942674636841, "step": 11580 }, { "epoch": 0.76, "learning_rate": 8.382354688088098e-07, "logits/chosen": -0.7387554049491882, "logits/rejected": -0.8713496923446655, "logps/chosen": -365.7668762207031, "logps/rejected": -448.03173828125, "loss": 0.6899, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20761151611804962, "rewards/margins": 0.06684192270040512, "rewards/rejected": -0.27445346117019653, "step": 11590 }, { "epoch": 0.76, "learning_rate": 8.33973903109594e-07, "logits/chosen": -0.8083747625350952, "logits/rejected": -0.8849031329154968, "logps/chosen": -453.2215270996094, "logps/rejected": -530.6839599609375, "loss": 0.6887, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23662443459033966, "rewards/margins": 0.10902400314807892, "rewards/rejected": -0.3456484079360962, "step": 11600 }, { "epoch": 0.76, "eval_logits/chosen": -0.8248559236526489, "eval_logits/rejected": -0.70490562915802, "eval_logps/chosen": -462.7817077636719, "eval_logps/rejected": -536.8844604492188, "eval_loss": 0.6895047426223755, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -0.23077677190303802, "eval_rewards/margins": 0.0944957509636879, "eval_rewards/rejected": -0.3252725601196289, "eval_runtime": 713.646, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.401, "step": 11600 }, { "epoch": 0.76, "learning_rate": 8.297210282186102e-07, "logits/chosen": -0.9280464053153992, "logits/rejected": -0.8609668612480164, "logps/chosen": -516.7465209960938, "logps/rejected": -612.9113159179688, "loss": 0.6888, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.28672903776168823, "rewards/margins": 0.07479820400476456, "rewards/rejected": -0.3615272045135498, "step": 11610 }, { "epoch": 0.76, "learning_rate": 8.254768663209397e-07, "logits/chosen": -0.6439899206161499, "logits/rejected": -0.7141983509063721, "logps/chosen": -500.17041015625, "logps/rejected": -502.80487060546875, "loss": 0.6909, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22159060835838318, "rewards/margins": 0.06510604918003082, "rewards/rejected": -0.2866966724395752, "step": 11620 }, { "epoch": 0.76, "learning_rate": 8.212414395562079e-07, "logits/chosen": -0.7860328555107117, "logits/rejected": -0.6665444374084473, "logps/chosen": -492.0284729003906, "logps/rejected": -576.9085083007812, "loss": 0.691, "rewards/accuracies": 0.625, "rewards/chosen": -0.26181426644325256, "rewards/margins": 0.06095917150378227, "rewards/rejected": -0.32277345657348633, "step": 11630 }, { "epoch": 0.76, "learning_rate": 8.170147700184775e-07, "logits/chosen": -0.8009759783744812, "logits/rejected": -0.7388449907302856, "logps/chosen": -486.1048889160156, "logps/rejected": -573.0155029296875, "loss": 0.6916, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22965356707572937, "rewards/margins": 0.09584342688322067, "rewards/rejected": -0.32549700140953064, "step": 11640 }, { "epoch": 0.76, "learning_rate": 8.127968797561242e-07, "logits/chosen": -1.0365979671478271, "logits/rejected": -0.8095973134040833, "logps/chosen": -490.6710510253906, "logps/rejected": -588.9063110351562, "loss": 0.6878, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2667003273963928, "rewards/margins": 0.12065819650888443, "rewards/rejected": -0.38735854625701904, "step": 11650 }, { "epoch": 0.76, "learning_rate": 8.085877907717338e-07, "logits/chosen": -0.8645883798599243, "logits/rejected": -1.0048613548278809, "logps/chosen": -445.19183349609375, "logps/rejected": -539.5614013671875, "loss": 0.6891, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22428032755851746, "rewards/margins": 0.10604876279830933, "rewards/rejected": -0.3303290903568268, "step": 11660 }, { "epoch": 0.76, "learning_rate": 8.043875250219732e-07, "logits/chosen": -0.7957605123519897, "logits/rejected": -0.49647361040115356, "logps/chosen": -491.366943359375, "logps/rejected": -514.1613159179688, "loss": 0.6913, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.26196223497390747, "rewards/margins": 0.043863289058208466, "rewards/rejected": -0.30582553148269653, "step": 11670 }, { "epoch": 0.76, "learning_rate": 8.001961044174881e-07, "logits/chosen": -0.9177546501159668, "logits/rejected": -0.48230376839637756, "logps/chosen": -473.46527099609375, "logps/rejected": -463.75103759765625, "loss": 0.6925, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24500080943107605, "rewards/margins": 0.04747391492128372, "rewards/rejected": -0.29247474670410156, "step": 11680 }, { "epoch": 0.76, "learning_rate": 7.960135508227795e-07, "logits/chosen": -0.8630214929580688, "logits/rejected": -0.7500642538070679, "logps/chosen": -520.3016357421875, "logps/rejected": -544.3031005859375, "loss": 0.6906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22643259167671204, "rewards/margins": 0.08831746876239777, "rewards/rejected": -0.314750075340271, "step": 11690 }, { "epoch": 0.77, "learning_rate": 7.91839886056098e-07, "logits/chosen": -1.036927342414856, "logits/rejected": -0.5842759609222412, "logps/chosen": -545.1009521484375, "logps/rejected": -588.3203735351562, "loss": 0.6883, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2613387405872345, "rewards/margins": 0.07379014045000076, "rewards/rejected": -0.33512887358665466, "step": 11700 }, { "epoch": 0.77, "eval_logits/chosen": -0.756587028503418, "eval_logits/rejected": -0.6421234011650085, "eval_logps/chosen": -466.8341979980469, "eval_logps/rejected": -534.1975708007812, "eval_loss": 0.6894639730453491, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -0.23482927680015564, "eval_rewards/margins": 0.08775635808706284, "eval_rewards/rejected": -0.3225856423377991, "eval_runtime": 713.4686, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 11700 }, { "epoch": 0.77, "learning_rate": 7.876751318893217e-07, "logits/chosen": -0.7366732358932495, "logits/rejected": -0.5020397901535034, "logps/chosen": -468.701171875, "logps/rejected": -529.0864868164062, "loss": 0.6899, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23083552718162537, "rewards/margins": 0.08197341859340668, "rewards/rejected": -0.31280896067619324, "step": 11710 }, { "epoch": 0.77, "learning_rate": 7.8351931004785e-07, "logits/chosen": -0.34233760833740234, "logits/rejected": -0.5591579675674438, "logps/chosen": -460.55810546875, "logps/rejected": -525.3573608398438, "loss": 0.6884, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2519298195838928, "rewards/margins": 0.08968259394168854, "rewards/rejected": -0.34161242842674255, "step": 11720 }, { "epoch": 0.77, "learning_rate": 7.793724422104834e-07, "logits/chosen": -0.8081684112548828, "logits/rejected": -0.6944714188575745, "logps/chosen": -440.3197326660156, "logps/rejected": -627.394775390625, "loss": 0.6883, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2314954698085785, "rewards/margins": 0.11759781837463379, "rewards/rejected": -0.3490932583808899, "step": 11730 }, { "epoch": 0.77, "learning_rate": 7.752345500093184e-07, "logits/chosen": -0.9091407060623169, "logits/rejected": -0.751275360584259, "logps/chosen": -466.60687255859375, "logps/rejected": -499.1985778808594, "loss": 0.6904, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.256451815366745, "rewards/margins": 0.06235620379447937, "rewards/rejected": -0.31880801916122437, "step": 11740 }, { "epoch": 0.77, "learning_rate": 7.711056550296253e-07, "logits/chosen": -0.9662020802497864, "logits/rejected": -0.7012864947319031, "logps/chosen": -461.15264892578125, "logps/rejected": -520.930419921875, "loss": 0.6915, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2143588364124298, "rewards/margins": 0.09680284559726715, "rewards/rejected": -0.31116166710853577, "step": 11750 }, { "epoch": 0.77, "learning_rate": 7.669857788097445e-07, "logits/chosen": -0.2298242151737213, "logits/rejected": -0.07954345643520355, "logps/chosen": -426.2181701660156, "logps/rejected": -551.5553588867188, "loss": 0.6878, "rewards/accuracies": 0.625, "rewards/chosen": -0.26465222239494324, "rewards/margins": 0.09871228784322739, "rewards/rejected": -0.3633645176887512, "step": 11760 }, { "epoch": 0.77, "learning_rate": 7.628749428409676e-07, "logits/chosen": -0.6088379621505737, "logits/rejected": -0.47349509596824646, "logps/chosen": -493.9090881347656, "logps/rejected": -533.0814208984375, "loss": 0.6887, "rewards/accuracies": 0.625, "rewards/chosen": -0.2570212781429291, "rewards/margins": 0.09792395681142807, "rewards/rejected": -0.35494521260261536, "step": 11770 }, { "epoch": 0.77, "learning_rate": 7.587731685674288e-07, "logits/chosen": -0.9257609248161316, "logits/rejected": -0.8395794034004211, "logps/chosen": -529.6868896484375, "logps/rejected": -625.3295288085938, "loss": 0.6904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25609859824180603, "rewards/margins": 0.08780574798583984, "rewards/rejected": -0.34390437602996826, "step": 11780 }, { "epoch": 0.77, "learning_rate": 7.546804773859931e-07, "logits/chosen": -0.7353237271308899, "logits/rejected": -0.6386014223098755, "logps/chosen": -471.8373107910156, "logps/rejected": -562.5552368164062, "loss": 0.6894, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2447972297668457, "rewards/margins": 0.11065386235713959, "rewards/rejected": -0.3554511070251465, "step": 11790 }, { "epoch": 0.77, "learning_rate": 7.505968906461409e-07, "logits/chosen": -0.370036780834198, "logits/rejected": -0.8298758268356323, "logps/chosen": -501.50555419921875, "logps/rejected": -546.6343383789062, "loss": 0.6904, "rewards/accuracies": 0.625, "rewards/chosen": -0.26056551933288574, "rewards/margins": 0.06927505880594254, "rewards/rejected": -0.3298405706882477, "step": 11800 }, { "epoch": 0.77, "eval_logits/chosen": -0.6847590804100037, "eval_logits/rejected": -0.5755062103271484, "eval_logps/chosen": -467.6891174316406, "eval_logps/rejected": -530.0805053710938, "eval_loss": 0.6894600987434387, "eval_rewards/accuracies": 0.6679999828338623, "eval_rewards/chosen": -0.23568420112133026, "eval_rewards/margins": 0.08278439193964005, "eval_rewards/rejected": -0.3184686303138733, "eval_runtime": 711.5188, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 11800 }, { "epoch": 0.77, "learning_rate": 7.465224296498627e-07, "logits/chosen": -0.7143672704696655, "logits/rejected": -0.5428879261016846, "logps/chosen": -466.81646728515625, "logps/rejected": -510.3091735839844, "loss": 0.6886, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2338574230670929, "rewards/margins": 0.08462175726890564, "rewards/rejected": -0.31847918033599854, "step": 11810 }, { "epoch": 0.77, "learning_rate": 7.424571156515412e-07, "logits/chosen": -0.8459585309028625, "logits/rejected": -0.6181514859199524, "logps/chosen": -418.2208557128906, "logps/rejected": -538.0280151367188, "loss": 0.6913, "rewards/accuracies": 0.625, "rewards/chosen": -0.24051566421985626, "rewards/margins": 0.09713063389062881, "rewards/rejected": -0.3376463055610657, "step": 11820 }, { "epoch": 0.77, "learning_rate": 7.38400969857847e-07, "logits/chosen": -0.5362470149993896, "logits/rejected": -0.6336470246315002, "logps/chosen": -466.18505859375, "logps/rejected": -603.7225952148438, "loss": 0.6845, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.28304263949394226, "rewards/margins": 0.1343853771686554, "rewards/rejected": -0.41742801666259766, "step": 11830 }, { "epoch": 0.77, "learning_rate": 7.343540134276225e-07, "logits/chosen": -0.7161901593208313, "logits/rejected": -0.7293332815170288, "logps/chosen": -376.135009765625, "logps/rejected": -450.649658203125, "loss": 0.6902, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20542338490486145, "rewards/margins": 0.07261840254068375, "rewards/rejected": -0.2780417799949646, "step": 11840 }, { "epoch": 0.78, "learning_rate": 7.303162674717762e-07, "logits/chosen": -0.172419935464859, "logits/rejected": -0.13953010737895966, "logps/chosen": -492.84796142578125, "logps/rejected": -520.361572265625, "loss": 0.6886, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2826201617717743, "rewards/margins": 0.08334905654191971, "rewards/rejected": -0.3659692406654358, "step": 11850 }, { "epoch": 0.78, "learning_rate": 7.26287753053167e-07, "logits/chosen": -0.7456333637237549, "logits/rejected": -0.5391548275947571, "logps/chosen": -545.03857421875, "logps/rejected": -628.8922119140625, "loss": 0.6885, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2800625264644623, "rewards/margins": 0.085121750831604, "rewards/rejected": -0.3651842474937439, "step": 11860 }, { "epoch": 0.78, "learning_rate": 7.222684911865013e-07, "logits/chosen": -0.6446608304977417, "logits/rejected": -0.5831423401832581, "logps/chosen": -434.08026123046875, "logps/rejected": -561.3636474609375, "loss": 0.6878, "rewards/accuracies": 0.75, "rewards/chosen": -0.22691166400909424, "rewards/margins": 0.10559795051813126, "rewards/rejected": -0.3325095772743225, "step": 11870 }, { "epoch": 0.78, "learning_rate": 7.182585028382166e-07, "logits/chosen": -0.8354743719100952, "logits/rejected": -0.6697049736976624, "logps/chosen": -525.44091796875, "logps/rejected": -582.7346801757812, "loss": 0.6893, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24928930401802063, "rewards/margins": 0.0897437110543251, "rewards/rejected": -0.3390330374240875, "step": 11880 }, { "epoch": 0.78, "learning_rate": 7.142578089263769e-07, "logits/chosen": -0.9254902601242065, "logits/rejected": -1.04551362991333, "logps/chosen": -551.900634765625, "logps/rejected": -593.8734741210938, "loss": 0.6911, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2334720641374588, "rewards/margins": 0.10077275335788727, "rewards/rejected": -0.33424481749534607, "step": 11890 }, { "epoch": 0.78, "learning_rate": 7.102664303205611e-07, "logits/chosen": -0.6265154480934143, "logits/rejected": -0.9039812088012695, "logps/chosen": -465.548828125, "logps/rejected": -537.3599853515625, "loss": 0.6868, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24498634040355682, "rewards/margins": 0.08968769758939743, "rewards/rejected": -0.33467406034469604, "step": 11900 }, { "epoch": 0.78, "eval_logits/chosen": -0.7183237671852112, "eval_logits/rejected": -0.6058202981948853, "eval_logps/chosen": -469.5601501464844, "eval_logps/rejected": -536.6124877929688, "eval_loss": 0.6894504427909851, "eval_rewards/accuracies": 0.6660000085830688, "eval_rewards/chosen": -0.23755520582199097, "eval_rewards/margins": 0.08744542300701141, "eval_rewards/rejected": -0.3250006139278412, "eval_runtime": 711.8508, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 11900 }, { "epoch": 0.78, "learning_rate": 7.062843878417566e-07, "logits/chosen": -1.0695551633834839, "logits/rejected": -0.8493108749389648, "logps/chosen": -417.53173828125, "logps/rejected": -471.24237060546875, "loss": 0.689, "rewards/accuracies": 0.625, "rewards/chosen": -0.19409912824630737, "rewards/margins": 0.08675482124090195, "rewards/rejected": -0.2808539569377899, "step": 11910 }, { "epoch": 0.78, "learning_rate": 7.023117022622458e-07, "logits/chosen": -0.8141118288040161, "logits/rejected": -0.4967958331108093, "logps/chosen": -513.6212768554688, "logps/rejected": -578.6097412109375, "loss": 0.6901, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.27510595321655273, "rewards/margins": 0.0882568210363388, "rewards/rejected": -0.36336278915405273, "step": 11920 }, { "epoch": 0.78, "learning_rate": 6.983483943055042e-07, "logits/chosen": -0.8237019777297974, "logits/rejected": -0.6630524396896362, "logps/chosen": -520.7310791015625, "logps/rejected": -528.470458984375, "loss": 0.6895, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.24026444554328918, "rewards/margins": 0.06451254338026047, "rewards/rejected": -0.3047769367694855, "step": 11930 }, { "epoch": 0.78, "learning_rate": 6.943944846460859e-07, "logits/chosen": -0.4450019896030426, "logits/rejected": -0.535525381565094, "logps/chosen": -430.12127685546875, "logps/rejected": -439.7928771972656, "loss": 0.6925, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2113669365644455, "rewards/margins": 0.05641376972198486, "rewards/rejected": -0.26778069138526917, "step": 11940 }, { "epoch": 0.78, "learning_rate": 6.904499939095225e-07, "logits/chosen": -0.7984381914138794, "logits/rejected": -0.735599160194397, "logps/chosen": -448.34613037109375, "logps/rejected": -533.6369018554688, "loss": 0.6892, "rewards/accuracies": 0.625, "rewards/chosen": -0.22599899768829346, "rewards/margins": 0.10061899572610855, "rewards/rejected": -0.3266179859638214, "step": 11950 }, { "epoch": 0.78, "learning_rate": 6.865149426722079e-07, "logits/chosen": -0.5745676755905151, "logits/rejected": -0.5700176954269409, "logps/chosen": -530.6217651367188, "logps/rejected": -595.4599609375, "loss": 0.6894, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25943660736083984, "rewards/margins": 0.09553225338459015, "rewards/rejected": -0.3549688458442688, "step": 11960 }, { "epoch": 0.78, "learning_rate": 6.825893514612985e-07, "logits/chosen": -0.5172964930534363, "logits/rejected": -0.4114875793457031, "logps/chosen": -450.946044921875, "logps/rejected": -549.2027587890625, "loss": 0.6883, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2143792361021042, "rewards/margins": 0.10044028609991074, "rewards/rejected": -0.31481948494911194, "step": 11970 }, { "epoch": 0.78, "learning_rate": 6.786732407546001e-07, "logits/chosen": -0.5917221307754517, "logits/rejected": -0.4425802230834961, "logps/chosen": -415.364501953125, "logps/rejected": -455.5563049316406, "loss": 0.6891, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2144833356142044, "rewards/margins": 0.07990734279155731, "rewards/rejected": -0.29439064860343933, "step": 11980 }, { "epoch": 0.78, "learning_rate": 6.747666309804654e-07, "logits/chosen": -1.1100962162017822, "logits/rejected": -0.7921835780143738, "logps/chosen": -522.1273193359375, "logps/rejected": -514.0115966796875, "loss": 0.6903, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.234450101852417, "rewards/margins": 0.07141976058483124, "rewards/rejected": -0.30586984753608704, "step": 11990 }, { "epoch": 0.79, "learning_rate": 6.708695425176831e-07, "logits/chosen": -0.783919095993042, "logits/rejected": -0.4991689622402191, "logps/chosen": -423.63970947265625, "logps/rejected": -534.7395629882812, "loss": 0.6884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2546009421348572, "rewards/margins": 0.09034449607133865, "rewards/rejected": -0.3449454605579376, "step": 12000 }, { "epoch": 0.79, "eval_logits/chosen": -0.8023983836174011, "eval_logits/rejected": -0.6853892803192139, "eval_logps/chosen": -454.8382568359375, "eval_logps/rejected": -520.2646484375, "eval_loss": 0.6894471049308777, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -0.22283333539962769, "eval_rewards/margins": 0.08581943064928055, "eval_rewards/rejected": -0.30865269899368286, "eval_runtime": 712.3537, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 12000 }, { "epoch": 0.79, "learning_rate": 6.669819956953768e-07, "logits/chosen": -0.5224130749702454, "logits/rejected": -0.6283994913101196, "logps/chosen": -375.14593505859375, "logps/rejected": -472.1822814941406, "loss": 0.6892, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20481471717357635, "rewards/margins": 0.08849630504846573, "rewards/rejected": -0.2933110296726227, "step": 12010 }, { "epoch": 0.79, "learning_rate": 6.631040107928957e-07, "logits/chosen": -1.2082918882369995, "logits/rejected": -0.6000986099243164, "logps/chosen": -500.62945556640625, "logps/rejected": -478.07373046875, "loss": 0.6911, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23244516551494598, "rewards/margins": 0.0737428143620491, "rewards/rejected": -0.3061879575252533, "step": 12020 }, { "epoch": 0.79, "learning_rate": 6.592356080397072e-07, "logits/chosen": -0.8976734280586243, "logits/rejected": -0.6858707666397095, "logps/chosen": -438.0572204589844, "logps/rejected": -480.2071838378906, "loss": 0.6894, "rewards/accuracies": 0.625, "rewards/chosen": -0.20976808667182922, "rewards/margins": 0.09861332178115845, "rewards/rejected": -0.30838140845298767, "step": 12030 }, { "epoch": 0.79, "learning_rate": 6.553768076152963e-07, "logits/chosen": -0.5755825042724609, "logits/rejected": -0.7588081955909729, "logps/chosen": -362.34368896484375, "logps/rejected": -512.9739379882812, "loss": 0.6883, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20424611866474152, "rewards/margins": 0.12352615594863892, "rewards/rejected": -0.3277722895145416, "step": 12040 }, { "epoch": 0.79, "learning_rate": 6.51527629649055e-07, "logits/chosen": -1.09049391746521, "logits/rejected": -0.8895280957221985, "logps/chosen": -486.25701904296875, "logps/rejected": -511.87615966796875, "loss": 0.6908, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23267392814159393, "rewards/margins": 0.054766200482845306, "rewards/rejected": -0.28744015097618103, "step": 12050 }, { "epoch": 0.79, "learning_rate": 6.476880942201824e-07, "logits/chosen": -1.276995301246643, "logits/rejected": -0.8726984262466431, "logps/chosen": -411.46307373046875, "logps/rejected": -439.18359375, "loss": 0.688, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17012545466423035, "rewards/margins": 0.0863446518778801, "rewards/rejected": -0.25647011399269104, "step": 12060 }, { "epoch": 0.79, "learning_rate": 6.438582213575748e-07, "logits/chosen": -0.8580164909362793, "logits/rejected": -0.9575880765914917, "logps/chosen": -431.1397399902344, "logps/rejected": -528.38427734375, "loss": 0.6904, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20122317969799042, "rewards/margins": 0.08609914034605026, "rewards/rejected": -0.28732234239578247, "step": 12070 }, { "epoch": 0.79, "learning_rate": 6.400380310397267e-07, "logits/chosen": -0.8642458915710449, "logits/rejected": -0.5412619709968567, "logps/chosen": -440.3069763183594, "logps/rejected": -521.5443725585938, "loss": 0.6919, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.20819664001464844, "rewards/margins": 0.049825601279735565, "rewards/rejected": -0.2580222487449646, "step": 12080 }, { "epoch": 0.79, "learning_rate": 6.362275431946202e-07, "logits/chosen": -0.6833379864692688, "logits/rejected": -0.6843885183334351, "logps/chosen": -441.56927490234375, "logps/rejected": -506.1634826660156, "loss": 0.6899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.205215722322464, "rewards/margins": 0.058545976877212524, "rewards/rejected": -0.2637616991996765, "step": 12090 }, { "epoch": 0.79, "learning_rate": 6.324267776996285e-07, "logits/chosen": -1.1212000846862793, "logits/rejected": -0.6497322916984558, "logps/chosen": -623.5897216796875, "logps/rejected": -622.3527221679688, "loss": 0.6878, "rewards/accuracies": 0.75, "rewards/chosen": -0.24901065230369568, "rewards/margins": 0.12074349075555801, "rewards/rejected": -0.3697541356086731, "step": 12100 }, { "epoch": 0.79, "eval_logits/chosen": -0.8305608034133911, "eval_logits/rejected": -0.7124419808387756, "eval_logps/chosen": -446.484130859375, "eval_logps/rejected": -511.0565185546875, "eval_loss": 0.6894470453262329, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -0.21447916328907013, "eval_rewards/margins": 0.08496550470590591, "eval_rewards/rejected": -0.29944467544555664, "eval_runtime": 710.5726, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.407, "step": 12100 }, { "epoch": 0.79, "learning_rate": 6.286357543814045e-07, "logits/chosen": -0.7228476405143738, "logits/rejected": -0.848524272441864, "logps/chosen": -406.7255859375, "logps/rejected": -588.52490234375, "loss": 0.6865, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21319575607776642, "rewards/margins": 0.11378808319568634, "rewards/rejected": -0.32698389887809753, "step": 12110 }, { "epoch": 0.79, "learning_rate": 6.248544930157838e-07, "logits/chosen": -0.9068900346755981, "logits/rejected": -0.7620546817779541, "logps/chosen": -416.1766662597656, "logps/rejected": -525.029541015625, "loss": 0.6867, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23209106922149658, "rewards/margins": 0.1266333907842636, "rewards/rejected": -0.358724445104599, "step": 12120 }, { "epoch": 0.79, "learning_rate": 6.21083013327678e-07, "logits/chosen": -0.777552604675293, "logits/rejected": -0.7302781343460083, "logps/chosen": -505.84814453125, "logps/rejected": -513.743896484375, "loss": 0.6897, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20160739123821259, "rewards/margins": 0.06382157653570175, "rewards/rejected": -0.26542896032333374, "step": 12130 }, { "epoch": 0.79, "learning_rate": 6.17321334990973e-07, "logits/chosen": -0.9642006754875183, "logits/rejected": -0.6679797172546387, "logps/chosen": -407.752685546875, "logps/rejected": -446.0419006347656, "loss": 0.6911, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2069481909275055, "rewards/margins": 0.07352651655673981, "rewards/rejected": -0.2804746627807617, "step": 12140 }, { "epoch": 0.79, "learning_rate": 6.135694776284243e-07, "logits/chosen": -1.1946967840194702, "logits/rejected": -0.7229410409927368, "logps/chosen": -462.3304138183594, "logps/rejected": -509.57952880859375, "loss": 0.6892, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19153942167758942, "rewards/margins": 0.11202134191989899, "rewards/rejected": -0.3035607635974884, "step": 12150 }, { "epoch": 0.8, "learning_rate": 6.098274608115595e-07, "logits/chosen": -0.9830659031867981, "logits/rejected": -0.5625541806221008, "logps/chosen": -399.18585205078125, "logps/rejected": -403.7681884765625, "loss": 0.6932, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.19353525340557098, "rewards/margins": 0.03321469947695732, "rewards/rejected": -0.2267499417066574, "step": 12160 }, { "epoch": 0.8, "learning_rate": 6.060953040605697e-07, "logits/chosen": -1.0277113914489746, "logits/rejected": -0.6402236223220825, "logps/chosen": -523.7098388671875, "logps/rejected": -552.3817138671875, "loss": 0.6907, "rewards/accuracies": 0.75, "rewards/chosen": -0.18458017706871033, "rewards/margins": 0.09856677800416946, "rewards/rejected": -0.2831469774246216, "step": 12170 }, { "epoch": 0.8, "learning_rate": 6.023730268442144e-07, "logits/chosen": -0.8050621151924133, "logits/rejected": -0.6555970311164856, "logps/chosen": -415.20281982421875, "logps/rejected": -501.6064453125, "loss": 0.6869, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21038201451301575, "rewards/margins": 0.11484841257333755, "rewards/rejected": -0.3252304494380951, "step": 12180 }, { "epoch": 0.8, "learning_rate": 5.986606485797131e-07, "logits/chosen": -0.7921792268753052, "logits/rejected": -0.92046058177948, "logps/chosen": -378.4645690917969, "logps/rejected": -452.6407165527344, "loss": 0.6891, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17857401072978973, "rewards/margins": 0.06911009550094604, "rewards/rejected": -0.24768409132957458, "step": 12190 }, { "epoch": 0.8, "learning_rate": 5.949581886326511e-07, "logits/chosen": -0.7513020038604736, "logits/rejected": -0.9230319857597351, "logps/chosen": -474.7445373535156, "logps/rejected": -501.08935546875, "loss": 0.6903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17695248126983643, "rewards/margins": 0.05525914952158928, "rewards/rejected": -0.2322116196155548, "step": 12200 }, { "epoch": 0.8, "eval_logits/chosen": -0.9147159457206726, "eval_logits/rejected": -0.7926742434501648, "eval_logps/chosen": -427.7273864746094, "eval_logps/rejected": -490.2318115234375, "eval_loss": 0.6894443035125732, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -0.19572244584560394, "eval_rewards/margins": 0.08289749175310135, "eval_rewards/rejected": -0.2786199450492859, "eval_runtime": 712.1709, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 12200 }, { "epoch": 0.8, "learning_rate": 5.912656663168717e-07, "logits/chosen": -1.0834193229675293, "logits/rejected": -1.0200486183166504, "logps/chosen": -409.322998046875, "logps/rejected": -465.9730529785156, "loss": 0.6907, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18531367182731628, "rewards/margins": 0.06302249431610107, "rewards/rejected": -0.24833616614341736, "step": 12210 }, { "epoch": 0.8, "learning_rate": 5.875831008943817e-07, "logits/chosen": -0.7899206876754761, "logits/rejected": -0.7594529390335083, "logps/chosen": -378.1102600097656, "logps/rejected": -410.220703125, "loss": 0.691, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20046624541282654, "rewards/margins": 0.054710645228624344, "rewards/rejected": -0.2551768720149994, "step": 12220 }, { "epoch": 0.8, "learning_rate": 5.839105115752442e-07, "logits/chosen": -0.7225872278213501, "logits/rejected": -0.6378189325332642, "logps/chosen": -462.96588134765625, "logps/rejected": -512.4617309570312, "loss": 0.6881, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23998837172985077, "rewards/margins": 0.08950354158878326, "rewards/rejected": -0.32949191331863403, "step": 12230 }, { "epoch": 0.8, "learning_rate": 5.802479175174855e-07, "logits/chosen": -0.7741121053695679, "logits/rejected": -0.794974684715271, "logps/chosen": -390.6680603027344, "logps/rejected": -477.42755126953125, "loss": 0.6891, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.224673792719841, "rewards/margins": 0.08083239942789078, "rewards/rejected": -0.30550616979599, "step": 12240 }, { "epoch": 0.8, "learning_rate": 5.765953378269901e-07, "logits/chosen": -0.9988287687301636, "logits/rejected": -1.0308479070663452, "logps/chosen": -427.112548828125, "logps/rejected": -573.0469970703125, "loss": 0.6864, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2201191484928131, "rewards/margins": 0.11489073187112808, "rewards/rejected": -0.3350098729133606, "step": 12250 }, { "epoch": 0.8, "learning_rate": 5.729527915574037e-07, "logits/chosen": -0.7769566178321838, "logits/rejected": -0.8973855972290039, "logps/chosen": -426.7218322753906, "logps/rejected": -519.5902099609375, "loss": 0.6898, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20630326867103577, "rewards/margins": 0.09252933412790298, "rewards/rejected": -0.29883262515068054, "step": 12260 }, { "epoch": 0.8, "learning_rate": 5.693202977100304e-07, "logits/chosen": -0.7344051003456116, "logits/rejected": -0.650239884853363, "logps/chosen": -386.86102294921875, "logps/rejected": -460.61614990234375, "loss": 0.6898, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21671593189239502, "rewards/margins": 0.07709144055843353, "rewards/rejected": -0.29380735754966736, "step": 12270 }, { "epoch": 0.8, "learning_rate": 5.656978752337389e-07, "logits/chosen": -0.9088813662528992, "logits/rejected": -0.9434731602668762, "logps/chosen": -446.60223388671875, "logps/rejected": -563.0369262695312, "loss": 0.6872, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24694709479808807, "rewards/margins": 0.11503490060567856, "rewards/rejected": -0.36198195815086365, "step": 12280 }, { "epoch": 0.8, "learning_rate": 5.620855430248581e-07, "logits/chosen": -0.6687235236167908, "logits/rejected": -0.7715210914611816, "logps/chosen": -318.0511474609375, "logps/rejected": -428.16424560546875, "loss": 0.6892, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.15605607628822327, "rewards/margins": 0.11229152977466583, "rewards/rejected": -0.2683475911617279, "step": 12290 }, { "epoch": 0.8, "learning_rate": 5.584833199270837e-07, "logits/chosen": -1.0661684274673462, "logits/rejected": -0.7041251063346863, "logps/chosen": -458.58795166015625, "logps/rejected": -540.2942504882812, "loss": 0.6914, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22995570302009583, "rewards/margins": 0.08939947187900543, "rewards/rejected": -0.31935515999794006, "step": 12300 }, { "epoch": 0.8, "eval_logits/chosen": -0.8736314177513123, "eval_logits/rejected": -0.7532578110694885, "eval_logps/chosen": -441.02703857421875, "eval_logps/rejected": -506.0263671875, "eval_loss": 0.6894257068634033, "eval_rewards/accuracies": 0.6654999852180481, "eval_rewards/chosen": -0.20902210474014282, "eval_rewards/margins": 0.08539240807294846, "eval_rewards/rejected": -0.2944145202636719, "eval_runtime": 713.5476, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.401, "step": 12300 }, { "epoch": 0.81, "learning_rate": 5.548912247313742e-07, "logits/chosen": -1.3093442916870117, "logits/rejected": -1.0217863321304321, "logps/chosen": -513.5242919921875, "logps/rejected": -541.9788818359375, "loss": 0.6907, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22972431778907776, "rewards/margins": 0.07603023946285248, "rewards/rejected": -0.30575451254844666, "step": 12310 }, { "epoch": 0.81, "learning_rate": 5.513092761758596e-07, "logits/chosen": -1.2074533700942993, "logits/rejected": -0.9448097348213196, "logps/chosen": -509.68133544921875, "logps/rejected": -491.7139587402344, "loss": 0.692, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23847146332263947, "rewards/margins": 0.049013566225767136, "rewards/rejected": -0.2874850332736969, "step": 12320 }, { "epoch": 0.81, "learning_rate": 5.477374929457363e-07, "logits/chosen": -1.1951650381088257, "logits/rejected": -0.9347305297851562, "logps/chosen": -422.927490234375, "logps/rejected": -459.3108825683594, "loss": 0.6918, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2208663523197174, "rewards/margins": 0.05555524677038193, "rewards/rejected": -0.27642157673835754, "step": 12330 }, { "epoch": 0.81, "learning_rate": 5.441758936731772e-07, "logits/chosen": -0.9595106840133667, "logits/rejected": -0.6512321829795837, "logps/chosen": -437.71051025390625, "logps/rejected": -497.3162536621094, "loss": 0.6902, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20236524939537048, "rewards/margins": 0.07739603519439697, "rewards/rejected": -0.27976128458976746, "step": 12340 }, { "epoch": 0.81, "learning_rate": 5.406244969372273e-07, "logits/chosen": -0.9617953300476074, "logits/rejected": -0.7921175956726074, "logps/chosen": -404.3094482421875, "logps/rejected": -547.6414794921875, "loss": 0.6864, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20458662509918213, "rewards/margins": 0.13683012127876282, "rewards/rejected": -0.34141674637794495, "step": 12350 }, { "epoch": 0.81, "learning_rate": 5.370833212637122e-07, "logits/chosen": -0.6919295191764832, "logits/rejected": -0.6015470623970032, "logps/chosen": -408.9327697753906, "logps/rejected": -477.7970275878906, "loss": 0.6905, "rewards/accuracies": 0.75, "rewards/chosen": -0.19042177498340607, "rewards/margins": 0.08264943957328796, "rewards/rejected": -0.2730712294578552, "step": 12360 }, { "epoch": 0.81, "learning_rate": 5.335523851251392e-07, "logits/chosen": -1.001318097114563, "logits/rejected": -0.8748119473457336, "logps/chosen": -415.00048828125, "logps/rejected": -490.12945556640625, "loss": 0.6873, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.20886659622192383, "rewards/margins": 0.0959271788597107, "rewards/rejected": -0.3047937750816345, "step": 12370 }, { "epoch": 0.81, "learning_rate": 5.300317069406003e-07, "logits/chosen": -1.0027813911437988, "logits/rejected": -0.7950852513313293, "logps/chosen": -341.1397399902344, "logps/rejected": -453.4449768066406, "loss": 0.6865, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18039390444755554, "rewards/margins": 0.1085384264588356, "rewards/rejected": -0.28893235325813293, "step": 12380 }, { "epoch": 0.81, "learning_rate": 5.265213050756782e-07, "logits/chosen": -1.218680500984192, "logits/rejected": -0.9439771771430969, "logps/chosen": -411.95489501953125, "logps/rejected": -501.7945861816406, "loss": 0.6889, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17990362644195557, "rewards/margins": 0.09197081625461578, "rewards/rejected": -0.27187445759773254, "step": 12390 }, { "epoch": 0.81, "learning_rate": 5.230211978423477e-07, "logits/chosen": -1.046304702758789, "logits/rejected": -0.9308651089668274, "logps/chosen": -416.91558837890625, "logps/rejected": -460.021484375, "loss": 0.6923, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19811533391475677, "rewards/margins": 0.05827472731471062, "rewards/rejected": -0.2563900351524353, "step": 12400 }, { "epoch": 0.81, "eval_logits/chosen": -0.9178078770637512, "eval_logits/rejected": -0.7956603169441223, "eval_logps/chosen": -427.3274230957031, "eval_logps/rejected": -489.2209777832031, "eval_loss": 0.6894317865371704, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -0.1953224390745163, "eval_rewards/margins": 0.08228664100170135, "eval_rewards/rejected": -0.27760908007621765, "eval_runtime": 710.5715, "eval_samples_per_second": 2.815, "eval_steps_per_second": 1.407, "step": 12400 }, { "epoch": 0.81, "learning_rate": 5.195314034988835e-07, "logits/chosen": -1.2224987745285034, "logits/rejected": -1.0301649570465088, "logps/chosen": -390.1413269042969, "logps/rejected": -409.8642578125, "loss": 0.6894, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17085427045822144, "rewards/margins": 0.08676211535930634, "rewards/rejected": -0.2576163709163666, "step": 12410 }, { "epoch": 0.81, "learning_rate": 5.160519402497616e-07, "logits/chosen": -0.9702883958816528, "logits/rejected": -0.8835731744766235, "logps/chosen": -442.39300537109375, "logps/rejected": -527.3919677734375, "loss": 0.6875, "rewards/accuracies": 0.625, "rewards/chosen": -0.21233896911144257, "rewards/margins": 0.08905895054340363, "rewards/rejected": -0.3013979196548462, "step": 12420 }, { "epoch": 0.81, "learning_rate": 5.125828262455679e-07, "logits/chosen": -0.9499308466911316, "logits/rejected": -0.7601709961891174, "logps/chosen": -452.78863525390625, "logps/rejected": -510.4447326660156, "loss": 0.6884, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19707557559013367, "rewards/margins": 0.08799558877944946, "rewards/rejected": -0.28507116436958313, "step": 12430 }, { "epoch": 0.81, "learning_rate": 5.091240795828992e-07, "logits/chosen": -0.7149444818496704, "logits/rejected": -0.6284686923027039, "logps/chosen": -386.01531982421875, "logps/rejected": -501.17694091796875, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18352492153644562, "rewards/margins": 0.09533097594976425, "rewards/rejected": -0.2788558602333069, "step": 12440 }, { "epoch": 0.81, "learning_rate": 5.056757183042732e-07, "logits/chosen": -0.9699877500534058, "logits/rejected": -0.785503089427948, "logps/chosen": -440.580810546875, "logps/rejected": -510.8658752441406, "loss": 0.6892, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20702461898326874, "rewards/margins": 0.0901625007390976, "rewards/rejected": -0.2971871495246887, "step": 12450 }, { "epoch": 0.82, "learning_rate": 5.022377603980308e-07, "logits/chosen": -1.19623601436615, "logits/rejected": -0.7461040019989014, "logps/chosen": -453.72381591796875, "logps/rejected": -481.7816467285156, "loss": 0.6883, "rewards/accuracies": 0.75, "rewards/chosen": -0.2044392079114914, "rewards/margins": 0.09452911466360092, "rewards/rejected": -0.29896828532218933, "step": 12460 }, { "epoch": 0.82, "learning_rate": 4.988102237982454e-07, "logits/chosen": -1.0094448328018188, "logits/rejected": -0.7690650224685669, "logps/chosen": -428.43182373046875, "logps/rejected": -440.9228515625, "loss": 0.6922, "rewards/accuracies": 0.625, "rewards/chosen": -0.2062513530254364, "rewards/margins": 0.053066302090883255, "rewards/rejected": -0.25931766629219055, "step": 12470 }, { "epoch": 0.82, "learning_rate": 4.953931263846251e-07, "logits/chosen": -0.9864797592163086, "logits/rejected": -0.7949808239936829, "logps/chosen": -471.72576904296875, "logps/rejected": -527.9396362304688, "loss": 0.6891, "rewards/accuracies": 0.75, "rewards/chosen": -0.2100168913602829, "rewards/margins": 0.10017456114292145, "rewards/rejected": -0.31019148230552673, "step": 12480 }, { "epoch": 0.82, "learning_rate": 4.919864859824266e-07, "logits/chosen": -0.8903275728225708, "logits/rejected": -0.9078360795974731, "logps/chosen": -443.26885986328125, "logps/rejected": -481.3370666503906, "loss": 0.6905, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20583471655845642, "rewards/margins": 0.08310334384441376, "rewards/rejected": -0.288938045501709, "step": 12490 }, { "epoch": 0.82, "learning_rate": 4.885903203623532e-07, "logits/chosen": -1.361114263534546, "logits/rejected": -0.7789111137390137, "logps/chosen": -457.87957763671875, "logps/rejected": -490.6852111816406, "loss": 0.6892, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17286573350429535, "rewards/margins": 0.09657898545265198, "rewards/rejected": -0.26944470405578613, "step": 12500 }, { "epoch": 0.82, "eval_logits/chosen": -0.9396504163742065, "eval_logits/rejected": -0.8163852691650391, "eval_logps/chosen": -422.764892578125, "eval_logps/rejected": -484.4884338378906, "eval_loss": 0.6894283294677734, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -0.19075995683670044, "eval_rewards/margins": 0.08211655169725418, "eval_rewards/rejected": -0.2728765308856964, "eval_runtime": 711.9412, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.405, "step": 12500 }, { "epoch": 0.82, "learning_rate": 4.852046472404695e-07, "logits/chosen": -0.9885215759277344, "logits/rejected": -0.5774581432342529, "logps/chosen": -446.4098205566406, "logps/rejected": -402.09356689453125, "loss": 0.6899, "rewards/accuracies": 0.625, "rewards/chosen": -0.1485210806131363, "rewards/margins": 0.08135717362165451, "rewards/rejected": -0.2298782616853714, "step": 12510 }, { "epoch": 0.82, "learning_rate": 4.818294842781035e-07, "logits/chosen": -1.1667752265930176, "logits/rejected": -0.8311818242073059, "logps/chosen": -395.07470703125, "logps/rejected": -465.4454650878906, "loss": 0.6888, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16886630654335022, "rewards/margins": 0.12272864580154419, "rewards/rejected": -0.291594922542572, "step": 12520 }, { "epoch": 0.82, "learning_rate": 4.784648490817601e-07, "logits/chosen": -1.015737771987915, "logits/rejected": -0.901741623878479, "logps/chosen": -391.2193298339844, "logps/rejected": -415.372802734375, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1711125373840332, "rewards/margins": 0.06741202622652054, "rewards/rejected": -0.23852458596229553, "step": 12530 }, { "epoch": 0.82, "learning_rate": 4.751107592030235e-07, "logits/chosen": -0.9657641649246216, "logits/rejected": -0.6886070966720581, "logps/chosen": -327.6506652832031, "logps/rejected": -425.15374755859375, "loss": 0.6893, "rewards/accuracies": 0.625, "rewards/chosen": -0.1577988713979721, "rewards/margins": 0.11460302025079727, "rewards/rejected": -0.27240189909935, "step": 12540 }, { "epoch": 0.82, "learning_rate": 4.717672321384703e-07, "logits/chosen": -0.9216817617416382, "logits/rejected": -0.42492127418518066, "logps/chosen": -415.05120849609375, "logps/rejected": -458.6930236816406, "loss": 0.6886, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19180552661418915, "rewards/margins": 0.08583752810955048, "rewards/rejected": -0.277643084526062, "step": 12550 }, { "epoch": 0.82, "learning_rate": 4.684342853295748e-07, "logits/chosen": -0.7486587166786194, "logits/rejected": -0.7542505860328674, "logps/chosen": -368.861328125, "logps/rejected": -465.25213623046875, "loss": 0.688, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18116456270217896, "rewards/margins": 0.10031484067440033, "rewards/rejected": -0.2814793884754181, "step": 12560 }, { "epoch": 0.82, "learning_rate": 4.651119361626213e-07, "logits/chosen": -1.3638161420822144, "logits/rejected": -0.8579947352409363, "logps/chosen": -395.2978515625, "logps/rejected": -425.96185302734375, "loss": 0.69, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15417927503585815, "rewards/margins": 0.07689286768436432, "rewards/rejected": -0.23107214272022247, "step": 12570 }, { "epoch": 0.82, "learning_rate": 4.618002019686091e-07, "logits/chosen": -0.983599841594696, "logits/rejected": -1.0167783498764038, "logps/chosen": -462.54071044921875, "logps/rejected": -489.15533447265625, "loss": 0.6889, "rewards/accuracies": 0.625, "rewards/chosen": -0.18332836031913757, "rewards/margins": 0.0832507386803627, "rewards/rejected": -0.26657912135124207, "step": 12580 }, { "epoch": 0.82, "learning_rate": 4.5849910002316757e-07, "logits/chosen": -0.9545795321464539, "logits/rejected": -0.8534967303276062, "logps/chosen": -411.9749450683594, "logps/rejected": -470.1524353027344, "loss": 0.6911, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.231462761759758, "rewards/margins": 0.09144178777933121, "rewards/rejected": -0.3229045569896698, "step": 12590 }, { "epoch": 0.82, "learning_rate": 4.5520864754645984e-07, "logits/chosen": -1.293601155281067, "logits/rejected": -1.0598740577697754, "logps/chosen": -457.161376953125, "logps/rejected": -484.68731689453125, "loss": 0.6898, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17581158876419067, "rewards/margins": 0.07135093212127686, "rewards/rejected": -0.24716253578662872, "step": 12600 }, { "epoch": 0.82, "eval_logits/chosen": -0.9114385843276978, "eval_logits/rejected": -0.7893572449684143, "eval_logps/chosen": -430.8013916015625, "eval_logps/rejected": -493.8382568359375, "eval_loss": 0.6894257664680481, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -0.19879642128944397, "eval_rewards/margins": 0.08342995494604111, "eval_rewards/rejected": -0.2822263836860657, "eval_runtime": 711.5076, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 12600 }, { "epoch": 0.83, "learning_rate": 4.5192886170309896e-07, "logits/chosen": -0.8391144871711731, "logits/rejected": -0.8102821111679077, "logps/chosen": -396.1620178222656, "logps/rejected": -444.4454040527344, "loss": 0.6914, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20539355278015137, "rewards/margins": 0.04809931665658951, "rewards/rejected": -0.25349289178848267, "step": 12610 }, { "epoch": 0.83, "learning_rate": 4.486597596020548e-07, "logits/chosen": -0.7510364651679993, "logits/rejected": -0.8711469769477844, "logps/chosen": -448.0186462402344, "logps/rejected": -485.5732421875, "loss": 0.6891, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22843733429908752, "rewards/margins": 0.07587635517120361, "rewards/rejected": -0.3043137192726135, "step": 12620 }, { "epoch": 0.83, "learning_rate": 4.454013582965644e-07, "logits/chosen": -0.673041820526123, "logits/rejected": -0.5068280100822449, "logps/chosen": -488.75177001953125, "logps/rejected": -497.9751892089844, "loss": 0.6914, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2252950668334961, "rewards/margins": 0.05966676399111748, "rewards/rejected": -0.2849618196487427, "step": 12630 }, { "epoch": 0.83, "learning_rate": 4.4215367478404605e-07, "logits/chosen": -0.7873706817626953, "logits/rejected": -0.7081938982009888, "logps/chosen": -487.13531494140625, "logps/rejected": -583.9402465820312, "loss": 0.6903, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2163754403591156, "rewards/margins": 0.07243311405181885, "rewards/rejected": -0.28880855441093445, "step": 12640 }, { "epoch": 0.83, "learning_rate": 4.389167260060068e-07, "logits/chosen": -0.8580641746520996, "logits/rejected": -0.8711032867431641, "logps/chosen": -384.7228088378906, "logps/rejected": -469.16259765625, "loss": 0.6875, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17603252828121185, "rewards/margins": 0.12261439859867096, "rewards/rejected": -0.2986469268798828, "step": 12650 }, { "epoch": 0.83, "learning_rate": 4.356905288479579e-07, "logits/chosen": -0.8820209503173828, "logits/rejected": -0.6234656572341919, "logps/chosen": -429.2950134277344, "logps/rejected": -532.017333984375, "loss": 0.6855, "rewards/accuracies": 0.75, "rewards/chosen": -0.20277424156665802, "rewards/margins": 0.13148626685142517, "rewards/rejected": -0.334260493516922, "step": 12660 }, { "epoch": 0.83, "learning_rate": 4.3247510013932377e-07, "logits/chosen": -0.5720213651657104, "logits/rejected": -0.6938979029655457, "logps/chosen": -463.2184143066406, "logps/rejected": -558.4837036132812, "loss": 0.6897, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21102993190288544, "rewards/margins": 0.09544537961483002, "rewards/rejected": -0.30647531151771545, "step": 12670 }, { "epoch": 0.83, "learning_rate": 4.2927045665335594e-07, "logits/chosen": -0.3011423647403717, "logits/rejected": -0.4939189851284027, "logps/chosen": -386.75531005859375, "logps/rejected": -474.6163635253906, "loss": 0.6875, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22187760472297668, "rewards/margins": 0.09141628444194794, "rewards/rejected": -0.3132938742637634, "step": 12680 }, { "epoch": 0.83, "learning_rate": 4.260766151070439e-07, "logits/chosen": -0.6524245142936707, "logits/rejected": -0.6261091232299805, "logps/chosen": -446.9039611816406, "logps/rejected": -528.741943359375, "loss": 0.6902, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2224704921245575, "rewards/margins": 0.09309863299131393, "rewards/rejected": -0.315569132566452, "step": 12690 }, { "epoch": 0.83, "learning_rate": 4.228935921610308e-07, "logits/chosen": -1.0410144329071045, "logits/rejected": -0.8041863441467285, "logps/chosen": -446.28924560546875, "logps/rejected": -444.7435607910156, "loss": 0.6903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18257063627243042, "rewards/margins": 0.06250576674938202, "rewards/rejected": -0.24507638812065125, "step": 12700 }, { "epoch": 0.83, "eval_logits/chosen": -0.9304318428039551, "eval_logits/rejected": -0.8066224455833435, "eval_logps/chosen": -433.35626220703125, "eval_logps/rejected": -498.3890380859375, "eval_loss": 0.6894228458404541, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -0.2013513296842575, "eval_rewards/margins": 0.08542577177286148, "eval_rewards/rejected": -0.286777138710022, "eval_runtime": 713.6499, "eval_samples_per_second": 2.802, "eval_steps_per_second": 1.401, "step": 12700 }, { "epoch": 0.83, "learning_rate": 4.1972140441952246e-07, "logits/chosen": -0.8487979173660278, "logits/rejected": -0.9116487503051758, "logps/chosen": -430.19549560546875, "logps/rejected": -499.740966796875, "loss": 0.6896, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19061222672462463, "rewards/margins": 0.06753625720739365, "rewards/rejected": -0.2581484913825989, "step": 12710 }, { "epoch": 0.83, "learning_rate": 4.165600684302046e-07, "logits/chosen": -1.0365651845932007, "logits/rejected": -0.9350225329399109, "logps/chosen": -358.640625, "logps/rejected": -459.84130859375, "loss": 0.6895, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18285666406154633, "rewards/margins": 0.09124276787042618, "rewards/rejected": -0.2740994095802307, "step": 12720 }, { "epoch": 0.83, "learning_rate": 4.13409600684154e-07, "logits/chosen": -1.0195610523223877, "logits/rejected": -0.8225051760673523, "logps/chosen": -417.7220153808594, "logps/rejected": -484.96685791015625, "loss": 0.6882, "rewards/accuracies": 0.625, "rewards/chosen": -0.2027023732662201, "rewards/margins": 0.09642815589904785, "rewards/rejected": -0.29913052916526794, "step": 12730 }, { "epoch": 0.83, "learning_rate": 4.102700176157548e-07, "logits/chosen": -1.102210521697998, "logits/rejected": -0.8111448287963867, "logps/chosen": -552.992431640625, "logps/rejected": -525.7896728515625, "loss": 0.6899, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22823718190193176, "rewards/margins": 0.07058385014533997, "rewards/rejected": -0.29882103204727173, "step": 12740 }, { "epoch": 0.83, "learning_rate": 4.0714133560260884e-07, "logits/chosen": -1.0088520050048828, "logits/rejected": -1.0012805461883545, "logps/chosen": -464.440185546875, "logps/rejected": -468.52886962890625, "loss": 0.6917, "rewards/accuracies": 0.625, "rewards/chosen": -0.20490708947181702, "rewards/margins": 0.062319546937942505, "rewards/rejected": -0.2672266364097595, "step": 12750 }, { "epoch": 0.83, "learning_rate": 4.0402357096545527e-07, "logits/chosen": -0.741023063659668, "logits/rejected": -0.9252802133560181, "logps/chosen": -452.5013122558594, "logps/rejected": -524.4758911132812, "loss": 0.6897, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20073971152305603, "rewards/margins": 0.08214665949344635, "rewards/rejected": -0.2828863859176636, "step": 12760 }, { "epoch": 0.84, "learning_rate": 4.0091673996808025e-07, "logits/chosen": -1.2111819982528687, "logits/rejected": -1.0127800703048706, "logps/chosen": -428.5201110839844, "logps/rejected": -490.87725830078125, "loss": 0.6887, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23146001994609833, "rewards/margins": 0.0869842916727066, "rewards/rejected": -0.31844431161880493, "step": 12770 }, { "epoch": 0.84, "learning_rate": 3.9782085881723776e-07, "logits/chosen": -0.9244493246078491, "logits/rejected": -0.8639167547225952, "logps/chosen": -360.8436584472656, "logps/rejected": -485.7451171875, "loss": 0.6883, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19524605572223663, "rewards/margins": 0.11293482780456543, "rewards/rejected": -0.30818089842796326, "step": 12780 }, { "epoch": 0.84, "learning_rate": 3.947359436625592e-07, "logits/chosen": -0.9303233027458191, "logits/rejected": -0.6964131593704224, "logps/chosen": -436.606201171875, "logps/rejected": -506.09381103515625, "loss": 0.6888, "rewards/accuracies": 0.75, "rewards/chosen": -0.20194324851036072, "rewards/margins": 0.10705173015594482, "rewards/rejected": -0.30899497866630554, "step": 12790 }, { "epoch": 0.84, "learning_rate": 3.9166201059647386e-07, "logits/chosen": -0.9158743619918823, "logits/rejected": -1.0071403980255127, "logps/chosen": -471.6205139160156, "logps/rejected": -480.96356201171875, "loss": 0.6906, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2097702920436859, "rewards/margins": 0.04627406597137451, "rewards/rejected": -0.2560443580150604, "step": 12800 }, { "epoch": 0.84, "eval_logits/chosen": -0.9460026621818542, "eval_logits/rejected": -0.8214091062545776, "eval_logps/chosen": -435.67803955078125, "eval_logps/rejected": -501.1642150878906, "eval_loss": 0.6894211769104004, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -0.2036730945110321, "eval_rewards/margins": 0.08587922900915146, "eval_rewards/rejected": -0.28955233097076416, "eval_runtime": 710.9115, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.407, "step": 12800 }, { "epoch": 0.84, "learning_rate": 3.8859907565412194e-07, "logits/chosen": -0.8359258770942688, "logits/rejected": -1.1643868684768677, "logps/chosen": -392.3347473144531, "logps/rejected": -482.1744079589844, "loss": 0.6884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2126855105161667, "rewards/margins": 0.09041811525821686, "rewards/rejected": -0.30310362577438354, "step": 12810 }, { "epoch": 0.84, "learning_rate": 3.8554715481327303e-07, "logits/chosen": -0.8129196166992188, "logits/rejected": -0.8484827280044556, "logps/chosen": -465.0271911621094, "logps/rejected": -534.2659912109375, "loss": 0.6875, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2317681610584259, "rewards/margins": 0.10463656485080719, "rewards/rejected": -0.3364047408103943, "step": 12820 }, { "epoch": 0.84, "learning_rate": 3.8250626399424007e-07, "logits/chosen": -1.0457967519760132, "logits/rejected": -0.993150532245636, "logps/chosen": -481.54150390625, "logps/rejected": -543.5672607421875, "loss": 0.6903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23006339371204376, "rewards/margins": 0.08087591826915741, "rewards/rejected": -0.31093934178352356, "step": 12830 }, { "epoch": 0.84, "learning_rate": 3.7947641905980104e-07, "logits/chosen": -0.8676019906997681, "logits/rejected": -0.9785581827163696, "logps/chosen": -386.38897705078125, "logps/rejected": -436.10076904296875, "loss": 0.6888, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17907050251960754, "rewards/margins": 0.08439258486032486, "rewards/rejected": -0.2634630799293518, "step": 12840 }, { "epoch": 0.84, "learning_rate": 3.764576358151098e-07, "logits/chosen": -0.989855170249939, "logits/rejected": -0.8942712545394897, "logps/chosen": -364.42938232421875, "logps/rejected": -425.1758728027344, "loss": 0.6905, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18961241841316223, "rewards/margins": 0.07259075343608856, "rewards/rejected": -0.2622031569480896, "step": 12850 }, { "epoch": 0.84, "learning_rate": 3.7344993000761944e-07, "logits/chosen": -0.9940598607063293, "logits/rejected": -0.9289056062698364, "logps/chosen": -395.99505615234375, "logps/rejected": -533.985595703125, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21867993474006653, "rewards/margins": 0.09501878172159195, "rewards/rejected": -0.31369873881340027, "step": 12860 }, { "epoch": 0.84, "learning_rate": 3.7045331732699585e-07, "logits/chosen": -0.9832733869552612, "logits/rejected": -0.8541062474250793, "logps/chosen": -412.0921936035156, "logps/rejected": -504.63653564453125, "loss": 0.6858, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2068941295146942, "rewards/margins": 0.12824371457099915, "rewards/rejected": -0.33513784408569336, "step": 12870 }, { "epoch": 0.84, "learning_rate": 3.6746781340503993e-07, "logits/chosen": -0.9805141687393188, "logits/rejected": -0.6995661854743958, "logps/chosen": -416.86358642578125, "logps/rejected": -514.7808837890625, "loss": 0.6865, "rewards/accuracies": 0.625, "rewards/chosen": -0.18244779109954834, "rewards/margins": 0.10594437271356583, "rewards/rejected": -0.2883921265602112, "step": 12880 }, { "epoch": 0.84, "learning_rate": 3.6449343381560116e-07, "logits/chosen": -0.8010265231132507, "logits/rejected": -0.6467529535293579, "logps/chosen": -479.70025634765625, "logps/rejected": -569.8721313476562, "loss": 0.6886, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.248274564743042, "rewards/margins": 0.10356147587299347, "rewards/rejected": -0.35183608531951904, "step": 12890 }, { "epoch": 0.84, "learning_rate": 3.615301940745017e-07, "logits/chosen": -1.3851318359375, "logits/rejected": -0.8564912676811218, "logps/chosen": -510.59564208984375, "logps/rejected": -476.83636474609375, "loss": 0.6903, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1934005469083786, "rewards/margins": 0.07141765207052231, "rewards/rejected": -0.2648181915283203, "step": 12900 }, { "epoch": 0.84, "eval_logits/chosen": -0.9909506440162659, "eval_logits/rejected": -0.863025426864624, "eval_logps/chosen": -435.8428649902344, "eval_logps/rejected": -502.7374267578125, "eval_loss": 0.6894172430038452, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -0.20383791625499725, "eval_rewards/margins": 0.08728757500648499, "eval_rewards/rejected": -0.29112547636032104, "eval_runtime": 711.8623, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 12900 }, { "epoch": 0.84, "learning_rate": 3.5857810963945084e-07, "logits/chosen": -0.6161023378372192, "logits/rejected": -0.6896204352378845, "logps/chosen": -450.0265197753906, "logps/rejected": -523.8975830078125, "loss": 0.69, "rewards/accuracies": 0.75, "rewards/chosen": -0.23551742732524872, "rewards/margins": 0.08869399130344391, "rewards/rejected": -0.32421138882637024, "step": 12910 }, { "epoch": 0.85, "learning_rate": 3.556371959099678e-07, "logits/chosen": -1.257305383682251, "logits/rejected": -0.9748373031616211, "logps/chosen": -494.3572692871094, "logps/rejected": -524.6000366210938, "loss": 0.691, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19718293845653534, "rewards/margins": 0.07021328061819077, "rewards/rejected": -0.2673962116241455, "step": 12920 }, { "epoch": 0.85, "learning_rate": 3.5270746822729797e-07, "logits/chosen": -1.0669023990631104, "logits/rejected": -0.8817640542984009, "logps/chosen": -452.60992431640625, "logps/rejected": -569.9772338867188, "loss": 0.689, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20512787997722626, "rewards/margins": 0.10479208081960678, "rewards/rejected": -0.30991998314857483, "step": 12930 }, { "epoch": 0.85, "learning_rate": 3.4978894187433746e-07, "logits/chosen": -0.9080924987792969, "logits/rejected": -0.8624773025512695, "logps/chosen": -320.5248718261719, "logps/rejected": -369.5634460449219, "loss": 0.6895, "rewards/accuracies": 0.5, "rewards/chosen": -0.16944964230060577, "rewards/margins": 0.05994703620672226, "rewards/rejected": -0.22939670085906982, "step": 12940 }, { "epoch": 0.85, "learning_rate": 3.468816320755486e-07, "logits/chosen": -0.6681433916091919, "logits/rejected": -0.731353759765625, "logps/chosen": -412.3421936035156, "logps/rejected": -438.8526306152344, "loss": 0.6902, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.18841442465782166, "rewards/margins": 0.0661202147603035, "rewards/rejected": -0.25453463196754456, "step": 12950 }, { "epoch": 0.85, "learning_rate": 3.4398555399688336e-07, "logits/chosen": -1.000149130821228, "logits/rejected": -0.8533943295478821, "logps/chosen": -453.640869140625, "logps/rejected": -463.63946533203125, "loss": 0.6918, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24229037761688232, "rewards/margins": 0.030662816017866135, "rewards/rejected": -0.27295318245887756, "step": 12960 }, { "epoch": 0.85, "learning_rate": 3.411007227457047e-07, "logits/chosen": -1.1260017156600952, "logits/rejected": -0.7768393754959106, "logps/chosen": -441.520751953125, "logps/rejected": -516.2427978515625, "loss": 0.687, "rewards/accuracies": 0.625, "rewards/chosen": -0.19375945627689362, "rewards/margins": 0.10699689388275146, "rewards/rejected": -0.3007563352584839, "step": 12970 }, { "epoch": 0.85, "learning_rate": 3.382271533707043e-07, "logits/chosen": -0.7902621030807495, "logits/rejected": -0.7129308581352234, "logps/chosen": -382.034423828125, "logps/rejected": -416.2178649902344, "loss": 0.6904, "rewards/accuracies": 0.625, "rewards/chosen": -0.190429225564003, "rewards/margins": 0.05706711858510971, "rewards/rejected": -0.2474963366985321, "step": 12980 }, { "epoch": 0.85, "learning_rate": 3.353648608618287e-07, "logits/chosen": -0.9405345916748047, "logits/rejected": -0.8393017649650574, "logps/chosen": -348.46832275390625, "logps/rejected": -424.5315856933594, "loss": 0.6892, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18059708178043365, "rewards/margins": 0.08603395521640778, "rewards/rejected": -0.2666310667991638, "step": 12990 }, { "epoch": 0.85, "learning_rate": 3.3251386015019676e-07, "logits/chosen": -1.3043386936187744, "logits/rejected": -1.0379887819290161, "logps/chosen": -399.287841796875, "logps/rejected": -450.4970703125, "loss": 0.6887, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19652841985225677, "rewards/margins": 0.08495378494262695, "rewards/rejected": -0.28148218989372253, "step": 13000 }, { "epoch": 0.85, "eval_logits/chosen": -0.9750109314918518, "eval_logits/rejected": -0.8479962348937988, "eval_logps/chosen": -436.7467956542969, "eval_logps/rejected": -504.1571960449219, "eval_loss": 0.6894132494926453, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -0.20474188029766083, "eval_rewards/margins": 0.08780339360237122, "eval_rewards/rejected": -0.29254525899887085, "eval_runtime": 713.1079, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.402, "step": 13000 }, { "epoch": 0.85, "learning_rate": 3.296741661080255e-07, "logits/chosen": -1.1275584697723389, "logits/rejected": -0.9574069976806641, "logps/chosen": -465.5220642089844, "logps/rejected": -555.017578125, "loss": 0.688, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23238368332386017, "rewards/margins": 0.09603676944971085, "rewards/rejected": -0.3284204602241516, "step": 13010 }, { "epoch": 0.85, "learning_rate": 3.2684579354854974e-07, "logits/chosen": -1.1925865411758423, "logits/rejected": -1.0892870426177979, "logps/chosen": -534.2598266601562, "logps/rejected": -627.32861328125, "loss": 0.6908, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24180126190185547, "rewards/margins": 0.08281029760837555, "rewards/rejected": -0.3246115744113922, "step": 13020 }, { "epoch": 0.85, "learning_rate": 3.2402875722594653e-07, "logits/chosen": -0.8882959485054016, "logits/rejected": -1.0372774600982666, "logps/chosen": -346.94049072265625, "logps/rejected": -442.657958984375, "loss": 0.6893, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17789503931999207, "rewards/margins": 0.08956596255302429, "rewards/rejected": -0.26746100187301636, "step": 13030 }, { "epoch": 0.85, "learning_rate": 3.212230718352566e-07, "logits/chosen": -0.898537814617157, "logits/rejected": -0.8267822265625, "logps/chosen": -404.4369812011719, "logps/rejected": -369.17401123046875, "loss": 0.6928, "rewards/accuracies": 0.625, "rewards/chosen": -0.17938710749149323, "rewards/margins": 0.02843407727777958, "rewards/rejected": -0.20782120525836945, "step": 13040 }, { "epoch": 0.85, "learning_rate": 3.1842875201231025e-07, "logits/chosen": -0.8991460800170898, "logits/rejected": -0.7728959321975708, "logps/chosen": -393.7642822265625, "logps/rejected": -441.01751708984375, "loss": 0.6896, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1720242202281952, "rewards/margins": 0.07302843034267426, "rewards/rejected": -0.24505265057086945, "step": 13050 }, { "epoch": 0.85, "learning_rate": 3.156458123336478e-07, "logits/chosen": -0.7052079439163208, "logits/rejected": -0.6316913962364197, "logps/chosen": -323.0262145996094, "logps/rejected": -438.30230712890625, "loss": 0.6885, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16451437771320343, "rewards/margins": 0.12542487680912018, "rewards/rejected": -0.2899392545223236, "step": 13060 }, { "epoch": 0.86, "learning_rate": 3.128742673164459e-07, "logits/chosen": -1.2318341732025146, "logits/rejected": -0.754733145236969, "logps/chosen": -494.69354248046875, "logps/rejected": -532.0003662109375, "loss": 0.6904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2117662876844406, "rewards/margins": 0.08182285726070404, "rewards/rejected": -0.29358917474746704, "step": 13070 }, { "epoch": 0.86, "learning_rate": 3.101141314184414e-07, "logits/chosen": -1.3839300870895386, "logits/rejected": -1.1595733165740967, "logps/chosen": -392.35955810546875, "logps/rejected": -436.986083984375, "loss": 0.6924, "rewards/accuracies": 0.75, "rewards/chosen": -0.18697500228881836, "rewards/margins": 0.053837817162275314, "rewards/rejected": -0.24081282317638397, "step": 13080 }, { "epoch": 0.86, "learning_rate": 3.0736541903785526e-07, "logits/chosen": -0.802038311958313, "logits/rejected": -0.9247132539749146, "logps/chosen": -411.74761962890625, "logps/rejected": -556.392822265625, "loss": 0.6901, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20273271203041077, "rewards/margins": 0.09518562257289886, "rewards/rejected": -0.2979183495044708, "step": 13090 }, { "epoch": 0.86, "learning_rate": 3.0462814451331704e-07, "logits/chosen": -1.0374623537063599, "logits/rejected": -0.762954592704773, "logps/chosen": -451.2318420410156, "logps/rejected": -498.00738525390625, "loss": 0.6917, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22055545449256897, "rewards/margins": 0.04687047749757767, "rewards/rejected": -0.26742592453956604, "step": 13100 }, { "epoch": 0.86, "eval_logits/chosen": -1.0175011157989502, "eval_logits/rejected": -0.8888563513755798, "eval_logps/chosen": -425.418212890625, "eval_logps/rejected": -489.5446472167969, "eval_loss": 0.6894149780273438, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -0.19341330230236053, "eval_rewards/margins": 0.08451951295137405, "eval_rewards/rejected": -0.2779327929019928, "eval_runtime": 713.3691, "eval_samples_per_second": 2.804, "eval_steps_per_second": 1.402, "step": 13100 }, { "epoch": 0.86, "learning_rate": 3.019023221237927e-07, "logits/chosen": -0.9610816240310669, "logits/rejected": -0.8090234994888306, "logps/chosen": -420.1961975097656, "logps/rejected": -456.49639892578125, "loss": 0.6895, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18346384167671204, "rewards/margins": 0.09386763721704483, "rewards/rejected": -0.27733147144317627, "step": 13110 }, { "epoch": 0.86, "learning_rate": 2.991879660885058e-07, "logits/chosen": -1.1881482601165771, "logits/rejected": -1.1225849390029907, "logps/chosen": -430.271240234375, "logps/rejected": -506.01873779296875, "loss": 0.6912, "rewards/accuracies": 0.75, "rewards/chosen": -0.1667243391275406, "rewards/margins": 0.09210772812366486, "rewards/rejected": -0.25883203744888306, "step": 13120 }, { "epoch": 0.86, "learning_rate": 2.9648509056686786e-07, "logits/chosen": -1.0945974588394165, "logits/rejected": -0.9265823364257812, "logps/chosen": -369.04547119140625, "logps/rejected": -435.9185485839844, "loss": 0.6872, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19210371375083923, "rewards/margins": 0.09244880080223083, "rewards/rejected": -0.28455251455307007, "step": 13130 }, { "epoch": 0.86, "learning_rate": 2.937937096584012e-07, "logits/chosen": -1.111617088317871, "logits/rejected": -0.7723917961120605, "logps/chosen": -483.25146484375, "logps/rejected": -492.22027587890625, "loss": 0.6889, "rewards/accuracies": 0.625, "rewards/chosen": -0.1928504854440689, "rewards/margins": 0.06945283710956573, "rewards/rejected": -0.26230329275131226, "step": 13140 }, { "epoch": 0.86, "learning_rate": 2.9111383740266756e-07, "logits/chosen": -0.8475943803787231, "logits/rejected": -0.8579694032669067, "logps/chosen": -453.10394287109375, "logps/rejected": -499.24853515625, "loss": 0.691, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2173198163509369, "rewards/margins": 0.05193269997835159, "rewards/rejected": -0.2692525088787079, "step": 13150 }, { "epoch": 0.86, "learning_rate": 2.8844548777919255e-07, "logits/chosen": -1.0501482486724854, "logits/rejected": -0.9597611427307129, "logps/chosen": -372.1024169921875, "logps/rejected": -426.0980529785156, "loss": 0.6885, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.16821856796741486, "rewards/margins": 0.07627587020397186, "rewards/rejected": -0.2444944679737091, "step": 13160 }, { "epoch": 0.86, "learning_rate": 2.8578867470739594e-07, "logits/chosen": -0.6772990226745605, "logits/rejected": -0.6000491380691528, "logps/chosen": -413.3113708496094, "logps/rejected": -483.9972229003906, "loss": 0.6874, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22882306575775146, "rewards/margins": 0.10210853815078735, "rewards/rejected": -0.3309316039085388, "step": 13170 }, { "epoch": 0.86, "learning_rate": 2.8314341204651484e-07, "logits/chosen": -1.3596770763397217, "logits/rejected": -1.1702024936676025, "logps/chosen": -441.63323974609375, "logps/rejected": -478.5375061035156, "loss": 0.6871, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16559413075447083, "rewards/margins": 0.10968685150146484, "rewards/rejected": -0.2752809524536133, "step": 13180 }, { "epoch": 0.86, "learning_rate": 2.805097135955362e-07, "logits/chosen": -0.9181706309318542, "logits/rejected": -0.7884522080421448, "logps/chosen": -407.10137939453125, "logps/rejected": -474.822021484375, "loss": 0.6879, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19457736611366272, "rewards/margins": 0.10264654457569122, "rewards/rejected": -0.29722392559051514, "step": 13190 }, { "epoch": 0.86, "learning_rate": 2.778875930931213e-07, "logits/chosen": -0.9860755801200867, "logits/rejected": -0.741965651512146, "logps/chosen": -424.412841796875, "logps/rejected": -510.8443908691406, "loss": 0.6877, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18929395079612732, "rewards/margins": 0.10379371792078018, "rewards/rejected": -0.2930876612663269, "step": 13200 }, { "epoch": 0.86, "eval_logits/chosen": -1.009841799736023, "eval_logits/rejected": -0.8810458183288574, "eval_logps/chosen": -429.9781494140625, "eval_logps/rejected": -496.1291198730469, "eval_loss": 0.6894113421440125, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.19797320663928986, "eval_rewards/margins": 0.08654402941465378, "eval_rewards/rejected": -0.28451722860336304, "eval_runtime": 711.5759, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 13200 }, { "epoch": 0.86, "learning_rate": 2.7527706421753426e-07, "logits/chosen": -1.1855487823486328, "logits/rejected": -1.001201868057251, "logps/chosen": -393.73419189453125, "logps/rejected": -456.26422119140625, "loss": 0.6906, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.19533300399780273, "rewards/margins": 0.058041296899318695, "rewards/rejected": -0.253374308347702, "step": 13210 }, { "epoch": 0.86, "learning_rate": 2.726781405865736e-07, "logits/chosen": -1.0462353229522705, "logits/rejected": -0.8738126754760742, "logps/chosen": -496.15081787109375, "logps/rejected": -472.3070373535156, "loss": 0.6897, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19348499178886414, "rewards/margins": 0.09192480891942978, "rewards/rejected": -0.2854097783565521, "step": 13220 }, { "epoch": 0.87, "learning_rate": 2.7009083575749687e-07, "logits/chosen": -0.8790783882141113, "logits/rejected": -0.8375867605209351, "logps/chosen": -439.9881896972656, "logps/rejected": -513.730712890625, "loss": 0.6902, "rewards/accuracies": 0.625, "rewards/chosen": -0.19612789154052734, "rewards/margins": 0.07537803053855896, "rewards/rejected": -0.2715059220790863, "step": 13230 }, { "epoch": 0.87, "learning_rate": 2.6751516322695457e-07, "logits/chosen": -1.0426018238067627, "logits/rejected": -1.0287061929702759, "logps/chosen": -396.2439270019531, "logps/rejected": -439.5823669433594, "loss": 0.6898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2060004025697708, "rewards/margins": 0.04937834292650223, "rewards/rejected": -0.25537875294685364, "step": 13240 }, { "epoch": 0.87, "learning_rate": 2.649511364309154e-07, "logits/chosen": -1.2703790664672852, "logits/rejected": -1.1133968830108643, "logps/chosen": -391.85626220703125, "logps/rejected": -450.3577575683594, "loss": 0.69, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18956658244132996, "rewards/margins": 0.07938266545534134, "rewards/rejected": -0.2689492404460907, "step": 13250 }, { "epoch": 0.87, "learning_rate": 2.6239876874460003e-07, "logits/chosen": -1.2480677366256714, "logits/rejected": -1.2331262826919556, "logps/chosen": -475.4595642089844, "logps/rejected": -563.0646362304688, "loss": 0.6891, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.191044420003891, "rewards/margins": 0.11816122382879257, "rewards/rejected": -0.3092056214809418, "step": 13260 }, { "epoch": 0.87, "learning_rate": 2.5985807348240744e-07, "logits/chosen": -1.130602478981018, "logits/rejected": -0.6957510709762573, "logps/chosen": -423.267578125, "logps/rejected": -497.05487060546875, "loss": 0.6873, "rewards/accuracies": 0.75, "rewards/chosen": -0.1909240484237671, "rewards/margins": 0.12173442542552948, "rewards/rejected": -0.31265848875045776, "step": 13270 }, { "epoch": 0.87, "learning_rate": 2.5732906389785014e-07, "logits/chosen": -1.2089236974716187, "logits/rejected": -1.1420354843139648, "logps/chosen": -468.90277099609375, "logps/rejected": -554.2119140625, "loss": 0.686, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1911967247724533, "rewards/margins": 0.12345466762781143, "rewards/rejected": -0.31465139985084534, "step": 13280 }, { "epoch": 0.87, "learning_rate": 2.5481175318347956e-07, "logits/chosen": -0.947446346282959, "logits/rejected": -1.148301124572754, "logps/chosen": -402.0899658203125, "logps/rejected": -498.4195861816406, "loss": 0.6894, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17459237575531006, "rewards/margins": 0.07843352854251862, "rewards/rejected": -0.25302591919898987, "step": 13290 }, { "epoch": 0.87, "learning_rate": 2.5230615447082246e-07, "logits/chosen": -0.957288384437561, "logits/rejected": -0.8125879168510437, "logps/chosen": -444.80499267578125, "logps/rejected": -516.0078735351562, "loss": 0.6887, "rewards/accuracies": 0.625, "rewards/chosen": -0.19348831474781036, "rewards/margins": 0.08598808944225311, "rewards/rejected": -0.27947643399238586, "step": 13300 }, { "epoch": 0.87, "eval_logits/chosen": -1.0318516492843628, "eval_logits/rejected": -0.9020374417304993, "eval_logps/chosen": -425.52032470703125, "eval_logps/rejected": -491.04864501953125, "eval_loss": 0.6894135475158691, "eval_rewards/accuracies": 0.6664999723434448, "eval_rewards/chosen": -0.1935153603553772, "eval_rewards/margins": 0.08592142909765244, "eval_rewards/rejected": -0.27943679690361023, "eval_runtime": 712.2993, "eval_samples_per_second": 2.808, "eval_steps_per_second": 1.404, "step": 13300 }, { "epoch": 0.87, "learning_rate": 2.49812280830308e-07, "logits/chosen": -0.9962084889411926, "logits/rejected": -0.7481353878974915, "logps/chosen": -430.90087890625, "logps/rejected": -566.72314453125, "loss": 0.6842, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20586366951465607, "rewards/margins": 0.17067831754684448, "rewards/rejected": -0.37654200196266174, "step": 13310 }, { "epoch": 0.87, "learning_rate": 2.4733014527120457e-07, "logits/chosen": -0.7532116770744324, "logits/rejected": -0.8916665315628052, "logps/chosen": -472.84320068359375, "logps/rejected": -545.5331420898438, "loss": 0.6899, "rewards/accuracies": 0.75, "rewards/chosen": -0.27042829990386963, "rewards/margins": 0.09558326005935669, "rewards/rejected": -0.3660115599632263, "step": 13320 }, { "epoch": 0.87, "learning_rate": 2.4485976074154565e-07, "logits/chosen": -1.0746243000030518, "logits/rejected": -1.1461856365203857, "logps/chosen": -411.5790100097656, "logps/rejected": -471.8722229003906, "loss": 0.6911, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.20258104801177979, "rewards/margins": 0.030249997973442078, "rewards/rejected": -0.23283103108406067, "step": 13330 }, { "epoch": 0.87, "learning_rate": 2.4240114012806763e-07, "logits/chosen": -0.9970208406448364, "logits/rejected": -1.0200673341751099, "logps/chosen": -391.00347900390625, "logps/rejected": -435.83270263671875, "loss": 0.6912, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1832052767276764, "rewards/margins": 0.06189913675189018, "rewards/rejected": -0.24510440230369568, "step": 13340 }, { "epoch": 0.87, "learning_rate": 2.399542962561399e-07, "logits/chosen": -1.0039441585540771, "logits/rejected": -0.7964978814125061, "logps/chosen": -419.21307373046875, "logps/rejected": -483.9776306152344, "loss": 0.6852, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.193757101893425, "rewards/margins": 0.11217392981052399, "rewards/rejected": -0.305931031703949, "step": 13350 }, { "epoch": 0.87, "learning_rate": 2.3751924188969876e-07, "logits/chosen": -0.9888512492179871, "logits/rejected": -0.8423534631729126, "logps/chosen": -445.109375, "logps/rejected": -521.4771728515625, "loss": 0.6901, "rewards/accuracies": 0.625, "rewards/chosen": -0.19337087869644165, "rewards/margins": 0.09620238840579987, "rewards/rejected": -0.2895732820034027, "step": 13360 }, { "epoch": 0.87, "learning_rate": 2.3509598973118024e-07, "logits/chosen": -1.299377679824829, "logits/rejected": -1.1307967901229858, "logps/chosen": -387.85406494140625, "logps/rejected": -382.24884033203125, "loss": 0.6917, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16684868931770325, "rewards/margins": 0.057182587683200836, "rewards/rejected": -0.22403128445148468, "step": 13370 }, { "epoch": 0.88, "learning_rate": 2.326845524214555e-07, "logits/chosen": -0.9441890716552734, "logits/rejected": -0.9570524096488953, "logps/chosen": -430.52069091796875, "logps/rejected": -422.7290954589844, "loss": 0.6922, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19546636939048767, "rewards/margins": 0.01833316683769226, "rewards/rejected": -0.21379955112934113, "step": 13380 }, { "epoch": 0.88, "learning_rate": 2.3028494253976158e-07, "logits/chosen": -1.0607401132583618, "logits/rejected": -0.8935171961784363, "logps/chosen": -563.0780639648438, "logps/rejected": -556.1412353515625, "loss": 0.6908, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22181260585784912, "rewards/margins": 0.0576445572078228, "rewards/rejected": -0.279457151889801, "step": 13390 }, { "epoch": 0.88, "learning_rate": 2.2789717260364026e-07, "logits/chosen": -1.0842921733856201, "logits/rejected": -0.9010177850723267, "logps/chosen": -335.4914245605469, "logps/rejected": -370.0270690917969, "loss": 0.6916, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.17038536071777344, "rewards/margins": 0.05626339837908745, "rewards/rejected": -0.2266487330198288, "step": 13400 }, { "epoch": 0.88, "eval_logits/chosen": -1.0430668592453003, "eval_logits/rejected": -0.9128531217575073, "eval_logps/chosen": -420.7966003417969, "eval_logps/rejected": -485.21160888671875, "eval_loss": 0.6894127726554871, "eval_rewards/accuracies": 0.6710000038146973, "eval_rewards/chosen": -0.18879161775112152, "eval_rewards/margins": 0.08480807393789291, "eval_rewards/rejected": -0.2735997140407562, "eval_runtime": 715.5823, "eval_samples_per_second": 2.795, "eval_steps_per_second": 1.397, "step": 13400 }, { "epoch": 0.88, "learning_rate": 2.255212550688682e-07, "logits/chosen": -1.1218197345733643, "logits/rejected": -1.4631164073944092, "logps/chosen": -419.88525390625, "logps/rejected": -582.7513427734375, "loss": 0.687, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2019301950931549, "rewards/margins": 0.09867943078279495, "rewards/rejected": -0.30060964822769165, "step": 13410 }, { "epoch": 0.88, "learning_rate": 2.2315720232939598e-07, "logits/chosen": -1.5930960178375244, "logits/rejected": -1.086121678352356, "logps/chosen": -431.61279296875, "logps/rejected": -448.45965576171875, "loss": 0.6893, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16949909925460815, "rewards/margins": 0.1064492017030716, "rewards/rejected": -0.27594828605651855, "step": 13420 }, { "epoch": 0.88, "learning_rate": 2.2080502671727956e-07, "logits/chosen": -1.189905047416687, "logits/rejected": -0.9950293302536011, "logps/chosen": -352.42913818359375, "logps/rejected": -403.2584228515625, "loss": 0.6888, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.13234731554985046, "rewards/margins": 0.07265688478946686, "rewards/rejected": -0.20500421524047852, "step": 13430 }, { "epoch": 0.88, "learning_rate": 2.1846474050262078e-07, "logits/chosen": -1.0095980167388916, "logits/rejected": -0.7977234125137329, "logps/chosen": -414.53826904296875, "logps/rejected": -418.682373046875, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16797752678394318, "rewards/margins": 0.0699412003159523, "rewards/rejected": -0.2379187047481537, "step": 13440 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -0.8134937286376953, "logits/rejected": -0.7540784478187561, "logps/chosen": -371.159423828125, "logps/rejected": -508.986572265625, "loss": 0.6888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1739961802959442, "rewards/margins": 0.11030639708042145, "rewards/rejected": -0.28430259227752686, "step": 13450 }, { "epoch": 0.88, "learning_rate": 2.1381988503590578e-07, "logits/chosen": -0.7340242266654968, "logits/rejected": -0.8426862955093384, "logps/chosen": -414.74554443359375, "logps/rejected": -516.2962646484375, "loss": 0.6885, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19887368381023407, "rewards/margins": 0.10770060122013092, "rewards/rejected": -0.306574285030365, "step": 13460 }, { "epoch": 0.88, "learning_rate": 2.11515340013691e-07, "logits/chosen": -1.2820839881896973, "logits/rejected": -1.135124921798706, "logps/chosen": -422.4356384277344, "logps/rejected": -517.8685302734375, "loss": 0.6882, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1912631094455719, "rewards/margins": 0.11701309680938721, "rewards/rejected": -0.3082761764526367, "step": 13470 }, { "epoch": 0.88, "learning_rate": 2.092227328484897e-07, "logits/chosen": -0.8282972574234009, "logits/rejected": -0.8670898675918579, "logps/chosen": -382.09368896484375, "logps/rejected": -512.9632568359375, "loss": 0.6875, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17808496952056885, "rewards/margins": 0.10021205991506577, "rewards/rejected": -0.2782970368862152, "step": 13480 }, { "epoch": 0.88, "learning_rate": 2.0694207549966345e-07, "logits/chosen": -0.8017724752426147, "logits/rejected": -0.9765009880065918, "logps/chosen": -418.8724670410156, "logps/rejected": -440.66070556640625, "loss": 0.6912, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20996403694152832, "rewards/margins": 0.03865761682391167, "rewards/rejected": -0.24862165749073029, "step": 13490 }, { "epoch": 0.88, "learning_rate": 2.0467337986423864e-07, "logits/chosen": -1.2652013301849365, "logits/rejected": -1.0905256271362305, "logps/chosen": -490.0810546875, "logps/rejected": -521.456787109375, "loss": 0.6905, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18435756862163544, "rewards/margins": 0.06458848714828491, "rewards/rejected": -0.24894602596759796, "step": 13500 }, { "epoch": 0.88, "eval_logits/chosen": -1.045881986618042, "eval_logits/rejected": -0.9156625866889954, "eval_logps/chosen": -419.99395751953125, "eval_logps/rejected": -484.36981201171875, "eval_loss": 0.6894125938415527, "eval_rewards/accuracies": 0.6679999828338623, "eval_rewards/chosen": -0.1879890114068985, "eval_rewards/margins": 0.08476891368627548, "eval_rewards/rejected": -0.2727579176425934, "eval_runtime": 711.5302, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 13500 }, { "epoch": 0.88, "learning_rate": 2.0241665777684272e-07, "logits/chosen": -1.3042113780975342, "logits/rejected": -1.0920075178146362, "logps/chosen": -444.8846740722656, "logps/rejected": -533.3680419921875, "loss": 0.6872, "rewards/accuracies": 0.75, "rewards/chosen": -0.1765916794538498, "rewards/margins": 0.13209852576255798, "rewards/rejected": -0.30869022011756897, "step": 13510 }, { "epoch": 0.88, "learning_rate": 2.0017192100964366e-07, "logits/chosen": -0.8175595998764038, "logits/rejected": -0.8133748769760132, "logps/chosen": -421.3980407714844, "logps/rejected": -504.69171142578125, "loss": 0.691, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22144213318824768, "rewards/margins": 0.08117649704217911, "rewards/rejected": -0.3026186227798462, "step": 13520 }, { "epoch": 0.89, "learning_rate": 1.9793918127228777e-07, "logits/chosen": -1.3139551877975464, "logits/rejected": -0.9030052423477173, "logps/chosen": -541.2322998046875, "logps/rejected": -568.1669921875, "loss": 0.6882, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22493290901184082, "rewards/margins": 0.08101674169301987, "rewards/rejected": -0.3059496581554413, "step": 13530 }, { "epoch": 0.89, "learning_rate": 1.9571845021184005e-07, "logits/chosen": -0.777948260307312, "logits/rejected": -0.833267331123352, "logps/chosen": -427.2875061035156, "logps/rejected": -516.5462646484375, "loss": 0.6886, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19630160927772522, "rewards/margins": 0.08514077961444855, "rewards/rejected": -0.2814423739910126, "step": 13540 }, { "epoch": 0.89, "learning_rate": 1.9350973941272027e-07, "logits/chosen": -1.1817286014556885, "logits/rejected": -0.9291670918464661, "logps/chosen": -404.9017639160156, "logps/rejected": -474.9322814941406, "loss": 0.689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20112669467926025, "rewards/margins": 0.0905313715338707, "rewards/rejected": -0.29165807366371155, "step": 13550 }, { "epoch": 0.89, "learning_rate": 1.9131306039664676e-07, "logits/chosen": -0.8950015306472778, "logits/rejected": -0.7792337536811829, "logps/chosen": -387.6864318847656, "logps/rejected": -513.8165283203125, "loss": 0.6877, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18778234720230103, "rewards/margins": 0.1017737165093422, "rewards/rejected": -0.289556086063385, "step": 13560 }, { "epoch": 0.89, "learning_rate": 1.8912842462257358e-07, "logits/chosen": -0.9837632179260254, "logits/rejected": -0.853921115398407, "logps/chosen": -414.85821533203125, "logps/rejected": -504.00079345703125, "loss": 0.6874, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19468896090984344, "rewards/margins": 0.10612950474023819, "rewards/rejected": -0.30081844329833984, "step": 13570 }, { "epoch": 0.89, "learning_rate": 1.869558434866303e-07, "logits/chosen": -1.0088794231414795, "logits/rejected": -1.1799061298370361, "logps/chosen": -385.1492919921875, "logps/rejected": -505.05718994140625, "loss": 0.6871, "rewards/accuracies": 0.625, "rewards/chosen": -0.2040594518184662, "rewards/margins": 0.10187806189060211, "rewards/rejected": -0.3059375286102295, "step": 13580 }, { "epoch": 0.89, "learning_rate": 1.847953283220652e-07, "logits/chosen": -1.1091597080230713, "logits/rejected": -0.862514317035675, "logps/chosen": -449.0830993652344, "logps/rejected": -513.0009765625, "loss": 0.6855, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19297336041927338, "rewards/margins": 0.1425541192293167, "rewards/rejected": -0.3355274498462677, "step": 13590 }, { "epoch": 0.89, "learning_rate": 1.8264689039918265e-07, "logits/chosen": -1.008798360824585, "logits/rejected": -0.9485238790512085, "logps/chosen": -476.99005126953125, "logps/rejected": -521.00146484375, "loss": 0.691, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2171596735715866, "rewards/margins": 0.0714375376701355, "rewards/rejected": -0.2885972261428833, "step": 13600 }, { "epoch": 0.89, "eval_logits/chosen": -1.0011253356933594, "eval_logits/rejected": -0.8732011914253235, "eval_logps/chosen": -428.5783386230469, "eval_logps/rejected": -494.4617919921875, "eval_loss": 0.6894011497497559, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -0.196573406457901, "eval_rewards/margins": 0.08627651631832123, "eval_rewards/rejected": -0.28284990787506104, "eval_runtime": 712.7327, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 13600 }, { "epoch": 0.89, "learning_rate": 1.8051054092528857e-07, "logits/chosen": -1.0556079149246216, "logits/rejected": -0.8579056859016418, "logps/chosen": -450.79071044921875, "logps/rejected": -553.0870971679688, "loss": 0.6881, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18968930840492249, "rewards/margins": 0.11205662786960602, "rewards/rejected": -0.3017459213733673, "step": 13610 }, { "epoch": 0.89, "learning_rate": 1.783862910446271e-07, "logits/chosen": -0.7849446535110474, "logits/rejected": -0.7896897196769714, "logps/chosen": -367.90234375, "logps/rejected": -482.52349853515625, "loss": 0.6883, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19504904747009277, "rewards/margins": 0.12393607199192047, "rewards/rejected": -0.31898510456085205, "step": 13620 }, { "epoch": 0.89, "learning_rate": 1.762741518383271e-07, "logits/chosen": -1.12753427028656, "logits/rejected": -0.8379606008529663, "logps/chosen": -404.31524658203125, "logps/rejected": -460.7677307128906, "loss": 0.6883, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18248286843299866, "rewards/margins": 0.08411658555269241, "rewards/rejected": -0.26659947633743286, "step": 13630 }, { "epoch": 0.89, "learning_rate": 1.7417413432434082e-07, "logits/chosen": -0.9677637219429016, "logits/rejected": -0.8881511688232422, "logps/chosen": -455.74493408203125, "logps/rejected": -473.82879638671875, "loss": 0.6912, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2030853033065796, "rewards/margins": 0.07085120677947998, "rewards/rejected": -0.27393651008605957, "step": 13640 }, { "epoch": 0.89, "learning_rate": 1.7208624945738855e-07, "logits/chosen": -1.2254602909088135, "logits/rejected": -1.1744589805603027, "logps/chosen": -401.1329650878906, "logps/rejected": -447.900390625, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": -0.18304863572120667, "rewards/margins": 0.039330027997493744, "rewards/rejected": -0.22237864136695862, "step": 13650 }, { "epoch": 0.89, "learning_rate": 1.7001050812889995e-07, "logits/chosen": -1.284170389175415, "logits/rejected": -1.093874454498291, "logps/chosen": -479.20928955078125, "logps/rejected": -530.1829833984375, "loss": 0.6896, "rewards/accuracies": 0.75, "rewards/chosen": -0.22367647290229797, "rewards/margins": 0.09122467041015625, "rewards/rejected": -0.3149011731147766, "step": 13660 }, { "epoch": 0.89, "learning_rate": 1.679469211669596e-07, "logits/chosen": -1.0277354717254639, "logits/rejected": -0.8371337056159973, "logps/chosen": -439.98541259765625, "logps/rejected": -501.0602111816406, "loss": 0.6874, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21504028141498566, "rewards/margins": 0.11267922818660736, "rewards/rejected": -0.327719509601593, "step": 13670 }, { "epoch": 0.9, "learning_rate": 1.6589549933624715e-07, "logits/chosen": -1.0300480127334595, "logits/rejected": -0.852430522441864, "logps/chosen": -399.5941162109375, "logps/rejected": -503.5506286621094, "loss": 0.6854, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15938711166381836, "rewards/margins": 0.15544193983078003, "rewards/rejected": -0.314829021692276, "step": 13680 }, { "epoch": 0.9, "learning_rate": 1.638562533379845e-07, "logits/chosen": -0.9181520342826843, "logits/rejected": -0.8067198991775513, "logps/chosen": -435.23541259765625, "logps/rejected": -446.798583984375, "loss": 0.69, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17953279614448547, "rewards/margins": 0.07383112609386444, "rewards/rejected": -0.2533639073371887, "step": 13690 }, { "epoch": 0.9, "learning_rate": 1.6182919380987676e-07, "logits/chosen": -1.0368890762329102, "logits/rejected": -0.9775916337966919, "logps/chosen": -417.2001037597656, "logps/rejected": -454.9668884277344, "loss": 0.6911, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19148211181163788, "rewards/margins": 0.0531439408659935, "rewards/rejected": -0.24462607502937317, "step": 13700 }, { "epoch": 0.9, "eval_logits/chosen": -1.0116302967071533, "eval_logits/rejected": -0.8832849860191345, "eval_logps/chosen": -426.22021484375, "eval_logps/rejected": -491.4140625, "eval_loss": 0.6893994212150574, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -0.19421526789665222, "eval_rewards/margins": 0.08558690547943115, "eval_rewards/rejected": -0.279802143573761, "eval_runtime": 710.9793, "eval_samples_per_second": 2.813, "eval_steps_per_second": 1.407, "step": 13700 }, { "epoch": 0.9, "learning_rate": 1.598143313260603e-07, "logits/chosen": -0.732464075088501, "logits/rejected": -0.7959052920341492, "logps/chosen": -376.18701171875, "logps/rejected": -440.91363525390625, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18813328444957733, "rewards/margins": 0.07674112170934677, "rewards/rejected": -0.2648743987083435, "step": 13710 }, { "epoch": 0.9, "learning_rate": 1.5781167639704415e-07, "logits/chosen": -1.1566427946090698, "logits/rejected": -0.7914190888404846, "logps/chosen": -514.2080078125, "logps/rejected": -461.599365234375, "loss": 0.6913, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.182041198015213, "rewards/margins": 0.07163481414318085, "rewards/rejected": -0.25367602705955505, "step": 13720 }, { "epoch": 0.9, "learning_rate": 1.5582123946965787e-07, "logits/chosen": -0.8978877067565918, "logits/rejected": -0.6969678997993469, "logps/chosen": -414.6393127441406, "logps/rejected": -515.791015625, "loss": 0.6886, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18024428188800812, "rewards/margins": 0.09100376069545746, "rewards/rejected": -0.2712480425834656, "step": 13730 }, { "epoch": 0.9, "learning_rate": 1.5384303092699504e-07, "logits/chosen": -1.1189682483673096, "logits/rejected": -0.7042065858840942, "logps/chosen": -487.235595703125, "logps/rejected": -604.0064086914062, "loss": 0.6878, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20025098323822021, "rewards/margins": 0.11845190823078156, "rewards/rejected": -0.3187028765678406, "step": 13740 }, { "epoch": 0.9, "learning_rate": 1.518770610883613e-07, "logits/chosen": -0.8458231687545776, "logits/rejected": -0.7532489895820618, "logps/chosen": -462.36895751953125, "logps/rejected": -562.3704833984375, "loss": 0.6888, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2478303462266922, "rewards/margins": 0.12941531836986542, "rewards/rejected": -0.3772456645965576, "step": 13750 }, { "epoch": 0.9, "learning_rate": 1.4992334020921735e-07, "logits/chosen": -1.03566575050354, "logits/rejected": -1.0163966417312622, "logps/chosen": -357.2666320800781, "logps/rejected": -445.370361328125, "loss": 0.6879, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.184294193983078, "rewards/margins": 0.11771754920482635, "rewards/rejected": -0.30201178789138794, "step": 13760 }, { "epoch": 0.9, "learning_rate": 1.4798187848112905e-07, "logits/chosen": -1.0435506105422974, "logits/rejected": -0.6870570182800293, "logps/chosen": -468.039794921875, "logps/rejected": -527.7335815429688, "loss": 0.688, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24506907165050507, "rewards/margins": 0.1005513072013855, "rewards/rejected": -0.345620334148407, "step": 13770 }, { "epoch": 0.9, "learning_rate": 1.460526860317113e-07, "logits/chosen": -1.1585135459899902, "logits/rejected": -1.0323517322540283, "logps/chosen": -376.08807373046875, "logps/rejected": -538.8499755859375, "loss": 0.6847, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20280340313911438, "rewards/margins": 0.1310150921344757, "rewards/rejected": -0.3338184952735901, "step": 13780 }, { "epoch": 0.9, "learning_rate": 1.441357729245771e-07, "logits/chosen": -1.2246206998825073, "logits/rejected": -0.9151955842971802, "logps/chosen": -479.27117919921875, "logps/rejected": -512.744140625, "loss": 0.6893, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23127934336662292, "rewards/margins": 0.08946017920970917, "rewards/rejected": -0.3207395672798157, "step": 13790 }, { "epoch": 0.9, "learning_rate": 1.4223114915928482e-07, "logits/chosen": -0.5556536316871643, "logits/rejected": -0.6485458612442017, "logps/chosen": -435.077392578125, "logps/rejected": -520.010009765625, "loss": 0.6892, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20915481448173523, "rewards/margins": 0.07816542685031891, "rewards/rejected": -0.28732022643089294, "step": 13800 }, { "epoch": 0.9, "eval_logits/chosen": -0.9911072850227356, "eval_logits/rejected": -0.8638641834259033, "eval_logps/chosen": -431.2166748046875, "eval_logps/rejected": -497.59661865234375, "eval_loss": 0.6893988251686096, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.19921176135540009, "eval_rewards/margins": 0.08677300810813904, "eval_rewards/rejected": -0.2859847843647003, "eval_runtime": 712.8812, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 13800 }, { "epoch": 0.9, "learning_rate": 1.403388246712842e-07, "logits/chosen": -0.9798790812492371, "logits/rejected": -0.8478490114212036, "logps/chosen": -359.9521484375, "logps/rejected": -412.8531188964844, "loss": 0.6895, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19570107758045197, "rewards/margins": 0.0600527822971344, "rewards/rejected": -0.25575387477874756, "step": 13810 }, { "epoch": 0.9, "learning_rate": 1.3845880933186757e-07, "logits/chosen": -1.1259896755218506, "logits/rejected": -1.0273348093032837, "logps/chosen": -453.7496643066406, "logps/rejected": -459.85321044921875, "loss": 0.6919, "rewards/accuracies": 0.5, "rewards/chosen": -0.2150108814239502, "rewards/margins": 0.040339548140764236, "rewards/rejected": -0.2553504407405853, "step": 13820 }, { "epoch": 0.9, "learning_rate": 1.3659111294811457e-07, "logits/chosen": -0.9863991737365723, "logits/rejected": -0.9783857464790344, "logps/chosen": -417.46417236328125, "logps/rejected": -466.4806213378906, "loss": 0.6905, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22562098503112793, "rewards/margins": 0.06691263616085052, "rewards/rejected": -0.29253360629081726, "step": 13830 }, { "epoch": 0.91, "learning_rate": 1.347357452628459e-07, "logits/chosen": -1.432991623878479, "logits/rejected": -1.2990912199020386, "logps/chosen": -434.8438415527344, "logps/rejected": -499.17071533203125, "loss": 0.6908, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18954630196094513, "rewards/margins": 0.07182861864566803, "rewards/rejected": -0.26137489080429077, "step": 13840 }, { "epoch": 0.91, "learning_rate": 1.3289271595456732e-07, "logits/chosen": -1.0181031227111816, "logits/rejected": -0.6720232963562012, "logps/chosen": -441.0040588378906, "logps/rejected": -518.5680541992188, "loss": 0.6878, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23667487502098083, "rewards/margins": 0.1033577173948288, "rewards/rejected": -0.34003257751464844, "step": 13850 }, { "epoch": 0.91, "learning_rate": 1.310620346374228e-07, "logits/chosen": -0.9424416422843933, "logits/rejected": -0.7790125608444214, "logps/chosen": -452.8560485839844, "logps/rejected": -542.755126953125, "loss": 0.6865, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22063413262367249, "rewards/margins": 0.1267022341489792, "rewards/rejected": -0.3473363518714905, "step": 13860 }, { "epoch": 0.91, "learning_rate": 1.2924371086114274e-07, "logits/chosen": -1.1187223196029663, "logits/rejected": -0.7112355828285217, "logps/chosen": -440.7926330566406, "logps/rejected": -511.54248046875, "loss": 0.6901, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20473113656044006, "rewards/margins": 0.07677672803401947, "rewards/rejected": -0.2815078794956207, "step": 13870 }, { "epoch": 0.91, "learning_rate": 1.274377541109953e-07, "logits/chosen": -0.8828527331352234, "logits/rejected": -0.9465206265449524, "logps/chosen": -361.6195983886719, "logps/rejected": -506.4344787597656, "loss": 0.6886, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19726905226707458, "rewards/margins": 0.06689117848873138, "rewards/rejected": -0.2641602158546448, "step": 13880 }, { "epoch": 0.91, "learning_rate": 1.2564417380773435e-07, "logits/chosen": -0.7460101842880249, "logits/rejected": -0.46463337540626526, "logps/chosen": -390.36688232421875, "logps/rejected": -515.1717529296875, "loss": 0.6888, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21186986565589905, "rewards/margins": 0.10232283920049667, "rewards/rejected": -0.3141927123069763, "step": 13890 }, { "epoch": 0.91, "learning_rate": 1.2386297930755436e-07, "logits/chosen": -1.199453592300415, "logits/rejected": -1.083516001701355, "logps/chosen": -502.26470947265625, "logps/rejected": -592.527099609375, "loss": 0.6905, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25437894463539124, "rewards/margins": 0.09508887678384781, "rewards/rejected": -0.34946778416633606, "step": 13900 }, { "epoch": 0.91, "eval_logits/chosen": -0.9932417273521423, "eval_logits/rejected": -0.8657166957855225, "eval_logps/chosen": -430.56683349609375, "eval_logps/rejected": -497.0989990234375, "eval_loss": 0.6894011497497559, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -0.1985618770122528, "eval_rewards/margins": 0.08692525327205658, "eval_rewards/rejected": -0.2854871153831482, "eval_runtime": 711.9181, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.405, "step": 13900 }, { "epoch": 0.91, "learning_rate": 1.220941799020378e-07, "logits/chosen": -0.9004515409469604, "logits/rejected": -0.7649658918380737, "logps/chosen": -409.7417907714844, "logps/rejected": -484.17108154296875, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1906607449054718, "rewards/margins": 0.09678211808204651, "rewards/rejected": -0.2874428629875183, "step": 13910 }, { "epoch": 0.91, "learning_rate": 1.2033778481810975e-07, "logits/chosen": -1.0336154699325562, "logits/rejected": -1.0059373378753662, "logps/chosen": -399.5526428222656, "logps/rejected": -472.43011474609375, "loss": 0.6867, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17955917119979858, "rewards/margins": 0.11022365093231201, "rewards/rejected": -0.2897828221321106, "step": 13920 }, { "epoch": 0.91, "learning_rate": 1.1859380321798591e-07, "logits/chosen": -1.0649926662445068, "logits/rejected": -1.3081092834472656, "logps/chosen": -397.7228698730469, "logps/rejected": -483.80780029296875, "loss": 0.6883, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1951616108417511, "rewards/margins": 0.07134003937244415, "rewards/rejected": -0.26650166511535645, "step": 13930 }, { "epoch": 0.91, "learning_rate": 1.1686224419912989e-07, "logits/chosen": -0.9648985862731934, "logits/rejected": -0.7549543380737305, "logps/chosen": -487.0858459472656, "logps/rejected": -567.8107299804688, "loss": 0.6875, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2342132031917572, "rewards/margins": 0.11001571267843246, "rewards/rejected": -0.34422892332077026, "step": 13940 }, { "epoch": 0.91, "learning_rate": 1.1514311679420104e-07, "logits/chosen": -0.598174512386322, "logits/rejected": -0.6542484164237976, "logps/chosen": -364.93170166015625, "logps/rejected": -530.19189453125, "loss": 0.6867, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2097831517457962, "rewards/margins": 0.10944052785634995, "rewards/rejected": -0.31922370195388794, "step": 13950 }, { "epoch": 0.91, "learning_rate": 1.1343642997101029e-07, "logits/chosen": -1.0652467012405396, "logits/rejected": -0.8850333094596863, "logps/chosen": -407.06231689453125, "logps/rejected": -484.21783447265625, "loss": 0.6911, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2047090083360672, "rewards/margins": 0.0903773307800293, "rewards/rejected": -0.2950863242149353, "step": 13960 }, { "epoch": 0.91, "learning_rate": 1.1174219263247188e-07, "logits/chosen": -0.5526877641677856, "logits/rejected": -0.5558839440345764, "logps/chosen": -417.8456115722656, "logps/rejected": -504.50421142578125, "loss": 0.6887, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21902808547019958, "rewards/margins": 0.10418976843357086, "rewards/rejected": -0.32321786880493164, "step": 13970 }, { "epoch": 0.91, "learning_rate": 1.1006041361655839e-07, "logits/chosen": -1.1823909282684326, "logits/rejected": -0.7704101204872131, "logps/chosen": -402.4588317871094, "logps/rejected": -427.7669372558594, "loss": 0.6895, "rewards/accuracies": 0.5, "rewards/chosen": -0.194021537899971, "rewards/margins": 0.06524568051099777, "rewards/rejected": -0.2592672109603882, "step": 13980 }, { "epoch": 0.92, "learning_rate": 1.0839110169625189e-07, "logits/chosen": -0.8379222750663757, "logits/rejected": -0.9152050018310547, "logps/chosen": -440.62347412109375, "logps/rejected": -559.3052368164062, "loss": 0.6869, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23378117382526398, "rewards/margins": 0.1260370910167694, "rewards/rejected": -0.3598182797431946, "step": 13990 }, { "epoch": 0.92, "learning_rate": 1.06734265579502e-07, "logits/chosen": -1.0023798942565918, "logits/rejected": -0.6805317997932434, "logps/chosen": -481.406494140625, "logps/rejected": -506.0174865722656, "loss": 0.6884, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2198897898197174, "rewards/margins": 0.09111623466014862, "rewards/rejected": -0.3110060393810272, "step": 14000 }, { "epoch": 0.92, "eval_logits/chosen": -0.9784961938858032, "eval_logits/rejected": -0.851710319519043, "eval_logps/chosen": -433.59979248046875, "eval_logps/rejected": -500.4915771484375, "eval_loss": 0.689401388168335, "eval_rewards/accuracies": 0.6669999957084656, "eval_rewards/chosen": -0.20159488916397095, "eval_rewards/margins": 0.08728481084108353, "eval_rewards/rejected": -0.2888796925544739, "eval_runtime": 712.0057, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.404, "step": 14000 }, { "epoch": 0.92, "learning_rate": 1.050899139091771e-07, "logits/chosen": -1.23249351978302, "logits/rejected": -0.8209800720214844, "logps/chosen": -479.36785888671875, "logps/rejected": -524.4945068359375, "loss": 0.6895, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20074041187763214, "rewards/margins": 0.09164474904537201, "rewards/rejected": -0.29238516092300415, "step": 14010 }, { "epoch": 0.92, "learning_rate": 1.0345805526302072e-07, "logits/chosen": -1.1188102960586548, "logits/rejected": -0.9375091791152954, "logps/chosen": -407.9237365722656, "logps/rejected": -487.79052734375, "loss": 0.69, "rewards/accuracies": 0.75, "rewards/chosen": -0.2067614048719406, "rewards/margins": 0.0871044397354126, "rewards/rejected": -0.2938658595085144, "step": 14020 }, { "epoch": 0.92, "learning_rate": 1.0183869815360764e-07, "logits/chosen": -1.065840244293213, "logits/rejected": -1.1328171491622925, "logps/chosen": -385.97149658203125, "logps/rejected": -474.182861328125, "loss": 0.6912, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19405806064605713, "rewards/margins": 0.05557119846343994, "rewards/rejected": -0.24962928891181946, "step": 14030 }, { "epoch": 0.92, "learning_rate": 1.0023185102829763e-07, "logits/chosen": -0.771183967590332, "logits/rejected": -0.8472586870193481, "logps/chosen": -448.053955078125, "logps/rejected": -546.9066162109375, "loss": 0.6899, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21723172068595886, "rewards/margins": 0.09466800838708878, "rewards/rejected": -0.31189972162246704, "step": 14040 }, { "epoch": 0.92, "learning_rate": 9.863752226919182e-08, "logits/chosen": -0.8391677737236023, "logits/rejected": -0.6935745477676392, "logps/chosen": -425.4812927246094, "logps/rejected": -488.382568359375, "loss": 0.6863, "rewards/accuracies": 0.75, "rewards/chosen": -0.18774910271167755, "rewards/margins": 0.12847992777824402, "rewards/rejected": -0.31622904539108276, "step": 14050 }, { "epoch": 0.92, "learning_rate": 9.705572019309107e-08, "logits/chosen": -0.9686983823776245, "logits/rejected": -0.8074959516525269, "logps/chosen": -480.96795654296875, "logps/rejected": -553.8460693359375, "loss": 0.6877, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2129405289888382, "rewards/margins": 0.1043621078133583, "rewards/rejected": -0.3173026442527771, "step": 14060 }, { "epoch": 0.92, "learning_rate": 9.548645305144849e-08, "logits/chosen": -1.1921392679214478, "logits/rejected": -0.9313928484916687, "logps/chosen": -352.3310546875, "logps/rejected": -439.64532470703125, "loss": 0.6876, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1788359135389328, "rewards/margins": 0.08334054052829742, "rewards/rejected": -0.2621764540672302, "step": 14070 }, { "epoch": 0.92, "learning_rate": 9.392972903033149e-08, "logits/chosen": -0.7557094097137451, "logits/rejected": -1.0380260944366455, "logps/chosen": -400.652587890625, "logps/rejected": -442.6686096191406, "loss": 0.692, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.17752131819725037, "rewards/margins": 0.04955955222249031, "rewards/rejected": -0.22708086669445038, "step": 14080 }, { "epoch": 0.92, "learning_rate": 9.238555625037449e-08, "logits/chosen": -0.8334128260612488, "logits/rejected": -0.7627261281013489, "logps/chosen": -386.14227294921875, "logps/rejected": -404.98895263671875, "loss": 0.6913, "rewards/accuracies": 0.625, "rewards/chosen": -0.19543412327766418, "rewards/margins": 0.048528458923101425, "rewards/rejected": -0.24396257102489471, "step": 14090 }, { "epoch": 0.92, "learning_rate": 9.085394276673903e-08, "logits/chosen": -0.9746842384338379, "logits/rejected": -0.9705870747566223, "logps/chosen": -472.040771484375, "logps/rejected": -545.2486572265625, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2038746178150177, "rewards/margins": 0.08765744417905807, "rewards/rejected": -0.291532039642334, "step": 14100 }, { "epoch": 0.92, "eval_logits/chosen": -0.9989323616027832, "eval_logits/rejected": -0.8711248636245728, "eval_logps/chosen": -429.91204833984375, "eval_logps/rejected": -496.2607116699219, "eval_loss": 0.6894006133079529, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.19790711998939514, "eval_rewards/margins": 0.08674175292253494, "eval_rewards/rejected": -0.2846488356590271, "eval_runtime": 711.9768, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.405, "step": 14100 }, { "epoch": 0.92, "learning_rate": 8.933489656907157e-08, "logits/chosen": -0.8989300727844238, "logits/rejected": -0.9867172241210938, "logps/chosen": -416.06744384765625, "logps/rejected": -497.34051513671875, "loss": 0.6907, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.19873200356960297, "rewards/margins": 0.0563327893614769, "rewards/rejected": -0.25506478548049927, "step": 14110 }, { "epoch": 0.92, "learning_rate": 8.782842558146127e-08, "logits/chosen": -0.8634630441665649, "logits/rejected": -0.7865079641342163, "logps/chosen": -339.5974426269531, "logps/rejected": -452.58404541015625, "loss": 0.6879, "rewards/accuracies": 0.75, "rewards/chosen": -0.177713543176651, "rewards/margins": 0.11469310522079468, "rewards/rejected": -0.2924066185951233, "step": 14120 }, { "epoch": 0.92, "learning_rate": 8.633453766239836e-08, "logits/chosen": -1.139054536819458, "logits/rejected": -1.0553338527679443, "logps/chosen": -413.77044677734375, "logps/rejected": -440.55194091796875, "loss": 0.6915, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17864930629730225, "rewards/margins": 0.05863678455352783, "rewards/rejected": -0.23728612065315247, "step": 14130 }, { "epoch": 0.93, "learning_rate": 8.485324060473448e-08, "logits/chosen": -1.0663942098617554, "logits/rejected": -0.8815576434135437, "logps/chosen": -439.49884033203125, "logps/rejected": -488.25054931640625, "loss": 0.6903, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19921675324440002, "rewards/margins": 0.06114257499575615, "rewards/rejected": -0.26035934686660767, "step": 14140 }, { "epoch": 0.93, "learning_rate": 8.338454213564052e-08, "logits/chosen": -1.031376600265503, "logits/rejected": -0.7835978269577026, "logps/chosen": -444.68743896484375, "logps/rejected": -538.5584106445312, "loss": 0.689, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21379053592681885, "rewards/margins": 0.1110650897026062, "rewards/rejected": -0.32485562562942505, "step": 14150 }, { "epoch": 0.93, "learning_rate": 8.192844991656679e-08, "logits/chosen": -0.9138960838317871, "logits/rejected": -0.6291934847831726, "logps/chosen": -473.10870361328125, "logps/rejected": -506.9134826660156, "loss": 0.6891, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23439207673072815, "rewards/margins": 0.07021282613277435, "rewards/rejected": -0.3046048879623413, "step": 14160 }, { "epoch": 0.93, "learning_rate": 8.048497154320434e-08, "logits/chosen": -0.947569727897644, "logits/rejected": -1.0524301528930664, "logps/chosen": -345.71954345703125, "logps/rejected": -428.63604736328125, "loss": 0.6894, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2161700427532196, "rewards/margins": 0.07381902635097504, "rewards/rejected": -0.28998905420303345, "step": 14170 }, { "epoch": 0.93, "learning_rate": 7.905411454544265e-08, "logits/chosen": -1.0077670812606812, "logits/rejected": -0.9876540303230286, "logps/chosen": -435.50872802734375, "logps/rejected": -503.3299865722656, "loss": 0.6905, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19900453090667725, "rewards/margins": 0.06307898461818695, "rewards/rejected": -0.262083500623703, "step": 14180 }, { "epoch": 0.93, "learning_rate": 7.763588638733332e-08, "logits/chosen": -0.9834734201431274, "logits/rejected": -1.0122195482254028, "logps/chosen": -455.09893798828125, "logps/rejected": -530.7279052734375, "loss": 0.6877, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19267883896827698, "rewards/margins": 0.09545306861400604, "rewards/rejected": -0.2881319224834442, "step": 14190 }, { "epoch": 0.93, "learning_rate": 7.623029446704899e-08, "logits/chosen": -1.1731340885162354, "logits/rejected": -1.228776454925537, "logps/chosen": -514.779541015625, "logps/rejected": -593.9116821289062, "loss": 0.689, "rewards/accuracies": 0.75, "rewards/chosen": -0.20333810150623322, "rewards/margins": 0.11602671444416046, "rewards/rejected": -0.31936484575271606, "step": 14200 }, { "epoch": 0.93, "eval_logits/chosen": -0.9908974766731262, "eval_logits/rejected": -0.8632987141609192, "eval_logps/chosen": -431.88525390625, "eval_logps/rejected": -498.7848815917969, "eval_loss": 0.6893996596336365, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.19988025724887848, "eval_rewards/margins": 0.08729271590709686, "eval_rewards/rejected": -0.28717297315597534, "eval_runtime": 713.4147, "eval_samples_per_second": 2.803, "eval_steps_per_second": 1.402, "step": 14200 }, { "epoch": 0.93, "learning_rate": 7.483734611684557e-08, "logits/chosen": -0.7480652928352356, "logits/rejected": -0.5744966268539429, "logps/chosen": -455.3934020996094, "logps/rejected": -489.6495666503906, "loss": 0.6908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19850760698318481, "rewards/margins": 0.08841712027788162, "rewards/rejected": -0.2869247496128082, "step": 14210 }, { "epoch": 0.93, "learning_rate": 7.345704860302366e-08, "logits/chosen": -1.3832318782806396, "logits/rejected": -0.9964305758476257, "logps/chosen": -455.84136962890625, "logps/rejected": -557.6566162109375, "loss": 0.6895, "rewards/accuracies": 0.75, "rewards/chosen": -0.20758870244026184, "rewards/margins": 0.10249364376068115, "rewards/rejected": -0.3100823760032654, "step": 14220 }, { "epoch": 0.93, "learning_rate": 7.208940912589224e-08, "logits/chosen": -0.9903473854064941, "logits/rejected": -0.7882139086723328, "logps/chosen": -443.6026916503906, "logps/rejected": -530.8634643554688, "loss": 0.6858, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23401638865470886, "rewards/margins": 0.12463922798633575, "rewards/rejected": -0.3586556017398834, "step": 14230 }, { "epoch": 0.93, "learning_rate": 7.073443481972753e-08, "logits/chosen": -0.9210270047187805, "logits/rejected": -0.8315000534057617, "logps/chosen": -408.10357666015625, "logps/rejected": -515.8826293945312, "loss": 0.6879, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22394785284996033, "rewards/margins": 0.08827735483646393, "rewards/rejected": -0.31222519278526306, "step": 14240 }, { "epoch": 0.93, "learning_rate": 6.939213275274027e-08, "logits/chosen": -1.0639857053756714, "logits/rejected": -1.0767017602920532, "logps/chosen": -434.39300537109375, "logps/rejected": -488.84893798828125, "loss": 0.6904, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19513921439647675, "rewards/margins": 0.0740586444735527, "rewards/rejected": -0.26919785141944885, "step": 14250 }, { "epoch": 0.93, "learning_rate": 6.806250992703461e-08, "logits/chosen": -0.936127781867981, "logits/rejected": -0.8528211712837219, "logps/chosen": -408.73883056640625, "logps/rejected": -458.3291931152344, "loss": 0.6905, "rewards/accuracies": 0.625, "rewards/chosen": -0.1920442134141922, "rewards/margins": 0.0758005753159523, "rewards/rejected": -0.2678447961807251, "step": 14260 }, { "epoch": 0.93, "learning_rate": 6.674557327857572e-08, "logits/chosen": -1.1486737728118896, "logits/rejected": -1.0023722648620605, "logps/chosen": -447.0084533691406, "logps/rejected": -563.09375, "loss": 0.6867, "rewards/accuracies": 0.75, "rewards/chosen": -0.19273777306079865, "rewards/margins": 0.12517333030700684, "rewards/rejected": -0.3179110884666443, "step": 14270 }, { "epoch": 0.93, "learning_rate": 6.544132967714917e-08, "logits/chosen": -0.7697897553443909, "logits/rejected": -0.6848057508468628, "logps/chosen": -498.2203063964844, "logps/rejected": -595.4434204101562, "loss": 0.6877, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25136035680770874, "rewards/margins": 0.11093559116125107, "rewards/rejected": -0.3622959554195404, "step": 14280 }, { "epoch": 0.93, "learning_rate": 6.414978592632932e-08, "logits/chosen": -0.9375301599502563, "logits/rejected": -0.8963106870651245, "logps/chosen": -468.3148498535156, "logps/rejected": -512.3311767578125, "loss": 0.69, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20721551775932312, "rewards/margins": 0.09047175943851471, "rewards/rejected": -0.297687292098999, "step": 14290 }, { "epoch": 0.94, "learning_rate": 6.287094876344046e-08, "logits/chosen": -1.2189066410064697, "logits/rejected": -1.0829071998596191, "logps/chosen": -325.34210205078125, "logps/rejected": -410.9091796875, "loss": 0.6911, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15292184054851532, "rewards/margins": 0.07892803847789764, "rewards/rejected": -0.23184990882873535, "step": 14300 }, { "epoch": 0.94, "eval_logits/chosen": -0.998076856136322, "eval_logits/rejected": -0.8702616095542908, "eval_logps/chosen": -430.6116638183594, "eval_logps/rejected": -496.98193359375, "eval_loss": 0.689400315284729, "eval_rewards/accuracies": 0.6679999828338623, "eval_rewards/chosen": -0.1986067146062851, "eval_rewards/margins": 0.08676330000162125, "eval_rewards/rejected": -0.2853700518608093, "eval_runtime": 711.6289, "eval_samples_per_second": 2.81, "eval_steps_per_second": 1.405, "step": 14300 }, { "epoch": 0.94, "learning_rate": 6.160482485952413e-08, "logits/chosen": -1.1886036396026611, "logits/rejected": -1.1761853694915771, "logps/chosen": -464.11669921875, "logps/rejected": -506.704345703125, "loss": 0.6902, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2276669293642044, "rewards/margins": 0.07438355684280396, "rewards/rejected": -0.30205050110816956, "step": 14310 }, { "epoch": 0.94, "learning_rate": 6.035142081930234e-08, "logits/chosen": -0.9976833462715149, "logits/rejected": -0.8096323013305664, "logps/chosen": -493.4520568847656, "logps/rejected": -496.55987548828125, "loss": 0.6905, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2306557446718216, "rewards/margins": 0.08012469112873077, "rewards/rejected": -0.31078043580055237, "step": 14320 }, { "epoch": 0.94, "learning_rate": 5.911074318114496e-08, "logits/chosen": -0.91179358959198, "logits/rejected": -0.7653204202651978, "logps/chosen": -409.9183654785156, "logps/rejected": -543.5857543945312, "loss": 0.69, "rewards/accuracies": 0.625, "rewards/chosen": -0.20633026957511902, "rewards/margins": 0.09154252707958221, "rewards/rejected": -0.2978728115558624, "step": 14330 }, { "epoch": 0.94, "learning_rate": 5.788279841703381e-08, "logits/chosen": -1.199209451675415, "logits/rejected": -0.9316481351852417, "logps/chosen": -374.29852294921875, "logps/rejected": -461.6366271972656, "loss": 0.6889, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19038894772529602, "rewards/margins": 0.09500516206026077, "rewards/rejected": -0.2853941321372986, "step": 14340 }, { "epoch": 0.94, "learning_rate": 5.66675929325311e-08, "logits/chosen": -1.1369472742080688, "logits/rejected": -0.9022246599197388, "logps/chosen": -429.6748962402344, "logps/rejected": -462.70355224609375, "loss": 0.6914, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20901048183441162, "rewards/margins": 0.04437742009758949, "rewards/rejected": -0.2533878982067108, "step": 14350 }, { "epoch": 0.94, "learning_rate": 5.546513306674301e-08, "logits/chosen": -0.8335935473442078, "logits/rejected": -0.7389085292816162, "logps/chosen": -486.3041076660156, "logps/rejected": -500.9739685058594, "loss": 0.6885, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.207787424325943, "rewards/margins": 0.08975278586149216, "rewards/rejected": -0.29754018783569336, "step": 14360 }, { "epoch": 0.94, "learning_rate": 5.4275425092290004e-08, "logits/chosen": -1.5143823623657227, "logits/rejected": -1.290815830230713, "logps/chosen": -443.744384765625, "logps/rejected": -505.951416015625, "loss": 0.6909, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18894822895526886, "rewards/margins": 0.08064968138933182, "rewards/rejected": -0.2695979177951813, "step": 14370 }, { "epoch": 0.94, "learning_rate": 5.309847521527078e-08, "logits/chosen": -0.7275829315185547, "logits/rejected": -0.6369680166244507, "logps/chosen": -491.9794006347656, "logps/rejected": -519.0418701171875, "loss": 0.689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20740604400634766, "rewards/margins": 0.06999073177576065, "rewards/rejected": -0.2773967981338501, "step": 14380 }, { "epoch": 0.94, "learning_rate": 5.1934289575233385e-08, "logits/chosen": -0.7574592232704163, "logits/rejected": -0.5376136302947998, "logps/chosen": -447.2177734375, "logps/rejected": -517.1713256835938, "loss": 0.6887, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2076883316040039, "rewards/margins": 0.10432090610265732, "rewards/rejected": -0.3120092451572418, "step": 14390 }, { "epoch": 0.94, "learning_rate": 5.078287424513994e-08, "logits/chosen": -1.222109079360962, "logits/rejected": -1.0470283031463623, "logps/chosen": -476.23614501953125, "logps/rejected": -512.8048095703125, "loss": 0.6898, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20618847012519836, "rewards/margins": 0.11139892041683197, "rewards/rejected": -0.31758740544319153, "step": 14400 }, { "epoch": 0.94, "eval_logits/chosen": -0.9976576566696167, "eval_logits/rejected": -0.8699551820755005, "eval_logps/chosen": -430.67169189453125, "eval_logps/rejected": -497.1328430175781, "eval_loss": 0.6893981695175171, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -0.1986667662858963, "eval_rewards/margins": 0.08685415238142014, "eval_rewards/rejected": -0.2855209410190582, "eval_runtime": 711.3754, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.406, "step": 14400 }, { "epoch": 0.94, "learning_rate": 4.964423523133671e-08, "logits/chosen": -1.3410192728042603, "logits/rejected": -0.8826667070388794, "logps/chosen": -393.24261474609375, "logps/rejected": -424.50665283203125, "loss": 0.6917, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1755111664533615, "rewards/margins": 0.06561318039894104, "rewards/rejected": -0.24112434685230255, "step": 14410 }, { "epoch": 0.94, "learning_rate": 4.8518378473522976e-08, "logits/chosen": -1.096555471420288, "logits/rejected": -0.836302638053894, "logps/chosen": -454.3536682128906, "logps/rejected": -537.505126953125, "loss": 0.6867, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.20384380221366882, "rewards/margins": 0.08987867832183838, "rewards/rejected": -0.2937224507331848, "step": 14420 }, { "epoch": 0.94, "learning_rate": 4.7405309844718584e-08, "logits/chosen": -1.0802674293518066, "logits/rejected": -0.9114401936531067, "logps/chosen": -414.07745361328125, "logps/rejected": -547.9803466796875, "loss": 0.6862, "rewards/accuracies": 0.75, "rewards/chosen": -0.22157840430736542, "rewards/margins": 0.12470661103725433, "rewards/rejected": -0.34628504514694214, "step": 14430 }, { "epoch": 0.94, "learning_rate": 4.630503515123508e-08, "logits/chosen": -1.1976383924484253, "logits/rejected": -0.8946923017501831, "logps/chosen": -411.8882751464844, "logps/rejected": -443.54473876953125, "loss": 0.6885, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2110222578048706, "rewards/margins": 0.08152283728122711, "rewards/rejected": -0.2925451099872589, "step": 14440 }, { "epoch": 0.95, "learning_rate": 4.5217560132644056e-08, "logits/chosen": -0.8913286924362183, "logits/rejected": -0.7216338515281677, "logps/chosen": -345.64935302734375, "logps/rejected": -428.1698303222656, "loss": 0.6902, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19502976536750793, "rewards/margins": 0.06411959230899811, "rewards/rejected": -0.25914937257766724, "step": 14450 }, { "epoch": 0.95, "learning_rate": 4.41428904617483e-08, "logits/chosen": -0.997122585773468, "logits/rejected": -1.084720492362976, "logps/chosen": -370.079345703125, "logps/rejected": -443.838623046875, "loss": 0.6913, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1929706335067749, "rewards/margins": 0.0690988153219223, "rewards/rejected": -0.262069433927536, "step": 14460 }, { "epoch": 0.95, "learning_rate": 4.3081031744550696e-08, "logits/chosen": -1.1552343368530273, "logits/rejected": -1.1827692985534668, "logps/chosen": -410.41705322265625, "logps/rejected": -485.11676025390625, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15727174282073975, "rewards/margins": 0.09702739119529724, "rewards/rejected": -0.2542991042137146, "step": 14470 }, { "epoch": 0.95, "learning_rate": 4.2031989520227025e-08, "logits/chosen": -0.9480659365653992, "logits/rejected": -0.8651493191719055, "logps/chosen": -449.3002014160156, "logps/rejected": -501.34088134765625, "loss": 0.6905, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2209857702255249, "rewards/margins": 0.07392819970846176, "rewards/rejected": -0.29491397738456726, "step": 14480 }, { "epoch": 0.95, "learning_rate": 4.099576926109461e-08, "logits/chosen": -1.2740567922592163, "logits/rejected": -0.925223171710968, "logps/chosen": -428.7998962402344, "logps/rejected": -424.1492614746094, "loss": 0.6902, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18901804089546204, "rewards/margins": 0.07450384646654129, "rewards/rejected": -0.2635219097137451, "step": 14490 }, { "epoch": 0.95, "learning_rate": 3.997237637258705e-08, "logits/chosen": -1.1149488687515259, "logits/rejected": -0.8887525796890259, "logps/chosen": -495.614013671875, "logps/rejected": -540.0551147460938, "loss": 0.6909, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.178360715508461, "rewards/margins": 0.08778423070907593, "rewards/rejected": -0.2661449611186981, "step": 14500 }, { "epoch": 0.95, "eval_logits/chosen": -0.9958268404006958, "eval_logits/rejected": -0.8681316375732422, "eval_logps/chosen": -431.3943786621094, "eval_logps/rejected": -498.07061767578125, "eval_loss": 0.6893977522850037, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.19938941299915314, "eval_rewards/margins": 0.08706925064325333, "eval_rewards/rejected": -0.28645867109298706, "eval_runtime": 709.4627, "eval_samples_per_second": 2.819, "eval_steps_per_second": 1.41, "step": 14500 }, { "epoch": 0.95, "learning_rate": 3.8961816193222035e-08, "logits/chosen": -1.0719501972198486, "logits/rejected": -0.824332058429718, "logps/chosen": -477.8409118652344, "logps/rejected": -475.36474609375, "loss": 0.6912, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2419906109571457, "rewards/margins": 0.05485706776380539, "rewards/rejected": -0.29684773087501526, "step": 14510 }, { "epoch": 0.95, "learning_rate": 3.79640939945769e-08, "logits/chosen": -1.032693862915039, "logits/rejected": -0.8485744595527649, "logps/chosen": -428.54351806640625, "logps/rejected": -388.8865661621094, "loss": 0.6919, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1423225998878479, "rewards/margins": 0.04660715162754059, "rewards/rejected": -0.18892976641654968, "step": 14520 }, { "epoch": 0.95, "learning_rate": 3.697921498125895e-08, "logits/chosen": -0.861232578754425, "logits/rejected": -1.0105615854263306, "logps/chosen": -435.960205078125, "logps/rejected": -542.7559204101562, "loss": 0.6892, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22600717842578888, "rewards/margins": 0.10091586410999298, "rewards/rejected": -0.32692304253578186, "step": 14530 }, { "epoch": 0.95, "learning_rate": 3.6007184290880456e-08, "logits/chosen": -1.1425590515136719, "logits/rejected": -1.0225722789764404, "logps/chosen": -445.26361083984375, "logps/rejected": -494.39453125, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": -0.23329059779644012, "rewards/margins": 0.06579459458589554, "rewards/rejected": -0.29908519983291626, "step": 14540 }, { "epoch": 0.95, "learning_rate": 3.504800699402872e-08, "logits/chosen": -1.2578893899917603, "logits/rejected": -1.0587711334228516, "logps/chosen": -548.8036499023438, "logps/rejected": -515.9398193359375, "loss": 0.6917, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2031494677066803, "rewards/margins": 0.04050298407673836, "rewards/rejected": -0.24365243315696716, "step": 14550 }, { "epoch": 0.95, "learning_rate": 3.4101688094242967e-08, "logits/chosen": -1.0031616687774658, "logits/rejected": -0.8348426818847656, "logps/chosen": -522.0515747070312, "logps/rejected": -614.2586669921875, "loss": 0.689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2524906098842621, "rewards/margins": 0.11738330125808716, "rewards/rejected": -0.36987388134002686, "step": 14560 }, { "epoch": 0.95, "learning_rate": 3.3168232527985564e-08, "logits/chosen": -0.6329993009567261, "logits/rejected": -0.6292056441307068, "logps/chosen": -437.3837890625, "logps/rejected": -472.7875061035156, "loss": 0.6903, "rewards/accuracies": 0.75, "rewards/chosen": -0.1957651823759079, "rewards/margins": 0.09015369415283203, "rewards/rejected": -0.2859188914299011, "step": 14570 }, { "epoch": 0.95, "learning_rate": 3.224764516461892e-08, "logits/chosen": -1.0613816976547241, "logits/rejected": -0.7705498337745667, "logps/chosen": -434.5621032714844, "logps/rejected": -519.8670043945312, "loss": 0.6876, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1795632541179657, "rewards/margins": 0.11605356633663177, "rewards/rejected": -0.2956168055534363, "step": 14580 }, { "epoch": 0.95, "learning_rate": 3.133993080637665e-08, "logits/chosen": -1.1013673543930054, "logits/rejected": -0.8421053886413574, "logps/chosen": -413.6099548339844, "logps/rejected": -492.38116455078125, "loss": 0.69, "rewards/accuracies": 0.75, "rewards/chosen": -0.21072909235954285, "rewards/margins": 0.09118209034204483, "rewards/rejected": -0.3019111752510071, "step": 14590 }, { "epoch": 0.96, "learning_rate": 3.0445094188342186e-08, "logits/chosen": -0.4487873613834381, "logits/rejected": -0.3390078842639923, "logps/chosen": -452.75299072265625, "logps/rejected": -464.5770568847656, "loss": 0.6889, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.199276402592659, "rewards/margins": 0.09208828955888748, "rewards/rejected": -0.2913646996021271, "step": 14600 }, { "epoch": 0.96, "eval_logits/chosen": -0.9952174425125122, "eval_logits/rejected": -0.8675841689109802, "eval_logps/chosen": -430.40631103515625, "eval_logps/rejected": -496.6932373046875, "eval_loss": 0.6893989443778992, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -0.19840139150619507, "eval_rewards/margins": 0.08667998015880585, "eval_rewards/rejected": -0.2850813567638397, "eval_runtime": 711.9516, "eval_samples_per_second": 2.809, "eval_steps_per_second": 1.405, "step": 14600 }, { "epoch": 0.96, "learning_rate": 2.9563139978421028e-08, "logits/chosen": -0.8672275543212891, "logits/rejected": -0.9306055903434753, "logps/chosen": -410.693115234375, "logps/rejected": -462.60345458984375, "loss": 0.6902, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1848212331533432, "rewards/margins": 0.05969489365816116, "rewards/rejected": -0.24451613426208496, "step": 14610 }, { "epoch": 0.96, "learning_rate": 2.869407277731939e-08, "logits/chosen": -0.6720192432403564, "logits/rejected": -0.7131026983261108, "logps/chosen": -373.59527587890625, "logps/rejected": -424.0103454589844, "loss": 0.6897, "rewards/accuracies": 0.625, "rewards/chosen": -0.18833813071250916, "rewards/margins": 0.07358469069004059, "rewards/rejected": -0.26192283630371094, "step": 14620 }, { "epoch": 0.96, "learning_rate": 2.783789711851642e-08, "logits/chosen": -1.121572732925415, "logits/rejected": -0.8326706886291504, "logps/chosen": -361.41058349609375, "logps/rejected": -448.26629638671875, "loss": 0.6882, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19955962896347046, "rewards/margins": 0.10928840935230255, "rewards/rejected": -0.3088480532169342, "step": 14630 }, { "epoch": 0.96, "learning_rate": 2.6994617468244778e-08, "logits/chosen": -1.0026696920394897, "logits/rejected": -0.8741849660873413, "logps/chosen": -406.95697021484375, "logps/rejected": -448.57568359375, "loss": 0.6889, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1939363181591034, "rewards/margins": 0.10553745925426483, "rewards/rejected": -0.29947376251220703, "step": 14640 }, { "epoch": 0.96, "learning_rate": 2.6164238225463155e-08, "logits/chosen": -0.8640907406806946, "logits/rejected": -0.5929582118988037, "logps/chosen": -480.9214782714844, "logps/rejected": -498.310546875, "loss": 0.6901, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1984943449497223, "rewards/margins": 0.09589709341526031, "rewards/rejected": -0.294391393661499, "step": 14650 }, { "epoch": 0.96, "learning_rate": 2.534676372183742e-08, "logits/chosen": -0.7579206228256226, "logits/rejected": -0.7912012338638306, "logps/chosen": -489.6610412597656, "logps/rejected": -507.6478576660156, "loss": 0.6901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20570676028728485, "rewards/margins": 0.07031337171792984, "rewards/rejected": -0.2760201096534729, "step": 14660 }, { "epoch": 0.96, "learning_rate": 2.4542198221714218e-08, "logits/chosen": -0.6668469309806824, "logits/rejected": -0.5565620064735413, "logps/chosen": -329.5209655761719, "logps/rejected": -421.29571533203125, "loss": 0.6883, "rewards/accuracies": 0.5, "rewards/chosen": -0.1914140284061432, "rewards/margins": 0.09154781699180603, "rewards/rejected": -0.28296181559562683, "step": 14670 }, { "epoch": 0.96, "learning_rate": 2.3750545922101854e-08, "logits/chosen": -1.431932806968689, "logits/rejected": -0.8189393877983093, "logps/chosen": -505.0044860839844, "logps/rejected": -524.7289428710938, "loss": 0.6901, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1956133097410202, "rewards/margins": 0.08811850845813751, "rewards/rejected": -0.2837317883968353, "step": 14680 }, { "epoch": 0.96, "learning_rate": 2.2971810952646112e-08, "logits/chosen": -1.1525204181671143, "logits/rejected": -0.9542962908744812, "logps/chosen": -463.877197265625, "logps/rejected": -472.92974853515625, "loss": 0.6906, "rewards/accuracies": 0.625, "rewards/chosen": -0.20551447570323944, "rewards/margins": 0.05675836652517319, "rewards/rejected": -0.26227283477783203, "step": 14690 }, { "epoch": 0.96, "learning_rate": 2.2205997375610576e-08, "logits/chosen": -0.7429525852203369, "logits/rejected": -0.7011960744857788, "logps/chosen": -341.0621032714844, "logps/rejected": -451.4022521972656, "loss": 0.6902, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15555623173713684, "rewards/margins": 0.10518001019954681, "rewards/rejected": -0.26073622703552246, "step": 14700 }, { "epoch": 0.96, "eval_logits/chosen": -0.9974082112312317, "eval_logits/rejected": -0.8696686029434204, "eval_logps/chosen": -430.09259033203125, "eval_logps/rejected": -496.2928771972656, "eval_loss": 0.6893972158432007, "eval_rewards/accuracies": 0.6690000295639038, "eval_rewards/chosen": -0.1980876475572586, "eval_rewards/margins": 0.0865933746099472, "eval_rewards/rejected": -0.2846809923648834, "eval_runtime": 711.6144, "eval_samples_per_second": 2.811, "eval_steps_per_second": 1.405, "step": 14700 }, { "epoch": 0.96, "learning_rate": 2.1453109185853304e-08, "logits/chosen": -1.0774486064910889, "logits/rejected": -1.0813241004943848, "logps/chosen": -369.2832336425781, "logps/rejected": -461.7610778808594, "loss": 0.6884, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16720476746559143, "rewards/margins": 0.09073060750961304, "rewards/rejected": -0.2579353451728821, "step": 14710 }, { "epoch": 0.96, "learning_rate": 2.0713150310808784e-08, "logits/chosen": -1.082183599472046, "logits/rejected": -1.0008739233016968, "logps/chosen": -428.96868896484375, "logps/rejected": -478.0359802246094, "loss": 0.6914, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.20348091423511505, "rewards/margins": 0.04330389201641083, "rewards/rejected": -0.24678480625152588, "step": 14720 }, { "epoch": 0.96, "learning_rate": 1.9986124610464064e-08, "logits/chosen": -0.7365717887878418, "logits/rejected": -0.5972979664802551, "logps/chosen": -514.8629760742188, "logps/rejected": -576.7749633789062, "loss": 0.6877, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22763288021087646, "rewards/margins": 0.13172422349452972, "rewards/rejected": -0.35935714840888977, "step": 14730 }, { "epoch": 0.96, "learning_rate": 1.927203587734211e-08, "logits/chosen": -0.6570479273796082, "logits/rejected": -0.6569720506668091, "logps/chosen": -454.4698791503906, "logps/rejected": -490.32122802734375, "loss": 0.6891, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20147821307182312, "rewards/margins": 0.08437781035900116, "rewards/rejected": -0.2858560085296631, "step": 14740 }, { "epoch": 0.97, "learning_rate": 1.8570887836479034e-08, "logits/chosen": -0.90229731798172, "logits/rejected": -0.6604552268981934, "logps/chosen": -403.8556823730469, "logps/rejected": -527.2491455078125, "loss": 0.6896, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21023252606391907, "rewards/margins": 0.07057814300060272, "rewards/rejected": -0.2808106541633606, "step": 14750 }, { "epoch": 0.97, "learning_rate": 1.7882684145406616e-08, "logits/chosen": -1.0498579740524292, "logits/rejected": -1.024839162826538, "logps/chosen": -483.9183654785156, "logps/rejected": -557.7072143554688, "loss": 0.6876, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18749774992465973, "rewards/margins": 0.07567773759365082, "rewards/rejected": -0.26317542791366577, "step": 14760 }, { "epoch": 0.97, "learning_rate": 1.7207428394132865e-08, "logits/chosen": -1.2564983367919922, "logits/rejected": -0.9558451771736145, "logps/chosen": -474.35345458984375, "logps/rejected": -536.0001220703125, "loss": 0.6865, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2045731544494629, "rewards/margins": 0.11633388698101044, "rewards/rejected": -0.3209070563316345, "step": 14770 }, { "epoch": 0.97, "learning_rate": 1.654512410512177e-08, "logits/chosen": -1.035137414932251, "logits/rejected": -0.7679761648178101, "logps/chosen": -451.58837890625, "logps/rejected": -457.88275146484375, "loss": 0.6907, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19801142811775208, "rewards/margins": 0.06868289411067963, "rewards/rejected": -0.2666943073272705, "step": 14780 }, { "epoch": 0.97, "learning_rate": 1.5895774733277468e-08, "logits/chosen": -0.9306725263595581, "logits/rejected": -0.8101316690444946, "logps/chosen": -483.390380859375, "logps/rejected": -517.47900390625, "loss": 0.6894, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20357339084148407, "rewards/margins": 0.08432519435882568, "rewards/rejected": -0.28789860010147095, "step": 14790 }, { "epoch": 0.97, "learning_rate": 1.5259383665924e-08, "logits/chosen": -1.5006357431411743, "logits/rejected": -1.146349549293518, "logps/chosen": -514.8986206054688, "logps/rejected": -501.13482666015625, "loss": 0.6894, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17280684411525726, "rewards/margins": 0.0802449956536293, "rewards/rejected": -0.2530518174171448, "step": 14800 }, { "epoch": 0.97, "eval_logits/chosen": -0.9956094622612, "eval_logits/rejected": -0.8681559562683105, "eval_logps/chosen": -430.40167236328125, "eval_logps/rejected": -496.6893615722656, "eval_loss": 0.6893979907035828, "eval_rewards/accuracies": 0.6675000190734863, "eval_rewards/chosen": -0.1983967423439026, "eval_rewards/margins": 0.08668076992034912, "eval_rewards/rejected": -0.2850775122642517, "eval_runtime": 712.7525, "eval_samples_per_second": 2.806, "eval_steps_per_second": 1.403, "step": 14800 }, { "epoch": 0.48, "step": 14801, "total_flos": 0.0, "train_loss": 4.683178136915897e-05, "train_runtime": 5.5433, "train_samples_per_second": 11.029, "train_steps_per_second": 5.592 } ], "logging_steps": 10, "max_steps": 31, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }