diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,23132 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.48420708268585916, + "eval_steps": 100, + "global_step": 14801, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.270111183780249e-09, + "logits/chosen": -2.634561777114868, + "logits/rejected": -2.673060417175293, + "logps/chosen": -207.5323944091797, + "logps/rejected": -286.9266052246094, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 3.270111183780249e-08, + "logits/chosen": -2.217600107192993, + "logits/rejected": -1.9652072191238403, + "logps/chosen": -185.94793701171875, + "logps/rejected": -165.36378479003906, + "loss": 0.6931, + "rewards/accuracies": 0.2777777910232544, + "rewards/chosen": -2.7529633371159434e-05, + "rewards/margins": -9.719059016788378e-05, + "rewards/rejected": 6.966096407268196e-05, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 6.540222367560497e-08, + "logits/chosen": -2.43184232711792, + "logits/rejected": -2.223078489303589, + "logps/chosen": -232.47348022460938, + "logps/rejected": -231.3294219970703, + "loss": 0.6931, + "rewards/accuracies": 0.375, + "rewards/chosen": -2.5338464183732867e-05, + "rewards/margins": -0.00011894272029167041, + "rewards/rejected": 9.360425610793754e-05, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 9.810333551340746e-08, + "logits/chosen": -2.258497953414917, + "logits/rejected": -2.1628453731536865, + "logps/chosen": -197.47084045410156, + "logps/rejected": -219.11550903320312, + "loss": 0.6931, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -4.670469934353605e-05, + "rewards/margins": -2.2600890588364564e-05, + "rewards/rejected": -2.4103814212139696e-05, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.3080444735120995e-07, + "logits/chosen": -2.211336135864258, + "logits/rejected": -2.251044750213623, + "logps/chosen": -276.04290771484375, + "logps/rejected": -265.7278137207031, + "loss": 0.6931, + "rewards/accuracies": 0.32499998807907104, + "rewards/chosen": -5.52958736079745e-05, + "rewards/margins": -8.68914503371343e-05, + "rewards/rejected": 3.159556581522338e-05, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 1.6350555918901243e-07, + "logits/chosen": -2.349104642868042, + "logits/rejected": -2.1418638229370117, + "logps/chosen": -204.79901123046875, + "logps/rejected": -184.77700805664062, + "loss": 0.6931, + "rewards/accuracies": 0.5, + "rewards/chosen": 6.549026693392079e-06, + "rewards/margins": 8.850884478306398e-05, + "rewards/rejected": -8.195983537007123e-05, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.9620667102681492e-07, + "logits/chosen": -2.3065123558044434, + "logits/rejected": -2.0677669048309326, + "logps/chosen": -209.77523803710938, + "logps/rejected": -185.83193969726562, + "loss": 0.6932, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": 3.0680701456731185e-05, + "rewards/margins": -0.00012086327478755265, + "rewards/rejected": 0.00015154397988226265, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 2.289077828646174e-07, + "logits/chosen": -2.2705559730529785, + "logits/rejected": -2.1576623916625977, + "logps/chosen": -218.01708984375, + "logps/rejected": -207.9579620361328, + "loss": 0.6931, + "rewards/accuracies": 0.375, + "rewards/chosen": 9.244588727597147e-05, + "rewards/margins": 9.720615707919933e-06, + "rewards/rejected": 8.272529521491379e-05, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 2.616088947024199e-07, + "logits/chosen": -2.5082621574401855, + "logits/rejected": -2.2270846366882324, + "logps/chosen": -258.78070068359375, + "logps/rejected": -213.70150756835938, + "loss": 0.6931, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.00016971012519206852, + "rewards/margins": 8.799631905276328e-05, + "rewards/rejected": 8.171383524313569e-05, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 2.943100065402224e-07, + "logits/chosen": -2.257884979248047, + "logits/rejected": -2.173158645629883, + "logps/chosen": -184.643798828125, + "logps/rejected": -165.50064086914062, + "loss": 0.6931, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": 0.00014099081454332918, + "rewards/margins": 8.532649371773005e-05, + "rewards/rejected": 5.566431718762033e-05, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 3.2701111837802487e-07, + "logits/chosen": -2.432077646255493, + "logits/rejected": -2.426736354827881, + "logps/chosen": -168.7462921142578, + "logps/rejected": -183.92845153808594, + "loss": 0.6931, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.0001885929814307019, + "rewards/margins": 2.7986563509330153e-05, + "rewards/rejected": 0.00016060643247328699, + "step": 100 + }, + { + "epoch": 0.01, + "eval_logits/chosen": -2.348320245742798, + "eval_logits/rejected": -2.159881830215454, + "eval_logps/chosen": -231.77328491210938, + "eval_logps/rejected": -211.4641571044922, + "eval_loss": 0.6931429505348206, + "eval_rewards/accuracies": 0.484499990940094, + "eval_rewards/chosen": 0.00023166697064880282, + "eval_rewards/margins": 8.390223229071125e-05, + "eval_rewards/rejected": 0.00014776474563404918, + "eval_runtime": 711.5877, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.405, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 3.5971223021582736e-07, + "logits/chosen": -2.3472111225128174, + "logits/rejected": -1.9997737407684326, + "logps/chosen": -222.4732208251953, + "logps/rejected": -167.11593627929688, + "loss": 0.6931, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00032228889176622033, + "rewards/margins": 0.00020361851784400642, + "rewards/rejected": 0.00011867038119817153, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 3.9241334205362984e-07, + "logits/chosen": -2.3396477699279785, + "logits/rejected": -2.2444756031036377, + "logps/chosen": -223.9533233642578, + "logps/rejected": -234.1103057861328, + "loss": 0.6931, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.00033879600232467055, + "rewards/margins": 7.787643698975444e-05, + "rewards/rejected": 0.00026091962354257703, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.251144538914324e-07, + "logits/chosen": -2.262049913406372, + "logits/rejected": -2.2183430194854736, + "logps/chosen": -149.32823181152344, + "logps/rejected": -148.27149963378906, + "loss": 0.6931, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0003176346654072404, + "rewards/margins": 0.00017084801220335066, + "rewards/rejected": 0.00014678671141155064, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.578155657292348e-07, + "logits/chosen": -2.3226771354675293, + "logits/rejected": -2.2234673500061035, + "logps/chosen": -225.57870483398438, + "logps/rejected": -159.41448974609375, + "loss": 0.6931, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.00038025499088689685, + "rewards/margins": 0.00010361654858570546, + "rewards/rejected": 0.00027663842774927616, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.905166775670374e-07, + "logits/chosen": -2.3669283390045166, + "logits/rejected": -2.1579947471618652, + "logps/chosen": -230.99099731445312, + "logps/rejected": -229.1034393310547, + "loss": 0.6931, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.00036465103039518, + "rewards/margins": 0.0003103634517174214, + "rewards/rejected": 5.428760778158903e-05, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 5.232177894048398e-07, + "logits/chosen": -2.2156825065612793, + "logits/rejected": -2.2284252643585205, + "logps/chosen": -260.325927734375, + "logps/rejected": -224.88876342773438, + "loss": 0.6931, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.0005425811395980418, + "rewards/margins": 0.0004318637656979263, + "rewards/rejected": 0.00011071735207224265, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 5.559189012426422e-07, + "logits/chosen": -2.3160367012023926, + "logits/rejected": -2.03806209564209, + "logps/chosen": -180.5178680419922, + "logps/rejected": -156.7935791015625, + "loss": 0.6931, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.00033474405063316226, + "rewards/margins": 0.0002378027857048437, + "rewards/rejected": 9.694129403214902e-05, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 5.886200130804448e-07, + "logits/chosen": -2.398749828338623, + "logits/rejected": -2.338441848754883, + "logps/chosen": -217.6501007080078, + "logps/rejected": -198.72491455078125, + "loss": 0.6931, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0005784613895229995, + "rewards/margins": 0.0004989482113160193, + "rewards/rejected": 7.951319275889546e-05, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 6.213211249182473e-07, + "logits/chosen": -2.0694451332092285, + "logits/rejected": -2.1747512817382812, + "logps/chosen": -191.06724548339844, + "logps/rejected": -208.7252960205078, + "loss": 0.6931, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.000429000414442271, + "rewards/margins": 0.000501616217661649, + "rewards/rejected": -7.261570863192901e-05, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 6.540222367560497e-07, + "logits/chosen": -2.2755157947540283, + "logits/rejected": -2.2391417026519775, + "logps/chosen": -146.77293395996094, + "logps/rejected": -177.70639038085938, + "loss": 0.6931, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0004391101247165352, + "rewards/margins": 0.0006169243133626878, + "rewards/rejected": -0.0001778141740942374, + "step": 200 + }, + { + "epoch": 0.01, + "eval_logits/chosen": -2.348745822906494, + "eval_logits/rejected": -2.160346269607544, + "eval_logps/chosen": -231.43870544433594, + "eval_logps/rejected": -211.644287109375, + "eval_loss": 0.6931134462356567, + "eval_rewards/accuracies": 0.5799999833106995, + "eval_rewards/chosen": 0.0005662592011503875, + "eval_rewards/margins": 0.0005986409960314631, + "eval_rewards/rejected": -3.2381776691181585e-05, + "eval_runtime": 712.2312, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 1.404, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 6.867233485938523e-07, + "logits/chosen": -2.4266586303710938, + "logits/rejected": -2.202392339706421, + "logps/chosen": -218.6244659423828, + "logps/rejected": -188.30389404296875, + "loss": 0.6931, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0009090522071346641, + "rewards/margins": 0.0010470406850799918, + "rewards/rejected": -0.0001379884488414973, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 7.194244604316547e-07, + "logits/chosen": -2.2122180461883545, + "logits/rejected": -2.040276050567627, + "logps/chosen": -182.87673950195312, + "logps/rejected": -174.85745239257812, + "loss": 0.6931, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0004596250946633518, + "rewards/margins": 0.00037378407432697713, + "rewards/rejected": 8.584104216424748e-05, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 7.521255722694571e-07, + "logits/chosen": -2.4205055236816406, + "logits/rejected": -2.0249924659729004, + "logps/chosen": -279.0646667480469, + "logps/rejected": -184.08184814453125, + "loss": 0.6931, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0007147075375542045, + "rewards/margins": 0.00047254477976821363, + "rewards/rejected": 0.0002421626850264147, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 7.848266841072597e-07, + "logits/chosen": -2.200258731842041, + "logits/rejected": -2.1651644706726074, + "logps/chosen": -214.66073608398438, + "logps/rejected": -206.3551025390625, + "loss": 0.6931, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.0011901266407221556, + "rewards/margins": 0.0012617750326171517, + "rewards/rejected": -7.16482536518015e-05, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 8.175277959450622e-07, + "logits/chosen": -2.1673741340637207, + "logits/rejected": -2.3264830112457275, + "logps/chosen": -218.3331756591797, + "logps/rejected": -220.8343963623047, + "loss": 0.6931, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0020930031314492226, + "rewards/margins": 0.0009613169240765274, + "rewards/rejected": 0.0011316860327497125, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 8.502289077828648e-07, + "logits/chosen": -2.505481004714966, + "logits/rejected": -2.143810272216797, + "logps/chosen": -254.39773559570312, + "logps/rejected": -189.3335418701172, + "loss": 0.6931, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0027572843246161938, + "rewards/margins": 0.0012943788897246122, + "rewards/rejected": 0.0014629056677222252, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 8.829300196206672e-07, + "logits/chosen": -2.4226317405700684, + "logits/rejected": -2.127683401107788, + "logps/chosen": -246.3780975341797, + "logps/rejected": -230.76318359375, + "loss": 0.693, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.002432792680338025, + "rewards/margins": 0.0016146197449415922, + "rewards/rejected": 0.0008181730518117547, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 9.156311314584696e-07, + "logits/chosen": -2.3078713417053223, + "logits/rejected": -2.1943423748016357, + "logps/chosen": -159.91470336914062, + "logps/rejected": -146.4855194091797, + "loss": 0.6931, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.0022737388499081135, + "rewards/margins": 0.001040069735608995, + "rewards/rejected": 0.0012336688814684749, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 9.483322432962722e-07, + "logits/chosen": -2.5546011924743652, + "logits/rejected": -2.160224199295044, + "logps/chosen": -282.1301574707031, + "logps/rejected": -225.6984100341797, + "loss": 0.6931, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0030486714094877243, + "rewards/margins": 0.0010619161184877157, + "rewards/rejected": 0.0019867552910000086, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 9.810333551340747e-07, + "logits/chosen": -2.3406269550323486, + "logits/rejected": -2.196272611618042, + "logps/chosen": -264.9823913574219, + "logps/rejected": -238.93710327148438, + "loss": 0.6929, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0036772601306438446, + "rewards/margins": 0.0026040554512292147, + "rewards/rejected": 0.0010732045629993081, + "step": 300 + }, + { + "epoch": 0.02, + "eval_logits/chosen": -2.3525564670562744, + "eval_logits/rejected": -2.1640830039978027, + "eval_logps/chosen": -228.56967163085938, + "eval_logps/rejected": -209.95509338378906, + "eval_loss": 0.6930469870567322, + "eval_rewards/accuracies": 0.5809999704360962, + "eval_rewards/chosen": 0.0034352699294686317, + "eval_rewards/margins": 0.0017784537049010396, + "eval_rewards/rejected": 0.0016568164573982358, + "eval_runtime": 715.1216, + "eval_samples_per_second": 2.797, + "eval_steps_per_second": 1.398, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 1.0137344669718771e-06, + "logits/chosen": -2.3511688709259033, + "logits/rejected": -2.3660411834716797, + "logps/chosen": -166.951171875, + "logps/rejected": -156.00137329101562, + "loss": 0.6931, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.0025391627568751574, + "rewards/margins": 0.0007007948006503284, + "rewards/rejected": 0.0018383677816018462, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 1.0464355788096796e-06, + "logits/chosen": -2.454301357269287, + "logits/rejected": -2.059354782104492, + "logps/chosen": -221.11233520507812, + "logps/rejected": -192.06488037109375, + "loss": 0.693, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.004052783362567425, + "rewards/margins": 0.001703445566818118, + "rewards/rejected": 0.002349337562918663, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 1.079136690647482e-06, + "logits/chosen": -2.4363083839416504, + "logits/rejected": -2.2041573524475098, + "logps/chosen": -203.31265258789062, + "logps/rejected": -175.83804321289062, + "loss": 0.693, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.005098625086247921, + "rewards/margins": 0.002672579139471054, + "rewards/rejected": 0.0024260464124381542, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 1.1118378024852844e-06, + "logits/chosen": -2.1864969730377197, + "logits/rejected": -2.3531885147094727, + "logps/chosen": -150.56375122070312, + "logps/rejected": -177.64382934570312, + "loss": 0.6931, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.0031506356317549944, + "rewards/margins": -0.0003627676342148334, + "rewards/rejected": 0.00351340277120471, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 1.144538914323087e-06, + "logits/chosen": -2.4262728691101074, + "logits/rejected": -1.988315224647522, + "logps/chosen": -317.635009765625, + "logps/rejected": -248.2708740234375, + "loss": 0.693, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.004475675523281097, + "rewards/margins": 0.0034824323374778032, + "rewards/rejected": 0.0009932438842952251, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 1.1772400261608895e-06, + "logits/chosen": -2.4900474548339844, + "logits/rejected": -2.1962995529174805, + "logps/chosen": -219.6117401123047, + "logps/rejected": -192.6786346435547, + "loss": 0.6929, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.005868288688361645, + "rewards/margins": 0.003231339855119586, + "rewards/rejected": 0.0026369483675807714, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 1.2099411379986922e-06, + "logits/chosen": -2.169987440109253, + "logits/rejected": -2.2462120056152344, + "logps/chosen": -191.8892822265625, + "logps/rejected": -206.02182006835938, + "loss": 0.6931, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.005607450846582651, + "rewards/margins": 0.0013648418243974447, + "rewards/rejected": 0.004242608789354563, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 1.2426422498364946e-06, + "logits/chosen": -2.330944538116455, + "logits/rejected": -2.042590379714966, + "logps/chosen": -215.90518188476562, + "logps/rejected": -162.1782989501953, + "loss": 0.693, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.005743044428527355, + "rewards/margins": 0.0038504921831190586, + "rewards/rejected": 0.0018925521289929748, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 1.2753433616742968e-06, + "logits/chosen": -2.329392910003662, + "logits/rejected": -2.248897075653076, + "logps/chosen": -181.150634765625, + "logps/rejected": -244.83511352539062, + "loss": 0.6929, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.004150368273258209, + "rewards/margins": 0.003002329496666789, + "rewards/rejected": 0.001148038893006742, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 1.3080444735120995e-06, + "logits/chosen": -2.488135576248169, + "logits/rejected": -2.11959171295166, + "logps/chosen": -219.4470672607422, + "logps/rejected": -179.94998168945312, + "loss": 0.6929, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.006045623682439327, + "rewards/margins": 0.005753137171268463, + "rewards/rejected": 0.00029248674400150776, + "step": 400 + }, + { + "epoch": 0.03, + "eval_logits/chosen": -2.3535032272338867, + "eval_logits/rejected": -2.1649465560913086, + "eval_logps/chosen": -226.90525817871094, + "eval_logps/rejected": -210.06015014648438, + "eval_loss": 0.6929447054862976, + "eval_rewards/accuracies": 0.5950000286102295, + "eval_rewards/chosen": 0.0050996895879507065, + "eval_rewards/margins": 0.0035479466896504164, + "eval_rewards/rejected": 0.001551742316223681, + "eval_runtime": 713.0255, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 1.402, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 1.3407455853499021e-06, + "logits/chosen": -2.4732093811035156, + "logits/rejected": -2.3230807781219482, + "logps/chosen": -256.2766418457031, + "logps/rejected": -224.01925659179688, + "loss": 0.6929, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.006076619029045105, + "rewards/margins": 0.004139014054089785, + "rewards/rejected": 0.0019376047421246767, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 1.3734466971877046e-06, + "logits/chosen": -2.2999634742736816, + "logits/rejected": -2.199373960494995, + "logps/chosen": -176.3344268798828, + "logps/rejected": -173.9598388671875, + "loss": 0.6929, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.00558522017672658, + "rewards/margins": 0.005094148684293032, + "rewards/rejected": 0.000491071492433548, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 1.406147809025507e-06, + "logits/chosen": -2.291569709777832, + "logits/rejected": -2.1040902137756348, + "logps/chosen": -209.72128295898438, + "logps/rejected": -182.81837463378906, + "loss": 0.6927, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.005425966810435057, + "rewards/margins": 0.005596494302153587, + "rewards/rejected": -0.00017052698240149766, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 1.4388489208633094e-06, + "logits/chosen": -2.3915810585021973, + "logits/rejected": -2.1051740646362305, + "logps/chosen": -250.69534301757812, + "logps/rejected": -223.9762725830078, + "loss": 0.693, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.0038019181229174137, + "rewards/margins": 0.003192658070474863, + "rewards/rejected": 0.0006092601688578725, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 1.471550032701112e-06, + "logits/chosen": -2.408799648284912, + "logits/rejected": -2.205817937850952, + "logps/chosen": -185.5984344482422, + "logps/rejected": -198.32369995117188, + "loss": 0.6926, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.004049423150718212, + "rewards/margins": 0.006923851557075977, + "rewards/rejected": -0.002874427940696478, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 1.5042511445389143e-06, + "logits/chosen": -2.153529644012451, + "logits/rejected": -2.2015368938446045, + "logps/chosen": -147.46295166015625, + "logps/rejected": -214.2803955078125, + "loss": 0.6926, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.0034770288038998842, + "rewards/margins": 0.0033914081286638975, + "rewards/rejected": 8.562016591895372e-05, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 1.536952256376717e-06, + "logits/chosen": -2.0425426959991455, + "logits/rejected": -2.104628324508667, + "logps/chosen": -199.62979125976562, + "logps/rejected": -254.56204223632812, + "loss": 0.6925, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.001300838659517467, + "rewards/margins": 0.009368222206830978, + "rewards/rejected": -0.008067382499575615, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 1.5696533682145194e-06, + "logits/chosen": -2.432100296020508, + "logits/rejected": -2.2004401683807373, + "logps/chosen": -197.1683349609375, + "logps/rejected": -171.0287322998047, + "loss": 0.6928, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.012921568937599659, + "rewards/margins": 0.004919148050248623, + "rewards/rejected": -0.017840716987848282, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 1.602354480052322e-06, + "logits/chosen": -2.4052891731262207, + "logits/rejected": -2.315910577774048, + "logps/chosen": -282.2193603515625, + "logps/rejected": -234.4072265625, + "loss": 0.6927, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.015579608269035816, + "rewards/margins": 0.007139542605727911, + "rewards/rejected": -0.02271914854645729, + "step": 490 + }, + { + "epoch": 0.03, + "learning_rate": 1.6350555918901245e-06, + "logits/chosen": -2.205134630203247, + "logits/rejected": -1.87628173828125, + "logps/chosen": -231.80007934570312, + "logps/rejected": -226.1451416015625, + "loss": 0.6927, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.013730937615036964, + "rewards/margins": 0.008888588286936283, + "rewards/rejected": -0.022619523108005524, + "step": 500 + }, + { + "epoch": 0.03, + "eval_logits/chosen": -2.345468044281006, + "eval_logits/rejected": -2.1572470664978027, + "eval_logps/chosen": -243.40798950195312, + "eval_logps/rejected": -230.73947143554688, + "eval_loss": 0.6927248239517212, + "eval_rewards/accuracies": 0.6065000295639038, + "eval_rewards/chosen": -0.011403032578527927, + "eval_rewards/margins": 0.007724526803940535, + "eval_rewards/rejected": -0.019127558916807175, + "eval_runtime": 712.1411, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 1.404, + "step": 500 + }, + { + "epoch": 0.03, + "learning_rate": 1.6677567037279269e-06, + "logits/chosen": -2.485083818435669, + "logits/rejected": -2.1355221271514893, + "logps/chosen": -302.66400146484375, + "logps/rejected": -273.6127014160156, + "loss": 0.6929, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.006400348152965307, + "rewards/margins": 0.011333698406815529, + "rewards/rejected": -0.017734047025442123, + "step": 510 + }, + { + "epoch": 0.03, + "learning_rate": 1.7004578155657295e-06, + "logits/chosen": -2.294008493423462, + "logits/rejected": -2.3222975730895996, + "logps/chosen": -209.5379180908203, + "logps/rejected": -202.2738037109375, + "loss": 0.6925, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.0028185707051306963, + "rewards/margins": 0.013156527653336525, + "rewards/rejected": -0.015975097194314003, + "step": 520 + }, + { + "epoch": 0.03, + "learning_rate": 1.7331589274035318e-06, + "logits/chosen": -2.1821720600128174, + "logits/rejected": -2.061917781829834, + "logps/chosen": -188.24545288085938, + "logps/rejected": -200.89163208007812, + "loss": 0.6927, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.00431888597086072, + "rewards/margins": 0.013200417160987854, + "rewards/rejected": -0.017519304528832436, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 1.7658600392413344e-06, + "logits/chosen": -2.4073173999786377, + "logits/rejected": -2.246474504470825, + "logps/chosen": -201.97555541992188, + "logps/rejected": -198.34249877929688, + "loss": 0.6926, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.0008798660710453987, + "rewards/margins": 0.013736550696194172, + "rewards/rejected": -0.012856684625148773, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 1.7985611510791368e-06, + "logits/chosen": -2.404886245727539, + "logits/rejected": -1.9447847604751587, + "logps/chosen": -262.2571716308594, + "logps/rejected": -263.9870910644531, + "loss": 0.6925, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.0012924910988658667, + "rewards/margins": 0.018118366599082947, + "rewards/rejected": -0.016825873404741287, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 1.8312622629169393e-06, + "logits/chosen": -2.3182451725006104, + "logits/rejected": -2.1790592670440674, + "logps/chosen": -268.5740661621094, + "logps/rejected": -243.84616088867188, + "loss": 0.6925, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.00511964363977313, + "rewards/margins": 0.009792742319405079, + "rewards/rejected": -0.014912387356162071, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 1.8639633747547417e-06, + "logits/chosen": -2.3926608562469482, + "logits/rejected": -2.2314422130584717, + "logps/chosen": -234.6009979248047, + "logps/rejected": -213.1090545654297, + "loss": 0.6923, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.005455879960209131, + "rewards/margins": 0.0141448974609375, + "rewards/rejected": -0.019600778818130493, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 1.8966644865925443e-06, + "logits/chosen": -2.227658748626709, + "logits/rejected": -2.256025791168213, + "logps/chosen": -261.0607604980469, + "logps/rejected": -281.42657470703125, + "loss": 0.6928, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.02185194566845894, + "rewards/margins": 0.008971361443400383, + "rewards/rejected": -0.03082330897450447, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 1.9293655984303466e-06, + "logits/chosen": -2.6404712200164795, + "logits/rejected": -2.2230751514434814, + "logps/chosen": -301.16851806640625, + "logps/rejected": -236.12405395507812, + "loss": 0.6924, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.01820230856537819, + "rewards/margins": 0.01796017773449421, + "rewards/rejected": -0.03616248816251755, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 1.9620667102681494e-06, + "logits/chosen": -2.3834471702575684, + "logits/rejected": -2.3926730155944824, + "logps/chosen": -196.3507080078125, + "logps/rejected": -190.89453125, + "loss": 0.6924, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0070982701145112514, + "rewards/margins": 0.008864415809512138, + "rewards/rejected": -0.015962684527039528, + "step": 600 + }, + { + "epoch": 0.04, + "eval_logits/chosen": -2.34063458442688, + "eval_logits/rejected": -2.152348279953003, + "eval_logps/chosen": -240.43104553222656, + "eval_logps/rejected": -234.4803924560547, + "eval_loss": 0.6923562288284302, + "eval_rewards/accuracies": 0.6110000014305115, + "eval_rewards/chosen": -0.008426105603575706, + "eval_rewards/margins": 0.014442377723753452, + "eval_rewards/rejected": -0.022868484258651733, + "eval_runtime": 713.9601, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 1.401, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 1.994767822105952e-06, + "logits/chosen": -2.2903950214385986, + "logits/rejected": -2.190324306488037, + "logps/chosen": -187.91201782226562, + "logps/rejected": -209.1902618408203, + "loss": 0.6917, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.012708373367786407, + "rewards/margins": 0.014843207783997059, + "rewards/rejected": -0.02755158208310604, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 2.0274689339437543e-06, + "logits/chosen": -2.1997437477111816, + "logits/rejected": -1.9912179708480835, + "logps/chosen": -294.8075256347656, + "logps/rejected": -275.75335693359375, + "loss": 0.6924, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.0161177609115839, + "rewards/margins": 0.01630011573433876, + "rewards/rejected": -0.03241787850856781, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 2.0601700457815567e-06, + "logits/chosen": -2.3434805870056152, + "logits/rejected": -1.981257677078247, + "logps/chosen": -297.05657958984375, + "logps/rejected": -263.90301513671875, + "loss": 0.6926, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.03770185634493828, + "rewards/margins": 0.010359613224864006, + "rewards/rejected": -0.048061467707157135, + "step": 630 + }, + { + "epoch": 0.04, + "learning_rate": 2.092871157619359e-06, + "logits/chosen": -2.390469789505005, + "logits/rejected": -2.209237575531006, + "logps/chosen": -203.32533264160156, + "logps/rejected": -230.1988983154297, + "loss": 0.6927, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.026663165539503098, + "rewards/margins": 0.013162101618945599, + "rewards/rejected": -0.039825260639190674, + "step": 640 + }, + { + "epoch": 0.04, + "learning_rate": 2.1255722694571616e-06, + "logits/chosen": -2.4085137844085693, + "logits/rejected": -2.0080697536468506, + "logps/chosen": -306.5794982910156, + "logps/rejected": -235.6829071044922, + "loss": 0.6925, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.027684833854436874, + "rewards/margins": 0.022071022540330887, + "rewards/rejected": -0.04975585266947746, + "step": 650 + }, + { + "epoch": 0.04, + "learning_rate": 2.158273381294964e-06, + "logits/chosen": -2.317622184753418, + "logits/rejected": -2.2132556438446045, + "logps/chosen": -252.6199188232422, + "logps/rejected": -264.3005065917969, + "loss": 0.6931, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.04019295051693916, + "rewards/margins": 0.006169079802930355, + "rewards/rejected": -0.046362027525901794, + "step": 660 + }, + { + "epoch": 0.04, + "learning_rate": 2.190974493132767e-06, + "logits/chosen": -2.3021767139434814, + "logits/rejected": -1.9944665431976318, + "logps/chosen": -284.7574157714844, + "logps/rejected": -243.4713897705078, + "loss": 0.6927, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.047078341245651245, + "rewards/margins": 0.014970744028687477, + "rewards/rejected": -0.06204908341169357, + "step": 670 + }, + { + "epoch": 0.04, + "learning_rate": 2.223675604970569e-06, + "logits/chosen": -2.3567278385162354, + "logits/rejected": -2.1560585498809814, + "logps/chosen": -256.2400817871094, + "logps/rejected": -254.849609375, + "loss": 0.6923, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0455058254301548, + "rewards/margins": 0.015939798206090927, + "rewards/rejected": -0.06144562363624573, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 2.2563767168083718e-06, + "logits/chosen": -2.4585578441619873, + "logits/rejected": -1.9678945541381836, + "logps/chosen": -281.6275634765625, + "logps/rejected": -244.1110076904297, + "loss": 0.6919, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.024375787004828453, + "rewards/margins": 0.02551574632525444, + "rewards/rejected": -0.04989153519272804, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 2.289077828646174e-06, + "logits/chosen": -2.2911694049835205, + "logits/rejected": -2.062880039215088, + "logps/chosen": -273.7863464355469, + "logps/rejected": -258.31988525390625, + "loss": 0.6928, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.034725360572338104, + "rewards/margins": 0.009239157661795616, + "rewards/rejected": -0.04396451264619827, + "step": 700 + }, + { + "epoch": 0.05, + "eval_logits/chosen": -2.32228422164917, + "eval_logits/rejected": -2.1351630687713623, + "eval_logps/chosen": -255.19515991210938, + "eval_logps/rejected": -253.87286376953125, + "eval_loss": 0.6921648979187012, + "eval_rewards/accuracies": 0.609000027179718, + "eval_rewards/chosen": -0.023190179839730263, + "eval_rewards/margins": 0.01907077431678772, + "eval_rewards/rejected": -0.04226095601916313, + "eval_runtime": 712.4113, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 1.404, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 2.3217789404839766e-06, + "logits/chosen": -2.1814942359924316, + "logits/rejected": -2.2622792720794678, + "logps/chosen": -181.82546997070312, + "logps/rejected": -238.5260772705078, + "loss": 0.6924, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.024994343519210815, + "rewards/margins": 0.00975788477808237, + "rewards/rejected": -0.03475222736597061, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 2.354480052321779e-06, + "logits/chosen": -2.5221304893493652, + "logits/rejected": -2.090299129486084, + "logps/chosen": -269.60504150390625, + "logps/rejected": -239.1893310546875, + "loss": 0.6912, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.011215592734515667, + "rewards/margins": 0.029225636273622513, + "rewards/rejected": -0.040441226214170456, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 2.3871811641595815e-06, + "logits/chosen": -2.3538801670074463, + "logits/rejected": -2.1702470779418945, + "logps/chosen": -261.742431640625, + "logps/rejected": -221.083740234375, + "loss": 0.6924, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.014950007200241089, + "rewards/margins": 0.01799718663096428, + "rewards/rejected": -0.03294719010591507, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 2.4198822759973843e-06, + "logits/chosen": -2.1946702003479004, + "logits/rejected": -2.2114098072052, + "logps/chosen": -207.88638305664062, + "logps/rejected": -237.3756103515625, + "loss": 0.6921, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.017280325293540955, + "rewards/margins": 0.015556697733700275, + "rewards/rejected": -0.032837022095918655, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 2.4525833878351864e-06, + "logits/chosen": -2.463273763656616, + "logits/rejected": -2.261465311050415, + "logps/chosen": -264.2398376464844, + "logps/rejected": -214.43508911132812, + "loss": 0.6923, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.011438943445682526, + "rewards/margins": 0.016475234180688858, + "rewards/rejected": -0.027914175763726234, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 2.4852844996729892e-06, + "logits/chosen": -2.2060532569885254, + "logits/rejected": -2.109431743621826, + "logps/chosen": -258.8271789550781, + "logps/rejected": -292.1606140136719, + "loss": 0.691, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.019702842459082603, + "rewards/margins": 0.031875334680080414, + "rewards/rejected": -0.05157817527651787, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 2.5179856115107916e-06, + "logits/chosen": -2.3242106437683105, + "logits/rejected": -2.00541090965271, + "logps/chosen": -298.7543640136719, + "logps/rejected": -269.37701416015625, + "loss": 0.6914, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.025258159264922142, + "rewards/margins": 0.024570953100919724, + "rewards/rejected": -0.049829110503196716, + "step": 770 + }, + { + "epoch": 0.05, + "learning_rate": 2.5506867233485937e-06, + "logits/chosen": -2.4023263454437256, + "logits/rejected": -1.9835220575332642, + "logps/chosen": -288.435546875, + "logps/rejected": -286.0493469238281, + "loss": 0.6913, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.022739596664905548, + "rewards/margins": 0.05511244013905525, + "rewards/rejected": -0.0778520405292511, + "step": 780 + }, + { + "epoch": 0.05, + "learning_rate": 2.5833878351863965e-06, + "logits/chosen": -2.462794780731201, + "logits/rejected": -2.393866777420044, + "logps/chosen": -284.49871826171875, + "logps/rejected": -318.21240234375, + "loss": 0.6913, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.032500773668289185, + "rewards/margins": 0.0363340824842453, + "rewards/rejected": -0.06883485615253448, + "step": 790 + }, + { + "epoch": 0.05, + "learning_rate": 2.616088947024199e-06, + "logits/chosen": -2.1470065116882324, + "logits/rejected": -2.3199901580810547, + "logps/chosen": -244.1119842529297, + "logps/rejected": -297.1820068359375, + "loss": 0.6918, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04762252792716026, + "rewards/margins": 0.02854365110397339, + "rewards/rejected": -0.07616618275642395, + "step": 800 + }, + { + "epoch": 0.05, + "eval_logits/chosen": -2.32397723197937, + "eval_logits/rejected": -2.135841131210327, + "eval_logps/chosen": -289.2698669433594, + "eval_logps/rejected": -303.1002502441406, + "eval_loss": 0.6920146346092224, + "eval_rewards/accuracies": 0.597000002861023, + "eval_rewards/chosen": -0.05726493149995804, + "eval_rewards/margins": 0.03422345593571663, + "eval_rewards/rejected": -0.09148839116096497, + "eval_runtime": 711.5428, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.405, + "step": 800 + }, + { + "epoch": 0.05, + "learning_rate": 2.6487900588620014e-06, + "logits/chosen": -2.1001205444335938, + "logits/rejected": -1.8540256023406982, + "logps/chosen": -248.87161254882812, + "logps/rejected": -222.15377807617188, + "loss": 0.6933, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.04875599965453148, + "rewards/margins": 0.015534071251749992, + "rewards/rejected": -0.06429006904363632, + "step": 810 + }, + { + "epoch": 0.05, + "learning_rate": 2.6814911706998042e-06, + "logits/chosen": -2.2964351177215576, + "logits/rejected": -2.1176650524139404, + "logps/chosen": -270.25018310546875, + "logps/rejected": -257.04241943359375, + "loss": 0.6931, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.044910985976457596, + "rewards/margins": 0.017546221613883972, + "rewards/rejected": -0.06245720386505127, + "step": 820 + }, + { + "epoch": 0.05, + "learning_rate": 2.7141922825376067e-06, + "logits/chosen": -2.1903445720672607, + "logits/rejected": -2.1868643760681152, + "logps/chosen": -296.24957275390625, + "logps/rejected": -342.7115173339844, + "loss": 0.6921, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.038325462490320206, + "rewards/margins": 0.03653097525238991, + "rewards/rejected": -0.07485643774271011, + "step": 830 + }, + { + "epoch": 0.05, + "learning_rate": 2.746893394375409e-06, + "logits/chosen": -2.328739881515503, + "logits/rejected": -2.20552659034729, + "logps/chosen": -264.1017150878906, + "logps/rejected": -289.73760986328125, + "loss": 0.692, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.029707293957471848, + "rewards/margins": 0.02741720899939537, + "rewards/rejected": -0.05712450295686722, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 2.779594506213211e-06, + "logits/chosen": -2.208885669708252, + "logits/rejected": -1.9660180807113647, + "logps/chosen": -225.4889373779297, + "logps/rejected": -240.0717010498047, + "loss": 0.6926, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.03563004359602928, + "rewards/margins": 0.014896047301590443, + "rewards/rejected": -0.05052609369158745, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 2.812295618051014e-06, + "logits/chosen": -2.3775460720062256, + "logits/rejected": -2.2183637619018555, + "logps/chosen": -306.49853515625, + "logps/rejected": -272.1231994628906, + "loss": 0.692, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.02553737163543701, + "rewards/margins": 0.027351820841431618, + "rewards/rejected": -0.05288919061422348, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 2.8449967298888164e-06, + "logits/chosen": -2.2363040447235107, + "logits/rejected": -2.031376361846924, + "logps/chosen": -247.669189453125, + "logps/rejected": -234.5207977294922, + "loss": 0.6928, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.07084518671035767, + "rewards/margins": 0.014910000376403332, + "rewards/rejected": -0.08575518429279327, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 2.877697841726619e-06, + "logits/chosen": -2.305286407470703, + "logits/rejected": -2.2096505165100098, + "logps/chosen": -266.10638427734375, + "logps/rejected": -259.68817138671875, + "loss": 0.6929, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.04727055877447128, + "rewards/margins": 0.01039662305265665, + "rewards/rejected": -0.05766718462109566, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 2.9103989535644217e-06, + "logits/chosen": -2.241441488265991, + "logits/rejected": -2.3640246391296387, + "logps/chosen": -255.78543090820312, + "logps/rejected": -301.43780517578125, + "loss": 0.6927, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.04030177742242813, + "rewards/margins": 0.015121949836611748, + "rewards/rejected": -0.05542372912168503, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 2.943100065402224e-06, + "logits/chosen": -2.3066906929016113, + "logits/rejected": -2.156431198120117, + "logps/chosen": -330.0362243652344, + "logps/rejected": -328.26934814453125, + "loss": 0.6928, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.03739301115274429, + "rewards/margins": 0.013549859635531902, + "rewards/rejected": -0.05094286799430847, + "step": 900 + }, + { + "epoch": 0.06, + "eval_logits/chosen": -2.281608819961548, + "eval_logits/rejected": -2.0968432426452637, + "eval_logps/chosen": -253.80685424804688, + "eval_logps/rejected": -254.93124389648438, + "eval_loss": 0.6919333934783936, + "eval_rewards/accuracies": 0.6050000190734863, + "eval_rewards/chosen": -0.021801894530653954, + "eval_rewards/margins": 0.02151745744049549, + "eval_rewards/rejected": -0.04331935569643974, + "eval_runtime": 713.366, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 1.402, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 2.9758011772400266e-06, + "logits/chosen": -2.219975471496582, + "logits/rejected": -2.2243573665618896, + "logps/chosen": -280.44561767578125, + "logps/rejected": -302.7583923339844, + "loss": 0.6926, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.023136485368013382, + "rewards/margins": 0.01493816263973713, + "rewards/rejected": -0.03807464614510536, + "step": 910 + }, + { + "epoch": 0.06, + "learning_rate": 3.0085022890778286e-06, + "logits/chosen": -2.263059139251709, + "logits/rejected": -2.013516902923584, + "logps/chosen": -195.3144989013672, + "logps/rejected": -194.3135986328125, + "loss": 0.6914, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01957494579255581, + "rewards/margins": 0.025922566652297974, + "rewards/rejected": -0.04549751058220863, + "step": 920 + }, + { + "epoch": 0.06, + "learning_rate": 3.0412034009156314e-06, + "logits/chosen": -2.2070722579956055, + "logits/rejected": -2.362426280975342, + "logps/chosen": -268.68743896484375, + "logps/rejected": -294.49395751953125, + "loss": 0.6918, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03300117701292038, + "rewards/margins": 0.030068224295973778, + "rewards/rejected": -0.0630694031715393, + "step": 930 + }, + { + "epoch": 0.06, + "learning_rate": 3.073904512753434e-06, + "logits/chosen": -2.342238426208496, + "logits/rejected": -1.9886445999145508, + "logps/chosen": -268.29779052734375, + "logps/rejected": -282.3440856933594, + "loss": 0.6907, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.038649190217256546, + "rewards/margins": 0.04897458851337433, + "rewards/rejected": -0.08762378245592117, + "step": 940 + }, + { + "epoch": 0.06, + "learning_rate": 3.1066056245912363e-06, + "logits/chosen": -2.201458692550659, + "logits/rejected": -2.262294292449951, + "logps/chosen": -284.0932312011719, + "logps/rejected": -297.143310546875, + "loss": 0.6918, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.05310920998454094, + "rewards/margins": 0.04500911384820938, + "rewards/rejected": -0.09811832755804062, + "step": 950 + }, + { + "epoch": 0.06, + "learning_rate": 3.1393067364290387e-06, + "logits/chosen": -2.1673567295074463, + "logits/rejected": -1.9466733932495117, + "logps/chosen": -276.6808166503906, + "logps/rejected": -281.02276611328125, + "loss": 0.6926, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.05155404657125473, + "rewards/margins": 0.0429275706410408, + "rewards/rejected": -0.09448162466287613, + "step": 960 + }, + { + "epoch": 0.06, + "learning_rate": 3.1720078482668416e-06, + "logits/chosen": -2.082993984222412, + "logits/rejected": -1.858926773071289, + "logps/chosen": -253.553466796875, + "logps/rejected": -248.26535034179688, + "loss": 0.6915, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04198281094431877, + "rewards/margins": 0.0418187752366066, + "rewards/rejected": -0.08380158245563507, + "step": 970 + }, + { + "epoch": 0.06, + "learning_rate": 3.204708960104644e-06, + "logits/chosen": -2.074291706085205, + "logits/rejected": -1.7586740255355835, + "logps/chosen": -243.01431274414062, + "logps/rejected": -252.8297882080078, + "loss": 0.6908, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.025611573830246925, + "rewards/margins": 0.042403195053339005, + "rewards/rejected": -0.06801476329565048, + "step": 980 + }, + { + "epoch": 0.06, + "learning_rate": 3.237410071942446e-06, + "logits/chosen": -2.0311131477355957, + "logits/rejected": -1.7820078134536743, + "logps/chosen": -258.5697326660156, + "logps/rejected": -232.48477172851562, + "loss": 0.693, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.05220867320895195, + "rewards/margins": 0.007478843443095684, + "rewards/rejected": -0.05968751758337021, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 3.270111183780249e-06, + "logits/chosen": -1.8629153966903687, + "logits/rejected": -1.6702207326889038, + "logps/chosen": -262.003662109375, + "logps/rejected": -276.80450439453125, + "loss": 0.6908, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.061980120837688446, + "rewards/margins": 0.03280384838581085, + "rewards/rejected": -0.0947839766740799, + "step": 1000 + }, + { + "epoch": 0.07, + "eval_logits/chosen": -1.7917609214782715, + "eval_logits/rejected": -1.6315906047821045, + "eval_logps/chosen": -308.50732421875, + "eval_logps/rejected": -327.3461608886719, + "eval_loss": 0.6914514899253845, + "eval_rewards/accuracies": 0.6019999980926514, + "eval_rewards/chosen": -0.07650233805179596, + "eval_rewards/margins": 0.03923192247748375, + "eval_rewards/rejected": -0.1157342717051506, + "eval_runtime": 713.4479, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 1.402, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 3.3028122956180513e-06, + "logits/chosen": -1.580582857131958, + "logits/rejected": -1.674863576889038, + "logps/chosen": -324.74774169921875, + "logps/rejected": -371.5903625488281, + "loss": 0.692, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1126498356461525, + "rewards/margins": 0.01937369629740715, + "rewards/rejected": -0.13202352821826935, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 3.3355134074558538e-06, + "logits/chosen": -1.5238564014434814, + "logits/rejected": -1.4265750646591187, + "logps/chosen": -362.0272521972656, + "logps/rejected": -369.1707458496094, + "loss": 0.6928, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.14129449427127838, + "rewards/margins": 0.032731056213378906, + "rewards/rejected": -0.1740255504846573, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 3.368214519293656e-06, + "logits/chosen": -1.5193853378295898, + "logits/rejected": -1.2484958171844482, + "logps/chosen": -309.3878173828125, + "logps/rejected": -320.7041931152344, + "loss": 0.6917, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11041752249002457, + "rewards/margins": 0.03221488744020462, + "rewards/rejected": -0.1426324099302292, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 3.400915631131459e-06, + "logits/chosen": -1.4544426202774048, + "logits/rejected": -1.4647828340530396, + "logps/chosen": -321.44378662109375, + "logps/rejected": -404.62298583984375, + "loss": 0.6901, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.12347130477428436, + "rewards/margins": 0.0545278862118721, + "rewards/rejected": -0.17799919843673706, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 3.4336167429692615e-06, + "logits/chosen": -1.6467136144638062, + "logits/rejected": -1.4343664646148682, + "logps/chosen": -333.1053771972656, + "logps/rejected": -353.6955261230469, + "loss": 0.6907, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1185549646615982, + "rewards/margins": 0.03684395179152489, + "rewards/rejected": -0.1553989052772522, + "step": 1050 + }, + { + "epoch": 0.07, + "learning_rate": 3.4663178548070635e-06, + "logits/chosen": -1.5289983749389648, + "logits/rejected": -1.5519436597824097, + "logps/chosen": -329.55096435546875, + "logps/rejected": -367.48284912109375, + "loss": 0.6907, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11459700018167496, + "rewards/margins": 0.05437355488538742, + "rewards/rejected": -0.16897056996822357, + "step": 1060 + }, + { + "epoch": 0.07, + "learning_rate": 3.499018966644866e-06, + "logits/chosen": -1.6656230688095093, + "logits/rejected": -1.5398905277252197, + "logps/chosen": -341.64501953125, + "logps/rejected": -362.2322998046875, + "loss": 0.692, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1498199999332428, + "rewards/margins": 0.027399664744734764, + "rewards/rejected": -0.17721965909004211, + "step": 1070 + }, + { + "epoch": 0.07, + "learning_rate": 3.531720078482669e-06, + "logits/chosen": -1.6908804178237915, + "logits/rejected": -1.3607423305511475, + "logps/chosen": -335.6886901855469, + "logps/rejected": -371.15911865234375, + "loss": 0.6918, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1098189726471901, + "rewards/margins": 0.046701718121767044, + "rewards/rejected": -0.15652066469192505, + "step": 1080 + }, + { + "epoch": 0.07, + "learning_rate": 3.5644211903204712e-06, + "logits/chosen": -1.6549320220947266, + "logits/rejected": -1.6474714279174805, + "logps/chosen": -311.31427001953125, + "logps/rejected": -344.72930908203125, + "loss": 0.6921, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1124318465590477, + "rewards/margins": 0.03060152195394039, + "rewards/rejected": -0.14303335547447205, + "step": 1090 + }, + { + "epoch": 0.07, + "learning_rate": 3.5971223021582737e-06, + "logits/chosen": -1.7676842212677002, + "logits/rejected": -1.6186103820800781, + "logps/chosen": -357.2384948730469, + "logps/rejected": -325.09490966796875, + "loss": 0.6925, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10143768787384033, + "rewards/margins": 0.024971742182970047, + "rewards/rejected": -0.12640944123268127, + "step": 1100 + }, + { + "epoch": 0.07, + "eval_logits/chosen": -1.6588186025619507, + "eval_logits/rejected": -1.505243182182312, + "eval_logps/chosen": -304.1779479980469, + "eval_logps/rejected": -315.8557434082031, + "eval_loss": 0.691482424736023, + "eval_rewards/accuracies": 0.6169999837875366, + "eval_rewards/chosen": -0.07217301428318024, + "eval_rewards/margins": 0.03207085281610489, + "eval_rewards/rejected": -0.10424386709928513, + "eval_runtime": 713.1225, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 1.402, + "step": 1100 + }, + { + "epoch": 0.07, + "learning_rate": 3.6298234139960765e-06, + "logits/chosen": -1.7195770740509033, + "logits/rejected": -1.4194886684417725, + "logps/chosen": -287.1241760253906, + "logps/rejected": -260.1143798828125, + "loss": 0.6911, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.0693550780415535, + "rewards/margins": 0.022661572322249413, + "rewards/rejected": -0.09201665967702866, + "step": 1110 + }, + { + "epoch": 0.07, + "learning_rate": 3.6625245258338785e-06, + "logits/chosen": -1.38588547706604, + "logits/rejected": -1.2366743087768555, + "logps/chosen": -342.43560791015625, + "logps/rejected": -496.285400390625, + "loss": 0.6867, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10365718603134155, + "rewards/margins": 0.07738915830850601, + "rewards/rejected": -0.18104635179042816, + "step": 1120 + }, + { + "epoch": 0.07, + "learning_rate": 3.695225637671681e-06, + "logits/chosen": -1.1003705263137817, + "logits/rejected": -0.993769645690918, + "logps/chosen": -531.3511962890625, + "logps/rejected": -548.9378662109375, + "loss": 0.6899, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.32407456636428833, + "rewards/margins": 0.05066270753741264, + "rewards/rejected": -0.37473729252815247, + "step": 1130 + }, + { + "epoch": 0.07, + "learning_rate": 3.7279267495094834e-06, + "logits/chosen": -1.1593372821807861, + "logits/rejected": -0.9515183568000793, + "logps/chosen": -496.2530822753906, + "logps/rejected": -600.5367431640625, + "loss": 0.6903, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.33582615852355957, + "rewards/margins": 0.09415847808122635, + "rewards/rejected": -0.4299846291542053, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 3.7606278613472863e-06, + "logits/chosen": -1.6236388683319092, + "logits/rejected": -1.340681791305542, + "logps/chosen": -434.57965087890625, + "logps/rejected": -395.1595764160156, + "loss": 0.6931, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15230834484100342, + "rewards/margins": 0.03814804553985596, + "rewards/rejected": -0.19045640528202057, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 3.7933289731850887e-06, + "logits/chosen": -1.626577615737915, + "logits/rejected": -1.435408115386963, + "logps/chosen": -276.0029602050781, + "logps/rejected": -292.4320068359375, + "loss": 0.6902, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.04744229465723038, + "rewards/margins": 0.04468740522861481, + "rewards/rejected": -0.09212969243526459, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 3.826030085022891e-06, + "logits/chosen": -1.7114953994750977, + "logits/rejected": -1.4889827966690063, + "logps/chosen": -336.4309997558594, + "logps/rejected": -342.95623779296875, + "loss": 0.6915, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08878123760223389, + "rewards/margins": 0.042677246034145355, + "rewards/rejected": -0.13145849108695984, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 3.858731196860693e-06, + "logits/chosen": -1.5804107189178467, + "logits/rejected": -1.2577301263809204, + "logps/chosen": -375.09869384765625, + "logps/rejected": -363.1798095703125, + "loss": 0.6918, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12426527589559555, + "rewards/margins": 0.030316686257719994, + "rewards/rejected": -0.1545819640159607, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 3.891432308698496e-06, + "logits/chosen": -1.211471438407898, + "logits/rejected": -1.1756963729858398, + "logps/chosen": -323.86114501953125, + "logps/rejected": -392.4009094238281, + "loss": 0.689, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.12863077223300934, + "rewards/margins": 0.04329446703195572, + "rewards/rejected": -0.17192521691322327, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 3.924133420536299e-06, + "logits/chosen": -1.2308224439620972, + "logits/rejected": -0.9529396295547485, + "logps/chosen": -383.98529052734375, + "logps/rejected": -426.99755859375, + "loss": 0.6906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.20184054970741272, + "rewards/margins": 0.07641489803791046, + "rewards/rejected": -0.278255432844162, + "step": 1200 + }, + { + "epoch": 0.08, + "eval_logits/chosen": -1.248422384262085, + "eval_logits/rejected": -1.1078686714172363, + "eval_logps/chosen": -475.6541748046875, + "eval_logps/rejected": -521.3765258789062, + "eval_loss": 0.6910752058029175, + "eval_rewards/accuracies": 0.6154999732971191, + "eval_rewards/chosen": -0.2436492145061493, + "eval_rewards/margins": 0.06611540168523788, + "eval_rewards/rejected": -0.3097646236419678, + "eval_runtime": 712.8179, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 1.403, + "step": 1200 + }, + { + "epoch": 0.08, + "learning_rate": 3.956834532374101e-06, + "logits/chosen": -1.3935415744781494, + "logits/rejected": -1.0047038793563843, + "logps/chosen": -454.3526916503906, + "logps/rejected": -516.9708251953125, + "loss": 0.6873, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2490900456905365, + "rewards/margins": 0.10656942427158356, + "rewards/rejected": -0.35565948486328125, + "step": 1210 + }, + { + "epoch": 0.08, + "learning_rate": 3.989535644211904e-06, + "logits/chosen": -1.3957096338272095, + "logits/rejected": -1.147242546081543, + "logps/chosen": -396.89385986328125, + "logps/rejected": -460.62542724609375, + "loss": 0.6902, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.17976659536361694, + "rewards/margins": 0.0863526314496994, + "rewards/rejected": -0.26611918210983276, + "step": 1220 + }, + { + "epoch": 0.08, + "learning_rate": 4.022236756049706e-06, + "logits/chosen": -1.575761079788208, + "logits/rejected": -1.3331005573272705, + "logps/chosen": -450.54547119140625, + "logps/rejected": -464.44378662109375, + "loss": 0.691, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1734083592891693, + "rewards/margins": 0.04637376219034195, + "rewards/rejected": -0.21978211402893066, + "step": 1230 + }, + { + "epoch": 0.08, + "learning_rate": 4.054937867887509e-06, + "logits/chosen": -1.4819010496139526, + "logits/rejected": -1.2105497121810913, + "logps/chosen": -371.9152526855469, + "logps/rejected": -419.6205139160156, + "loss": 0.691, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.14764845371246338, + "rewards/margins": 0.05849369615316391, + "rewards/rejected": -0.2061421424150467, + "step": 1240 + }, + { + "epoch": 0.08, + "learning_rate": 4.087638979725311e-06, + "logits/chosen": -1.3985395431518555, + "logits/rejected": -1.4597551822662354, + "logps/chosen": -394.02911376953125, + "logps/rejected": -422.7919006347656, + "loss": 0.6925, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.16618932783603668, + "rewards/margins": 0.02730773761868477, + "rewards/rejected": -0.19349706172943115, + "step": 1250 + }, + { + "epoch": 0.08, + "learning_rate": 4.1203400915631135e-06, + "logits/chosen": -1.326311707496643, + "logits/rejected": -1.1958644390106201, + "logps/chosen": -424.20330810546875, + "logps/rejected": -432.0690002441406, + "loss": 0.6919, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18687233328819275, + "rewards/margins": 0.036146439611911774, + "rewards/rejected": -0.2230187952518463, + "step": 1260 + }, + { + "epoch": 0.08, + "learning_rate": 4.153041203400916e-06, + "logits/chosen": -1.758768081665039, + "logits/rejected": -1.6330296993255615, + "logps/chosen": -425.97698974609375, + "logps/rejected": -421.9137268066406, + "loss": 0.6925, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.16155469417572021, + "rewards/margins": 0.020571332424879074, + "rewards/rejected": -0.1821260154247284, + "step": 1270 + }, + { + "epoch": 0.08, + "learning_rate": 4.185742315238718e-06, + "logits/chosen": -1.6385771036148071, + "logits/rejected": -1.3802834749221802, + "logps/chosen": -348.26763916015625, + "logps/rejected": -387.1038818359375, + "loss": 0.6899, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.15016606450080872, + "rewards/margins": 0.05070319026708603, + "rewards/rejected": -0.20086923241615295, + "step": 1280 + }, + { + "epoch": 0.08, + "learning_rate": 4.218443427076521e-06, + "logits/chosen": -1.485682725906372, + "logits/rejected": -1.4408928155899048, + "logps/chosen": -411.95843505859375, + "logps/rejected": -446.6746520996094, + "loss": 0.6922, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.22919993102550507, + "rewards/margins": 0.033245109021663666, + "rewards/rejected": -0.26244503259658813, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.251144538914323e-06, + "logits/chosen": -1.6215198040008545, + "logits/rejected": -1.503710150718689, + "logps/chosen": -448.3627014160156, + "logps/rejected": -474.224853515625, + "loss": 0.6901, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.22777239978313446, + "rewards/margins": 0.039057932794094086, + "rewards/rejected": -0.26683029532432556, + "step": 1300 + }, + { + "epoch": 0.09, + "eval_logits/chosen": -1.7383815050125122, + "eval_logits/rejected": -1.5767197608947754, + "eval_logps/chosen": -401.8933410644531, + "eval_logps/rejected": -423.51837158203125, + "eval_loss": 0.6908622980117798, + "eval_rewards/accuracies": 0.6169999837875366, + "eval_rewards/chosen": -0.16988839209079742, + "eval_rewards/margins": 0.04201807454228401, + "eval_rewards/rejected": -0.21190647780895233, + "eval_runtime": 713.8861, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 1.401, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.283845650752126e-06, + "logits/chosen": -1.7140634059906006, + "logits/rejected": -1.4950907230377197, + "logps/chosen": -477.6435546875, + "logps/rejected": -462.10394287109375, + "loss": 0.6909, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19143159687519073, + "rewards/margins": 0.04865190014243126, + "rewards/rejected": -0.2400834858417511, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.316546762589928e-06, + "logits/chosen": -1.6233209371566772, + "logits/rejected": -1.5323542356491089, + "logps/chosen": -424.8470153808594, + "logps/rejected": -447.05987548828125, + "loss": 0.6904, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.21691949665546417, + "rewards/margins": 0.05651101469993591, + "rewards/rejected": -0.2734305262565613, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.349247874427731e-06, + "logits/chosen": -1.5113164186477661, + "logits/rejected": -1.2201378345489502, + "logps/chosen": -510.0796813964844, + "logps/rejected": -651.254638671875, + "loss": 0.6919, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2835456430912018, + "rewards/margins": 0.10109242051839828, + "rewards/rejected": -0.38463807106018066, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.381948986265534e-06, + "logits/chosen": -1.8363587856292725, + "logits/rejected": -1.625396490097046, + "logps/chosen": -422.99383544921875, + "logps/rejected": -457.910888671875, + "loss": 0.6917, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.16014917194843292, + "rewards/margins": 0.0479239895939827, + "rewards/rejected": -0.20807316899299622, + "step": 1340 + }, + { + "epoch": 0.09, + "learning_rate": 4.414650098103336e-06, + "logits/chosen": -2.073012590408325, + "logits/rejected": -1.8140983581542969, + "logps/chosen": -354.55499267578125, + "logps/rejected": -365.2477111816406, + "loss": 0.6911, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09260248392820358, + "rewards/margins": 0.03311682492494583, + "rewards/rejected": -0.12571930885314941, + "step": 1350 + }, + { + "epoch": 0.09, + "learning_rate": 4.447351209941138e-06, + "logits/chosen": -1.6008856296539307, + "logits/rejected": -1.4585543870925903, + "logps/chosen": -376.71673583984375, + "logps/rejected": -375.45831298828125, + "loss": 0.6921, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.16705211997032166, + "rewards/margins": 0.016883080825209618, + "rewards/rejected": -0.18393521010875702, + "step": 1360 + }, + { + "epoch": 0.09, + "learning_rate": 4.480052321778941e-06, + "logits/chosen": -1.5430247783660889, + "logits/rejected": -1.509578824043274, + "logps/chosen": -386.68414306640625, + "logps/rejected": -411.19598388671875, + "loss": 0.6913, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.15977385640144348, + "rewards/margins": 0.04520783945918083, + "rewards/rejected": -0.20498168468475342, + "step": 1370 + }, + { + "epoch": 0.09, + "learning_rate": 4.5127534336167435e-06, + "logits/chosen": -1.5650596618652344, + "logits/rejected": -1.3618630170822144, + "logps/chosen": -393.9236145019531, + "logps/rejected": -384.3157043457031, + "loss": 0.6917, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.14674964547157288, + "rewards/margins": 0.034052155911922455, + "rewards/rejected": -0.18080179393291473, + "step": 1380 + }, + { + "epoch": 0.09, + "learning_rate": 4.5454545454545455e-06, + "logits/chosen": -1.5686426162719727, + "logits/rejected": -1.5197176933288574, + "logps/chosen": -309.3796691894531, + "logps/rejected": -324.637451171875, + "loss": 0.692, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.13992100954055786, + "rewards/margins": 0.025466833263635635, + "rewards/rejected": -0.165387824177742, + "step": 1390 + }, + { + "epoch": 0.09, + "learning_rate": 4.578155657292348e-06, + "logits/chosen": -1.6215531826019287, + "logits/rejected": -1.5280569791793823, + "logps/chosen": -397.262939453125, + "logps/rejected": -486.24444580078125, + "loss": 0.6887, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1418572962284088, + "rewards/margins": 0.08005955070257187, + "rewards/rejected": -0.22191686928272247, + "step": 1400 + }, + { + "epoch": 0.09, + "eval_logits/chosen": -1.4353091716766357, + "eval_logits/rejected": -1.2833685874938965, + "eval_logps/chosen": -424.39398193359375, + "eval_logps/rejected": -463.2532043457031, + "eval_loss": 0.6905860304832458, + "eval_rewards/accuracies": 0.6420000195503235, + "eval_rewards/chosen": -0.19238901138305664, + "eval_rewards/margins": 0.05925232544541359, + "eval_rewards/rejected": -0.25164133310317993, + "eval_runtime": 714.6679, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 1.399, + "step": 1400 + }, + { + "epoch": 0.09, + "learning_rate": 4.610856769130151e-06, + "logits/chosen": -1.557550072669983, + "logits/rejected": -1.5161031484603882, + "logps/chosen": -425.23309326171875, + "logps/rejected": -470.5696716308594, + "loss": 0.692, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.18608888983726501, + "rewards/margins": 0.07483814656734467, + "rewards/rejected": -0.2609270215034485, + "step": 1410 + }, + { + "epoch": 0.09, + "learning_rate": 4.643557880967953e-06, + "logits/chosen": -1.56943678855896, + "logits/rejected": -1.4001885652542114, + "logps/chosen": -350.5581970214844, + "logps/rejected": -405.6707458496094, + "loss": 0.69, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.15175940096378326, + "rewards/margins": 0.05761238932609558, + "rewards/rejected": -0.20937177538871765, + "step": 1420 + }, + { + "epoch": 0.09, + "learning_rate": 4.676258992805755e-06, + "logits/chosen": -1.5054957866668701, + "logits/rejected": -1.2771097421646118, + "logps/chosen": -451.77630615234375, + "logps/rejected": -502.28948974609375, + "loss": 0.6907, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.18215635418891907, + "rewards/margins": 0.0926615446805954, + "rewards/rejected": -0.27481788396835327, + "step": 1430 + }, + { + "epoch": 0.09, + "learning_rate": 4.708960104643558e-06, + "logits/chosen": -1.6860449314117432, + "logits/rejected": -1.6177040338516235, + "logps/chosen": -447.60247802734375, + "logps/rejected": -456.874267578125, + "loss": 0.691, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.141392782330513, + "rewards/margins": 0.046031877398490906, + "rewards/rejected": -0.18742462992668152, + "step": 1440 + }, + { + "epoch": 0.09, + "learning_rate": 4.741661216481361e-06, + "logits/chosen": -1.6008634567260742, + "logits/rejected": -1.583705186843872, + "logps/chosen": -372.9961242675781, + "logps/rejected": -434.13079833984375, + "loss": 0.6898, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13665054738521576, + "rewards/margins": 0.04988841712474823, + "rewards/rejected": -0.186538964509964, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.774362328319163e-06, + "logits/chosen": -1.4470480680465698, + "logits/rejected": -1.274153470993042, + "logps/chosen": -381.94073486328125, + "logps/rejected": -374.5335998535156, + "loss": 0.6933, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.16615068912506104, + "rewards/margins": 0.022542305290699005, + "rewards/rejected": -0.18869297206401825, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.807063440156966e-06, + "logits/chosen": -1.431060791015625, + "logits/rejected": -1.1888434886932373, + "logps/chosen": -480.28546142578125, + "logps/rejected": -508.6800842285156, + "loss": 0.6882, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.21697595715522766, + "rewards/margins": 0.08976506441831589, + "rewards/rejected": -0.30674102902412415, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.839764551994769e-06, + "logits/chosen": -1.338303565979004, + "logits/rejected": -1.0660719871520996, + "logps/chosen": -683.1473999023438, + "logps/rejected": -721.8735961914062, + "loss": 0.6927, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.43443894386291504, + "rewards/margins": 0.07645972073078156, + "rewards/rejected": -0.5108987092971802, + "step": 1480 + }, + { + "epoch": 0.1, + "learning_rate": 4.872465663832571e-06, + "logits/chosen": -1.2499490976333618, + "logits/rejected": -1.1854174137115479, + "logps/chosen": -591.4575805664062, + "logps/rejected": -592.4474487304688, + "loss": 0.6924, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.38369911909103394, + "rewards/margins": 0.021449998021125793, + "rewards/rejected": -0.4051491320133209, + "step": 1490 + }, + { + "epoch": 0.1, + "learning_rate": 4.905166775670373e-06, + "logits/chosen": -1.4410350322723389, + "logits/rejected": -1.2265688180923462, + "logps/chosen": -486.8252868652344, + "logps/rejected": -504.9773864746094, + "loss": 0.69, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2650652825832367, + "rewards/margins": 0.04554576426744461, + "rewards/rejected": -0.3106110692024231, + "step": 1500 + }, + { + "epoch": 0.1, + "eval_logits/chosen": -1.483712077140808, + "eval_logits/rejected": -1.3358349800109863, + "eval_logps/chosen": -428.61151123046875, + "eval_logps/rejected": -450.56256103515625, + "eval_loss": 0.6911273002624512, + "eval_rewards/accuracies": 0.6230000257492065, + "eval_rewards/chosen": -0.19660654664039612, + "eval_rewards/margins": 0.042344145476818085, + "eval_rewards/rejected": -0.2389506846666336, + "eval_runtime": 715.7281, + "eval_samples_per_second": 2.794, + "eval_steps_per_second": 1.397, + "step": 1500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9378678875081756e-06, + "logits/chosen": -1.7270991802215576, + "logits/rejected": -1.4997197389602661, + "logps/chosen": -360.70330810546875, + "logps/rejected": -364.0918884277344, + "loss": 0.6913, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.14595970511436462, + "rewards/margins": 0.03439674526453018, + "rewards/rejected": -0.1803564578294754, + "step": 1510 + }, + { + "epoch": 0.1, + "learning_rate": 4.9705689993459784e-06, + "logits/chosen": -1.6035175323486328, + "logits/rejected": -1.2797515392303467, + "logps/chosen": -346.11956787109375, + "logps/rejected": -347.30596923828125, + "loss": 0.6893, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.14119425415992737, + "rewards/margins": 0.05889949947595596, + "rewards/rejected": -0.20009374618530273, + "step": 1520 + }, + { + "epoch": 0.1, + "learning_rate": 4.999999934793849e-06, + "logits/chosen": -1.4298112392425537, + "logits/rejected": -1.3906731605529785, + "logps/chosen": -448.69049072265625, + "logps/rejected": -460.69403076171875, + "loss": 0.6915, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.20073723793029785, + "rewards/margins": 0.04141997918486595, + "rewards/rejected": -0.2421572208404541, + "step": 1530 + }, + { + "epoch": 0.1, + "learning_rate": 4.999992110059814e-06, + "logits/chosen": -1.2659540176391602, + "logits/rejected": -1.233628511428833, + "logps/chosen": -469.53607177734375, + "logps/rejected": -498.04742431640625, + "loss": 0.6919, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19184866547584534, + "rewards/margins": 0.0486246719956398, + "rewards/rejected": -0.24047331511974335, + "step": 1540 + }, + { + "epoch": 0.1, + "learning_rate": 4.999971244142299e-06, + "logits/chosen": -1.3973344564437866, + "logits/rejected": -1.0956056118011475, + "logps/chosen": -451.66668701171875, + "logps/rejected": -487.6893615722656, + "loss": 0.6923, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17821362614631653, + "rewards/margins": 0.07526890188455582, + "rewards/rejected": -0.25348252058029175, + "step": 1550 + }, + { + "epoch": 0.1, + "learning_rate": 4.999937337150149e-06, + "logits/chosen": -1.2560392618179321, + "logits/rejected": -1.0587233304977417, + "logps/chosen": -322.35064697265625, + "logps/rejected": -345.85009765625, + "loss": 0.6913, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08783578872680664, + "rewards/margins": 0.038880202919244766, + "rewards/rejected": -0.1267159879207611, + "step": 1560 + }, + { + "epoch": 0.1, + "learning_rate": 4.99989038926024e-06, + "logits/chosen": -1.1934096813201904, + "logits/rejected": -1.1851760149002075, + "logps/chosen": -282.2227478027344, + "logps/rejected": -342.9458312988281, + "loss": 0.6892, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08379466831684113, + "rewards/margins": 0.058521050959825516, + "rewards/rejected": -0.14231571555137634, + "step": 1570 + }, + { + "epoch": 0.1, + "learning_rate": 4.999830400717476e-06, + "logits/chosen": -1.2530221939086914, + "logits/rejected": -1.1964467763900757, + "logps/chosen": -362.67901611328125, + "logps/rejected": -389.4422302246094, + "loss": 0.6911, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07529197633266449, + "rewards/margins": 0.04303868114948273, + "rewards/rejected": -0.11833065748214722, + "step": 1580 + }, + { + "epoch": 0.1, + "learning_rate": 4.999757371834787e-06, + "logits/chosen": -0.992030918598175, + "logits/rejected": -1.1213949918746948, + "logps/chosen": -344.0542297363281, + "logps/rejected": -430.8173828125, + "loss": 0.6882, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.10423535108566284, + "rewards/margins": 0.10227863490581512, + "rewards/rejected": -0.20651397109031677, + "step": 1590 + }, + { + "epoch": 0.1, + "learning_rate": 4.999671302993125e-06, + "logits/chosen": -0.7400294542312622, + "logits/rejected": -0.7438528537750244, + "logps/chosen": -378.1280822753906, + "logps/rejected": -455.39154052734375, + "loss": 0.6896, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.12954765558242798, + "rewards/margins": 0.0618753544986248, + "rewards/rejected": -0.19142302870750427, + "step": 1600 + }, + { + "epoch": 0.1, + "eval_logits/chosen": -0.9061187505722046, + "eval_logits/rejected": -0.792312741279602, + "eval_logps/chosen": -381.1222229003906, + "eval_logps/rejected": -427.01788330078125, + "eval_loss": 0.6905171871185303, + "eval_rewards/accuracies": 0.6349999904632568, + "eval_rewards/chosen": -0.14911724627017975, + "eval_rewards/margins": 0.06628872454166412, + "eval_rewards/rejected": -0.21540597081184387, + "eval_runtime": 712.34, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 1.404, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.999572194641471e-06, + "logits/chosen": -1.0279515981674194, + "logits/rejected": -0.7255226969718933, + "logps/chosen": -444.42486572265625, + "logps/rejected": -483.9266662597656, + "loss": 0.688, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16890659928321838, + "rewards/margins": 0.0919717401266098, + "rewards/rejected": -0.260878324508667, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.999460047296819e-06, + "logits/chosen": -0.7330793142318726, + "logits/rejected": -0.6069439053535461, + "logps/chosen": -463.1800842285156, + "logps/rejected": -511.03997802734375, + "loss": 0.6915, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.24757592380046844, + "rewards/margins": 0.06876906007528305, + "rewards/rejected": -0.3163450062274933, + "step": 1620 + }, + { + "epoch": 0.11, + "learning_rate": 4.999334861544186e-06, + "logits/chosen": -0.7602131962776184, + "logits/rejected": -0.7034991979598999, + "logps/chosen": -490.57562255859375, + "logps/rejected": -494.21197509765625, + "loss": 0.6906, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.2611224055290222, + "rewards/margins": 0.05963968485593796, + "rewards/rejected": -0.3207620680332184, + "step": 1630 + }, + { + "epoch": 0.11, + "learning_rate": 4.999196638036604e-06, + "logits/chosen": -0.8633691668510437, + "logits/rejected": -0.657917320728302, + "logps/chosen": -595.30029296875, + "logps/rejected": -597.7689819335938, + "loss": 0.6924, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3078852891921997, + "rewards/margins": 0.03770507127046585, + "rewards/rejected": -0.34559035301208496, + "step": 1640 + }, + { + "epoch": 0.11, + "learning_rate": 4.999045377495111e-06, + "logits/chosen": -0.49410897493362427, + "logits/rejected": -0.6826598048210144, + "logps/chosen": -511.33984375, + "logps/rejected": -666.5721435546875, + "loss": 0.6906, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.33700522780418396, + "rewards/margins": 0.06744986027479172, + "rewards/rejected": -0.4044550359249115, + "step": 1650 + }, + { + "epoch": 0.11, + "learning_rate": 4.998881080708759e-06, + "logits/chosen": -0.972652792930603, + "logits/rejected": -0.9712132215499878, + "logps/chosen": -429.01959228515625, + "logps/rejected": -423.8321838378906, + "loss": 0.6931, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.18869802355766296, + "rewards/margins": 0.02575744315981865, + "rewards/rejected": -0.2144554853439331, + "step": 1660 + }, + { + "epoch": 0.11, + "learning_rate": 4.998703748534599e-06, + "logits/chosen": -0.9454406499862671, + "logits/rejected": -0.8232323527336121, + "logps/chosen": -364.2972106933594, + "logps/rejected": -374.5244140625, + "loss": 0.6915, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.12366746366024017, + "rewards/margins": 0.060917746275663376, + "rewards/rejected": -0.18458519876003265, + "step": 1670 + }, + { + "epoch": 0.11, + "learning_rate": 4.998513381897683e-06, + "logits/chosen": -1.4703190326690674, + "logits/rejected": -1.2022755146026611, + "logps/chosen": -322.563720703125, + "logps/rejected": -298.5383605957031, + "loss": 0.6911, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.08331999182701111, + "rewards/margins": 0.03296893090009689, + "rewards/rejected": -0.116288922727108, + "step": 1680 + }, + { + "epoch": 0.11, + "learning_rate": 4.9983099817910565e-06, + "logits/chosen": -1.4085242748260498, + "logits/rejected": -1.3252454996109009, + "logps/chosen": -338.73529052734375, + "logps/rejected": -386.0179138183594, + "loss": 0.6907, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09347338229417801, + "rewards/margins": 0.05176064372062683, + "rewards/rejected": -0.14523401856422424, + "step": 1690 + }, + { + "epoch": 0.11, + "learning_rate": 4.998093549275754e-06, + "logits/chosen": -1.4733991622924805, + "logits/rejected": -1.5043426752090454, + "logps/chosen": -324.3277282714844, + "logps/rejected": -409.79693603515625, + "loss": 0.6896, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06399369984865189, + "rewards/margins": 0.06367628276348114, + "rewards/rejected": -0.12766997516155243, + "step": 1700 + }, + { + "epoch": 0.11, + "eval_logits/chosen": -1.480065107345581, + "eval_logits/rejected": -1.3317376375198364, + "eval_logps/chosen": -311.48211669921875, + "eval_logps/rejected": -351.01513671875, + "eval_loss": 0.6903863549232483, + "eval_rewards/accuracies": 0.6255000233650208, + "eval_rewards/chosen": -0.07947719097137451, + "eval_rewards/margins": 0.059926047921180725, + "eval_rewards/rejected": -0.13940322399139404, + "eval_runtime": 714.0547, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 1.4, + "step": 1700 + }, + { + "epoch": 0.11, + "learning_rate": 4.997864085480794e-06, + "logits/chosen": -1.6208089590072632, + "logits/rejected": -1.4398419857025146, + "logps/chosen": -340.4198913574219, + "logps/rejected": -412.36572265625, + "loss": 0.6913, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.07407370954751968, + "rewards/margins": 0.09048272669315338, + "rewards/rejected": -0.16455642879009247, + "step": 1710 + }, + { + "epoch": 0.11, + "learning_rate": 4.997621591603171e-06, + "logits/chosen": -1.4687002897262573, + "logits/rejected": -1.359674096107483, + "logps/chosen": -256.868896484375, + "logps/rejected": -314.82879638671875, + "loss": 0.6916, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09883610904216766, + "rewards/margins": 0.05864466354250908, + "rewards/rejected": -0.15748076140880585, + "step": 1720 + }, + { + "epoch": 0.11, + "learning_rate": 4.997366068907853e-06, + "logits/chosen": -1.5249855518341064, + "logits/rejected": -1.4639043807983398, + "logps/chosen": -335.9312744140625, + "logps/rejected": -353.7437438964844, + "loss": 0.6906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07752153277397156, + "rewards/margins": 0.037687577307224274, + "rewards/rejected": -0.11520912498235703, + "step": 1730 + }, + { + "epoch": 0.11, + "learning_rate": 4.997097518727771e-06, + "logits/chosen": -1.6601321697235107, + "logits/rejected": -1.3635004758834839, + "logps/chosen": -308.7919616699219, + "logps/rejected": -330.8180236816406, + "loss": 0.6899, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0858355313539505, + "rewards/margins": 0.05514850094914436, + "rewards/rejected": -0.14098402857780457, + "step": 1740 + }, + { + "epoch": 0.11, + "learning_rate": 4.9968159424638155e-06, + "logits/chosen": -1.537062644958496, + "logits/rejected": -1.6434835195541382, + "logps/chosen": -314.24200439453125, + "logps/rejected": -400.8989562988281, + "loss": 0.691, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0966355949640274, + "rewards/margins": 0.0348118357360363, + "rewards/rejected": -0.131447434425354, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9965213415848235e-06, + "logits/chosen": -1.3464255332946777, + "logits/rejected": -1.0850141048431396, + "logps/chosen": -371.60968017578125, + "logps/rejected": -398.5357360839844, + "loss": 0.6915, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14320513606071472, + "rewards/margins": 0.06378600746393204, + "rewards/rejected": -0.20699115097522736, + "step": 1760 + }, + { + "epoch": 0.12, + "learning_rate": 4.9962137176275805e-06, + "logits/chosen": -1.5550998449325562, + "logits/rejected": -1.376962423324585, + "logps/chosen": -311.85333251953125, + "logps/rejected": -352.107177734375, + "loss": 0.6925, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07755790650844574, + "rewards/margins": 0.04081185907125473, + "rewards/rejected": -0.11836977303028107, + "step": 1770 + }, + { + "epoch": 0.12, + "learning_rate": 4.9958930721968015e-06, + "logits/chosen": -1.5606944561004639, + "logits/rejected": -1.6893508434295654, + "logps/chosen": -301.8111572265625, + "logps/rejected": -350.09307861328125, + "loss": 0.6917, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08977598696947098, + "rewards/margins": 0.03678018972277641, + "rewards/rejected": -0.1265561580657959, + "step": 1780 + }, + { + "epoch": 0.12, + "learning_rate": 4.995559406965132e-06, + "logits/chosen": -1.7458531856536865, + "logits/rejected": -1.5053004026412964, + "logps/chosen": -324.5157775878906, + "logps/rejected": -351.40771484375, + "loss": 0.6905, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09260044991970062, + "rewards/margins": 0.056507617235183716, + "rewards/rejected": -0.14910808205604553, + "step": 1790 + }, + { + "epoch": 0.12, + "learning_rate": 4.995212723673131e-06, + "logits/chosen": -1.6229438781738281, + "logits/rejected": -1.3902084827423096, + "logps/chosen": -318.61859130859375, + "logps/rejected": -324.81866455078125, + "loss": 0.6907, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08841551840305328, + "rewards/margins": 0.05618295073509216, + "rewards/rejected": -0.14459846913814545, + "step": 1800 + }, + { + "epoch": 0.12, + "eval_logits/chosen": -1.5447196960449219, + "eval_logits/rejected": -1.3930197954177856, + "eval_logps/chosen": -338.71893310546875, + "eval_logps/rejected": -368.562744140625, + "eval_loss": 0.6905578970909119, + "eval_rewards/accuracies": 0.6294999718666077, + "eval_rewards/chosen": -0.10671400278806686, + "eval_rewards/margins": 0.05023682862520218, + "eval_rewards/rejected": -0.15695083141326904, + "eval_runtime": 713.2978, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 1.402, + "step": 1800 + }, + { + "epoch": 0.12, + "learning_rate": 4.99485302412927e-06, + "logits/chosen": -1.2425215244293213, + "logits/rejected": -1.2682154178619385, + "logps/chosen": -321.0690002441406, + "logps/rejected": -383.86932373046875, + "loss": 0.6892, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1130579486489296, + "rewards/margins": 0.062107671052217484, + "rewards/rejected": -0.17516560852527618, + "step": 1810 + }, + { + "epoch": 0.12, + "learning_rate": 4.994480310209918e-06, + "logits/chosen": -1.6616127490997314, + "logits/rejected": -1.7467199563980103, + "logps/chosen": -315.12713623046875, + "logps/rejected": -386.51824951171875, + "loss": 0.6901, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07314414530992508, + "rewards/margins": 0.057636868208646774, + "rewards/rejected": -0.13078099489212036, + "step": 1820 + }, + { + "epoch": 0.12, + "learning_rate": 4.994094583859332e-06, + "logits/chosen": -1.650968313217163, + "logits/rejected": -1.4825265407562256, + "logps/chosen": -240.92605590820312, + "logps/rejected": -324.64337158203125, + "loss": 0.6895, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07601331919431686, + "rewards/margins": 0.051687806844711304, + "rewards/rejected": -0.12770113348960876, + "step": 1830 + }, + { + "epoch": 0.12, + "learning_rate": 4.9936958470896525e-06, + "logits/chosen": -1.4662501811981201, + "logits/rejected": -1.2516024112701416, + "logps/chosen": -343.7925109863281, + "logps/rejected": -392.44610595703125, + "loss": 0.6896, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.12004733085632324, + "rewards/margins": 0.09155675023794174, + "rewards/rejected": -0.2116040736436844, + "step": 1840 + }, + { + "epoch": 0.12, + "learning_rate": 4.993284101980883e-06, + "logits/chosen": -1.4967622756958008, + "logits/rejected": -1.344617247581482, + "logps/chosen": -351.4579162597656, + "logps/rejected": -427.65966796875, + "loss": 0.6849, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1089625209569931, + "rewards/margins": 0.11583471298217773, + "rewards/rejected": -0.22479721903800964, + "step": 1850 + }, + { + "epoch": 0.12, + "learning_rate": 4.9928593506808885e-06, + "logits/chosen": -1.5015496015548706, + "logits/rejected": -1.3130124807357788, + "logps/chosen": -399.886474609375, + "logps/rejected": -407.5121765136719, + "loss": 0.6932, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.14307209849357605, + "rewards/margins": 0.04460294917225838, + "rewards/rejected": -0.18767504394054413, + "step": 1860 + }, + { + "epoch": 0.12, + "learning_rate": 4.992421595405381e-06, + "logits/chosen": -1.4745498895645142, + "logits/rejected": -1.1933575868606567, + "logps/chosen": -357.6884765625, + "logps/rejected": -320.2742614746094, + "loss": 0.6926, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.12949608266353607, + "rewards/margins": 0.03343268856406212, + "rewards/rejected": -0.16292878985404968, + "step": 1870 + }, + { + "epoch": 0.12, + "learning_rate": 4.991970838437905e-06, + "logits/chosen": -1.453904628753662, + "logits/rejected": -1.405748963356018, + "logps/chosen": -353.97015380859375, + "logps/rejected": -452.02789306640625, + "loss": 0.6906, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.12864060699939728, + "rewards/margins": 0.06995304673910141, + "rewards/rejected": -0.1985936164855957, + "step": 1880 + }, + { + "epoch": 0.12, + "learning_rate": 4.9915070821298294e-06, + "logits/chosen": -1.5130014419555664, + "logits/rejected": -1.3204481601715088, + "logps/chosen": -277.0668640136719, + "logps/rejected": -301.8009338378906, + "loss": 0.6916, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.11009702831506729, + "rewards/margins": 0.03382255882024765, + "rewards/rejected": -0.14391960203647614, + "step": 1890 + }, + { + "epoch": 0.12, + "learning_rate": 4.991030328900336e-06, + "logits/chosen": -1.4666025638580322, + "logits/rejected": -1.2133960723876953, + "logps/chosen": -396.7815856933594, + "logps/rejected": -397.72039794921875, + "loss": 0.6903, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.11221235990524292, + "rewards/margins": 0.07097505033016205, + "rewards/rejected": -0.18318741023540497, + "step": 1900 + }, + { + "epoch": 0.12, + "eval_logits/chosen": -1.503396987915039, + "eval_logits/rejected": -1.3577290773391724, + "eval_logps/chosen": -351.95111083984375, + "eval_logps/rejected": -381.6029357910156, + "eval_loss": 0.6906358003616333, + "eval_rewards/accuracies": 0.6510000228881836, + "eval_rewards/chosen": -0.11994615197181702, + "eval_rewards/margins": 0.050044890493154526, + "eval_rewards/rejected": -0.16999106109142303, + "eval_runtime": 711.5755, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.405, + "step": 1900 + }, + { + "epoch": 0.12, + "learning_rate": 4.9905405812364014e-06, + "logits/chosen": -1.5339515209197998, + "logits/rejected": -1.5163123607635498, + "logps/chosen": -315.5993957519531, + "logps/rejected": -368.30645751953125, + "loss": 0.6911, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.11330322176218033, + "rewards/margins": 0.050241757184267044, + "rewards/rejected": -0.16354498267173767, + "step": 1910 + }, + { + "epoch": 0.13, + "learning_rate": 4.990037841692791e-06, + "logits/chosen": -1.4599535465240479, + "logits/rejected": -1.3372766971588135, + "logps/chosen": -335.96630859375, + "logps/rejected": -352.112060546875, + "loss": 0.6899, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1299254149198532, + "rewards/margins": 0.05943036824464798, + "rewards/rejected": -0.1893557757139206, + "step": 1920 + }, + { + "epoch": 0.13, + "learning_rate": 4.989522112892039e-06, + "logits/chosen": -1.4036133289337158, + "logits/rejected": -1.385619878768921, + "logps/chosen": -369.69952392578125, + "logps/rejected": -417.546630859375, + "loss": 0.6908, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.17207011580467224, + "rewards/margins": 0.04873714596033096, + "rewards/rejected": -0.220807284116745, + "step": 1930 + }, + { + "epoch": 0.13, + "learning_rate": 4.98899339752444e-06, + "logits/chosen": -1.4383190870285034, + "logits/rejected": -1.197371006011963, + "logps/chosen": -363.4906311035156, + "logps/rejected": -421.6170959472656, + "loss": 0.6884, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13388827443122864, + "rewards/margins": 0.08706989139318466, + "rewards/rejected": -0.2209581583738327, + "step": 1940 + }, + { + "epoch": 0.13, + "learning_rate": 4.988451698348033e-06, + "logits/chosen": -1.4711196422576904, + "logits/rejected": -1.5271979570388794, + "logps/chosen": -294.31884765625, + "logps/rejected": -352.82635498046875, + "loss": 0.6931, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.11599130928516388, + "rewards/margins": 0.03823809325695038, + "rewards/rejected": -0.15422940254211426, + "step": 1950 + }, + { + "epoch": 0.13, + "learning_rate": 4.987897018188585e-06, + "logits/chosen": -1.5414386987686157, + "logits/rejected": -1.3048475980758667, + "logps/chosen": -337.1903991699219, + "logps/rejected": -325.1073913574219, + "loss": 0.6926, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11457107961177826, + "rewards/margins": 0.04459307715296745, + "rewards/rejected": -0.159164160490036, + "step": 1960 + }, + { + "epoch": 0.13, + "learning_rate": 4.9873293599395814e-06, + "logits/chosen": -1.6896965503692627, + "logits/rejected": -1.5547794103622437, + "logps/chosen": -283.25335693359375, + "logps/rejected": -343.1858825683594, + "loss": 0.6885, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08982162177562714, + "rewards/margins": 0.06391600519418716, + "rewards/rejected": -0.1537376344203949, + "step": 1970 + }, + { + "epoch": 0.13, + "learning_rate": 4.986748726562203e-06, + "logits/chosen": -1.6674537658691406, + "logits/rejected": -1.5472710132598877, + "logps/chosen": -302.44647216796875, + "logps/rejected": -321.59149169921875, + "loss": 0.6913, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08659198135137558, + "rewards/margins": 0.044037431478500366, + "rewards/rejected": -0.13062942028045654, + "step": 1980 + }, + { + "epoch": 0.13, + "learning_rate": 4.98615512108532e-06, + "logits/chosen": -1.6517025232315063, + "logits/rejected": -1.6035429239273071, + "logps/chosen": -308.13177490234375, + "logps/rejected": -352.00897216796875, + "loss": 0.6908, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08662106841802597, + "rewards/margins": 0.04209376126527786, + "rewards/rejected": -0.12871482968330383, + "step": 1990 + }, + { + "epoch": 0.13, + "learning_rate": 4.985548546605469e-06, + "logits/chosen": -1.2778985500335693, + "logits/rejected": -1.4368833303451538, + "logps/chosen": -372.85302734375, + "logps/rejected": -435.37042236328125, + "loss": 0.6907, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.15974724292755127, + "rewards/margins": 0.04487691447138786, + "rewards/rejected": -0.20462414622306824, + "step": 2000 + }, + { + "epoch": 0.13, + "eval_logits/chosen": -1.3523448705673218, + "eval_logits/rejected": -1.2126736640930176, + "eval_logps/chosen": -394.4134521484375, + "eval_logps/rejected": -441.41143798828125, + "eval_loss": 0.6901971697807312, + "eval_rewards/accuracies": 0.6430000066757202, + "eval_rewards/chosen": -0.16240845620632172, + "eval_rewards/margins": 0.06739108264446259, + "eval_rewards/rejected": -0.2297995388507843, + "eval_runtime": 713.6841, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 1.401, + "step": 2000 + }, + { + "epoch": 0.13, + "learning_rate": 4.984929006286838e-06, + "logits/chosen": -1.148874044418335, + "logits/rejected": -1.0555192232131958, + "logps/chosen": -372.86138916015625, + "logps/rejected": -383.76776123046875, + "loss": 0.6952, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.1635473668575287, + "rewards/margins": 0.007450105156749487, + "rewards/rejected": -0.1709974706172943, + "step": 2010 + }, + { + "epoch": 0.13, + "learning_rate": 4.984296503361256e-06, + "logits/chosen": -1.4644495248794556, + "logits/rejected": -1.2704464197158813, + "logps/chosen": -321.61212158203125, + "logps/rejected": -318.55328369140625, + "loss": 0.6919, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.11969226598739624, + "rewards/margins": 0.03894500806927681, + "rewards/rejected": -0.15863725543022156, + "step": 2020 + }, + { + "epoch": 0.13, + "learning_rate": 4.9836510411281645e-06, + "logits/chosen": -1.4437366724014282, + "logits/rejected": -1.3444958925247192, + "logps/chosen": -381.04412841796875, + "logps/rejected": -431.23260498046875, + "loss": 0.6872, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.10251230001449585, + "rewards/margins": 0.08959086239337921, + "rewards/rejected": -0.19210317730903625, + "step": 2030 + }, + { + "epoch": 0.13, + "learning_rate": 4.982992622954613e-06, + "logits/chosen": -1.6072601079940796, + "logits/rejected": -1.3610594272613525, + "logps/chosen": -382.94049072265625, + "logps/rejected": -339.80303955078125, + "loss": 0.6885, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09941227734088898, + "rewards/margins": 0.07103113830089569, + "rewards/rejected": -0.17044341564178467, + "step": 2040 + }, + { + "epoch": 0.13, + "learning_rate": 4.9823212522752325e-06, + "logits/chosen": -1.636498212814331, + "logits/rejected": -1.4854671955108643, + "logps/chosen": -410.88037109375, + "logps/rejected": -476.26806640625, + "loss": 0.6876, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.13306666910648346, + "rewards/margins": 0.09942369163036346, + "rewards/rejected": -0.23249034583568573, + "step": 2050 + }, + { + "epoch": 0.13, + "learning_rate": 4.981636932592222e-06, + "logits/chosen": -1.3983194828033447, + "logits/rejected": -1.2955951690673828, + "logps/chosen": -323.72686767578125, + "logps/rejected": -399.1975402832031, + "loss": 0.6908, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11081385612487793, + "rewards/margins": 0.07949165254831314, + "rewards/rejected": -0.19030550122261047, + "step": 2060 + }, + { + "epoch": 0.14, + "learning_rate": 4.980939667475328e-06, + "logits/chosen": -1.643601417541504, + "logits/rejected": -1.3366024494171143, + "logps/chosen": -408.3066101074219, + "logps/rejected": -416.02716064453125, + "loss": 0.6905, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.13413146138191223, + "rewards/margins": 0.06831257790327072, + "rewards/rejected": -0.20244404673576355, + "step": 2070 + }, + { + "epoch": 0.14, + "learning_rate": 4.980229460561826e-06, + "logits/chosen": -1.4907690286636353, + "logits/rejected": -1.3280975818634033, + "logps/chosen": -370.56280517578125, + "logps/rejected": -468.95697021484375, + "loss": 0.6893, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.15248145163059235, + "rewards/margins": 0.11370065063238144, + "rewards/rejected": -0.2661820948123932, + "step": 2080 + }, + { + "epoch": 0.14, + "learning_rate": 4.979506315556503e-06, + "logits/chosen": -1.379464864730835, + "logits/rejected": -1.1236019134521484, + "logps/chosen": -457.6728515625, + "logps/rejected": -503.1261291503906, + "loss": 0.6905, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17118071019649506, + "rewards/margins": 0.09644129872322083, + "rewards/rejected": -0.2676219940185547, + "step": 2090 + }, + { + "epoch": 0.14, + "learning_rate": 4.9787702362316395e-06, + "logits/chosen": -1.5906661748886108, + "logits/rejected": -1.7217735052108765, + "logps/chosen": -322.1238098144531, + "logps/rejected": -401.7293701171875, + "loss": 0.6901, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.13129660487174988, + "rewards/margins": 0.05409153550863266, + "rewards/rejected": -0.18538813292980194, + "step": 2100 + }, + { + "epoch": 0.14, + "eval_logits/chosen": -1.4994711875915527, + "eval_logits/rejected": -1.348573923110962, + "eval_logps/chosen": -364.04266357421875, + "eval_logps/rejected": -413.4233093261719, + "eval_loss": 0.6901757717132568, + "eval_rewards/accuracies": 0.6455000042915344, + "eval_rewards/chosen": -0.13203772902488708, + "eval_rewards/margins": 0.06977371871471405, + "eval_rewards/rejected": -0.20181144773960114, + "eval_runtime": 714.4369, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 1.4, + "step": 2100 + }, + { + "epoch": 0.14, + "learning_rate": 4.9780212264269835e-06, + "logits/chosen": -1.3951307535171509, + "logits/rejected": -1.1739161014556885, + "logps/chosen": -337.2302551269531, + "logps/rejected": -362.6296081542969, + "loss": 0.6915, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.15241114795207977, + "rewards/margins": 0.042535070329904556, + "rewards/rejected": -0.19494622945785522, + "step": 2110 + }, + { + "epoch": 0.14, + "learning_rate": 4.977259290049739e-06, + "logits/chosen": -1.7050163745880127, + "logits/rejected": -1.2494385242462158, + "logps/chosen": -407.9380187988281, + "logps/rejected": -450.7481384277344, + "loss": 0.6869, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12099560350179672, + "rewards/margins": 0.11232854425907135, + "rewards/rejected": -0.23332414031028748, + "step": 2120 + }, + { + "epoch": 0.14, + "learning_rate": 4.976484431074538e-06, + "logits/chosen": -1.445926308631897, + "logits/rejected": -1.4503229856491089, + "logps/chosen": -310.28448486328125, + "logps/rejected": -348.43402099609375, + "loss": 0.6915, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.11647901684045792, + "rewards/margins": 0.055751871317625046, + "rewards/rejected": -0.17223089933395386, + "step": 2130 + }, + { + "epoch": 0.14, + "learning_rate": 4.975696653543425e-06, + "logits/chosen": -1.506824016571045, + "logits/rejected": -1.312302827835083, + "logps/chosen": -383.40704345703125, + "logps/rejected": -453.06707763671875, + "loss": 0.6894, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.13375821709632874, + "rewards/margins": 0.08222929388284683, + "rewards/rejected": -0.21598748862743378, + "step": 2140 + }, + { + "epoch": 0.14, + "learning_rate": 4.974895961565835e-06, + "logits/chosen": -1.295353651046753, + "logits/rejected": -1.1237144470214844, + "logps/chosen": -343.5455627441406, + "logps/rejected": -430.0581970214844, + "loss": 0.6889, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.16829460859298706, + "rewards/margins": 0.08128471672534943, + "rewards/rejected": -0.2495793104171753, + "step": 2150 + }, + { + "epoch": 0.14, + "learning_rate": 4.974082359318566e-06, + "logits/chosen": -1.3289740085601807, + "logits/rejected": -1.2230969667434692, + "logps/chosen": -412.52789306640625, + "logps/rejected": -453.0899353027344, + "loss": 0.6903, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.1588778793811798, + "rewards/margins": 0.08604113012552261, + "rewards/rejected": -0.24491901695728302, + "step": 2160 + }, + { + "epoch": 0.14, + "learning_rate": 4.973255851045769e-06, + "logits/chosen": -1.4549330472946167, + "logits/rejected": -1.424309253692627, + "logps/chosen": -342.51593017578125, + "logps/rejected": -382.6673583984375, + "loss": 0.6903, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.12515771389007568, + "rewards/margins": 0.07689893245697021, + "rewards/rejected": -0.2020566463470459, + "step": 2170 + }, + { + "epoch": 0.14, + "learning_rate": 4.972416441058915e-06, + "logits/chosen": -1.2769469022750854, + "logits/rejected": -1.1573234796524048, + "logps/chosen": -402.8035583496094, + "logps/rejected": -485.8418884277344, + "loss": 0.6891, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1730477660894394, + "rewards/margins": 0.10968021303415298, + "rewards/rejected": -0.2827279269695282, + "step": 2180 + }, + { + "epoch": 0.14, + "learning_rate": 4.971564133736777e-06, + "logits/chosen": -1.1722831726074219, + "logits/rejected": -0.994546115398407, + "logps/chosen": -317.39727783203125, + "logps/rejected": -428.52752685546875, + "loss": 0.688, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.13939666748046875, + "rewards/margins": 0.10614917427301407, + "rewards/rejected": -0.24554581940174103, + "step": 2190 + }, + { + "epoch": 0.14, + "learning_rate": 4.970698933525409e-06, + "logits/chosen": -1.7047713994979858, + "logits/rejected": -1.4181147813796997, + "logps/chosen": -431.55206298828125, + "logps/rejected": -438.75457763671875, + "loss": 0.6914, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.15220515429973602, + "rewards/margins": 0.046874094754457474, + "rewards/rejected": -0.1990792602300644, + "step": 2200 + }, + { + "epoch": 0.14, + "eval_logits/chosen": -1.6063042879104614, + "eval_logits/rejected": -1.4533063173294067, + "eval_logps/chosen": -326.1747741699219, + "eval_logps/rejected": -362.9125061035156, + "eval_loss": 0.6902684569358826, + "eval_rewards/accuracies": 0.6345000267028809, + "eval_rewards/chosen": -0.09416984766721725, + "eval_rewards/margins": 0.05713077262043953, + "eval_rewards/rejected": -0.1513006091117859, + "eval_runtime": 713.1171, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 1.402, + "step": 2200 + }, + { + "epoch": 0.14, + "learning_rate": 4.969820844938118e-06, + "logits/chosen": -1.692323088645935, + "logits/rejected": -1.4323906898498535, + "logps/chosen": -313.9912414550781, + "logps/rejected": -315.52801513671875, + "loss": 0.6902, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09138797223567963, + "rewards/margins": 0.05543946474790573, + "rewards/rejected": -0.14682744443416595, + "step": 2210 + }, + { + "epoch": 0.15, + "learning_rate": 4.968929872555444e-06, + "logits/chosen": -1.148425817489624, + "logits/rejected": -1.1049809455871582, + "logps/chosen": -421.64678955078125, + "logps/rejected": -522.7020263671875, + "loss": 0.6904, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.20451240241527557, + "rewards/margins": 0.06539227813482285, + "rewards/rejected": -0.2699046730995178, + "step": 2220 + }, + { + "epoch": 0.15, + "learning_rate": 4.968026021025137e-06, + "logits/chosen": -1.4740171432495117, + "logits/rejected": -1.275011658668518, + "logps/chosen": -370.6834411621094, + "logps/rejected": -409.69671630859375, + "loss": 0.6889, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1691185086965561, + "rewards/margins": 0.07907870411872864, + "rewards/rejected": -0.24819722771644592, + "step": 2230 + }, + { + "epoch": 0.15, + "learning_rate": 4.967109295062128e-06, + "logits/chosen": -1.2822327613830566, + "logits/rejected": -1.1900814771652222, + "logps/chosen": -387.3942565917969, + "logps/rejected": -473.9903869628906, + "loss": 0.6914, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.16239280998706818, + "rewards/margins": 0.06567515432834625, + "rewards/rejected": -0.22806794941425323, + "step": 2240 + }, + { + "epoch": 0.15, + "learning_rate": 4.966179699448509e-06, + "logits/chosen": -1.2386096715927124, + "logits/rejected": -1.0959924459457397, + "logps/chosen": -346.8693542480469, + "logps/rejected": -363.8343811035156, + "loss": 0.6912, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.156114399433136, + "rewards/margins": 0.034791115671396255, + "rewards/rejected": -0.19090552628040314, + "step": 2250 + }, + { + "epoch": 0.15, + "learning_rate": 4.965237239033506e-06, + "logits/chosen": -1.4767388105392456, + "logits/rejected": -1.2675386667251587, + "logps/chosen": -446.92071533203125, + "logps/rejected": -516.0152587890625, + "loss": 0.6863, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.15726196765899658, + "rewards/margins": 0.11387642472982407, + "rewards/rejected": -0.27113842964172363, + "step": 2260 + }, + { + "epoch": 0.15, + "learning_rate": 4.964281918733453e-06, + "logits/chosen": -1.2942843437194824, + "logits/rejected": -1.1992994546890259, + "logps/chosen": -358.36810302734375, + "logps/rejected": -457.5331115722656, + "loss": 0.6884, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.17525368928909302, + "rewards/margins": 0.10362176597118378, + "rewards/rejected": -0.2788754105567932, + "step": 2270 + }, + { + "epoch": 0.15, + "learning_rate": 4.9633137435317715e-06, + "logits/chosen": -1.1768795251846313, + "logits/rejected": -0.8136765360832214, + "logps/chosen": -484.44561767578125, + "logps/rejected": -525.266357421875, + "loss": 0.6878, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2615506649017334, + "rewards/margins": 0.10860586166381836, + "rewards/rejected": -0.370156466960907, + "step": 2280 + }, + { + "epoch": 0.15, + "learning_rate": 4.9623327184789355e-06, + "logits/chosen": -1.1933273077011108, + "logits/rejected": -1.1837798357009888, + "logps/chosen": -521.596435546875, + "logps/rejected": -575.0322875976562, + "loss": 0.6926, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.3038559854030609, + "rewards/margins": 0.058262161910533905, + "rewards/rejected": -0.3621181547641754, + "step": 2290 + }, + { + "epoch": 0.15, + "learning_rate": 4.9613388486924525e-06, + "logits/chosen": -0.7321104407310486, + "logits/rejected": -0.9151169657707214, + "logps/chosen": -538.6513671875, + "logps/rejected": -635.89453125, + "loss": 0.6916, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.35851725935935974, + "rewards/margins": 0.07986314594745636, + "rewards/rejected": -0.4383804202079773, + "step": 2300 + }, + { + "epoch": 0.15, + "eval_logits/chosen": -1.193457007408142, + "eval_logits/rejected": -1.0524342060089111, + "eval_logps/chosen": -545.2796020507812, + "eval_logps/rejected": -613.0293579101562, + "eval_loss": 0.6903690099716187, + "eval_rewards/accuracies": 0.6449999809265137, + "eval_rewards/chosen": -0.3132747411727905, + "eval_rewards/margins": 0.08814278990030289, + "eval_rewards/rejected": -0.4014175534248352, + "eval_runtime": 714.3536, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 1.4, + "step": 2300 + }, + { + "epoch": 0.15, + "learning_rate": 4.960332139356834e-06, + "logits/chosen": -1.2785427570343018, + "logits/rejected": -1.092272400856018, + "logps/chosen": -472.427978515625, + "logps/rejected": -542.86181640625, + "loss": 0.6883, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2636111080646515, + "rewards/margins": 0.09687371551990509, + "rewards/rejected": -0.360484778881073, + "step": 2310 + }, + { + "epoch": 0.15, + "learning_rate": 4.95931259572357e-06, + "logits/chosen": -1.3524049520492554, + "logits/rejected": -1.1524009704589844, + "logps/chosen": -462.6844787597656, + "logps/rejected": -562.32470703125, + "loss": 0.6898, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.22844454646110535, + "rewards/margins": 0.07396665960550308, + "rewards/rejected": -0.30241116881370544, + "step": 2320 + }, + { + "epoch": 0.15, + "learning_rate": 4.9582802231111e-06, + "logits/chosen": -1.349764108657837, + "logits/rejected": -1.385392189025879, + "logps/chosen": -358.3040771484375, + "logps/rejected": -393.629638671875, + "loss": 0.6908, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14642992615699768, + "rewards/margins": 0.06220410391688347, + "rewards/rejected": -0.20863404870033264, + "step": 2330 + }, + { + "epoch": 0.15, + "learning_rate": 4.957235026904782e-06, + "logits/chosen": -1.4866364002227783, + "logits/rejected": -1.245184063911438, + "logps/chosen": -387.78863525390625, + "logps/rejected": -390.4792175292969, + "loss": 0.6903, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1256362497806549, + "rewards/margins": 0.05380575731396675, + "rewards/rejected": -0.17944203317165375, + "step": 2340 + }, + { + "epoch": 0.15, + "learning_rate": 4.956177012556875e-06, + "logits/chosen": -1.5071487426757812, + "logits/rejected": -1.2931318283081055, + "logps/chosen": -406.36260986328125, + "logps/rejected": -397.49951171875, + "loss": 0.6889, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.16216978430747986, + "rewards/margins": 0.06270495802164078, + "rewards/rejected": -0.22487470507621765, + "step": 2350 + }, + { + "epoch": 0.15, + "learning_rate": 4.9551061855864976e-06, + "logits/chosen": -0.8723462224006653, + "logits/rejected": -0.9245679974555969, + "logps/chosen": -391.17108154296875, + "logps/rejected": -441.70599365234375, + "loss": 0.6905, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1983429491519928, + "rewards/margins": 0.04632434993982315, + "rewards/rejected": -0.24466726183891296, + "step": 2360 + }, + { + "epoch": 0.16, + "learning_rate": 4.95402255157961e-06, + "logits/chosen": -0.9798853993415833, + "logits/rejected": -0.9244794845581055, + "logps/chosen": -366.5437927246094, + "logps/rejected": -537.5667114257812, + "loss": 0.689, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1814875304698944, + "rewards/margins": 0.1064767986536026, + "rewards/rejected": -0.2879643142223358, + "step": 2370 + }, + { + "epoch": 0.16, + "learning_rate": 4.952926116188977e-06, + "logits/chosen": -1.305229902267456, + "logits/rejected": -1.3175709247589111, + "logps/chosen": -363.5146484375, + "logps/rejected": -447.1356506347656, + "loss": 0.6929, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.18013733625411987, + "rewards/margins": 0.04441644623875618, + "rewards/rejected": -0.22455377876758575, + "step": 2380 + }, + { + "epoch": 0.16, + "learning_rate": 4.951816885134143e-06, + "logits/chosen": -1.3067344427108765, + "logits/rejected": -1.383279800415039, + "logps/chosen": -364.8487243652344, + "logps/rejected": -418.524169921875, + "loss": 0.6909, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.16329577565193176, + "rewards/margins": 0.054536230862140656, + "rewards/rejected": -0.21783199906349182, + "step": 2390 + }, + { + "epoch": 0.16, + "learning_rate": 4.950694864201399e-06, + "logits/chosen": -1.350503921508789, + "logits/rejected": -1.2483956813812256, + "logps/chosen": -370.8079833984375, + "logps/rejected": -446.61199951171875, + "loss": 0.6897, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.13212604820728302, + "rewards/margins": 0.0736747682094574, + "rewards/rejected": -0.20580080151557922, + "step": 2400 + }, + { + "epoch": 0.16, + "eval_logits/chosen": -1.379401683807373, + "eval_logits/rejected": -1.235855221748352, + "eval_logps/chosen": -354.2447509765625, + "eval_logps/rejected": -401.984375, + "eval_loss": 0.6901373863220215, + "eval_rewards/accuracies": 0.6389999985694885, + "eval_rewards/chosen": -0.12223977595567703, + "eval_rewards/margins": 0.06813269108533859, + "eval_rewards/rejected": -0.19037246704101562, + "eval_runtime": 714.0833, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 1.4, + "step": 2400 + }, + { + "epoch": 0.16, + "learning_rate": 4.9495600592437575e-06, + "logits/chosen": -1.3811991214752197, + "logits/rejected": -1.3576008081436157, + "logps/chosen": -401.98553466796875, + "logps/rejected": -444.0850524902344, + "loss": 0.6912, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.17509104311466217, + "rewards/margins": 0.03891799598932266, + "rewards/rejected": -0.21400907635688782, + "step": 2410 + }, + { + "epoch": 0.16, + "learning_rate": 4.948412476180917e-06, + "logits/chosen": -1.100079894065857, + "logits/rejected": -0.9340648651123047, + "logps/chosen": -325.56146240234375, + "logps/rejected": -389.710693359375, + "loss": 0.6902, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14148560166358948, + "rewards/margins": 0.086885966360569, + "rewards/rejected": -0.22837157547473907, + "step": 2420 + }, + { + "epoch": 0.16, + "learning_rate": 4.947252120999232e-06, + "logits/chosen": -1.1523898839950562, + "logits/rejected": -0.9136794805526733, + "logps/chosen": -423.8316345214844, + "logps/rejected": -403.52197265625, + "loss": 0.6924, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.15672679245471954, + "rewards/margins": 0.03846416622400284, + "rewards/rejected": -0.19519095122814178, + "step": 2430 + }, + { + "epoch": 0.16, + "learning_rate": 4.946078999751683e-06, + "logits/chosen": -1.1010525226593018, + "logits/rejected": -0.948320209980011, + "logps/chosen": -287.8900451660156, + "logps/rejected": -328.5050048828125, + "loss": 0.6904, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10826291143894196, + "rewards/margins": 0.06217117980122566, + "rewards/rejected": -0.1704341024160385, + "step": 2440 + }, + { + "epoch": 0.16, + "learning_rate": 4.944893118557847e-06, + "logits/chosen": -1.130669355392456, + "logits/rejected": -1.0464719533920288, + "logps/chosen": -333.6479187011719, + "logps/rejected": -340.75628662109375, + "loss": 0.69, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.12716332077980042, + "rewards/margins": 0.060798801481723785, + "rewards/rejected": -0.187962144613266, + "step": 2450 + }, + { + "epoch": 0.16, + "learning_rate": 4.943694483603861e-06, + "logits/chosen": -1.472876787185669, + "logits/rejected": -1.113930344581604, + "logps/chosen": -323.41058349609375, + "logps/rejected": -335.5633850097656, + "loss": 0.6899, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0974065512418747, + "rewards/margins": 0.06313179433345795, + "rewards/rejected": -0.16053833067417145, + "step": 2460 + }, + { + "epoch": 0.16, + "learning_rate": 4.9424831011423914e-06, + "logits/chosen": -1.4405043125152588, + "logits/rejected": -1.3846049308776855, + "logps/chosen": -403.95758056640625, + "logps/rejected": -391.89056396484375, + "loss": 0.6924, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.11553101241588593, + "rewards/margins": 0.024947669357061386, + "rewards/rejected": -0.1404787003993988, + "step": 2470 + }, + { + "epoch": 0.16, + "learning_rate": 4.9412589774926015e-06, + "logits/chosen": -1.3660147190093994, + "logits/rejected": -1.1039983034133911, + "logps/chosen": -406.1762390136719, + "logps/rejected": -425.1890563964844, + "loss": 0.6907, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.13198652863502502, + "rewards/margins": 0.06963631510734558, + "rewards/rejected": -0.2016228437423706, + "step": 2480 + }, + { + "epoch": 0.16, + "learning_rate": 4.940022119040121e-06, + "logits/chosen": -1.2710121870040894, + "logits/rejected": -1.1066303253173828, + "logps/chosen": -425.4140625, + "logps/rejected": -424.8721618652344, + "loss": 0.6923, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.13349811732769012, + "rewards/margins": 0.027276337146759033, + "rewards/rejected": -0.16077445447444916, + "step": 2490 + }, + { + "epoch": 0.16, + "learning_rate": 4.93877253223701e-06, + "logits/chosen": -1.2585488557815552, + "logits/rejected": -1.220293402671814, + "logps/chosen": -403.06866455078125, + "logps/rejected": -416.2520446777344, + "loss": 0.6921, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1107964739203453, + "rewards/margins": 0.04475604370236397, + "rewards/rejected": -0.15555252134799957, + "step": 2500 + }, + { + "epoch": 0.16, + "eval_logits/chosen": -1.2731248140335083, + "eval_logits/rejected": -1.1392936706542969, + "eval_logps/chosen": -346.2454833984375, + "eval_logps/rejected": -378.1649169921875, + "eval_loss": 0.6903291940689087, + "eval_rewards/accuracies": 0.6485000252723694, + "eval_rewards/chosen": -0.11424053460359573, + "eval_rewards/margins": 0.05231250822544098, + "eval_rewards/rejected": -0.16655302047729492, + "eval_runtime": 715.4866, + "eval_samples_per_second": 2.795, + "eval_steps_per_second": 1.398, + "step": 2500 + }, + { + "epoch": 0.16, + "learning_rate": 4.937510223601725e-06, + "logits/chosen": -1.5598738193511963, + "logits/rejected": -1.5040004253387451, + "logps/chosen": -357.2165222167969, + "logps/rejected": -335.8756103515625, + "loss": 0.6926, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.09469057619571686, + "rewards/margins": 0.020870482549071312, + "rewards/rejected": -0.11556105315685272, + "step": 2510 + }, + { + "epoch": 0.16, + "learning_rate": 4.936235199719085e-06, + "logits/chosen": -1.27707040309906, + "logits/rejected": -1.1760832071304321, + "logps/chosen": -290.1016845703125, + "logps/rejected": -318.79022216796875, + "loss": 0.6906, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11779968440532684, + "rewards/margins": 0.05886771157383919, + "rewards/rejected": -0.17666740715503693, + "step": 2520 + }, + { + "epoch": 0.17, + "learning_rate": 4.93494746724024e-06, + "logits/chosen": -1.3201732635498047, + "logits/rejected": -1.1975219249725342, + "logps/chosen": -349.1531677246094, + "logps/rejected": -434.265380859375, + "loss": 0.6897, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.12534867227077484, + "rewards/margins": 0.06765227019786835, + "rewards/rejected": -0.1930009424686432, + "step": 2530 + }, + { + "epoch": 0.17, + "learning_rate": 4.933647032882635e-06, + "logits/chosen": -1.3505040407180786, + "logits/rejected": -1.165984869003296, + "logps/chosen": -397.2581481933594, + "logps/rejected": -416.6473083496094, + "loss": 0.6898, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1523962914943695, + "rewards/margins": 0.06380870193243027, + "rewards/rejected": -0.21620500087738037, + "step": 2540 + }, + { + "epoch": 0.17, + "learning_rate": 4.932333903429969e-06, + "logits/chosen": -0.8627212643623352, + "logits/rejected": -0.7106344699859619, + "logps/chosen": -363.2044982910156, + "logps/rejected": -337.69976806640625, + "loss": 0.6945, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.17012201249599457, + "rewards/margins": 0.00030131227686069906, + "rewards/rejected": -0.17042334377765656, + "step": 2550 + }, + { + "epoch": 0.17, + "learning_rate": 4.931008085732172e-06, + "logits/chosen": -0.9541371464729309, + "logits/rejected": -0.5360308289527893, + "logps/chosen": -377.06036376953125, + "logps/rejected": -384.6097412109375, + "loss": 0.691, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.174237459897995, + "rewards/margins": 0.05755072832107544, + "rewards/rejected": -0.23178818821907043, + "step": 2560 + }, + { + "epoch": 0.17, + "learning_rate": 4.9296695867053565e-06, + "logits/chosen": -0.9381176233291626, + "logits/rejected": -0.6313947439193726, + "logps/chosen": -505.18646240234375, + "logps/rejected": -498.3179626464844, + "loss": 0.6912, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2045850306749344, + "rewards/margins": 0.059091318398714066, + "rewards/rejected": -0.26367634534835815, + "step": 2570 + }, + { + "epoch": 0.17, + "learning_rate": 4.928318413331791e-06, + "logits/chosen": -0.7713817358016968, + "logits/rejected": -0.7979531288146973, + "logps/chosen": -411.1729431152344, + "logps/rejected": -445.4847106933594, + "loss": 0.6919, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1999458223581314, + "rewards/margins": 0.0502970889210701, + "rewards/rejected": -0.2502428889274597, + "step": 2580 + }, + { + "epoch": 0.17, + "learning_rate": 4.926954572659855e-06, + "logits/chosen": -0.6779652833938599, + "logits/rejected": -0.5368167161941528, + "logps/chosen": -440.5191955566406, + "logps/rejected": -534.2384033203125, + "loss": 0.6897, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.19832351803779602, + "rewards/margins": 0.07653336226940155, + "rewards/rejected": -0.27485689520835876, + "step": 2590 + }, + { + "epoch": 0.17, + "learning_rate": 4.925578071804013e-06, + "logits/chosen": -0.46047964692115784, + "logits/rejected": -0.48922720551490784, + "logps/chosen": -441.5943908691406, + "logps/rejected": -573.5369262695312, + "loss": 0.6893, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2118886262178421, + "rewards/margins": 0.07453066110610962, + "rewards/rejected": -0.2864193022251129, + "step": 2600 + }, + { + "epoch": 0.17, + "eval_logits/chosen": -0.7405468225479126, + "eval_logits/rejected": -0.6333872079849243, + "eval_logps/chosen": -444.1413879394531, + "eval_logps/rejected": -493.07635498046875, + "eval_loss": 0.6899484992027283, + "eval_rewards/accuracies": 0.6545000076293945, + "eval_rewards/chosen": -0.2121364325284958, + "eval_rewards/margins": 0.06932804733514786, + "eval_rewards/rejected": -0.28146445751190186, + "eval_runtime": 712.5767, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 1.403, + "step": 2600 + }, + { + "epoch": 0.17, + "learning_rate": 4.924188917944763e-06, + "logits/chosen": -0.9295086860656738, + "logits/rejected": -0.6540385484695435, + "logps/chosen": -414.5174865722656, + "logps/rejected": -501.05517578125, + "loss": 0.6867, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.19451238214969635, + "rewards/margins": 0.12005816400051117, + "rewards/rejected": -0.3145705461502075, + "step": 2610 + }, + { + "epoch": 0.17, + "learning_rate": 4.922787118328617e-06, + "logits/chosen": -0.9196515083312988, + "logits/rejected": -0.4873427748680115, + "logps/chosen": -459.8988342285156, + "logps/rejected": -429.284912109375, + "loss": 0.6927, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.22988121211528778, + "rewards/margins": 0.04569891467690468, + "rewards/rejected": -0.27558010816574097, + "step": 2620 + }, + { + "epoch": 0.17, + "learning_rate": 4.921372680268045e-06, + "logits/chosen": -0.7027789354324341, + "logits/rejected": -0.7827448844909668, + "logps/chosen": -487.5341796875, + "logps/rejected": -478.0096740722656, + "loss": 0.6937, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.25752943754196167, + "rewards/margins": 0.020844275131821632, + "rewards/rejected": -0.27837374806404114, + "step": 2630 + }, + { + "epoch": 0.17, + "learning_rate": 4.919945611141451e-06, + "logits/chosen": -1.108783483505249, + "logits/rejected": -0.8095542788505554, + "logps/chosen": -407.37945556640625, + "logps/rejected": -403.690185546875, + "loss": 0.6892, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.18214483559131622, + "rewards/margins": 0.05483005568385124, + "rewards/rejected": -0.23697488009929657, + "step": 2640 + }, + { + "epoch": 0.17, + "learning_rate": 4.918505918393125e-06, + "logits/chosen": -0.8918964266777039, + "logits/rejected": -0.7974181175231934, + "logps/chosen": -347.4504699707031, + "logps/rejected": -446.9676208496094, + "loss": 0.6887, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.17841976881027222, + "rewards/margins": 0.07614380121231079, + "rewards/rejected": -0.254563570022583, + "step": 2650 + }, + { + "epoch": 0.17, + "learning_rate": 4.91705360953321e-06, + "logits/chosen": -0.8860540390014648, + "logits/rejected": -0.8829668164253235, + "logps/chosen": -486.8497009277344, + "logps/rejected": -531.5885009765625, + "loss": 0.6889, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.24136213958263397, + "rewards/margins": 0.07739405333995819, + "rewards/rejected": -0.31875619292259216, + "step": 2660 + }, + { + "epoch": 0.17, + "learning_rate": 4.9155886921376615e-06, + "logits/chosen": -0.9232280850410461, + "logits/rejected": -0.8939113616943359, + "logps/chosen": -443.81170654296875, + "logps/rejected": -532.4825439453125, + "loss": 0.6931, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.23784330487251282, + "rewards/margins": 0.06700251996517181, + "rewards/rejected": -0.3048458397388458, + "step": 2670 + }, + { + "epoch": 0.18, + "learning_rate": 4.914111173848205e-06, + "logits/chosen": -1.0852937698364258, + "logits/rejected": -1.0277903079986572, + "logps/chosen": -487.4720153808594, + "logps/rejected": -509.03466796875, + "loss": 0.6915, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.25444039702415466, + "rewards/margins": 0.03698063641786575, + "rewards/rejected": -0.2914210259914398, + "step": 2680 + }, + { + "epoch": 0.18, + "learning_rate": 4.9126210623723e-06, + "logits/chosen": -0.9503974914550781, + "logits/rejected": -0.9779285192489624, + "logps/chosen": -425.9971618652344, + "logps/rejected": -529.239990234375, + "loss": 0.6896, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.22562380135059357, + "rewards/margins": 0.06947082281112671, + "rewards/rejected": -0.2950945794582367, + "step": 2690 + }, + { + "epoch": 0.18, + "learning_rate": 4.911118365483098e-06, + "logits/chosen": -1.1807284355163574, + "logits/rejected": -1.0885039567947388, + "logps/chosen": -439.03369140625, + "logps/rejected": -527.4857177734375, + "loss": 0.6898, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2372794896364212, + "rewards/margins": 0.07902587950229645, + "rewards/rejected": -0.31630533933639526, + "step": 2700 + }, + { + "epoch": 0.18, + "eval_logits/chosen": -1.12832510471344, + "eval_logits/rejected": -0.998919665813446, + "eval_logps/chosen": -451.3296203613281, + "eval_logps/rejected": -502.5095520019531, + "eval_loss": 0.6901895403862, + "eval_rewards/accuracies": 0.6480000019073486, + "eval_rewards/chosen": -0.21932466328144073, + "eval_rewards/margins": 0.07157304137945175, + "eval_rewards/rejected": -0.2908977270126343, + "eval_runtime": 713.232, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 1.402, + "step": 2700 + }, + { + "epoch": 0.18, + "learning_rate": 4.909603091019403e-06, + "logits/chosen": -1.3059003353118896, + "logits/rejected": -1.127820372581482, + "logps/chosen": -425.28912353515625, + "logps/rejected": -467.7945861816406, + "loss": 0.692, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.18891176581382751, + "rewards/margins": 0.07912580668926239, + "rewards/rejected": -0.2680375576019287, + "step": 2710 + }, + { + "epoch": 0.18, + "learning_rate": 4.908075246885626e-06, + "logits/chosen": -1.0648815631866455, + "logits/rejected": -1.033276915550232, + "logps/chosen": -354.4007263183594, + "logps/rejected": -371.9431457519531, + "loss": 0.6921, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.20367324352264404, + "rewards/margins": 0.04493376240134239, + "rewards/rejected": -0.24860699474811554, + "step": 2720 + }, + { + "epoch": 0.18, + "learning_rate": 4.906534841051755e-06, + "logits/chosen": -0.9984892010688782, + "logits/rejected": -0.9872691035270691, + "logps/chosen": -434.988037109375, + "logps/rejected": -493.0547790527344, + "loss": 0.6901, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19158455729484558, + "rewards/margins": 0.04703119397163391, + "rewards/rejected": -0.2386157512664795, + "step": 2730 + }, + { + "epoch": 0.18, + "learning_rate": 4.904981881553297e-06, + "logits/chosen": -1.2664055824279785, + "logits/rejected": -1.0330970287322998, + "logps/chosen": -406.26007080078125, + "logps/rejected": -387.2786560058594, + "loss": 0.6909, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1820637732744217, + "rewards/margins": 0.04513677582144737, + "rewards/rejected": -0.22720055282115936, + "step": 2740 + }, + { + "epoch": 0.18, + "learning_rate": 4.903416376491252e-06, + "logits/chosen": -1.3078968524932861, + "logits/rejected": -1.1394526958465576, + "logps/chosen": -448.9022521972656, + "logps/rejected": -497.947265625, + "loss": 0.6888, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.17061004042625427, + "rewards/margins": 0.08963707834482193, + "rewards/rejected": -0.260247141122818, + "step": 2750 + }, + { + "epoch": 0.18, + "learning_rate": 4.90183833403206e-06, + "logits/chosen": -1.508120059967041, + "logits/rejected": -1.4299445152282715, + "logps/chosen": -431.62591552734375, + "logps/rejected": -478.345458984375, + "loss": 0.6893, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.16741284728050232, + "rewards/margins": 0.08297250419855118, + "rewards/rejected": -0.2503853440284729, + "step": 2760 + }, + { + "epoch": 0.18, + "learning_rate": 4.900247762407564e-06, + "logits/chosen": -1.1569961309432983, + "logits/rejected": -1.1519848108291626, + "logps/chosen": -337.0670471191406, + "logps/rejected": -452.12872314453125, + "loss": 0.6879, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1647750735282898, + "rewards/margins": 0.09301907569169998, + "rewards/rejected": -0.2577941417694092, + "step": 2770 + }, + { + "epoch": 0.18, + "learning_rate": 4.898644669914965e-06, + "logits/chosen": -1.2003084421157837, + "logits/rejected": -1.125847578048706, + "logps/chosen": -412.99810791015625, + "logps/rejected": -477.783203125, + "loss": 0.6907, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18892566859722137, + "rewards/margins": 0.07861106842756271, + "rewards/rejected": -0.2675367295742035, + "step": 2780 + }, + { + "epoch": 0.18, + "learning_rate": 4.897029064916778e-06, + "logits/chosen": -0.9239814877510071, + "logits/rejected": -0.8800607919692993, + "logps/chosen": -387.64642333984375, + "logps/rejected": -427.87005615234375, + "loss": 0.6918, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.17949643731117249, + "rewards/margins": 0.05261252075433731, + "rewards/rejected": -0.2321089804172516, + "step": 2790 + }, + { + "epoch": 0.18, + "learning_rate": 4.895400955840791e-06, + "logits/chosen": -1.4370605945587158, + "logits/rejected": -0.869090735912323, + "logps/chosen": -342.58966064453125, + "logps/rejected": -373.0824279785156, + "loss": 0.6905, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.12223835289478302, + "rewards/margins": 0.07219056785106659, + "rewards/rejected": -0.1944289207458496, + "step": 2800 + }, + { + "epoch": 0.18, + "eval_logits/chosen": -1.2887914180755615, + "eval_logits/rejected": -1.1516715288162231, + "eval_logps/chosen": -361.0450134277344, + "eval_logps/rejected": -403.0553894042969, + "eval_loss": 0.6901711225509644, + "eval_rewards/accuracies": 0.6460000276565552, + "eval_rewards/chosen": -0.12904000282287598, + "eval_rewards/margins": 0.06240350008010864, + "eval_rewards/rejected": -0.19144350290298462, + "eval_runtime": 712.0065, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 1.404, + "step": 2800 + }, + { + "epoch": 0.18, + "learning_rate": 4.893760351180018e-06, + "logits/chosen": -1.2118985652923584, + "logits/rejected": -1.2449872493743896, + "logps/chosen": -342.95660400390625, + "logps/rejected": -396.58294677734375, + "loss": 0.6902, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.14441534876823425, + "rewards/margins": 0.0485253669321537, + "rewards/rejected": -0.19294071197509766, + "step": 2810 + }, + { + "epoch": 0.18, + "learning_rate": 4.892107259492657e-06, + "logits/chosen": -1.1593921184539795, + "logits/rejected": -1.0327198505401611, + "logps/chosen": -364.427001953125, + "logps/rejected": -413.22467041015625, + "loss": 0.6917, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1245676726102829, + "rewards/margins": 0.04277877137064934, + "rewards/rejected": -0.16734644770622253, + "step": 2820 + }, + { + "epoch": 0.19, + "learning_rate": 4.890441689402042e-06, + "logits/chosen": -1.522952914237976, + "logits/rejected": -1.2996022701263428, + "logps/chosen": -446.1918029785156, + "logps/rejected": -485.85943603515625, + "loss": 0.6898, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.11068687587976456, + "rewards/margins": 0.08766763657331467, + "rewards/rejected": -0.19835449755191803, + "step": 2830 + }, + { + "epoch": 0.19, + "learning_rate": 4.888763649596606e-06, + "logits/chosen": -1.4260321855545044, + "logits/rejected": -1.3132002353668213, + "logps/chosen": -320.7117919921875, + "logps/rejected": -362.90374755859375, + "loss": 0.6906, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.11055009067058563, + "rewards/margins": 0.050993360579013824, + "rewards/rejected": -0.16154345870018005, + "step": 2840 + }, + { + "epoch": 0.19, + "learning_rate": 4.887073148829824e-06, + "logits/chosen": -1.2011626958847046, + "logits/rejected": -1.1460561752319336, + "logps/chosen": -384.56658935546875, + "logps/rejected": -437.68212890625, + "loss": 0.6896, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.11766302585601807, + "rewards/margins": 0.07605113089084625, + "rewards/rejected": -0.1937141716480255, + "step": 2850 + }, + { + "epoch": 0.19, + "learning_rate": 4.885370195920177e-06, + "logits/chosen": -0.9891164898872375, + "logits/rejected": -0.9282897710800171, + "logps/chosen": -330.49169921875, + "logps/rejected": -383.62677001953125, + "loss": 0.6918, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1451679766178131, + "rewards/margins": 0.06170827895402908, + "rewards/rejected": -0.20687627792358398, + "step": 2860 + }, + { + "epoch": 0.19, + "learning_rate": 4.883654799751101e-06, + "logits/chosen": -1.0076591968536377, + "logits/rejected": -0.9268460273742676, + "logps/chosen": -313.0103759765625, + "logps/rejected": -402.40716552734375, + "loss": 0.6913, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09730122238397598, + "rewards/margins": 0.05465664714574814, + "rewards/rejected": -0.15195786952972412, + "step": 2870 + }, + { + "epoch": 0.19, + "learning_rate": 4.8819269692709435e-06, + "logits/chosen": -1.1856772899627686, + "logits/rejected": -0.9097310304641724, + "logps/chosen": -355.238525390625, + "logps/rejected": -365.5335693359375, + "loss": 0.6899, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08819227665662766, + "rewards/margins": 0.08265722543001175, + "rewards/rejected": -0.1708495169878006, + "step": 2880 + }, + { + "epoch": 0.19, + "learning_rate": 4.880186713492915e-06, + "logits/chosen": -1.0931060314178467, + "logits/rejected": -0.9322888255119324, + "logps/chosen": -365.28948974609375, + "logps/rejected": -362.249755859375, + "loss": 0.6904, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.13757213950157166, + "rewards/margins": 0.050582222640514374, + "rewards/rejected": -0.18815436959266663, + "step": 2890 + }, + { + "epoch": 0.19, + "learning_rate": 4.878434041495041e-06, + "logits/chosen": -1.1420027017593384, + "logits/rejected": -1.2647812366485596, + "logps/chosen": -358.3078308105469, + "logps/rejected": -443.99798583984375, + "loss": 0.6888, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.12114207446575165, + "rewards/margins": 0.0838278979063034, + "rewards/rejected": -0.20496997237205505, + "step": 2900 + }, + { + "epoch": 0.19, + "eval_logits/chosen": -0.9719996452331543, + "eval_logits/rejected": -0.8515611886978149, + "eval_logps/chosen": -368.5532531738281, + "eval_logps/rejected": -425.2482604980469, + "eval_loss": 0.6900655627250671, + "eval_rewards/accuracies": 0.6520000100135803, + "eval_rewards/chosen": -0.13654829561710358, + "eval_rewards/margins": 0.07708805054426193, + "eval_rewards/rejected": -0.2136363536119461, + "eval_runtime": 711.4859, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.406, + "step": 2900 + }, + { + "epoch": 0.19, + "learning_rate": 4.876668962420117e-06, + "logits/chosen": -0.9826984405517578, + "logits/rejected": -0.8262590169906616, + "logps/chosen": -399.0771484375, + "logps/rejected": -415.10009765625, + "loss": 0.6914, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1094260960817337, + "rewards/margins": 0.07563906162977219, + "rewards/rejected": -0.1850651651620865, + "step": 2910 + }, + { + "epoch": 0.19, + "learning_rate": 4.87489148547566e-06, + "logits/chosen": -1.1586592197418213, + "logits/rejected": -1.0520836114883423, + "logps/chosen": -397.3544616699219, + "logps/rejected": -410.04541015625, + "loss": 0.6926, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1374959796667099, + "rewards/margins": 0.04586387053132057, + "rewards/rejected": -0.18335983157157898, + "step": 2920 + }, + { + "epoch": 0.19, + "learning_rate": 4.873101619933862e-06, + "logits/chosen": -1.4020602703094482, + "logits/rejected": -1.0666966438293457, + "logps/chosen": -361.32073974609375, + "logps/rejected": -391.4079284667969, + "loss": 0.689, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.09679357707500458, + "rewards/margins": 0.07940280437469482, + "rewards/rejected": -0.1761963665485382, + "step": 2930 + }, + { + "epoch": 0.19, + "learning_rate": 4.8712993751315385e-06, + "logits/chosen": -1.1208691596984863, + "logits/rejected": -1.032873272895813, + "logps/chosen": -208.27444458007812, + "logps/rejected": -259.03436279296875, + "loss": 0.691, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08761651068925858, + "rewards/margins": 0.0468580424785614, + "rewards/rejected": -0.13447454571723938, + "step": 2940 + }, + { + "epoch": 0.19, + "learning_rate": 4.869484760470079e-06, + "logits/chosen": -1.0466829538345337, + "logits/rejected": -0.8319103121757507, + "logps/chosen": -310.87298583984375, + "logps/rejected": -347.3447265625, + "loss": 0.6885, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.12221217155456543, + "rewards/margins": 0.0727708712220192, + "rewards/rejected": -0.19498305022716522, + "step": 2950 + }, + { + "epoch": 0.19, + "learning_rate": 4.867657785415404e-06, + "logits/chosen": -0.8036662936210632, + "logits/rejected": -0.6130042672157288, + "logps/chosen": -431.5562438964844, + "logps/rejected": -459.91583251953125, + "loss": 0.6894, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.18245726823806763, + "rewards/margins": 0.0701041966676712, + "rewards/rejected": -0.25256145000457764, + "step": 2960 + }, + { + "epoch": 0.19, + "learning_rate": 4.865818459497911e-06, + "logits/chosen": -0.666034996509552, + "logits/rejected": -0.7290282249450684, + "logps/chosen": -470.75860595703125, + "logps/rejected": -461.13311767578125, + "loss": 0.6888, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18657991290092468, + "rewards/margins": 0.07517173141241074, + "rewards/rejected": -0.261751651763916, + "step": 2970 + }, + { + "epoch": 0.19, + "learning_rate": 4.863966792312423e-06, + "logits/chosen": -0.9364269375801086, + "logits/rejected": -0.42835959792137146, + "logps/chosen": -459.9546813964844, + "logps/rejected": -544.4393920898438, + "loss": 0.6891, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.21849580109119415, + "rewards/margins": 0.12553612887859344, + "rewards/rejected": -0.34403195977211, + "step": 2980 + }, + { + "epoch": 0.2, + "learning_rate": 4.862102793518145e-06, + "logits/chosen": -0.8742693662643433, + "logits/rejected": -0.9743921160697937, + "logps/chosen": -412.43212890625, + "logps/rejected": -491.0814514160156, + "loss": 0.6886, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2181502878665924, + "rewards/margins": 0.0717243179678917, + "rewards/rejected": -0.2898745834827423, + "step": 2990 + }, + { + "epoch": 0.2, + "learning_rate": 4.8602264728386075e-06, + "logits/chosen": -1.0399912595748901, + "logits/rejected": -0.9057199358940125, + "logps/chosen": -414.02679443359375, + "logps/rejected": -484.69976806640625, + "loss": 0.6906, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.16152727603912354, + "rewards/margins": 0.07409163564443588, + "rewards/rejected": -0.23561891913414001, + "step": 3000 + }, + { + "epoch": 0.2, + "eval_logits/chosen": -0.8704602122306824, + "eval_logits/rejected": -0.7523858547210693, + "eval_logps/chosen": -415.2958984375, + "eval_logps/rejected": -477.5717468261719, + "eval_loss": 0.6900380849838257, + "eval_rewards/accuracies": 0.6449999809265137, + "eval_rewards/chosen": -0.1832909733057022, + "eval_rewards/margins": 0.08266889303922653, + "eval_rewards/rejected": -0.26595985889434814, + "eval_runtime": 711.8755, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 1.405, + "step": 3000 + }, + { + "epoch": 0.2, + "learning_rate": 4.858337840061616e-06, + "logits/chosen": -0.7448334693908691, + "logits/rejected": -0.8301981687545776, + "logps/chosen": -357.12591552734375, + "logps/rejected": -477.7318420410156, + "loss": 0.6911, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.174045592546463, + "rewards/margins": 0.06695671379566193, + "rewards/rejected": -0.24100229144096375, + "step": 3010 + }, + { + "epoch": 0.2, + "learning_rate": 4.856436905039208e-06, + "logits/chosen": -1.0125486850738525, + "logits/rejected": -0.8003193140029907, + "logps/chosen": -378.47552490234375, + "logps/rejected": -423.501220703125, + "loss": 0.6892, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1672704517841339, + "rewards/margins": 0.07941305637359619, + "rewards/rejected": -0.2466835230588913, + "step": 3020 + }, + { + "epoch": 0.2, + "learning_rate": 4.854523677687588e-06, + "logits/chosen": -0.8735455274581909, + "logits/rejected": -1.0956079959869385, + "logps/chosen": -328.8739318847656, + "logps/rejected": -404.0391540527344, + "loss": 0.6891, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.14762839674949646, + "rewards/margins": 0.05676870793104172, + "rewards/rejected": -0.2043970823287964, + "step": 3030 + }, + { + "epoch": 0.2, + "learning_rate": 4.85259816798709e-06, + "logits/chosen": -1.2449532747268677, + "logits/rejected": -0.9089319109916687, + "logps/chosen": -387.965576171875, + "logps/rejected": -398.33770751953125, + "loss": 0.6902, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.10020353645086288, + "rewards/margins": 0.09092805534601212, + "rewards/rejected": -0.191131591796875, + "step": 3040 + }, + { + "epoch": 0.2, + "learning_rate": 4.850660385982114e-06, + "logits/chosen": -1.067596197128296, + "logits/rejected": -0.9061563611030579, + "logps/chosen": -339.7103576660156, + "logps/rejected": -340.0997009277344, + "loss": 0.6885, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.09199124574661255, + "rewards/margins": 0.055670641362667084, + "rewards/rejected": -0.14766189455986023, + "step": 3050 + }, + { + "epoch": 0.2, + "learning_rate": 4.848710341781081e-06, + "logits/chosen": -0.37353700399398804, + "logits/rejected": -0.4667787551879883, + "logps/chosen": -460.0509338378906, + "logps/rejected": -516.2349853515625, + "loss": 0.6922, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.28614938259124756, + "rewards/margins": 0.06191081553697586, + "rewards/rejected": -0.3480601906776428, + "step": 3060 + }, + { + "epoch": 0.2, + "learning_rate": 4.846748045556377e-06, + "logits/chosen": -0.03157268464565277, + "logits/rejected": 0.024191658943891525, + "logps/chosen": -531.4178466796875, + "logps/rejected": -538.5341186523438, + "loss": 0.6913, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.29193076491355896, + "rewards/margins": 0.06762724369764328, + "rewards/rejected": -0.35955798625946045, + "step": 3070 + }, + { + "epoch": 0.2, + "learning_rate": 4.8447735075442995e-06, + "logits/chosen": -0.2942689061164856, + "logits/rejected": 0.010750794783234596, + "logps/chosen": -462.2196350097656, + "logps/rejected": -552.3308715820312, + "loss": 0.6905, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2646574079990387, + "rewards/margins": 0.09351170063018799, + "rewards/rejected": -0.3581691086292267, + "step": 3080 + }, + { + "epoch": 0.2, + "learning_rate": 4.8427867380450075e-06, + "logits/chosen": -0.5210511684417725, + "logits/rejected": -0.16920626163482666, + "logps/chosen": -449.54473876953125, + "logps/rejected": -469.4061584472656, + "loss": 0.6892, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.21920542418956757, + "rewards/margins": 0.06611128151416779, + "rewards/rejected": -0.28531667590141296, + "step": 3090 + }, + { + "epoch": 0.2, + "learning_rate": 4.840787747422462e-06, + "logits/chosen": -0.5311521291732788, + "logits/rejected": -0.4888841211795807, + "logps/chosen": -417.81982421875, + "logps/rejected": -453.7882385253906, + "loss": 0.6921, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.22216370701789856, + "rewards/margins": 0.06622599065303802, + "rewards/rejected": -0.2883896827697754, + "step": 3100 + }, + { + "epoch": 0.2, + "eval_logits/chosen": -0.49691787362098694, + "eval_logits/rejected": -0.4020865261554718, + "eval_logps/chosen": -462.8433837890625, + "eval_logps/rejected": -519.7990112304688, + "eval_loss": 0.6900349855422974, + "eval_rewards/accuracies": 0.6549999713897705, + "eval_rewards/chosen": -0.23083838820457458, + "eval_rewards/margins": 0.0773487463593483, + "eval_rewards/rejected": -0.3081871569156647, + "eval_runtime": 714.5458, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 1.399, + "step": 3100 + }, + { + "epoch": 0.2, + "learning_rate": 4.838776546104378e-06, + "logits/chosen": -0.7088804841041565, + "logits/rejected": -0.4187033772468567, + "logps/chosen": -511.6211853027344, + "logps/rejected": -560.3056640625, + "loss": 0.6886, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.23066799342632294, + "rewards/margins": 0.0890863910317421, + "rewards/rejected": -0.31975439190864563, + "step": 3110 + }, + { + "epoch": 0.2, + "learning_rate": 4.836753144582168e-06, + "logits/chosen": -0.5761700868606567, + "logits/rejected": -0.046261269599199295, + "logps/chosen": -481.9817810058594, + "logps/rejected": -563.0371704101562, + "loss": 0.6887, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2397502362728119, + "rewards/margins": 0.10516796261072159, + "rewards/rejected": -0.34491822123527527, + "step": 3120 + }, + { + "epoch": 0.2, + "learning_rate": 4.834717553410884e-06, + "logits/chosen": -0.7033271789550781, + "logits/rejected": -0.7667158842086792, + "logps/chosen": -377.99761962890625, + "logps/rejected": -490.375, + "loss": 0.6898, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1887216866016388, + "rewards/margins": 0.0987081527709961, + "rewards/rejected": -0.2874298691749573, + "step": 3130 + }, + { + "epoch": 0.21, + "learning_rate": 4.832669783209167e-06, + "logits/chosen": -0.44873374700546265, + "logits/rejected": -0.6905600428581238, + "logps/chosen": -442.02532958984375, + "logps/rejected": -466.65301513671875, + "loss": 0.693, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.19759933650493622, + "rewards/margins": 0.02407877705991268, + "rewards/rejected": -0.22167813777923584, + "step": 3140 + }, + { + "epoch": 0.21, + "learning_rate": 4.8306098446591895e-06, + "logits/chosen": -0.1389220654964447, + "logits/rejected": -0.14139506220817566, + "logps/chosen": -340.37078857421875, + "logps/rejected": -415.58685302734375, + "loss": 0.6906, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16410192847251892, + "rewards/margins": 0.05028475075960159, + "rewards/rejected": -0.21438665688037872, + "step": 3150 + }, + { + "epoch": 0.21, + "learning_rate": 4.828537748506601e-06, + "logits/chosen": -1.0547568798065186, + "logits/rejected": -0.8391033411026001, + "logps/chosen": -417.04150390625, + "logps/rejected": -402.0226745605469, + "loss": 0.6925, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1436433047056198, + "rewards/margins": 0.041227348148822784, + "rewards/rejected": -0.184870645403862, + "step": 3160 + }, + { + "epoch": 0.21, + "learning_rate": 4.826453505560469e-06, + "logits/chosen": -0.6873368620872498, + "logits/rejected": -0.6330714225769043, + "logps/chosen": -317.03424072265625, + "logps/rejected": -344.03424072265625, + "loss": 0.6913, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.12605510652065277, + "rewards/margins": 0.040696583688259125, + "rewards/rejected": -0.1667516976594925, + "step": 3170 + }, + { + "epoch": 0.21, + "learning_rate": 4.824357126693226e-06, + "logits/chosen": -0.6209930777549744, + "logits/rejected": -0.6906192898750305, + "logps/chosen": -366.21697998046875, + "logps/rejected": -356.8144836425781, + "loss": 0.6915, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10747381299734116, + "rewards/margins": 0.039324913173913956, + "rewards/rejected": -0.1467987298965454, + "step": 3180 + }, + { + "epoch": 0.21, + "learning_rate": 4.8222486228406105e-06, + "logits/chosen": -1.205514669418335, + "logits/rejected": -0.9653299450874329, + "logps/chosen": -301.0349426269531, + "logps/rejected": -328.4139404296875, + "loss": 0.6907, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0897945761680603, + "rewards/margins": 0.062060046941041946, + "rewards/rejected": -0.15185460448265076, + "step": 3190 + }, + { + "epoch": 0.21, + "learning_rate": 4.820128005001612e-06, + "logits/chosen": -0.8773876428604126, + "logits/rejected": -0.741000771522522, + "logps/chosen": -280.5494384765625, + "logps/rejected": -370.9437561035156, + "loss": 0.6867, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.06155586987733841, + "rewards/margins": 0.11193714290857315, + "rewards/rejected": -0.17349299788475037, + "step": 3200 + }, + { + "epoch": 0.21, + "eval_logits/chosen": -1.090376615524292, + "eval_logits/rejected": -0.9624568819999695, + "eval_logps/chosen": -319.77587890625, + "eval_logps/rejected": -371.4648132324219, + "eval_loss": 0.6899227499961853, + "eval_rewards/accuracies": 0.6620000004768372, + "eval_rewards/chosen": -0.08777090907096863, + "eval_rewards/margins": 0.07208200544118881, + "eval_rewards/rejected": -0.15985292196273804, + "eval_runtime": 711.7487, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 1.405, + "step": 3200 + }, + { + "epoch": 0.21, + "learning_rate": 4.817995284238412e-06, + "logits/chosen": -1.0149776935577393, + "logits/rejected": -1.03653883934021, + "logps/chosen": -287.68914794921875, + "logps/rejected": -399.0724182128906, + "loss": 0.689, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09116478264331818, + "rewards/margins": 0.08389847725629807, + "rewards/rejected": -0.17506323754787445, + "step": 3210 + }, + { + "epoch": 0.21, + "learning_rate": 4.815850471676327e-06, + "logits/chosen": -1.289499044418335, + "logits/rejected": -0.8683086633682251, + "logps/chosen": -321.6259460449219, + "logps/rejected": -422.97540283203125, + "loss": 0.6879, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08321196585893631, + "rewards/margins": 0.10559669882059097, + "rewards/rejected": -0.1888086497783661, + "step": 3220 + }, + { + "epoch": 0.21, + "learning_rate": 4.813693578503751e-06, + "logits/chosen": -0.9852520823478699, + "logits/rejected": -0.8063098192214966, + "logps/chosen": -381.07916259765625, + "logps/rejected": -408.11138916015625, + "loss": 0.6905, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.08457235991954803, + "rewards/margins": 0.08248989284038544, + "rewards/rejected": -0.16706225275993347, + "step": 3230 + }, + { + "epoch": 0.21, + "learning_rate": 4.811524615972093e-06, + "logits/chosen": -0.9069948196411133, + "logits/rejected": -1.0173349380493164, + "logps/chosen": -335.2598571777344, + "logps/rejected": -427.4632873535156, + "loss": 0.6888, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10532426834106445, + "rewards/margins": 0.08400104939937592, + "rewards/rejected": -0.18932530283927917, + "step": 3240 + }, + { + "epoch": 0.21, + "learning_rate": 4.809343595395724e-06, + "logits/chosen": -1.669086217880249, + "logits/rejected": -1.3308725357055664, + "logps/chosen": -278.1128845214844, + "logps/rejected": -286.84002685546875, + "loss": 0.6909, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08905723690986633, + "rewards/margins": 0.039315879344940186, + "rewards/rejected": -0.12837311625480652, + "step": 3250 + }, + { + "epoch": 0.21, + "learning_rate": 4.807150528151918e-06, + "logits/chosen": -1.1300930976867676, + "logits/rejected": -1.0958597660064697, + "logps/chosen": -238.3439178466797, + "logps/rejected": -335.4567565917969, + "loss": 0.6892, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.07165135443210602, + "rewards/margins": 0.07924596965312958, + "rewards/rejected": -0.1508973240852356, + "step": 3260 + }, + { + "epoch": 0.21, + "learning_rate": 4.804945425680787e-06, + "logits/chosen": -1.1141166687011719, + "logits/rejected": -1.0306416749954224, + "logps/chosen": -273.11968994140625, + "logps/rejected": -295.0377502441406, + "loss": 0.6921, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08467648923397064, + "rewards/margins": 0.04199628531932831, + "rewards/rejected": -0.12667277455329895, + "step": 3270 + }, + { + "epoch": 0.21, + "learning_rate": 4.802728299485225e-06, + "logits/chosen": -0.7322720289230347, + "logits/rejected": -0.6539539098739624, + "logps/chosen": -266.72052001953125, + "logps/rejected": -341.3778381347656, + "loss": 0.6899, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11611612141132355, + "rewards/margins": 0.053125642240047455, + "rewards/rejected": -0.1692417562007904, + "step": 3280 + }, + { + "epoch": 0.22, + "learning_rate": 4.8004991611308495e-06, + "logits/chosen": -0.854507565498352, + "logits/rejected": -0.7687502503395081, + "logps/chosen": -337.65826416015625, + "logps/rejected": -403.5023193359375, + "loss": 0.6889, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09610452502965927, + "rewards/margins": 0.08189422637224197, + "rewards/rejected": -0.17799875140190125, + "step": 3290 + }, + { + "epoch": 0.22, + "learning_rate": 4.798258022245937e-06, + "logits/chosen": -0.7789617776870728, + "logits/rejected": -0.6801769733428955, + "logps/chosen": -334.1788024902344, + "logps/rejected": -364.3035888671875, + "loss": 0.691, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11657233536243439, + "rewards/margins": 0.06858987361192703, + "rewards/rejected": -0.18516221642494202, + "step": 3300 + }, + { + "epoch": 0.22, + "eval_logits/chosen": -0.7451701164245605, + "eval_logits/rejected": -0.6384189128875732, + "eval_logps/chosen": -352.8772888183594, + "eval_logps/rejected": -411.245361328125, + "eval_loss": 0.6898645162582397, + "eval_rewards/accuracies": 0.6470000147819519, + "eval_rewards/chosen": -0.12087231874465942, + "eval_rewards/margins": 0.07876119017601013, + "eval_rewards/rejected": -0.19963350892066956, + "eval_runtime": 711.1738, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 1.406, + "step": 3300 + }, + { + "epoch": 0.22, + "learning_rate": 4.796004894521365e-06, + "logits/chosen": -0.9636430740356445, + "logits/rejected": -0.5158362984657288, + "logps/chosen": -345.48443603515625, + "logps/rejected": -448.40533447265625, + "loss": 0.6899, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11691470444202423, + "rewards/margins": 0.08166440576314926, + "rewards/rejected": -0.1985791027545929, + "step": 3310 + }, + { + "epoch": 0.22, + "learning_rate": 4.7937397897105545e-06, + "logits/chosen": -0.7896434664726257, + "logits/rejected": -0.6752771139144897, + "logps/chosen": -333.14385986328125, + "logps/rejected": -341.8313293457031, + "loss": 0.6926, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1279110163450241, + "rewards/margins": 0.03390089422464371, + "rewards/rejected": -0.1618119180202484, + "step": 3320 + }, + { + "epoch": 0.22, + "learning_rate": 4.791462719629399e-06, + "logits/chosen": -0.6939797401428223, + "logits/rejected": -0.6913006901741028, + "logps/chosen": -299.4644470214844, + "logps/rejected": -361.3075256347656, + "loss": 0.6889, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.11373928934335709, + "rewards/margins": 0.08425451815128326, + "rewards/rejected": -0.19799381494522095, + "step": 3330 + }, + { + "epoch": 0.22, + "learning_rate": 4.789173696156212e-06, + "logits/chosen": -1.0129543542861938, + "logits/rejected": -0.723541259765625, + "logps/chosen": -400.4210510253906, + "logps/rejected": -522.765869140625, + "loss": 0.687, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12398044764995575, + "rewards/margins": 0.14099428057670593, + "rewards/rejected": -0.26497477293014526, + "step": 3340 + }, + { + "epoch": 0.22, + "learning_rate": 4.786872731231662e-06, + "logits/chosen": -1.2101221084594727, + "logits/rejected": -1.0943197011947632, + "logps/chosen": -357.49139404296875, + "logps/rejected": -421.72308349609375, + "loss": 0.6911, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.14060047268867493, + "rewards/margins": 0.07560074329376221, + "rewards/rejected": -0.21620123088359833, + "step": 3350 + }, + { + "epoch": 0.22, + "learning_rate": 4.784559836858709e-06, + "logits/chosen": -0.8804407119750977, + "logits/rejected": -0.7715723514556885, + "logps/chosen": -346.12615966796875, + "logps/rejected": -393.340087890625, + "loss": 0.6904, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.11220908164978027, + "rewards/margins": 0.07763482630252838, + "rewards/rejected": -0.18984392285346985, + "step": 3360 + }, + { + "epoch": 0.22, + "learning_rate": 4.782235025102542e-06, + "logits/chosen": -0.9902753829956055, + "logits/rejected": -0.9379558563232422, + "logps/chosen": -355.44134521484375, + "logps/rejected": -412.8868103027344, + "loss": 0.6902, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1291220337152481, + "rewards/margins": 0.07492605596780777, + "rewards/rejected": -0.20404811203479767, + "step": 3370 + }, + { + "epoch": 0.22, + "learning_rate": 4.779898308090519e-06, + "logits/chosen": -1.0589594841003418, + "logits/rejected": -0.8376196622848511, + "logps/chosen": -397.018310546875, + "logps/rejected": -441.5438537597656, + "loss": 0.6879, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.12234105169773102, + "rewards/margins": 0.08135628700256348, + "rewards/rejected": -0.2036973237991333, + "step": 3380 + }, + { + "epoch": 0.22, + "learning_rate": 4.777549698012101e-06, + "logits/chosen": -0.8556244969367981, + "logits/rejected": -0.8381510972976685, + "logps/chosen": -415.40826416015625, + "logps/rejected": -501.6957092285156, + "loss": 0.6898, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.17219164967536926, + "rewards/margins": 0.10544709861278534, + "rewards/rejected": -0.2776387333869934, + "step": 3390 + }, + { + "epoch": 0.22, + "learning_rate": 4.775189207118787e-06, + "logits/chosen": -0.8066427111625671, + "logits/rejected": -0.7407088875770569, + "logps/chosen": -424.0018615722656, + "logps/rejected": -482.9774475097656, + "loss": 0.6903, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.15218417346477509, + "rewards/margins": 0.08131326735019684, + "rewards/rejected": -0.2334974706172943, + "step": 3400 + }, + { + "epoch": 0.22, + "eval_logits/chosen": -0.9076970219612122, + "eval_logits/rejected": -0.7890629768371582, + "eval_logps/chosen": -387.86065673828125, + "eval_logps/rejected": -448.2270812988281, + "eval_loss": 0.6898848414421082, + "eval_rewards/accuracies": 0.652999997138977, + "eval_rewards/chosen": -0.1558557152748108, + "eval_rewards/margins": 0.08075951784849167, + "eval_rewards/rejected": -0.23661524057388306, + "eval_runtime": 710.9356, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 1.407, + "step": 3400 + }, + { + "epoch": 0.22, + "learning_rate": 4.772816847724054e-06, + "logits/chosen": -0.9111618995666504, + "logits/rejected": -1.0753004550933838, + "logps/chosen": -363.90765380859375, + "logps/rejected": -413.2244567871094, + "loss": 0.6912, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14135311543941498, + "rewards/margins": 0.04981910064816475, + "rewards/rejected": -0.19117221236228943, + "step": 3410 + }, + { + "epoch": 0.22, + "learning_rate": 4.770432632203294e-06, + "logits/chosen": -0.5220621824264526, + "logits/rejected": -0.48986703157424927, + "logps/chosen": -373.3628845214844, + "logps/rejected": -369.2757873535156, + "loss": 0.6916, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.12971577048301697, + "rewards/margins": 0.046231161803007126, + "rewards/rejected": -0.17594695091247559, + "step": 3420 + }, + { + "epoch": 0.22, + "learning_rate": 4.768036572993738e-06, + "logits/chosen": -0.9792502522468567, + "logits/rejected": -0.7328594923019409, + "logps/chosen": -421.8783264160156, + "logps/rejected": -474.849609375, + "loss": 0.6893, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.14185933768749237, + "rewards/margins": 0.06857124716043472, + "rewards/rejected": -0.2104305773973465, + "step": 3430 + }, + { + "epoch": 0.23, + "learning_rate": 4.765628682594409e-06, + "logits/chosen": -0.96644127368927, + "logits/rejected": -0.868308424949646, + "logps/chosen": -350.9528503417969, + "logps/rejected": -403.69879150390625, + "loss": 0.6884, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10719581693410873, + "rewards/margins": 0.07664835453033447, + "rewards/rejected": -0.1838441640138626, + "step": 3440 + }, + { + "epoch": 0.23, + "learning_rate": 4.763208973566041e-06, + "logits/chosen": -0.9829349517822266, + "logits/rejected": -0.7497692704200745, + "logps/chosen": -293.51007080078125, + "logps/rejected": -391.0221252441406, + "loss": 0.69, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.10938040167093277, + "rewards/margins": 0.07879676669836044, + "rewards/rejected": -0.1881771832704544, + "step": 3450 + }, + { + "epoch": 0.23, + "learning_rate": 4.76077745853102e-06, + "logits/chosen": -1.1481579542160034, + "logits/rejected": -1.1825287342071533, + "logps/chosen": -374.2550048828125, + "logps/rejected": -447.8692932128906, + "loss": 0.6915, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.12420084327459335, + "rewards/margins": 0.0724891722202301, + "rewards/rejected": -0.19669003784656525, + "step": 3460 + }, + { + "epoch": 0.23, + "learning_rate": 4.758334150173322e-06, + "logits/chosen": -0.9127210378646851, + "logits/rejected": -0.8635384440422058, + "logps/chosen": -348.4943542480469, + "logps/rejected": -384.9615173339844, + "loss": 0.6919, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08541040867567062, + "rewards/margins": 0.061520766466856, + "rewards/rejected": -0.14693120121955872, + "step": 3470 + }, + { + "epoch": 0.23, + "learning_rate": 4.755879061238439e-06, + "logits/chosen": -1.1692310571670532, + "logits/rejected": -0.9778891801834106, + "logps/chosen": -360.33612060546875, + "logps/rejected": -395.7692565917969, + "loss": 0.6908, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1041627898812294, + "rewards/margins": 0.04807025566697121, + "rewards/rejected": -0.1522330492734909, + "step": 3480 + }, + { + "epoch": 0.23, + "learning_rate": 4.753412204533317e-06, + "logits/chosen": -1.30173921585083, + "logits/rejected": -0.767593502998352, + "logps/chosen": -347.20074462890625, + "logps/rejected": -391.4964904785156, + "loss": 0.6887, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.08434270322322845, + "rewards/margins": 0.08968711644411087, + "rewards/rejected": -0.17402981221675873, + "step": 3490 + }, + { + "epoch": 0.23, + "learning_rate": 4.750933592926292e-06, + "logits/chosen": -1.0767269134521484, + "logits/rejected": -0.7587383389472961, + "logps/chosen": -338.02410888671875, + "logps/rejected": -391.2127990722656, + "loss": 0.6899, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.11929644644260406, + "rewards/margins": 0.0804051011800766, + "rewards/rejected": -0.19970154762268066, + "step": 3500 + }, + { + "epoch": 0.23, + "eval_logits/chosen": -0.7943944334983826, + "eval_logits/rejected": -0.6854715943336487, + "eval_logps/chosen": -357.3798828125, + "eval_logps/rejected": -413.9255676269531, + "eval_loss": 0.6898289918899536, + "eval_rewards/accuracies": 0.6514999866485596, + "eval_rewards/chosen": -0.125374898314476, + "eval_rewards/margins": 0.07693876326084137, + "eval_rewards/rejected": -0.20231369137763977, + "eval_runtime": 711.4904, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.406, + "step": 3500 + }, + { + "epoch": 0.23, + "learning_rate": 4.7484432393470124e-06, + "logits/chosen": -0.9757513999938965, + "logits/rejected": -0.49566301703453064, + "logps/chosen": -328.42205810546875, + "logps/rejected": -394.03460693359375, + "loss": 0.6838, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1263401061296463, + "rewards/margins": 0.12100942432880402, + "rewards/rejected": -0.2473495453596115, + "step": 3510 + }, + { + "epoch": 0.23, + "learning_rate": 4.745941156786385e-06, + "logits/chosen": -0.3304889500141144, + "logits/rejected": -0.5262193083763123, + "logps/chosen": -322.601806640625, + "logps/rejected": -498.60833740234375, + "loss": 0.6845, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1695626974105835, + "rewards/margins": 0.1451653391122818, + "rewards/rejected": -0.3147280812263489, + "step": 3520 + }, + { + "epoch": 0.23, + "learning_rate": 4.743427358296497e-06, + "logits/chosen": -0.7082148790359497, + "logits/rejected": -0.5598667860031128, + "logps/chosen": -357.0320739746094, + "logps/rejected": -537.770263671875, + "loss": 0.686, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.16732384264469147, + "rewards/margins": 0.16943085193634033, + "rewards/rejected": -0.3367546796798706, + "step": 3530 + }, + { + "epoch": 0.23, + "learning_rate": 4.740901856990553e-06, + "logits/chosen": -0.903288722038269, + "logits/rejected": -0.7495170831680298, + "logps/chosen": -363.7324523925781, + "logps/rejected": -382.14910888671875, + "loss": 0.6909, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10704050213098526, + "rewards/margins": 0.062489282339811325, + "rewards/rejected": -0.16952979564666748, + "step": 3540 + }, + { + "epoch": 0.23, + "learning_rate": 4.738364666042804e-06, + "logits/chosen": -1.2949492931365967, + "logits/rejected": -0.9672085642814636, + "logps/chosen": -367.40777587890625, + "logps/rejected": -361.1066589355469, + "loss": 0.6916, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07566282153129578, + "rewards/margins": 0.05733874440193176, + "rewards/rejected": -0.13300158083438873, + "step": 3550 + }, + { + "epoch": 0.23, + "learning_rate": 4.735815798688483e-06, + "logits/chosen": -1.204940676689148, + "logits/rejected": -0.9728586077690125, + "logps/chosen": -284.4416809082031, + "logps/rejected": -391.280029296875, + "loss": 0.6874, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.0861096978187561, + "rewards/margins": 0.0866992324590683, + "rewards/rejected": -0.1728089153766632, + "step": 3560 + }, + { + "epoch": 0.23, + "learning_rate": 4.7332552682237285e-06, + "logits/chosen": -0.8906214833259583, + "logits/rejected": -0.621708869934082, + "logps/chosen": -277.2056579589844, + "logps/rejected": -358.495849609375, + "loss": 0.6884, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10344570875167847, + "rewards/margins": 0.09706269949674606, + "rewards/rejected": -0.20050843060016632, + "step": 3570 + }, + { + "epoch": 0.23, + "learning_rate": 4.7306830880055234e-06, + "logits/chosen": -0.8558648228645325, + "logits/rejected": -0.8252252340316772, + "logps/chosen": -372.6454162597656, + "logps/rejected": -463.3543395996094, + "loss": 0.6895, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18191158771514893, + "rewards/margins": 0.08237184584140778, + "rewards/rejected": -0.2642834186553955, + "step": 3580 + }, + { + "epoch": 0.23, + "learning_rate": 4.728099271451619e-06, + "logits/chosen": -0.7084445953369141, + "logits/rejected": -0.8211199045181274, + "logps/chosen": -350.562744140625, + "logps/rejected": -437.9385681152344, + "loss": 0.6888, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.15955469012260437, + "rewards/margins": 0.09432835131883621, + "rewards/rejected": -0.2538830637931824, + "step": 3590 + }, + { + "epoch": 0.24, + "learning_rate": 4.725503832040466e-06, + "logits/chosen": -0.5046578049659729, + "logits/rejected": -0.22391347587108612, + "logps/chosen": -291.08856201171875, + "logps/rejected": -397.1253967285156, + "loss": 0.6911, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1419307142496109, + "rewards/margins": 0.07994942367076874, + "rewards/rejected": -0.22188015282154083, + "step": 3600 + }, + { + "epoch": 0.24, + "eval_logits/chosen": -0.6121751666069031, + "eval_logits/rejected": -0.513374924659729, + "eval_logps/chosen": -367.09832763671875, + "eval_logps/rejected": -434.4856872558594, + "eval_loss": 0.6898602843284607, + "eval_rewards/accuracies": 0.6614999771118164, + "eval_rewards/chosen": -0.1350933313369751, + "eval_rewards/margins": 0.0877804234623909, + "eval_rewards/rejected": -0.2228737622499466, + "eval_runtime": 712.2305, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 1.404, + "step": 3600 + }, + { + "epoch": 0.24, + "learning_rate": 4.722896783311152e-06, + "logits/chosen": -0.7387981414794922, + "logits/rejected": -0.5789721608161926, + "logps/chosen": -409.89227294921875, + "logps/rejected": -529.3055419921875, + "loss": 0.6903, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1508433073759079, + "rewards/margins": 0.06827931106090546, + "rewards/rejected": -0.21912261843681335, + "step": 3610 + }, + { + "epoch": 0.24, + "learning_rate": 4.720278138863318e-06, + "logits/chosen": -0.7191423177719116, + "logits/rejected": -0.8290618658065796, + "logps/chosen": -327.733642578125, + "logps/rejected": -356.4250793457031, + "loss": 0.692, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.13761194050312042, + "rewards/margins": 0.06049853563308716, + "rewards/rejected": -0.19811047613620758, + "step": 3620 + }, + { + "epoch": 0.24, + "learning_rate": 4.717647912357095e-06, + "logits/chosen": -1.1926231384277344, + "logits/rejected": -1.2323840856552124, + "logps/chosen": -389.05596923828125, + "logps/rejected": -417.18768310546875, + "loss": 0.6921, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.11657879501581192, + "rewards/margins": 0.015199318528175354, + "rewards/rejected": -0.13177812099456787, + "step": 3630 + }, + { + "epoch": 0.24, + "learning_rate": 4.715006117513035e-06, + "logits/chosen": -1.3196468353271484, + "logits/rejected": -1.197674036026001, + "logps/chosen": -379.3778381347656, + "logps/rejected": -398.1944274902344, + "loss": 0.6904, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.055252205580472946, + "rewards/margins": 0.0739966481924057, + "rewards/rejected": -0.12924885749816895, + "step": 3640 + }, + { + "epoch": 0.24, + "learning_rate": 4.7123527681120326e-06, + "logits/chosen": -1.1466625928878784, + "logits/rejected": -0.9486912488937378, + "logps/chosen": -327.9669494628906, + "logps/rejected": -364.90716552734375, + "loss": 0.6893, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08079581707715988, + "rewards/margins": 0.06509184092283249, + "rewards/rejected": -0.14588764309883118, + "step": 3650 + }, + { + "epoch": 0.24, + "learning_rate": 4.7096878779952594e-06, + "logits/chosen": -1.180060863494873, + "logits/rejected": -1.1230758428573608, + "logps/chosen": -391.13262939453125, + "logps/rejected": -451.53204345703125, + "loss": 0.6915, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11571931838989258, + "rewards/margins": 0.06298209726810455, + "rewards/rejected": -0.17870138585567474, + "step": 3660 + }, + { + "epoch": 0.24, + "learning_rate": 4.707011461064086e-06, + "logits/chosen": -0.8022671937942505, + "logits/rejected": -0.5046889185905457, + "logps/chosen": -414.7627868652344, + "logps/rejected": -456.50311279296875, + "loss": 0.6904, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1059037446975708, + "rewards/margins": 0.08593104034662247, + "rewards/rejected": -0.19183477759361267, + "step": 3670 + }, + { + "epoch": 0.24, + "learning_rate": 4.704323531280016e-06, + "logits/chosen": -0.5141183137893677, + "logits/rejected": -0.4972083568572998, + "logps/chosen": -426.3848571777344, + "logps/rejected": -414.1753845214844, + "loss": 0.6893, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.09917335212230682, + "rewards/margins": 0.07098612189292908, + "rewards/rejected": -0.1701594740152359, + "step": 3680 + }, + { + "epoch": 0.24, + "learning_rate": 4.701624102664606e-06, + "logits/chosen": -0.9006779789924622, + "logits/rejected": -0.8451553583145142, + "logps/chosen": -396.20513916015625, + "logps/rejected": -416.31158447265625, + "loss": 0.6891, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.13508927822113037, + "rewards/margins": 0.0742066353559494, + "rewards/rejected": -0.20929589867591858, + "step": 3690 + }, + { + "epoch": 0.24, + "learning_rate": 4.698913189299399e-06, + "logits/chosen": -0.8794091939926147, + "logits/rejected": -0.6929253339767456, + "logps/chosen": -316.33880615234375, + "logps/rejected": -401.0997619628906, + "loss": 0.6938, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.13018698990345, + "rewards/margins": 0.052254289388656616, + "rewards/rejected": -0.18244127929210663, + "step": 3700 + }, + { + "epoch": 0.24, + "eval_logits/chosen": -0.799005925655365, + "eval_logits/rejected": -0.6917127370834351, + "eval_logps/chosen": -355.70672607421875, + "eval_logps/rejected": -411.4952087402344, + "eval_loss": 0.6898983120918274, + "eval_rewards/accuracies": 0.6545000076293945, + "eval_rewards/chosen": -0.12370176613330841, + "eval_rewards/margins": 0.07618161290884018, + "eval_rewards/rejected": -0.199883371591568, + "eval_runtime": 711.6062, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.405, + "step": 3700 + }, + { + "epoch": 0.24, + "learning_rate": 4.696190805325847e-06, + "logits/chosen": -0.9073203206062317, + "logits/rejected": -0.7957035303115845, + "logps/chosen": -319.0508117675781, + "logps/rejected": -375.68402099609375, + "loss": 0.6893, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11267198622226715, + "rewards/margins": 0.0837518647313118, + "rewards/rejected": -0.19642382860183716, + "step": 3710 + }, + { + "epoch": 0.24, + "learning_rate": 4.693456964945239e-06, + "logits/chosen": -1.1013727188110352, + "logits/rejected": -0.7734390497207642, + "logps/chosen": -399.56768798828125, + "logps/rejected": -381.8271484375, + "loss": 0.6887, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.10146210342645645, + "rewards/margins": 0.0833517462015152, + "rewards/rejected": -0.18481382727622986, + "step": 3720 + }, + { + "epoch": 0.24, + "learning_rate": 4.6907116824186245e-06, + "logits/chosen": -1.0235180854797363, + "logits/rejected": -0.9562314748764038, + "logps/chosen": -324.9698791503906, + "logps/rejected": -381.88751220703125, + "loss": 0.6886, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.09764232486486435, + "rewards/margins": 0.05711390823125839, + "rewards/rejected": -0.15475623309612274, + "step": 3730 + }, + { + "epoch": 0.24, + "learning_rate": 4.687954972066742e-06, + "logits/chosen": -0.7998303771018982, + "logits/rejected": -0.7993026971817017, + "logps/chosen": -362.21221923828125, + "logps/rejected": -482.2862243652344, + "loss": 0.6851, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.13239601254463196, + "rewards/margins": 0.1409449279308319, + "rewards/rejected": -0.2733409106731415, + "step": 3740 + }, + { + "epoch": 0.25, + "learning_rate": 4.685186848269944e-06, + "logits/chosen": -0.7055032849311829, + "logits/rejected": -0.4607125222682953, + "logps/chosen": -343.53765869140625, + "logps/rejected": -379.7117919921875, + "loss": 0.6925, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.13166530430316925, + "rewards/margins": 0.07355310767889023, + "rewards/rejected": -0.2052183896303177, + "step": 3750 + }, + { + "epoch": 0.25, + "learning_rate": 4.682407325468119e-06, + "logits/chosen": -0.8808666467666626, + "logits/rejected": -0.6429244875907898, + "logps/chosen": -321.9494323730469, + "logps/rejected": -385.5625305175781, + "loss": 0.6882, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.104530468583107, + "rewards/margins": 0.0972428172826767, + "rewards/rejected": -0.2017732560634613, + "step": 3760 + }, + { + "epoch": 0.25, + "learning_rate": 4.67961641816062e-06, + "logits/chosen": -0.9445309638977051, + "logits/rejected": -0.8120043873786926, + "logps/chosen": -366.25946044921875, + "logps/rejected": -386.9581604003906, + "loss": 0.6911, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09046272188425064, + "rewards/margins": 0.06202878803014755, + "rewards/rejected": -0.1524915248155594, + "step": 3770 + }, + { + "epoch": 0.25, + "learning_rate": 4.676814140906188e-06, + "logits/chosen": -0.7849982380867004, + "logits/rejected": -0.767396092414856, + "logps/chosen": -365.19769287109375, + "logps/rejected": -399.0850830078125, + "loss": 0.689, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1256614476442337, + "rewards/margins": 0.06449311971664429, + "rewards/rejected": -0.19015458226203918, + "step": 3780 + }, + { + "epoch": 0.25, + "learning_rate": 4.674000508322872e-06, + "logits/chosen": -0.5533128976821899, + "logits/rejected": -0.7101965546607971, + "logps/chosen": -325.06658935546875, + "logps/rejected": -406.6145324707031, + "loss": 0.6909, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10753166675567627, + "rewards/margins": 0.07236433029174805, + "rewards/rejected": -0.17989598214626312, + "step": 3790 + }, + { + "epoch": 0.25, + "learning_rate": 4.671175535087959e-06, + "logits/chosen": -0.9753785133361816, + "logits/rejected": -1.0549020767211914, + "logps/chosen": -404.6114807128906, + "logps/rejected": -506.9873962402344, + "loss": 0.6892, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.11328382790088654, + "rewards/margins": 0.1056075319647789, + "rewards/rejected": -0.21889135241508484, + "step": 3800 + }, + { + "epoch": 0.25, + "eval_logits/chosen": -0.9947918057441711, + "eval_logits/rejected": -0.8767626285552979, + "eval_logps/chosen": -332.9628601074219, + "eval_logps/rejected": -383.22491455078125, + "eval_loss": 0.6898708343505859, + "eval_rewards/accuracies": 0.6570000052452087, + "eval_rewards/chosen": -0.10095791518688202, + "eval_rewards/margins": 0.07065509259700775, + "eval_rewards/rejected": -0.17161302268505096, + "eval_runtime": 709.3129, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 1.41, + "step": 3800 + }, + { + "epoch": 0.25, + "learning_rate": 4.6683392359378924e-06, + "logits/chosen": -0.9528951644897461, + "logits/rejected": -0.8176212310791016, + "logps/chosen": -328.28814697265625, + "logps/rejected": -373.052734375, + "loss": 0.6911, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09313914179801941, + "rewards/margins": 0.07028108835220337, + "rewards/rejected": -0.16342023015022278, + "step": 3810 + }, + { + "epoch": 0.25, + "learning_rate": 4.665491625668198e-06, + "logits/chosen": -0.7815275192260742, + "logits/rejected": -0.8232892751693726, + "logps/chosen": -307.3453674316406, + "logps/rejected": -411.70855712890625, + "loss": 0.687, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1510981172323227, + "rewards/margins": 0.0808081179857254, + "rewards/rejected": -0.2319062203168869, + "step": 3820 + }, + { + "epoch": 0.25, + "learning_rate": 4.662632719133407e-06, + "logits/chosen": -0.9967552423477173, + "logits/rejected": -0.7885714769363403, + "logps/chosen": -321.8594055175781, + "logps/rejected": -320.747314453125, + "loss": 0.6908, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09106185287237167, + "rewards/margins": 0.06589539349079132, + "rewards/rejected": -0.1569572389125824, + "step": 3830 + }, + { + "epoch": 0.25, + "learning_rate": 4.659762531246974e-06, + "logits/chosen": -0.7940338253974915, + "logits/rejected": -0.8099050521850586, + "logps/chosen": -354.4307556152344, + "logps/rejected": -366.3971252441406, + "loss": 0.6905, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.13985204696655273, + "rewards/margins": 0.04592302441596985, + "rewards/rejected": -0.18577507138252258, + "step": 3840 + }, + { + "epoch": 0.25, + "learning_rate": 4.656881076981207e-06, + "logits/chosen": -1.061415433883667, + "logits/rejected": -0.9680493474006653, + "logps/chosen": -340.7538146972656, + "logps/rejected": -374.0954895019531, + "loss": 0.6914, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.12757045030593872, + "rewards/margins": 0.051240403205156326, + "rewards/rejected": -0.17881086468696594, + "step": 3850 + }, + { + "epoch": 0.25, + "learning_rate": 4.653988371367183e-06, + "logits/chosen": -1.040121078491211, + "logits/rejected": -0.7497084140777588, + "logps/chosen": -384.7109375, + "logps/rejected": -378.0744934082031, + "loss": 0.6911, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.14390695095062256, + "rewards/margins": 0.05457156151533127, + "rewards/rejected": -0.1984785497188568, + "step": 3860 + }, + { + "epoch": 0.25, + "learning_rate": 4.651084429494671e-06, + "logits/chosen": -1.0395872592926025, + "logits/rejected": -0.7707028388977051, + "logps/chosen": -477.115966796875, + "logps/rejected": -465.1861267089844, + "loss": 0.6904, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20176498591899872, + "rewards/margins": 0.067594014108181, + "rewards/rejected": -0.26935896277427673, + "step": 3870 + }, + { + "epoch": 0.25, + "learning_rate": 4.648169266512053e-06, + "logits/chosen": -1.1651791334152222, + "logits/rejected": -0.908767819404602, + "logps/chosen": -450.6615295410156, + "logps/rejected": -484.35955810546875, + "loss": 0.6894, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.22533340752124786, + "rewards/margins": 0.07865221053361893, + "rewards/rejected": -0.3039856255054474, + "step": 3880 + }, + { + "epoch": 0.25, + "learning_rate": 4.6452428976262505e-06, + "logits/chosen": -0.9482473134994507, + "logits/rejected": -0.659041166305542, + "logps/chosen": -400.8704528808594, + "logps/rejected": -511.81109619140625, + "loss": 0.6889, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19707906246185303, + "rewards/margins": 0.15526339411735535, + "rewards/rejected": -0.352342426776886, + "step": 3890 + }, + { + "epoch": 0.26, + "learning_rate": 4.642305338102633e-06, + "logits/chosen": -0.8177778124809265, + "logits/rejected": -1.0210679769515991, + "logps/chosen": -383.3443908691406, + "logps/rejected": -484.0030212402344, + "loss": 0.688, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.22159484028816223, + "rewards/margins": 0.08251297473907471, + "rewards/rejected": -0.3041078448295593, + "step": 3900 + }, + { + "epoch": 0.26, + "eval_logits/chosen": -0.8940591812133789, + "eval_logits/rejected": -0.7769768238067627, + "eval_logps/chosen": -480.23040771484375, + "eval_logps/rejected": -543.86767578125, + "eval_loss": 0.6898152828216553, + "eval_rewards/accuracies": 0.6495000123977661, + "eval_rewards/chosen": -0.24822546541690826, + "eval_rewards/margins": 0.08403035253286362, + "eval_rewards/rejected": -0.3322558104991913, + "eval_runtime": 711.6846, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 1.405, + "step": 3900 + }, + { + "epoch": 0.26, + "learning_rate": 4.639356603264953e-06, + "logits/chosen": -0.9487046003341675, + "logits/rejected": -0.9224430918693542, + "logps/chosen": -470.205322265625, + "logps/rejected": -499.61126708984375, + "loss": 0.692, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.22837357223033905, + "rewards/margins": 0.055136702954769135, + "rewards/rejected": -0.2835102677345276, + "step": 3910 + }, + { + "epoch": 0.26, + "learning_rate": 4.636396708495255e-06, + "logits/chosen": -0.718294084072113, + "logits/rejected": -0.6768798828125, + "logps/chosen": -443.03515625, + "logps/rejected": -492.17315673828125, + "loss": 0.6909, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21384009718894958, + "rewards/margins": 0.07398195564746857, + "rewards/rejected": -0.28782206773757935, + "step": 3920 + }, + { + "epoch": 0.26, + "learning_rate": 4.633425669233799e-06, + "logits/chosen": -1.1752598285675049, + "logits/rejected": -1.0773875713348389, + "logps/chosen": -427.16912841796875, + "logps/rejected": -501.4466857910156, + "loss": 0.6888, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.19234254956245422, + "rewards/margins": 0.07872482389211655, + "rewards/rejected": -0.27106738090515137, + "step": 3930 + }, + { + "epoch": 0.26, + "learning_rate": 4.6304435009789825e-06, + "logits/chosen": -1.1653227806091309, + "logits/rejected": -0.8779115676879883, + "logps/chosen": -412.82037353515625, + "logps/rejected": -444.9153747558594, + "loss": 0.6887, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.17531313002109528, + "rewards/margins": 0.10300495475530624, + "rewards/rejected": -0.2783181071281433, + "step": 3940 + }, + { + "epoch": 0.26, + "learning_rate": 4.627450219287256e-06, + "logits/chosen": -1.1403902769088745, + "logits/rejected": -1.0868604183197021, + "logps/chosen": -369.7454833984375, + "logps/rejected": -423.36761474609375, + "loss": 0.6897, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1881864070892334, + "rewards/margins": 0.07613326609134674, + "rewards/rejected": -0.26431962847709656, + "step": 3950 + }, + { + "epoch": 0.26, + "learning_rate": 4.624445839773042e-06, + "logits/chosen": -0.8833427429199219, + "logits/rejected": -0.8696644902229309, + "logps/chosen": -369.5878601074219, + "logps/rejected": -404.59051513671875, + "loss": 0.6925, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.19957657158374786, + "rewards/margins": 0.036066509783267975, + "rewards/rejected": -0.23564307391643524, + "step": 3960 + }, + { + "epoch": 0.26, + "learning_rate": 4.621430378108656e-06, + "logits/chosen": -0.9928571581840515, + "logits/rejected": -0.78460294008255, + "logps/chosen": -513.399658203125, + "logps/rejected": -612.7415771484375, + "loss": 0.6888, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.25564131140708923, + "rewards/margins": 0.10610628128051758, + "rewards/rejected": -0.3617476224899292, + "step": 3970 + }, + { + "epoch": 0.26, + "learning_rate": 4.618403850024223e-06, + "logits/chosen": -0.7883467078208923, + "logits/rejected": -0.7039044499397278, + "logps/chosen": -475.11004638671875, + "logps/rejected": -494.36761474609375, + "loss": 0.6908, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.21961987018585205, + "rewards/margins": 0.06591992825269699, + "rewards/rejected": -0.28553980588912964, + "step": 3980 + }, + { + "epoch": 0.26, + "learning_rate": 4.615366271307598e-06, + "logits/chosen": -1.0304720401763916, + "logits/rejected": -0.8591554760932922, + "logps/chosen": -409.8172302246094, + "logps/rejected": -460.185302734375, + "loss": 0.6901, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.217874214053154, + "rewards/margins": 0.06226044148206711, + "rewards/rejected": -0.2801347076892853, + "step": 3990 + }, + { + "epoch": 0.26, + "learning_rate": 4.612317657804277e-06, + "logits/chosen": -1.009948492050171, + "logits/rejected": -0.9941838979721069, + "logps/chosen": -388.17620849609375, + "logps/rejected": -533.5516357421875, + "loss": 0.6879, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.24094316363334656, + "rewards/margins": 0.09402754157781601, + "rewards/rejected": -0.33497071266174316, + "step": 4000 + }, + { + "epoch": 0.26, + "eval_logits/chosen": -1.0340783596038818, + "eval_logits/rejected": -0.9098215103149414, + "eval_logps/chosen": -458.0283203125, + "eval_logps/rejected": -518.2861328125, + "eval_loss": 0.6896993517875671, + "eval_rewards/accuracies": 0.6539999842643738, + "eval_rewards/chosen": -0.2260233461856842, + "eval_rewards/margins": 0.08065088838338852, + "eval_rewards/rejected": -0.3066742420196533, + "eval_runtime": 713.6006, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 1.401, + "step": 4000 + }, + { + "epoch": 0.26, + "learning_rate": 4.6092580254173236e-06, + "logits/chosen": -0.9095097780227661, + "logits/rejected": -0.8598026037216187, + "logps/chosen": -497.9371643066406, + "logps/rejected": -583.9747314453125, + "loss": 0.6896, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2406143844127655, + "rewards/margins": 0.10470570623874664, + "rewards/rejected": -0.34532010555267334, + "step": 4010 + }, + { + "epoch": 0.26, + "learning_rate": 4.606187390107277e-06, + "logits/chosen": -0.9289296865463257, + "logits/rejected": -0.8710733652114868, + "logps/chosen": -481.1529235839844, + "logps/rejected": -516.4467163085938, + "loss": 0.6907, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.25627756118774414, + "rewards/margins": 0.07428644597530365, + "rewards/rejected": -0.3305639922618866, + "step": 4020 + }, + { + "epoch": 0.26, + "learning_rate": 4.603105767892077e-06, + "logits/chosen": -1.19761323928833, + "logits/rejected": -1.1186004877090454, + "logps/chosen": -411.83660888671875, + "logps/rejected": -505.84063720703125, + "loss": 0.6904, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.21714666485786438, + "rewards/margins": 0.07605074346065521, + "rewards/rejected": -0.2931973934173584, + "step": 4030 + }, + { + "epoch": 0.26, + "learning_rate": 4.6000131748469725e-06, + "logits/chosen": -1.1575210094451904, + "logits/rejected": -0.9933841824531555, + "logps/chosen": -445.5464782714844, + "logps/rejected": -440.16754150390625, + "loss": 0.6901, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1954062283039093, + "rewards/margins": 0.06893941015005112, + "rewards/rejected": -0.264345645904541, + "step": 4040 + }, + { + "epoch": 0.26, + "learning_rate": 4.596909627104445e-06, + "logits/chosen": -1.3777754306793213, + "logits/rejected": -1.2198288440704346, + "logps/chosen": -518.49365234375, + "logps/rejected": -564.2063598632812, + "loss": 0.6885, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.26975539326667786, + "rewards/margins": 0.07834690809249878, + "rewards/rejected": -0.34810227155685425, + "step": 4050 + }, + { + "epoch": 0.27, + "learning_rate": 4.5937951408541215e-06, + "logits/chosen": -1.1894450187683105, + "logits/rejected": -0.7648533582687378, + "logps/chosen": -523.9410400390625, + "logps/rejected": -590.8986206054688, + "loss": 0.6903, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.27130264043807983, + "rewards/margins": 0.11233459413051605, + "rewards/rejected": -0.3836372494697571, + "step": 4060 + }, + { + "epoch": 0.27, + "learning_rate": 4.590669732342685e-06, + "logits/chosen": -0.9605843424797058, + "logits/rejected": -0.7980761528015137, + "logps/chosen": -453.78070068359375, + "logps/rejected": -545.031982421875, + "loss": 0.6909, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.24163314700126648, + "rewards/margins": 0.08935852348804474, + "rewards/rejected": -0.3309916853904724, + "step": 4070 + }, + { + "epoch": 0.27, + "learning_rate": 4.587533417873799e-06, + "logits/chosen": -1.107875108718872, + "logits/rejected": -1.0243065357208252, + "logps/chosen": -502.90362548828125, + "logps/rejected": -633.7393188476562, + "loss": 0.6915, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3097396492958069, + "rewards/margins": 0.07113198935985565, + "rewards/rejected": -0.38087162375450134, + "step": 4080 + }, + { + "epoch": 0.27, + "learning_rate": 4.584386213808016e-06, + "logits/chosen": -1.0260846614837646, + "logits/rejected": -0.8974090814590454, + "logps/chosen": -467.3468322753906, + "logps/rejected": -475.55755615234375, + "loss": 0.6898, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.24566781520843506, + "rewards/margins": 0.05398694425821304, + "rewards/rejected": -0.2996547818183899, + "step": 4090 + }, + { + "epoch": 0.27, + "learning_rate": 4.581228136562693e-06, + "logits/chosen": -1.1336443424224854, + "logits/rejected": -1.0851821899414062, + "logps/chosen": -446.1910705566406, + "logps/rejected": -451.4580078125, + "loss": 0.6933, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.20801062881946564, + "rewards/margins": 0.030951568856835365, + "rewards/rejected": -0.23896221816539764, + "step": 4100 + }, + { + "epoch": 0.27, + "eval_logits/chosen": -1.1499063968658447, + "eval_logits/rejected": -1.0199224948883057, + "eval_logps/chosen": -460.51519775390625, + "eval_logps/rejected": -502.691162109375, + "eval_loss": 0.6899304389953613, + "eval_rewards/accuracies": 0.6520000100135803, + "eval_rewards/chosen": -0.22851026058197021, + "eval_rewards/margins": 0.0625690221786499, + "eval_rewards/rejected": -0.2910792827606201, + "eval_runtime": 712.7574, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 1.403, + "step": 4100 + }, + { + "epoch": 0.27, + "learning_rate": 4.578059202611909e-06, + "logits/chosen": -1.1208741664886475, + "logits/rejected": -1.065028429031372, + "logps/chosen": -480.1509704589844, + "logps/rejected": -500.01336669921875, + "loss": 0.6914, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.22299210727214813, + "rewards/margins": 0.034618355333805084, + "rewards/rejected": -0.2576104402542114, + "step": 4110 + }, + { + "epoch": 0.27, + "learning_rate": 4.574879428486376e-06, + "logits/chosen": -1.1323649883270264, + "logits/rejected": -1.178371787071228, + "logps/chosen": -445.16302490234375, + "logps/rejected": -493.7518615722656, + "loss": 0.6914, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2322465479373932, + "rewards/margins": 0.05151096731424332, + "rewards/rejected": -0.2837575376033783, + "step": 4120 + }, + { + "epoch": 0.27, + "learning_rate": 4.571688830773352e-06, + "logits/chosen": -1.2729997634887695, + "logits/rejected": -1.1686136722564697, + "logps/chosen": -399.8155517578125, + "logps/rejected": -415.05084228515625, + "loss": 0.6915, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.17774733901023865, + "rewards/margins": 0.03511429950594902, + "rewards/rejected": -0.21286162734031677, + "step": 4130 + }, + { + "epoch": 0.27, + "learning_rate": 4.568487426116559e-06, + "logits/chosen": -1.0722920894622803, + "logits/rejected": -0.9740289449691772, + "logps/chosen": -349.8589782714844, + "logps/rejected": -381.6366271972656, + "loss": 0.6927, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1778871715068817, + "rewards/margins": 0.039310991764068604, + "rewards/rejected": -0.21719813346862793, + "step": 4140 + }, + { + "epoch": 0.27, + "learning_rate": 4.565275231216092e-06, + "logits/chosen": -0.8283463716506958, + "logits/rejected": -0.8446325063705444, + "logps/chosen": -313.578369140625, + "logps/rejected": -403.86944580078125, + "loss": 0.6907, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.16223487257957458, + "rewards/margins": 0.04430120810866356, + "rewards/rejected": -0.20653608441352844, + "step": 4150 + }, + { + "epoch": 0.27, + "learning_rate": 4.562052262828331e-06, + "logits/chosen": -1.0895860195159912, + "logits/rejected": -1.0035459995269775, + "logps/chosen": -389.712890625, + "logps/rejected": -451.557373046875, + "loss": 0.6906, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19090190529823303, + "rewards/margins": 0.06712041050195694, + "rewards/rejected": -0.2580223083496094, + "step": 4160 + }, + { + "epoch": 0.27, + "learning_rate": 4.558818537765861e-06, + "logits/chosen": -1.3722314834594727, + "logits/rejected": -0.8719658851623535, + "logps/chosen": -417.94097900390625, + "logps/rejected": -441.64080810546875, + "loss": 0.6923, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.18192659318447113, + "rewards/margins": 0.05856790393590927, + "rewards/rejected": -0.2404944896697998, + "step": 4170 + }, + { + "epoch": 0.27, + "learning_rate": 4.555574072897374e-06, + "logits/chosen": -1.08540940284729, + "logits/rejected": -1.1781179904937744, + "logps/chosen": -367.94091796875, + "logps/rejected": -442.3624572753906, + "loss": 0.6888, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.16597968339920044, + "rewards/margins": 0.07669314742088318, + "rewards/rejected": -0.24267283082008362, + "step": 4180 + }, + { + "epoch": 0.27, + "learning_rate": 4.552318885147589e-06, + "logits/chosen": -1.2220853567123413, + "logits/rejected": -0.9189378619194031, + "logps/chosen": -418.6746520996094, + "logps/rejected": -436.156005859375, + "loss": 0.691, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.17775699496269226, + "rewards/margins": 0.07687093317508698, + "rewards/rejected": -0.25462794303894043, + "step": 4190 + }, + { + "epoch": 0.27, + "learning_rate": 4.549052991497159e-06, + "logits/chosen": -0.9784964323043823, + "logits/rejected": -0.9273381233215332, + "logps/chosen": -364.02545166015625, + "logps/rejected": -428.04571533203125, + "loss": 0.6908, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.18385827541351318, + "rewards/margins": 0.06360138952732086, + "rewards/rejected": -0.24745967984199524, + "step": 4200 + }, + { + "epoch": 0.27, + "eval_logits/chosen": -0.8099203109741211, + "eval_logits/rejected": -0.7000288367271423, + "eval_logps/chosen": -446.6075134277344, + "eval_logps/rejected": -499.034912109375, + "eval_loss": 0.6899096965789795, + "eval_rewards/accuracies": 0.6489999890327454, + "eval_rewards/chosen": -0.21460255980491638, + "eval_rewards/margins": 0.07282048463821411, + "eval_rewards/rejected": -0.2874230444431305, + "eval_runtime": 714.9068, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 1.399, + "step": 4200 + }, + { + "epoch": 0.28, + "learning_rate": 4.545776408982585e-06, + "logits/chosen": -0.8513118624687195, + "logits/rejected": -0.7860215306282043, + "logps/chosen": -444.19915771484375, + "logps/rejected": -514.2364501953125, + "loss": 0.689, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.212455153465271, + "rewards/margins": 0.07905051857233047, + "rewards/rejected": -0.2915056347846985, + "step": 4210 + }, + { + "epoch": 0.28, + "learning_rate": 4.542489154696128e-06, + "logits/chosen": -1.0325562953948975, + "logits/rejected": -0.7060797810554504, + "logps/chosen": -458.957275390625, + "logps/rejected": -462.31268310546875, + "loss": 0.6914, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1912369430065155, + "rewards/margins": 0.06460615247488022, + "rewards/rejected": -0.25584307312965393, + "step": 4220 + }, + { + "epoch": 0.28, + "learning_rate": 4.5391912457857145e-06, + "logits/chosen": -0.9764394760131836, + "logits/rejected": -0.842827320098877, + "logps/chosen": -466.9427795410156, + "logps/rejected": -508.9811096191406, + "loss": 0.6893, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.20197315514087677, + "rewards/margins": 0.08914804458618164, + "rewards/rejected": -0.2911211848258972, + "step": 4230 + }, + { + "epoch": 0.28, + "learning_rate": 4.535882699454854e-06, + "logits/chosen": -1.0353846549987793, + "logits/rejected": -0.9514248967170715, + "logps/chosen": -486.9790954589844, + "logps/rejected": -598.344482421875, + "loss": 0.6888, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.21592804789543152, + "rewards/margins": 0.08970265835523605, + "rewards/rejected": -0.30563071370124817, + "step": 4240 + }, + { + "epoch": 0.28, + "learning_rate": 4.532563532962546e-06, + "logits/chosen": -1.288633108139038, + "logits/rejected": -1.264217734336853, + "logps/chosen": -425.7466735839844, + "logps/rejected": -523.7071533203125, + "loss": 0.6908, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2358524352312088, + "rewards/margins": 0.07636555284261703, + "rewards/rejected": -0.3122180104255676, + "step": 4250 + }, + { + "epoch": 0.28, + "learning_rate": 4.529233763623187e-06, + "logits/chosen": -1.0135489702224731, + "logits/rejected": -0.7392680644989014, + "logps/chosen": -440.1654357910156, + "logps/rejected": -471.6558532714844, + "loss": 0.6884, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2380649596452713, + "rewards/margins": 0.07945012301206589, + "rewards/rejected": -0.3175150752067566, + "step": 4260 + }, + { + "epoch": 0.28, + "learning_rate": 4.5258934088064854e-06, + "logits/chosen": -0.8823081254959106, + "logits/rejected": -0.6247905492782593, + "logps/chosen": -530.675048828125, + "logps/rejected": -584.1381225585938, + "loss": 0.6872, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.31069430708885193, + "rewards/margins": 0.10816062986850739, + "rewards/rejected": -0.4188549518585205, + "step": 4270 + }, + { + "epoch": 0.28, + "learning_rate": 4.522542485937369e-06, + "logits/chosen": -0.7952845692634583, + "logits/rejected": -0.6527966260910034, + "logps/chosen": -636.1341552734375, + "logps/rejected": -646.477294921875, + "loss": 0.6893, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.3454816937446594, + "rewards/margins": 0.10698393732309341, + "rewards/rejected": -0.45246559381484985, + "step": 4280 + }, + { + "epoch": 0.28, + "learning_rate": 4.519181012495892e-06, + "logits/chosen": -1.004214882850647, + "logits/rejected": -0.6648763418197632, + "logps/chosen": -556.7357788085938, + "logps/rejected": -614.4586791992188, + "loss": 0.6912, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3145165741443634, + "rewards/margins": 0.08679699152708054, + "rewards/rejected": -0.4013136029243469, + "step": 4290 + }, + { + "epoch": 0.28, + "learning_rate": 4.515809006017147e-06, + "logits/chosen": -0.7630017995834351, + "logits/rejected": -0.6918413639068604, + "logps/chosen": -509.41656494140625, + "logps/rejected": -562.0657958984375, + "loss": 0.6902, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2760070264339447, + "rewards/margins": 0.0881584882736206, + "rewards/rejected": -0.3641654849052429, + "step": 4300 + }, + { + "epoch": 0.28, + "eval_logits/chosen": -0.8444295525550842, + "eval_logits/rejected": -0.7308560013771057, + "eval_logps/chosen": -529.0877685546875, + "eval_logps/rejected": -583.29833984375, + "eval_loss": 0.6897502541542053, + "eval_rewards/accuracies": 0.6585000157356262, + "eval_rewards/chosen": -0.2970828413963318, + "eval_rewards/margins": 0.07460356503725052, + "eval_rewards/rejected": -0.3716863989830017, + "eval_runtime": 710.9846, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 1.407, + "step": 4300 + }, + { + "epoch": 0.28, + "learning_rate": 4.512426484091171e-06, + "logits/chosen": -1.0411491394042969, + "logits/rejected": -0.8547149896621704, + "logps/chosen": -571.1553955078125, + "logps/rejected": -588.7039184570312, + "loss": 0.6915, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2935742437839508, + "rewards/margins": 0.053841590881347656, + "rewards/rejected": -0.3474158048629761, + "step": 4310 + }, + { + "epoch": 0.28, + "learning_rate": 4.509033464362858e-06, + "logits/chosen": -0.6955040693283081, + "logits/rejected": -0.6403937339782715, + "logps/chosen": -528.7620849609375, + "logps/rejected": -617.499755859375, + "loss": 0.6899, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2874962389469147, + "rewards/margins": 0.0745067298412323, + "rewards/rejected": -0.3620029389858246, + "step": 4320 + }, + { + "epoch": 0.28, + "learning_rate": 4.505629964531857e-06, + "logits/chosen": -0.9003432393074036, + "logits/rejected": -0.7642195224761963, + "logps/chosen": -521.0284423828125, + "logps/rejected": -599.5103149414062, + "loss": 0.6878, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2978239953517914, + "rewards/margins": 0.10929515212774277, + "rewards/rejected": -0.40711918473243713, + "step": 4330 + }, + { + "epoch": 0.28, + "learning_rate": 4.502216002352492e-06, + "logits/chosen": -0.8330503702163696, + "logits/rejected": -0.6456397771835327, + "logps/chosen": -487.53094482421875, + "logps/rejected": -522.7586669921875, + "loss": 0.6924, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.3244900703430176, + "rewards/margins": 0.054641880095005035, + "rewards/rejected": -0.3791319727897644, + "step": 4340 + }, + { + "epoch": 0.28, + "learning_rate": 4.498791595633663e-06, + "logits/chosen": -0.7700721025466919, + "logits/rejected": -0.5906479358673096, + "logps/chosen": -466.475341796875, + "logps/rejected": -426.53497314453125, + "loss": 0.6911, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.20409110188484192, + "rewards/margins": 0.04726005345582962, + "rewards/rejected": -0.25135114789009094, + "step": 4350 + }, + { + "epoch": 0.29, + "learning_rate": 4.495356762238751e-06, + "logits/chosen": -1.251646637916565, + "logits/rejected": -0.8721704483032227, + "logps/chosen": -453.76861572265625, + "logps/rejected": -424.33721923828125, + "loss": 0.6906, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1705707162618637, + "rewards/margins": 0.06802061945199966, + "rewards/rejected": -0.23859134316444397, + "step": 4360 + }, + { + "epoch": 0.29, + "learning_rate": 4.491911520085532e-06, + "logits/chosen": -0.7782236933708191, + "logits/rejected": -0.7609604001045227, + "logps/chosen": -363.4910583496094, + "logps/rejected": -446.3992614746094, + "loss": 0.6897, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1630832999944687, + "rewards/margins": 0.07213564217090607, + "rewards/rejected": -0.23521895706653595, + "step": 4370 + }, + { + "epoch": 0.29, + "learning_rate": 4.488455887146075e-06, + "logits/chosen": -1.0101211071014404, + "logits/rejected": -0.898006796836853, + "logps/chosen": -346.0672607421875, + "logps/rejected": -476.0105895996094, + "loss": 0.687, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.17595519125461578, + "rewards/margins": 0.1167796403169632, + "rewards/rejected": -0.2927348017692566, + "step": 4380 + }, + { + "epoch": 0.29, + "learning_rate": 4.484989881446654e-06, + "logits/chosen": -1.0024950504302979, + "logits/rejected": -0.9726330041885376, + "logps/chosen": -420.73260498046875, + "logps/rejected": -436.6083068847656, + "loss": 0.6917, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2182566374540329, + "rewards/margins": 0.033820368349552155, + "rewards/rejected": -0.25207701325416565, + "step": 4390 + }, + { + "epoch": 0.29, + "learning_rate": 4.481513521067654e-06, + "logits/chosen": -0.8407198190689087, + "logits/rejected": -0.7638456225395203, + "logps/chosen": -533.3350830078125, + "logps/rejected": -602.2070922851562, + "loss": 0.6895, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3085605204105377, + "rewards/margins": 0.10428784042596817, + "rewards/rejected": -0.4128483235836029, + "step": 4400 + }, + { + "epoch": 0.29, + "eval_logits/chosen": -0.7359596490859985, + "eval_logits/rejected": -0.6256921291351318, + "eval_logps/chosen": -519.137451171875, + "eval_logps/rejected": -587.6251831054688, + "eval_loss": 0.6898573040962219, + "eval_rewards/accuracies": 0.6504999995231628, + "eval_rewards/chosen": -0.28713250160217285, + "eval_rewards/margins": 0.0888807401061058, + "eval_rewards/rejected": -0.37601324915885925, + "eval_runtime": 712.7139, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 1.403, + "step": 4400 + }, + { + "epoch": 0.29, + "learning_rate": 4.478026824143473e-06, + "logits/chosen": -0.8550176620483398, + "logits/rejected": -0.8174192309379578, + "logps/chosen": -546.6198120117188, + "logps/rejected": -618.2431640625, + "loss": 0.6856, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.28055816888809204, + "rewards/margins": 0.12819083034992218, + "rewards/rejected": -0.4087490141391754, + "step": 4410 + }, + { + "epoch": 0.29, + "learning_rate": 4.474529808862429e-06, + "logits/chosen": -0.7030132412910461, + "logits/rejected": -0.7720840573310852, + "logps/chosen": -430.73333740234375, + "logps/rejected": -544.6876831054688, + "loss": 0.6906, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2419130504131317, + "rewards/margins": 0.09633009135723114, + "rewards/rejected": -0.3382430970668793, + "step": 4420 + }, + { + "epoch": 0.29, + "learning_rate": 4.471022493466669e-06, + "logits/chosen": -0.8346714973449707, + "logits/rejected": -0.5992153882980347, + "logps/chosen": -546.3121948242188, + "logps/rejected": -537.5162353515625, + "loss": 0.6904, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.24437315762043, + "rewards/margins": 0.06821531802415848, + "rewards/rejected": -0.31258848309516907, + "step": 4430 + }, + { + "epoch": 0.29, + "learning_rate": 4.467504896252066e-06, + "logits/chosen": -1.0627632141113281, + "logits/rejected": -1.0447062253952026, + "logps/chosen": -483.2215270996094, + "logps/rejected": -545.9163818359375, + "loss": 0.6894, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.23352019488811493, + "rewards/margins": 0.09143707901239395, + "rewards/rejected": -0.32495731115341187, + "step": 4440 + }, + { + "epoch": 0.29, + "learning_rate": 4.463977035568132e-06, + "logits/chosen": -0.9531109929084778, + "logits/rejected": -1.0412567853927612, + "logps/chosen": -412.849853515625, + "logps/rejected": -510.6705017089844, + "loss": 0.691, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2004670351743698, + "rewards/margins": 0.046115074306726456, + "rewards/rejected": -0.24658215045928955, + "step": 4450 + }, + { + "epoch": 0.29, + "learning_rate": 4.460438929817914e-06, + "logits/chosen": -1.0476336479187012, + "logits/rejected": -0.8358996510505676, + "logps/chosen": -396.8749084472656, + "logps/rejected": -445.3506774902344, + "loss": 0.6895, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1904551386833191, + "rewards/margins": 0.052764374762773514, + "rewards/rejected": -0.2432195246219635, + "step": 4460 + }, + { + "epoch": 0.29, + "learning_rate": 4.456890597457907e-06, + "logits/chosen": -0.9712381362915039, + "logits/rejected": -0.9402221441268921, + "logps/chosen": -406.9786682128906, + "logps/rejected": -508.30078125, + "loss": 0.6899, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.19484297931194305, + "rewards/margins": 0.08311771601438522, + "rewards/rejected": -0.2779606878757477, + "step": 4470 + }, + { + "epoch": 0.29, + "learning_rate": 4.453332056997951e-06, + "logits/chosen": -0.9786797761917114, + "logits/rejected": -0.9453691244125366, + "logps/chosen": -319.5592041015625, + "logps/rejected": -416.96270751953125, + "loss": 0.6879, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.13959971070289612, + "rewards/margins": 0.10172855854034424, + "rewards/rejected": -0.24132826924324036, + "step": 4480 + }, + { + "epoch": 0.29, + "learning_rate": 4.449763327001134e-06, + "logits/chosen": -1.1327717304229736, + "logits/rejected": -1.1574513912200928, + "logps/chosen": -324.18731689453125, + "logps/rejected": -421.53173828125, + "loss": 0.6904, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.13542507588863373, + "rewards/margins": 0.0689290389418602, + "rewards/rejected": -0.20435413718223572, + "step": 4490 + }, + { + "epoch": 0.29, + "learning_rate": 4.446184426083702e-06, + "logits/chosen": -1.1786584854125977, + "logits/rejected": -1.0005810260772705, + "logps/chosen": -348.32135009765625, + "logps/rejected": -479.8023376464844, + "loss": 0.6864, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.15475699305534363, + "rewards/margins": 0.12311340868473053, + "rewards/rejected": -0.27787038683891296, + "step": 4500 + }, + { + "epoch": 0.29, + "eval_logits/chosen": -1.1400220394134521, + "eval_logits/rejected": -1.0094726085662842, + "eval_logps/chosen": -381.37152099609375, + "eval_logps/rejected": -431.19793701171875, + "eval_loss": 0.6898050308227539, + "eval_rewards/accuracies": 0.6589999794960022, + "eval_rewards/chosen": -0.1493665874004364, + "eval_rewards/margins": 0.07021944224834442, + "eval_rewards/rejected": -0.21958602964878082, + "eval_runtime": 711.7175, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 1.405, + "step": 4500 + }, + { + "epoch": 0.3, + "learning_rate": 4.442595372914954e-06, + "logits/chosen": -1.1510266065597534, + "logits/rejected": -1.1077659130096436, + "logps/chosen": -369.70440673828125, + "logps/rejected": -363.3657531738281, + "loss": 0.6884, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.13272404670715332, + "rewards/margins": 0.07893560826778412, + "rewards/rejected": -0.21165966987609863, + "step": 4510 + }, + { + "epoch": 0.3, + "learning_rate": 4.43899618621715e-06, + "logits/chosen": -1.1567199230194092, + "logits/rejected": -0.9014299511909485, + "logps/chosen": -433.1683044433594, + "logps/rejected": -536.3552856445312, + "loss": 0.6892, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1821144074201584, + "rewards/margins": 0.09999031573534012, + "rewards/rejected": -0.2821047008037567, + "step": 4520 + }, + { + "epoch": 0.3, + "learning_rate": 4.4353868847654105e-06, + "logits/chosen": -1.2916271686553955, + "logits/rejected": -1.0142287015914917, + "logps/chosen": -434.13055419921875, + "logps/rejected": -476.5389709472656, + "loss": 0.6863, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.18827231228351593, + "rewards/margins": 0.07247602939605713, + "rewards/rejected": -0.26074832677841187, + "step": 4530 + }, + { + "epoch": 0.3, + "learning_rate": 4.43176748738762e-06, + "logits/chosen": -0.826651394367218, + "logits/rejected": -0.7963089942932129, + "logps/chosen": -510.57464599609375, + "logps/rejected": -617.2264404296875, + "loss": 0.6899, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2786267399787903, + "rewards/margins": 0.10226340591907501, + "rewards/rejected": -0.3808901906013489, + "step": 4540 + }, + { + "epoch": 0.3, + "learning_rate": 4.4281380129643295e-06, + "logits/chosen": -0.8452268838882446, + "logits/rejected": -0.6725960969924927, + "logps/chosen": -487.48443603515625, + "logps/rejected": -596.5521240234375, + "loss": 0.689, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2577342391014099, + "rewards/margins": 0.11954182386398315, + "rewards/rejected": -0.37727606296539307, + "step": 4550 + }, + { + "epoch": 0.3, + "learning_rate": 4.424498480428654e-06, + "logits/chosen": -1.1150968074798584, + "logits/rejected": -0.9688647389411926, + "logps/chosen": -450.00439453125, + "logps/rejected": -448.45977783203125, + "loss": 0.692, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.20273244380950928, + "rewards/margins": 0.038480862975120544, + "rewards/rejected": -0.24121332168579102, + "step": 4560 + }, + { + "epoch": 0.3, + "learning_rate": 4.420848908766178e-06, + "logits/chosen": -1.285456657409668, + "logits/rejected": -1.2038942575454712, + "logps/chosen": -403.0211486816406, + "logps/rejected": -477.5533142089844, + "loss": 0.6894, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.19579431414604187, + "rewards/margins": 0.06565193831920624, + "rewards/rejected": -0.2614462375640869, + "step": 4570 + }, + { + "epoch": 0.3, + "learning_rate": 4.417189317014855e-06, + "logits/chosen": -1.1039775609970093, + "logits/rejected": -1.2309410572052002, + "logps/chosen": -409.04632568359375, + "logps/rejected": -487.999755859375, + "loss": 0.6902, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.20825043320655823, + "rewards/margins": 0.04825657233595848, + "rewards/rejected": -0.2565069794654846, + "step": 4580 + }, + { + "epoch": 0.3, + "learning_rate": 4.41351972426491e-06, + "logits/chosen": -0.9162073135375977, + "logits/rejected": -0.947158932685852, + "logps/chosen": -478.1299743652344, + "logps/rejected": -601.3800048828125, + "loss": 0.6908, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2313905954360962, + "rewards/margins": 0.06919713318347931, + "rewards/rejected": -0.3005877435207367, + "step": 4590 + }, + { + "epoch": 0.3, + "learning_rate": 4.409840149658735e-06, + "logits/chosen": -1.0446149110794067, + "logits/rejected": -0.9430927038192749, + "logps/chosen": -484.2718811035156, + "logps/rejected": -492.84539794921875, + "loss": 0.6903, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.19975461065769196, + "rewards/margins": 0.05872530862689018, + "rewards/rejected": -0.25847989320755005, + "step": 4600 + }, + { + "epoch": 0.3, + "eval_logits/chosen": -1.1054784059524536, + "eval_logits/rejected": -0.9768812656402588, + "eval_logps/chosen": -445.2477111816406, + "eval_logps/rejected": -493.52667236328125, + "eval_loss": 0.6898093223571777, + "eval_rewards/accuracies": 0.6499999761581421, + "eval_rewards/chosen": -0.2132427990436554, + "eval_rewards/margins": 0.0686720460653305, + "eval_rewards/rejected": -0.2819148600101471, + "eval_runtime": 712.2205, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 1.404, + "step": 4600 + }, + { + "epoch": 0.3, + "learning_rate": 4.4061506123907925e-06, + "logits/chosen": -1.0503458976745605, + "logits/rejected": -0.9003597497940063, + "logps/chosen": -485.533447265625, + "logps/rejected": -500.1853942871094, + "loss": 0.691, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2214416265487671, + "rewards/margins": 0.05495814234018326, + "rewards/rejected": -0.27639979124069214, + "step": 4610 + }, + { + "epoch": 0.3, + "learning_rate": 4.402451131707519e-06, + "logits/chosen": -1.256821632385254, + "logits/rejected": -0.9206531643867493, + "logps/chosen": -440.94482421875, + "logps/rejected": -454.66729736328125, + "loss": 0.6894, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2314794510602951, + "rewards/margins": 0.0892038494348526, + "rewards/rejected": -0.3206833302974701, + "step": 4620 + }, + { + "epoch": 0.3, + "learning_rate": 4.398741726907215e-06, + "logits/chosen": -1.3970555067062378, + "logits/rejected": -1.088208556175232, + "logps/chosen": -512.00732421875, + "logps/rejected": -548.9866333007812, + "loss": 0.6885, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.23274996876716614, + "rewards/margins": 0.07730601727962494, + "rewards/rejected": -0.31005600094795227, + "step": 4630 + }, + { + "epoch": 0.3, + "learning_rate": 4.395022417339955e-06, + "logits/chosen": -0.9462388753890991, + "logits/rejected": -0.8615191578865051, + "logps/chosen": -532.3143310546875, + "logps/rejected": -612.8482055664062, + "loss": 0.6913, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.32568931579589844, + "rewards/margins": 0.07204465568065643, + "rewards/rejected": -0.3977339565753937, + "step": 4640 + }, + { + "epoch": 0.3, + "learning_rate": 4.391293222407479e-06, + "logits/chosen": -0.9256173968315125, + "logits/rejected": -0.9904786944389343, + "logps/chosen": -343.5927734375, + "logps/rejected": -415.95867919921875, + "loss": 0.6897, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.2070355862379074, + "rewards/margins": 0.05413592979311943, + "rewards/rejected": -0.26117151975631714, + "step": 4650 + }, + { + "epoch": 0.3, + "learning_rate": 4.387554161563094e-06, + "logits/chosen": -1.1515638828277588, + "logits/rejected": -1.107965111732483, + "logps/chosen": -439.2413635253906, + "logps/rejected": -527.4022216796875, + "loss": 0.686, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.24037718772888184, + "rewards/margins": 0.09998045116662979, + "rewards/rejected": -0.34035763144493103, + "step": 4660 + }, + { + "epoch": 0.31, + "learning_rate": 4.383805254311575e-06, + "logits/chosen": -1.1828508377075195, + "logits/rejected": -0.8469891548156738, + "logps/chosen": -516.6743774414062, + "logps/rejected": -550.5076293945312, + "loss": 0.6886, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2605333924293518, + "rewards/margins": 0.07939942926168442, + "rewards/rejected": -0.33993279933929443, + "step": 4670 + }, + { + "epoch": 0.31, + "learning_rate": 4.380046520209056e-06, + "logits/chosen": -0.9766277074813843, + "logits/rejected": -0.7371279001235962, + "logps/chosen": -435.83544921875, + "logps/rejected": -506.59954833984375, + "loss": 0.6916, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.23556911945343018, + "rewards/margins": 0.09460324048995972, + "rewards/rejected": -0.3301723599433899, + "step": 4680 + }, + { + "epoch": 0.31, + "learning_rate": 4.376277978862936e-06, + "logits/chosen": -0.708415150642395, + "logits/rejected": -0.6641728281974792, + "logps/chosen": -454.5379943847656, + "logps/rejected": -470.12359619140625, + "loss": 0.6909, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.22854939103126526, + "rewards/margins": 0.056117068976163864, + "rewards/rejected": -0.2846664488315582, + "step": 4690 + }, + { + "epoch": 0.31, + "learning_rate": 4.372499649931774e-06, + "logits/chosen": -0.9270528554916382, + "logits/rejected": -0.7678083777427673, + "logps/chosen": -500.28973388671875, + "logps/rejected": -631.7337646484375, + "loss": 0.6849, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.291198194026947, + "rewards/margins": 0.12348760664463043, + "rewards/rejected": -0.41468581557273865, + "step": 4700 + }, + { + "epoch": 0.31, + "eval_logits/chosen": -0.826753556728363, + "eval_logits/rejected": -0.7087231874465942, + "eval_logps/chosen": -512.8201904296875, + "eval_logps/rejected": -581.3583374023438, + "eval_loss": 0.689807116985321, + "eval_rewards/accuracies": 0.6545000076293945, + "eval_rewards/chosen": -0.2808152437210083, + "eval_rewards/margins": 0.0889311358332634, + "eval_rewards/rejected": -0.3697463870048523, + "eval_runtime": 711.8212, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 1.405, + "step": 4700 + }, + { + "epoch": 0.31, + "learning_rate": 4.368711553125185e-06, + "logits/chosen": -1.0156619548797607, + "logits/rejected": -1.0053759813308716, + "logps/chosen": -545.8829345703125, + "logps/rejected": -550.94580078125, + "loss": 0.6927, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.26869291067123413, + "rewards/margins": 0.06350628286600113, + "rewards/rejected": -0.3321991562843323, + "step": 4710 + }, + { + "epoch": 0.31, + "learning_rate": 4.364913708203734e-06, + "logits/chosen": -1.0214194059371948, + "logits/rejected": -0.8588595390319824, + "logps/chosen": -553.0416259765625, + "logps/rejected": -555.3973388671875, + "loss": 0.69, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.26814529299736023, + "rewards/margins": 0.07777298986911774, + "rewards/rejected": -0.34591832756996155, + "step": 4720 + }, + { + "epoch": 0.31, + "learning_rate": 4.361106134978844e-06, + "logits/chosen": -0.8875927925109863, + "logits/rejected": -0.6843006014823914, + "logps/chosen": -505.05859375, + "logps/rejected": -550.0360107421875, + "loss": 0.6923, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.23462817072868347, + "rewards/margins": 0.05792809650301933, + "rewards/rejected": -0.2925562858581543, + "step": 4730 + }, + { + "epoch": 0.31, + "learning_rate": 4.357288853312681e-06, + "logits/chosen": -0.9217559695243835, + "logits/rejected": -0.9300669431686401, + "logps/chosen": -516.9627685546875, + "logps/rejected": -553.6403198242188, + "loss": 0.6914, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.23348495364189148, + "rewards/margins": 0.041414495557546616, + "rewards/rejected": -0.2748994827270508, + "step": 4740 + }, + { + "epoch": 0.31, + "learning_rate": 4.353461883118056e-06, + "logits/chosen": -0.8890771865844727, + "logits/rejected": -0.7557088732719421, + "logps/chosen": -484.44598388671875, + "logps/rejected": -505.0452575683594, + "loss": 0.692, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.25515538454055786, + "rewards/margins": 0.04175041243433952, + "rewards/rejected": -0.2969058156013489, + "step": 4750 + }, + { + "epoch": 0.31, + "learning_rate": 4.34962524435832e-06, + "logits/chosen": -0.9046379327774048, + "logits/rejected": -0.8349242210388184, + "logps/chosen": -413.257568359375, + "logps/rejected": -458.12005615234375, + "loss": 0.6918, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.19357720017433167, + "rewards/margins": 0.08169585466384888, + "rewards/rejected": -0.27527305483818054, + "step": 4760 + }, + { + "epoch": 0.31, + "learning_rate": 4.34577895704726e-06, + "logits/chosen": -1.387485384941101, + "logits/rejected": -1.1890177726745605, + "logps/chosen": -425.19097900390625, + "logps/rejected": -457.5673828125, + "loss": 0.6898, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.16357699036598206, + "rewards/margins": 0.05810046195983887, + "rewards/rejected": -0.22167746722698212, + "step": 4770 + }, + { + "epoch": 0.31, + "learning_rate": 4.3419230412489954e-06, + "logits/chosen": -1.3138645887374878, + "logits/rejected": -1.057762861251831, + "logps/chosen": -456.3876953125, + "logps/rejected": -419.55078125, + "loss": 0.6919, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.16701987385749817, + "rewards/margins": 0.0392269529402256, + "rewards/rejected": -0.20624682307243347, + "step": 4780 + }, + { + "epoch": 0.31, + "learning_rate": 4.338057517077872e-06, + "logits/chosen": -1.07856023311615, + "logits/rejected": -0.9140844345092773, + "logps/chosen": -367.5122375488281, + "logps/rejected": -492.6651306152344, + "loss": 0.6811, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17347107827663422, + "rewards/margins": 0.16850519180297852, + "rewards/rejected": -0.34197625517845154, + "step": 4790 + }, + { + "epoch": 0.31, + "learning_rate": 4.334182404698356e-06, + "logits/chosen": -0.8965989947319031, + "logits/rejected": -0.6839637756347656, + "logps/chosen": -468.75067138671875, + "logps/rejected": -453.801513671875, + "loss": 0.6902, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.23864367604255676, + "rewards/margins": 0.06345070898532867, + "rewards/rejected": -0.30209439992904663, + "step": 4800 + }, + { + "epoch": 0.31, + "eval_logits/chosen": -0.7168214321136475, + "eval_logits/rejected": -0.6072134375572205, + "eval_logps/chosen": -480.07293701171875, + "eval_logps/rejected": -542.0419921875, + "eval_loss": 0.6897538304328918, + "eval_rewards/accuracies": 0.6600000262260437, + "eval_rewards/chosen": -0.24806798994541168, + "eval_rewards/margins": 0.08236212283372879, + "eval_rewards/rejected": -0.33043012022972107, + "eval_runtime": 712.7229, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 1.403, + "step": 4800 + }, + { + "epoch": 0.31, + "learning_rate": 4.330297724324933e-06, + "logits/chosen": -1.0477492809295654, + "logits/rejected": -0.5886969566345215, + "logps/chosen": -563.2034912109375, + "logps/rejected": -546.0494995117188, + "loss": 0.6884, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.25479209423065186, + "rewards/margins": 0.08404627442359924, + "rewards/rejected": -0.3388383388519287, + "step": 4810 + }, + { + "epoch": 0.32, + "learning_rate": 4.326403496221999e-06, + "logits/chosen": -0.6333009004592896, + "logits/rejected": -0.6737180948257446, + "logps/chosen": -387.6405334472656, + "logps/rejected": -422.314697265625, + "loss": 0.6925, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.22621150314807892, + "rewards/margins": 0.06145411729812622, + "rewards/rejected": -0.28766560554504395, + "step": 4820 + }, + { + "epoch": 0.32, + "learning_rate": 4.322499740703755e-06, + "logits/chosen": -0.7092992067337036, + "logits/rejected": -0.7767950892448425, + "logps/chosen": -394.92071533203125, + "logps/rejected": -468.0594787597656, + "loss": 0.6901, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2028215229511261, + "rewards/margins": 0.04861544445157051, + "rewards/rejected": -0.2514369785785675, + "step": 4830 + }, + { + "epoch": 0.32, + "learning_rate": 4.318586478134101e-06, + "logits/chosen": -0.8985971212387085, + "logits/rejected": -0.44110220670700073, + "logps/chosen": -401.379150390625, + "logps/rejected": -434.3460388183594, + "loss": 0.6888, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2087116688489914, + "rewards/margins": 0.07348736375570297, + "rewards/rejected": -0.28219905495643616, + "step": 4840 + }, + { + "epoch": 0.32, + "learning_rate": 4.314663728926534e-06, + "logits/chosen": -1.0497792959213257, + "logits/rejected": -0.6469130516052246, + "logps/chosen": -533.2115478515625, + "logps/rejected": -588.5062255859375, + "loss": 0.6904, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.27633586525917053, + "rewards/margins": 0.065872922539711, + "rewards/rejected": -0.34220877289772034, + "step": 4850 + }, + { + "epoch": 0.32, + "learning_rate": 4.310731513544033e-06, + "logits/chosen": -0.7769347429275513, + "logits/rejected": -0.449188232421875, + "logps/chosen": -508.60333251953125, + "logps/rejected": -552.8875732421875, + "loss": 0.6898, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2657455503940582, + "rewards/margins": 0.09036391228437424, + "rewards/rejected": -0.35610947012901306, + "step": 4860 + }, + { + "epoch": 0.32, + "learning_rate": 4.30678985249896e-06, + "logits/chosen": -0.8284038305282593, + "logits/rejected": -0.8399826288223267, + "logps/chosen": -380.78289794921875, + "logps/rejected": -508.3587951660156, + "loss": 0.6905, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.22363290190696716, + "rewards/margins": 0.10931231826543808, + "rewards/rejected": -0.33294519782066345, + "step": 4870 + }, + { + "epoch": 0.32, + "learning_rate": 4.302838766352952e-06, + "logits/chosen": -0.8813980221748352, + "logits/rejected": -0.7245379686355591, + "logps/chosen": -453.521728515625, + "logps/rejected": -508.83062744140625, + "loss": 0.6888, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1979287564754486, + "rewards/margins": 0.09110042452812195, + "rewards/rejected": -0.28902921080589294, + "step": 4880 + }, + { + "epoch": 0.32, + "learning_rate": 4.298878275716806e-06, + "logits/chosen": -0.8569731712341309, + "logits/rejected": -0.7990323305130005, + "logps/chosen": -425.7196350097656, + "logps/rejected": -530.5278930664062, + "loss": 0.6869, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2338402271270752, + "rewards/margins": 0.10760519653558731, + "rewards/rejected": -0.3414453864097595, + "step": 4890 + }, + { + "epoch": 0.32, + "learning_rate": 4.294908401250386e-06, + "logits/chosen": -0.950922966003418, + "logits/rejected": -0.7585622072219849, + "logps/chosen": -464.732421875, + "logps/rejected": -525.8693237304688, + "loss": 0.6888, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.24998939037322998, + "rewards/margins": 0.10566927492618561, + "rewards/rejected": -0.355658620595932, + "step": 4900 + }, + { + "epoch": 0.32, + "eval_logits/chosen": -0.9126221537590027, + "eval_logits/rejected": -0.7924286127090454, + "eval_logps/chosen": -470.6658020019531, + "eval_logps/rejected": -534.4230346679688, + "eval_loss": 0.6897168159484863, + "eval_rewards/accuracies": 0.6545000076293945, + "eval_rewards/chosen": -0.23866090178489685, + "eval_rewards/margins": 0.0841502919793129, + "eval_rewards/rejected": -0.32281118631362915, + "eval_runtime": 714.6649, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 1.399, + "step": 4900 + }, + { + "epoch": 0.32, + "learning_rate": 4.290929163662498e-06, + "logits/chosen": -0.6042841672897339, + "logits/rejected": -0.6136349439620972, + "logps/chosen": -481.3103942871094, + "logps/rejected": -511.7767028808594, + "loss": 0.688, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21157515048980713, + "rewards/margins": 0.08818281441926956, + "rewards/rejected": -0.2997579276561737, + "step": 4910 + }, + { + "epoch": 0.32, + "learning_rate": 4.286940583710796e-06, + "logits/chosen": -1.1480525732040405, + "logits/rejected": -0.9411023259162903, + "logps/chosen": -555.0806884765625, + "logps/rejected": -602.908447265625, + "loss": 0.6909, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.26233208179473877, + "rewards/margins": 0.10445760190486908, + "rewards/rejected": -0.36678972840309143, + "step": 4920 + }, + { + "epoch": 0.32, + "learning_rate": 4.282942682201667e-06, + "logits/chosen": -0.9523464441299438, + "logits/rejected": -0.7611700296401978, + "logps/chosen": -507.7801818847656, + "logps/rejected": -540.0282592773438, + "loss": 0.6916, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2556723952293396, + "rewards/margins": 0.07183460146188736, + "rewards/rejected": -0.327506959438324, + "step": 4930 + }, + { + "epoch": 0.32, + "learning_rate": 4.278935479990123e-06, + "logits/chosen": -1.2261298894882202, + "logits/rejected": -0.8556804656982422, + "logps/chosen": -438.10955810546875, + "logps/rejected": -459.77496337890625, + "loss": 0.6889, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.23344619572162628, + "rewards/margins": 0.06463684141635895, + "rewards/rejected": -0.29808303713798523, + "step": 4940 + }, + { + "epoch": 0.32, + "learning_rate": 4.274918997979695e-06, + "logits/chosen": -1.0745717287063599, + "logits/rejected": -1.095640778541565, + "logps/chosen": -417.48089599609375, + "logps/rejected": -482.902099609375, + "loss": 0.6912, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.22384591400623322, + "rewards/margins": 0.0636858120560646, + "rewards/rejected": -0.28753170371055603, + "step": 4950 + }, + { + "epoch": 0.32, + "learning_rate": 4.270893257122319e-06, + "logits/chosen": -0.9423874020576477, + "logits/rejected": -0.796244740486145, + "logps/chosen": -431.6581115722656, + "logps/rejected": -574.3453369140625, + "loss": 0.6867, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20293612778186798, + "rewards/margins": 0.11317793279886246, + "rewards/rejected": -0.3161140978336334, + "step": 4960 + }, + { + "epoch": 0.33, + "learning_rate": 4.266858278418232e-06, + "logits/chosen": -0.7076646685600281, + "logits/rejected": -0.749599277973175, + "logps/chosen": -414.93841552734375, + "logps/rejected": -453.1956481933594, + "loss": 0.6886, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.17833426594734192, + "rewards/margins": 0.058608364313840866, + "rewards/rejected": -0.23694264888763428, + "step": 4970 + }, + { + "epoch": 0.33, + "learning_rate": 4.26281408291586e-06, + "logits/chosen": -1.0780795812606812, + "logits/rejected": -0.8412311673164368, + "logps/chosen": -421.68280029296875, + "logps/rejected": -495.57171630859375, + "loss": 0.6892, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17934545874595642, + "rewards/margins": 0.10342135280370712, + "rewards/rejected": -0.28276684880256653, + "step": 4980 + }, + { + "epoch": 0.33, + "learning_rate": 4.258760691711706e-06, + "logits/chosen": -1.0563790798187256, + "logits/rejected": -0.9225692749023438, + "logps/chosen": -394.8101501464844, + "logps/rejected": -468.16937255859375, + "loss": 0.6888, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.19734862446784973, + "rewards/margins": 0.07795204222202301, + "rewards/rejected": -0.27530068159103394, + "step": 4990 + }, + { + "epoch": 0.33, + "learning_rate": 4.254698125950247e-06, + "logits/chosen": -1.2388908863067627, + "logits/rejected": -1.021576166152954, + "logps/chosen": -485.2925720214844, + "logps/rejected": -496.947265625, + "loss": 0.6915, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.18427804112434387, + "rewards/margins": 0.05832035094499588, + "rewards/rejected": -0.24259838461875916, + "step": 5000 + }, + { + "epoch": 0.33, + "eval_logits/chosen": -0.9265555143356323, + "eval_logits/rejected": -0.8070250153541565, + "eval_logps/chosen": -433.9764404296875, + "eval_logps/rejected": -492.5659484863281, + "eval_loss": 0.6898908615112305, + "eval_rewards/accuracies": 0.6504999995231628, + "eval_rewards/chosen": -0.20197151601314545, + "eval_rewards/margins": 0.07898253947496414, + "eval_rewards/rejected": -0.2809540629386902, + "eval_runtime": 712.1506, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 1.404, + "step": 5000 + }, + { + "epoch": 0.33, + "learning_rate": 4.250626406823815e-06, + "logits/chosen": -1.112958312034607, + "logits/rejected": -0.8601986765861511, + "logps/chosen": -413.8302307128906, + "logps/rejected": -578.3088989257812, + "loss": 0.6883, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19696754217147827, + "rewards/margins": 0.1483006477355957, + "rewards/rejected": -0.345268189907074, + "step": 5010 + }, + { + "epoch": 0.33, + "learning_rate": 4.246545555572489e-06, + "logits/chosen": -0.9461210370063782, + "logits/rejected": -0.9753166437149048, + "logps/chosen": -341.7334289550781, + "logps/rejected": -459.89013671875, + "loss": 0.688, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1875373125076294, + "rewards/margins": 0.09320464730262756, + "rewards/rejected": -0.28074198961257935, + "step": 5020 + }, + { + "epoch": 0.33, + "learning_rate": 4.242455593483992e-06, + "logits/chosen": -1.0627763271331787, + "logits/rejected": -0.887707531452179, + "logps/chosen": -415.20526123046875, + "logps/rejected": -419.7691345214844, + "loss": 0.6918, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19605137407779694, + "rewards/margins": 0.05427330732345581, + "rewards/rejected": -0.25032466650009155, + "step": 5030 + }, + { + "epoch": 0.33, + "learning_rate": 4.238356541893567e-06, + "logits/chosen": -0.9641706347465515, + "logits/rejected": -0.9521792531013489, + "logps/chosen": -398.6933288574219, + "logps/rejected": -471.68597412109375, + "loss": 0.6893, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2085602581501007, + "rewards/margins": 0.08853522688150406, + "rewards/rejected": -0.29709547758102417, + "step": 5040 + }, + { + "epoch": 0.33, + "learning_rate": 4.234248422183876e-06, + "logits/chosen": -1.157314658164978, + "logits/rejected": -1.307806134223938, + "logps/chosen": -382.83392333984375, + "logps/rejected": -443.8204650878906, + "loss": 0.6927, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13989922404289246, + "rewards/margins": 0.06554291397333145, + "rewards/rejected": -0.2054421454668045, + "step": 5050 + }, + { + "epoch": 0.33, + "learning_rate": 4.230131255784884e-06, + "logits/chosen": -1.608673095703125, + "logits/rejected": -1.3411251306533813, + "logps/chosen": -397.7110290527344, + "logps/rejected": -447.5704040527344, + "loss": 0.6883, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1398279219865799, + "rewards/margins": 0.0686558336019516, + "rewards/rejected": -0.2084837704896927, + "step": 5060 + }, + { + "epoch": 0.33, + "learning_rate": 4.226005064173748e-06, + "logits/chosen": -1.2943363189697266, + "logits/rejected": -1.1875030994415283, + "logps/chosen": -416.7265625, + "logps/rejected": -486.3279724121094, + "loss": 0.6899, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15208227932453156, + "rewards/margins": 0.05114439129829407, + "rewards/rejected": -0.20322665572166443, + "step": 5070 + }, + { + "epoch": 0.33, + "learning_rate": 4.2218698688747035e-06, + "logits/chosen": -0.8786938786506653, + "logits/rejected": -0.7380436658859253, + "logps/chosen": -452.21990966796875, + "logps/rejected": -479.0074157714844, + "loss": 0.6905, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.21951058506965637, + "rewards/margins": 0.07191769778728485, + "rewards/rejected": -0.29142826795578003, + "step": 5080 + }, + { + "epoch": 0.33, + "learning_rate": 4.217725691458957e-06, + "logits/chosen": -1.255380630493164, + "logits/rejected": -1.0743123292922974, + "logps/chosen": -422.770263671875, + "logps/rejected": -541.0545654296875, + "loss": 0.6877, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.23854613304138184, + "rewards/margins": 0.09133829176425934, + "rewards/rejected": -0.32988446950912476, + "step": 5090 + }, + { + "epoch": 0.33, + "learning_rate": 4.213572553544565e-06, + "logits/chosen": -0.7588680386543274, + "logits/rejected": -0.7646905183792114, + "logps/chosen": -560.6681518554688, + "logps/rejected": -648.8077392578125, + "loss": 0.6903, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3196962773799896, + "rewards/margins": 0.09550414234399796, + "rewards/rejected": -0.41520047187805176, + "step": 5100 + }, + { + "epoch": 0.33, + "eval_logits/chosen": -0.8067855834960938, + "eval_logits/rejected": -0.6929395794868469, + "eval_logps/chosen": -539.7930908203125, + "eval_logps/rejected": -603.40966796875, + "eval_loss": 0.6898962259292603, + "eval_rewards/accuracies": 0.6539999842643738, + "eval_rewards/chosen": -0.3077881336212158, + "eval_rewards/margins": 0.08400966227054596, + "eval_rewards/rejected": -0.391797810792923, + "eval_runtime": 711.9273, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 1.405, + "step": 5100 + }, + { + "epoch": 0.33, + "learning_rate": 4.209410476796331e-06, + "logits/chosen": -0.6897755861282349, + "logits/rejected": -0.7003772258758545, + "logps/chosen": -501.85760498046875, + "logps/rejected": -574.7218017578125, + "loss": 0.6883, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3307662010192871, + "rewards/margins": 0.08285339176654816, + "rewards/rejected": -0.41361960768699646, + "step": 5110 + }, + { + "epoch": 0.33, + "learning_rate": 4.205239482925686e-06, + "logits/chosen": -0.8003614544868469, + "logits/rejected": -0.7187783718109131, + "logps/chosen": -451.90130615234375, + "logps/rejected": -535.4197998046875, + "loss": 0.6902, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.264085978269577, + "rewards/margins": 0.06139212101697922, + "rewards/rejected": -0.32547810673713684, + "step": 5120 + }, + { + "epoch": 0.34, + "learning_rate": 4.201059593690577e-06, + "logits/chosen": -1.1568797826766968, + "logits/rejected": -1.0991196632385254, + "logps/chosen": -490.71771240234375, + "logps/rejected": -528.8479614257812, + "loss": 0.6904, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2651299834251404, + "rewards/margins": 0.06355933845043182, + "rewards/rejected": -0.328689306974411, + "step": 5130 + }, + { + "epoch": 0.34, + "learning_rate": 4.196870830895354e-06, + "logits/chosen": -0.9000298380851746, + "logits/rejected": -0.8038280606269836, + "logps/chosen": -490.931884765625, + "logps/rejected": -603.1361694335938, + "loss": 0.6909, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2331843078136444, + "rewards/margins": 0.06269694119691849, + "rewards/rejected": -0.2958812415599823, + "step": 5140 + }, + { + "epoch": 0.34, + "learning_rate": 4.192673216390657e-06, + "logits/chosen": -0.9874833822250366, + "logits/rejected": -0.7820544242858887, + "logps/chosen": -462.16741943359375, + "logps/rejected": -508.98223876953125, + "loss": 0.6882, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2291944921016693, + "rewards/margins": 0.08514241129159927, + "rewards/rejected": -0.3143369257450104, + "step": 5150 + }, + { + "epoch": 0.34, + "learning_rate": 4.188466772073296e-06, + "logits/chosen": -0.9633606672286987, + "logits/rejected": -0.9231443405151367, + "logps/chosen": -481.64093017578125, + "logps/rejected": -504.0276794433594, + "loss": 0.6904, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2611164152622223, + "rewards/margins": 0.04314836859703064, + "rewards/rejected": -0.30426478385925293, + "step": 5160 + }, + { + "epoch": 0.34, + "learning_rate": 4.184251519886148e-06, + "logits/chosen": -0.7404162287712097, + "logits/rejected": -0.5939242243766785, + "logps/chosen": -542.5810546875, + "logps/rejected": -650.72705078125, + "loss": 0.689, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.3464735448360443, + "rewards/margins": 0.07949092984199524, + "rewards/rejected": -0.42596450448036194, + "step": 5170 + }, + { + "epoch": 0.34, + "learning_rate": 4.180027481818033e-06, + "logits/chosen": -0.7048271894454956, + "logits/rejected": -0.8187531232833862, + "logps/chosen": -608.216552734375, + "logps/rejected": -635.06640625, + "loss": 0.6902, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.34151846170425415, + "rewards/margins": 0.0646834746003151, + "rewards/rejected": -0.40620189905166626, + "step": 5180 + }, + { + "epoch": 0.34, + "learning_rate": 4.175794679903602e-06, + "logits/chosen": -0.46474045515060425, + "logits/rejected": -0.3354906439781189, + "logps/chosen": -589.6353759765625, + "logps/rejected": -601.679443359375, + "loss": 0.6924, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.36199450492858887, + "rewards/margins": 0.08440116792917252, + "rewards/rejected": -0.4463956952095032, + "step": 5190 + }, + { + "epoch": 0.34, + "learning_rate": 4.171553136223222e-06, + "logits/chosen": -0.686112642288208, + "logits/rejected": -0.4601070284843445, + "logps/chosen": -673.9461669921875, + "logps/rejected": -799.9554443359375, + "loss": 0.6889, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.406625360250473, + "rewards/margins": 0.11529938876628876, + "rewards/rejected": -0.5219247341156006, + "step": 5200 + }, + { + "epoch": 0.34, + "eval_logits/chosen": -0.33751627802848816, + "eval_logits/rejected": -0.25623294711112976, + "eval_logps/chosen": -600.0008544921875, + "eval_logps/rejected": -653.9317626953125, + "eval_loss": 0.6899478435516357, + "eval_rewards/accuracies": 0.6549999713897705, + "eval_rewards/chosen": -0.3679959177970886, + "eval_rewards/margins": 0.07432392239570618, + "eval_rewards/rejected": -0.4423198103904724, + "eval_runtime": 709.8614, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 1.409, + "step": 5200 + }, + { + "epoch": 0.34, + "learning_rate": 4.167302872902865e-06, + "logits/chosen": -0.4962449073791504, + "logits/rejected": -0.06477615237236023, + "logps/chosen": -634.7276000976562, + "logps/rejected": -722.2471923828125, + "loss": 0.6884, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.3832593560218811, + "rewards/margins": 0.0994577631354332, + "rewards/rejected": -0.4827171862125397, + "step": 5210 + }, + { + "epoch": 0.34, + "learning_rate": 4.163043912113985e-06, + "logits/chosen": -0.5165904760360718, + "logits/rejected": -0.3449627161026001, + "logps/chosen": -581.4984741210938, + "logps/rejected": -603.272216796875, + "loss": 0.6904, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.32245832681655884, + "rewards/margins": 0.055768370628356934, + "rewards/rejected": -0.37822669744491577, + "step": 5220 + }, + { + "epoch": 0.34, + "learning_rate": 4.15877627607341e-06, + "logits/chosen": -0.4022120535373688, + "logits/rejected": -0.07796867191791534, + "logps/chosen": -487.5970153808594, + "logps/rejected": -519.3673095703125, + "loss": 0.6923, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2710022032260895, + "rewards/margins": 0.05599268153309822, + "rewards/rejected": -0.3269948959350586, + "step": 5230 + }, + { + "epoch": 0.34, + "learning_rate": 4.154499987043217e-06, + "logits/chosen": -0.6665756106376648, + "logits/rejected": -0.5083428621292114, + "logps/chosen": -463.33721923828125, + "logps/rejected": -551.2430419921875, + "loss": 0.6886, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.23913367092609406, + "rewards/margins": 0.11008558422327042, + "rewards/rejected": -0.34921926259994507, + "step": 5240 + }, + { + "epoch": 0.34, + "learning_rate": 4.150215067330625e-06, + "logits/chosen": -0.5539819598197937, + "logits/rejected": -0.2385900914669037, + "logps/chosen": -438.7664489746094, + "logps/rejected": -541.8282470703125, + "loss": 0.6892, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.22971764206886292, + "rewards/margins": 0.08902446925640106, + "rewards/rejected": -0.3187420964241028, + "step": 5250 + }, + { + "epoch": 0.34, + "learning_rate": 4.145921539287876e-06, + "logits/chosen": -0.4234296679496765, + "logits/rejected": -0.4965476989746094, + "logps/chosen": -424.6368713378906, + "logps/rejected": -521.614013671875, + "loss": 0.6893, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.23404428362846375, + "rewards/margins": 0.11698039621114731, + "rewards/rejected": -0.35102465748786926, + "step": 5260 + }, + { + "epoch": 0.34, + "learning_rate": 4.141619425312115e-06, + "logits/chosen": -0.5069370269775391, + "logits/rejected": -0.24971413612365723, + "logps/chosen": -435.09051513671875, + "logps/rejected": -454.61328125, + "loss": 0.6914, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.22643926739692688, + "rewards/margins": 0.043964650481939316, + "rewards/rejected": -0.2704039216041565, + "step": 5270 + }, + { + "epoch": 0.35, + "learning_rate": 4.1373087478452735e-06, + "logits/chosen": -0.49892836809158325, + "logits/rejected": -0.5289067625999451, + "logps/chosen": -421.7427673339844, + "logps/rejected": -494.67437744140625, + "loss": 0.6857, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1965021938085556, + "rewards/margins": 0.12754546105861664, + "rewards/rejected": -0.32404765486717224, + "step": 5280 + }, + { + "epoch": 0.35, + "learning_rate": 4.132989529373959e-06, + "logits/chosen": -0.6782785058021545, + "logits/rejected": -0.5537897944450378, + "logps/chosen": -493.59088134765625, + "logps/rejected": -491.10858154296875, + "loss": 0.6892, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.23767805099487305, + "rewards/margins": 0.07651616632938385, + "rewards/rejected": -0.3141942322254181, + "step": 5290 + }, + { + "epoch": 0.35, + "learning_rate": 4.128661792429331e-06, + "logits/chosen": -0.6442875266075134, + "logits/rejected": -0.5757584571838379, + "logps/chosen": -471.60137939453125, + "logps/rejected": -537.8292846679688, + "loss": 0.6925, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2178308069705963, + "rewards/margins": 0.06133151799440384, + "rewards/rejected": -0.27916234731674194, + "step": 5300 + }, + { + "epoch": 0.35, + "eval_logits/chosen": -0.8017680048942566, + "eval_logits/rejected": -0.6898171305656433, + "eval_logps/chosen": -452.794677734375, + "eval_logps/rejected": -513.436767578125, + "eval_loss": 0.6897254586219788, + "eval_rewards/accuracies": 0.6604999899864197, + "eval_rewards/chosen": -0.22078973054885864, + "eval_rewards/margins": 0.08103515952825546, + "eval_rewards/rejected": -0.3018249273300171, + "eval_runtime": 709.4677, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 1.41, + "step": 5300 + }, + { + "epoch": 0.35, + "learning_rate": 4.124325559586985e-06, + "logits/chosen": -0.8358646631240845, + "logits/rejected": -0.6630481481552124, + "logps/chosen": -414.3356018066406, + "logps/rejected": -449.273193359375, + "loss": 0.6926, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2262789011001587, + "rewards/margins": 0.022013569250702858, + "rewards/rejected": -0.2482924908399582, + "step": 5310 + }, + { + "epoch": 0.35, + "learning_rate": 4.119980853466835e-06, + "logits/chosen": -0.6544975638389587, + "logits/rejected": -0.30424556136131287, + "logps/chosen": -454.3627014160156, + "logps/rejected": -526.572265625, + "loss": 0.6887, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.24520829319953918, + "rewards/margins": 0.0993238165974617, + "rewards/rejected": -0.3445320725440979, + "step": 5320 + }, + { + "epoch": 0.35, + "learning_rate": 4.115627696732997e-06, + "logits/chosen": -0.5331336259841919, + "logits/rejected": -0.47379574179649353, + "logps/chosen": -413.2391662597656, + "logps/rejected": -478.57818603515625, + "loss": 0.6905, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.22182488441467285, + "rewards/margins": 0.08465997874736786, + "rewards/rejected": -0.3064848780632019, + "step": 5330 + }, + { + "epoch": 0.35, + "learning_rate": 4.111266112093668e-06, + "logits/chosen": -0.7296448349952698, + "logits/rejected": -0.6237698793411255, + "logps/chosen": -479.72686767578125, + "logps/rejected": -602.6467895507812, + "loss": 0.6895, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.27625924348831177, + "rewards/margins": 0.10452202707529068, + "rewards/rejected": -0.38078123331069946, + "step": 5340 + }, + { + "epoch": 0.35, + "learning_rate": 4.1068961223010115e-06, + "logits/chosen": -0.9738900065422058, + "logits/rejected": -0.5101861953735352, + "logps/chosen": -525.4327392578125, + "logps/rejected": -602.5348510742188, + "loss": 0.6873, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.25473204255104065, + "rewards/margins": 0.09977851063013077, + "rewards/rejected": -0.3545105457305908, + "step": 5350 + }, + { + "epoch": 0.35, + "learning_rate": 4.102517750151034e-06, + "logits/chosen": -1.0094826221466064, + "logits/rejected": -0.7825483083724976, + "logps/chosen": -503.8395080566406, + "logps/rejected": -478.79833984375, + "loss": 0.6911, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.20879462361335754, + "rewards/margins": 0.04627186059951782, + "rewards/rejected": -0.25506648421287537, + "step": 5360 + }, + { + "epoch": 0.35, + "learning_rate": 4.09813101848347e-06, + "logits/chosen": -1.1168756484985352, + "logits/rejected": -0.8798100352287292, + "logps/chosen": -410.56915283203125, + "logps/rejected": -495.3880310058594, + "loss": 0.6931, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19158323109149933, + "rewards/margins": 0.0676647424697876, + "rewards/rejected": -0.25924795866012573, + "step": 5370 + }, + { + "epoch": 0.35, + "learning_rate": 4.093735950181659e-06, + "logits/chosen": -1.0186705589294434, + "logits/rejected": -0.8906763792037964, + "logps/chosen": -394.7282409667969, + "logps/rejected": -509.34344482421875, + "loss": 0.6883, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.169254869222641, + "rewards/margins": 0.09007195383310318, + "rewards/rejected": -0.2593268156051636, + "step": 5380 + }, + { + "epoch": 0.35, + "learning_rate": 4.0893325681724326e-06, + "logits/chosen": -1.2295897006988525, + "logits/rejected": -1.1489160060882568, + "logps/chosen": -473.47247314453125, + "logps/rejected": -537.0003051757812, + "loss": 0.6906, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2150508463382721, + "rewards/margins": 0.07580719143152237, + "rewards/rejected": -0.2908580005168915, + "step": 5390 + }, + { + "epoch": 0.35, + "learning_rate": 4.084920895425988e-06, + "logits/chosen": -1.0479966402053833, + "logits/rejected": -0.8871825337409973, + "logps/chosen": -483.7577209472656, + "logps/rejected": -567.1939697265625, + "loss": 0.6883, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2463931292295456, + "rewards/margins": 0.06979132443666458, + "rewards/rejected": -0.31618446111679077, + "step": 5400 + }, + { + "epoch": 0.35, + "eval_logits/chosen": -0.9780447483062744, + "eval_logits/rejected": -0.8575934171676636, + "eval_logps/chosen": -446.1243896484375, + "eval_logps/rejected": -496.64959716796875, + "eval_loss": 0.6898158192634583, + "eval_rewards/accuracies": 0.6589999794960022, + "eval_rewards/chosen": -0.21411941945552826, + "eval_rewards/margins": 0.07091830670833588, + "eval_rewards/rejected": -0.28503772616386414, + "eval_runtime": 713.6779, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 1.401, + "step": 5400 + }, + { + "epoch": 0.35, + "learning_rate": 4.080500954955769e-06, + "logits/chosen": -0.8831078410148621, + "logits/rejected": -0.783626139163971, + "logps/chosen": -485.46875, + "logps/rejected": -540.5457763671875, + "loss": 0.6903, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2295968234539032, + "rewards/margins": 0.07336324453353882, + "rewards/rejected": -0.302960067987442, + "step": 5410 + }, + { + "epoch": 0.35, + "learning_rate": 4.076072769818354e-06, + "logits/chosen": -1.2416795492172241, + "logits/rejected": -0.9977342486381531, + "logps/chosen": -439.8389587402344, + "logps/rejected": -449.51629638671875, + "loss": 0.6904, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19611665606498718, + "rewards/margins": 0.07164300978183746, + "rewards/rejected": -0.26775965094566345, + "step": 5420 + }, + { + "epoch": 0.36, + "learning_rate": 4.071636363113323e-06, + "logits/chosen": -0.6604939103126526, + "logits/rejected": -0.5699220895767212, + "logps/chosen": -478.573974609375, + "logps/rejected": -478.38104248046875, + "loss": 0.69, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2104438990354538, + "rewards/margins": 0.053849607706069946, + "rewards/rejected": -0.26429352164268494, + "step": 5430 + }, + { + "epoch": 0.36, + "learning_rate": 4.067191757983146e-06, + "logits/chosen": -0.6711565852165222, + "logits/rejected": -0.5477440357208252, + "logps/chosen": -479.72869873046875, + "logps/rejected": -588.8829956054688, + "loss": 0.6879, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2501530349254608, + "rewards/margins": 0.11236833035945892, + "rewards/rejected": -0.36252138018608093, + "step": 5440 + }, + { + "epoch": 0.36, + "learning_rate": 4.062738977613063e-06, + "logits/chosen": -0.45283088088035583, + "logits/rejected": -0.5415789484977722, + "logps/chosen": -490.650634765625, + "logps/rejected": -506.84930419921875, + "loss": 0.6897, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.25617435574531555, + "rewards/margins": 0.06459192931652069, + "rewards/rejected": -0.32076629996299744, + "step": 5450 + }, + { + "epoch": 0.36, + "learning_rate": 4.058278045230957e-06, + "logits/chosen": -0.9490770101547241, + "logits/rejected": -0.9275020360946655, + "logps/chosen": -513.1905517578125, + "logps/rejected": -554.4321899414062, + "loss": 0.6926, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.29482370615005493, + "rewards/margins": 0.03965403884649277, + "rewards/rejected": -0.3344777524471283, + "step": 5460 + }, + { + "epoch": 0.36, + "learning_rate": 4.053808984107235e-06, + "logits/chosen": -0.8835921287536621, + "logits/rejected": -0.7733657360076904, + "logps/chosen": -487.41632080078125, + "logps/rejected": -488.1482849121094, + "loss": 0.6923, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.25454801321029663, + "rewards/margins": 0.04102517291903496, + "rewards/rejected": -0.2955731749534607, + "step": 5470 + }, + { + "epoch": 0.36, + "learning_rate": 4.04933181755471e-06, + "logits/chosen": -0.8765344619750977, + "logits/rejected": -0.8720951080322266, + "logps/chosen": -470.0048828125, + "logps/rejected": -541.0588989257812, + "loss": 0.6898, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2634243369102478, + "rewards/margins": 0.07826650887727737, + "rewards/rejected": -0.3416908383369446, + "step": 5480 + }, + { + "epoch": 0.36, + "learning_rate": 4.044846568928477e-06, + "logits/chosen": -1.1567533016204834, + "logits/rejected": -1.0573335886001587, + "logps/chosen": -502.82281494140625, + "logps/rejected": -555.1135864257812, + "loss": 0.691, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2412986308336258, + "rewards/margins": 0.06114867329597473, + "rewards/rejected": -0.30244728922843933, + "step": 5490 + }, + { + "epoch": 0.36, + "learning_rate": 4.040353261625788e-06, + "logits/chosen": -1.226211667060852, + "logits/rejected": -0.725497305393219, + "logps/chosen": -487.32177734375, + "logps/rejected": -539.41943359375, + "loss": 0.6885, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2107764482498169, + "rewards/margins": 0.090728260576725, + "rewards/rejected": -0.3015047311782837, + "step": 5500 + }, + { + "epoch": 0.36, + "eval_logits/chosen": -0.9162071943283081, + "eval_logits/rejected": -0.8003301024436951, + "eval_logps/chosen": -444.0844421386719, + "eval_logps/rejected": -493.766357421875, + "eval_loss": 0.6899505257606506, + "eval_rewards/accuracies": 0.6549999713897705, + "eval_rewards/chosen": -0.2120795100927353, + "eval_rewards/margins": 0.07007495313882828, + "eval_rewards/rejected": -0.2821544110774994, + "eval_runtime": 710.8775, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 1.407, + "step": 5500 + }, + { + "epoch": 0.36, + "learning_rate": 4.035851919085936e-06, + "logits/chosen": -1.0080921649932861, + "logits/rejected": -0.7228280305862427, + "logps/chosen": -507.87493896484375, + "logps/rejected": -510.32391357421875, + "loss": 0.6879, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.24048081040382385, + "rewards/margins": 0.08116090297698975, + "rewards/rejected": -0.3216416835784912, + "step": 5510 + }, + { + "epoch": 0.36, + "learning_rate": 4.031342564790128e-06, + "logits/chosen": -0.9639323353767395, + "logits/rejected": -0.7297991514205933, + "logps/chosen": -411.42010498046875, + "logps/rejected": -510.60272216796875, + "loss": 0.6873, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2054484337568283, + "rewards/margins": 0.10395534336566925, + "rewards/rejected": -0.30940383672714233, + "step": 5520 + }, + { + "epoch": 0.36, + "learning_rate": 4.026825222261367e-06, + "logits/chosen": -0.5591684579849243, + "logits/rejected": -0.3978855013847351, + "logps/chosen": -484.2080993652344, + "logps/rejected": -536.934326171875, + "loss": 0.6915, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.31113308668136597, + "rewards/margins": 0.059869349002838135, + "rewards/rejected": -0.3710024058818817, + "step": 5530 + }, + { + "epoch": 0.36, + "learning_rate": 4.022299915064321e-06, + "logits/chosen": -0.7978732585906982, + "logits/rejected": -0.5802344679832458, + "logps/chosen": -589.16455078125, + "logps/rejected": -607.4036865234375, + "loss": 0.6912, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.28331655263900757, + "rewards/margins": 0.07134465873241425, + "rewards/rejected": -0.354661226272583, + "step": 5540 + }, + { + "epoch": 0.36, + "learning_rate": 4.017766666805213e-06, + "logits/chosen": -0.6884918808937073, + "logits/rejected": -0.4019128382205963, + "logps/chosen": -525.5811767578125, + "logps/rejected": -562.6389770507812, + "loss": 0.6887, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.31206080317497253, + "rewards/margins": 0.07241939008235931, + "rewards/rejected": -0.38448017835617065, + "step": 5550 + }, + { + "epoch": 0.36, + "learning_rate": 4.013225501131684e-06, + "logits/chosen": -0.6882126927375793, + "logits/rejected": -0.44790735840797424, + "logps/chosen": -526.3818969726562, + "logps/rejected": -556.5396728515625, + "loss": 0.6907, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.3090043067932129, + "rewards/margins": 0.05713053420186043, + "rewards/rejected": -0.3661348521709442, + "step": 5560 + }, + { + "epoch": 0.36, + "learning_rate": 4.008676441732679e-06, + "logits/chosen": -0.35928666591644287, + "logits/rejected": -0.21741943061351776, + "logps/chosen": -535.5043334960938, + "logps/rejected": -553.3346557617188, + "loss": 0.6908, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.32920995354652405, + "rewards/margins": 0.06583123654127121, + "rewards/rejected": -0.39504122734069824, + "step": 5570 + }, + { + "epoch": 0.37, + "learning_rate": 4.00411951233832e-06, + "logits/chosen": -0.6145802736282349, + "logits/rejected": -0.5013601183891296, + "logps/chosen": -569.8782958984375, + "logps/rejected": -626.9734497070312, + "loss": 0.6878, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.3501531183719635, + "rewards/margins": 0.10176874697208405, + "rewards/rejected": -0.45192185044288635, + "step": 5580 + }, + { + "epoch": 0.37, + "learning_rate": 3.999554736719785e-06, + "logits/chosen": -0.47634387016296387, + "logits/rejected": -0.47885531187057495, + "logps/chosen": -616.468017578125, + "logps/rejected": -658.5233154296875, + "loss": 0.6899, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.3244911730289459, + "rewards/margins": 0.08557327091693878, + "rewards/rejected": -0.4100644588470459, + "step": 5590 + }, + { + "epoch": 0.37, + "learning_rate": 3.994982138689177e-06, + "logits/chosen": -1.1665931940078735, + "logits/rejected": -0.8447322845458984, + "logps/chosen": -524.7276000976562, + "logps/rejected": -590.0232543945312, + "loss": 0.6896, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.28732746839523315, + "rewards/margins": 0.06714684516191483, + "rewards/rejected": -0.35447433590888977, + "step": 5600 + }, + { + "epoch": 0.37, + "eval_logits/chosen": -0.7256837487220764, + "eval_logits/rejected": -0.6177822351455688, + "eval_logps/chosen": -536.2576904296875, + "eval_logps/rejected": -598.6423950195312, + "eval_loss": 0.6898110508918762, + "eval_rewards/accuracies": 0.6570000052452087, + "eval_rewards/chosen": -0.3042526841163635, + "eval_rewards/margins": 0.08277777582406998, + "eval_rewards/rejected": -0.3870304524898529, + "eval_runtime": 711.2848, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 1.406, + "step": 5600 + }, + { + "epoch": 0.37, + "learning_rate": 3.990401742099408e-06, + "logits/chosen": -0.596218466758728, + "logits/rejected": -0.6072180867195129, + "logps/chosen": -435.23553466796875, + "logps/rejected": -483.957763671875, + "loss": 0.6912, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.25637245178222656, + "rewards/margins": 0.05607098340988159, + "rewards/rejected": -0.31244343519210815, + "step": 5610 + }, + { + "epoch": 0.37, + "learning_rate": 3.985813570844072e-06, + "logits/chosen": -0.9559303522109985, + "logits/rejected": -0.8587062954902649, + "logps/chosen": -574.4474487304688, + "logps/rejected": -632.5167236328125, + "loss": 0.6893, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.280060738325119, + "rewards/margins": 0.08562088757753372, + "rewards/rejected": -0.36568158864974976, + "step": 5620 + }, + { + "epoch": 0.37, + "learning_rate": 3.981217648857316e-06, + "logits/chosen": -0.8603259325027466, + "logits/rejected": -0.8051680326461792, + "logps/chosen": -411.08636474609375, + "logps/rejected": -505.117431640625, + "loss": 0.6892, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.23854593932628632, + "rewards/margins": 0.09224925190210342, + "rewards/rejected": -0.33079519867897034, + "step": 5630 + }, + { + "epoch": 0.37, + "learning_rate": 3.97661400011372e-06, + "logits/chosen": -0.8633445501327515, + "logits/rejected": -0.9625928997993469, + "logps/chosen": -488.6290588378906, + "logps/rejected": -523.1886596679688, + "loss": 0.6925, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.24560177326202393, + "rewards/margins": 0.04598253220319748, + "rewards/rejected": -0.291584312915802, + "step": 5640 + }, + { + "epoch": 0.37, + "learning_rate": 3.972002648628174e-06, + "logits/chosen": -0.9786394834518433, + "logits/rejected": -0.8444933891296387, + "logps/chosen": -515.1180419921875, + "logps/rejected": -510.667724609375, + "loss": 0.6921, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.23808720707893372, + "rewards/margins": 0.04324156790971756, + "rewards/rejected": -0.2813287377357483, + "step": 5650 + }, + { + "epoch": 0.37, + "learning_rate": 3.967383618455743e-06, + "logits/chosen": -0.9421932101249695, + "logits/rejected": -1.0383517742156982, + "logps/chosen": -491.4378356933594, + "logps/rejected": -569.2808837890625, + "loss": 0.6897, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2643181085586548, + "rewards/margins": 0.05965255945920944, + "rewards/rejected": -0.3239706754684448, + "step": 5660 + }, + { + "epoch": 0.37, + "learning_rate": 3.9627569336915515e-06, + "logits/chosen": -1.2176904678344727, + "logits/rejected": -1.0065767765045166, + "logps/chosen": -496.88671875, + "logps/rejected": -538.030029296875, + "loss": 0.6881, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.24942393600940704, + "rewards/margins": 0.0978199765086174, + "rewards/rejected": -0.34724390506744385, + "step": 5670 + }, + { + "epoch": 0.37, + "learning_rate": 3.9581226184706555e-06, + "logits/chosen": -1.0491501092910767, + "logits/rejected": -1.2479287385940552, + "logps/chosen": -477.5037536621094, + "logps/rejected": -624.1370849609375, + "loss": 0.6893, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2828384041786194, + "rewards/margins": 0.07593565434217453, + "rewards/rejected": -0.3587740659713745, + "step": 5680 + }, + { + "epoch": 0.37, + "learning_rate": 3.953480696967912e-06, + "logits/chosen": -0.6281255483627319, + "logits/rejected": -0.7220240831375122, + "logps/chosen": -541.6065063476562, + "logps/rejected": -634.47216796875, + "loss": 0.6914, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3322349190711975, + "rewards/margins": 0.0564199797809124, + "rewards/rejected": -0.388654887676239, + "step": 5690 + }, + { + "epoch": 0.37, + "learning_rate": 3.948831193397857e-06, + "logits/chosen": -0.6184535622596741, + "logits/rejected": -0.6359222531318665, + "logps/chosen": -447.3412170410156, + "logps/rejected": -531.6007080078125, + "loss": 0.6898, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2813258171081543, + "rewards/margins": 0.0784335732460022, + "rewards/rejected": -0.3597593903541565, + "step": 5700 + }, + { + "epoch": 0.37, + "eval_logits/chosen": -0.7838326692581177, + "eval_logits/rejected": -0.6697111129760742, + "eval_logps/chosen": -537.5149536132812, + "eval_logps/rejected": -606.8261108398438, + "eval_loss": 0.6897205710411072, + "eval_rewards/accuracies": 0.6549999713897705, + "eval_rewards/chosen": -0.305510014295578, + "eval_rewards/margins": 0.08970417827367783, + "eval_rewards/rejected": -0.39521417021751404, + "eval_runtime": 712.2826, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 1.404, + "step": 5700 + }, + { + "epoch": 0.37, + "learning_rate": 3.94417413201458e-06, + "logits/chosen": -1.0492521524429321, + "logits/rejected": -0.7466319799423218, + "logps/chosen": -454.7074279785156, + "logps/rejected": -513.38134765625, + "loss": 0.69, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2431359589099884, + "rewards/margins": 0.07644067704677582, + "rewards/rejected": -0.3195766806602478, + "step": 5710 + }, + { + "epoch": 0.37, + "learning_rate": 3.9395095371115935e-06, + "logits/chosen": -1.0224096775054932, + "logits/rejected": -0.9557268023490906, + "logps/chosen": -423.9097595214844, + "logps/rejected": -488.0465393066406, + "loss": 0.6883, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.21125265955924988, + "rewards/margins": 0.07474798709154129, + "rewards/rejected": -0.28600066900253296, + "step": 5720 + }, + { + "epoch": 0.37, + "learning_rate": 3.93483743302171e-06, + "logits/chosen": -1.046775221824646, + "logits/rejected": -0.7583211064338684, + "logps/chosen": -422.81829833984375, + "logps/rejected": -471.67572021484375, + "loss": 0.6892, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21002662181854248, + "rewards/margins": 0.07259279489517212, + "rewards/rejected": -0.282619446516037, + "step": 5730 + }, + { + "epoch": 0.38, + "learning_rate": 3.930157844116913e-06, + "logits/chosen": -0.9095266461372375, + "logits/rejected": -0.6122316122055054, + "logps/chosen": -429.01556396484375, + "logps/rejected": -483.1982421875, + "loss": 0.6897, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.21925830841064453, + "rewards/margins": 0.06890133768320084, + "rewards/rejected": -0.28815966844558716, + "step": 5740 + }, + { + "epoch": 0.38, + "learning_rate": 3.925470794808229e-06, + "logits/chosen": -0.7558301091194153, + "logits/rejected": -0.6763758659362793, + "logps/chosen": -495.16259765625, + "logps/rejected": -552.1781005859375, + "loss": 0.6896, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.25617489218711853, + "rewards/margins": 0.08983282744884491, + "rewards/rejected": -0.346007764339447, + "step": 5750 + }, + { + "epoch": 0.38, + "learning_rate": 3.920776309545606e-06, + "logits/chosen": -1.1489700078964233, + "logits/rejected": -0.9279036521911621, + "logps/chosen": -312.0481872558594, + "logps/rejected": -378.68585205078125, + "loss": 0.69, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.15934118628501892, + "rewards/margins": 0.07347016036510468, + "rewards/rejected": -0.2328113615512848, + "step": 5760 + }, + { + "epoch": 0.38, + "learning_rate": 3.916074412817778e-06, + "logits/chosen": -0.9631127119064331, + "logits/rejected": -0.6907010078430176, + "logps/chosen": -439.86724853515625, + "logps/rejected": -540.8177490234375, + "loss": 0.6878, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1965332329273224, + "rewards/margins": 0.10042830556631088, + "rewards/rejected": -0.29696157574653625, + "step": 5770 + }, + { + "epoch": 0.38, + "learning_rate": 3.911365129152139e-06, + "logits/chosen": -0.8396314382553101, + "logits/rejected": -0.7624481916427612, + "logps/chosen": -462.0514221191406, + "logps/rejected": -539.0438232421875, + "loss": 0.6897, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.22961759567260742, + "rewards/margins": 0.08253227919340134, + "rewards/rejected": -0.31214988231658936, + "step": 5780 + }, + { + "epoch": 0.38, + "learning_rate": 3.906648483114623e-06, + "logits/chosen": -0.6250481605529785, + "logits/rejected": -0.40239137411117554, + "logps/chosen": -438.18963623046875, + "logps/rejected": -515.53662109375, + "loss": 0.687, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2408660352230072, + "rewards/margins": 0.10993202030658722, + "rewards/rejected": -0.3507980704307556, + "step": 5790 + }, + { + "epoch": 0.38, + "learning_rate": 3.901924499309564e-06, + "logits/chosen": -0.30131223797798157, + "logits/rejected": -0.2699768841266632, + "logps/chosen": -511.6910095214844, + "logps/rejected": -577.87060546875, + "loss": 0.6894, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2829861044883728, + "rewards/margins": 0.0987856313586235, + "rewards/rejected": -0.3817717134952545, + "step": 5800 + }, + { + "epoch": 0.38, + "eval_logits/chosen": -0.3526945114135742, + "eval_logits/rejected": -0.2645464539527893, + "eval_logps/chosen": -553.59912109375, + "eval_logps/rejected": -630.135498046875, + "eval_loss": 0.6897721290588379, + "eval_rewards/accuracies": 0.6524999737739563, + "eval_rewards/chosen": -0.3215942084789276, + "eval_rewards/margins": 0.09692942351102829, + "eval_rewards/rejected": -0.4185236096382141, + "eval_runtime": 709.8582, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 1.409, + "step": 5800 + }, + { + "epoch": 0.38, + "learning_rate": 3.897193202379575e-06, + "logits/chosen": -0.511224091053009, + "logits/rejected": -0.2985347509384155, + "logps/chosen": -489.2774353027344, + "logps/rejected": -567.6716918945312, + "loss": 0.6891, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2887997329235077, + "rewards/margins": 0.09536701440811157, + "rewards/rejected": -0.3841667175292969, + "step": 5810 + }, + { + "epoch": 0.38, + "learning_rate": 3.8924546170054215e-06, + "logits/chosen": -0.6443430185317993, + "logits/rejected": -0.4279851019382477, + "logps/chosen": -457.66064453125, + "logps/rejected": -509.69818115234375, + "loss": 0.6913, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2417530119419098, + "rewards/margins": 0.07151090353727341, + "rewards/rejected": -0.3132639229297638, + "step": 5820 + }, + { + "epoch": 0.38, + "learning_rate": 3.887708767905883e-06, + "logits/chosen": -0.9861922264099121, + "logits/rejected": -0.7905910015106201, + "logps/chosen": -462.68408203125, + "logps/rejected": -462.4227600097656, + "loss": 0.6915, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2189168483018875, + "rewards/margins": 0.06558167934417725, + "rewards/rejected": -0.28449851274490356, + "step": 5830 + }, + { + "epoch": 0.38, + "learning_rate": 3.882955679837636e-06, + "logits/chosen": -0.8170859217643738, + "logits/rejected": -0.9146119356155396, + "logps/chosen": -450.39617919921875, + "logps/rejected": -506.41033935546875, + "loss": 0.6914, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.21101923286914825, + "rewards/margins": 0.05363879352807999, + "rewards/rejected": -0.26465800404548645, + "step": 5840 + }, + { + "epoch": 0.38, + "learning_rate": 3.878195377595113e-06, + "logits/chosen": -0.7225337028503418, + "logits/rejected": -0.6112938523292542, + "logps/chosen": -427.2122497558594, + "logps/rejected": -513.8952026367188, + "loss": 0.69, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.19130650162696838, + "rewards/margins": 0.09224653244018555, + "rewards/rejected": -0.28355303406715393, + "step": 5850 + }, + { + "epoch": 0.38, + "learning_rate": 3.873427886010384e-06, + "logits/chosen": -0.8298704028129578, + "logits/rejected": -0.5080928206443787, + "logps/chosen": -387.6654357910156, + "logps/rejected": -454.1085510253906, + "loss": 0.6886, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.19721348583698273, + "rewards/margins": 0.08553223311901093, + "rewards/rejected": -0.28274568915367126, + "step": 5860 + }, + { + "epoch": 0.38, + "learning_rate": 3.868653229953021e-06, + "logits/chosen": -0.8196694254875183, + "logits/rejected": -0.6232892274856567, + "logps/chosen": -457.27911376953125, + "logps/rejected": -562.1744384765625, + "loss": 0.6879, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.22006423771381378, + "rewards/margins": 0.10695929825305939, + "rewards/rejected": -0.3270235061645508, + "step": 5870 + }, + { + "epoch": 0.38, + "learning_rate": 3.8638714343299675e-06, + "logits/chosen": -0.8010458946228027, + "logits/rejected": -0.6639386415481567, + "logps/chosen": -433.7034606933594, + "logps/rejected": -544.5938720703125, + "loss": 0.6878, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.21536073088645935, + "rewards/margins": 0.09577467292547226, + "rewards/rejected": -0.3111354112625122, + "step": 5880 + }, + { + "epoch": 0.39, + "learning_rate": 3.859082524085414e-06, + "logits/chosen": -0.6064971685409546, + "logits/rejected": -0.4940188527107239, + "logps/chosen": -530.43115234375, + "logps/rejected": -537.8308715820312, + "loss": 0.6898, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.25808802247047424, + "rewards/margins": 0.06763879209756851, + "rewards/rejected": -0.32572680711746216, + "step": 5890 + }, + { + "epoch": 0.39, + "learning_rate": 3.854286524200659e-06, + "logits/chosen": -1.008453130722046, + "logits/rejected": -0.5371363162994385, + "logps/chosen": -514.41748046875, + "logps/rejected": -526.7296752929688, + "loss": 0.6899, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2380000799894333, + "rewards/margins": 0.06294857710599899, + "rewards/rejected": -0.3009486794471741, + "step": 5900 + }, + { + "epoch": 0.39, + "eval_logits/chosen": -0.5847914218902588, + "eval_logits/rejected": -0.4853719472885132, + "eval_logps/chosen": -493.8011779785156, + "eval_logps/rejected": -557.822265625, + "eval_loss": 0.6896816492080688, + "eval_rewards/accuracies": 0.6535000205039978, + "eval_rewards/chosen": -0.2617962658405304, + "eval_rewards/margins": 0.08441410213708878, + "eval_rewards/rejected": -0.34621039032936096, + "eval_runtime": 709.3468, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 1.41, + "step": 5900 + }, + { + "epoch": 0.39, + "learning_rate": 3.849483459693991e-06, + "logits/chosen": -0.7097777128219604, + "logits/rejected": -0.4602317214012146, + "logps/chosen": -467.04962158203125, + "logps/rejected": -541.8302001953125, + "loss": 0.6856, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.25737297534942627, + "rewards/margins": 0.11344969272613525, + "rewards/rejected": -0.3708226680755615, + "step": 5910 + }, + { + "epoch": 0.39, + "learning_rate": 3.844673355620544e-06, + "logits/chosen": -0.7129204273223877, + "logits/rejected": -0.409584105014801, + "logps/chosen": -538.8270263671875, + "logps/rejected": -606.05322265625, + "loss": 0.6905, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.29225417971611023, + "rewards/margins": 0.1055116280913353, + "rewards/rejected": -0.3977658152580261, + "step": 5920 + }, + { + "epoch": 0.39, + "learning_rate": 3.839856237072178e-06, + "logits/chosen": -0.5346713066101074, + "logits/rejected": -0.4406895637512207, + "logps/chosen": -470.629638671875, + "logps/rejected": -605.9718017578125, + "loss": 0.6883, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.28805553913116455, + "rewards/margins": 0.11815004050731659, + "rewards/rejected": -0.40620556473731995, + "step": 5930 + }, + { + "epoch": 0.39, + "learning_rate": 3.8350321291773455e-06, + "logits/chosen": -0.7473275661468506, + "logits/rejected": -0.5856843590736389, + "logps/chosen": -414.0442810058594, + "logps/rejected": -446.9261169433594, + "loss": 0.6912, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.21152237057685852, + "rewards/margins": 0.07707812637090683, + "rewards/rejected": -0.28860050439834595, + "step": 5940 + }, + { + "epoch": 0.39, + "learning_rate": 3.830201057100953e-06, + "logits/chosen": -1.1814167499542236, + "logits/rejected": -1.068490743637085, + "logps/chosen": -404.77191162109375, + "logps/rejected": -512.7886352539062, + "loss": 0.6883, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.21299895644187927, + "rewards/margins": 0.09252388775348663, + "rewards/rejected": -0.3055228590965271, + "step": 5950 + }, + { + "epoch": 0.39, + "learning_rate": 3.82536304604424e-06, + "logits/chosen": -0.8924945592880249, + "logits/rejected": -0.7654592394828796, + "logps/chosen": -418.2235412597656, + "logps/rejected": -458.09149169921875, + "loss": 0.6913, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.18205930292606354, + "rewards/margins": 0.07160413265228271, + "rewards/rejected": -0.25366342067718506, + "step": 5960 + }, + { + "epoch": 0.39, + "learning_rate": 3.8205181212446435e-06, + "logits/chosen": -1.131145715713501, + "logits/rejected": -0.971734881401062, + "logps/chosen": -474.43389892578125, + "logps/rejected": -510.0201110839844, + "loss": 0.6899, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20387499034404755, + "rewards/margins": 0.0759897381067276, + "rewards/rejected": -0.27986472845077515, + "step": 5970 + }, + { + "epoch": 0.39, + "learning_rate": 3.815666307975664e-06, + "logits/chosen": -1.009289026260376, + "logits/rejected": -1.0255839824676514, + "logps/chosen": -456.5907287597656, + "logps/rejected": -486.37744140625, + "loss": 0.6921, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2192729264497757, + "rewards/margins": 0.04896865412592888, + "rewards/rejected": -0.2682415843009949, + "step": 5980 + }, + { + "epoch": 0.39, + "learning_rate": 3.8108076315467346e-06, + "logits/chosen": -1.2979350090026855, + "logits/rejected": -1.1967132091522217, + "logps/chosen": -456.06988525390625, + "logps/rejected": -439.7726135253906, + "loss": 0.6911, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19878706336021423, + "rewards/margins": 0.05842934176325798, + "rewards/rejected": -0.2572163939476013, + "step": 5990 + }, + { + "epoch": 0.39, + "learning_rate": 3.805942117303093e-06, + "logits/chosen": -1.3287451267242432, + "logits/rejected": -1.1833655834197998, + "logps/chosen": -516.2380981445312, + "logps/rejected": -527.54638671875, + "loss": 0.6896, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2002025842666626, + "rewards/margins": 0.05783183500170708, + "rewards/rejected": -0.2580344080924988, + "step": 6000 + }, + { + "epoch": 0.39, + "eval_logits/chosen": -0.9796077013015747, + "eval_logits/rejected": -0.8594146966934204, + "eval_logps/chosen": -429.3099060058594, + "eval_logps/rejected": -477.4434509277344, + "eval_loss": 0.6897467970848083, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -0.19730499386787415, + "eval_rewards/margins": 0.06852658092975616, + "eval_rewards/rejected": -0.2658315598964691, + "eval_runtime": 712.3923, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 1.404, + "step": 6000 + }, + { + "epoch": 0.39, + "learning_rate": 3.8010697906256446e-06, + "logits/chosen": -1.0929971933364868, + "logits/rejected": -0.8249849081039429, + "logps/chosen": -452.6302795410156, + "logps/rejected": -492.43951416015625, + "loss": 0.6908, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.24593381583690643, + "rewards/margins": 0.06763944774866104, + "rewards/rejected": -0.31357327103614807, + "step": 6010 + }, + { + "epoch": 0.39, + "learning_rate": 3.7961906769308323e-06, + "logits/chosen": -0.6220898628234863, + "logits/rejected": -0.6787351369857788, + "logps/chosen": -449.19256591796875, + "logps/rejected": -520.2421875, + "loss": 0.6902, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.24295739829540253, + "rewards/margins": 0.06097465008497238, + "rewards/rejected": -0.3039320409297943, + "step": 6020 + }, + { + "epoch": 0.39, + "learning_rate": 3.7913048016705028e-06, + "logits/chosen": -1.0191593170166016, + "logits/rejected": -0.8695308566093445, + "logps/chosen": -485.6734924316406, + "logps/rejected": -543.5657958984375, + "loss": 0.6902, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.22573387622833252, + "rewards/margins": 0.05994036793708801, + "rewards/rejected": -0.2856742739677429, + "step": 6030 + }, + { + "epoch": 0.4, + "learning_rate": 3.786412190331775e-06, + "logits/chosen": -0.974204421043396, + "logits/rejected": -0.49800848960876465, + "logps/chosen": -398.828125, + "logps/rejected": -441.0577087402344, + "loss": 0.6902, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.19596761465072632, + "rewards/margins": 0.07932895421981812, + "rewards/rejected": -0.2752965986728668, + "step": 6040 + }, + { + "epoch": 0.4, + "learning_rate": 3.781512868436906e-06, + "logits/chosen": -0.9405696988105774, + "logits/rejected": -0.9803541898727417, + "logps/chosen": -335.2028503417969, + "logps/rejected": -394.23785400390625, + "loss": 0.6901, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.20259208977222443, + "rewards/margins": 0.051246047019958496, + "rewards/rejected": -0.25383812189102173, + "step": 6050 + }, + { + "epoch": 0.4, + "learning_rate": 3.7766068615431605e-06, + "logits/chosen": -0.8138763308525085, + "logits/rejected": -0.7067015767097473, + "logps/chosen": -491.45428466796875, + "logps/rejected": -501.21636962890625, + "loss": 0.6913, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.23436717689037323, + "rewards/margins": 0.058397091925144196, + "rewards/rejected": -0.292764276266098, + "step": 6060 + }, + { + "epoch": 0.4, + "learning_rate": 3.771694195242671e-06, + "logits/chosen": -1.1549289226531982, + "logits/rejected": -0.5855950713157654, + "logps/chosen": -562.266845703125, + "logps/rejected": -520.1834716796875, + "loss": 0.6907, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2630881071090698, + "rewards/margins": 0.06964752078056335, + "rewards/rejected": -0.33273565769195557, + "step": 6070 + }, + { + "epoch": 0.4, + "learning_rate": 3.766774895162314e-06, + "logits/chosen": -0.8373686075210571, + "logits/rejected": -0.8374196290969849, + "logps/chosen": -541.1320190429688, + "logps/rejected": -534.19580078125, + "loss": 0.6907, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.29459238052368164, + "rewards/margins": 0.049199365079402924, + "rewards/rejected": -0.34379175305366516, + "step": 6080 + }, + { + "epoch": 0.4, + "learning_rate": 3.7618489869635666e-06, + "logits/chosen": -0.8949093818664551, + "logits/rejected": -0.7191926836967468, + "logps/chosen": -550.8589477539062, + "logps/rejected": -578.9771728515625, + "loss": 0.692, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.3038319945335388, + "rewards/margins": 0.047861166298389435, + "rewards/rejected": -0.35169318318367004, + "step": 6090 + }, + { + "epoch": 0.4, + "learning_rate": 3.756916496342379e-06, + "logits/chosen": -1.0668814182281494, + "logits/rejected": -1.097706913948059, + "logps/chosen": -453.369873046875, + "logps/rejected": -540.5994873046875, + "loss": 0.6885, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.266497939825058, + "rewards/margins": 0.0787767842411995, + "rewards/rejected": -0.34527474641799927, + "step": 6100 + }, + { + "epoch": 0.4, + "eval_logits/chosen": -0.9699369072914124, + "eval_logits/rejected": -0.8472295999526978, + "eval_logps/chosen": -491.1470031738281, + "eval_logps/rejected": -547.7916870117188, + "eval_loss": 0.6896329522132874, + "eval_rewards/accuracies": 0.6579999923706055, + "eval_rewards/chosen": -0.25914210081100464, + "eval_rewards/margins": 0.0770377516746521, + "eval_rewards/rejected": -0.33617985248565674, + "eval_runtime": 711.0016, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 1.406, + "step": 6100 + }, + { + "epoch": 0.4, + "learning_rate": 3.751977449029039e-06, + "logits/chosen": -0.7568556070327759, + "logits/rejected": -0.6889457702636719, + "logps/chosen": -549.9276123046875, + "logps/rejected": -603.6575317382812, + "loss": 0.6898, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2898511290550232, + "rewards/margins": 0.09112556278705597, + "rewards/rejected": -0.38097670674324036, + "step": 6110 + }, + { + "epoch": 0.4, + "learning_rate": 3.747031870788037e-06, + "logits/chosen": -1.1127030849456787, + "logits/rejected": -0.9563379287719727, + "logps/chosen": -552.7032470703125, + "logps/rejected": -554.0695190429688, + "loss": 0.6896, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.23594991862773895, + "rewards/margins": 0.08378570526838303, + "rewards/rejected": -0.3197356164455414, + "step": 6120 + }, + { + "epoch": 0.4, + "learning_rate": 3.7420797874179326e-06, + "logits/chosen": -0.8063844442367554, + "logits/rejected": -0.7351914644241333, + "logps/chosen": -502.19293212890625, + "logps/rejected": -521.9371337890625, + "loss": 0.6895, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.2650834321975708, + "rewards/margins": 0.07863331586122513, + "rewards/rejected": -0.34371674060821533, + "step": 6130 + }, + { + "epoch": 0.4, + "learning_rate": 3.7371212247512167e-06, + "logits/chosen": -1.356092095375061, + "logits/rejected": -1.071885347366333, + "logps/chosen": -530.7686157226562, + "logps/rejected": -548.25927734375, + "loss": 0.6901, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.20437414944171906, + "rewards/margins": 0.08259885013103485, + "rewards/rejected": -0.2869729995727539, + "step": 6140 + }, + { + "epoch": 0.4, + "learning_rate": 3.7321562086541817e-06, + "logits/chosen": -1.1088091135025024, + "logits/rejected": -1.0533579587936401, + "logps/chosen": -501.5927734375, + "logps/rejected": -564.1729736328125, + "loss": 0.6901, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.24861261248588562, + "rewards/margins": 0.06178991124033928, + "rewards/rejected": -0.3104025721549988, + "step": 6150 + }, + { + "epoch": 0.4, + "learning_rate": 3.7271847650267834e-06, + "logits/chosen": -0.9327411651611328, + "logits/rejected": -0.7886485457420349, + "logps/chosen": -478.2225036621094, + "logps/rejected": -538.6837158203125, + "loss": 0.6895, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2782878577709198, + "rewards/margins": 0.0613514706492424, + "rewards/rejected": -0.3396393656730652, + "step": 6160 + }, + { + "epoch": 0.4, + "learning_rate": 3.7222069198025086e-06, + "logits/chosen": -0.8222224116325378, + "logits/rejected": -0.6890066862106323, + "logps/chosen": -570.3583984375, + "logps/rejected": -650.7569580078125, + "loss": 0.6893, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.3618159890174866, + "rewards/margins": 0.10099077224731445, + "rewards/rejected": -0.462806761264801, + "step": 6170 + }, + { + "epoch": 0.4, + "learning_rate": 3.7172226989482353e-06, + "logits/chosen": -0.9450828433036804, + "logits/rejected": -0.867464542388916, + "logps/chosen": -548.2552490234375, + "logps/rejected": -607.8885498046875, + "loss": 0.6908, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.33828240633010864, + "rewards/margins": 0.06177844852209091, + "rewards/rejected": -0.40006089210510254, + "step": 6180 + }, + { + "epoch": 0.4, + "learning_rate": 3.7122321284641007e-06, + "logits/chosen": -1.3301527500152588, + "logits/rejected": -1.1098195314407349, + "logps/chosen": -669.6278076171875, + "logps/rejected": -665.1828002929688, + "loss": 0.6877, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.3068411350250244, + "rewards/margins": 0.1060449630022049, + "rewards/rejected": -0.4128860831260681, + "step": 6190 + }, + { + "epoch": 0.41, + "learning_rate": 3.707235234383365e-06, + "logits/chosen": -1.1410796642303467, + "logits/rejected": -1.0234348773956299, + "logps/chosen": -490.13421630859375, + "logps/rejected": -463.69854736328125, + "loss": 0.6905, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.22888025641441345, + "rewards/margins": 0.06034322455525398, + "rewards/rejected": -0.28922349214553833, + "step": 6200 + }, + { + "epoch": 0.41, + "eval_logits/chosen": -1.252907395362854, + "eval_logits/rejected": -1.114737629890442, + "eval_logps/chosen": -485.2598876953125, + "eval_logps/rejected": -540.0977783203125, + "eval_loss": 0.6896072030067444, + "eval_rewards/accuracies": 0.6610000133514404, + "eval_rewards/chosen": -0.2532549202442169, + "eval_rewards/margins": 0.07523093372583389, + "eval_rewards/rejected": -0.3284858763217926, + "eval_runtime": 711.0801, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 1.406, + "step": 6200 + }, + { + "epoch": 0.41, + "learning_rate": 3.702232042772277e-06, + "logits/chosen": -1.2290165424346924, + "logits/rejected": -1.167140007019043, + "logps/chosen": -497.998291015625, + "logps/rejected": -577.1484985351562, + "loss": 0.6885, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.29545214772224426, + "rewards/margins": 0.101504847407341, + "rewards/rejected": -0.3969569802284241, + "step": 6210 + }, + { + "epoch": 0.41, + "learning_rate": 3.6972225797299325e-06, + "logits/chosen": -1.2075756788253784, + "logits/rejected": -1.2317383289337158, + "logps/chosen": -572.742431640625, + "logps/rejected": -648.7164916992188, + "loss": 0.6871, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.3222338557243347, + "rewards/margins": 0.09190957248210907, + "rewards/rejected": -0.4141434133052826, + "step": 6220 + }, + { + "epoch": 0.41, + "learning_rate": 3.692206871388147e-06, + "logits/chosen": -1.2632339000701904, + "logits/rejected": -0.8921878933906555, + "logps/chosen": -526.5006713867188, + "logps/rejected": -603.5263061523438, + "loss": 0.6893, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2941119372844696, + "rewards/margins": 0.11825486272573471, + "rewards/rejected": -0.4123667776584625, + "step": 6230 + }, + { + "epoch": 0.41, + "learning_rate": 3.6871849439113115e-06, + "logits/chosen": -0.7660868167877197, + "logits/rejected": -0.9121445417404175, + "logps/chosen": -515.5945434570312, + "logps/rejected": -584.4954223632812, + "loss": 0.6892, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.28568369150161743, + "rewards/margins": 0.07844862341880798, + "rewards/rejected": -0.3641323447227478, + "step": 6240 + }, + { + "epoch": 0.41, + "learning_rate": 3.682156823496259e-06, + "logits/chosen": -1.1769955158233643, + "logits/rejected": -0.8457037806510925, + "logps/chosen": -528.9486694335938, + "logps/rejected": -607.7376708984375, + "loss": 0.6914, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.3150347173213959, + "rewards/margins": 0.11115459352731705, + "rewards/rejected": -0.4261893332004547, + "step": 6250 + }, + { + "epoch": 0.41, + "learning_rate": 3.67712253637213e-06, + "logits/chosen": -1.2539669275283813, + "logits/rejected": -1.0763676166534424, + "logps/chosen": -583.54296875, + "logps/rejected": -560.4552001953125, + "loss": 0.689, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.29124850034713745, + "rewards/margins": 0.06682470440864563, + "rewards/rejected": -0.35807323455810547, + "step": 6260 + }, + { + "epoch": 0.41, + "learning_rate": 3.672082108800231e-06, + "logits/chosen": -1.0638225078582764, + "logits/rejected": -1.035290002822876, + "logps/chosen": -571.1708374023438, + "logps/rejected": -620.5901489257812, + "loss": 0.6893, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.3573550581932068, + "rewards/margins": 0.08345872163772583, + "rewards/rejected": -0.4408137798309326, + "step": 6270 + }, + { + "epoch": 0.41, + "learning_rate": 3.6670355670739012e-06, + "logits/chosen": -1.1504619121551514, + "logits/rejected": -0.996173083782196, + "logps/chosen": -468.37017822265625, + "logps/rejected": -558.45263671875, + "loss": 0.6883, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.30641672015190125, + "rewards/margins": 0.09272117912769318, + "rewards/rejected": -0.3991378843784332, + "step": 6280 + }, + { + "epoch": 0.41, + "learning_rate": 3.6619829375183745e-06, + "logits/chosen": -1.3015220165252686, + "logits/rejected": -1.161517858505249, + "logps/chosen": -546.1529541015625, + "logps/rejected": -649.0066528320312, + "loss": 0.6889, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3201504349708557, + "rewards/margins": 0.11892461776733398, + "rewards/rejected": -0.4390750527381897, + "step": 6290 + }, + { + "epoch": 0.41, + "learning_rate": 3.6569242464906427e-06, + "logits/chosen": -1.3468759059906006, + "logits/rejected": -1.2353953123092651, + "logps/chosen": -450.9453125, + "logps/rejected": -544.6844482421875, + "loss": 0.6885, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.24321627616882324, + "rewards/margins": 0.0726899802684784, + "rewards/rejected": -0.31590625643730164, + "step": 6300 + }, + { + "epoch": 0.41, + "eval_logits/chosen": -1.4056432247161865, + "eval_logits/rejected": -1.2599819898605347, + "eval_logps/chosen": -469.2735595703125, + "eval_logps/rejected": -521.1331176757812, + "eval_loss": 0.6896970272064209, + "eval_rewards/accuracies": 0.6635000109672546, + "eval_rewards/chosen": -0.23726864159107208, + "eval_rewards/margins": 0.07225258648395538, + "eval_rewards/rejected": -0.30952122807502747, + "eval_runtime": 711.0613, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 1.406, + "step": 6300 + }, + { + "epoch": 0.41, + "learning_rate": 3.6518595203793156e-06, + "logits/chosen": -1.3377609252929688, + "logits/rejected": -1.2768046855926514, + "logps/chosen": -483.3002014160156, + "logps/rejected": -601.4906616210938, + "loss": 0.6901, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.22458593547344208, + "rewards/margins": 0.1047157272696495, + "rewards/rejected": -0.329301655292511, + "step": 6310 + }, + { + "epoch": 0.41, + "learning_rate": 3.646788785604485e-06, + "logits/chosen": -1.5382457971572876, + "logits/rejected": -1.4908673763275146, + "logps/chosen": -389.2879943847656, + "logps/rejected": -431.7403869628906, + "loss": 0.6918, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1836296021938324, + "rewards/margins": 0.044697392731904984, + "rewards/rejected": -0.22832700610160828, + "step": 6320 + }, + { + "epoch": 0.41, + "learning_rate": 3.641712068617588e-06, + "logits/chosen": -1.4615576267242432, + "logits/rejected": -1.3482048511505127, + "logps/chosen": -468.55291748046875, + "logps/rejected": -462.7494201660156, + "loss": 0.6925, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.20719289779663086, + "rewards/margins": 0.04774096980690956, + "rewards/rejected": -0.2549338936805725, + "step": 6330 + }, + { + "epoch": 0.41, + "learning_rate": 3.6366293959012673e-06, + "logits/chosen": -1.3200759887695312, + "logits/rejected": -1.13698410987854, + "logps/chosen": -369.1453857421875, + "logps/rejected": -432.5205078125, + "loss": 0.6885, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.18794342875480652, + "rewards/margins": 0.08289827406406403, + "rewards/rejected": -0.27084171772003174, + "step": 6340 + }, + { + "epoch": 0.42, + "learning_rate": 3.631540793969233e-06, + "logits/chosen": -1.6114717721939087, + "logits/rejected": -1.4890989065170288, + "logps/chosen": -392.2261047363281, + "logps/rejected": -440.48553466796875, + "loss": 0.6906, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.20360836386680603, + "rewards/margins": 0.046581171452999115, + "rewards/rejected": -0.25018954277038574, + "step": 6350 + }, + { + "epoch": 0.42, + "learning_rate": 3.626446289366127e-06, + "logits/chosen": -1.4099271297454834, + "logits/rejected": -1.1388559341430664, + "logps/chosen": -523.2926025390625, + "logps/rejected": -492.5755920410156, + "loss": 0.692, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.30314338207244873, + "rewards/margins": 0.04080190882086754, + "rewards/rejected": -0.34394532442092896, + "step": 6360 + }, + { + "epoch": 0.42, + "learning_rate": 3.6213459086673786e-06, + "logits/chosen": -1.2427856922149658, + "logits/rejected": -1.2967463731765747, + "logps/chosen": -508.787841796875, + "logps/rejected": -602.8619384765625, + "loss": 0.6879, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.3378502130508423, + "rewards/margins": 0.084700807929039, + "rewards/rejected": -0.42255106568336487, + "step": 6370 + }, + { + "epoch": 0.42, + "learning_rate": 3.6162396784790737e-06, + "logits/chosen": -0.9532783627510071, + "logits/rejected": -0.8490549921989441, + "logps/chosen": -580.7579956054688, + "logps/rejected": -648.3948974609375, + "loss": 0.6895, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.35775092244148254, + "rewards/margins": 0.06779655069112778, + "rewards/rejected": -0.4255475103855133, + "step": 6380 + }, + { + "epoch": 0.42, + "learning_rate": 3.6111276254378095e-06, + "logits/chosen": -1.2318470478057861, + "logits/rejected": -1.1283295154571533, + "logps/chosen": -554.7551879882812, + "logps/rejected": -658.4044189453125, + "loss": 0.6885, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.3267982602119446, + "rewards/margins": 0.10806284844875336, + "rewards/rejected": -0.43486112356185913, + "step": 6390 + }, + { + "epoch": 0.42, + "learning_rate": 3.606009776210559e-06, + "logits/chosen": -1.2396069765090942, + "logits/rejected": -1.1743987798690796, + "logps/chosen": -625.6569213867188, + "logps/rejected": -670.0760498046875, + "loss": 0.689, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.3850333094596863, + "rewards/margins": 0.0880778431892395, + "rewards/rejected": -0.4731111526489258, + "step": 6400 + }, + { + "epoch": 0.42, + "eval_logits/chosen": -1.2236859798431396, + "eval_logits/rejected": -1.084417700767517, + "eval_logps/chosen": -566.3375854492188, + "eval_logps/rejected": -629.9546508789062, + "eval_loss": 0.6897481083869934, + "eval_rewards/accuracies": 0.652999997138977, + "eval_rewards/chosen": -0.3343326151371002, + "eval_rewards/margins": 0.08401010930538177, + "eval_rewards/rejected": -0.4183427393436432, + "eval_runtime": 712.7818, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 1.403, + "step": 6400 + }, + { + "epoch": 0.42, + "learning_rate": 3.600886157494531e-06, + "logits/chosen": -1.4335150718688965, + "logits/rejected": -1.3242355585098267, + "logps/chosen": -562.1947021484375, + "logps/rejected": -640.3161010742188, + "loss": 0.6887, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2993382513523102, + "rewards/margins": 0.08990345895290375, + "rewards/rejected": -0.38924169540405273, + "step": 6410 + }, + { + "epoch": 0.42, + "learning_rate": 3.5957567960170304e-06, + "logits/chosen": -1.4665154218673706, + "logits/rejected": -0.9679352045059204, + "logps/chosen": -610.47607421875, + "logps/rejected": -579.2901611328125, + "loss": 0.6891, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.31929099559783936, + "rewards/margins": 0.0874718576669693, + "rewards/rejected": -0.40676283836364746, + "step": 6420 + }, + { + "epoch": 0.42, + "learning_rate": 3.590621718535319e-06, + "logits/chosen": -1.0034302473068237, + "logits/rejected": -0.9371808171272278, + "logps/chosen": -572.9190673828125, + "logps/rejected": -681.28857421875, + "loss": 0.6876, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.3761967122554779, + "rewards/margins": 0.1092621460556984, + "rewards/rejected": -0.4854588508605957, + "step": 6430 + }, + { + "epoch": 0.42, + "learning_rate": 3.5854809518364775e-06, + "logits/chosen": -1.368154525756836, + "logits/rejected": -1.2136409282684326, + "logps/chosen": -512.4900512695312, + "logps/rejected": -557.9346923828125, + "loss": 0.6879, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.26634320616722107, + "rewards/margins": 0.08995746076107025, + "rewards/rejected": -0.3563006818294525, + "step": 6440 + }, + { + "epoch": 0.42, + "learning_rate": 3.580334522737262e-06, + "logits/chosen": -1.1654552221298218, + "logits/rejected": -1.0585110187530518, + "logps/chosen": -509.4706115722656, + "logps/rejected": -566.9360961914062, + "loss": 0.6896, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.30752623081207275, + "rewards/margins": 0.09111505001783371, + "rewards/rejected": -0.39864128828048706, + "step": 6450 + }, + { + "epoch": 0.42, + "learning_rate": 3.575182458083968e-06, + "logits/chosen": -1.1148736476898193, + "logits/rejected": -1.0673105716705322, + "logps/chosen": -555.1051025390625, + "logps/rejected": -637.6865234375, + "loss": 0.6894, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.3143001198768616, + "rewards/margins": 0.11058640480041504, + "rewards/rejected": -0.4248865246772766, + "step": 6460 + }, + { + "epoch": 0.42, + "learning_rate": 3.5700247847522883e-06, + "logits/chosen": -1.364552617073059, + "logits/rejected": -1.2792692184448242, + "logps/chosen": -456.8177795410156, + "logps/rejected": -548.9254760742188, + "loss": 0.6884, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2540197968482971, + "rewards/margins": 0.09212598204612732, + "rewards/rejected": -0.34614577889442444, + "step": 6470 + }, + { + "epoch": 0.42, + "learning_rate": 3.5648615296471743e-06, + "logits/chosen": -1.1691317558288574, + "logits/rejected": -1.1110568046569824, + "logps/chosen": -531.5234985351562, + "logps/rejected": -679.093017578125, + "loss": 0.6901, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.3341607451438904, + "rewards/margins": 0.10949563980102539, + "rewards/rejected": -0.4436563551425934, + "step": 6480 + }, + { + "epoch": 0.42, + "learning_rate": 3.559692719702693e-06, + "logits/chosen": -1.022430181503296, + "logits/rejected": -0.8281211853027344, + "logps/chosen": -661.8057861328125, + "logps/rejected": -702.7855834960938, + "loss": 0.6886, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.37566065788269043, + "rewards/margins": 0.0977093055844307, + "rewards/rejected": -0.4733699858188629, + "step": 6490 + }, + { + "epoch": 0.43, + "learning_rate": 3.55451838188189e-06, + "logits/chosen": -1.336548089981079, + "logits/rejected": -1.3366944789886475, + "logps/chosen": -549.550048828125, + "logps/rejected": -651.333984375, + "loss": 0.6913, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2882632613182068, + "rewards/margins": 0.07985077798366547, + "rewards/rejected": -0.36811405420303345, + "step": 6500 + }, + { + "epoch": 0.43, + "eval_logits/chosen": -1.3350025415420532, + "eval_logits/rejected": -1.1925042867660522, + "eval_logps/chosen": -548.86865234375, + "eval_logps/rejected": -604.5547485351562, + "eval_loss": 0.6896828413009644, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -0.3168637156486511, + "eval_rewards/margins": 0.0760791078209877, + "eval_rewards/rejected": -0.392942875623703, + "eval_runtime": 712.0531, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 1.404, + "step": 6500 + }, + { + "epoch": 0.43, + "learning_rate": 3.549338543176645e-06, + "logits/chosen": -1.4357526302337646, + "logits/rejected": -1.2661854028701782, + "logps/chosen": -615.6642456054688, + "logps/rejected": -650.0222778320312, + "loss": 0.6903, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.3091757595539093, + "rewards/margins": 0.06671115756034851, + "rewards/rejected": -0.3758869469165802, + "step": 6510 + }, + { + "epoch": 0.43, + "learning_rate": 3.5441532306075342e-06, + "logits/chosen": -1.4813920259475708, + "logits/rejected": -1.4216079711914062, + "logps/chosen": -537.5429077148438, + "logps/rejected": -614.580322265625, + "loss": 0.6922, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.3061237633228302, + "rewards/margins": 0.030472075566649437, + "rewards/rejected": -0.3365958333015442, + "step": 6520 + }, + { + "epoch": 0.43, + "learning_rate": 3.5389624712236894e-06, + "logits/chosen": -1.4329283237457275, + "logits/rejected": -1.2437920570373535, + "logps/chosen": -460.76629638671875, + "logps/rejected": -458.86273193359375, + "loss": 0.6929, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2467305213212967, + "rewards/margins": 0.02502426877617836, + "rewards/rejected": -0.27175474166870117, + "step": 6530 + }, + { + "epoch": 0.43, + "learning_rate": 3.533766292102653e-06, + "logits/chosen": -1.3346855640411377, + "logits/rejected": -1.3417136669158936, + "logps/chosen": -468.8275451660156, + "logps/rejected": -511.7674255371094, + "loss": 0.6898, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.25898295640945435, + "rewards/margins": 0.054628659039735794, + "rewards/rejected": -0.31361156702041626, + "step": 6540 + }, + { + "epoch": 0.43, + "learning_rate": 3.5285647203502404e-06, + "logits/chosen": -1.6727336645126343, + "logits/rejected": -1.5055919885635376, + "logps/chosen": -507.2945251464844, + "logps/rejected": -517.0139770507812, + "loss": 0.6904, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2500235438346863, + "rewards/margins": 0.04748542234301567, + "rewards/rejected": -0.29750901460647583, + "step": 6550 + }, + { + "epoch": 0.43, + "learning_rate": 3.5233577831003983e-06, + "logits/chosen": -1.3712760210037231, + "logits/rejected": -1.2215116024017334, + "logps/chosen": -527.0596923828125, + "logps/rejected": -569.3143920898438, + "loss": 0.6888, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.27488380670547485, + "rewards/margins": 0.06592334061861038, + "rewards/rejected": -0.34080708026885986, + "step": 6560 + }, + { + "epoch": 0.43, + "learning_rate": 3.5181455075150628e-06, + "logits/chosen": -1.2225806713104248, + "logits/rejected": -0.9543337821960449, + "logps/chosen": -461.31195068359375, + "logps/rejected": -474.75469970703125, + "loss": 0.6918, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2793658375740051, + "rewards/margins": 0.060065627098083496, + "rewards/rejected": -0.33943140506744385, + "step": 6570 + }, + { + "epoch": 0.43, + "learning_rate": 3.512927920784016e-06, + "logits/chosen": -1.384887456893921, + "logits/rejected": -1.2461804151535034, + "logps/chosen": -491.25933837890625, + "logps/rejected": -593.423095703125, + "loss": 0.6872, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2655852437019348, + "rewards/margins": 0.12214380502700806, + "rewards/rejected": -0.38772904872894287, + "step": 6580 + }, + { + "epoch": 0.43, + "learning_rate": 3.5077050501247457e-06, + "logits/chosen": -1.5173418521881104, + "logits/rejected": -1.079252004623413, + "logps/chosen": -513.4905395507812, + "logps/rejected": -533.9172973632812, + "loss": 0.6885, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.23247560858726501, + "rewards/margins": 0.08781534433364868, + "rewards/rejected": -0.3202909529209137, + "step": 6590 + }, + { + "epoch": 0.43, + "learning_rate": 3.5024769227823042e-06, + "logits/chosen": -1.4403669834136963, + "logits/rejected": -1.2567652463912964, + "logps/chosen": -453.385009765625, + "logps/rejected": -498.5462951660156, + "loss": 0.6896, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2905137538909912, + "rewards/margins": 0.08595889806747437, + "rewards/rejected": -0.3764726221561432, + "step": 6600 + }, + { + "epoch": 0.43, + "eval_logits/chosen": -1.3852163553237915, + "eval_logits/rejected": -1.2377227544784546, + "eval_logps/chosen": -535.6201171875, + "eval_logps/rejected": -595.6209716796875, + "eval_loss": 0.6897550225257874, + "eval_rewards/accuracies": 0.6570000052452087, + "eval_rewards/chosen": -0.30361512303352356, + "eval_rewards/margins": 0.08039402216672897, + "eval_rewards/rejected": -0.38400915265083313, + "eval_runtime": 711.5795, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.405, + "step": 6600 + }, + { + "epoch": 0.43, + "learning_rate": 3.4972435660291646e-06, + "logits/chosen": -1.5519202947616577, + "logits/rejected": -1.4770103693008423, + "logps/chosen": -551.5953979492188, + "logps/rejected": -601.3228759765625, + "loss": 0.6898, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3084511160850525, + "rewards/margins": 0.07466720044612885, + "rewards/rejected": -0.38311833143234253, + "step": 6610 + }, + { + "epoch": 0.43, + "learning_rate": 3.492005007165079e-06, + "logits/chosen": -1.3561322689056396, + "logits/rejected": -1.2405471801757812, + "logps/chosen": -475.966064453125, + "logps/rejected": -540.77490234375, + "loss": 0.6894, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.24642908573150635, + "rewards/margins": 0.05865221098065376, + "rewards/rejected": -0.305081307888031, + "step": 6620 + }, + { + "epoch": 0.43, + "learning_rate": 3.4867612735169377e-06, + "logits/chosen": -1.5976279973983765, + "logits/rejected": -1.2298024892807007, + "logps/chosen": -501.75970458984375, + "logps/rejected": -508.467041015625, + "loss": 0.6887, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.27672532200813293, + "rewards/margins": 0.08493559062480927, + "rewards/rejected": -0.361660897731781, + "step": 6630 + }, + { + "epoch": 0.43, + "learning_rate": 3.4815123924386226e-06, + "logits/chosen": -1.727242112159729, + "logits/rejected": -1.4164018630981445, + "logps/chosen": -563.4137573242188, + "logps/rejected": -560.3493041992188, + "loss": 0.6905, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.25687241554260254, + "rewards/margins": 0.06660804897546768, + "rewards/rejected": -0.3234805166721344, + "step": 6640 + }, + { + "epoch": 0.44, + "learning_rate": 3.4762583913108696e-06, + "logits/chosen": -1.1681668758392334, + "logits/rejected": -1.0194041728973389, + "logps/chosen": -583.726318359375, + "logps/rejected": -609.8234252929688, + "loss": 0.6911, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.3139679729938507, + "rewards/margins": 0.06701229512691498, + "rewards/rejected": -0.3809802830219269, + "step": 6650 + }, + { + "epoch": 0.44, + "learning_rate": 3.4709992975411217e-06, + "logits/chosen": -1.322264313697815, + "logits/rejected": -1.0045913457870483, + "logps/chosen": -579.4341430664062, + "logps/rejected": -617.7483520507812, + "loss": 0.6879, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.3244747221469879, + "rewards/margins": 0.09058120846748352, + "rewards/rejected": -0.41505590081214905, + "step": 6660 + }, + { + "epoch": 0.44, + "learning_rate": 3.4657351385633886e-06, + "logits/chosen": -1.29805326461792, + "logits/rejected": -1.1522525548934937, + "logps/chosen": -485.3212890625, + "logps/rejected": -585.7586669921875, + "loss": 0.6853, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.3000306189060211, + "rewards/margins": 0.11263756453990936, + "rewards/rejected": -0.4126681685447693, + "step": 6670 + }, + { + "epoch": 0.44, + "learning_rate": 3.4604659418381024e-06, + "logits/chosen": -1.2660630941390991, + "logits/rejected": -0.9279665946960449, + "logps/chosen": -625.5154418945312, + "logps/rejected": -693.3626708984375, + "loss": 0.6894, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.4074612259864807, + "rewards/margins": 0.09737777709960938, + "rewards/rejected": -0.5048390030860901, + "step": 6680 + }, + { + "epoch": 0.44, + "learning_rate": 3.4551917348519744e-06, + "logits/chosen": -1.2196203470230103, + "logits/rejected": -1.0248401165008545, + "logps/chosen": -648.0506591796875, + "logps/rejected": -681.2324829101562, + "loss": 0.689, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.3665519952774048, + "rewards/margins": 0.08038230240345001, + "rewards/rejected": -0.446934312582016, + "step": 6690 + }, + { + "epoch": 0.44, + "learning_rate": 3.4499125451178505e-06, + "logits/chosen": -0.7199057340621948, + "logits/rejected": -0.7669742703437805, + "logps/chosen": -606.9825439453125, + "logps/rejected": -685.78515625, + "loss": 0.6913, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.4026912748813629, + "rewards/margins": 0.05418992042541504, + "rewards/rejected": -0.45688119530677795, + "step": 6700 + }, + { + "epoch": 0.44, + "eval_logits/chosen": -1.0456030368804932, + "eval_logits/rejected": -0.9157735705375671, + "eval_logps/chosen": -599.052001953125, + "eval_logps/rejected": -667.5596313476562, + "eval_loss": 0.6897513270378113, + "eval_rewards/accuracies": 0.6660000085830688, + "eval_rewards/chosen": -0.36704710125923157, + "eval_rewards/margins": 0.0889006108045578, + "eval_rewards/rejected": -0.45594772696495056, + "eval_runtime": 713.5868, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 1.401, + "step": 6700 + }, + { + "epoch": 0.44, + "learning_rate": 3.4446284001745723e-06, + "logits/chosen": -0.6961051225662231, + "logits/rejected": -0.6591076254844666, + "logps/chosen": -626.5772705078125, + "logps/rejected": -720.2598876953125, + "loss": 0.6901, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.42034369707107544, + "rewards/margins": 0.07693564146757126, + "rewards/rejected": -0.4972793459892273, + "step": 6710 + }, + { + "epoch": 0.44, + "learning_rate": 3.439339327586827e-06, + "logits/chosen": -1.069394826889038, + "logits/rejected": -1.0984818935394287, + "logps/chosen": -469.156494140625, + "logps/rejected": -561.3721923828125, + "loss": 0.6881, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2791387736797333, + "rewards/margins": 0.09757934510707855, + "rewards/rejected": -0.37671810388565063, + "step": 6720 + }, + { + "epoch": 0.44, + "learning_rate": 3.434045354945008e-06, + "logits/chosen": -1.1852693557739258, + "logits/rejected": -1.0654106140136719, + "logps/chosen": -654.6146240234375, + "logps/rejected": -706.553466796875, + "loss": 0.6905, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.38059002161026, + "rewards/margins": 0.05667469650506973, + "rewards/rejected": -0.43726474046707153, + "step": 6730 + }, + { + "epoch": 0.44, + "learning_rate": 3.4287465098650713e-06, + "logits/chosen": -1.4549082517623901, + "logits/rejected": -1.2506242990493774, + "logps/chosen": -577.9610595703125, + "logps/rejected": -617.8677978515625, + "loss": 0.6922, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.3299856185913086, + "rewards/margins": 0.054748255759477615, + "rewards/rejected": -0.3847338557243347, + "step": 6740 + }, + { + "epoch": 0.44, + "learning_rate": 3.423442819988387e-06, + "logits/chosen": -1.1500657796859741, + "logits/rejected": -1.0138304233551025, + "logps/chosen": -514.3472900390625, + "logps/rejected": -589.5288696289062, + "loss": 0.6899, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3309600353240967, + "rewards/margins": 0.08237095177173615, + "rewards/rejected": -0.41333094239234924, + "step": 6750 + }, + { + "epoch": 0.44, + "learning_rate": 3.4181343129816e-06, + "logits/chosen": -1.1339448690414429, + "logits/rejected": -1.0129796266555786, + "logps/chosen": -467.60845947265625, + "logps/rejected": -527.6707763671875, + "loss": 0.6886, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.29124677181243896, + "rewards/margins": 0.07435248792171478, + "rewards/rejected": -0.36559924483299255, + "step": 6760 + }, + { + "epoch": 0.44, + "learning_rate": 3.4128210165364837e-06, + "logits/chosen": -1.1104885339736938, + "logits/rejected": -0.9423076510429382, + "logps/chosen": -488.9002990722656, + "logps/rejected": -624.3773803710938, + "loss": 0.6875, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.29749101400375366, + "rewards/margins": 0.12695378065109253, + "rewards/rejected": -0.4244448244571686, + "step": 6770 + }, + { + "epoch": 0.44, + "learning_rate": 3.407502958369795e-06, + "logits/chosen": -1.258338212966919, + "logits/rejected": -1.0859405994415283, + "logps/chosen": -544.0638427734375, + "logps/rejected": -629.9528198242188, + "loss": 0.6869, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3016160726547241, + "rewards/margins": 0.11510130017995834, + "rewards/rejected": -0.41671738028526306, + "step": 6780 + }, + { + "epoch": 0.44, + "learning_rate": 3.4021801662231297e-06, + "logits/chosen": -1.2023009061813354, + "logits/rejected": -1.0326160192489624, + "logps/chosen": -596.251708984375, + "logps/rejected": -640.0447998046875, + "loss": 0.6929, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.3473750352859497, + "rewards/margins": 0.06606845557689667, + "rewards/rejected": -0.4134434759616852, + "step": 6790 + }, + { + "epoch": 0.44, + "learning_rate": 3.3968526678627793e-06, + "logits/chosen": -0.9796515703201294, + "logits/rejected": -0.7606045007705688, + "logps/chosen": -580.5537109375, + "logps/rejected": -609.6514892578125, + "loss": 0.6913, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.315523236989975, + "rewards/margins": 0.07320135086774826, + "rewards/rejected": -0.38872459530830383, + "step": 6800 + }, + { + "epoch": 0.44, + "eval_logits/chosen": -1.1365275382995605, + "eval_logits/rejected": -1.0038988590240479, + "eval_logps/chosen": -551.1534423828125, + "eval_logps/rejected": -605.5634765625, + "eval_loss": 0.6897253394126892, + "eval_rewards/accuracies": 0.6604999899864197, + "eval_rewards/chosen": -0.3191484808921814, + "eval_rewards/margins": 0.07480315864086151, + "eval_rewards/rejected": -0.3939516544342041, + "eval_runtime": 712.3322, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 1.404, + "step": 6800 + }, + { + "epoch": 0.45, + "learning_rate": 3.391520491079586e-06, + "logits/chosen": -1.4882662296295166, + "logits/rejected": -1.206194519996643, + "logps/chosen": -492.03875732421875, + "logps/rejected": -509.9769592285156, + "loss": 0.6922, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2960761487483978, + "rewards/margins": 0.04846780747175217, + "rewards/rejected": -0.344543993473053, + "step": 6810 + }, + { + "epoch": 0.45, + "learning_rate": 3.3861836636887936e-06, + "logits/chosen": -1.2734750509262085, + "logits/rejected": -1.0188143253326416, + "logps/chosen": -568.8356323242188, + "logps/rejected": -597.8465576171875, + "loss": 0.6889, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.30201685428619385, + "rewards/margins": 0.0754893496632576, + "rewards/rejected": -0.37750619649887085, + "step": 6820 + }, + { + "epoch": 0.45, + "learning_rate": 3.3808422135299106e-06, + "logits/chosen": -1.2087388038635254, + "logits/rejected": -1.1798069477081299, + "logps/chosen": -597.0426025390625, + "logps/rejected": -713.0640258789062, + "loss": 0.6915, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.3113647699356079, + "rewards/margins": 0.0629459097981453, + "rewards/rejected": -0.3743106722831726, + "step": 6830 + }, + { + "epoch": 0.45, + "learning_rate": 3.375496168466556e-06, + "logits/chosen": -1.3035211563110352, + "logits/rejected": -1.0096242427825928, + "logps/chosen": -470.8985900878906, + "logps/rejected": -466.59674072265625, + "loss": 0.6912, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.25596046447753906, + "rewards/margins": 0.051624737679958344, + "rewards/rejected": -0.307585209608078, + "step": 6840 + }, + { + "epoch": 0.45, + "learning_rate": 3.3701455563863205e-06, + "logits/chosen": -1.547277808189392, + "logits/rejected": -1.3091676235198975, + "logps/chosen": -585.567138671875, + "logps/rejected": -648.2615966796875, + "loss": 0.6866, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.29369252920150757, + "rewards/margins": 0.09901341050863266, + "rewards/rejected": -0.39270591735839844, + "step": 6850 + }, + { + "epoch": 0.45, + "learning_rate": 3.3647904052006174e-06, + "logits/chosen": -1.3013098239898682, + "logits/rejected": -1.2090144157409668, + "logps/chosen": -570.9986572265625, + "logps/rejected": -649.5106201171875, + "loss": 0.6898, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.3057003915309906, + "rewards/margins": 0.07550543546676636, + "rewards/rejected": -0.38120585680007935, + "step": 6860 + }, + { + "epoch": 0.45, + "learning_rate": 3.3594307428445383e-06, + "logits/chosen": -1.5023900270462036, + "logits/rejected": -1.1324571371078491, + "logps/chosen": -625.2291259765625, + "logps/rejected": -659.7060546875, + "loss": 0.6912, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2919783592224121, + "rewards/margins": 0.06589344888925552, + "rewards/rejected": -0.35787174105644226, + "step": 6870 + }, + { + "epoch": 0.45, + "learning_rate": 3.354066597276707e-06, + "logits/chosen": -1.0524461269378662, + "logits/rejected": -1.0098249912261963, + "logps/chosen": -505.1480407714844, + "logps/rejected": -606.7689819335938, + "loss": 0.6897, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2815837860107422, + "rewards/margins": 0.0662604421377182, + "rewards/rejected": -0.3478442430496216, + "step": 6880 + }, + { + "epoch": 0.45, + "learning_rate": 3.348697996479136e-06, + "logits/chosen": -1.2552497386932373, + "logits/rejected": -1.074495553970337, + "logps/chosen": -531.0899658203125, + "logps/rejected": -532.2979736328125, + "loss": 0.6907, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3032810389995575, + "rewards/margins": 0.054679740220308304, + "rewards/rejected": -0.3579607605934143, + "step": 6890 + }, + { + "epoch": 0.45, + "learning_rate": 3.3433249684570757e-06, + "logits/chosen": -0.9574271440505981, + "logits/rejected": -0.7741330862045288, + "logps/chosen": -471.801025390625, + "logps/rejected": -523.0010986328125, + "loss": 0.6876, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2939714789390564, + "rewards/margins": 0.09691840410232544, + "rewards/rejected": -0.3908899128437042, + "step": 6900 + }, + { + "epoch": 0.45, + "eval_logits/chosen": -1.0858708620071411, + "eval_logits/rejected": -0.9569290280342102, + "eval_logps/chosen": -539.8351440429688, + "eval_logps/rejected": -594.458984375, + "eval_loss": 0.689683735370636, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -0.30783024430274963, + "eval_rewards/margins": 0.07501688599586487, + "eval_rewards/rejected": -0.3828471302986145, + "eval_runtime": 712.1685, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 1.404, + "step": 6900 + }, + { + "epoch": 0.45, + "learning_rate": 3.3379475412388724e-06, + "logits/chosen": -1.131521463394165, + "logits/rejected": -0.975692868232727, + "logps/chosen": -551.9547729492188, + "logps/rejected": -620.2999877929688, + "loss": 0.6881, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.3099997341632843, + "rewards/margins": 0.10204390436410904, + "rewards/rejected": -0.41204363107681274, + "step": 6910 + }, + { + "epoch": 0.45, + "learning_rate": 3.3325657428758207e-06, + "logits/chosen": -0.8180997967720032, + "logits/rejected": -0.8396323323249817, + "logps/chosen": -594.6802978515625, + "logps/rejected": -686.7999877929688, + "loss": 0.6878, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.35100477933883667, + "rewards/margins": 0.09129307419061661, + "rewards/rejected": -0.4422978460788727, + "step": 6920 + }, + { + "epoch": 0.45, + "learning_rate": 3.3271796014420175e-06, + "logits/chosen": -0.9754294157028198, + "logits/rejected": -0.6773120760917664, + "logps/chosen": -604.6719970703125, + "logps/rejected": -706.2049560546875, + "loss": 0.6881, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.39222168922424316, + "rewards/margins": 0.11924131214618683, + "rewards/rejected": -0.5114629864692688, + "step": 6930 + }, + { + "epoch": 0.45, + "learning_rate": 3.3217891450342142e-06, + "logits/chosen": -0.8988175392150879, + "logits/rejected": -0.7741016149520874, + "logps/chosen": -586.4400024414062, + "logps/rejected": -626.4324951171875, + "loss": 0.6893, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3278416097164154, + "rewards/margins": 0.1089852824807167, + "rewards/rejected": -0.4368268847465515, + "step": 6940 + }, + { + "epoch": 0.45, + "learning_rate": 3.3163944017716733e-06, + "logits/chosen": -1.3334197998046875, + "logits/rejected": -1.1372065544128418, + "logps/chosen": -505.1522521972656, + "logps/rejected": -549.527587890625, + "loss": 0.6903, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.28878146409988403, + "rewards/margins": 0.07747305184602737, + "rewards/rejected": -0.36625441908836365, + "step": 6950 + }, + { + "epoch": 0.46, + "learning_rate": 3.310995399796017e-06, + "logits/chosen": -1.424912691116333, + "logits/rejected": -1.3498830795288086, + "logps/chosen": -537.4611206054688, + "logps/rejected": -573.2302856445312, + "loss": 0.6916, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.2634749114513397, + "rewards/margins": 0.042571231722831726, + "rewards/rejected": -0.30604615807533264, + "step": 6960 + }, + { + "epoch": 0.46, + "learning_rate": 3.305592167271085e-06, + "logits/chosen": -1.2780954837799072, + "logits/rejected": -1.0659189224243164, + "logps/chosen": -450.2206115722656, + "logps/rejected": -520.7966918945312, + "loss": 0.689, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.25297442078590393, + "rewards/margins": 0.08148813247680664, + "rewards/rejected": -0.33446258306503296, + "step": 6970 + }, + { + "epoch": 0.46, + "learning_rate": 3.3001847323827846e-06, + "logits/chosen": -1.1722548007965088, + "logits/rejected": -1.2698702812194824, + "logps/chosen": -590.0223388671875, + "logps/rejected": -674.6921997070312, + "loss": 0.6897, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.32192471623420715, + "rewards/margins": 0.08808865398168564, + "rewards/rejected": -0.410013347864151, + "step": 6980 + }, + { + "epoch": 0.46, + "learning_rate": 3.2947731233389447e-06, + "logits/chosen": -0.9997597932815552, + "logits/rejected": -0.8057335019111633, + "logps/chosen": -589.4088745117188, + "logps/rejected": -651.3054809570312, + "loss": 0.6872, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.3419021964073181, + "rewards/margins": 0.11732400953769684, + "rewards/rejected": -0.45922619104385376, + "step": 6990 + }, + { + "epoch": 0.46, + "learning_rate": 3.2893573683691706e-06, + "logits/chosen": -0.9610411524772644, + "logits/rejected": -0.8816120028495789, + "logps/chosen": -530.5889892578125, + "logps/rejected": -620.2159423828125, + "loss": 0.6878, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.3258955478668213, + "rewards/margins": 0.1075226441025734, + "rewards/rejected": -0.4334181845188141, + "step": 7000 + }, + { + "epoch": 0.46, + "eval_logits/chosen": -1.064884066581726, + "eval_logits/rejected": -0.9341198205947876, + "eval_logps/chosen": -569.5941162109375, + "eval_logps/rejected": -639.7523803710938, + "eval_loss": 0.6896393895149231, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -0.33758923411369324, + "eval_rewards/margins": 0.09055128693580627, + "eval_rewards/rejected": -0.4281404912471771, + "eval_runtime": 712.6501, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 1.403, + "step": 7000 + }, + { + "epoch": 0.46, + "learning_rate": 3.2839374957246915e-06, + "logits/chosen": -1.1829535961151123, + "logits/rejected": -0.9157463312149048, + "logps/chosen": -620.6079711914062, + "logps/rejected": -588.7822265625, + "loss": 0.6915, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3545176684856415, + "rewards/margins": 0.05971246212720871, + "rewards/rejected": -0.4142301678657532, + "step": 7010 + }, + { + "epoch": 0.46, + "learning_rate": 3.2785135336782187e-06, + "logits/chosen": -1.0277976989746094, + "logits/rejected": -0.9129239916801453, + "logps/chosen": -609.3104248046875, + "logps/rejected": -723.1586303710938, + "loss": 0.6886, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3753950893878937, + "rewards/margins": 0.09153584390878677, + "rewards/rejected": -0.46693092584609985, + "step": 7020 + }, + { + "epoch": 0.46, + "learning_rate": 3.2730855105237952e-06, + "logits/chosen": -1.1726951599121094, + "logits/rejected": -1.0626368522644043, + "logps/chosen": -561.26904296875, + "logps/rejected": -700.5789184570312, + "loss": 0.6907, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3409925401210785, + "rewards/margins": 0.09512937068939209, + "rewards/rejected": -0.43612194061279297, + "step": 7030 + }, + { + "epoch": 0.46, + "learning_rate": 3.2676534545766486e-06, + "logits/chosen": -1.134777307510376, + "logits/rejected": -1.0207163095474243, + "logps/chosen": -526.9071044921875, + "logps/rejected": -582.7493896484375, + "loss": 0.6905, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.3144376277923584, + "rewards/margins": 0.06573508679866791, + "rewards/rejected": -0.3801727890968323, + "step": 7040 + }, + { + "epoch": 0.46, + "learning_rate": 3.262217394173043e-06, + "logits/chosen": -1.130771517753601, + "logits/rejected": -0.9906819462776184, + "logps/chosen": -521.9103393554688, + "logps/rejected": -615.9810791015625, + "loss": 0.6894, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.28105971217155457, + "rewards/margins": 0.10015375912189484, + "rewards/rejected": -0.3812134861946106, + "step": 7050 + }, + { + "epoch": 0.46, + "learning_rate": 3.2567773576701333e-06, + "logits/chosen": -1.1347095966339111, + "logits/rejected": -0.9777033925056458, + "logps/chosen": -511.82757568359375, + "logps/rejected": -625.614501953125, + "loss": 0.6845, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.25607019662857056, + "rewards/margins": 0.14410245418548584, + "rewards/rejected": -0.40017271041870117, + "step": 7060 + }, + { + "epoch": 0.46, + "learning_rate": 3.2513333734458154e-06, + "logits/chosen": -1.1313894987106323, + "logits/rejected": -0.9789068102836609, + "logps/chosen": -475.10919189453125, + "logps/rejected": -516.0199584960938, + "loss": 0.6902, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.2682678997516632, + "rewards/margins": 0.057518370449543, + "rewards/rejected": -0.325786292552948, + "step": 7070 + }, + { + "epoch": 0.46, + "learning_rate": 3.245885469898576e-06, + "logits/chosen": -0.9258352518081665, + "logits/rejected": -0.9314023852348328, + "logps/chosen": -608.5376586914062, + "logps/rejected": -644.3211669921875, + "loss": 0.6892, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.30798250436782837, + "rewards/margins": 0.09947158396244049, + "rewards/rejected": -0.40745407342910767, + "step": 7080 + }, + { + "epoch": 0.46, + "learning_rate": 3.2404336754473497e-06, + "logits/chosen": -0.9622844457626343, + "logits/rejected": -0.8000280261039734, + "logps/chosen": -542.6066284179688, + "logps/rejected": -540.3517456054688, + "loss": 0.6925, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2750997245311737, + "rewards/margins": 0.06251533329486847, + "rewards/rejected": -0.3376150131225586, + "step": 7090 + }, + { + "epoch": 0.46, + "learning_rate": 3.234978018531367e-06, + "logits/chosen": -1.5038312673568726, + "logits/rejected": -1.0402882099151611, + "logps/chosen": -514.2202758789062, + "logps/rejected": -525.5071411132812, + "loss": 0.6889, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.25572001934051514, + "rewards/margins": 0.07608649134635925, + "rewards/rejected": -0.3318065106868744, + "step": 7100 + }, + { + "epoch": 0.46, + "eval_logits/chosen": -1.0207056999206543, + "eval_logits/rejected": -0.8951786160469055, + "eval_logps/chosen": -503.5751647949219, + "eval_logps/rejected": -564.20068359375, + "eval_loss": 0.6896175742149353, + "eval_rewards/accuracies": 0.6600000262260437, + "eval_rewards/chosen": -0.2715701758861542, + "eval_rewards/margins": 0.0810185894370079, + "eval_rewards/rejected": -0.35258880257606506, + "eval_runtime": 712.637, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 1.403, + "step": 7100 + }, + { + "epoch": 0.47, + "learning_rate": 3.229518527610006e-06, + "logits/chosen": -1.2460079193115234, + "logits/rejected": -1.0841786861419678, + "logps/chosen": -569.5468139648438, + "logps/rejected": -593.5337524414062, + "loss": 0.6909, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.27675676345825195, + "rewards/margins": 0.06916297227144241, + "rewards/rejected": -0.3459196984767914, + "step": 7110 + }, + { + "epoch": 0.47, + "learning_rate": 3.2240552311626465e-06, + "logits/chosen": -1.044368028640747, + "logits/rejected": -0.8659143447875977, + "logps/chosen": -494.69952392578125, + "logps/rejected": -543.4378662109375, + "loss": 0.6912, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.2534857988357544, + "rewards/margins": 0.06368336081504822, + "rewards/rejected": -0.3171691298484802, + "step": 7120 + }, + { + "epoch": 0.47, + "learning_rate": 3.2185881576885193e-06, + "logits/chosen": -1.044262170791626, + "logits/rejected": -0.8921842575073242, + "logps/chosen": -525.1110229492188, + "logps/rejected": -555.7213134765625, + "loss": 0.6911, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.31338828802108765, + "rewards/margins": 0.06927771121263504, + "rewards/rejected": -0.3826659321784973, + "step": 7130 + }, + { + "epoch": 0.47, + "learning_rate": 3.213117335706557e-06, + "logits/chosen": -1.098400354385376, + "logits/rejected": -1.0391719341278076, + "logps/chosen": -560.6351928710938, + "logps/rejected": -638.1511840820312, + "loss": 0.6917, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.30459028482437134, + "rewards/margins": 0.06830967217683792, + "rewards/rejected": -0.37289994955062866, + "step": 7140 + }, + { + "epoch": 0.47, + "learning_rate": 3.2076427937552473e-06, + "logits/chosen": -1.081235647201538, + "logits/rejected": -0.7705933451652527, + "logps/chosen": -514.1536254882812, + "logps/rejected": -602.1751098632812, + "loss": 0.6869, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.26866382360458374, + "rewards/margins": 0.10714519023895264, + "rewards/rejected": -0.37580904364585876, + "step": 7150 + }, + { + "epoch": 0.47, + "learning_rate": 3.2021645603924827e-06, + "logits/chosen": -0.8100953102111816, + "logits/rejected": -0.822729766368866, + "logps/chosen": -427.54852294921875, + "logps/rejected": -539.2092895507812, + "loss": 0.6887, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.28909438848495483, + "rewards/margins": 0.10229630768299103, + "rewards/rejected": -0.39139071106910706, + "step": 7160 + }, + { + "epoch": 0.47, + "learning_rate": 3.196682664195412e-06, + "logits/chosen": -0.9460660815238953, + "logits/rejected": -0.8326196670532227, + "logps/chosen": -485.0545959472656, + "logps/rejected": -488.9381408691406, + "loss": 0.6925, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.28100645542144775, + "rewards/margins": 0.038774557411670685, + "rewards/rejected": -0.31978100538253784, + "step": 7170 + }, + { + "epoch": 0.47, + "learning_rate": 3.191197133760291e-06, + "logits/chosen": -1.653093934059143, + "logits/rejected": -1.1370487213134766, + "logps/chosen": -529.1414184570312, + "logps/rejected": -552.0350341796875, + "loss": 0.6874, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.26173800230026245, + "rewards/margins": 0.09714240580797195, + "rewards/rejected": -0.35888034105300903, + "step": 7180 + }, + { + "epoch": 0.47, + "learning_rate": 3.185707997702334e-06, + "logits/chosen": -1.275370717048645, + "logits/rejected": -0.9616245031356812, + "logps/chosen": -505.08514404296875, + "logps/rejected": -542.3439331054688, + "loss": 0.6893, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2643203139305115, + "rewards/margins": 0.07511034607887268, + "rewards/rejected": -0.33943066000938416, + "step": 7190 + }, + { + "epoch": 0.47, + "learning_rate": 3.1802152846555624e-06, + "logits/chosen": -1.2004523277282715, + "logits/rejected": -1.0477879047393799, + "logps/chosen": -486.0389709472656, + "logps/rejected": -555.9532470703125, + "loss": 0.6887, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2617960274219513, + "rewards/margins": 0.08543354272842407, + "rewards/rejected": -0.347229540348053, + "step": 7200 + }, + { + "epoch": 0.47, + "eval_logits/chosen": -1.1860355138778687, + "eval_logits/rejected": -1.0530312061309814, + "eval_logps/chosen": -497.23974609375, + "eval_logps/rejected": -547.0663452148438, + "eval_loss": 0.6897386312484741, + "eval_rewards/accuracies": 0.6625000238418579, + "eval_rewards/chosen": -0.2652347981929779, + "eval_rewards/margins": 0.07021969556808472, + "eval_rewards/rejected": -0.335454523563385, + "eval_runtime": 712.9853, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 1.403, + "step": 7200 + }, + { + "epoch": 0.47, + "learning_rate": 3.174719023272659e-06, + "logits/chosen": -1.4130502939224243, + "logits/rejected": -1.3417845964431763, + "logps/chosen": -477.9608459472656, + "logps/rejected": -615.33056640625, + "loss": 0.6874, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2642667293548584, + "rewards/margins": 0.09102049469947815, + "rewards/rejected": -0.35528722405433655, + "step": 7210 + }, + { + "epoch": 0.47, + "learning_rate": 3.169219242224816e-06, + "logits/chosen": -1.2868996858596802, + "logits/rejected": -1.0738362073898315, + "logps/chosen": -539.2318115234375, + "logps/rejected": -595.610595703125, + "loss": 0.6906, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.2993045747280121, + "rewards/margins": 0.06247404217720032, + "rewards/rejected": -0.3617786765098572, + "step": 7220 + }, + { + "epoch": 0.47, + "learning_rate": 3.1637159702015837e-06, + "logits/chosen": -1.0716454982757568, + "logits/rejected": -0.9601171612739563, + "logps/chosen": -474.85205078125, + "logps/rejected": -549.5961303710938, + "loss": 0.6888, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2689599096775055, + "rewards/margins": 0.09406690299510956, + "rewards/rejected": -0.36302685737609863, + "step": 7230 + }, + { + "epoch": 0.47, + "learning_rate": 3.1582092359107263e-06, + "logits/chosen": -1.0168414115905762, + "logits/rejected": -0.7780404090881348, + "logps/chosen": -583.4414672851562, + "logps/rejected": -625.4762573242188, + "loss": 0.6925, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.30516377091407776, + "rewards/margins": 0.08786555379629135, + "rewards/rejected": -0.3930293023586273, + "step": 7240 + }, + { + "epoch": 0.47, + "learning_rate": 3.152699068078067e-06, + "logits/chosen": -0.9496833682060242, + "logits/rejected": -0.8669363260269165, + "logps/chosen": -580.4671630859375, + "logps/rejected": -680.8070678710938, + "loss": 0.6871, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.30017679929733276, + "rewards/margins": 0.12088117748498917, + "rewards/rejected": -0.4210579991340637, + "step": 7250 + }, + { + "epoch": 0.48, + "learning_rate": 3.1471854954473415e-06, + "logits/chosen": -1.3222639560699463, + "logits/rejected": -1.259479284286499, + "logps/chosen": -438.91619873046875, + "logps/rejected": -524.3823852539062, + "loss": 0.689, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1881495714187622, + "rewards/margins": 0.08923407644033432, + "rewards/rejected": -0.2773836553096771, + "step": 7260 + }, + { + "epoch": 0.48, + "learning_rate": 3.1416685467800436e-06, + "logits/chosen": -1.019896149635315, + "logits/rejected": -0.694831907749176, + "logps/chosen": -449.73675537109375, + "logps/rejected": -520.8202514648438, + "loss": 0.69, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.26415398716926575, + "rewards/margins": 0.09361319243907928, + "rewards/rejected": -0.35776716470718384, + "step": 7270 + }, + { + "epoch": 0.48, + "learning_rate": 3.1361482508552803e-06, + "logits/chosen": -1.1474792957305908, + "logits/rejected": -0.9644671678543091, + "logps/chosen": -501.0804138183594, + "logps/rejected": -530.1576538085938, + "loss": 0.6903, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.25912952423095703, + "rewards/margins": 0.07246353477239609, + "rewards/rejected": -0.3315930664539337, + "step": 7280 + }, + { + "epoch": 0.48, + "learning_rate": 3.1306246364696198e-06, + "logits/chosen": -1.4395965337753296, + "logits/rejected": -1.1949145793914795, + "logps/chosen": -488.775146484375, + "logps/rejected": -538.5449829101562, + "loss": 0.69, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.23630082607269287, + "rewards/margins": 0.06727494299411774, + "rewards/rejected": -0.3035758137702942, + "step": 7290 + }, + { + "epoch": 0.48, + "learning_rate": 3.1250977324369413e-06, + "logits/chosen": -1.0189143419265747, + "logits/rejected": -0.9124029874801636, + "logps/chosen": -389.81689453125, + "logps/rejected": -490.656494140625, + "loss": 0.6905, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.23086443543434143, + "rewards/margins": 0.09412762522697449, + "rewards/rejected": -0.32499203085899353, + "step": 7300 + }, + { + "epoch": 0.48, + "eval_logits/chosen": -1.0693286657333374, + "eval_logits/rejected": -0.9412211775779724, + "eval_logps/chosen": -495.2763366699219, + "eval_logps/rejected": -555.8788452148438, + "eval_loss": 0.6896201372146606, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -0.2632714509963989, + "eval_rewards/margins": 0.08099555969238281, + "eval_rewards/rejected": -0.3442670404911041, + "eval_runtime": 713.0344, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 1.402, + "step": 7300 + }, + { + "epoch": 0.48, + "learning_rate": 3.1195675675882825e-06, + "logits/chosen": -1.1212948560714722, + "logits/rejected": -0.8987517356872559, + "logps/chosen": -525.4148559570312, + "logps/rejected": -579.3656005859375, + "loss": 0.69, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2902238070964813, + "rewards/margins": 0.08455558121204376, + "rewards/rejected": -0.3747794032096863, + "step": 7310 + }, + { + "epoch": 0.48, + "learning_rate": 3.1140341707716926e-06, + "logits/chosen": -0.747536301612854, + "logits/rejected": -0.7013038992881775, + "logps/chosen": -444.2554626464844, + "logps/rejected": -501.15228271484375, + "loss": 0.6867, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2484477311372757, + "rewards/margins": 0.0999131128191948, + "rewards/rejected": -0.3483608365058899, + "step": 7320 + }, + { + "epoch": 0.48, + "learning_rate": 3.1084975708520803e-06, + "logits/chosen": -1.2651160955429077, + "logits/rejected": -0.9623421430587769, + "logps/chosen": -483.85772705078125, + "logps/rejected": -513.4347534179688, + "loss": 0.6899, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.22213390469551086, + "rewards/margins": 0.10032358020544052, + "rewards/rejected": -0.3224574625492096, + "step": 7330 + }, + { + "epoch": 0.48, + "learning_rate": 3.1029577967110625e-06, + "logits/chosen": -1.2189199924468994, + "logits/rejected": -0.9497553110122681, + "logps/chosen": -417.85028076171875, + "logps/rejected": -415.18450927734375, + "loss": 0.6914, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2014559805393219, + "rewards/margins": 0.046392567455768585, + "rewards/rejected": -0.24784855544567108, + "step": 7340 + }, + { + "epoch": 0.48, + "learning_rate": 3.097414877246814e-06, + "logits/chosen": -1.115504264831543, + "logits/rejected": -0.8397358655929565, + "logps/chosen": -399.6795654296875, + "logps/rejected": -476.15484619140625, + "loss": 0.6857, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20103612542152405, + "rewards/margins": 0.10598810762166977, + "rewards/rejected": -0.3070242404937744, + "step": 7350 + }, + { + "epoch": 0.48, + "learning_rate": 3.0918688413739197e-06, + "logits/chosen": -0.9786425828933716, + "logits/rejected": -0.6896412968635559, + "logps/chosen": -420.2076110839844, + "logps/rejected": -461.7596130371094, + "loss": 0.6871, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1875246912240982, + "rewards/margins": 0.10582470893859863, + "rewards/rejected": -0.29334941506385803, + "step": 7360 + }, + { + "epoch": 0.48, + "learning_rate": 3.0863197180232178e-06, + "logits/chosen": -0.9284044504165649, + "logits/rejected": -0.8172693252563477, + "logps/chosen": -454.2342224121094, + "logps/rejected": -525.4736938476562, + "loss": 0.689, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.25449004769325256, + "rewards/margins": 0.08346012979745865, + "rewards/rejected": -0.3379501700401306, + "step": 7370 + }, + { + "epoch": 0.48, + "learning_rate": 3.0807675361416554e-06, + "logits/chosen": -0.7114228010177612, + "logits/rejected": -0.6189266443252563, + "logps/chosen": -422.91912841796875, + "logps/rejected": -438.93487548828125, + "loss": 0.6895, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.23403885960578918, + "rewards/margins": 0.09505019336938858, + "rewards/rejected": -0.32908907532691956, + "step": 7380 + }, + { + "epoch": 0.48, + "learning_rate": 3.0752123246921327e-06, + "logits/chosen": -0.8941015005111694, + "logits/rejected": -0.5493389368057251, + "logps/chosen": -558.5103149414062, + "logps/rejected": -587.1238403320312, + "loss": 0.6894, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.280597060918808, + "rewards/margins": 0.09554499387741089, + "rewards/rejected": -0.37614205479621887, + "step": 7390 + }, + { + "epoch": 0.48, + "learning_rate": 3.069654112653353e-06, + "logits/chosen": -0.9813539385795593, + "logits/rejected": -0.7545603513717651, + "logps/chosen": -551.376953125, + "logps/rejected": -564.7586669921875, + "loss": 0.6933, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.3348926901817322, + "rewards/margins": 0.0459338054060936, + "rewards/rejected": -0.38082653284072876, + "step": 7400 + }, + { + "epoch": 0.48, + "eval_logits/chosen": -0.8035207986831665, + "eval_logits/rejected": -0.6899078488349915, + "eval_logps/chosen": -545.2488403320312, + "eval_logps/rejected": -615.5215454101562, + "eval_loss": 0.6896498799324036, + "eval_rewards/accuracies": 0.6585000157356262, + "eval_rewards/chosen": -0.3132438361644745, + "eval_rewards/margins": 0.0906657725572586, + "eval_rewards/rejected": -0.4039096534252167, + "eval_runtime": 712.1104, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 1.404, + "step": 7400 + }, + { + "epoch": 0.48, + "learning_rate": 3.064092929019673e-06, + "logits/chosen": -0.7480974197387695, + "logits/rejected": -0.9871516227722168, + "logps/chosen": -570.5189819335938, + "logps/rejected": -646.7600708007812, + "loss": 0.6916, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.3129754960536957, + "rewards/margins": 0.05636630207300186, + "rewards/rejected": -0.36934179067611694, + "step": 7410 + }, + { + "epoch": 0.49, + "learning_rate": 3.058528802800952e-06, + "logits/chosen": -1.0600616931915283, + "logits/rejected": -0.8567035794258118, + "logps/chosen": -565.076171875, + "logps/rejected": -612.2608032226562, + "loss": 0.6909, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.27244246006011963, + "rewards/margins": 0.08375748246908188, + "rewards/rejected": -0.3561999201774597, + "step": 7420 + }, + { + "epoch": 0.49, + "learning_rate": 3.052961763022397e-06, + "logits/chosen": -1.265679121017456, + "logits/rejected": -0.6907863616943359, + "logps/chosen": -431.6497497558594, + "logps/rejected": -513.7686767578125, + "loss": 0.6877, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.24876856803894043, + "rewards/margins": 0.1208425760269165, + "rewards/rejected": -0.3696111738681793, + "step": 7430 + }, + { + "epoch": 0.49, + "learning_rate": 3.047391838724415e-06, + "logits/chosen": -1.1518621444702148, + "logits/rejected": -1.00303053855896, + "logps/chosen": -498.6324157714844, + "logps/rejected": -584.2637939453125, + "loss": 0.6891, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.263113796710968, + "rewards/margins": 0.1016978994011879, + "rewards/rejected": -0.3648116886615753, + "step": 7440 + }, + { + "epoch": 0.49, + "learning_rate": 3.0418190589624587e-06, + "logits/chosen": -0.8011142611503601, + "logits/rejected": -0.6937967538833618, + "logps/chosen": -416.416015625, + "logps/rejected": -479.81103515625, + "loss": 0.692, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.23879393935203552, + "rewards/margins": 0.055042654275894165, + "rewards/rejected": -0.2938365936279297, + "step": 7450 + }, + { + "epoch": 0.49, + "learning_rate": 3.0362434528068784e-06, + "logits/chosen": -0.749529242515564, + "logits/rejected": -0.5978935956954956, + "logps/chosen": -543.2520751953125, + "logps/rejected": -544.2491455078125, + "loss": 0.688, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2739289402961731, + "rewards/margins": 0.08649125695228577, + "rewards/rejected": -0.3604201376438141, + "step": 7460 + }, + { + "epoch": 0.49, + "learning_rate": 3.0306650493427657e-06, + "logits/chosen": -0.901824951171875, + "logits/rejected": -0.7608638405799866, + "logps/chosen": -480.89569091796875, + "logps/rejected": -546.8543090820312, + "loss": 0.6896, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.24927644431591034, + "rewards/margins": 0.07492052763700485, + "rewards/rejected": -0.3241969645023346, + "step": 7470 + }, + { + "epoch": 0.49, + "learning_rate": 3.0250838776698077e-06, + "logits/chosen": -1.011126160621643, + "logits/rejected": -0.7641777992248535, + "logps/chosen": -426.700439453125, + "logps/rejected": -525.545166015625, + "loss": 0.6859, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2427811324596405, + "rewards/margins": 0.10159105062484741, + "rewards/rejected": -0.3443722128868103, + "step": 7480 + }, + { + "epoch": 0.49, + "learning_rate": 3.0194999669021275e-06, + "logits/chosen": -0.6938611268997192, + "logits/rejected": -0.42879757285118103, + "logps/chosen": -464.25030517578125, + "logps/rejected": -515.6192626953125, + "loss": 0.689, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.23738184571266174, + "rewards/margins": 0.09842096269130707, + "rewards/rejected": -0.33580282330513, + "step": 7490 + }, + { + "epoch": 0.49, + "learning_rate": 3.0139133461681403e-06, + "logits/chosen": -1.1078835725784302, + "logits/rejected": -0.9325857162475586, + "logps/chosen": -487.8389587402344, + "logps/rejected": -546.95361328125, + "loss": 0.6885, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.22344477474689484, + "rewards/margins": 0.11525474488735199, + "rewards/rejected": -0.3386995196342468, + "step": 7500 + }, + { + "epoch": 0.49, + "eval_logits/chosen": -0.9689752459526062, + "eval_logits/rejected": -0.8481876254081726, + "eval_logps/chosen": -479.9737548828125, + "eval_logps/rejected": -539.5113525390625, + "eval_loss": 0.6896456480026245, + "eval_rewards/accuracies": 0.6614999771118164, + "eval_rewards/chosen": -0.24796883761882782, + "eval_rewards/margins": 0.0799306184053421, + "eval_rewards/rejected": -0.3278994560241699, + "eval_runtime": 712.6198, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 1.403, + "step": 7500 + }, + { + "epoch": 0.49, + "learning_rate": 3.0083240446103965e-06, + "logits/chosen": -0.7331717610359192, + "logits/rejected": -0.4825916290283203, + "logps/chosen": -436.3609313964844, + "logps/rejected": -545.4282836914062, + "loss": 0.6881, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2523263394832611, + "rewards/margins": 0.10139818489551544, + "rewards/rejected": -0.35372450947761536, + "step": 7510 + }, + { + "epoch": 0.49, + "learning_rate": 3.0027320913854306e-06, + "logits/chosen": -1.3195829391479492, + "logits/rejected": -1.117133617401123, + "logps/chosen": -533.9688720703125, + "logps/rejected": -565.7762451171875, + "loss": 0.6909, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2415073662996292, + "rewards/margins": 0.09451662003993988, + "rewards/rejected": -0.3360239863395691, + "step": 7520 + }, + { + "epoch": 0.49, + "learning_rate": 2.997137515663609e-06, + "logits/chosen": -1.0817979574203491, + "logits/rejected": -0.9075329899787903, + "logps/chosen": -417.1910705566406, + "logps/rejected": -472.84747314453125, + "loss": 0.6893, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1918136477470398, + "rewards/margins": 0.09335563331842422, + "rewards/rejected": -0.2851692736148834, + "step": 7530 + }, + { + "epoch": 0.49, + "learning_rate": 2.991540346628981e-06, + "logits/chosen": -0.9919770956039429, + "logits/rejected": -1.0041069984436035, + "logps/chosen": -508.04888916015625, + "logps/rejected": -528.6693725585938, + "loss": 0.6902, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2684485614299774, + "rewards/margins": 0.04672873765230179, + "rewards/rejected": -0.3151772916316986, + "step": 7540 + }, + { + "epoch": 0.49, + "learning_rate": 2.985940613479121e-06, + "logits/chosen": -1.1806385517120361, + "logits/rejected": -1.057145357131958, + "logps/chosen": -549.0791625976562, + "logps/rejected": -559.5987548828125, + "loss": 0.6895, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.25522947311401367, + "rewards/margins": 0.06989149749279022, + "rewards/rejected": -0.3251209855079651, + "step": 7550 + }, + { + "epoch": 0.49, + "learning_rate": 2.980338345424981e-06, + "logits/chosen": -0.8516530990600586, + "logits/rejected": -0.7801792025566101, + "logps/chosen": -522.4078979492188, + "logps/rejected": -543.2108154296875, + "loss": 0.6907, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2731880843639374, + "rewards/margins": 0.06907974183559418, + "rewards/rejected": -0.34226787090301514, + "step": 7560 + }, + { + "epoch": 0.5, + "learning_rate": 2.974733571690735e-06, + "logits/chosen": -0.9777799844741821, + "logits/rejected": -0.5789401531219482, + "logps/chosen": -561.6461791992188, + "logps/rejected": -590.323486328125, + "loss": 0.69, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3242393732070923, + "rewards/margins": 0.08796980232000351, + "rewards/rejected": -0.4122091233730316, + "step": 7570 + }, + { + "epoch": 0.5, + "learning_rate": 2.9691263215136274e-06, + "logits/chosen": -1.1575870513916016, + "logits/rejected": -1.032047986984253, + "logps/chosen": -534.2648315429688, + "logps/rejected": -575.9432373046875, + "loss": 0.6911, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2672644555568695, + "rewards/margins": 0.07333298027515411, + "rewards/rejected": -0.3405974507331848, + "step": 7580 + }, + { + "epoch": 0.5, + "learning_rate": 2.963516624143823e-06, + "logits/chosen": -0.8190193176269531, + "logits/rejected": -1.0621607303619385, + "logps/chosen": -522.3610229492188, + "logps/rejected": -564.7208251953125, + "loss": 0.6884, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3097943663597107, + "rewards/margins": 0.07989050447940826, + "rewards/rejected": -0.38968485593795776, + "step": 7590 + }, + { + "epoch": 0.5, + "learning_rate": 2.9579045088442504e-06, + "logits/chosen": -0.9224055409431458, + "logits/rejected": -0.7572005987167358, + "logps/chosen": -440.4720153808594, + "logps/rejected": -552.9095458984375, + "loss": 0.6873, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.25196707248687744, + "rewards/margins": 0.09268857538700104, + "rewards/rejected": -0.3446556627750397, + "step": 7600 + }, + { + "epoch": 0.5, + "eval_logits/chosen": -1.0028795003890991, + "eval_logits/rejected": -0.8792377710342407, + "eval_logps/chosen": -477.3995666503906, + "eval_logps/rejected": -540.662353515625, + "eval_loss": 0.689660906791687, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -0.24539463222026825, + "eval_rewards/margins": 0.08365590125322342, + "eval_rewards/rejected": -0.3290505111217499, + "eval_runtime": 714.1393, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 1.4, + "step": 7600 + }, + { + "epoch": 0.5, + "learning_rate": 2.9522900048904534e-06, + "logits/chosen": -1.1636269092559814, + "logits/rejected": -0.9207497835159302, + "logps/chosen": -517.7084350585938, + "logps/rejected": -542.8178100585938, + "loss": 0.6924, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2779703140258789, + "rewards/margins": 0.05513089895248413, + "rewards/rejected": -0.33310121297836304, + "step": 7610 + }, + { + "epoch": 0.5, + "learning_rate": 2.9466731415704343e-06, + "logits/chosen": -1.0667656660079956, + "logits/rejected": -0.9428873062133789, + "logps/chosen": -439.12677001953125, + "logps/rejected": -518.74853515625, + "loss": 0.6909, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.21534006297588348, + "rewards/margins": 0.08398912847042084, + "rewards/rejected": -0.2993291914463043, + "step": 7620 + }, + { + "epoch": 0.5, + "learning_rate": 2.941053948184503e-06, + "logits/chosen": -1.16767156124115, + "logits/rejected": -0.817290186882019, + "logps/chosen": -478.71978759765625, + "logps/rejected": -502.38909912109375, + "loss": 0.6902, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19878068566322327, + "rewards/margins": 0.05858156830072403, + "rewards/rejected": -0.2573622763156891, + "step": 7630 + }, + { + "epoch": 0.5, + "learning_rate": 2.935432454045125e-06, + "logits/chosen": -0.7946759462356567, + "logits/rejected": -0.8207203149795532, + "logps/chosen": -449.922607421875, + "logps/rejected": -463.6610412597656, + "loss": 0.6913, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21795828640460968, + "rewards/margins": 0.034945905208587646, + "rewards/rejected": -0.2529042065143585, + "step": 7640 + }, + { + "epoch": 0.5, + "learning_rate": 2.929808688476768e-06, + "logits/chosen": -1.0972411632537842, + "logits/rejected": -1.0782297849655151, + "logps/chosen": -453.37548828125, + "logps/rejected": -522.5484008789062, + "loss": 0.689, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21401312947273254, + "rewards/margins": 0.08677531778812408, + "rewards/rejected": -0.3007884621620178, + "step": 7650 + }, + { + "epoch": 0.5, + "learning_rate": 2.924182680815748e-06, + "logits/chosen": -0.9795387387275696, + "logits/rejected": -0.9352675676345825, + "logps/chosen": -444.06121826171875, + "logps/rejected": -548.3419799804688, + "loss": 0.6878, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20943062007427216, + "rewards/margins": 0.12698841094970703, + "rewards/rejected": -0.336419016122818, + "step": 7660 + }, + { + "epoch": 0.5, + "learning_rate": 2.9185544604100765e-06, + "logits/chosen": -0.669420599937439, + "logits/rejected": -0.5942717790603638, + "logps/chosen": -396.4417419433594, + "logps/rejected": -474.1393127441406, + "loss": 0.6887, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.20053164660930634, + "rewards/margins": 0.08195292949676514, + "rewards/rejected": -0.28248459100723267, + "step": 7670 + }, + { + "epoch": 0.5, + "learning_rate": 2.9129240566193083e-06, + "logits/chosen": -1.212862253189087, + "logits/rejected": -0.8634244799613953, + "logps/chosen": -411.9769592285156, + "logps/rejected": -494.0874938964844, + "loss": 0.6881, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.20989665389060974, + "rewards/margins": 0.09539847820997238, + "rewards/rejected": -0.30529510974884033, + "step": 7680 + }, + { + "epoch": 0.5, + "learning_rate": 2.9072914988143874e-06, + "logits/chosen": -0.9252969026565552, + "logits/rejected": -0.7269047498703003, + "logps/chosen": -435.27142333984375, + "logps/rejected": -558.121826171875, + "loss": 0.6887, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.23384162783622742, + "rewards/margins": 0.13132184743881226, + "rewards/rejected": -0.36516350507736206, + "step": 7690 + }, + { + "epoch": 0.5, + "learning_rate": 2.9016568163774956e-06, + "logits/chosen": -0.9825423359870911, + "logits/rejected": -0.8417407870292664, + "logps/chosen": -364.4339599609375, + "logps/rejected": -400.3785400390625, + "loss": 0.6896, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.19220469892024994, + "rewards/margins": 0.06999743729829788, + "rewards/rejected": -0.2622021436691284, + "step": 7700 + }, + { + "epoch": 0.5, + "eval_logits/chosen": -1.0249030590057373, + "eval_logits/rejected": -0.9031400084495544, + "eval_logps/chosen": -435.697998046875, + "eval_logps/rejected": -492.8592529296875, + "eval_loss": 0.6896706223487854, + "eval_rewards/accuracies": 0.6539999842643738, + "eval_rewards/chosen": -0.20369309186935425, + "eval_rewards/margins": 0.07755427807569504, + "eval_rewards/rejected": -0.2812473475933075, + "eval_runtime": 711.5485, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.405, + "step": 7700 + }, + { + "epoch": 0.5, + "learning_rate": 2.8960200387018942e-06, + "logits/chosen": -1.09833562374115, + "logits/rejected": -0.9112803339958191, + "logps/chosen": -508.74224853515625, + "logps/rejected": -526.496337890625, + "loss": 0.6911, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.20100924372673035, + "rewards/margins": 0.06658849865198135, + "rewards/rejected": -0.2675977349281311, + "step": 7710 + }, + { + "epoch": 0.51, + "learning_rate": 2.8903811951917792e-06, + "logits/chosen": -1.0677770376205444, + "logits/rejected": -1.047732949256897, + "logps/chosen": -385.08856201171875, + "logps/rejected": -401.1496276855469, + "loss": 0.6901, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18680894374847412, + "rewards/margins": 0.06320323050022125, + "rewards/rejected": -0.25001221895217896, + "step": 7720 + }, + { + "epoch": 0.51, + "learning_rate": 2.88474031526212e-06, + "logits/chosen": -1.150158166885376, + "logits/rejected": -1.023202896118164, + "logps/chosen": -423.855712890625, + "logps/rejected": -499.7687072753906, + "loss": 0.6909, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.22101597487926483, + "rewards/margins": 0.06248442456126213, + "rewards/rejected": -0.2835003733634949, + "step": 7730 + }, + { + "epoch": 0.51, + "learning_rate": 2.879097428338509e-06, + "logits/chosen": -0.9290957450866699, + "logits/rejected": -0.6512723565101624, + "logps/chosen": -430.292236328125, + "logps/rejected": -476.34912109375, + "loss": 0.6895, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.21439728140830994, + "rewards/margins": 0.06838522851467133, + "rewards/rejected": -0.28278249502182007, + "step": 7740 + }, + { + "epoch": 0.51, + "learning_rate": 2.8734525638570094e-06, + "logits/chosen": -0.9917430877685547, + "logits/rejected": -0.9474889636039734, + "logps/chosen": -447.1341247558594, + "logps/rejected": -494.3531799316406, + "loss": 0.692, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.21293988823890686, + "rewards/margins": 0.059592366218566895, + "rewards/rejected": -0.27253225445747375, + "step": 7750 + }, + { + "epoch": 0.51, + "learning_rate": 2.8678057512639982e-06, + "logits/chosen": -1.0115077495574951, + "logits/rejected": -0.9387832880020142, + "logps/chosen": -477.07373046875, + "logps/rejected": -581.225830078125, + "loss": 0.6873, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.1910073161125183, + "rewards/margins": 0.1262114942073822, + "rewards/rejected": -0.3172187805175781, + "step": 7760 + }, + { + "epoch": 0.51, + "learning_rate": 2.8621570200160172e-06, + "logits/chosen": -0.4895518720149994, + "logits/rejected": -0.45712098479270935, + "logps/chosen": -366.78436279296875, + "logps/rejected": -456.4737243652344, + "loss": 0.6898, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1982152760028839, + "rewards/margins": 0.09786740690469742, + "rewards/rejected": -0.29608267545700073, + "step": 7770 + }, + { + "epoch": 0.51, + "learning_rate": 2.856506399579615e-06, + "logits/chosen": -0.9345094561576843, + "logits/rejected": -0.994672954082489, + "logps/chosen": -507.7185974121094, + "logps/rejected": -560.7838134765625, + "loss": 0.6891, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2893551290035248, + "rewards/margins": 0.07255448400974274, + "rewards/rejected": -0.3619096279144287, + "step": 7780 + }, + { + "epoch": 0.51, + "learning_rate": 2.8508539194311964e-06, + "logits/chosen": -0.7989007234573364, + "logits/rejected": -0.9685953855514526, + "logps/chosen": -515.143798828125, + "logps/rejected": -594.9364013671875, + "loss": 0.6908, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2614103853702545, + "rewards/margins": 0.06696247309446335, + "rewards/rejected": -0.32837286591529846, + "step": 7790 + }, + { + "epoch": 0.51, + "learning_rate": 2.8451996090568656e-06, + "logits/chosen": -0.7361730337142944, + "logits/rejected": -0.4997798502445221, + "logps/chosen": -490.7713317871094, + "logps/rejected": -557.9491577148438, + "loss": 0.6893, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.3046317994594574, + "rewards/margins": 0.08513940870761871, + "rewards/rejected": -0.3897712230682373, + "step": 7800 + }, + { + "epoch": 0.51, + "eval_logits/chosen": -0.7930099368095398, + "eval_logits/rejected": -0.6832570433616638, + "eval_logps/chosen": -514.2276000976562, + "eval_logps/rejected": -576.969970703125, + "eval_loss": 0.6896798610687256, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -0.2822226583957672, + "eval_rewards/margins": 0.08313547074794769, + "eval_rewards/rejected": -0.3653581142425537, + "eval_runtime": 713.829, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 1.401, + "step": 7800 + }, + { + "epoch": 0.51, + "learning_rate": 2.839543497952276e-06, + "logits/chosen": -0.7512328028678894, + "logits/rejected": -0.7098406553268433, + "logps/chosen": -445.0135192871094, + "logps/rejected": -524.4622192382812, + "loss": 0.6903, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.26040345430374146, + "rewards/margins": 0.08815246820449829, + "rewards/rejected": -0.34855595231056213, + "step": 7810 + }, + { + "epoch": 0.51, + "learning_rate": 2.833885615622474e-06, + "logits/chosen": -0.7569184899330139, + "logits/rejected": -0.6354383826255798, + "logps/chosen": -483.88323974609375, + "logps/rejected": -554.6300659179688, + "loss": 0.6924, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2813073992729187, + "rewards/margins": 0.06127142161130905, + "rewards/rejected": -0.34257885813713074, + "step": 7820 + }, + { + "epoch": 0.51, + "learning_rate": 2.8282259915817454e-06, + "logits/chosen": -0.5648205876350403, + "logits/rejected": -0.5008620619773865, + "logps/chosen": -384.44256591796875, + "logps/rejected": -520.5614013671875, + "loss": 0.6877, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.24349789321422577, + "rewards/margins": 0.09590722620487213, + "rewards/rejected": -0.3394051194190979, + "step": 7830 + }, + { + "epoch": 0.51, + "learning_rate": 2.8225646553534614e-06, + "logits/chosen": -0.5399858951568604, + "logits/rejected": -0.48315078020095825, + "logps/chosen": -408.0323181152344, + "logps/rejected": -473.1873474121094, + "loss": 0.6909, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.20871634781360626, + "rewards/margins": 0.0684463381767273, + "rewards/rejected": -0.27716267108917236, + "step": 7840 + }, + { + "epoch": 0.51, + "learning_rate": 2.8169016364699255e-06, + "logits/chosen": -0.8743473291397095, + "logits/rejected": -0.7486833333969116, + "logps/chosen": -465.1512145996094, + "logps/rejected": -522.1315307617188, + "loss": 0.6922, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.25326332449913025, + "rewards/margins": 0.05523008853197098, + "rewards/rejected": -0.308493435382843, + "step": 7850 + }, + { + "epoch": 0.51, + "learning_rate": 2.811236964472217e-06, + "logits/chosen": -1.0294277667999268, + "logits/rejected": -0.9134146571159363, + "logps/chosen": -556.4703369140625, + "logps/rejected": -564.4383544921875, + "loss": 0.6898, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.24606367945671082, + "rewards/margins": 0.06808798015117645, + "rewards/rejected": -0.31415167450904846, + "step": 7860 + }, + { + "epoch": 0.51, + "learning_rate": 2.805570668910041e-06, + "logits/chosen": -0.6756094098091125, + "logits/rejected": -0.7497716546058655, + "logps/chosen": -456.1519470214844, + "logps/rejected": -607.6312255859375, + "loss": 0.6894, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.28651267290115356, + "rewards/margins": 0.08797899633646011, + "rewards/rejected": -0.3744916319847107, + "step": 7870 + }, + { + "epoch": 0.52, + "learning_rate": 2.7999027793415695e-06, + "logits/chosen": -1.1828815937042236, + "logits/rejected": -0.7200473546981812, + "logps/chosen": -459.301513671875, + "logps/rejected": -471.305419921875, + "loss": 0.6912, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.21438555419445038, + "rewards/margins": 0.0569295659661293, + "rewards/rejected": -0.2713150680065155, + "step": 7880 + }, + { + "epoch": 0.52, + "learning_rate": 2.794233325333293e-06, + "logits/chosen": -0.9350395202636719, + "logits/rejected": -0.7725561857223511, + "logps/chosen": -467.2784118652344, + "logps/rejected": -540.7667846679688, + "loss": 0.6889, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2089969664812088, + "rewards/margins": 0.09716568142175674, + "rewards/rejected": -0.30616268515586853, + "step": 7890 + }, + { + "epoch": 0.52, + "learning_rate": 2.7885623364598597e-06, + "logits/chosen": -1.1787984371185303, + "logits/rejected": -0.8761898875236511, + "logps/chosen": -518.7886352539062, + "logps/rejected": -571.7611083984375, + "loss": 0.6896, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.25509604811668396, + "rewards/margins": 0.09489138424396515, + "rewards/rejected": -0.3499874174594879, + "step": 7900 + }, + { + "epoch": 0.52, + "eval_logits/chosen": -0.9352292418479919, + "eval_logits/rejected": -0.8164902925491333, + "eval_logps/chosen": -467.99554443359375, + "eval_logps/rejected": -532.0469970703125, + "eval_loss": 0.68949294090271, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -0.23599061369895935, + "eval_rewards/margins": 0.08444450050592422, + "eval_rewards/rejected": -0.32043513655662537, + "eval_runtime": 713.5958, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 1.401, + "step": 7900 + }, + { + "epoch": 0.52, + "learning_rate": 2.782889842303926e-06, + "logits/chosen": -0.9351937174797058, + "logits/rejected": -0.8792891502380371, + "logps/chosen": -421.2020568847656, + "logps/rejected": -468.4584045410156, + "loss": 0.6924, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.26097291707992554, + "rewards/margins": 0.05275429040193558, + "rewards/rejected": -0.3137272298336029, + "step": 7910 + }, + { + "epoch": 0.52, + "learning_rate": 2.7772158724559987e-06, + "logits/chosen": -0.956935703754425, + "logits/rejected": -0.681462824344635, + "logps/chosen": -415.68115234375, + "logps/rejected": -615.3704833984375, + "loss": 0.6835, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19857212901115417, + "rewards/margins": 0.1622186303138733, + "rewards/rejected": -0.3607906997203827, + "step": 7920 + }, + { + "epoch": 0.52, + "learning_rate": 2.7715404565142856e-06, + "logits/chosen": -0.7236738204956055, + "logits/rejected": -0.8611618280410767, + "logps/chosen": -417.05206298828125, + "logps/rejected": -469.84051513671875, + "loss": 0.6907, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.21708783507347107, + "rewards/margins": 0.05901111289858818, + "rewards/rejected": -0.27609896659851074, + "step": 7930 + }, + { + "epoch": 0.52, + "learning_rate": 2.7658636240845354e-06, + "logits/chosen": -1.2458736896514893, + "logits/rejected": -1.17683744430542, + "logps/chosen": -462.50439453125, + "logps/rejected": -576.8449096679688, + "loss": 0.6897, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.23905611038208008, + "rewards/margins": 0.09972243756055832, + "rewards/rejected": -0.338778555393219, + "step": 7940 + }, + { + "epoch": 0.52, + "learning_rate": 2.7601854047798872e-06, + "logits/chosen": -0.7429434657096863, + "logits/rejected": -0.7902488112449646, + "logps/chosen": -438.64617919921875, + "logps/rejected": -532.12255859375, + "loss": 0.6895, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.21472282707691193, + "rewards/margins": 0.07406110316514969, + "rewards/rejected": -0.2887839078903198, + "step": 7950 + }, + { + "epoch": 0.52, + "learning_rate": 2.7545058282207148e-06, + "logits/chosen": -0.7798100113868713, + "logits/rejected": -0.6986501812934875, + "logps/chosen": -432.69061279296875, + "logps/rejected": -461.27197265625, + "loss": 0.6909, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.22114884853363037, + "rewards/margins": 0.059160299599170685, + "rewards/rejected": -0.28030914068222046, + "step": 7960 + }, + { + "epoch": 0.52, + "learning_rate": 2.748824924034471e-06, + "logits/chosen": -1.0739469528198242, + "logits/rejected": -0.9439611434936523, + "logps/chosen": -469.6466369628906, + "logps/rejected": -524.2724609375, + "loss": 0.6893, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2502364218235016, + "rewards/margins": 0.07352254539728165, + "rewards/rejected": -0.32375895977020264, + "step": 7970 + }, + { + "epoch": 0.52, + "learning_rate": 2.743142721855536e-06, + "logits/chosen": -0.69549560546875, + "logits/rejected": -0.8490379452705383, + "logps/chosen": -343.1895446777344, + "logps/rejected": -391.9134826660156, + "loss": 0.6904, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19132943451404572, + "rewards/margins": 0.057072412222623825, + "rewards/rejected": -0.24840185046195984, + "step": 7980 + }, + { + "epoch": 0.52, + "learning_rate": 2.737459251325058e-06, + "logits/chosen": -1.0810500383377075, + "logits/rejected": -0.9517616033554077, + "logps/chosen": -445.8333435058594, + "logps/rejected": -476.86529541015625, + "loss": 0.691, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.17585977911949158, + "rewards/margins": 0.05356324836611748, + "rewards/rejected": -0.22942304611206055, + "step": 7990 + }, + { + "epoch": 0.52, + "learning_rate": 2.731774542090804e-06, + "logits/chosen": -0.80693519115448, + "logits/rejected": -0.7859255075454712, + "logps/chosen": -362.6540222167969, + "logps/rejected": -392.09698486328125, + "loss": 0.6909, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.16987797617912292, + "rewards/margins": 0.04847797378897667, + "rewards/rejected": -0.2183559387922287, + "step": 8000 + }, + { + "epoch": 0.52, + "eval_logits/chosen": -1.0597189664840698, + "eval_logits/rejected": -0.9344833493232727, + "eval_logps/chosen": -404.4119873046875, + "eval_logps/rejected": -465.3135681152344, + "eval_loss": 0.6895393133163452, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -0.17240706086158752, + "eval_rewards/margins": 0.08129459619522095, + "eval_rewards/rejected": -0.25370165705680847, + "eval_runtime": 711.0651, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 1.406, + "step": 8000 + }, + { + "epoch": 0.52, + "learning_rate": 2.7260886238070034e-06, + "logits/chosen": -1.1494150161743164, + "logits/rejected": -1.0372936725616455, + "logps/chosen": -357.6014709472656, + "logps/rejected": -431.92059326171875, + "loss": 0.6908, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.16113150119781494, + "rewards/margins": 0.07902725785970688, + "rewards/rejected": -0.24015876650810242, + "step": 8010 + }, + { + "epoch": 0.52, + "learning_rate": 2.72040152613419e-06, + "logits/chosen": -0.9957448244094849, + "logits/rejected": -0.8766078948974609, + "logps/chosen": -375.31829833984375, + "logps/rejected": -415.14068603515625, + "loss": 0.6853, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.15844318270683289, + "rewards/margins": 0.12304798513650894, + "rewards/rejected": -0.2814911901950836, + "step": 8020 + }, + { + "epoch": 0.53, + "learning_rate": 2.7147132787390516e-06, + "logits/chosen": -1.0838630199432373, + "logits/rejected": -0.817136287689209, + "logps/chosen": -400.7259521484375, + "logps/rejected": -463.8260192871094, + "loss": 0.6913, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.17324785888195038, + "rewards/margins": 0.0800856351852417, + "rewards/rejected": -0.25333350896835327, + "step": 8030 + }, + { + "epoch": 0.53, + "learning_rate": 2.709023911294273e-06, + "logits/chosen": -1.1286613941192627, + "logits/rejected": -0.913567066192627, + "logps/chosen": -388.92364501953125, + "logps/rejected": -496.14349365234375, + "loss": 0.6862, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1466047465801239, + "rewards/margins": 0.1381194144487381, + "rewards/rejected": -0.2847242057323456, + "step": 8040 + }, + { + "epoch": 0.53, + "learning_rate": 2.7033334534783806e-06, + "logits/chosen": -1.009552240371704, + "logits/rejected": -1.1506755352020264, + "logps/chosen": -369.41131591796875, + "logps/rejected": -469.4898986816406, + "loss": 0.6886, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.17026540637016296, + "rewards/margins": 0.08387952297925949, + "rewards/rejected": -0.25414493680000305, + "step": 8050 + }, + { + "epoch": 0.53, + "learning_rate": 2.697641934975592e-06, + "logits/chosen": -1.0629603862762451, + "logits/rejected": -0.896773636341095, + "logps/chosen": -427.0707092285156, + "logps/rejected": -479.0992736816406, + "loss": 0.6882, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.19928427040576935, + "rewards/margins": 0.0845823660492897, + "rewards/rejected": -0.28386664390563965, + "step": 8060 + }, + { + "epoch": 0.53, + "learning_rate": 2.691949385475654e-06, + "logits/chosen": -1.056208610534668, + "logits/rejected": -0.8916531801223755, + "logps/chosen": -451.06201171875, + "logps/rejected": -505.90771484375, + "loss": 0.6893, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.20717112720012665, + "rewards/margins": 0.08041705191135406, + "rewards/rejected": -0.2875882089138031, + "step": 8070 + }, + { + "epoch": 0.53, + "learning_rate": 2.6862558346736937e-06, + "logits/chosen": -0.9724780917167664, + "logits/rejected": -0.7502321600914001, + "logps/chosen": -438.94268798828125, + "logps/rejected": -590.4031982421875, + "loss": 0.6851, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.20059040188789368, + "rewards/margins": 0.15666231513023376, + "rewards/rejected": -0.35725271701812744, + "step": 8080 + }, + { + "epoch": 0.53, + "learning_rate": 2.6805613122700617e-06, + "logits/chosen": -0.7160404920578003, + "logits/rejected": -0.7909665107727051, + "logps/chosen": -459.51922607421875, + "logps/rejected": -552.0443725585938, + "loss": 0.6877, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.23594674468040466, + "rewards/margins": 0.09158305823802948, + "rewards/rejected": -0.32752981781959534, + "step": 8090 + }, + { + "epoch": 0.53, + "learning_rate": 2.674865847970176e-06, + "logits/chosen": -0.8122416734695435, + "logits/rejected": -0.6669374704360962, + "logps/chosen": -430.8330078125, + "logps/rejected": -524.0494384765625, + "loss": 0.6908, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2232166826725006, + "rewards/margins": 0.07076805830001831, + "rewards/rejected": -0.2939847409725189, + "step": 8100 + }, + { + "epoch": 0.53, + "eval_logits/chosen": -0.7900794148445129, + "eval_logits/rejected": -0.6794930100440979, + "eval_logps/chosen": -433.87579345703125, + "eval_logps/rejected": -497.8059387207031, + "eval_loss": 0.6894783973693848, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -0.2018708437681198, + "eval_rewards/margins": 0.0843232199549675, + "eval_rewards/rejected": -0.2861940860748291, + "eval_runtime": 711.208, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 1.406, + "step": 8100 + }, + { + "epoch": 0.53, + "learning_rate": 2.669169471484368e-06, + "logits/chosen": -0.5697265267372131, + "logits/rejected": -0.6256684064865112, + "logps/chosen": -370.5645446777344, + "logps/rejected": -415.7994079589844, + "loss": 0.6906, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.20550651848316193, + "rewards/margins": 0.0478750616312027, + "rewards/rejected": -0.25338155031204224, + "step": 8110 + }, + { + "epoch": 0.53, + "learning_rate": 2.6634722125277278e-06, + "logits/chosen": -0.9089109301567078, + "logits/rejected": -0.6783393621444702, + "logps/chosen": -463.1395568847656, + "logps/rejected": -536.8549194335938, + "loss": 0.6893, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.23101966083049774, + "rewards/margins": 0.06552055478096008, + "rewards/rejected": -0.29654020071029663, + "step": 8120 + }, + { + "epoch": 0.53, + "learning_rate": 2.6577741008199498e-06, + "logits/chosen": -0.617620587348938, + "logits/rejected": -0.5651569366455078, + "logps/chosen": -502.4048767089844, + "logps/rejected": -587.86767578125, + "loss": 0.6865, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.24624672532081604, + "rewards/margins": 0.1455041915178299, + "rewards/rejected": -0.39175087213516235, + "step": 8130 + }, + { + "epoch": 0.53, + "learning_rate": 2.652075166085175e-06, + "logits/chosen": -0.578894317150116, + "logits/rejected": -0.6739285588264465, + "logps/chosen": -505.9727478027344, + "logps/rejected": -664.359375, + "loss": 0.6873, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.27139347791671753, + "rewards/margins": 0.1348414421081543, + "rewards/rejected": -0.4062349200248718, + "step": 8140 + }, + { + "epoch": 0.53, + "learning_rate": 2.6463754380518395e-06, + "logits/chosen": -0.558585524559021, + "logits/rejected": -0.4612973630428314, + "logps/chosen": -513.1217651367188, + "logps/rejected": -555.1633911132812, + "loss": 0.6908, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.28216418623924255, + "rewards/margins": 0.0968686193227768, + "rewards/rejected": -0.37903279066085815, + "step": 8150 + }, + { + "epoch": 0.53, + "learning_rate": 2.6406749464525167e-06, + "logits/chosen": -1.1261868476867676, + "logits/rejected": -0.7899680733680725, + "logps/chosen": -406.1955871582031, + "logps/rejected": -457.78369140625, + "loss": 0.69, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.172636479139328, + "rewards/margins": 0.09881995618343353, + "rewards/rejected": -0.27145642042160034, + "step": 8160 + }, + { + "epoch": 0.53, + "learning_rate": 2.634973721023762e-06, + "logits/chosen": -1.2199698686599731, + "logits/rejected": -1.0744082927703857, + "logps/chosen": -463.51263427734375, + "logps/rejected": -482.42181396484375, + "loss": 0.6898, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20905041694641113, + "rewards/margins": 0.06834776699542999, + "rewards/rejected": -0.2773981988430023, + "step": 8170 + }, + { + "epoch": 0.54, + "learning_rate": 2.6292717915059605e-06, + "logits/chosen": -1.2687675952911377, + "logits/rejected": -1.109535813331604, + "logps/chosen": -468.4798889160156, + "logps/rejected": -526.0375366210938, + "loss": 0.6892, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.192849799990654, + "rewards/margins": 0.11375071108341217, + "rewards/rejected": -0.30660054087638855, + "step": 8180 + }, + { + "epoch": 0.54, + "learning_rate": 2.6235691876431706e-06, + "logits/chosen": -1.241114616394043, + "logits/rejected": -1.140925645828247, + "logps/chosen": -392.8011169433594, + "logps/rejected": -462.74755859375, + "loss": 0.6893, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.17418552935123444, + "rewards/margins": 0.06434730440378189, + "rewards/rejected": -0.23853282630443573, + "step": 8190 + }, + { + "epoch": 0.54, + "learning_rate": 2.6178659391829673e-06, + "logits/chosen": -1.2482163906097412, + "logits/rejected": -0.9498428106307983, + "logps/chosen": -402.7013854980469, + "logps/rejected": -434.3684997558594, + "loss": 0.6904, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.16522344946861267, + "rewards/margins": 0.07251036912202835, + "rewards/rejected": -0.23773381114006042, + "step": 8200 + }, + { + "epoch": 0.54, + "eval_logits/chosen": -1.0770260095596313, + "eval_logits/rejected": -0.9495123028755188, + "eval_logps/chosen": -421.29119873046875, + "eval_logps/rejected": -480.12030029296875, + "eval_loss": 0.6895091533660889, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -0.1892862617969513, + "eval_rewards/margins": 0.07922215014696121, + "eval_rewards/rejected": -0.2685084342956543, + "eval_runtime": 712.5679, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 1.403, + "step": 8200 + }, + { + "epoch": 0.54, + "learning_rate": 2.6121620758762877e-06, + "logits/chosen": -1.1685333251953125, + "logits/rejected": -0.8667934536933899, + "logps/chosen": -379.981201171875, + "logps/rejected": -443.89166259765625, + "loss": 0.6904, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.18789689242839813, + "rewards/margins": 0.06470952928066254, + "rewards/rejected": -0.25260645151138306, + "step": 8210 + }, + { + "epoch": 0.54, + "learning_rate": 2.606457627477277e-06, + "logits/chosen": -0.8580241203308105, + "logits/rejected": -0.7762119174003601, + "logps/chosen": -338.51263427734375, + "logps/rejected": -426.2496032714844, + "loss": 0.6912, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.16288332641124725, + "rewards/margins": 0.08438492566347122, + "rewards/rejected": -0.24726824462413788, + "step": 8220 + }, + { + "epoch": 0.54, + "learning_rate": 2.6007526237431324e-06, + "logits/chosen": -1.172778844833374, + "logits/rejected": -1.0370298624038696, + "logps/chosen": -346.90911865234375, + "logps/rejected": -456.95361328125, + "loss": 0.6886, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.16487959027290344, + "rewards/margins": 0.0959264412522316, + "rewards/rejected": -0.26080602407455444, + "step": 8230 + }, + { + "epoch": 0.54, + "learning_rate": 2.5950470944339478e-06, + "logits/chosen": -1.0417600870132446, + "logits/rejected": -1.0672943592071533, + "logps/chosen": -371.01171875, + "logps/rejected": -402.5978698730469, + "loss": 0.6916, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.14920981228351593, + "rewards/margins": 0.035081896930933, + "rewards/rejected": -0.18429169058799744, + "step": 8240 + }, + { + "epoch": 0.54, + "learning_rate": 2.58934106931256e-06, + "logits/chosen": -1.0141005516052246, + "logits/rejected": -0.886761486530304, + "logps/chosen": -412.96478271484375, + "logps/rejected": -452.2259826660156, + "loss": 0.6907, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.19219276309013367, + "rewards/margins": 0.05843869969248772, + "rewards/rejected": -0.2506314516067505, + "step": 8250 + }, + { + "epoch": 0.54, + "learning_rate": 2.58363457814439e-06, + "logits/chosen": -1.1281472444534302, + "logits/rejected": -0.8433464169502258, + "logps/chosen": -416.047119140625, + "logps/rejected": -489.42401123046875, + "loss": 0.6873, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.20537519454956055, + "rewards/margins": 0.08710043132305145, + "rewards/rejected": -0.2924756109714508, + "step": 8260 + }, + { + "epoch": 0.54, + "learning_rate": 2.5779276506972924e-06, + "logits/chosen": -0.9526575803756714, + "logits/rejected": -0.9733279943466187, + "logps/chosen": -408.75347900390625, + "logps/rejected": -421.7810974121094, + "loss": 0.691, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1768006533384323, + "rewards/margins": 0.05240979790687561, + "rewards/rejected": -0.22921045124530792, + "step": 8270 + }, + { + "epoch": 0.54, + "learning_rate": 2.5722203167413945e-06, + "logits/chosen": -1.0990087985992432, + "logits/rejected": -0.9820888638496399, + "logps/chosen": -455.8550720214844, + "logps/rejected": -470.61651611328125, + "loss": 0.6894, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17255744338035583, + "rewards/margins": 0.09668220579624176, + "rewards/rejected": -0.2692396342754364, + "step": 8280 + }, + { + "epoch": 0.54, + "learning_rate": 2.5665126060489476e-06, + "logits/chosen": -1.152363896369934, + "logits/rejected": -1.097588300704956, + "logps/chosen": -353.132080078125, + "logps/rejected": -451.05560302734375, + "loss": 0.6896, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1650300920009613, + "rewards/margins": 0.06907693296670914, + "rewards/rejected": -0.23410698771476746, + "step": 8290 + }, + { + "epoch": 0.54, + "learning_rate": 2.560804548394165e-06, + "logits/chosen": -0.9613862037658691, + "logits/rejected": -0.5457326173782349, + "logps/chosen": -443.27239990234375, + "logps/rejected": -486.93218994140625, + "loss": 0.6895, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.19599205255508423, + "rewards/margins": 0.08893202245235443, + "rewards/rejected": -0.28492408990859985, + "step": 8300 + }, + { + "epoch": 0.54, + "eval_logits/chosen": -0.9703200459480286, + "eval_logits/rejected": -0.8503559231758118, + "eval_logps/chosen": -410.2919921875, + "eval_logps/rejected": -467.85498046875, + "eval_loss": 0.6896072626113892, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -0.17828704416751862, + "eval_rewards/margins": 0.07795605808496475, + "eval_rewards/rejected": -0.25624310970306396, + "eval_runtime": 714.4842, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 1.4, + "step": 8300 + }, + { + "epoch": 0.54, + "learning_rate": 2.5550961735530734e-06, + "logits/chosen": -0.6795090436935425, + "logits/rejected": -0.7947781085968018, + "logps/chosen": -300.90093994140625, + "logps/rejected": -393.47308349609375, + "loss": 0.6903, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1378651261329651, + "rewards/margins": 0.061981432139873505, + "rewards/rejected": -0.1998465359210968, + "step": 8310 + }, + { + "epoch": 0.54, + "learning_rate": 2.549387511303351e-06, + "logits/chosen": -0.9080276489257812, + "logits/rejected": -1.0452932119369507, + "logps/chosen": -340.3172302246094, + "logps/rejected": -445.0389099121094, + "loss": 0.6902, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1705804169178009, + "rewards/margins": 0.06044108793139458, + "rewards/rejected": -0.23102149367332458, + "step": 8320 + }, + { + "epoch": 0.55, + "learning_rate": 2.5436785914241774e-06, + "logits/chosen": -0.8201481103897095, + "logits/rejected": -0.6838145852088928, + "logps/chosen": -406.7204284667969, + "logps/rejected": -494.86260986328125, + "loss": 0.6871, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20776119828224182, + "rewards/margins": 0.120111845433712, + "rewards/rejected": -0.3278730511665344, + "step": 8330 + }, + { + "epoch": 0.55, + "learning_rate": 2.5379694436960746e-06, + "logits/chosen": -0.9807443618774414, + "logits/rejected": -0.955755352973938, + "logps/chosen": -435.4742126464844, + "logps/rejected": -507.7806701660156, + "loss": 0.691, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18946322798728943, + "rewards/margins": 0.06174159049987793, + "rewards/rejected": -0.25120481848716736, + "step": 8340 + }, + { + "epoch": 0.55, + "learning_rate": 2.5322600979007533e-06, + "logits/chosen": -1.0486241579055786, + "logits/rejected": -0.7849219441413879, + "logps/chosen": -409.82379150390625, + "logps/rejected": -457.51300048828125, + "loss": 0.6899, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1973283886909485, + "rewards/margins": 0.06838833540678024, + "rewards/rejected": -0.2657167315483093, + "step": 8350 + }, + { + "epoch": 0.55, + "learning_rate": 2.5265505838209592e-06, + "logits/chosen": -0.9295563697814941, + "logits/rejected": -0.830061137676239, + "logps/chosen": -477.27569580078125, + "logps/rejected": -481.6053161621094, + "loss": 0.6919, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.22031202912330627, + "rewards/margins": 0.05074296146631241, + "rewards/rejected": -0.2710549831390381, + "step": 8360 + }, + { + "epoch": 0.55, + "learning_rate": 2.520840931240314e-06, + "logits/chosen": -1.0167980194091797, + "logits/rejected": -0.7042320966720581, + "logps/chosen": -412.95123291015625, + "logps/rejected": -432.8705139160156, + "loss": 0.6911, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.20314264297485352, + "rewards/margins": 0.08364014327526093, + "rewards/rejected": -0.28678280115127563, + "step": 8370 + }, + { + "epoch": 0.55, + "learning_rate": 2.515131169943162e-06, + "logits/chosen": -0.5957569479942322, + "logits/rejected": -0.5955820083618164, + "logps/chosen": -463.299072265625, + "logps/rejected": -550.264892578125, + "loss": 0.6897, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.20520460605621338, + "rewards/margins": 0.09393807500600815, + "rewards/rejected": -0.2991426885128021, + "step": 8380 + }, + { + "epoch": 0.55, + "learning_rate": 2.509421329714416e-06, + "logits/chosen": -0.6069525480270386, + "logits/rejected": -0.7541376352310181, + "logps/chosen": -362.64208984375, + "logps/rejected": -435.2427673339844, + "loss": 0.6916, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.15650290250778198, + "rewards/margins": 0.051408637315034866, + "rewards/rejected": -0.20791153609752655, + "step": 8390 + }, + { + "epoch": 0.55, + "learning_rate": 2.5037114403393987e-06, + "logits/chosen": -0.810130774974823, + "logits/rejected": -0.5814931392669678, + "logps/chosen": -348.248046875, + "logps/rejected": -374.8896179199219, + "loss": 0.6904, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.13726766407489777, + "rewards/margins": 0.06042211130261421, + "rewards/rejected": -0.19768977165222168, + "step": 8400 + }, + { + "epoch": 0.55, + "eval_logits/chosen": -0.901390016078949, + "eval_logits/rejected": -0.7864856719970703, + "eval_logps/chosen": -381.29022216796875, + "eval_logps/rejected": -431.9936828613281, + "eval_loss": 0.689697265625, + "eval_rewards/accuracies": 0.6679999828338623, + "eval_rewards/chosen": -0.14928528666496277, + "eval_rewards/margins": 0.07109646499156952, + "eval_rewards/rejected": -0.22038176655769348, + "eval_runtime": 711.3945, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.406, + "step": 8400 + }, + { + "epoch": 0.55, + "learning_rate": 2.4980015316036908e-06, + "logits/chosen": -0.7899686098098755, + "logits/rejected": -0.7767657041549683, + "logps/chosen": -312.8028564453125, + "logps/rejected": -435.9971618652344, + "loss": 0.6873, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1369931399822235, + "rewards/margins": 0.1016085147857666, + "rewards/rejected": -0.2386016547679901, + "step": 8410 + }, + { + "epoch": 0.55, + "learning_rate": 2.4922916332929725e-06, + "logits/chosen": -1.0517879724502563, + "logits/rejected": -1.0950580835342407, + "logps/chosen": -374.1047058105469, + "logps/rejected": -374.71875, + "loss": 0.6915, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.13868047297000885, + "rewards/margins": 0.040228597819805145, + "rewards/rejected": -0.1789090633392334, + "step": 8420 + }, + { + "epoch": 0.55, + "learning_rate": 2.4865817751928716e-06, + "logits/chosen": -1.0115129947662354, + "logits/rejected": -0.9112080335617065, + "logps/chosen": -355.5943908691406, + "logps/rejected": -507.1390686035156, + "loss": 0.6861, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16010567545890808, + "rewards/margins": 0.125122532248497, + "rewards/rejected": -0.2852281928062439, + "step": 8430 + }, + { + "epoch": 0.55, + "learning_rate": 2.4808719870888037e-06, + "logits/chosen": -0.7915637493133545, + "logits/rejected": -0.5190998315811157, + "logps/chosen": -387.52020263671875, + "logps/rejected": -465.8919982910156, + "loss": 0.6892, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.16984879970550537, + "rewards/margins": 0.11164551973342896, + "rewards/rejected": -0.2814943194389343, + "step": 8440 + }, + { + "epoch": 0.55, + "learning_rate": 2.4751622987658206e-06, + "logits/chosen": -0.9463188052177429, + "logits/rejected": -0.8167473077774048, + "logps/chosen": -460.9830627441406, + "logps/rejected": -512.6512451171875, + "loss": 0.6917, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.22303274273872375, + "rewards/margins": 0.0624089241027832, + "rewards/rejected": -0.28544169664382935, + "step": 8450 + }, + { + "epoch": 0.55, + "learning_rate": 2.4694527400084546e-06, + "logits/chosen": -0.720486044883728, + "logits/rejected": -0.5732995867729187, + "logps/chosen": -403.9100036621094, + "logps/rejected": -468.7867126464844, + "loss": 0.6903, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1816653460264206, + "rewards/margins": 0.0707213282585144, + "rewards/rejected": -0.2523866891860962, + "step": 8460 + }, + { + "epoch": 0.55, + "learning_rate": 2.4637433406005607e-06, + "logits/chosen": -0.6172093152999878, + "logits/rejected": -0.9131113290786743, + "logps/chosen": -549.1463623046875, + "logps/rejected": -563.6685180664062, + "loss": 0.6913, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.239420086145401, + "rewards/margins": 0.0425652377307415, + "rewards/rejected": -0.2819853127002716, + "step": 8470 + }, + { + "epoch": 0.55, + "learning_rate": 2.4580341303251628e-06, + "logits/chosen": -0.32746773958206177, + "logits/rejected": -0.24373102188110352, + "logps/chosen": -470.934814453125, + "logps/rejected": -526.7442016601562, + "loss": 0.689, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21098360419273376, + "rewards/margins": 0.09220820665359497, + "rewards/rejected": -0.30319178104400635, + "step": 8480 + }, + { + "epoch": 0.56, + "learning_rate": 2.4523251389642984e-06, + "logits/chosen": -0.7050188779830933, + "logits/rejected": -0.28900861740112305, + "logps/chosen": -509.91156005859375, + "logps/rejected": -579.9166870117188, + "loss": 0.6873, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.25535228848457336, + "rewards/margins": 0.1071435958147049, + "rewards/rejected": -0.36249589920043945, + "step": 8490 + }, + { + "epoch": 0.56, + "learning_rate": 2.4466163962988626e-06, + "logits/chosen": -0.9161072969436646, + "logits/rejected": -0.6629343628883362, + "logps/chosen": -513.077392578125, + "logps/rejected": -519.474609375, + "loss": 0.6878, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.23247647285461426, + "rewards/margins": 0.10511653125286102, + "rewards/rejected": -0.3375930190086365, + "step": 8500 + }, + { + "epoch": 0.56, + "eval_logits/chosen": -0.5619562268257141, + "eval_logits/rejected": -0.4634077250957489, + "eval_logps/chosen": -475.04638671875, + "eval_logps/rejected": -545.1040649414062, + "eval_loss": 0.6894838213920593, + "eval_rewards/accuracies": 0.6710000038146973, + "eval_rewards/chosen": -0.24304144084453583, + "eval_rewards/margins": 0.09045073390007019, + "eval_rewards/rejected": -0.3334921598434448, + "eval_runtime": 711.5654, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.405, + "step": 8500 + }, + { + "epoch": 0.56, + "learning_rate": 2.4409079321084543e-06, + "logits/chosen": -0.755149245262146, + "logits/rejected": -0.960436224937439, + "logps/chosen": -416.8389587402344, + "logps/rejected": -544.0230712890625, + "loss": 0.6908, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.20171932876110077, + "rewards/margins": 0.09724519401788712, + "rewards/rejected": -0.2989645004272461, + "step": 8510 + }, + { + "epoch": 0.56, + "learning_rate": 2.4351997761712184e-06, + "logits/chosen": -1.0244697332382202, + "logits/rejected": -0.45252904295921326, + "logps/chosen": -469.56524658203125, + "logps/rejected": -494.75933837890625, + "loss": 0.6897, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.22417688369750977, + "rewards/margins": 0.09190000593662262, + "rewards/rejected": -0.31607693433761597, + "step": 8520 + }, + { + "epoch": 0.56, + "learning_rate": 2.4294919582636933e-06, + "logits/chosen": -0.831392765045166, + "logits/rejected": -0.5988849997520447, + "logps/chosen": -412.9861755371094, + "logps/rejected": -481.98699951171875, + "loss": 0.691, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20188367366790771, + "rewards/margins": 0.08084017038345337, + "rewards/rejected": -0.2827238440513611, + "step": 8530 + }, + { + "epoch": 0.56, + "learning_rate": 2.423784508160652e-06, + "logits/chosen": -0.7932353019714355, + "logits/rejected": -0.6916912198066711, + "logps/chosen": -520.588623046875, + "logps/rejected": -544.4139404296875, + "loss": 0.6907, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2653577923774719, + "rewards/margins": 0.07166271656751633, + "rewards/rejected": -0.33702048659324646, + "step": 8540 + }, + { + "epoch": 0.56, + "learning_rate": 2.418077455634951e-06, + "logits/chosen": -0.6658292412757874, + "logits/rejected": -0.5401934385299683, + "logps/chosen": -470.5606384277344, + "logps/rejected": -553.2406005859375, + "loss": 0.691, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.25398963689804077, + "rewards/margins": 0.05512278154492378, + "rewards/rejected": -0.30911239981651306, + "step": 8550 + }, + { + "epoch": 0.56, + "learning_rate": 2.4123708304573714e-06, + "logits/chosen": -0.7948893308639526, + "logits/rejected": -0.4241601526737213, + "logps/chosen": -517.2756958007812, + "logps/rejected": -576.2462158203125, + "loss": 0.6894, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2275083363056183, + "rewards/margins": 0.0756693035364151, + "rewards/rejected": -0.3031776547431946, + "step": 8560 + }, + { + "epoch": 0.56, + "learning_rate": 2.406664662396465e-06, + "logits/chosen": -0.20848624408245087, + "logits/rejected": -0.26154419779777527, + "logps/chosen": -458.6073303222656, + "logps/rejected": -498.234375, + "loss": 0.691, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2742254137992859, + "rewards/margins": 0.055309224873781204, + "rewards/rejected": -0.3295346796512604, + "step": 8570 + }, + { + "epoch": 0.56, + "learning_rate": 2.4009589812184012e-06, + "logits/chosen": -0.5426809191703796, + "logits/rejected": -0.23366662859916687, + "logps/chosen": -436.4287109375, + "logps/rejected": -460.65057373046875, + "loss": 0.6884, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2332087755203247, + "rewards/margins": 0.07733286917209625, + "rewards/rejected": -0.31054162979125977, + "step": 8580 + }, + { + "epoch": 0.56, + "learning_rate": 2.3952538166868073e-06, + "logits/chosen": -0.4449075162410736, + "logits/rejected": -0.5031110048294067, + "logps/chosen": -477.5169982910156, + "logps/rejected": -586.2160034179688, + "loss": 0.6874, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.25997069478034973, + "rewards/margins": 0.12529215216636658, + "rewards/rejected": -0.38526278734207153, + "step": 8590 + }, + { + "epoch": 0.56, + "learning_rate": 2.389549198562616e-06, + "logits/chosen": -0.7170125246047974, + "logits/rejected": -0.3539445996284485, + "logps/chosen": -483.71112060546875, + "logps/rejected": -566.279052734375, + "loss": 0.6881, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.25762954354286194, + "rewards/margins": 0.1125480905175209, + "rewards/rejected": -0.37017759680747986, + "step": 8600 + }, + { + "epoch": 0.56, + "eval_logits/chosen": -0.6347896456718445, + "eval_logits/rejected": -0.5309445261955261, + "eval_logps/chosen": -483.77215576171875, + "eval_logps/rejected": -558.7501831054688, + "eval_loss": 0.6894675493240356, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -0.25176724791526794, + "eval_rewards/margins": 0.09537114202976227, + "eval_rewards/rejected": -0.3471384048461914, + "eval_runtime": 711.4841, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.406, + "step": 8600 + }, + { + "epoch": 0.56, + "learning_rate": 2.3838451566039098e-06, + "logits/chosen": -0.9295812845230103, + "logits/rejected": -0.7526025176048279, + "logps/chosen": -484.5731506347656, + "logps/rejected": -528.5623779296875, + "loss": 0.6923, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.24606318771839142, + "rewards/margins": 0.05412333458662033, + "rewards/rejected": -0.30018651485443115, + "step": 8610 + }, + { + "epoch": 0.56, + "learning_rate": 2.3781417205657662e-06, + "logits/chosen": -0.779945433139801, + "logits/rejected": -0.6768251657485962, + "logps/chosen": -427.7649841308594, + "logps/rejected": -465.7723083496094, + "loss": 0.6904, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.23000852763652802, + "rewards/margins": 0.076214499771595, + "rewards/rejected": -0.3062230050563812, + "step": 8620 + }, + { + "epoch": 0.56, + "learning_rate": 2.3724389202001006e-06, + "logits/chosen": -0.6447828412055969, + "logits/rejected": -0.3995209336280823, + "logps/chosen": -456.41064453125, + "logps/rejected": -512.5110473632812, + "loss": 0.6917, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2540872097015381, + "rewards/margins": 0.07779712229967117, + "rewards/rejected": -0.33188432455062866, + "step": 8630 + }, + { + "epoch": 0.57, + "learning_rate": 2.366736785255514e-06, + "logits/chosen": -0.7542355060577393, + "logits/rejected": -0.869966983795166, + "logps/chosen": -447.4637145996094, + "logps/rejected": -502.99365234375, + "loss": 0.6909, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.24873492121696472, + "rewards/margins": 0.0667499229311943, + "rewards/rejected": -0.315484881401062, + "step": 8640 + }, + { + "epoch": 0.57, + "learning_rate": 2.3610353454771355e-06, + "logits/chosen": -0.5472957491874695, + "logits/rejected": -0.3664063513278961, + "logps/chosen": -403.11505126953125, + "logps/rejected": -469.02313232421875, + "loss": 0.6903, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.21316757798194885, + "rewards/margins": 0.08220270276069641, + "rewards/rejected": -0.29537031054496765, + "step": 8650 + }, + { + "epoch": 0.57, + "learning_rate": 2.355334630606467e-06, + "logits/chosen": -1.101891279220581, + "logits/rejected": -0.8851909637451172, + "logps/chosen": -476.35845947265625, + "logps/rejected": -476.6539611816406, + "loss": 0.6906, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2360682189464569, + "rewards/margins": 0.06384102255105972, + "rewards/rejected": -0.2999092638492584, + "step": 8660 + }, + { + "epoch": 0.57, + "learning_rate": 2.349634670381231e-06, + "logits/chosen": -0.5369294285774231, + "logits/rejected": -0.39624541997909546, + "logps/chosen": -454.20489501953125, + "logps/rejected": -533.3726806640625, + "loss": 0.6903, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.24700577557086945, + "rewards/margins": 0.06936918944120407, + "rewards/rejected": -0.3163749575614929, + "step": 8670 + }, + { + "epoch": 0.57, + "learning_rate": 2.3439354945352104e-06, + "logits/chosen": -0.8326675295829773, + "logits/rejected": -0.682153582572937, + "logps/chosen": -448.55816650390625, + "logps/rejected": -442.1712951660156, + "loss": 0.6912, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.205034539103508, + "rewards/margins": 0.03801094740629196, + "rewards/rejected": -0.24304552376270294, + "step": 8680 + }, + { + "epoch": 0.57, + "learning_rate": 2.3382371327981e-06, + "logits/chosen": -0.8642188310623169, + "logits/rejected": -0.7281670570373535, + "logps/chosen": -430.5753479003906, + "logps/rejected": -505.33294677734375, + "loss": 0.6894, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1990990936756134, + "rewards/margins": 0.08905245363712311, + "rewards/rejected": -0.2881515622138977, + "step": 8690 + }, + { + "epoch": 0.57, + "learning_rate": 2.3325396148953456e-06, + "logits/chosen": -0.4669378697872162, + "logits/rejected": -0.6072463989257812, + "logps/chosen": -429.3271484375, + "logps/rejected": -579.4981689453125, + "loss": 0.6888, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2580257058143616, + "rewards/margins": 0.0960407704114914, + "rewards/rejected": -0.35406649112701416, + "step": 8700 + }, + { + "epoch": 0.57, + "eval_logits/chosen": -0.7447597980499268, + "eval_logits/rejected": -0.6359822750091553, + "eval_logps/chosen": -477.96826171875, + "eval_logps/rejected": -541.177001953125, + "eval_loss": 0.689468264579773, + "eval_rewards/accuracies": 0.6765000224113464, + "eval_rewards/chosen": -0.24596332013607025, + "eval_rewards/margins": 0.08360182493925095, + "eval_rewards/rejected": -0.329565167427063, + "eval_runtime": 710.5611, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 1.407, + "step": 8700 + }, + { + "epoch": 0.57, + "learning_rate": 2.3268429705479915e-06, + "logits/chosen": -1.2217304706573486, + "logits/rejected": -0.7867422103881836, + "logps/chosen": -480.87493896484375, + "logps/rejected": -520.9791259765625, + "loss": 0.6906, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2560623288154602, + "rewards/margins": 0.08142323791980743, + "rewards/rejected": -0.3374856114387512, + "step": 8710 + }, + { + "epoch": 0.57, + "learning_rate": 2.3211472294725248e-06, + "logits/chosen": -0.6829847693443298, + "logits/rejected": -0.580116868019104, + "logps/chosen": -448.57513427734375, + "logps/rejected": -521.0882568359375, + "loss": 0.6901, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.23216000199317932, + "rewards/margins": 0.08518026024103165, + "rewards/rejected": -0.31734028458595276, + "step": 8720 + }, + { + "epoch": 0.57, + "learning_rate": 2.315452421380721e-06, + "logits/chosen": -0.5848901867866516, + "logits/rejected": -0.49349188804626465, + "logps/chosen": -485.47344970703125, + "logps/rejected": -522.7279663085938, + "loss": 0.688, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.23052915930747986, + "rewards/margins": 0.08524759113788605, + "rewards/rejected": -0.3157767653465271, + "step": 8730 + }, + { + "epoch": 0.57, + "learning_rate": 2.3097585759794886e-06, + "logits/chosen": -0.7139642834663391, + "logits/rejected": -0.6301506161689758, + "logps/chosen": -469.02227783203125, + "logps/rejected": -537.8853759765625, + "loss": 0.6867, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.21468093991279602, + "rewards/margins": 0.13225510716438293, + "rewards/rejected": -0.34693604707717896, + "step": 8740 + }, + { + "epoch": 0.57, + "learning_rate": 2.3040657229707155e-06, + "logits/chosen": -1.0114556550979614, + "logits/rejected": -0.8004629015922546, + "logps/chosen": -378.1190490722656, + "logps/rejected": -497.8941345214844, + "loss": 0.6893, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20607146620750427, + "rewards/margins": 0.10821950435638428, + "rewards/rejected": -0.3142909109592438, + "step": 8750 + }, + { + "epoch": 0.57, + "learning_rate": 2.2983738920511104e-06, + "logits/chosen": -0.8854055404663086, + "logits/rejected": -0.7368249297142029, + "logps/chosen": -463.99664306640625, + "logps/rejected": -481.71868896484375, + "loss": 0.6915, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1942036896944046, + "rewards/margins": 0.06802419573068619, + "rewards/rejected": -0.262227863073349, + "step": 8760 + }, + { + "epoch": 0.57, + "learning_rate": 2.2926831129120523e-06, + "logits/chosen": -0.5618699193000793, + "logits/rejected": -0.4119883179664612, + "logps/chosen": -441.3275451660156, + "logps/rejected": -469.8602600097656, + "loss": 0.6917, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2055359184741974, + "rewards/margins": 0.0568884015083313, + "rewards/rejected": -0.2624242901802063, + "step": 8770 + }, + { + "epoch": 0.57, + "learning_rate": 2.2869934152394323e-06, + "logits/chosen": -0.8328984379768372, + "logits/rejected": -0.8400952219963074, + "logps/chosen": -495.8323669433594, + "logps/rejected": -512.2872314453125, + "loss": 0.6887, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.22686699032783508, + "rewards/margins": 0.07205691188573837, + "rewards/rejected": -0.29892387986183167, + "step": 8780 + }, + { + "epoch": 0.58, + "learning_rate": 2.281304828713501e-06, + "logits/chosen": -1.0833885669708252, + "logits/rejected": -0.8467610478401184, + "logps/chosen": -438.43768310546875, + "logps/rejected": -495.0404357910156, + "loss": 0.6897, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2054503858089447, + "rewards/margins": 0.064105324447155, + "rewards/rejected": -0.2695557177066803, + "step": 8790 + }, + { + "epoch": 0.58, + "learning_rate": 2.275617383008711e-06, + "logits/chosen": -0.9343339800834656, + "logits/rejected": -0.8902202844619751, + "logps/chosen": -425.01849365234375, + "logps/rejected": -473.1581115722656, + "loss": 0.6912, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.18953107297420502, + "rewards/margins": 0.05080736428499222, + "rewards/rejected": -0.24033844470977783, + "step": 8800 + }, + { + "epoch": 0.58, + "eval_logits/chosen": -0.8669275045394897, + "eval_logits/rejected": -0.7510409951210022, + "eval_logps/chosen": -423.5054626464844, + "eval_logps/rejected": -484.2248840332031, + "eval_loss": 0.6895233392715454, + "eval_rewards/accuracies": 0.6725000143051147, + "eval_rewards/chosen": -0.19150054454803467, + "eval_rewards/margins": 0.08111248910427094, + "eval_rewards/rejected": -0.2726130485534668, + "eval_runtime": 713.1294, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 1.402, + "step": 8800 + }, + { + "epoch": 0.58, + "learning_rate": 2.269931107793567e-06, + "logits/chosen": -0.539323091506958, + "logits/rejected": -0.5320712327957153, + "logps/chosen": -374.38763427734375, + "logps/rejected": -448.4871520996094, + "loss": 0.6902, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16210730373859406, + "rewards/margins": 0.06600706279277802, + "rewards/rejected": -0.22811436653137207, + "step": 8810 + }, + { + "epoch": 0.58, + "learning_rate": 2.2642460327304655e-06, + "logits/chosen": -1.025653600692749, + "logits/rejected": -0.9366380572319031, + "logps/chosen": -453.3301696777344, + "logps/rejected": -506.7493591308594, + "loss": 0.69, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.20877043902873993, + "rewards/margins": 0.06926669180393219, + "rewards/rejected": -0.2780371308326721, + "step": 8820 + }, + { + "epoch": 0.58, + "learning_rate": 2.258562187475543e-06, + "logits/chosen": -0.886056125164032, + "logits/rejected": -0.4289335310459137, + "logps/chosen": -431.5992126464844, + "logps/rejected": -468.0819396972656, + "loss": 0.6894, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2046523094177246, + "rewards/margins": 0.07399457693099976, + "rewards/rejected": -0.27864688634872437, + "step": 8830 + }, + { + "epoch": 0.58, + "learning_rate": 2.2528796016785196e-06, + "logits/chosen": -0.4931762218475342, + "logits/rejected": -0.624854564666748, + "logps/chosen": -372.86358642578125, + "logps/rejected": -496.1195373535156, + "loss": 0.6884, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1829456388950348, + "rewards/margins": 0.11358989775180817, + "rewards/rejected": -0.29653555154800415, + "step": 8840 + }, + { + "epoch": 0.58, + "learning_rate": 2.247198304982548e-06, + "logits/chosen": -0.5007576942443848, + "logits/rejected": -0.5184835195541382, + "logps/chosen": -354.9455261230469, + "logps/rejected": -422.990234375, + "loss": 0.6899, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1929740458726883, + "rewards/margins": 0.07199414074420929, + "rewards/rejected": -0.2649681866168976, + "step": 8850 + }, + { + "epoch": 0.58, + "learning_rate": 2.2415183270240533e-06, + "logits/chosen": -1.1592953205108643, + "logits/rejected": -1.062793493270874, + "logps/chosen": -411.7584533691406, + "logps/rejected": -514.5060424804688, + "loss": 0.6887, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.21242883801460266, + "rewards/margins": 0.09844199568033218, + "rewards/rejected": -0.31087082624435425, + "step": 8860 + }, + { + "epoch": 0.58, + "learning_rate": 2.2358396974325837e-06, + "logits/chosen": -0.6631832122802734, + "logits/rejected": -0.6946643590927124, + "logps/chosen": -466.4697265625, + "logps/rejected": -542.7886962890625, + "loss": 0.6877, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.22420215606689453, + "rewards/margins": 0.10258595645427704, + "rewards/rejected": -0.32678812742233276, + "step": 8870 + }, + { + "epoch": 0.58, + "learning_rate": 2.2301624458306525e-06, + "logits/chosen": -0.7441704869270325, + "logits/rejected": -0.8345525860786438, + "logps/chosen": -523.4190063476562, + "logps/rejected": -540.7713623046875, + "loss": 0.6905, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.26407089829444885, + "rewards/margins": 0.06869350373744965, + "rewards/rejected": -0.3327644467353821, + "step": 8880 + }, + { + "epoch": 0.58, + "learning_rate": 2.2244866018335855e-06, + "logits/chosen": -0.6869930028915405, + "logits/rejected": -0.6048182845115662, + "logps/chosen": -440.927978515625, + "logps/rejected": -521.34912109375, + "loss": 0.6903, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.22387027740478516, + "rewards/margins": 0.06601408869028091, + "rewards/rejected": -0.28988438844680786, + "step": 8890 + }, + { + "epoch": 0.58, + "learning_rate": 2.2188121950493648e-06, + "logits/chosen": -0.844458281993866, + "logits/rejected": -0.4750286638736725, + "logps/chosen": -480.06488037109375, + "logps/rejected": -465.815673828125, + "loss": 0.6905, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2613215148448944, + "rewards/margins": 0.06400308758020401, + "rewards/rejected": -0.3253245949745178, + "step": 8900 + }, + { + "epoch": 0.58, + "eval_logits/chosen": -0.6408426761627197, + "eval_logits/rejected": -0.5339207649230957, + "eval_logps/chosen": -507.0281677246094, + "eval_logps/rejected": -584.7627563476562, + "eval_loss": 0.6895042061805725, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -0.2750232219696045, + "eval_rewards/margins": 0.09812760353088379, + "eval_rewards/rejected": -0.37315088510513306, + "eval_runtime": 711.495, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.405, + "step": 8900 + }, + { + "epoch": 0.58, + "learning_rate": 2.2131392550784766e-06, + "logits/chosen": -0.6945571303367615, + "logits/rejected": -0.4500119686126709, + "logps/chosen": -585.53955078125, + "logps/rejected": -574.7061157226562, + "loss": 0.6914, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.3006609082221985, + "rewards/margins": 0.08473079651594162, + "rewards/rejected": -0.3853917121887207, + "step": 8910 + }, + { + "epoch": 0.58, + "learning_rate": 2.2074678115137533e-06, + "logits/chosen": -0.7692958116531372, + "logits/rejected": -0.5215278267860413, + "logps/chosen": -463.76641845703125, + "logps/rejected": -587.1498413085938, + "loss": 0.6867, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2690494656562805, + "rewards/margins": 0.11846695095300674, + "rewards/rejected": -0.38751640915870667, + "step": 8920 + }, + { + "epoch": 0.58, + "learning_rate": 2.201797893940224e-06, + "logits/chosen": -0.49802762269973755, + "logits/rejected": -0.6506582498550415, + "logps/chosen": -514.5814819335938, + "logps/rejected": -604.6488037109375, + "loss": 0.6896, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.28140413761138916, + "rewards/margins": 0.06971672922372818, + "rewards/rejected": -0.35112085938453674, + "step": 8930 + }, + { + "epoch": 0.58, + "learning_rate": 2.196129531934956e-06, + "logits/chosen": -0.5921011567115784, + "logits/rejected": -0.6181560754776001, + "logps/chosen": -492.1026306152344, + "logps/rejected": -557.617919921875, + "loss": 0.6909, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2541886866092682, + "rewards/margins": 0.08357776701450348, + "rewards/rejected": -0.33776646852493286, + "step": 8940 + }, + { + "epoch": 0.59, + "learning_rate": 2.190462755066902e-06, + "logits/chosen": -0.9346240758895874, + "logits/rejected": -0.6352697610855103, + "logps/chosen": -520.2872314453125, + "logps/rejected": -543.5521240234375, + "loss": 0.6925, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.25788596272468567, + "rewards/margins": 0.04875577986240387, + "rewards/rejected": -0.30664172768592834, + "step": 8950 + }, + { + "epoch": 0.59, + "learning_rate": 2.184797592896746e-06, + "logits/chosen": -1.015408992767334, + "logits/rejected": -0.8906301259994507, + "logps/chosen": -459.4242248535156, + "logps/rejected": -503.21258544921875, + "loss": 0.6892, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.224988654255867, + "rewards/margins": 0.0703214704990387, + "rewards/rejected": -0.2953101396560669, + "step": 8960 + }, + { + "epoch": 0.59, + "learning_rate": 2.17913407497675e-06, + "logits/chosen": -0.9990288615226746, + "logits/rejected": -0.9135416150093079, + "logps/chosen": -332.30548095703125, + "logps/rejected": -434.0537109375, + "loss": 0.6906, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.15392352640628815, + "rewards/margins": 0.067796990275383, + "rewards/rejected": -0.22172050178050995, + "step": 8970 + }, + { + "epoch": 0.59, + "learning_rate": 2.173472230850596e-06, + "logits/chosen": -1.244238257408142, + "logits/rejected": -0.9179713129997253, + "logps/chosen": -357.5025329589844, + "logps/rejected": -372.48565673828125, + "loss": 0.6904, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.15723998844623566, + "rewards/margins": 0.055426858365535736, + "rewards/rejected": -0.2126668244600296, + "step": 8980 + }, + { + "epoch": 0.59, + "learning_rate": 2.1678120900532375e-06, + "logits/chosen": -1.006601095199585, + "logits/rejected": -0.8393747210502625, + "logps/chosen": -450.2059020996094, + "logps/rejected": -521.0638427734375, + "loss": 0.691, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2142910659313202, + "rewards/margins": 0.10011821985244751, + "rewards/rejected": -0.3144093155860901, + "step": 8990 + }, + { + "epoch": 0.59, + "learning_rate": 2.1621536821107412e-06, + "logits/chosen": -0.9647336006164551, + "logits/rejected": -0.7863036394119263, + "logps/chosen": -395.4327087402344, + "logps/rejected": -435.06201171875, + "loss": 0.6889, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.19286289811134338, + "rewards/margins": 0.08166161924600601, + "rewards/rejected": -0.2745245099067688, + "step": 9000 + }, + { + "epoch": 0.59, + "eval_logits/chosen": -0.9425244331359863, + "eval_logits/rejected": -0.820991039276123, + "eval_logps/chosen": -420.3014831542969, + "eval_logps/rejected": -482.8201904296875, + "eval_loss": 0.6894809603691101, + "eval_rewards/accuracies": 0.671999990940094, + "eval_rewards/chosen": -0.18829651176929474, + "eval_rewards/margins": 0.08291179686784744, + "eval_rewards/rejected": -0.2712083160877228, + "eval_runtime": 711.4161, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.406, + "step": 9000 + }, + { + "epoch": 0.59, + "learning_rate": 2.1564970365401346e-06, + "logits/chosen": -1.1748039722442627, + "logits/rejected": -0.8536974191665649, + "logps/chosen": -374.5556945800781, + "logps/rejected": -409.34722900390625, + "loss": 0.6884, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.19040462374687195, + "rewards/margins": 0.07279963046312332, + "rewards/rejected": -0.26320427656173706, + "step": 9010 + }, + { + "epoch": 0.59, + "learning_rate": 2.1508421828492527e-06, + "logits/chosen": -1.266325831413269, + "logits/rejected": -0.9437382817268372, + "logps/chosen": -389.5987243652344, + "logps/rejected": -388.35601806640625, + "loss": 0.6921, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.16440604627132416, + "rewards/margins": 0.06699763238430023, + "rewards/rejected": -0.2314036786556244, + "step": 9020 + }, + { + "epoch": 0.59, + "learning_rate": 2.145189150536582e-06, + "logits/chosen": -1.0054259300231934, + "logits/rejected": -0.9243372082710266, + "logps/chosen": -381.30938720703125, + "logps/rejected": -391.0116271972656, + "loss": 0.6907, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.15913987159729004, + "rewards/margins": 0.05910447984933853, + "rewards/rejected": -0.21824435889720917, + "step": 9030 + }, + { + "epoch": 0.59, + "learning_rate": 2.139537969091107e-06, + "logits/chosen": -0.9480058550834656, + "logits/rejected": -0.9302452206611633, + "logps/chosen": -426.36700439453125, + "logps/rejected": -399.29840087890625, + "loss": 0.6926, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1607833206653595, + "rewards/margins": 0.03510197624564171, + "rewards/rejected": -0.1958852857351303, + "step": 9040 + }, + { + "epoch": 0.59, + "learning_rate": 2.1338886679921603e-06, + "logits/chosen": -1.1572840213775635, + "logits/rejected": -1.0841923952102661, + "logps/chosen": -390.0577697753906, + "logps/rejected": -422.123291015625, + "loss": 0.691, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1485147327184677, + "rewards/margins": 0.05247587710618973, + "rewards/rejected": -0.20099060237407684, + "step": 9050 + }, + { + "epoch": 0.59, + "learning_rate": 2.128241276709263e-06, + "logits/chosen": -1.3356164693832397, + "logits/rejected": -1.3861857652664185, + "logps/chosen": -322.12042236328125, + "logps/rejected": -411.61212158203125, + "loss": 0.6912, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11885868012905121, + "rewards/margins": 0.06945428252220154, + "rewards/rejected": -0.18831294775009155, + "step": 9060 + }, + { + "epoch": 0.59, + "learning_rate": 2.1225958247019746e-06, + "logits/chosen": -1.4204630851745605, + "logits/rejected": -1.453250527381897, + "logps/chosen": -330.41796875, + "logps/rejected": -398.22210693359375, + "loss": 0.6911, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1432761251926422, + "rewards/margins": 0.04766453430056572, + "rewards/rejected": -0.19094067811965942, + "step": 9070 + }, + { + "epoch": 0.59, + "learning_rate": 2.1169523414197383e-06, + "logits/chosen": -1.002547025680542, + "logits/rejected": -0.9242392778396606, + "logps/chosen": -345.39385986328125, + "logps/rejected": -413.39593505859375, + "loss": 0.6905, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.14658816158771515, + "rewards/margins": 0.044680409133434296, + "rewards/rejected": -0.19126857817173004, + "step": 9080 + }, + { + "epoch": 0.59, + "learning_rate": 2.1113108563017267e-06, + "logits/chosen": -0.8716124296188354, + "logits/rejected": -0.8876460194587708, + "logps/chosen": -437.42974853515625, + "logps/rejected": -488.0655212402344, + "loss": 0.6903, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2246464490890503, + "rewards/margins": 0.08126501739025116, + "rewards/rejected": -0.30591148138046265, + "step": 9090 + }, + { + "epoch": 0.6, + "learning_rate": 2.1056713987766905e-06, + "logits/chosen": -1.1082594394683838, + "logits/rejected": -0.9452310800552368, + "logps/chosen": -418.8975524902344, + "logps/rejected": -449.0021057128906, + "loss": 0.6906, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20369724929332733, + "rewards/margins": 0.07881996780633926, + "rewards/rejected": -0.2825172245502472, + "step": 9100 + }, + { + "epoch": 0.6, + "eval_logits/chosen": -1.003013014793396, + "eval_logits/rejected": -0.8787204623222351, + "eval_logps/chosen": -430.4298095703125, + "eval_logps/rejected": -486.5714416503906, + "eval_loss": 0.6895273923873901, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -0.19842489063739777, + "eval_rewards/margins": 0.07653466612100601, + "eval_rewards/rejected": -0.2749595642089844, + "eval_runtime": 712.7409, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 1.403, + "step": 9100 + }, + { + "epoch": 0.6, + "learning_rate": 2.1000339982628022e-06, + "logits/chosen": -0.8402494192123413, + "logits/rejected": -0.7123501300811768, + "logps/chosen": -485.10626220703125, + "logps/rejected": -507.3006896972656, + "loss": 0.6898, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2338334023952484, + "rewards/margins": 0.056514572352170944, + "rewards/rejected": -0.29034799337387085, + "step": 9110 + }, + { + "epoch": 0.6, + "learning_rate": 2.0943986841675043e-06, + "logits/chosen": -1.1401221752166748, + "logits/rejected": -0.8223906755447388, + "logps/chosen": -401.43218994140625, + "logps/rejected": -469.53082275390625, + "loss": 0.6897, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.19822123646736145, + "rewards/margins": 0.0874384194612503, + "rewards/rejected": -0.2856596112251282, + "step": 9120 + }, + { + "epoch": 0.6, + "learning_rate": 2.088765485887356e-06, + "logits/chosen": -1.15511155128479, + "logits/rejected": -0.8909670114517212, + "logps/chosen": -433.95330810546875, + "logps/rejected": -433.62237548828125, + "loss": 0.6917, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.18923839926719666, + "rewards/margins": 0.041294485330581665, + "rewards/rejected": -0.2305329144001007, + "step": 9130 + }, + { + "epoch": 0.6, + "learning_rate": 2.083134432807879e-06, + "logits/chosen": -1.2437583208084106, + "logits/rejected": -1.0313438177108765, + "logps/chosen": -401.11529541015625, + "logps/rejected": -514.318603515625, + "loss": 0.6891, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.20684321224689484, + "rewards/margins": 0.09168516844511032, + "rewards/rejected": -0.29852837324142456, + "step": 9140 + }, + { + "epoch": 0.6, + "learning_rate": 2.077505554303404e-06, + "logits/chosen": -1.0879318714141846, + "logits/rejected": -1.0778875350952148, + "logps/chosen": -328.68927001953125, + "logps/rejected": -402.76544189453125, + "loss": 0.6899, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1541372537612915, + "rewards/margins": 0.06970960646867752, + "rewards/rejected": -0.22384683787822723, + "step": 9150 + }, + { + "epoch": 0.6, + "learning_rate": 2.071878879736918e-06, + "logits/chosen": -1.2794255018234253, + "logits/rejected": -1.1359623670578003, + "logps/chosen": -445.3270568847656, + "logps/rejected": -587.7103271484375, + "loss": 0.6901, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.19848747551441193, + "rewards/margins": 0.0640995055437088, + "rewards/rejected": -0.2625869810581207, + "step": 9160 + }, + { + "epoch": 0.6, + "learning_rate": 2.0662544384599136e-06, + "logits/chosen": -1.0890775918960571, + "logits/rejected": -0.9574755430221558, + "logps/chosen": -342.47711181640625, + "logps/rejected": -402.0417785644531, + "loss": 0.6893, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1403704583644867, + "rewards/margins": 0.07605002820491791, + "rewards/rejected": -0.2164204865694046, + "step": 9170 + }, + { + "epoch": 0.6, + "learning_rate": 2.0606322598122314e-06, + "logits/chosen": -1.0150939226150513, + "logits/rejected": -1.157862901687622, + "logps/chosen": -346.90814208984375, + "logps/rejected": -394.8082580566406, + "loss": 0.6924, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1603945940732956, + "rewards/margins": 0.028239255771040916, + "rewards/rejected": -0.18863385915756226, + "step": 9180 + }, + { + "epoch": 0.6, + "learning_rate": 2.0550123731219085e-06, + "logits/chosen": -1.5570096969604492, + "logits/rejected": -1.143188714981079, + "logps/chosen": -394.85992431640625, + "logps/rejected": -420.784423828125, + "loss": 0.6889, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.14168012142181396, + "rewards/margins": 0.06706938147544861, + "rewards/rejected": -0.20874948799610138, + "step": 9190 + }, + { + "epoch": 0.6, + "learning_rate": 2.0493948077050267e-06, + "logits/chosen": -0.7615960240364075, + "logits/rejected": -0.6373671293258667, + "logps/chosen": -375.5524597167969, + "logps/rejected": -436.08282470703125, + "loss": 0.6893, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.182296484708786, + "rewards/margins": 0.07861991226673126, + "rewards/rejected": -0.26091641187667847, + "step": 9200 + }, + { + "epoch": 0.6, + "eval_logits/chosen": -1.085320234298706, + "eval_logits/rejected": -0.9545806646347046, + "eval_logps/chosen": -403.3822326660156, + "eval_logps/rejected": -463.1018981933594, + "eval_loss": 0.6895037293434143, + "eval_rewards/accuracies": 0.6729999780654907, + "eval_rewards/chosen": -0.1713773012161255, + "eval_rewards/margins": 0.08011273294687271, + "eval_rewards/rejected": -0.2514900267124176, + "eval_runtime": 711.4246, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.406, + "step": 9200 + }, + { + "epoch": 0.6, + "learning_rate": 2.0437795928655596e-06, + "logits/chosen": -1.1752150058746338, + "logits/rejected": -1.2434207201004028, + "logps/chosen": -457.6922912597656, + "logps/rejected": -494.33966064453125, + "loss": 0.6907, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.17597965896129608, + "rewards/margins": 0.054831117391586304, + "rewards/rejected": -0.23081080615520477, + "step": 9210 + }, + { + "epoch": 0.6, + "learning_rate": 2.0381667578952184e-06, + "logits/chosen": -1.1544066667556763, + "logits/rejected": -1.0161818265914917, + "logps/chosen": -409.118408203125, + "logps/rejected": -512.7477416992188, + "loss": 0.6879, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1982455998659134, + "rewards/margins": 0.0980122834444046, + "rewards/rejected": -0.2962579131126404, + "step": 9220 + }, + { + "epoch": 0.6, + "learning_rate": 2.0325563320732995e-06, + "logits/chosen": -1.1459739208221436, + "logits/rejected": -1.059901475906372, + "logps/chosen": -457.919921875, + "logps/rejected": -496.1634216308594, + "loss": 0.6902, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19458158314228058, + "rewards/margins": 0.08226142078638077, + "rewards/rejected": -0.27684301137924194, + "step": 9230 + }, + { + "epoch": 0.6, + "learning_rate": 2.026948344666532e-06, + "logits/chosen": -0.7194372415542603, + "logits/rejected": -0.8435705304145813, + "logps/chosen": -418.29852294921875, + "logps/rejected": -508.6609802246094, + "loss": 0.6885, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.21917256712913513, + "rewards/margins": 0.09080708771944046, + "rewards/rejected": -0.30997970700263977, + "step": 9240 + }, + { + "epoch": 0.61, + "learning_rate": 2.0213428249289257e-06, + "logits/chosen": -0.40980544686317444, + "logits/rejected": -0.6821666955947876, + "logps/chosen": -415.1513671875, + "logps/rejected": -512.7869873046875, + "loss": 0.6876, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2166108340024948, + "rewards/margins": 0.0958281084895134, + "rewards/rejected": -0.31243896484375, + "step": 9250 + }, + { + "epoch": 0.61, + "learning_rate": 2.0157398021016175e-06, + "logits/chosen": -0.7078922390937805, + "logits/rejected": -0.7407819628715515, + "logps/chosen": -340.1689758300781, + "logps/rejected": -475.88323974609375, + "loss": 0.691, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.19263359904289246, + "rewards/margins": 0.08950956165790558, + "rewards/rejected": -0.28214317560195923, + "step": 9260 + }, + { + "epoch": 0.61, + "learning_rate": 2.010139305412719e-06, + "logits/chosen": -1.393827199935913, + "logits/rejected": -1.0710934400558472, + "logps/chosen": -490.1766662597656, + "logps/rejected": -514.3074951171875, + "loss": 0.6914, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.21358105540275574, + "rewards/margins": 0.06649493426084518, + "rewards/rejected": -0.28007596731185913, + "step": 9270 + }, + { + "epoch": 0.61, + "learning_rate": 2.0045413640771644e-06, + "logits/chosen": -1.0676630735397339, + "logits/rejected": -0.7858937978744507, + "logps/chosen": -457.89892578125, + "logps/rejected": -559.87353515625, + "loss": 0.6878, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.20273356139659882, + "rewards/margins": 0.09871906787157059, + "rewards/rejected": -0.3014525771141052, + "step": 9280 + }, + { + "epoch": 0.61, + "learning_rate": 1.998946007296558e-06, + "logits/chosen": -1.0419930219650269, + "logits/rejected": -0.9732457995414734, + "logps/chosen": -513.1734619140625, + "logps/rejected": -541.3367309570312, + "loss": 0.6887, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20457284152507782, + "rewards/margins": 0.09160022437572479, + "rewards/rejected": -0.2961730659008026, + "step": 9290 + }, + { + "epoch": 0.61, + "learning_rate": 1.9933532642590215e-06, + "logits/chosen": -0.5975018739700317, + "logits/rejected": -0.3555176556110382, + "logps/chosen": -357.795654296875, + "logps/rejected": -402.771484375, + "loss": 0.6902, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.16863739490509033, + "rewards/margins": 0.0951366201043129, + "rewards/rejected": -0.26377400755882263, + "step": 9300 + }, + { + "epoch": 0.61, + "eval_logits/chosen": -0.89243084192276, + "eval_logits/rejected": -0.7708998322486877, + "eval_logps/chosen": -436.14013671875, + "eval_logps/rejected": -502.2715148925781, + "eval_loss": 0.6894857287406921, + "eval_rewards/accuracies": 0.6790000200271606, + "eval_rewards/chosen": -0.20413516461849213, + "eval_rewards/margins": 0.08652444928884506, + "eval_rewards/rejected": -0.2906596064567566, + "eval_runtime": 711.624, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 1.405, + "step": 9300 + }, + { + "epoch": 0.61, + "learning_rate": 1.987763164139042e-06, + "logits/chosen": -1.0309691429138184, + "logits/rejected": -0.7755805253982544, + "logps/chosen": -406.9186706542969, + "logps/rejected": -505.7527770996094, + "loss": 0.6892, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.20166358351707458, + "rewards/margins": 0.09657981246709824, + "rewards/rejected": -0.29824337363243103, + "step": 9310 + }, + { + "epoch": 0.61, + "learning_rate": 1.982175736097321e-06, + "logits/chosen": -0.7986326217651367, + "logits/rejected": -0.7574408650398254, + "logps/chosen": -505.83282470703125, + "logps/rejected": -606.5858154296875, + "loss": 0.6905, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.23129181563854218, + "rewards/margins": 0.08373314142227173, + "rewards/rejected": -0.3150249421596527, + "step": 9320 + }, + { + "epoch": 0.61, + "learning_rate": 1.9765910092806196e-06, + "logits/chosen": -0.8272625803947449, + "logits/rejected": -0.6810213327407837, + "logps/chosen": -341.3271179199219, + "logps/rejected": -382.89288330078125, + "loss": 0.6895, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.16325163841247559, + "rewards/margins": 0.06122400239109993, + "rewards/rejected": -0.2244756519794464, + "step": 9330 + }, + { + "epoch": 0.61, + "learning_rate": 1.9710090128216083e-06, + "logits/chosen": -0.9659280776977539, + "logits/rejected": -0.7761660814285278, + "logps/chosen": -455.28497314453125, + "logps/rejected": -549.11181640625, + "loss": 0.6884, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.23255494236946106, + "rewards/margins": 0.1099899634718895, + "rewards/rejected": -0.34254494309425354, + "step": 9340 + }, + { + "epoch": 0.61, + "learning_rate": 1.9654297758387155e-06, + "logits/chosen": -0.8274409174919128, + "logits/rejected": -0.6248763799667358, + "logps/chosen": -396.43341064453125, + "logps/rejected": -489.4363708496094, + "loss": 0.6899, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.23556342720985413, + "rewards/margins": 0.07227747142314911, + "rewards/rejected": -0.30784088373184204, + "step": 9350 + }, + { + "epoch": 0.61, + "learning_rate": 1.9598533274359736e-06, + "logits/chosen": -0.7758110761642456, + "logits/rejected": -0.902672290802002, + "logps/chosen": -464.6673278808594, + "logps/rejected": -500.7264709472656, + "loss": 0.6924, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.22498705983161926, + "rewards/margins": 0.02869754657149315, + "rewards/rejected": -0.2536846101284027, + "step": 9360 + }, + { + "epoch": 0.61, + "learning_rate": 1.9542796967028697e-06, + "logits/chosen": -1.1198780536651611, + "logits/rejected": -0.8304969072341919, + "logps/chosen": -425.0093688964844, + "logps/rejected": -467.0662536621094, + "loss": 0.6913, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20676879584789276, + "rewards/margins": 0.060299746692180634, + "rewards/rejected": -0.2670685648918152, + "step": 9370 + }, + { + "epoch": 0.61, + "learning_rate": 1.948708912714192e-06, + "logits/chosen": -0.4404030740261078, + "logits/rejected": -0.6012068390846252, + "logps/chosen": -505.26495361328125, + "logps/rejected": -536.6444091796875, + "loss": 0.6913, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2583196759223938, + "rewards/margins": 0.06231521815061569, + "rewards/rejected": -0.3206349015235901, + "step": 9380 + }, + { + "epoch": 0.61, + "learning_rate": 1.9431410045298786e-06, + "logits/chosen": -0.5475056767463684, + "logits/rejected": -0.7080952525138855, + "logps/chosen": -436.47412109375, + "logps/rejected": -505.7579650878906, + "loss": 0.6893, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.21934881806373596, + "rewards/margins": 0.07025954127311707, + "rewards/rejected": -0.28960832953453064, + "step": 9390 + }, + { + "epoch": 0.62, + "learning_rate": 1.9375760011948654e-06, + "logits/chosen": -0.8554168939590454, + "logits/rejected": -0.8827948570251465, + "logps/chosen": -414.16375732421875, + "logps/rejected": -530.1060791015625, + "loss": 0.6885, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.21448588371276855, + "rewards/margins": 0.09362180531024933, + "rewards/rejected": -0.3081076741218567, + "step": 9400 + }, + { + "epoch": 0.62, + "eval_logits/chosen": -0.787796139717102, + "eval_logits/rejected": -0.6740127205848694, + "eval_logps/chosen": -455.37786865234375, + "eval_logps/rejected": -517.357421875, + "eval_loss": 0.6894824504852295, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -0.2233729362487793, + "eval_rewards/margins": 0.08237263560295105, + "eval_rewards/rejected": -0.30574557185173035, + "eval_runtime": 711.2531, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 1.406, + "step": 9400 + }, + { + "epoch": 0.62, + "learning_rate": 1.932013931738937e-06, + "logits/chosen": -0.7979623675346375, + "logits/rejected": -0.5755224227905273, + "logps/chosen": -462.43115234375, + "logps/rejected": -596.0765380859375, + "loss": 0.685, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.2588294446468353, + "rewards/margins": 0.12076359987258911, + "rewards/rejected": -0.37959304451942444, + "step": 9410 + }, + { + "epoch": 0.62, + "learning_rate": 1.9264548251765717e-06, + "logits/chosen": -0.9121201634407043, + "logits/rejected": -0.862923800945282, + "logps/chosen": -441.1590881347656, + "logps/rejected": -514.7960205078125, + "loss": 0.6907, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.23735642433166504, + "rewards/margins": 0.07674865424633026, + "rewards/rejected": -0.3141050934791565, + "step": 9420 + }, + { + "epoch": 0.62, + "learning_rate": 1.9208987105067924e-06, + "logits/chosen": -0.6129502058029175, + "logits/rejected": -0.43746525049209595, + "logps/chosen": -465.37872314453125, + "logps/rejected": -516.73974609375, + "loss": 0.691, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.25082165002822876, + "rewards/margins": 0.07524871081113815, + "rewards/rejected": -0.3260703682899475, + "step": 9430 + }, + { + "epoch": 0.62, + "learning_rate": 1.9153456167130154e-06, + "logits/chosen": -0.7086097598075867, + "logits/rejected": -0.7439953088760376, + "logps/chosen": -449.641845703125, + "logps/rejected": -551.921142578125, + "loss": 0.6906, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.24557125568389893, + "rewards/margins": 0.07608067989349365, + "rewards/rejected": -0.3216519057750702, + "step": 9440 + }, + { + "epoch": 0.62, + "learning_rate": 1.9097955727628975e-06, + "logits/chosen": -0.9949433207511902, + "logits/rejected": -1.032881259918213, + "logps/chosen": -380.15447998046875, + "logps/rejected": -458.3058166503906, + "loss": 0.6901, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1841690093278885, + "rewards/margins": 0.06621630489826202, + "rewards/rejected": -0.2503852844238281, + "step": 9450 + }, + { + "epoch": 0.62, + "learning_rate": 1.904248607608187e-06, + "logits/chosen": -0.6118771433830261, + "logits/rejected": -0.8482357263565063, + "logps/chosen": -460.8009338378906, + "logps/rejected": -472.385498046875, + "loss": 0.6913, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.20358090102672577, + "rewards/margins": 0.05869137495756149, + "rewards/rejected": -0.26227226853370667, + "step": 9460 + }, + { + "epoch": 0.62, + "learning_rate": 1.8987047501845714e-06, + "logits/chosen": -0.9858795404434204, + "logits/rejected": -0.7422040104866028, + "logps/chosen": -359.5581359863281, + "logps/rejected": -448.4444274902344, + "loss": 0.6895, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19263368844985962, + "rewards/margins": 0.09513186663389206, + "rewards/rejected": -0.2877655625343323, + "step": 9470 + }, + { + "epoch": 0.62, + "learning_rate": 1.8931640294115267e-06, + "logits/chosen": -0.7054397463798523, + "logits/rejected": -0.45379215478897095, + "logps/chosen": -383.64605712890625, + "logps/rejected": -470.921875, + "loss": 0.6894, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1896400898694992, + "rewards/margins": 0.10280604660511017, + "rewards/rejected": -0.29244619607925415, + "step": 9480 + }, + { + "epoch": 0.62, + "learning_rate": 1.8876264741921662e-06, + "logits/chosen": -0.7664824724197388, + "logits/rejected": -0.7304006814956665, + "logps/chosen": -371.33160400390625, + "logps/rejected": -480.13214111328125, + "loss": 0.6863, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1812061369419098, + "rewards/margins": 0.11423603445291519, + "rewards/rejected": -0.2954421937465668, + "step": 9490 + }, + { + "epoch": 0.62, + "learning_rate": 1.8820921134130912e-06, + "logits/chosen": -0.9434563517570496, + "logits/rejected": -0.5874304175376892, + "logps/chosen": -430.4812927246094, + "logps/rejected": -517.7457275390625, + "loss": 0.6864, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.19996920228004456, + "rewards/margins": 0.13217367231845856, + "rewards/rejected": -0.33214282989501953, + "step": 9500 + }, + { + "epoch": 0.62, + "eval_logits/chosen": -0.7308316826820374, + "eval_logits/rejected": -0.6198488473892212, + "eval_logps/chosen": -440.1344909667969, + "eval_logps/rejected": -503.9653625488281, + "eval_loss": 0.6894819140434265, + "eval_rewards/accuracies": 0.6669999957084656, + "eval_rewards/chosen": -0.20812954008579254, + "eval_rewards/margins": 0.08422394841909409, + "eval_rewards/rejected": -0.2923535108566284, + "eval_runtime": 713.9575, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 1.401, + "step": 9500 + }, + { + "epoch": 0.62, + "learning_rate": 1.8765609759442378e-06, + "logits/chosen": -0.2680138647556305, + "logits/rejected": -0.4130997657775879, + "logps/chosen": -458.49627685546875, + "logps/rejected": -515.45458984375, + "loss": 0.6896, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2171749621629715, + "rewards/margins": 0.06777051836252213, + "rewards/rejected": -0.2849455177783966, + "step": 9510 + }, + { + "epoch": 0.62, + "learning_rate": 1.8710330906387288e-06, + "logits/chosen": -1.2024356126785278, + "logits/rejected": -1.1673656702041626, + "logps/chosen": -469.227294921875, + "logps/rejected": -582.1397705078125, + "loss": 0.6886, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.23073968291282654, + "rewards/margins": 0.0893225371837616, + "rewards/rejected": -0.32006222009658813, + "step": 9520 + }, + { + "epoch": 0.62, + "learning_rate": 1.8655084863327222e-06, + "logits/chosen": -0.6578270196914673, + "logits/rejected": -0.5960260629653931, + "logps/chosen": -350.1307678222656, + "logps/rejected": -429.6395568847656, + "loss": 0.6914, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.16667279601097107, + "rewards/margins": 0.07334667444229126, + "rewards/rejected": -0.24001947045326233, + "step": 9530 + }, + { + "epoch": 0.62, + "learning_rate": 1.8599871918452603e-06, + "logits/chosen": -0.5374518036842346, + "logits/rejected": -0.6281536817550659, + "logps/chosen": -431.32977294921875, + "logps/rejected": -529.6610107421875, + "loss": 0.6904, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.20823796093463898, + "rewards/margins": 0.08508185297250748, + "rewards/rejected": -0.29331979155540466, + "step": 9540 + }, + { + "epoch": 0.62, + "learning_rate": 1.8544692359781192e-06, + "logits/chosen": -0.5340497493743896, + "logits/rejected": -0.5739427804946899, + "logps/chosen": -366.29071044921875, + "logps/rejected": -406.6949157714844, + "loss": 0.691, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1787060797214508, + "rewards/margins": 0.07135146111249924, + "rewards/rejected": -0.250057578086853, + "step": 9550 + }, + { + "epoch": 0.63, + "learning_rate": 1.8489546475156602e-06, + "logits/chosen": -0.9955617189407349, + "logits/rejected": -1.0135492086410522, + "logps/chosen": -411.125, + "logps/rejected": -467.4065856933594, + "loss": 0.6909, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1916321963071823, + "rewards/margins": 0.0744495689868927, + "rewards/rejected": -0.2660817801952362, + "step": 9560 + }, + { + "epoch": 0.63, + "learning_rate": 1.8434434552246778e-06, + "logits/chosen": -0.7039095163345337, + "logits/rejected": -0.6999632120132446, + "logps/chosen": -402.59906005859375, + "logps/rejected": -470.253173828125, + "loss": 0.6899, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.19296754896640778, + "rewards/margins": 0.07891705632209778, + "rewards/rejected": -0.27188462018966675, + "step": 9570 + }, + { + "epoch": 0.63, + "learning_rate": 1.837935687854251e-06, + "logits/chosen": -0.8548401594161987, + "logits/rejected": -0.6474151015281677, + "logps/chosen": -410.0220642089844, + "logps/rejected": -464.441162109375, + "loss": 0.6881, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1884753704071045, + "rewards/margins": 0.08621923625469208, + "rewards/rejected": -0.27469462156295776, + "step": 9580 + }, + { + "epoch": 0.63, + "learning_rate": 1.832431374135592e-06, + "logits/chosen": -0.8793581128120422, + "logits/rejected": -0.9984419941902161, + "logps/chosen": -440.98486328125, + "logps/rejected": -550.4117431640625, + "loss": 0.6869, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.19254949688911438, + "rewards/margins": 0.12829996645450592, + "rewards/rejected": -0.3208494484424591, + "step": 9590 + }, + { + "epoch": 0.63, + "learning_rate": 1.8269305427818977e-06, + "logits/chosen": -0.9290812611579895, + "logits/rejected": -0.8652921915054321, + "logps/chosen": -393.28179931640625, + "logps/rejected": -435.561279296875, + "loss": 0.6896, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18466556072235107, + "rewards/margins": 0.06994330883026123, + "rewards/rejected": -0.2546088695526123, + "step": 9600 + }, + { + "epoch": 0.63, + "eval_logits/chosen": -0.7276079058647156, + "eval_logits/rejected": -0.6168313026428223, + "eval_logps/chosen": -417.38580322265625, + "eval_logps/rejected": -483.2872619628906, + "eval_loss": 0.6895156502723694, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -0.18538087606430054, + "eval_rewards/margins": 0.08629447966814041, + "eval_rewards/rejected": -0.27167531847953796, + "eval_runtime": 712.7219, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 1.403, + "step": 9600 + }, + { + "epoch": 0.63, + "learning_rate": 1.821433222488199e-06, + "logits/chosen": -0.357731431722641, + "logits/rejected": -0.5084939002990723, + "logps/chosen": -405.1507263183594, + "logps/rejected": -457.59283447265625, + "loss": 0.689, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.17953599989414215, + "rewards/margins": 0.08018968254327774, + "rewards/rejected": -0.2597256600856781, + "step": 9610 + }, + { + "epoch": 0.63, + "learning_rate": 1.8159394419312112e-06, + "logits/chosen": -0.9117706418037415, + "logits/rejected": -0.628116250038147, + "logps/chosen": -446.15411376953125, + "logps/rejected": -529.2586059570312, + "loss": 0.6874, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.18742407858371735, + "rewards/margins": 0.1296432465314865, + "rewards/rejected": -0.31706729531288147, + "step": 9620 + }, + { + "epoch": 0.63, + "learning_rate": 1.8104492297691845e-06, + "logits/chosen": -0.8102725744247437, + "logits/rejected": -0.6468926668167114, + "logps/chosen": -486.02703857421875, + "logps/rejected": -541.3604125976562, + "loss": 0.6913, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.25950390100479126, + "rewards/margins": 0.07944594323635101, + "rewards/rejected": -0.33894985914230347, + "step": 9630 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049626146417562e-06, + "logits/chosen": 0.026175355538725853, + "logits/rejected": -0.2544723153114319, + "logps/chosen": -342.4808349609375, + "logps/rejected": -426.19879150390625, + "loss": 0.6904, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18740496039390564, + "rewards/margins": 0.08743083477020264, + "rewards/rejected": -0.2748357951641083, + "step": 9640 + }, + { + "epoch": 0.63, + "learning_rate": 1.7994796251697983e-06, + "logits/chosen": -0.44325417280197144, + "logits/rejected": -0.2331937998533249, + "logps/chosen": -398.76373291015625, + "logps/rejected": -543.6065063476562, + "loss": 0.6888, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2067248374223709, + "rewards/margins": 0.10527799278497696, + "rewards/rejected": -0.31200283765792847, + "step": 9650 + }, + { + "epoch": 0.63, + "learning_rate": 1.794000289955269e-06, + "logits/chosen": -0.5990532636642456, + "logits/rejected": -0.7400835752487183, + "logps/chosen": -462.24212646484375, + "logps/rejected": -519.3464965820312, + "loss": 0.6884, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1951819211244583, + "rewards/margins": 0.08855116367340088, + "rewards/rejected": -0.283733069896698, + "step": 9660 + }, + { + "epoch": 0.63, + "learning_rate": 1.7885246375810646e-06, + "logits/chosen": -0.23755809664726257, + "logits/rejected": -0.2857111394405365, + "logps/chosen": -383.4107360839844, + "logps/rejected": -439.93896484375, + "loss": 0.6909, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.15913687646389008, + "rewards/margins": 0.060290198773145676, + "rewards/rejected": -0.21942707896232605, + "step": 9670 + }, + { + "epoch": 0.63, + "learning_rate": 1.7830526966108713e-06, + "logits/chosen": -0.5294663310050964, + "logits/rejected": -0.3975537419319153, + "logps/chosen": -385.46209716796875, + "logps/rejected": -486.99285888671875, + "loss": 0.6846, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.19898918271064758, + "rewards/margins": 0.13014227151870728, + "rewards/rejected": -0.32913145422935486, + "step": 9680 + }, + { + "epoch": 0.63, + "learning_rate": 1.7775844955890129e-06, + "logits/chosen": -0.42855939269065857, + "logits/rejected": -0.328756183385849, + "logps/chosen": -384.8292541503906, + "logps/rejected": -471.77593994140625, + "loss": 0.6881, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1732829064130783, + "rewards/margins": 0.09852338582277298, + "rewards/rejected": -0.2718062996864319, + "step": 9690 + }, + { + "epoch": 0.63, + "learning_rate": 1.7721200630403046e-06, + "logits/chosen": -0.3515569567680359, + "logits/rejected": -0.3742366433143616, + "logps/chosen": -369.99420166015625, + "logps/rejected": -472.16693115234375, + "loss": 0.6884, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1742318570613861, + "rewards/margins": 0.07662680745124817, + "rewards/rejected": -0.2508586645126343, + "step": 9700 + }, + { + "epoch": 0.63, + "eval_logits/chosen": -0.6133941411972046, + "eval_logits/rejected": -0.5091242790222168, + "eval_logps/chosen": -434.1581726074219, + "eval_logps/rejected": -500.04058837890625, + "eval_loss": 0.689453125, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -0.20215323567390442, + "eval_rewards/margins": 0.08627549558877945, + "eval_rewards/rejected": -0.28842872381210327, + "eval_runtime": 712.5855, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 1.403, + "step": 9700 + }, + { + "epoch": 0.64, + "learning_rate": 1.7666594274699037e-06, + "logits/chosen": -0.5265650749206543, + "logits/rejected": -0.5063202381134033, + "logps/chosen": -477.45159912109375, + "logps/rejected": -557.30419921875, + "loss": 0.6884, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.22476127743721008, + "rewards/margins": 0.11805678904056549, + "rewards/rejected": -0.3428180515766144, + "step": 9710 + }, + { + "epoch": 0.64, + "learning_rate": 1.76120261736316e-06, + "logits/chosen": -0.47983318567276, + "logits/rejected": -0.20942839980125427, + "logps/chosen": -440.2186584472656, + "logps/rejected": -536.6563110351562, + "loss": 0.6865, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.21350359916687012, + "rewards/margins": 0.12358088791370392, + "rewards/rejected": -0.33708450198173523, + "step": 9720 + }, + { + "epoch": 0.64, + "learning_rate": 1.755749661185468e-06, + "logits/chosen": -0.7283905148506165, + "logits/rejected": -0.5965417623519897, + "logps/chosen": -487.33905029296875, + "logps/rejected": -531.7615966796875, + "loss": 0.6896, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.19288946688175201, + "rewards/margins": 0.10325628519058228, + "rewards/rejected": -0.2961457371711731, + "step": 9730 + }, + { + "epoch": 0.64, + "learning_rate": 1.7503005873821183e-06, + "logits/chosen": -0.529441237449646, + "logits/rejected": -0.7391183376312256, + "logps/chosen": -351.6926574707031, + "logps/rejected": -468.90545654296875, + "loss": 0.6895, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.191264346241951, + "rewards/margins": 0.09094887971878052, + "rewards/rejected": -0.2822132408618927, + "step": 9740 + }, + { + "epoch": 0.64, + "learning_rate": 1.744855424378148e-06, + "logits/chosen": -0.3442351818084717, + "logits/rejected": -0.7468951940536499, + "logps/chosen": -372.4602355957031, + "logps/rejected": -495.74658203125, + "loss": 0.6909, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1838332712650299, + "rewards/margins": 0.09984080493450165, + "rewards/rejected": -0.28367406129837036, + "step": 9750 + }, + { + "epoch": 0.64, + "learning_rate": 1.7394142005781973e-06, + "logits/chosen": -0.7553730607032776, + "logits/rejected": -0.6287229061126709, + "logps/chosen": -458.0218200683594, + "logps/rejected": -533.7131958007812, + "loss": 0.6928, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.19631078839302063, + "rewards/margins": 0.07212035357952118, + "rewards/rejected": -0.2684311270713806, + "step": 9760 + }, + { + "epoch": 0.64, + "learning_rate": 1.7339769443663528e-06, + "logits/chosen": -0.6719237565994263, + "logits/rejected": -0.6959569454193115, + "logps/chosen": -333.442626953125, + "logps/rejected": -423.80023193359375, + "loss": 0.6874, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1905004382133484, + "rewards/margins": 0.089654341340065, + "rewards/rejected": -0.2801547646522522, + "step": 9770 + }, + { + "epoch": 0.64, + "learning_rate": 1.7285436841060078e-06, + "logits/chosen": -0.7846375703811646, + "logits/rejected": -0.6807979345321655, + "logps/chosen": -460.4491271972656, + "logps/rejected": -497.080322265625, + "loss": 0.6898, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18432073295116425, + "rewards/margins": 0.07958535850048065, + "rewards/rejected": -0.2639060914516449, + "step": 9780 + }, + { + "epoch": 0.64, + "learning_rate": 1.7231144481397083e-06, + "logits/chosen": -0.8831745982170105, + "logits/rejected": -0.7654666900634766, + "logps/chosen": -393.7134094238281, + "logps/rejected": -424.652587890625, + "loss": 0.6905, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.17331509292125702, + "rewards/margins": 0.05637786537408829, + "rewards/rejected": -0.22969293594360352, + "step": 9790 + }, + { + "epoch": 0.64, + "learning_rate": 1.7176892647890092e-06, + "logits/chosen": -0.7050382494926453, + "logits/rejected": -0.3511679768562317, + "logps/chosen": -425.826416015625, + "logps/rejected": -440.35748291015625, + "loss": 0.6913, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.19016043841838837, + "rewards/margins": 0.05341249704360962, + "rewards/rejected": -0.2435729205608368, + "step": 9800 + }, + { + "epoch": 0.64, + "eval_logits/chosen": -0.6631197929382324, + "eval_logits/rejected": -0.5572806000709534, + "eval_logps/chosen": -428.2689514160156, + "eval_logps/rejected": -488.5942077636719, + "eval_loss": 0.6894755363464355, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -0.19626396894454956, + "eval_rewards/margins": 0.08071837574243546, + "eval_rewards/rejected": -0.2769823670387268, + "eval_runtime": 709.5855, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 1.409, + "step": 9800 + }, + { + "epoch": 0.64, + "learning_rate": 1.7122681623543239e-06, + "logits/chosen": -0.8072730302810669, + "logits/rejected": -0.9070757031440735, + "logps/chosen": -437.646240234375, + "logps/rejected": -526.9542846679688, + "loss": 0.6896, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19065766036510468, + "rewards/margins": 0.10052184760570526, + "rewards/rejected": -0.29117950797080994, + "step": 9810 + }, + { + "epoch": 0.64, + "learning_rate": 1.7068511691147788e-06, + "logits/chosen": -0.5625017881393433, + "logits/rejected": -0.4836824834346771, + "logps/chosen": -361.634521484375, + "logps/rejected": -447.08758544921875, + "loss": 0.6911, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1675034761428833, + "rewards/margins": 0.07708346098661423, + "rewards/rejected": -0.24458694458007812, + "step": 9820 + }, + { + "epoch": 0.64, + "learning_rate": 1.7014383133280636e-06, + "logits/chosen": -0.7409011125564575, + "logits/rejected": -0.38703542947769165, + "logps/chosen": -476.31243896484375, + "logps/rejected": -494.09637451171875, + "loss": 0.6899, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.22767803072929382, + "rewards/margins": 0.07117791473865509, + "rewards/rejected": -0.2988559305667877, + "step": 9830 + }, + { + "epoch": 0.64, + "learning_rate": 1.696029623230286e-06, + "logits/chosen": -0.6452018022537231, + "logits/rejected": -0.7865114212036133, + "logps/chosen": -469.92877197265625, + "logps/rejected": -596.3943481445312, + "loss": 0.689, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2140064686536789, + "rewards/margins": 0.10768643766641617, + "rewards/rejected": -0.32169288396835327, + "step": 9840 + }, + { + "epoch": 0.64, + "learning_rate": 1.6906251270358229e-06, + "logits/chosen": -0.7774800062179565, + "logits/rejected": -0.6933233141899109, + "logps/chosen": -474.37335205078125, + "logps/rejected": -498.35589599609375, + "loss": 0.6903, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2115190029144287, + "rewards/margins": 0.0737244039773941, + "rewards/rejected": -0.2852434515953064, + "step": 9850 + }, + { + "epoch": 0.65, + "learning_rate": 1.685224852937174e-06, + "logits/chosen": -0.5178209543228149, + "logits/rejected": -0.14217159152030945, + "logps/chosen": -408.82708740234375, + "logps/rejected": -625.0120849609375, + "loss": 0.6826, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2104623019695282, + "rewards/margins": 0.174326092004776, + "rewards/rejected": -0.3847884237766266, + "step": 9860 + }, + { + "epoch": 0.65, + "learning_rate": 1.6798288291048136e-06, + "logits/chosen": -0.42471179366111755, + "logits/rejected": -0.3782634139060974, + "logps/chosen": -467.9342346191406, + "logps/rejected": -558.4611206054688, + "loss": 0.687, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.24469268321990967, + "rewards/margins": 0.11923189461231232, + "rewards/rejected": -0.3639245331287384, + "step": 9870 + }, + { + "epoch": 0.65, + "learning_rate": 1.6744370836870466e-06, + "logits/chosen": -1.2039653062820435, + "logits/rejected": -0.723548948764801, + "logps/chosen": -551.3778076171875, + "logps/rejected": -583.1861572265625, + "loss": 0.6865, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21668429672718048, + "rewards/margins": 0.11532609164714813, + "rewards/rejected": -0.3320103585720062, + "step": 9880 + }, + { + "epoch": 0.65, + "learning_rate": 1.6690496448098576e-06, + "logits/chosen": -0.4742654860019684, + "logits/rejected": -0.34280937910079956, + "logps/chosen": -430.12713623046875, + "logps/rejected": -484.00201416015625, + "loss": 0.6895, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2010909616947174, + "rewards/margins": 0.0747339203953743, + "rewards/rejected": -0.2758248746395111, + "step": 9890 + }, + { + "epoch": 0.65, + "learning_rate": 1.6636665405767666e-06, + "logits/chosen": -0.3358609974384308, + "logits/rejected": -0.2778807282447815, + "logps/chosen": -421.57672119140625, + "logps/rejected": -476.6004333496094, + "loss": 0.6887, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18380577862262726, + "rewards/margins": 0.07391373813152313, + "rewards/rejected": -0.2577195465564728, + "step": 9900 + }, + { + "epoch": 0.65, + "eval_logits/chosen": -0.5865435004234314, + "eval_logits/rejected": -0.4826829731464386, + "eval_logps/chosen": -436.94842529296875, + "eval_logps/rejected": -508.5710754394531, + "eval_loss": 0.689460039138794, + "eval_rewards/accuracies": 0.6700000166893005, + "eval_rewards/chosen": -0.20494350790977478, + "eval_rewards/margins": 0.09201564639806747, + "eval_rewards/rejected": -0.29695916175842285, + "eval_runtime": 711.2802, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 1.406, + "step": 9900 + }, + { + "epoch": 0.65, + "learning_rate": 1.6582877990686827e-06, + "logits/chosen": -0.517175555229187, + "logits/rejected": -0.7299954891204834, + "logps/chosen": -289.72222900390625, + "logps/rejected": -421.864990234375, + "loss": 0.689, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.16842789947986603, + "rewards/margins": 0.1098114401102066, + "rewards/rejected": -0.27823930978775024, + "step": 9910 + }, + { + "epoch": 0.65, + "learning_rate": 1.6529134483437562e-06, + "logits/chosen": -0.4364466667175293, + "logits/rejected": -0.7086406946182251, + "logps/chosen": -417.412841796875, + "logps/rejected": -474.3499450683594, + "loss": 0.6889, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.21183860301971436, + "rewards/margins": 0.09639080613851547, + "rewards/rejected": -0.30822938680648804, + "step": 9920 + }, + { + "epoch": 0.65, + "learning_rate": 1.647543516437233e-06, + "logits/chosen": -0.7980550527572632, + "logits/rejected": -0.8304376602172852, + "logps/chosen": -403.45947265625, + "logps/rejected": -505.7483825683594, + "loss": 0.6896, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.20442131161689758, + "rewards/margins": 0.08160404860973358, + "rewards/rejected": -0.28602534532546997, + "step": 9930 + }, + { + "epoch": 0.65, + "learning_rate": 1.6421780313613088e-06, + "logits/chosen": -0.5756974220275879, + "logits/rejected": -0.19046545028686523, + "logps/chosen": -413.3451232910156, + "logps/rejected": -482.7889099121094, + "loss": 0.6912, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.20860353112220764, + "rewards/margins": 0.10263363271951675, + "rewards/rejected": -0.3112371265888214, + "step": 9940 + }, + { + "epoch": 0.65, + "learning_rate": 1.6368170211049816e-06, + "logits/chosen": -0.2424355298280716, + "logits/rejected": -0.3142424523830414, + "logps/chosen": -515.1586303710938, + "logps/rejected": -553.0162353515625, + "loss": 0.6886, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.23135843873023987, + "rewards/margins": 0.09796912968158722, + "rewards/rejected": -0.3293275833129883, + "step": 9950 + }, + { + "epoch": 0.65, + "learning_rate": 1.6314605136339074e-06, + "logits/chosen": -0.6656386256217957, + "logits/rejected": -0.4989562928676605, + "logps/chosen": -398.7882995605469, + "logps/rejected": -458.17120361328125, + "loss": 0.6909, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.20320162177085876, + "rewards/margins": 0.07728231698274612, + "rewards/rejected": -0.2804839611053467, + "step": 9960 + }, + { + "epoch": 0.65, + "learning_rate": 1.6261085368902526e-06, + "logits/chosen": -1.0514520406723022, + "logits/rejected": -0.9138363599777222, + "logps/chosen": -455.0506286621094, + "logps/rejected": -479.02337646484375, + "loss": 0.6885, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1901097595691681, + "rewards/margins": 0.0676022320985794, + "rewards/rejected": -0.2577120065689087, + "step": 9970 + }, + { + "epoch": 0.65, + "learning_rate": 1.6207611187925503e-06, + "logits/chosen": -0.7324178814888, + "logits/rejected": -0.6601067185401917, + "logps/chosen": -412.18756103515625, + "logps/rejected": -551.6229858398438, + "loss": 0.6865, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2033606320619583, + "rewards/margins": 0.09311771392822266, + "rewards/rejected": -0.2964783310890198, + "step": 9980 + }, + { + "epoch": 0.65, + "learning_rate": 1.6154182872355512e-06, + "logits/chosen": -0.40643399953842163, + "logits/rejected": -0.5821970105171204, + "logps/chosen": -396.7106628417969, + "logps/rejected": -487.38092041015625, + "loss": 0.6909, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2328932285308838, + "rewards/margins": 0.0762505978345871, + "rewards/rejected": -0.3091438412666321, + "step": 9990 + }, + { + "epoch": 0.65, + "learning_rate": 1.610080070090084e-06, + "logits/chosen": -0.5443228483200073, + "logits/rejected": -0.3826178312301636, + "logps/chosen": -480.44256591796875, + "logps/rejected": -582.5927124023438, + "loss": 0.6886, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.299521267414093, + "rewards/margins": 0.11117850244045258, + "rewards/rejected": -0.4106997549533844, + "step": 10000 + }, + { + "epoch": 0.65, + "eval_logits/chosen": -0.4209235906600952, + "eval_logits/rejected": -0.3262253701686859, + "eval_logps/chosen": -493.7507629394531, + "eval_logps/rejected": -570.7291870117188, + "eval_loss": 0.6895338296890259, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -0.2617458403110504, + "eval_rewards/margins": 0.09737147390842438, + "eval_rewards/rejected": -0.3591172993183136, + "eval_runtime": 709.5084, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 1.409, + "step": 10000 + }, + { + "epoch": 0.65, + "learning_rate": 1.6047464952029034e-06, + "logits/chosen": -0.7706862092018127, + "logits/rejected": -0.7439472675323486, + "logps/chosen": -504.51043701171875, + "logps/rejected": -627.5145263671875, + "loss": 0.6887, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2502230405807495, + "rewards/margins": 0.1147780567407608, + "rewards/rejected": -0.3650010824203491, + "step": 10010 + }, + { + "epoch": 0.66, + "learning_rate": 1.5994175903965486e-06, + "logits/chosen": -0.2823607325553894, + "logits/rejected": -0.013178685680031776, + "logps/chosen": -538.0333251953125, + "logps/rejected": -646.7186889648438, + "loss": 0.6901, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2877029478549957, + "rewards/margins": 0.10693083703517914, + "rewards/rejected": -0.39463382959365845, + "step": 10020 + }, + { + "epoch": 0.66, + "learning_rate": 1.5940933834691977e-06, + "logits/chosen": -0.7852429747581482, + "logits/rejected": -0.46865981817245483, + "logps/chosen": -570.1761474609375, + "logps/rejected": -542.385498046875, + "loss": 0.6903, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2649102807044983, + "rewards/margins": 0.08010663837194443, + "rewards/rejected": -0.3450169265270233, + "step": 10030 + }, + { + "epoch": 0.66, + "learning_rate": 1.588773902194522e-06, + "logits/chosen": -0.39314407110214233, + "logits/rejected": -0.09499244391918182, + "logps/chosen": -512.2601318359375, + "logps/rejected": -632.1806030273438, + "loss": 0.6864, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.3081805109977722, + "rewards/margins": 0.12416081130504608, + "rewards/rejected": -0.4323412775993347, + "step": 10040 + }, + { + "epoch": 0.66, + "learning_rate": 1.583459174321541e-06, + "logits/chosen": -0.08470626175403595, + "logits/rejected": -0.16978123784065247, + "logps/chosen": -516.5139770507812, + "logps/rejected": -590.5281372070312, + "loss": 0.6894, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.3068966865539551, + "rewards/margins": 0.10110817104578018, + "rewards/rejected": -0.40800485014915466, + "step": 10050 + }, + { + "epoch": 0.66, + "learning_rate": 1.5781492275744797e-06, + "logits/chosen": -0.8645523190498352, + "logits/rejected": -0.7959304451942444, + "logps/chosen": -579.1370849609375, + "logps/rejected": -673.6524658203125, + "loss": 0.6913, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.28489264845848083, + "rewards/margins": 0.12073332071304321, + "rewards/rejected": -0.40562596917152405, + "step": 10060 + }, + { + "epoch": 0.66, + "learning_rate": 1.5728440896526215e-06, + "logits/chosen": -0.2425081729888916, + "logits/rejected": -0.21429088711738586, + "logps/chosen": -523.8734741210938, + "logps/rejected": -572.2105712890625, + "loss": 0.6891, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2504715323448181, + "rewards/margins": 0.09453996270895004, + "rewards/rejected": -0.34501153230667114, + "step": 10070 + }, + { + "epoch": 0.66, + "learning_rate": 1.5675437882301633e-06, + "logits/chosen": -0.5220723152160645, + "logits/rejected": -0.5302685499191284, + "logps/chosen": -468.63037109375, + "logps/rejected": -466.52032470703125, + "loss": 0.6921, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.24958617985248566, + "rewards/margins": 0.032685764133930206, + "rewards/rejected": -0.2822719216346741, + "step": 10080 + }, + { + "epoch": 0.66, + "learning_rate": 1.5622483509560748e-06, + "logits/chosen": -0.31761685013771057, + "logits/rejected": -0.4478934407234192, + "logps/chosen": -406.1260681152344, + "logps/rejected": -529.2910766601562, + "loss": 0.6905, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.23539690673351288, + "rewards/margins": 0.0929826870560646, + "rewards/rejected": -0.32837963104248047, + "step": 10090 + }, + { + "epoch": 0.66, + "learning_rate": 1.5569578054539506e-06, + "logits/chosen": -0.6085635423660278, + "logits/rejected": -0.3032626509666443, + "logps/chosen": -530.7781982421875, + "logps/rejected": -589.9613037109375, + "loss": 0.6858, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.2515811324119568, + "rewards/margins": 0.14304611086845398, + "rewards/rejected": -0.3946272134780884, + "step": 10100 + }, + { + "epoch": 0.66, + "eval_logits/chosen": -0.5254442691802979, + "eval_logits/rejected": -0.42709270119667053, + "eval_logps/chosen": -472.549072265625, + "eval_logps/rejected": -537.1530151367188, + "eval_loss": 0.6895158886909485, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -0.2405441403388977, + "eval_rewards/margins": 0.08499700576066971, + "eval_rewards/rejected": -0.3255411386489868, + "eval_runtime": 712.2687, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 1.404, + "step": 10100 + }, + { + "epoch": 0.66, + "learning_rate": 1.551672179321867e-06, + "logits/chosen": -0.5627814531326294, + "logits/rejected": -0.5385065078735352, + "logps/chosen": -443.6526794433594, + "logps/rejected": -504.775390625, + "loss": 0.6902, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.22779580950737, + "rewards/margins": 0.0822938084602356, + "rewards/rejected": -0.310089647769928, + "step": 10110 + }, + { + "epoch": 0.66, + "learning_rate": 1.5463915001322398e-06, + "logits/chosen": -0.5434405207633972, + "logits/rejected": -0.3636297881603241, + "logps/chosen": -501.84539794921875, + "logps/rejected": -582.4625244140625, + "loss": 0.687, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2561890482902527, + "rewards/margins": 0.09498479962348938, + "rewards/rejected": -0.35117384791374207, + "step": 10120 + }, + { + "epoch": 0.66, + "learning_rate": 1.5411157954316784e-06, + "logits/chosen": -0.8339093327522278, + "logits/rejected": -0.37349334359169006, + "logps/chosen": -435.55792236328125, + "logps/rejected": -486.6434020996094, + "loss": 0.6908, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.23368044197559357, + "rewards/margins": 0.06477528065443039, + "rewards/rejected": -0.29845571517944336, + "step": 10130 + }, + { + "epoch": 0.66, + "learning_rate": 1.535845092740843e-06, + "logits/chosen": -0.5845457315444946, + "logits/rejected": -0.6137182116508484, + "logps/chosen": -437.1864318847656, + "logps/rejected": -508.76416015625, + "loss": 0.6902, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.19986525177955627, + "rewards/margins": 0.05918189138174057, + "rewards/rejected": -0.25904718041419983, + "step": 10140 + }, + { + "epoch": 0.66, + "learning_rate": 1.5305794195543005e-06, + "logits/chosen": -0.8156601190567017, + "logits/rejected": -0.7871606945991516, + "logps/chosen": -426.68145751953125, + "logps/rejected": -507.53546142578125, + "loss": 0.688, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2195023000240326, + "rewards/margins": 0.09778545051813126, + "rewards/rejected": -0.31728774309158325, + "step": 10150 + }, + { + "epoch": 0.66, + "learning_rate": 1.5253188033403816e-06, + "logits/chosen": -0.9039441347122192, + "logits/rejected": -0.7695221304893494, + "logps/chosen": -358.87200927734375, + "logps/rejected": -414.21600341796875, + "loss": 0.6913, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.18813714385032654, + "rewards/margins": 0.03868565335869789, + "rewards/rejected": -0.22682280838489532, + "step": 10160 + }, + { + "epoch": 0.67, + "learning_rate": 1.520063271541037e-06, + "logits/chosen": -0.7685378193855286, + "logits/rejected": -0.6929147839546204, + "logps/chosen": -407.37103271484375, + "logps/rejected": -530.870361328125, + "loss": 0.6845, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.22886629402637482, + "rewards/margins": 0.13480234146118164, + "rewards/rejected": -0.36366862058639526, + "step": 10170 + }, + { + "epoch": 0.67, + "learning_rate": 1.5148128515716954e-06, + "logits/chosen": -0.9215852618217468, + "logits/rejected": -0.5616071820259094, + "logps/chosen": -481.96209716796875, + "logps/rejected": -521.5393676757812, + "loss": 0.6867, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.21543291211128235, + "rewards/margins": 0.11195148527622223, + "rewards/rejected": -0.32738441228866577, + "step": 10180 + }, + { + "epoch": 0.67, + "learning_rate": 1.5095675708211197e-06, + "logits/chosen": -0.8018338084220886, + "logits/rejected": -0.7028027772903442, + "logps/chosen": -449.2286682128906, + "logps/rejected": -511.21270751953125, + "loss": 0.6906, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.25806769728660583, + "rewards/margins": 0.038640473037958145, + "rewards/rejected": -0.29670819640159607, + "step": 10190 + }, + { + "epoch": 0.67, + "learning_rate": 1.504327456651263e-06, + "logits/chosen": -0.5294234156608582, + "logits/rejected": -0.4144687056541443, + "logps/chosen": -522.6754150390625, + "logps/rejected": -584.6783447265625, + "loss": 0.6902, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2612723708152771, + "rewards/margins": 0.08896765112876892, + "rewards/rejected": -0.3502400517463684, + "step": 10200 + }, + { + "epoch": 0.67, + "eval_logits/chosen": -0.6712846159934998, + "eval_logits/rejected": -0.5611080527305603, + "eval_logps/chosen": -470.42236328125, + "eval_logps/rejected": -546.7171630859375, + "eval_loss": 0.6895034313201904, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -0.23841746151447296, + "eval_rewards/margins": 0.09668787568807602, + "eval_rewards/rejected": -0.33510535955429077, + "eval_runtime": 714.6345, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 1.399, + "step": 10200 + }, + { + "epoch": 0.67, + "learning_rate": 1.4990925363971284e-06, + "logits/chosen": -0.7982445955276489, + "logits/rejected": -0.2872164845466614, + "logps/chosen": -553.0101318359375, + "logps/rejected": -664.3682250976562, + "loss": 0.6874, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2630870044231415, + "rewards/margins": 0.17984716594219208, + "rewards/rejected": -0.44293412566185, + "step": 10210 + }, + { + "epoch": 0.67, + "learning_rate": 1.4938628373666236e-06, + "logits/chosen": -0.6805712580680847, + "logits/rejected": -0.5815011262893677, + "logps/chosen": -409.97039794921875, + "logps/rejected": -481.96588134765625, + "loss": 0.6915, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.2373911440372467, + "rewards/margins": 0.07012283056974411, + "rewards/rejected": -0.3075140118598938, + "step": 10220 + }, + { + "epoch": 0.67, + "learning_rate": 1.4886383868404203e-06, + "logits/chosen": -0.47447291016578674, + "logits/rejected": -0.5905685424804688, + "logps/chosen": -360.18585205078125, + "logps/rejected": -445.53790283203125, + "loss": 0.6882, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.19896404445171356, + "rewards/margins": 0.09538199752569199, + "rewards/rejected": -0.29434603452682495, + "step": 10230 + }, + { + "epoch": 0.67, + "learning_rate": 1.483419212071813e-06, + "logits/chosen": -0.3768971562385559, + "logits/rejected": -0.14265303313732147, + "logps/chosen": -408.9107971191406, + "logps/rejected": -469.2865295410156, + "loss": 0.6894, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2204466611146927, + "rewards/margins": 0.06755717098712921, + "rewards/rejected": -0.2880038321018219, + "step": 10240 + }, + { + "epoch": 0.67, + "learning_rate": 1.478205340286573e-06, + "logits/chosen": -0.629867434501648, + "logits/rejected": -0.7656704187393188, + "logps/chosen": -462.8060607910156, + "logps/rejected": -520.7987060546875, + "loss": 0.6897, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2591971755027771, + "rewards/margins": 0.0759974792599678, + "rewards/rejected": -0.3351946771144867, + "step": 10250 + }, + { + "epoch": 0.67, + "learning_rate": 1.4729967986828104e-06, + "logits/chosen": -0.6806701421737671, + "logits/rejected": -0.6869579553604126, + "logps/chosen": -535.6580200195312, + "logps/rejected": -573.3906860351562, + "loss": 0.6902, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.21367435157299042, + "rewards/margins": 0.0857006385922432, + "rewards/rejected": -0.2993749976158142, + "step": 10260 + }, + { + "epoch": 0.67, + "learning_rate": 1.4677936144308286e-06, + "logits/chosen": -0.8180096745491028, + "logits/rejected": -0.5855274796485901, + "logps/chosen": -411.0531311035156, + "logps/rejected": -505.92669677734375, + "loss": 0.6884, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18680839240550995, + "rewards/margins": 0.12293653190135956, + "rewards/rejected": -0.3097449541091919, + "step": 10270 + }, + { + "epoch": 0.67, + "learning_rate": 1.4625958146729864e-06, + "logits/chosen": -0.9897669553756714, + "logits/rejected": -0.6518400311470032, + "logps/chosen": -429.7906188964844, + "logps/rejected": -494.58447265625, + "loss": 0.6894, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.20952677726745605, + "rewards/margins": 0.08074188232421875, + "rewards/rejected": -0.2902686595916748, + "step": 10280 + }, + { + "epoch": 0.67, + "learning_rate": 1.4574034265235523e-06, + "logits/chosen": -0.6724362373352051, + "logits/rejected": -0.5424883961677551, + "logps/chosen": -464.368408203125, + "logps/rejected": -473.7183532714844, + "loss": 0.69, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.20853987336158752, + "rewards/margins": 0.10711286962032318, + "rewards/rejected": -0.3156526982784271, + "step": 10290 + }, + { + "epoch": 0.67, + "learning_rate": 1.452216477068568e-06, + "logits/chosen": -0.5634249448776245, + "logits/rejected": -0.3757302165031433, + "logps/chosen": -402.8147277832031, + "logps/rejected": -414.9501953125, + "loss": 0.6877, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1727697104215622, + "rewards/margins": 0.10505032539367676, + "rewards/rejected": -0.27782002091407776, + "step": 10300 + }, + { + "epoch": 0.67, + "eval_logits/chosen": -0.8372055292129517, + "eval_logits/rejected": -0.7203603386878967, + "eval_logps/chosen": -434.0806579589844, + "eval_logps/rejected": -497.1747131347656, + "eval_loss": 0.6894459128379822, + "eval_rewards/accuracies": 0.6635000109672546, + "eval_rewards/chosen": -0.2020757496356964, + "eval_rewards/margins": 0.08348707854747772, + "eval_rewards/rejected": -0.28556281328201294, + "eval_runtime": 711.9834, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 1.405, + "step": 10300 + }, + { + "epoch": 0.67, + "learning_rate": 1.4470349933657004e-06, + "logits/chosen": -1.407127022743225, + "logits/rejected": -0.9657286405563354, + "logps/chosen": -401.1145324707031, + "logps/rejected": -463.8077087402344, + "loss": 0.6893, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18038493394851685, + "rewards/margins": 0.08667208254337311, + "rewards/rejected": -0.26705700159072876, + "step": 10310 + }, + { + "epoch": 0.68, + "learning_rate": 1.4418590024441096e-06, + "logits/chosen": -1.1771811246871948, + "logits/rejected": -0.6675149202346802, + "logps/chosen": -440.747802734375, + "logps/rejected": -459.735595703125, + "loss": 0.6885, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.18728753924369812, + "rewards/margins": 0.08547311276197433, + "rewards/rejected": -0.27276068925857544, + "step": 10320 + }, + { + "epoch": 0.68, + "learning_rate": 1.436688531304297e-06, + "logits/chosen": -1.0007431507110596, + "logits/rejected": -0.8121240735054016, + "logps/chosen": -394.47149658203125, + "logps/rejected": -483.74847412109375, + "loss": 0.6889, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.17614403367042542, + "rewards/margins": 0.09476612508296967, + "rewards/rejected": -0.2709101438522339, + "step": 10330 + }, + { + "epoch": 0.68, + "learning_rate": 1.431523606917974e-06, + "logits/chosen": -0.9082180261611938, + "logits/rejected": -0.850312352180481, + "logps/chosen": -435.8108825683594, + "logps/rejected": -534.7352294921875, + "loss": 0.6863, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.23071154952049255, + "rewards/margins": 0.09230966120958328, + "rewards/rejected": -0.3230212330818176, + "step": 10340 + }, + { + "epoch": 0.68, + "learning_rate": 1.4263642562279162e-06, + "logits/chosen": -0.6657235622406006, + "logits/rejected": -0.5200079083442688, + "logps/chosen": -468.4452209472656, + "logps/rejected": -580.9529418945312, + "loss": 0.6886, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.21895787119865417, + "rewards/margins": 0.10401761531829834, + "rewards/rejected": -0.3229754567146301, + "step": 10350 + }, + { + "epoch": 0.68, + "learning_rate": 1.4212105061478257e-06, + "logits/chosen": -0.7509498000144958, + "logits/rejected": -0.4829614758491516, + "logps/chosen": -484.610107421875, + "logps/rejected": -565.8938598632812, + "loss": 0.6896, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2547195553779602, + "rewards/margins": 0.07346032559871674, + "rewards/rejected": -0.32817989587783813, + "step": 10360 + }, + { + "epoch": 0.68, + "learning_rate": 1.4160623835621848e-06, + "logits/chosen": -1.2137842178344727, + "logits/rejected": -0.8153377771377563, + "logps/chosen": -424.40850830078125, + "logps/rejected": -510.79656982421875, + "loss": 0.6893, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.18709731101989746, + "rewards/margins": 0.09786628186702728, + "rewards/rejected": -0.28496360778808594, + "step": 10370 + }, + { + "epoch": 0.68, + "learning_rate": 1.4109199153261249e-06, + "logits/chosen": -0.8705890774726868, + "logits/rejected": -0.6633458733558655, + "logps/chosen": -490.9913635253906, + "logps/rejected": -560.652099609375, + "loss": 0.6887, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.21499666571617126, + "rewards/margins": 0.09997449815273285, + "rewards/rejected": -0.3149711489677429, + "step": 10380 + }, + { + "epoch": 0.68, + "learning_rate": 1.405783128265278e-06, + "logits/chosen": -0.9433485269546509, + "logits/rejected": -0.8324483633041382, + "logps/chosen": -443.7115173339844, + "logps/rejected": -508.8597717285156, + "loss": 0.6899, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2395780384540558, + "rewards/margins": 0.06841441243886948, + "rewards/rejected": -0.30799245834350586, + "step": 10390 + }, + { + "epoch": 0.68, + "learning_rate": 1.4006520491756427e-06, + "logits/chosen": -0.784449577331543, + "logits/rejected": -0.4519527554512024, + "logps/chosen": -396.655029296875, + "logps/rejected": -426.204833984375, + "loss": 0.6889, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2004307508468628, + "rewards/margins": 0.09419076144695282, + "rewards/rejected": -0.2946215271949768, + "step": 10400 + }, + { + "epoch": 0.68, + "eval_logits/chosen": -0.8564895987510681, + "eval_logits/rejected": -0.737651526927948, + "eval_logps/chosen": -437.34539794921875, + "eval_logps/rejected": -502.5918884277344, + "eval_loss": 0.6894581913948059, + "eval_rewards/accuracies": 0.6610000133514404, + "eval_rewards/chosen": -0.2053404450416565, + "eval_rewards/margins": 0.08563953638076782, + "eval_rewards/rejected": -0.2909799814224243, + "eval_runtime": 711.3734, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.406, + "step": 10400 + }, + { + "epoch": 0.68, + "learning_rate": 1.39552670482344e-06, + "logits/chosen": -0.7949396371841431, + "logits/rejected": -0.9270607233047485, + "logps/chosen": -369.264892578125, + "logps/rejected": -430.3038024902344, + "loss": 0.6908, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1946653127670288, + "rewards/margins": 0.06114745885133743, + "rewards/rejected": -0.25581276416778564, + "step": 10410 + }, + { + "epoch": 0.68, + "learning_rate": 1.3904071219449776e-06, + "logits/chosen": -0.6988734006881714, + "logits/rejected": -0.4601953625679016, + "logps/chosen": -389.9330749511719, + "logps/rejected": -380.8857727050781, + "loss": 0.6893, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.19003143906593323, + "rewards/margins": 0.07561281323432922, + "rewards/rejected": -0.26564425230026245, + "step": 10420 + }, + { + "epoch": 0.68, + "learning_rate": 1.3852933272465068e-06, + "logits/chosen": -0.8293148279190063, + "logits/rejected": -0.7336186170578003, + "logps/chosen": -386.4721374511719, + "logps/rejected": -419.66241455078125, + "loss": 0.6912, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.14861205220222473, + "rewards/margins": 0.06841407716274261, + "rewards/rejected": -0.21702614426612854, + "step": 10430 + }, + { + "epoch": 0.68, + "learning_rate": 1.3801853474040873e-06, + "logits/chosen": -0.7749053239822388, + "logits/rejected": -0.702245831489563, + "logps/chosen": -456.7560119628906, + "logps/rejected": -536.7758178710938, + "loss": 0.6887, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.21823477745056152, + "rewards/margins": 0.0964801087975502, + "rewards/rejected": -0.31471487879753113, + "step": 10440 + }, + { + "epoch": 0.68, + "learning_rate": 1.3750832090634417e-06, + "logits/chosen": -0.9500045776367188, + "logits/rejected": -0.7475318908691406, + "logps/chosen": -374.98687744140625, + "logps/rejected": -435.96630859375, + "loss": 0.6894, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1888141632080078, + "rewards/margins": 0.07563088089227676, + "rewards/rejected": -0.264445036649704, + "step": 10450 + }, + { + "epoch": 0.68, + "learning_rate": 1.3699869388398245e-06, + "logits/chosen": -0.7953190803527832, + "logits/rejected": -0.6879931092262268, + "logps/chosen": -428.355712890625, + "logps/rejected": -493.54345703125, + "loss": 0.6899, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.21533437073230743, + "rewards/margins": 0.08364120870828629, + "rewards/rejected": -0.2989755868911743, + "step": 10460 + }, + { + "epoch": 0.69, + "learning_rate": 1.3648965633178772e-06, + "logits/chosen": -0.9219416379928589, + "logits/rejected": -0.820416271686554, + "logps/chosen": -412.6971130371094, + "logps/rejected": -518.1492919921875, + "loss": 0.6909, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.21196496486663818, + "rewards/margins": 0.09404056519269943, + "rewards/rejected": -0.3060055673122406, + "step": 10470 + }, + { + "epoch": 0.69, + "learning_rate": 1.3598121090514938e-06, + "logits/chosen": -0.6970809698104858, + "logits/rejected": -0.8272876739501953, + "logps/chosen": -389.41754150390625, + "logps/rejected": -445.25836181640625, + "loss": 0.6886, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.20640969276428223, + "rewards/margins": 0.0799640640616417, + "rewards/rejected": -0.28637373447418213, + "step": 10480 + }, + { + "epoch": 0.69, + "learning_rate": 1.3547336025636753e-06, + "logits/chosen": -0.7740924954414368, + "logits/rejected": -0.5170813798904419, + "logps/chosen": -529.0531616210938, + "logps/rejected": -559.5513305664062, + "loss": 0.6902, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.25375670194625854, + "rewards/margins": 0.07202710956335068, + "rewards/rejected": -0.3257838189601898, + "step": 10490 + }, + { + "epoch": 0.69, + "learning_rate": 1.3496610703464022e-06, + "logits/chosen": -0.993310272693634, + "logits/rejected": -0.6279395818710327, + "logps/chosen": -482.531005859375, + "logps/rejected": -524.1070556640625, + "loss": 0.6885, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2533164620399475, + "rewards/margins": 0.0870974212884903, + "rewards/rejected": -0.3404138684272766, + "step": 10500 + }, + { + "epoch": 0.69, + "eval_logits/chosen": -0.812269389629364, + "eval_logits/rejected": -0.6946424841880798, + "eval_logps/chosen": -479.5160217285156, + "eval_logps/rejected": -547.5474853515625, + "eval_loss": 0.6894866824150085, + "eval_rewards/accuracies": 0.6650000214576721, + "eval_rewards/chosen": -0.24751105904579163, + "eval_rewards/margins": 0.08842450380325317, + "eval_rewards/rejected": -0.3359355330467224, + "eval_runtime": 713.5643, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 1.401, + "step": 10500 + }, + { + "epoch": 0.69, + "learning_rate": 1.3445945388604848e-06, + "logits/chosen": -0.9369242787361145, + "logits/rejected": -0.4151206910610199, + "logps/chosen": -525.0983276367188, + "logps/rejected": -590.3690185546875, + "loss": 0.6899, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.286874920129776, + "rewards/margins": 0.1084313616156578, + "rewards/rejected": -0.39530622959136963, + "step": 10510 + }, + { + "epoch": 0.69, + "learning_rate": 1.3395340345354358e-06, + "logits/chosen": -0.9469447135925293, + "logits/rejected": -1.009319543838501, + "logps/chosen": -480.42010498046875, + "logps/rejected": -589.4058837890625, + "loss": 0.6876, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2567223012447357, + "rewards/margins": 0.091738261282444, + "rewards/rejected": -0.3484605848789215, + "step": 10520 + }, + { + "epoch": 0.69, + "learning_rate": 1.334479583769322e-06, + "logits/chosen": -1.1004682779312134, + "logits/rejected": -1.1085875034332275, + "logps/chosen": -510.69415283203125, + "logps/rejected": -516.8112182617188, + "loss": 0.6908, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.25585538148880005, + "rewards/margins": 0.056086838245391846, + "rewards/rejected": -0.3119421899318695, + "step": 10530 + }, + { + "epoch": 0.69, + "learning_rate": 1.3294312129286366e-06, + "logits/chosen": -0.8170161247253418, + "logits/rejected": -0.7216283082962036, + "logps/chosen": -492.08306884765625, + "logps/rejected": -539.3572998046875, + "loss": 0.6905, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.22535443305969238, + "rewards/margins": 0.06094512343406677, + "rewards/rejected": -0.28629955649375916, + "step": 10540 + }, + { + "epoch": 0.69, + "learning_rate": 1.324388948348153e-06, + "logits/chosen": -1.2214971780776978, + "logits/rejected": -0.864739716053009, + "logps/chosen": -502.04327392578125, + "logps/rejected": -509.402587890625, + "loss": 0.6881, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.21680526435375214, + "rewards/margins": 0.08872579038143158, + "rewards/rejected": -0.3055310845375061, + "step": 10550 + }, + { + "epoch": 0.69, + "learning_rate": 1.319352816330796e-06, + "logits/chosen": -1.2389277219772339, + "logits/rejected": -0.8248780369758606, + "logps/chosen": -516.2705688476562, + "logps/rejected": -500.32025146484375, + "loss": 0.6897, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.23307332396507263, + "rewards/margins": 0.09050510078668594, + "rewards/rejected": -0.323578417301178, + "step": 10560 + }, + { + "epoch": 0.69, + "learning_rate": 1.314322843147494e-06, + "logits/chosen": -0.8264732360839844, + "logits/rejected": -0.9180240631103516, + "logps/chosen": -447.1441955566406, + "logps/rejected": -576.095458984375, + "loss": 0.6896, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2768183648586273, + "rewards/margins": 0.07408357411623001, + "rewards/rejected": -0.3509019613265991, + "step": 10570 + }, + { + "epoch": 0.69, + "learning_rate": 1.3092990550370526e-06, + "logits/chosen": -0.9271947145462036, + "logits/rejected": -0.8801366090774536, + "logps/chosen": -596.9766235351562, + "logps/rejected": -602.197998046875, + "loss": 0.689, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.25121819972991943, + "rewards/margins": 0.09057263284921646, + "rewards/rejected": -0.3417908549308777, + "step": 10580 + }, + { + "epoch": 0.69, + "learning_rate": 1.3042814782060131e-06, + "logits/chosen": -0.531276524066925, + "logits/rejected": -0.5136385560035706, + "logps/chosen": -380.8265686035156, + "logps/rejected": -469.82818603515625, + "loss": 0.6881, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.19932372868061066, + "rewards/margins": 0.11291786283254623, + "rewards/rejected": -0.3122416138648987, + "step": 10590 + }, + { + "epoch": 0.69, + "learning_rate": 1.2992701388285112e-06, + "logits/chosen": -0.6711562871932983, + "logits/rejected": -0.576374888420105, + "logps/chosen": -481.093017578125, + "logps/rejected": -520.4686279296875, + "loss": 0.6884, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21174398064613342, + "rewards/margins": 0.08384759724140167, + "rewards/rejected": -0.2955915331840515, + "step": 10600 + }, + { + "epoch": 0.69, + "eval_logits/chosen": -0.8155879378318787, + "eval_logits/rejected": -0.6967446208000183, + "eval_logps/chosen": -486.49542236328125, + "eval_logps/rejected": -556.6547241210938, + "eval_loss": 0.6894893646240234, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -0.254490464925766, + "eval_rewards/margins": 0.0905524417757988, + "eval_rewards/rejected": -0.3450429141521454, + "eval_runtime": 709.7403, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 1.409, + "step": 10600 + }, + { + "epoch": 0.69, + "learning_rate": 1.29426506304615e-06, + "logits/chosen": -0.749521017074585, + "logits/rejected": -0.7463639974594116, + "logps/chosen": -512.3372802734375, + "logps/rejected": -551.0499877929688, + "loss": 0.691, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.28961387276649475, + "rewards/margins": 0.05819075182080269, + "rewards/rejected": -0.34780463576316833, + "step": 10610 + }, + { + "epoch": 0.69, + "learning_rate": 1.289266276967855e-06, + "logits/chosen": -1.1036258935928345, + "logits/rejected": -0.867837131023407, + "logps/chosen": -578.3683471679688, + "logps/rejected": -556.658935546875, + "loss": 0.6907, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2397865355014801, + "rewards/margins": 0.06844881922006607, + "rewards/rejected": -0.3082353472709656, + "step": 10620 + }, + { + "epoch": 0.7, + "learning_rate": 1.284273806669745e-06, + "logits/chosen": -0.8476123809814453, + "logits/rejected": -0.8278233408927917, + "logps/chosen": -527.3364868164062, + "logps/rejected": -636.2213134765625, + "loss": 0.6881, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2880115211009979, + "rewards/margins": 0.09134428203105927, + "rewards/rejected": -0.3793558180332184, + "step": 10630 + }, + { + "epoch": 0.7, + "learning_rate": 1.2792876781949884e-06, + "logits/chosen": -0.48908740282058716, + "logits/rejected": -0.4103211760520935, + "logps/chosen": -421.69970703125, + "logps/rejected": -510.91363525390625, + "loss": 0.6872, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.21562710404396057, + "rewards/margins": 0.10853157937526703, + "rewards/rejected": -0.3241586983203888, + "step": 10640 + }, + { + "epoch": 0.7, + "learning_rate": 1.274307917553676e-06, + "logits/chosen": -0.8523713946342468, + "logits/rejected": -0.666365385055542, + "logps/chosen": -444.986572265625, + "logps/rejected": -592.508056640625, + "loss": 0.6903, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.24932782351970673, + "rewards/margins": 0.12292194366455078, + "rewards/rejected": -0.3722497522830963, + "step": 10650 + }, + { + "epoch": 0.7, + "learning_rate": 1.2693345507226767e-06, + "logits/chosen": -0.8844467997550964, + "logits/rejected": -0.6996780633926392, + "logps/chosen": -486.7682189941406, + "logps/rejected": -608.3629150390625, + "loss": 0.6868, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.25677794218063354, + "rewards/margins": 0.11948125064373016, + "rewards/rejected": -0.3762592077255249, + "step": 10660 + }, + { + "epoch": 0.7, + "learning_rate": 1.2643676036455099e-06, + "logits/chosen": -1.2263991832733154, + "logits/rejected": -1.0565673112869263, + "logps/chosen": -498.09698486328125, + "logps/rejected": -500.1378479003906, + "loss": 0.692, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21024446189403534, + "rewards/margins": 0.05278193950653076, + "rewards/rejected": -0.2630263864994049, + "step": 10670 + }, + { + "epoch": 0.7, + "learning_rate": 1.259407102232203e-06, + "logits/chosen": -1.1713817119598389, + "logits/rejected": -0.7351571917533875, + "logps/chosen": -511.2571716308594, + "logps/rejected": -547.3772583007812, + "loss": 0.6877, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.23362283408641815, + "rewards/margins": 0.10906408727169037, + "rewards/rejected": -0.3426869511604309, + "step": 10680 + }, + { + "epoch": 0.7, + "learning_rate": 1.254453072359163e-06, + "logits/chosen": -0.7428683638572693, + "logits/rejected": -0.7556576132774353, + "logps/chosen": -448.65008544921875, + "logps/rejected": -503.0829162597656, + "loss": 0.689, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2146300971508026, + "rewards/margins": 0.07629900425672531, + "rewards/rejected": -0.2909291386604309, + "step": 10690 + }, + { + "epoch": 0.7, + "learning_rate": 1.2495055398690337e-06, + "logits/chosen": -1.2361562252044678, + "logits/rejected": -1.0086596012115479, + "logps/chosen": -436.9013671875, + "logps/rejected": -475.19317626953125, + "loss": 0.6909, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.2070470154285431, + "rewards/margins": 0.0475749596953392, + "rewards/rejected": -0.2546219527721405, + "step": 10700 + }, + { + "epoch": 0.7, + "eval_logits/chosen": -0.8308368921279907, + "eval_logits/rejected": -0.7124754786491394, + "eval_logps/chosen": -463.0313415527344, + "eval_logps/rejected": -527.8247680664062, + "eval_loss": 0.6894720196723938, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -0.23102642595767975, + "eval_rewards/margins": 0.0851864367723465, + "eval_rewards/rejected": -0.31621286273002625, + "eval_runtime": 712.5873, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 1.403, + "step": 10700 + }, + { + "epoch": 0.7, + "learning_rate": 1.2445645305705718e-06, + "logits/chosen": -0.9717991948127747, + "logits/rejected": -0.9585930109024048, + "logps/chosen": -448.2286071777344, + "logps/rejected": -500.2596740722656, + "loss": 0.6909, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.24190764129161835, + "rewards/margins": 0.07267390191555023, + "rewards/rejected": -0.31458157300949097, + "step": 10710 + }, + { + "epoch": 0.7, + "learning_rate": 1.2396300702384995e-06, + "logits/chosen": -0.9743059277534485, + "logits/rejected": -0.8761451840400696, + "logps/chosen": -494.4071350097656, + "logps/rejected": -494.4388732910156, + "loss": 0.6919, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.23659905791282654, + "rewards/margins": 0.03769702836871147, + "rewards/rejected": -0.2742961049079895, + "step": 10720 + }, + { + "epoch": 0.7, + "learning_rate": 1.234702184613381e-06, + "logits/chosen": -0.8256417512893677, + "logits/rejected": -0.5968748927116394, + "logps/chosen": -424.6349182128906, + "logps/rejected": -496.51983642578125, + "loss": 0.6904, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.20915257930755615, + "rewards/margins": 0.07222042977809906, + "rewards/rejected": -0.2813730239868164, + "step": 10730 + }, + { + "epoch": 0.7, + "learning_rate": 1.2297808994014793e-06, + "logits/chosen": -1.0779728889465332, + "logits/rejected": -0.8660680055618286, + "logps/chosen": -496.3164978027344, + "logps/rejected": -523.3186645507812, + "loss": 0.6898, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.21038170158863068, + "rewards/margins": 0.061894625425338745, + "rewards/rejected": -0.27227628231048584, + "step": 10740 + }, + { + "epoch": 0.7, + "learning_rate": 1.2248662402746314e-06, + "logits/chosen": -0.7504767179489136, + "logits/rejected": -0.931710422039032, + "logps/chosen": -452.50958251953125, + "logps/rejected": -514.7100219726562, + "loss": 0.6908, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2625929117202759, + "rewards/margins": 0.068316251039505, + "rewards/rejected": -0.3309091627597809, + "step": 10750 + }, + { + "epoch": 0.7, + "learning_rate": 1.2199582328701045e-06, + "logits/chosen": -0.9508997797966003, + "logits/rejected": -0.8796059489250183, + "logps/chosen": -510.3228454589844, + "logps/rejected": -566.1322021484375, + "loss": 0.6876, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.21968936920166016, + "rewards/margins": 0.09610020369291306, + "rewards/rejected": -0.3157895803451538, + "step": 10760 + }, + { + "epoch": 0.7, + "learning_rate": 1.2150569027904712e-06, + "logits/chosen": -0.9090763330459595, + "logits/rejected": -0.8921510577201843, + "logps/chosen": -478.50311279296875, + "logps/rejected": -540.8385009765625, + "loss": 0.69, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.23251965641975403, + "rewards/margins": 0.06863830238580704, + "rewards/rejected": -0.30115798115730286, + "step": 10770 + }, + { + "epoch": 0.71, + "learning_rate": 1.2101622756034688e-06, + "logits/chosen": -0.9387717247009277, + "logits/rejected": -0.887730598449707, + "logps/chosen": -405.3013610839844, + "logps/rejected": -457.65142822265625, + "loss": 0.6888, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.18254734575748444, + "rewards/margins": 0.08271530270576477, + "rewards/rejected": -0.2652626633644104, + "step": 10780 + }, + { + "epoch": 0.71, + "learning_rate": 1.2052743768418715e-06, + "logits/chosen": -0.9470183253288269, + "logits/rejected": -0.7718468308448792, + "logps/chosen": -441.03863525390625, + "logps/rejected": -491.30267333984375, + "loss": 0.6887, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.193229541182518, + "rewards/margins": 0.08427095413208008, + "rewards/rejected": -0.2775005102157593, + "step": 10790 + }, + { + "epoch": 0.71, + "learning_rate": 1.2003932320033523e-06, + "logits/chosen": -1.0047305822372437, + "logits/rejected": -1.0284090042114258, + "logps/chosen": -418.42095947265625, + "logps/rejected": -513.3245849609375, + "loss": 0.6877, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1919182986021042, + "rewards/margins": 0.09427173435688019, + "rewards/rejected": -0.286190003156662, + "step": 10800 + }, + { + "epoch": 0.71, + "eval_logits/chosen": -0.9391505718231201, + "eval_logits/rejected": -0.815139651298523, + "eval_logps/chosen": -437.78961181640625, + "eval_logps/rejected": -501.56768798828125, + "eval_loss": 0.689470112323761, + "eval_rewards/accuracies": 0.6644999980926514, + "eval_rewards/chosen": -0.2057846486568451, + "eval_rewards/margins": 0.08417114615440369, + "eval_rewards/rejected": -0.2899557948112488, + "eval_runtime": 714.604, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 1.399, + "step": 10800 + }, + { + "epoch": 0.71, + "learning_rate": 1.1955188665503553e-06, + "logits/chosen": -0.7959145903587341, + "logits/rejected": -0.7350171804428101, + "logps/chosen": -426.7283630371094, + "logps/rejected": -471.46685791015625, + "loss": 0.6917, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.22160467505455017, + "rewards/margins": 0.06552859395742416, + "rewards/rejected": -0.28713327646255493, + "step": 10810 + }, + { + "epoch": 0.71, + "learning_rate": 1.1906513059099566e-06, + "logits/chosen": -1.0689995288848877, + "logits/rejected": -0.7881597280502319, + "logps/chosen": -464.06591796875, + "logps/rejected": -566.7840576171875, + "loss": 0.6877, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.23257681727409363, + "rewards/margins": 0.11145871877670288, + "rewards/rejected": -0.3440355658531189, + "step": 10820 + }, + { + "epoch": 0.71, + "learning_rate": 1.185790575473738e-06, + "logits/chosen": -1.0131001472473145, + "logits/rejected": -0.6255987286567688, + "logps/chosen": -447.65093994140625, + "logps/rejected": -485.94342041015625, + "loss": 0.6916, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21851758658885956, + "rewards/margins": 0.08556672930717468, + "rewards/rejected": -0.30408430099487305, + "step": 10830 + }, + { + "epoch": 0.71, + "learning_rate": 1.1809367005976516e-06, + "logits/chosen": -0.995489776134491, + "logits/rejected": -0.8367295265197754, + "logps/chosen": -449.3817443847656, + "logps/rejected": -429.474365234375, + "loss": 0.6913, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.16688695549964905, + "rewards/margins": 0.05114005133509636, + "rewards/rejected": -0.2180270254611969, + "step": 10840 + }, + { + "epoch": 0.71, + "learning_rate": 1.1760897066018842e-06, + "logits/chosen": -0.9616080522537231, + "logits/rejected": -0.8490033149719238, + "logps/chosen": -380.6590576171875, + "logps/rejected": -465.58599853515625, + "loss": 0.6892, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1576179563999176, + "rewards/margins": 0.09092387557029724, + "rewards/rejected": -0.24854183197021484, + "step": 10850 + }, + { + "epoch": 0.71, + "learning_rate": 1.1712496187707327e-06, + "logits/chosen": -0.9444979429244995, + "logits/rejected": -1.1311503648757935, + "logps/chosen": -450.02069091796875, + "logps/rejected": -567.10009765625, + "loss": 0.6898, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.20971646904945374, + "rewards/margins": 0.1311451941728592, + "rewards/rejected": -0.3408616781234741, + "step": 10860 + }, + { + "epoch": 0.71, + "learning_rate": 1.1664164623524646e-06, + "logits/chosen": -1.0897196531295776, + "logits/rejected": -0.906332790851593, + "logps/chosen": -367.5469970703125, + "logps/rejected": -410.7867126464844, + "loss": 0.6899, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.14917199313640594, + "rewards/margins": 0.07149702310562134, + "rewards/rejected": -0.2206690013408661, + "step": 10870 + }, + { + "epoch": 0.71, + "learning_rate": 1.1615902625591926e-06, + "logits/chosen": -1.2006911039352417, + "logits/rejected": -0.863299548625946, + "logps/chosen": -397.97344970703125, + "logps/rejected": -452.78216552734375, + "loss": 0.6901, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1727522760629654, + "rewards/margins": 0.06422239542007446, + "rewards/rejected": -0.23697467148303986, + "step": 10880 + }, + { + "epoch": 0.71, + "learning_rate": 1.156771044566738e-06, + "logits/chosen": -1.1622774600982666, + "logits/rejected": -1.058846116065979, + "logps/chosen": -425.43115234375, + "logps/rejected": -453.4656677246094, + "loss": 0.6896, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1608165055513382, + "rewards/margins": 0.07125195860862732, + "rewards/rejected": -0.23206846415996552, + "step": 10890 + }, + { + "epoch": 0.71, + "learning_rate": 1.1519588335145037e-06, + "logits/chosen": -1.2024497985839844, + "logits/rejected": -1.3856556415557861, + "logps/chosen": -353.27618408203125, + "logps/rejected": -407.64788818359375, + "loss": 0.6921, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.14569738507270813, + "rewards/margins": 0.03567231446504593, + "rewards/rejected": -0.18136970698833466, + "step": 10900 + }, + { + "epoch": 0.71, + "eval_logits/chosen": -1.0966393947601318, + "eval_logits/rejected": -0.9659644961357117, + "eval_logps/chosen": -388.65643310546875, + "eval_logps/rejected": -443.52410888671875, + "eval_loss": 0.6895360350608826, + "eval_rewards/accuracies": 0.6570000052452087, + "eval_rewards/chosen": -0.15665146708488464, + "eval_rewards/margins": 0.07526073604822159, + "eval_rewards/rejected": -0.23191221058368683, + "eval_runtime": 711.6877, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 1.405, + "step": 10900 + }, + { + "epoch": 0.71, + "learning_rate": 1.1471536545053382e-06, + "logits/chosen": -1.0944491624832153, + "logits/rejected": -1.0983108282089233, + "logps/chosen": -351.8878479003906, + "logps/rejected": -432.76995849609375, + "loss": 0.6895, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.14285293221473694, + "rewards/margins": 0.07332994043827057, + "rewards/rejected": -0.2161828726530075, + "step": 10910 + }, + { + "epoch": 0.71, + "learning_rate": 1.1423555326054112e-06, + "logits/chosen": -1.0121129751205444, + "logits/rejected": -0.732758641242981, + "logps/chosen": -467.73419189453125, + "logps/rejected": -534.3839111328125, + "loss": 0.6838, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18489964306354523, + "rewards/margins": 0.14008580148220062, + "rewards/rejected": -0.32498544454574585, + "step": 10920 + }, + { + "epoch": 0.72, + "learning_rate": 1.1375644928440743e-06, + "logits/chosen": -1.107006311416626, + "logits/rejected": -0.8201066851615906, + "logps/chosen": -414.925048828125, + "logps/rejected": -444.3072204589844, + "loss": 0.6883, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1782417744398117, + "rewards/margins": 0.09688602387905121, + "rewards/rejected": -0.2751278281211853, + "step": 10930 + }, + { + "epoch": 0.72, + "learning_rate": 1.1327805602137396e-06, + "logits/chosen": -1.1339380741119385, + "logits/rejected": -0.9242345094680786, + "logps/chosen": -462.92095947265625, + "logps/rejected": -484.5670471191406, + "loss": 0.6904, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20417681336402893, + "rewards/margins": 0.07462415099143982, + "rewards/rejected": -0.27880096435546875, + "step": 10940 + }, + { + "epoch": 0.72, + "learning_rate": 1.1280037596697426e-06, + "logits/chosen": -0.986310601234436, + "logits/rejected": -0.737085223197937, + "logps/chosen": -454.80084228515625, + "logps/rejected": -629.8025512695312, + "loss": 0.6838, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.23391835391521454, + "rewards/margins": 0.13807490468025208, + "rewards/rejected": -0.3719932436943054, + "step": 10950 + }, + { + "epoch": 0.72, + "learning_rate": 1.123234116130216e-06, + "logits/chosen": -0.8748584985733032, + "logits/rejected": -0.7704537510871887, + "logps/chosen": -405.89532470703125, + "logps/rejected": -528.647705078125, + "loss": 0.6887, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.22000694274902344, + "rewards/margins": 0.11669802665710449, + "rewards/rejected": -0.3367049992084503, + "step": 10960 + }, + { + "epoch": 0.72, + "learning_rate": 1.1184716544759553e-06, + "logits/chosen": -0.62715744972229, + "logits/rejected": -0.5959141254425049, + "logps/chosen": -356.5045471191406, + "logps/rejected": -417.14794921875, + "loss": 0.6907, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.19114038348197937, + "rewards/margins": 0.04822884127497673, + "rewards/rejected": -0.2393692284822464, + "step": 10970 + }, + { + "epoch": 0.72, + "learning_rate": 1.1137163995502948e-06, + "logits/chosen": -1.4193073511123657, + "logits/rejected": -1.2204351425170898, + "logps/chosen": -414.5079040527344, + "logps/rejected": -458.13067626953125, + "loss": 0.6894, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.19297286868095398, + "rewards/margins": 0.07632466405630112, + "rewards/rejected": -0.2692975401878357, + "step": 10980 + }, + { + "epoch": 0.72, + "learning_rate": 1.1089683761589717e-06, + "logits/chosen": -0.8085821866989136, + "logits/rejected": -0.7703992128372192, + "logps/chosen": -439.84722900390625, + "logps/rejected": -536.17529296875, + "loss": 0.6882, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1996408998966217, + "rewards/margins": 0.11524226516485214, + "rewards/rejected": -0.31488317251205444, + "step": 10990 + }, + { + "epoch": 0.72, + "learning_rate": 1.1042276090700044e-06, + "logits/chosen": -0.9247690439224243, + "logits/rejected": -0.9810608625411987, + "logps/chosen": -441.11273193359375, + "logps/rejected": -537.4869384765625, + "loss": 0.6906, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.23081064224243164, + "rewards/margins": 0.06620490550994873, + "rewards/rejected": -0.297015517950058, + "step": 11000 + }, + { + "epoch": 0.72, + "eval_logits/chosen": -1.0156525373458862, + "eval_logits/rejected": -0.8884658217430115, + "eval_logps/chosen": -420.56817626953125, + "eval_logps/rejected": -481.687744140625, + "eval_loss": 0.6894798874855042, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -0.18856322765350342, + "eval_rewards/margins": 0.08151256293058395, + "eval_rewards/rejected": -0.27007579803466797, + "eval_runtime": 711.7879, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 1.405, + "step": 11000 + }, + { + "epoch": 0.72, + "learning_rate": 1.0994941230135536e-06, + "logits/chosen": -1.0726044178009033, + "logits/rejected": -0.9668411016464233, + "logps/chosen": -422.48162841796875, + "logps/rejected": -508.0166931152344, + "loss": 0.6873, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.18963167071342468, + "rewards/margins": 0.12159478664398193, + "rewards/rejected": -0.3112264573574066, + "step": 11010 + }, + { + "epoch": 0.72, + "learning_rate": 1.094767942681804e-06, + "logits/chosen": -1.4820367097854614, + "logits/rejected": -1.0809131860733032, + "logps/chosen": -502.901611328125, + "logps/rejected": -554.4119262695312, + "loss": 0.6883, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.25503259897232056, + "rewards/margins": 0.09071089327335358, + "rewards/rejected": -0.3457435369491577, + "step": 11020 + }, + { + "epoch": 0.72, + "learning_rate": 1.0900490927288248e-06, + "logits/chosen": -0.7793976068496704, + "logits/rejected": -0.827666163444519, + "logps/chosen": -469.4134826660156, + "logps/rejected": -488.8060607910156, + "loss": 0.6902, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.20894518494606018, + "rewards/margins": 0.06886889785528183, + "rewards/rejected": -0.2778140902519226, + "step": 11030 + }, + { + "epoch": 0.72, + "learning_rate": 1.0853375977704511e-06, + "logits/chosen": -1.1008172035217285, + "logits/rejected": -0.9268584251403809, + "logps/chosen": -440.64056396484375, + "logps/rejected": -460.67547607421875, + "loss": 0.6903, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20021602511405945, + "rewards/margins": 0.08404561877250671, + "rewards/rejected": -0.28426164388656616, + "step": 11040 + }, + { + "epoch": 0.72, + "learning_rate": 1.0806334823841466e-06, + "logits/chosen": -1.024103045463562, + "logits/rejected": -1.1570180654525757, + "logps/chosen": -462.835693359375, + "logps/rejected": -540.9618530273438, + "loss": 0.69, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.22321978211402893, + "rewards/margins": 0.04872307926416397, + "rewards/rejected": -0.2719428837299347, + "step": 11050 + }, + { + "epoch": 0.72, + "learning_rate": 1.0759367711088825e-06, + "logits/chosen": -0.7810468673706055, + "logits/rejected": -1.0236024856567383, + "logps/chosen": -382.93597412109375, + "logps/rejected": -475.3539123535156, + "loss": 0.6912, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1991034895181656, + "rewards/margins": 0.058940671384334564, + "rewards/rejected": -0.25804418325424194, + "step": 11060 + }, + { + "epoch": 0.72, + "learning_rate": 1.0712474884450056e-06, + "logits/chosen": -0.9814950823783875, + "logits/rejected": -0.8216627240180969, + "logps/chosen": -388.156982421875, + "logps/rejected": -453.73712158203125, + "loss": 0.6898, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.18532374501228333, + "rewards/margins": 0.09436696022748947, + "rewards/rejected": -0.279690682888031, + "step": 11070 + }, + { + "epoch": 0.72, + "learning_rate": 1.066565658854112e-06, + "logits/chosen": -0.7910436391830444, + "logits/rejected": -0.8780626058578491, + "logps/chosen": -318.71221923828125, + "logps/rejected": -394.83660888671875, + "loss": 0.6895, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.19088035821914673, + "rewards/margins": 0.0793568417429924, + "rewards/rejected": -0.27023714780807495, + "step": 11080 + }, + { + "epoch": 0.73, + "learning_rate": 1.0618913067589165e-06, + "logits/chosen": -1.094334363937378, + "logits/rejected": -0.7383924722671509, + "logps/chosen": -391.47113037109375, + "logps/rejected": -446.92816162109375, + "loss": 0.688, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.17565642297267914, + "rewards/margins": 0.09291692823171616, + "rewards/rejected": -0.2685733735561371, + "step": 11090 + }, + { + "epoch": 0.73, + "learning_rate": 1.0572244565431313e-06, + "logits/chosen": -0.9324450492858887, + "logits/rejected": -0.9185946583747864, + "logps/chosen": -366.2502746582031, + "logps/rejected": -447.4685974121094, + "loss": 0.6898, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.22083349525928497, + "rewards/margins": 0.07605709135532379, + "rewards/rejected": -0.29689058661460876, + "step": 11100 + }, + { + "epoch": 0.73, + "eval_logits/chosen": -1.029784083366394, + "eval_logits/rejected": -0.9005224704742432, + "eval_logps/chosen": -430.2828674316406, + "eval_logps/rejected": -496.58447265625, + "eval_loss": 0.6894676685333252, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -0.19827795028686523, + "eval_rewards/margins": 0.08669465035200119, + "eval_rewards/rejected": -0.284972608089447, + "eval_runtime": 710.9479, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 1.407, + "step": 11100 + }, + { + "epoch": 0.73, + "learning_rate": 1.0525651325513317e-06, + "logits/chosen": -1.035304307937622, + "logits/rejected": -1.05049729347229, + "logps/chosen": -518.7673950195312, + "logps/rejected": -558.48046875, + "loss": 0.6895, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.18749776482582092, + "rewards/margins": 0.06086844950914383, + "rewards/rejected": -0.24836620688438416, + "step": 11110 + }, + { + "epoch": 0.73, + "learning_rate": 1.0479133590888351e-06, + "logits/chosen": -1.0515987873077393, + "logits/rejected": -0.9245373606681824, + "logps/chosen": -455.1729431152344, + "logps/rejected": -532.0485229492188, + "loss": 0.6884, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.19971804320812225, + "rewards/margins": 0.10384778678417206, + "rewards/rejected": -0.3035658299922943, + "step": 11120 + }, + { + "epoch": 0.73, + "learning_rate": 1.0432691604215695e-06, + "logits/chosen": -1.10709547996521, + "logits/rejected": -0.8915435075759888, + "logps/chosen": -410.8944396972656, + "logps/rejected": -440.82598876953125, + "loss": 0.6912, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.17584750056266785, + "rewards/margins": 0.061644673347473145, + "rewards/rejected": -0.2374921590089798, + "step": 11130 + }, + { + "epoch": 0.73, + "learning_rate": 1.0386325607759515e-06, + "logits/chosen": -1.0275440216064453, + "logits/rejected": -0.8162240982055664, + "logps/chosen": -342.28564453125, + "logps/rejected": -422.4649353027344, + "loss": 0.6888, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.15089556574821472, + "rewards/margins": 0.10024967044591904, + "rewards/rejected": -0.25114524364471436, + "step": 11140 + }, + { + "epoch": 0.73, + "learning_rate": 1.0340035843387544e-06, + "logits/chosen": -0.9475866556167603, + "logits/rejected": -0.8041400909423828, + "logps/chosen": -361.7947998046875, + "logps/rejected": -417.688720703125, + "loss": 0.6897, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.18205925822257996, + "rewards/margins": 0.08347681164741516, + "rewards/rejected": -0.26553604006767273, + "step": 11150 + }, + { + "epoch": 0.73, + "learning_rate": 1.0293822552569887e-06, + "logits/chosen": -1.2313281297683716, + "logits/rejected": -1.0497896671295166, + "logps/chosen": -428.425048828125, + "logps/rejected": -475.63018798828125, + "loss": 0.6898, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.17654123902320862, + "rewards/margins": 0.10303560644388199, + "rewards/rejected": -0.27957683801651, + "step": 11160 + }, + { + "epoch": 0.73, + "learning_rate": 1.0247685976377688e-06, + "logits/chosen": -1.1329296827316284, + "logits/rejected": -0.8583731651306152, + "logps/chosen": -384.9459533691406, + "logps/rejected": -419.2796325683594, + "loss": 0.6907, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.20326094329357147, + "rewards/margins": 0.07316506654024124, + "rewards/rejected": -0.2764259874820709, + "step": 11170 + }, + { + "epoch": 0.73, + "learning_rate": 1.0201626355481939e-06, + "logits/chosen": -1.3057048320770264, + "logits/rejected": -1.1107124090194702, + "logps/chosen": -395.8485412597656, + "logps/rejected": -430.41162109375, + "loss": 0.6878, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.17972803115844727, + "rewards/margins": 0.08812297135591507, + "rewards/rejected": -0.2678510248661041, + "step": 11180 + }, + { + "epoch": 0.73, + "learning_rate": 1.0155643930152192e-06, + "logits/chosen": -1.33696448802948, + "logits/rejected": -1.2072172164916992, + "logps/chosen": -448.7349548339844, + "logps/rejected": -438.774658203125, + "loss": 0.6911, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.176127627491951, + "rewards/margins": 0.044685568660497665, + "rewards/rejected": -0.22081318497657776, + "step": 11190 + }, + { + "epoch": 0.73, + "learning_rate": 1.0109738940255286e-06, + "logits/chosen": -1.1134618520736694, + "logits/rejected": -0.9695285558700562, + "logps/chosen": -376.0254821777344, + "logps/rejected": -419.3876953125, + "loss": 0.6924, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.15904290974140167, + "rewards/margins": 0.08064347505569458, + "rewards/rejected": -0.23968639969825745, + "step": 11200 + }, + { + "epoch": 0.73, + "eval_logits/chosen": -1.2116992473602295, + "eval_logits/rejected": -1.0739349126815796, + "eval_logps/chosen": -382.55328369140625, + "eval_logps/rejected": -440.3695983886719, + "eval_loss": 0.6895102262496948, + "eval_rewards/accuracies": 0.6629999876022339, + "eval_rewards/chosen": -0.15054833889007568, + "eval_rewards/margins": 0.07820937782526016, + "eval_rewards/rejected": -0.22875770926475525, + "eval_runtime": 713.425, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 1.402, + "step": 11200 + }, + { + "epoch": 0.73, + "learning_rate": 1.0063911625254155e-06, + "logits/chosen": -1.3005118370056152, + "logits/rejected": -1.296314001083374, + "logps/chosen": -356.2990417480469, + "logps/rejected": -432.0716247558594, + "loss": 0.6897, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12037642300128937, + "rewards/margins": 0.07514314353466034, + "rewards/rejected": -0.1955195516347885, + "step": 11210 + }, + { + "epoch": 0.73, + "learning_rate": 1.0018162224206502e-06, + "logits/chosen": -1.1771663427352905, + "logits/rejected": -1.177433967590332, + "logps/chosen": -327.0699157714844, + "logps/rejected": -436.6388244628906, + "loss": 0.688, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1525951325893402, + "rewards/margins": 0.10585884749889374, + "rewards/rejected": -0.25845396518707275, + "step": 11220 + }, + { + "epoch": 0.73, + "learning_rate": 9.97249097576363e-07, + "logits/chosen": -1.6533960103988647, + "logits/rejected": -1.2510101795196533, + "logps/chosen": -379.99169921875, + "logps/rejected": -440.383056640625, + "loss": 0.6881, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1469351053237915, + "rewards/margins": 0.10388322174549103, + "rewards/rejected": -0.25081831216812134, + "step": 11230 + }, + { + "epoch": 0.74, + "learning_rate": 9.92689811816913e-07, + "logits/chosen": -1.237443208694458, + "logits/rejected": -0.949772834777832, + "logps/chosen": -375.50042724609375, + "logps/rejected": -397.1983337402344, + "loss": 0.6905, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.16297926008701324, + "rewards/margins": 0.06910089403390884, + "rewards/rejected": -0.23208017647266388, + "step": 11240 + }, + { + "epoch": 0.74, + "learning_rate": 9.881383889257691e-07, + "logits/chosen": -1.180985450744629, + "logits/rejected": -1.2715930938720703, + "logps/chosen": -327.3847961425781, + "logps/rejected": -448.04351806640625, + "loss": 0.6897, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.15808793902397156, + "rewards/margins": 0.05658021569252014, + "rewards/rejected": -0.2146681249141693, + "step": 11250 + }, + { + "epoch": 0.74, + "learning_rate": 9.835948526453817e-07, + "logits/chosen": -0.9011642336845398, + "logits/rejected": -1.2479734420776367, + "logps/chosen": -360.42840576171875, + "logps/rejected": -456.5986328125, + "loss": 0.6905, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.17999902367591858, + "rewards/margins": 0.05249845236539841, + "rewards/rejected": -0.2324974536895752, + "step": 11260 + }, + { + "epoch": 0.74, + "learning_rate": 9.790592266770633e-07, + "logits/chosen": -1.3939253091812134, + "logits/rejected": -1.224125623703003, + "logps/chosen": -446.0503845214844, + "logps/rejected": -484.1180114746094, + "loss": 0.6902, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.17670570313930511, + "rewards/margins": 0.06630632281303406, + "rewards/rejected": -0.24301204085350037, + "step": 11270 + }, + { + "epoch": 0.74, + "learning_rate": 9.745315346808584e-07, + "logits/chosen": -0.9992599487304688, + "logits/rejected": -1.0071967840194702, + "logps/chosen": -369.0317077636719, + "logps/rejected": -412.9254455566406, + "loss": 0.6894, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.14995627105236053, + "rewards/margins": 0.06627680361270905, + "rewards/rejected": -0.2162330597639084, + "step": 11280 + }, + { + "epoch": 0.74, + "learning_rate": 9.70011800275428e-07, + "logits/chosen": -1.0035455226898193, + "logits/rejected": -0.9785023927688599, + "logps/chosen": -427.5858459472656, + "logps/rejected": -535.3264770507812, + "loss": 0.6879, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19006092846393585, + "rewards/margins": 0.09629470109939575, + "rewards/rejected": -0.2863556444644928, + "step": 11290 + }, + { + "epoch": 0.74, + "learning_rate": 9.655000470379206e-07, + "logits/chosen": -1.0799880027770996, + "logits/rejected": -0.8926523923873901, + "logps/chosen": -401.95391845703125, + "logps/rejected": -504.5462341308594, + "loss": 0.6875, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19160649180412292, + "rewards/margins": 0.11327487230300903, + "rewards/rejected": -0.30488136410713196, + "step": 11300 + }, + { + "epoch": 0.74, + "eval_logits/chosen": -1.12637197971344, + "eval_logits/rejected": -0.9923059344291687, + "eval_logps/chosen": -401.6754150390625, + "eval_logps/rejected": -464.677490234375, + "eval_loss": 0.6894660592079163, + "eval_rewards/accuracies": 0.6610000133514404, + "eval_rewards/chosen": -0.16967050731182098, + "eval_rewards/margins": 0.08339511603116989, + "eval_rewards/rejected": -0.2530656158924103, + "eval_runtime": 713.4984, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 1.402, + "step": 11300 + }, + { + "epoch": 0.74, + "learning_rate": 9.609962985038517e-07, + "logits/chosen": -1.1582266092300415, + "logits/rejected": -1.0454920530319214, + "logps/chosen": -378.60369873046875, + "logps/rejected": -485.1788024902344, + "loss": 0.6891, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.16324260830879211, + "rewards/margins": 0.1317000836133957, + "rewards/rejected": -0.2949426770210266, + "step": 11310 + }, + { + "epoch": 0.74, + "learning_rate": 9.565005781669786e-07, + "logits/chosen": -1.4098128080368042, + "logits/rejected": -1.043099284172058, + "logps/chosen": -432.05108642578125, + "logps/rejected": -467.31146240234375, + "loss": 0.6879, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.17146047949790955, + "rewards/margins": 0.08857326209545135, + "rewards/rejected": -0.2600337564945221, + "step": 11320 + }, + { + "epoch": 0.74, + "learning_rate": 9.520129094791822e-07, + "logits/chosen": -0.9881695508956909, + "logits/rejected": -0.7796091437339783, + "logps/chosen": -361.5747985839844, + "logps/rejected": -445.5174255371094, + "loss": 0.6882, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.18207907676696777, + "rewards/margins": 0.09414590150117874, + "rewards/rejected": -0.2762250006198883, + "step": 11330 + }, + { + "epoch": 0.74, + "learning_rate": 9.475333158503389e-07, + "logits/chosen": -1.012505054473877, + "logits/rejected": -0.8788460493087769, + "logps/chosen": -367.9222717285156, + "logps/rejected": -380.99481201171875, + "loss": 0.6906, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1500999927520752, + "rewards/margins": 0.058231133967638016, + "rewards/rejected": -0.20833110809326172, + "step": 11340 + }, + { + "epoch": 0.74, + "learning_rate": 9.430618206482053e-07, + "logits/chosen": -0.9951919317245483, + "logits/rejected": -0.8707451820373535, + "logps/chosen": -304.44342041015625, + "logps/rejected": -351.59326171875, + "loss": 0.6917, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.16402050852775574, + "rewards/margins": 0.04655434936285019, + "rewards/rejected": -0.21057486534118652, + "step": 11350 + }, + { + "epoch": 0.74, + "learning_rate": 9.385984471982892e-07, + "logits/chosen": -0.931800365447998, + "logits/rejected": -0.7422152161598206, + "logps/chosen": -387.48687744140625, + "logps/rejected": -480.77734375, + "loss": 0.6854, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.17266109585762024, + "rewards/margins": 0.14402495324611664, + "rewards/rejected": -0.3166860342025757, + "step": 11360 + }, + { + "epoch": 0.74, + "learning_rate": 9.341432187837343e-07, + "logits/chosen": -1.2325918674468994, + "logits/rejected": -1.0530160665512085, + "logps/chosen": -355.48663330078125, + "logps/rejected": -480.79327392578125, + "loss": 0.6857, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1651829034090042, + "rewards/margins": 0.110438272356987, + "rewards/rejected": -0.2756211459636688, + "step": 11370 + }, + { + "epoch": 0.74, + "learning_rate": 9.29696158645193e-07, + "logits/chosen": -1.0486128330230713, + "logits/rejected": -1.179518222808838, + "logps/chosen": -401.9928283691406, + "logps/rejected": -560.9896240234375, + "loss": 0.6882, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.18089056015014648, + "rewards/margins": 0.13000695407390594, + "rewards/rejected": -0.3108975291252136, + "step": 11380 + }, + { + "epoch": 0.75, + "learning_rate": 9.252572899807111e-07, + "logits/chosen": -1.0828187465667725, + "logits/rejected": -0.9198848009109497, + "logps/chosen": -479.14947509765625, + "logps/rejected": -569.015625, + "loss": 0.687, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20854994654655457, + "rewards/margins": 0.11605298519134521, + "rewards/rejected": -0.3246029317378998, + "step": 11390 + }, + { + "epoch": 0.75, + "learning_rate": 9.208266359456003e-07, + "logits/chosen": -1.1783645153045654, + "logits/rejected": -1.0648075342178345, + "logps/chosen": -355.53656005859375, + "logps/rejected": -432.8563537597656, + "loss": 0.6895, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.14973084628582, + "rewards/margins": 0.06774896383285522, + "rewards/rejected": -0.21747978031635284, + "step": 11400 + }, + { + "epoch": 0.75, + "eval_logits/chosen": -1.054376482963562, + "eval_logits/rejected": -0.9229505658149719, + "eval_logps/chosen": -428.9089050292969, + "eval_logps/rejected": -497.7010498046875, + "eval_loss": 0.6894968748092651, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -0.19690395891666412, + "eval_rewards/margins": 0.0891851931810379, + "eval_rewards/rejected": -0.286089152097702, + "eval_runtime": 711.3033, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 1.406, + "step": 11400 + }, + { + "epoch": 0.75, + "learning_rate": 9.164042196523229e-07, + "logits/chosen": -1.3578684329986572, + "logits/rejected": -1.054465413093567, + "logps/chosen": -382.7578430175781, + "logps/rejected": -469.48394775390625, + "loss": 0.6905, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19335679709911346, + "rewards/margins": 0.10024379193782806, + "rewards/rejected": -0.29360055923461914, + "step": 11410 + }, + { + "epoch": 0.75, + "learning_rate": 9.119900641703696e-07, + "logits/chosen": -1.283399224281311, + "logits/rejected": -1.0210292339324951, + "logps/chosen": -414.1683654785156, + "logps/rejected": -439.62286376953125, + "loss": 0.6907, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.20072200894355774, + "rewards/margins": 0.0717894434928894, + "rewards/rejected": -0.27251142263412476, + "step": 11420 + }, + { + "epoch": 0.75, + "learning_rate": 9.075841925261364e-07, + "logits/chosen": -1.400123953819275, + "logits/rejected": -1.266704797744751, + "logps/chosen": -412.7691955566406, + "logps/rejected": -482.8182678222656, + "loss": 0.6916, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.17485003173351288, + "rewards/margins": 0.08447788655757904, + "rewards/rejected": -0.2593279480934143, + "step": 11430 + }, + { + "epoch": 0.75, + "learning_rate": 9.031866277028093e-07, + "logits/chosen": -1.0190837383270264, + "logits/rejected": -0.8646179437637329, + "logps/chosen": -373.0989685058594, + "logps/rejected": -495.5443420410156, + "loss": 0.6887, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.18202418088912964, + "rewards/margins": 0.09563577175140381, + "rewards/rejected": -0.27765995264053345, + "step": 11440 + }, + { + "epoch": 0.75, + "learning_rate": 8.987973926402391e-07, + "logits/chosen": -0.8709138035774231, + "logits/rejected": -1.0138837099075317, + "logps/chosen": -385.26495361328125, + "logps/rejected": -469.19696044921875, + "loss": 0.69, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.17292535305023193, + "rewards/margins": 0.08451946079730988, + "rewards/rejected": -0.257444828748703, + "step": 11450 + }, + { + "epoch": 0.75, + "learning_rate": 8.944165102348273e-07, + "logits/chosen": -1.1840312480926514, + "logits/rejected": -1.033864140510559, + "logps/chosen": -287.67584228515625, + "logps/rejected": -409.1636047363281, + "loss": 0.6871, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.13072873651981354, + "rewards/margins": 0.11423041671514511, + "rewards/rejected": -0.24495916068553925, + "step": 11460 + }, + { + "epoch": 0.75, + "learning_rate": 8.900440033394018e-07, + "logits/chosen": -0.9068467020988464, + "logits/rejected": -0.953558623790741, + "logps/chosen": -363.6817626953125, + "logps/rejected": -424.71832275390625, + "loss": 0.6902, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.18116554617881775, + "rewards/margins": 0.07032226771116257, + "rewards/rejected": -0.2514878213405609, + "step": 11470 + }, + { + "epoch": 0.75, + "learning_rate": 8.856798947631009e-07, + "logits/chosen": -1.083222508430481, + "logits/rejected": -1.029837965965271, + "logps/chosen": -384.2429504394531, + "logps/rejected": -513.6322021484375, + "loss": 0.688, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.19336727261543274, + "rewards/margins": 0.11243722587823868, + "rewards/rejected": -0.30580443143844604, + "step": 11480 + }, + { + "epoch": 0.75, + "learning_rate": 8.813242072712519e-07, + "logits/chosen": -0.4971315860748291, + "logits/rejected": -0.38992008566856384, + "logps/chosen": -384.61212158203125, + "logps/rejected": -450.76025390625, + "loss": 0.6906, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.22274982929229736, + "rewards/margins": 0.06501208990812302, + "rewards/rejected": -0.28776195645332336, + "step": 11490 + }, + { + "epoch": 0.75, + "learning_rate": 8.769769635852557e-07, + "logits/chosen": -0.9182844161987305, + "logits/rejected": -0.9621591567993164, + "logps/chosen": -387.2684020996094, + "logps/rejected": -422.38043212890625, + "loss": 0.6901, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.17196063697338104, + "rewards/margins": 0.07186910510063171, + "rewards/rejected": -0.24382975697517395, + "step": 11500 + }, + { + "epoch": 0.75, + "eval_logits/chosen": -0.9567478895187378, + "eval_logits/rejected": -0.8319298624992371, + "eval_logps/chosen": -427.27978515625, + "eval_logps/rejected": -491.3986511230469, + "eval_loss": 0.6894726753234863, + "eval_rewards/accuracies": 0.6639999747276306, + "eval_rewards/chosen": -0.19527484476566315, + "eval_rewards/margins": 0.08451192826032639, + "eval_rewards/rejected": -0.27978676557540894, + "eval_runtime": 713.116, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 1.402, + "step": 11500 + }, + { + "epoch": 0.75, + "learning_rate": 8.726381863824635e-07, + "logits/chosen": -1.3189074993133545, + "logits/rejected": -1.1210556030273438, + "logps/chosen": -484.98870849609375, + "logps/rejected": -490.3335876464844, + "loss": 0.69, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1999116688966751, + "rewards/margins": 0.07275116443634033, + "rewards/rejected": -0.27266281843185425, + "step": 11510 + }, + { + "epoch": 0.75, + "learning_rate": 8.683078982960638e-07, + "logits/chosen": -0.7536784410476685, + "logits/rejected": -0.5630779266357422, + "logps/chosen": -427.4227600097656, + "logps/rejected": -500.4649963378906, + "loss": 0.6866, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.20273777842521667, + "rewards/margins": 0.12313316762447357, + "rewards/rejected": -0.32587096095085144, + "step": 11520 + }, + { + "epoch": 0.75, + "learning_rate": 8.639861219149584e-07, + "logits/chosen": -1.0072379112243652, + "logits/rejected": -0.716784656047821, + "logps/chosen": -491.2522888183594, + "logps/rejected": -557.5786743164062, + "loss": 0.6855, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.22727027535438538, + "rewards/margins": 0.10925090312957764, + "rewards/rejected": -0.3365211486816406, + "step": 11530 + }, + { + "epoch": 0.76, + "learning_rate": 8.596728797836532e-07, + "logits/chosen": -0.9275296926498413, + "logits/rejected": -0.8758915066719055, + "logps/chosen": -419.6770935058594, + "logps/rejected": -570.2562255859375, + "loss": 0.6861, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20436839759349823, + "rewards/margins": 0.12098614871501923, + "rewards/rejected": -0.32535451650619507, + "step": 11540 + }, + { + "epoch": 0.76, + "learning_rate": 8.553681944021294e-07, + "logits/chosen": -1.092116355895996, + "logits/rejected": -1.2861175537109375, + "logps/chosen": -432.4119567871094, + "logps/rejected": -507.87091064453125, + "loss": 0.6879, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.19332793354988098, + "rewards/margins": 0.0948062315583229, + "rewards/rejected": -0.2881341874599457, + "step": 11550 + }, + { + "epoch": 0.76, + "learning_rate": 8.510720882257365e-07, + "logits/chosen": -0.5880703926086426, + "logits/rejected": -0.6488803625106812, + "logps/chosen": -372.99285888671875, + "logps/rejected": -544.4835205078125, + "loss": 0.6861, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.21169230341911316, + "rewards/margins": 0.13025878369808197, + "rewards/rejected": -0.3419511020183563, + "step": 11560 + }, + { + "epoch": 0.76, + "learning_rate": 8.467845836650667e-07, + "logits/chosen": -0.49036699533462524, + "logits/rejected": -0.5513705015182495, + "logps/chosen": -451.46856689453125, + "logps/rejected": -561.2171630859375, + "loss": 0.6865, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.24320204555988312, + "rewards/margins": 0.103193499147892, + "rewards/rejected": -0.3463955521583557, + "step": 11570 + }, + { + "epoch": 0.76, + "learning_rate": 8.425057030858461e-07, + "logits/chosen": -0.6223480701446533, + "logits/rejected": -0.6779859066009521, + "logps/chosen": -356.28814697265625, + "logps/rejected": -496.072998046875, + "loss": 0.6887, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.20023226737976074, + "rewards/margins": 0.11076197773218155, + "rewards/rejected": -0.3109942674636841, + "step": 11580 + }, + { + "epoch": 0.76, + "learning_rate": 8.382354688088098e-07, + "logits/chosen": -0.7387554049491882, + "logits/rejected": -0.8713496923446655, + "logps/chosen": -365.7668762207031, + "logps/rejected": -448.03173828125, + "loss": 0.6899, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.20761151611804962, + "rewards/margins": 0.06684192270040512, + "rewards/rejected": -0.27445346117019653, + "step": 11590 + }, + { + "epoch": 0.76, + "learning_rate": 8.33973903109594e-07, + "logits/chosen": -0.8083747625350952, + "logits/rejected": -0.8849031329154968, + "logps/chosen": -453.2215270996094, + "logps/rejected": -530.6839599609375, + "loss": 0.6887, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.23662443459033966, + "rewards/margins": 0.10902400314807892, + "rewards/rejected": -0.3456484079360962, + "step": 11600 + }, + { + "epoch": 0.76, + "eval_logits/chosen": -0.8248559236526489, + "eval_logits/rejected": -0.70490562915802, + "eval_logps/chosen": -462.7817077636719, + "eval_logps/rejected": -536.8844604492188, + "eval_loss": 0.6895047426223755, + "eval_rewards/accuracies": 0.6685000061988831, + "eval_rewards/chosen": -0.23077677190303802, + "eval_rewards/margins": 0.0944957509636879, + "eval_rewards/rejected": -0.3252725601196289, + "eval_runtime": 713.646, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 1.401, + "step": 11600 + }, + { + "epoch": 0.76, + "learning_rate": 8.297210282186102e-07, + "logits/chosen": -0.9280464053153992, + "logits/rejected": -0.8609668612480164, + "logps/chosen": -516.7465209960938, + "logps/rejected": -612.9113159179688, + "loss": 0.6888, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.28672903776168823, + "rewards/margins": 0.07479820400476456, + "rewards/rejected": -0.3615272045135498, + "step": 11610 + }, + { + "epoch": 0.76, + "learning_rate": 8.254768663209397e-07, + "logits/chosen": -0.6439899206161499, + "logits/rejected": -0.7141983509063721, + "logps/chosen": -500.17041015625, + "logps/rejected": -502.80487060546875, + "loss": 0.6909, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.22159060835838318, + "rewards/margins": 0.06510604918003082, + "rewards/rejected": -0.2866966724395752, + "step": 11620 + }, + { + "epoch": 0.76, + "learning_rate": 8.212414395562079e-07, + "logits/chosen": -0.7860328555107117, + "logits/rejected": -0.6665444374084473, + "logps/chosen": -492.0284729003906, + "logps/rejected": -576.9085083007812, + "loss": 0.691, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.26181426644325256, + "rewards/margins": 0.06095917150378227, + "rewards/rejected": -0.32277345657348633, + "step": 11630 + }, + { + "epoch": 0.76, + "learning_rate": 8.170147700184775e-07, + "logits/chosen": -0.8009759783744812, + "logits/rejected": -0.7388449907302856, + "logps/chosen": -486.1048889160156, + "logps/rejected": -573.0155029296875, + "loss": 0.6916, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.22965356707572937, + "rewards/margins": 0.09584342688322067, + "rewards/rejected": -0.32549700140953064, + "step": 11640 + }, + { + "epoch": 0.76, + "learning_rate": 8.127968797561242e-07, + "logits/chosen": -1.0365979671478271, + "logits/rejected": -0.8095973134040833, + "logps/chosen": -490.6710510253906, + "logps/rejected": -588.9063110351562, + "loss": 0.6878, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2667003273963928, + "rewards/margins": 0.12065819650888443, + "rewards/rejected": -0.38735854625701904, + "step": 11650 + }, + { + "epoch": 0.76, + "learning_rate": 8.085877907717338e-07, + "logits/chosen": -0.8645883798599243, + "logits/rejected": -1.0048613548278809, + "logps/chosen": -445.19183349609375, + "logps/rejected": -539.5614013671875, + "loss": 0.6891, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.22428032755851746, + "rewards/margins": 0.10604876279830933, + "rewards/rejected": -0.3303290903568268, + "step": 11660 + }, + { + "epoch": 0.76, + "learning_rate": 8.043875250219732e-07, + "logits/chosen": -0.7957605123519897, + "logits/rejected": -0.49647361040115356, + "logps/chosen": -491.366943359375, + "logps/rejected": -514.1613159179688, + "loss": 0.6913, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.26196223497390747, + "rewards/margins": 0.043863289058208466, + "rewards/rejected": -0.30582553148269653, + "step": 11670 + }, + { + "epoch": 0.76, + "learning_rate": 8.001961044174881e-07, + "logits/chosen": -0.9177546501159668, + "logits/rejected": -0.48230376839637756, + "logps/chosen": -473.46527099609375, + "logps/rejected": -463.75103759765625, + "loss": 0.6925, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.24500080943107605, + "rewards/margins": 0.04747391492128372, + "rewards/rejected": -0.29247474670410156, + "step": 11680 + }, + { + "epoch": 0.76, + "learning_rate": 7.960135508227795e-07, + "logits/chosen": -0.8630214929580688, + "logits/rejected": -0.7500642538070679, + "logps/chosen": -520.3016357421875, + "logps/rejected": -544.3031005859375, + "loss": 0.6906, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.22643259167671204, + "rewards/margins": 0.08831746876239777, + "rewards/rejected": -0.314750075340271, + "step": 11690 + }, + { + "epoch": 0.77, + "learning_rate": 7.91839886056098e-07, + "logits/chosen": -1.036927342414856, + "logits/rejected": -0.5842759609222412, + "logps/chosen": -545.1009521484375, + "logps/rejected": -588.3203735351562, + "loss": 0.6883, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2613387405872345, + "rewards/margins": 0.07379014045000076, + "rewards/rejected": -0.33512887358665466, + "step": 11700 + }, + { + "epoch": 0.77, + "eval_logits/chosen": -0.756587028503418, + "eval_logits/rejected": -0.6421234011650085, + "eval_logps/chosen": -466.8341979980469, + "eval_logps/rejected": -534.1975708007812, + "eval_loss": 0.6894639730453491, + "eval_rewards/accuracies": 0.6660000085830688, + "eval_rewards/chosen": -0.23482927680015564, + "eval_rewards/margins": 0.08775635808706284, + "eval_rewards/rejected": -0.3225856423377991, + "eval_runtime": 713.4686, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 1.402, + "step": 11700 + }, + { + "epoch": 0.77, + "learning_rate": 7.876751318893217e-07, + "logits/chosen": -0.7366732358932495, + "logits/rejected": -0.5020397901535034, + "logps/chosen": -468.701171875, + "logps/rejected": -529.0864868164062, + "loss": 0.6899, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.23083552718162537, + "rewards/margins": 0.08197341859340668, + "rewards/rejected": -0.31280896067619324, + "step": 11710 + }, + { + "epoch": 0.77, + "learning_rate": 7.8351931004785e-07, + "logits/chosen": -0.34233760833740234, + "logits/rejected": -0.5591579675674438, + "logps/chosen": -460.55810546875, + "logps/rejected": -525.3573608398438, + "loss": 0.6884, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2519298195838928, + "rewards/margins": 0.08968259394168854, + "rewards/rejected": -0.34161242842674255, + "step": 11720 + }, + { + "epoch": 0.77, + "learning_rate": 7.793724422104834e-07, + "logits/chosen": -0.8081684112548828, + "logits/rejected": -0.6944714188575745, + "logps/chosen": -440.3197326660156, + "logps/rejected": -627.394775390625, + "loss": 0.6883, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2314954698085785, + "rewards/margins": 0.11759781837463379, + "rewards/rejected": -0.3490932583808899, + "step": 11730 + }, + { + "epoch": 0.77, + "learning_rate": 7.752345500093184e-07, + "logits/chosen": -0.9091407060623169, + "logits/rejected": -0.751275360584259, + "logps/chosen": -466.60687255859375, + "logps/rejected": -499.1985778808594, + "loss": 0.6904, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.256451815366745, + "rewards/margins": 0.06235620379447937, + "rewards/rejected": -0.31880801916122437, + "step": 11740 + }, + { + "epoch": 0.77, + "learning_rate": 7.711056550296253e-07, + "logits/chosen": -0.9662020802497864, + "logits/rejected": -0.7012864947319031, + "logps/chosen": -461.15264892578125, + "logps/rejected": -520.930419921875, + "loss": 0.6915, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2143588364124298, + "rewards/margins": 0.09680284559726715, + "rewards/rejected": -0.31116166710853577, + "step": 11750 + }, + { + "epoch": 0.77, + "learning_rate": 7.669857788097445e-07, + "logits/chosen": -0.2298242151737213, + "logits/rejected": -0.07954345643520355, + "logps/chosen": -426.2181701660156, + "logps/rejected": -551.5553588867188, + "loss": 0.6878, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.26465222239494324, + "rewards/margins": 0.09871228784322739, + "rewards/rejected": -0.3633645176887512, + "step": 11760 + }, + { + "epoch": 0.77, + "learning_rate": 7.628749428409676e-07, + "logits/chosen": -0.6088379621505737, + "logits/rejected": -0.47349509596824646, + "logps/chosen": -493.9090881347656, + "logps/rejected": -533.0814208984375, + "loss": 0.6887, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2570212781429291, + "rewards/margins": 0.09792395681142807, + "rewards/rejected": -0.35494521260261536, + "step": 11770 + }, + { + "epoch": 0.77, + "learning_rate": 7.587731685674288e-07, + "logits/chosen": -0.9257609248161316, + "logits/rejected": -0.8395794034004211, + "logps/chosen": -529.6868896484375, + "logps/rejected": -625.3295288085938, + "loss": 0.6904, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.25609859824180603, + "rewards/margins": 0.08780574798583984, + "rewards/rejected": -0.34390437602996826, + "step": 11780 + }, + { + "epoch": 0.77, + "learning_rate": 7.546804773859931e-07, + "logits/chosen": -0.7353237271308899, + "logits/rejected": -0.6386014223098755, + "logps/chosen": -471.8373107910156, + "logps/rejected": -562.5552368164062, + "loss": 0.6894, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2447972297668457, + "rewards/margins": 0.11065386235713959, + "rewards/rejected": -0.3554511070251465, + "step": 11790 + }, + { + "epoch": 0.77, + "learning_rate": 7.505968906461409e-07, + "logits/chosen": -0.370036780834198, + "logits/rejected": -0.8298758268356323, + "logps/chosen": -501.50555419921875, + "logps/rejected": -546.6343383789062, + "loss": 0.6904, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.26056551933288574, + "rewards/margins": 0.06927505880594254, + "rewards/rejected": -0.3298405706882477, + "step": 11800 + }, + { + "epoch": 0.77, + "eval_logits/chosen": -0.6847590804100037, + "eval_logits/rejected": -0.5755062103271484, + "eval_logps/chosen": -467.6891174316406, + "eval_logps/rejected": -530.0805053710938, + "eval_loss": 0.6894600987434387, + "eval_rewards/accuracies": 0.6679999828338623, + "eval_rewards/chosen": -0.23568420112133026, + "eval_rewards/margins": 0.08278439193964005, + "eval_rewards/rejected": -0.3184686303138733, + "eval_runtime": 711.5188, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.405, + "step": 11800 + }, + { + "epoch": 0.77, + "learning_rate": 7.465224296498627e-07, + "logits/chosen": -0.7143672704696655, + "logits/rejected": -0.5428879261016846, + "logps/chosen": -466.81646728515625, + "logps/rejected": -510.3091735839844, + "loss": 0.6886, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2338574230670929, + "rewards/margins": 0.08462175726890564, + "rewards/rejected": -0.31847918033599854, + "step": 11810 + }, + { + "epoch": 0.77, + "learning_rate": 7.424571156515412e-07, + "logits/chosen": -0.8459585309028625, + "logits/rejected": -0.6181514859199524, + "logps/chosen": -418.2208557128906, + "logps/rejected": -538.0280151367188, + "loss": 0.6913, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.24051566421985626, + "rewards/margins": 0.09713063389062881, + "rewards/rejected": -0.3376463055610657, + "step": 11820 + }, + { + "epoch": 0.77, + "learning_rate": 7.38400969857847e-07, + "logits/chosen": -0.5362470149993896, + "logits/rejected": -0.6336470246315002, + "logps/chosen": -466.18505859375, + "logps/rejected": -603.7225952148438, + "loss": 0.6845, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.28304263949394226, + "rewards/margins": 0.1343853771686554, + "rewards/rejected": -0.41742801666259766, + "step": 11830 + }, + { + "epoch": 0.77, + "learning_rate": 7.343540134276225e-07, + "logits/chosen": -0.7161901593208313, + "logits/rejected": -0.7293332815170288, + "logps/chosen": -376.135009765625, + "logps/rejected": -450.649658203125, + "loss": 0.6902, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.20542338490486145, + "rewards/margins": 0.07261840254068375, + "rewards/rejected": -0.2780417799949646, + "step": 11840 + }, + { + "epoch": 0.78, + "learning_rate": 7.303162674717762e-07, + "logits/chosen": -0.172419935464859, + "logits/rejected": -0.13953010737895966, + "logps/chosen": -492.84796142578125, + "logps/rejected": -520.361572265625, + "loss": 0.6886, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2826201617717743, + "rewards/margins": 0.08334905654191971, + "rewards/rejected": -0.3659692406654358, + "step": 11850 + }, + { + "epoch": 0.78, + "learning_rate": 7.26287753053167e-07, + "logits/chosen": -0.7456333637237549, + "logits/rejected": -0.5391548275947571, + "logps/chosen": -545.03857421875, + "logps/rejected": -628.8922119140625, + "loss": 0.6885, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2800625264644623, + "rewards/margins": 0.085121750831604, + "rewards/rejected": -0.3651842474937439, + "step": 11860 + }, + { + "epoch": 0.78, + "learning_rate": 7.222684911865013e-07, + "logits/chosen": -0.6446608304977417, + "logits/rejected": -0.5831423401832581, + "logps/chosen": -434.08026123046875, + "logps/rejected": -561.3636474609375, + "loss": 0.6878, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.22691166400909424, + "rewards/margins": 0.10559795051813126, + "rewards/rejected": -0.3325095772743225, + "step": 11870 + }, + { + "epoch": 0.78, + "learning_rate": 7.182585028382166e-07, + "logits/chosen": -0.8354743719100952, + "logits/rejected": -0.6697049736976624, + "logps/chosen": -525.44091796875, + "logps/rejected": -582.7346801757812, + "loss": 0.6893, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.24928930401802063, + "rewards/margins": 0.0897437110543251, + "rewards/rejected": -0.3390330374240875, + "step": 11880 + }, + { + "epoch": 0.78, + "learning_rate": 7.142578089263769e-07, + "logits/chosen": -0.9254902601242065, + "logits/rejected": -1.04551362991333, + "logps/chosen": -551.900634765625, + "logps/rejected": -593.8734741210938, + "loss": 0.6911, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2334720641374588, + "rewards/margins": 0.10077275335788727, + "rewards/rejected": -0.33424481749534607, + "step": 11890 + }, + { + "epoch": 0.78, + "learning_rate": 7.102664303205611e-07, + "logits/chosen": -0.6265154480934143, + "logits/rejected": -0.9039812088012695, + "logps/chosen": -465.548828125, + "logps/rejected": -537.3599853515625, + "loss": 0.6868, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.24498634040355682, + "rewards/margins": 0.08968769758939743, + "rewards/rejected": -0.33467406034469604, + "step": 11900 + }, + { + "epoch": 0.78, + "eval_logits/chosen": -0.7183237671852112, + "eval_logits/rejected": -0.6058202981948853, + "eval_logps/chosen": -469.5601501464844, + "eval_logps/rejected": -536.6124877929688, + "eval_loss": 0.6894504427909851, + "eval_rewards/accuracies": 0.6660000085830688, + "eval_rewards/chosen": -0.23755520582199097, + "eval_rewards/margins": 0.08744542300701141, + "eval_rewards/rejected": -0.3250006139278412, + "eval_runtime": 711.8508, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 1.405, + "step": 11900 + }, + { + "epoch": 0.78, + "learning_rate": 7.062843878417566e-07, + "logits/chosen": -1.0695551633834839, + "logits/rejected": -0.8493108749389648, + "logps/chosen": -417.53173828125, + "logps/rejected": -471.24237060546875, + "loss": 0.689, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19409912824630737, + "rewards/margins": 0.08675482124090195, + "rewards/rejected": -0.2808539569377899, + "step": 11910 + }, + { + "epoch": 0.78, + "learning_rate": 7.023117022622458e-07, + "logits/chosen": -0.8141118288040161, + "logits/rejected": -0.4967958331108093, + "logps/chosen": -513.6212768554688, + "logps/rejected": -578.6097412109375, + "loss": 0.6901, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.27510595321655273, + "rewards/margins": 0.0882568210363388, + "rewards/rejected": -0.36336278915405273, + "step": 11920 + }, + { + "epoch": 0.78, + "learning_rate": 6.983483943055042e-07, + "logits/chosen": -0.8237019777297974, + "logits/rejected": -0.6630524396896362, + "logps/chosen": -520.7310791015625, + "logps/rejected": -528.470458984375, + "loss": 0.6895, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.24026444554328918, + "rewards/margins": 0.06451254338026047, + "rewards/rejected": -0.3047769367694855, + "step": 11930 + }, + { + "epoch": 0.78, + "learning_rate": 6.943944846460859e-07, + "logits/chosen": -0.4450019896030426, + "logits/rejected": -0.535525381565094, + "logps/chosen": -430.12127685546875, + "logps/rejected": -439.7928771972656, + "loss": 0.6925, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.2113669365644455, + "rewards/margins": 0.05641376972198486, + "rewards/rejected": -0.26778069138526917, + "step": 11940 + }, + { + "epoch": 0.78, + "learning_rate": 6.904499939095225e-07, + "logits/chosen": -0.7984381914138794, + "logits/rejected": -0.735599160194397, + "logps/chosen": -448.34613037109375, + "logps/rejected": -533.6369018554688, + "loss": 0.6892, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.22599899768829346, + "rewards/margins": 0.10061899572610855, + "rewards/rejected": -0.3266179859638214, + "step": 11950 + }, + { + "epoch": 0.78, + "learning_rate": 6.865149426722079e-07, + "logits/chosen": -0.5745676755905151, + "logits/rejected": -0.5700176954269409, + "logps/chosen": -530.6217651367188, + "logps/rejected": -595.4599609375, + "loss": 0.6894, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.25943660736083984, + "rewards/margins": 0.09553225338459015, + "rewards/rejected": -0.3549688458442688, + "step": 11960 + }, + { + "epoch": 0.78, + "learning_rate": 6.825893514612985e-07, + "logits/chosen": -0.5172964930534363, + "logits/rejected": -0.4114875793457031, + "logps/chosen": -450.946044921875, + "logps/rejected": -549.2027587890625, + "loss": 0.6883, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2143792361021042, + "rewards/margins": 0.10044028609991074, + "rewards/rejected": -0.31481948494911194, + "step": 11970 + }, + { + "epoch": 0.78, + "learning_rate": 6.786732407546001e-07, + "logits/chosen": -0.5917221307754517, + "logits/rejected": -0.4425802230834961, + "logps/chosen": -415.364501953125, + "logps/rejected": -455.5563049316406, + "loss": 0.6891, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2144833356142044, + "rewards/margins": 0.07990734279155731, + "rewards/rejected": -0.29439064860343933, + "step": 11980 + }, + { + "epoch": 0.78, + "learning_rate": 6.747666309804654e-07, + "logits/chosen": -1.1100962162017822, + "logits/rejected": -0.7921835780143738, + "logps/chosen": -522.1273193359375, + "logps/rejected": -514.0115966796875, + "loss": 0.6903, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.234450101852417, + "rewards/margins": 0.07141976058483124, + "rewards/rejected": -0.30586984753608704, + "step": 11990 + }, + { + "epoch": 0.79, + "learning_rate": 6.708695425176831e-07, + "logits/chosen": -0.783919095993042, + "logits/rejected": -0.4991689622402191, + "logps/chosen": -423.63970947265625, + "logps/rejected": -534.7395629882812, + "loss": 0.6884, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2546009421348572, + "rewards/margins": 0.09034449607133865, + "rewards/rejected": -0.3449454605579376, + "step": 12000 + }, + { + "epoch": 0.79, + "eval_logits/chosen": -0.8023983836174011, + "eval_logits/rejected": -0.6853892803192139, + "eval_logps/chosen": -454.8382568359375, + "eval_logps/rejected": -520.2646484375, + "eval_loss": 0.6894471049308777, + "eval_rewards/accuracies": 0.6669999957084656, + "eval_rewards/chosen": -0.22283333539962769, + "eval_rewards/margins": 0.08581943064928055, + "eval_rewards/rejected": -0.30865269899368286, + "eval_runtime": 712.3537, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 1.404, + "step": 12000 + }, + { + "epoch": 0.79, + "learning_rate": 6.669819956953768e-07, + "logits/chosen": -0.5224130749702454, + "logits/rejected": -0.6283994913101196, + "logps/chosen": -375.14593505859375, + "logps/rejected": -472.1822814941406, + "loss": 0.6892, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.20481471717357635, + "rewards/margins": 0.08849630504846573, + "rewards/rejected": -0.2933110296726227, + "step": 12010 + }, + { + "epoch": 0.79, + "learning_rate": 6.631040107928957e-07, + "logits/chosen": -1.2082918882369995, + "logits/rejected": -0.6000986099243164, + "logps/chosen": -500.62945556640625, + "logps/rejected": -478.07373046875, + "loss": 0.6911, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.23244516551494598, + "rewards/margins": 0.0737428143620491, + "rewards/rejected": -0.3061879575252533, + "step": 12020 + }, + { + "epoch": 0.79, + "learning_rate": 6.592356080397072e-07, + "logits/chosen": -0.8976734280586243, + "logits/rejected": -0.6858707666397095, + "logps/chosen": -438.0572204589844, + "logps/rejected": -480.2071838378906, + "loss": 0.6894, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.20976808667182922, + "rewards/margins": 0.09861332178115845, + "rewards/rejected": -0.30838140845298767, + "step": 12030 + }, + { + "epoch": 0.79, + "learning_rate": 6.553768076152963e-07, + "logits/chosen": -0.5755825042724609, + "logits/rejected": -0.7588081955909729, + "logps/chosen": -362.34368896484375, + "logps/rejected": -512.9739379882812, + "loss": 0.6883, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.20424611866474152, + "rewards/margins": 0.12352615594863892, + "rewards/rejected": -0.3277722895145416, + "step": 12040 + }, + { + "epoch": 0.79, + "learning_rate": 6.51527629649055e-07, + "logits/chosen": -1.09049391746521, + "logits/rejected": -0.8895280957221985, + "logps/chosen": -486.25701904296875, + "logps/rejected": -511.87615966796875, + "loss": 0.6908, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.23267392814159393, + "rewards/margins": 0.054766200482845306, + "rewards/rejected": -0.28744015097618103, + "step": 12050 + }, + { + "epoch": 0.79, + "learning_rate": 6.476880942201824e-07, + "logits/chosen": -1.276995301246643, + "logits/rejected": -0.8726984262466431, + "logps/chosen": -411.46307373046875, + "logps/rejected": -439.18359375, + "loss": 0.688, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.17012545466423035, + "rewards/margins": 0.0863446518778801, + "rewards/rejected": -0.25647011399269104, + "step": 12060 + }, + { + "epoch": 0.79, + "learning_rate": 6.438582213575748e-07, + "logits/chosen": -0.8580164909362793, + "logits/rejected": -0.9575880765914917, + "logps/chosen": -431.1397399902344, + "logps/rejected": -528.38427734375, + "loss": 0.6904, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.20122317969799042, + "rewards/margins": 0.08609914034605026, + "rewards/rejected": -0.28732234239578247, + "step": 12070 + }, + { + "epoch": 0.79, + "learning_rate": 6.400380310397267e-07, + "logits/chosen": -0.8642458915710449, + "logits/rejected": -0.5412619709968567, + "logps/chosen": -440.3069763183594, + "logps/rejected": -521.5443725585938, + "loss": 0.6919, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.20819664001464844, + "rewards/margins": 0.049825601279735565, + "rewards/rejected": -0.2580222487449646, + "step": 12080 + }, + { + "epoch": 0.79, + "learning_rate": 6.362275431946202e-07, + "logits/chosen": -0.6833379864692688, + "logits/rejected": -0.6843885183334351, + "logps/chosen": -441.56927490234375, + "logps/rejected": -506.1634826660156, + "loss": 0.6899, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.205215722322464, + "rewards/margins": 0.058545976877212524, + "rewards/rejected": -0.2637616991996765, + "step": 12090 + }, + { + "epoch": 0.79, + "learning_rate": 6.324267776996285e-07, + "logits/chosen": -1.1212000846862793, + "logits/rejected": -0.6497322916984558, + "logps/chosen": -623.5897216796875, + "logps/rejected": -622.3527221679688, + "loss": 0.6878, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.24901065230369568, + "rewards/margins": 0.12074349075555801, + "rewards/rejected": -0.3697541356086731, + "step": 12100 + }, + { + "epoch": 0.79, + "eval_logits/chosen": -0.8305608034133911, + "eval_logits/rejected": -0.7124419808387756, + "eval_logps/chosen": -446.484130859375, + "eval_logps/rejected": -511.0565185546875, + "eval_loss": 0.6894470453262329, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -0.21447916328907013, + "eval_rewards/margins": 0.08496550470590591, + "eval_rewards/rejected": -0.29944467544555664, + "eval_runtime": 710.5726, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 1.407, + "step": 12100 + }, + { + "epoch": 0.79, + "learning_rate": 6.286357543814045e-07, + "logits/chosen": -0.7228476405143738, + "logits/rejected": -0.848524272441864, + "logps/chosen": -406.7255859375, + "logps/rejected": -588.52490234375, + "loss": 0.6865, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.21319575607776642, + "rewards/margins": 0.11378808319568634, + "rewards/rejected": -0.32698389887809753, + "step": 12110 + }, + { + "epoch": 0.79, + "learning_rate": 6.248544930157838e-07, + "logits/chosen": -0.9068900346755981, + "logits/rejected": -0.7620546817779541, + "logps/chosen": -416.1766662597656, + "logps/rejected": -525.029541015625, + "loss": 0.6867, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.23209106922149658, + "rewards/margins": 0.1266333907842636, + "rewards/rejected": -0.358724445104599, + "step": 12120 + }, + { + "epoch": 0.79, + "learning_rate": 6.21083013327678e-07, + "logits/chosen": -0.777552604675293, + "logits/rejected": -0.7302781343460083, + "logps/chosen": -505.84814453125, + "logps/rejected": -513.743896484375, + "loss": 0.6897, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.20160739123821259, + "rewards/margins": 0.06382157653570175, + "rewards/rejected": -0.26542896032333374, + "step": 12130 + }, + { + "epoch": 0.79, + "learning_rate": 6.17321334990973e-07, + "logits/chosen": -0.9642006754875183, + "logits/rejected": -0.6679797172546387, + "logps/chosen": -407.752685546875, + "logps/rejected": -446.0419006347656, + "loss": 0.6911, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2069481909275055, + "rewards/margins": 0.07352651655673981, + "rewards/rejected": -0.2804746627807617, + "step": 12140 + }, + { + "epoch": 0.79, + "learning_rate": 6.135694776284243e-07, + "logits/chosen": -1.1946967840194702, + "logits/rejected": -0.7229410409927368, + "logps/chosen": -462.3304138183594, + "logps/rejected": -509.57952880859375, + "loss": 0.6892, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.19153942167758942, + "rewards/margins": 0.11202134191989899, + "rewards/rejected": -0.3035607635974884, + "step": 12150 + }, + { + "epoch": 0.8, + "learning_rate": 6.098274608115595e-07, + "logits/chosen": -0.9830659031867981, + "logits/rejected": -0.5625541806221008, + "logps/chosen": -399.18585205078125, + "logps/rejected": -403.7681884765625, + "loss": 0.6932, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.19353525340557098, + "rewards/margins": 0.03321469947695732, + "rewards/rejected": -0.2267499417066574, + "step": 12160 + }, + { + "epoch": 0.8, + "learning_rate": 6.060953040605697e-07, + "logits/chosen": -1.0277113914489746, + "logits/rejected": -0.6402236223220825, + "logps/chosen": -523.7098388671875, + "logps/rejected": -552.3817138671875, + "loss": 0.6907, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18458017706871033, + "rewards/margins": 0.09856677800416946, + "rewards/rejected": -0.2831469774246216, + "step": 12170 + }, + { + "epoch": 0.8, + "learning_rate": 6.023730268442144e-07, + "logits/chosen": -0.8050621151924133, + "logits/rejected": -0.6555970311164856, + "logps/chosen": -415.20281982421875, + "logps/rejected": -501.6064453125, + "loss": 0.6869, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.21038201451301575, + "rewards/margins": 0.11484841257333755, + "rewards/rejected": -0.3252304494380951, + "step": 12180 + }, + { + "epoch": 0.8, + "learning_rate": 5.986606485797131e-07, + "logits/chosen": -0.7921792268753052, + "logits/rejected": -0.92046058177948, + "logps/chosen": -378.4645690917969, + "logps/rejected": -452.6407165527344, + "loss": 0.6891, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.17857401072978973, + "rewards/margins": 0.06911009550094604, + "rewards/rejected": -0.24768409132957458, + "step": 12190 + }, + { + "epoch": 0.8, + "learning_rate": 5.949581886326511e-07, + "logits/chosen": -0.7513020038604736, + "logits/rejected": -0.9230319857597351, + "logps/chosen": -474.7445373535156, + "logps/rejected": -501.08935546875, + "loss": 0.6903, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.17695248126983643, + "rewards/margins": 0.05525914952158928, + "rewards/rejected": -0.2322116196155548, + "step": 12200 + }, + { + "epoch": 0.8, + "eval_logits/chosen": -0.9147159457206726, + "eval_logits/rejected": -0.7926742434501648, + "eval_logps/chosen": -427.7273864746094, + "eval_logps/rejected": -490.2318115234375, + "eval_loss": 0.6894443035125732, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -0.19572244584560394, + "eval_rewards/margins": 0.08289749175310135, + "eval_rewards/rejected": -0.2786199450492859, + "eval_runtime": 712.1709, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 1.404, + "step": 12200 + }, + { + "epoch": 0.8, + "learning_rate": 5.912656663168717e-07, + "logits/chosen": -1.0834193229675293, + "logits/rejected": -1.0200486183166504, + "logps/chosen": -409.322998046875, + "logps/rejected": -465.9730529785156, + "loss": 0.6907, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18531367182731628, + "rewards/margins": 0.06302249431610107, + "rewards/rejected": -0.24833616614341736, + "step": 12210 + }, + { + "epoch": 0.8, + "learning_rate": 5.875831008943817e-07, + "logits/chosen": -0.7899206876754761, + "logits/rejected": -0.7594529390335083, + "logps/chosen": -378.1102600097656, + "logps/rejected": -410.220703125, + "loss": 0.691, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.20046624541282654, + "rewards/margins": 0.054710645228624344, + "rewards/rejected": -0.2551768720149994, + "step": 12220 + }, + { + "epoch": 0.8, + "learning_rate": 5.839105115752442e-07, + "logits/chosen": -0.7225872278213501, + "logits/rejected": -0.6378189325332642, + "logps/chosen": -462.96588134765625, + "logps/rejected": -512.4617309570312, + "loss": 0.6881, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.23998837172985077, + "rewards/margins": 0.08950354158878326, + "rewards/rejected": -0.32949191331863403, + "step": 12230 + }, + { + "epoch": 0.8, + "learning_rate": 5.802479175174855e-07, + "logits/chosen": -0.7741121053695679, + "logits/rejected": -0.794974684715271, + "logps/chosen": -390.6680603027344, + "logps/rejected": -477.42755126953125, + "loss": 0.6891, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.224673792719841, + "rewards/margins": 0.08083239942789078, + "rewards/rejected": -0.30550616979599, + "step": 12240 + }, + { + "epoch": 0.8, + "learning_rate": 5.765953378269901e-07, + "logits/chosen": -0.9988287687301636, + "logits/rejected": -1.0308479070663452, + "logps/chosen": -427.112548828125, + "logps/rejected": -573.0469970703125, + "loss": 0.6864, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2201191484928131, + "rewards/margins": 0.11489073187112808, + "rewards/rejected": -0.3350098729133606, + "step": 12250 + }, + { + "epoch": 0.8, + "learning_rate": 5.729527915574037e-07, + "logits/chosen": -0.7769566178321838, + "logits/rejected": -0.8973855972290039, + "logps/chosen": -426.7218322753906, + "logps/rejected": -519.5902099609375, + "loss": 0.6898, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.20630326867103577, + "rewards/margins": 0.09252933412790298, + "rewards/rejected": -0.29883262515068054, + "step": 12260 + }, + { + "epoch": 0.8, + "learning_rate": 5.693202977100304e-07, + "logits/chosen": -0.7344051003456116, + "logits/rejected": -0.650239884853363, + "logps/chosen": -386.86102294921875, + "logps/rejected": -460.61614990234375, + "loss": 0.6898, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.21671593189239502, + "rewards/margins": 0.07709144055843353, + "rewards/rejected": -0.29380735754966736, + "step": 12270 + }, + { + "epoch": 0.8, + "learning_rate": 5.656978752337389e-07, + "logits/chosen": -0.9088813662528992, + "logits/rejected": -0.9434731602668762, + "logps/chosen": -446.60223388671875, + "logps/rejected": -563.0369262695312, + "loss": 0.6872, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.24694709479808807, + "rewards/margins": 0.11503490060567856, + "rewards/rejected": -0.36198195815086365, + "step": 12280 + }, + { + "epoch": 0.8, + "learning_rate": 5.620855430248581e-07, + "logits/chosen": -0.6687235236167908, + "logits/rejected": -0.7715210914611816, + "logps/chosen": -318.0511474609375, + "logps/rejected": -428.16424560546875, + "loss": 0.6892, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.15605607628822327, + "rewards/margins": 0.11229152977466583, + "rewards/rejected": -0.2683475911617279, + "step": 12290 + }, + { + "epoch": 0.8, + "learning_rate": 5.584833199270837e-07, + "logits/chosen": -1.0661684274673462, + "logits/rejected": -0.7041251063346863, + "logps/chosen": -458.58795166015625, + "logps/rejected": -540.2942504882812, + "loss": 0.6914, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.22995570302009583, + "rewards/margins": 0.08939947187900543, + "rewards/rejected": -0.31935515999794006, + "step": 12300 + }, + { + "epoch": 0.8, + "eval_logits/chosen": -0.8736314177513123, + "eval_logits/rejected": -0.7532578110694885, + "eval_logps/chosen": -441.02703857421875, + "eval_logps/rejected": -506.0263671875, + "eval_loss": 0.6894257068634033, + "eval_rewards/accuracies": 0.6654999852180481, + "eval_rewards/chosen": -0.20902210474014282, + "eval_rewards/margins": 0.08539240807294846, + "eval_rewards/rejected": -0.2944145202636719, + "eval_runtime": 713.5476, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 1.401, + "step": 12300 + }, + { + "epoch": 0.81, + "learning_rate": 5.548912247313742e-07, + "logits/chosen": -1.3093442916870117, + "logits/rejected": -1.0217863321304321, + "logps/chosen": -513.5242919921875, + "logps/rejected": -541.9788818359375, + "loss": 0.6907, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.22972431778907776, + "rewards/margins": 0.07603023946285248, + "rewards/rejected": -0.30575451254844666, + "step": 12310 + }, + { + "epoch": 0.81, + "learning_rate": 5.513092761758596e-07, + "logits/chosen": -1.2074533700942993, + "logits/rejected": -0.9448097348213196, + "logps/chosen": -509.68133544921875, + "logps/rejected": -491.7139587402344, + "loss": 0.692, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.23847146332263947, + "rewards/margins": 0.049013566225767136, + "rewards/rejected": -0.2874850332736969, + "step": 12320 + }, + { + "epoch": 0.81, + "learning_rate": 5.477374929457363e-07, + "logits/chosen": -1.1951650381088257, + "logits/rejected": -0.9347305297851562, + "logps/chosen": -422.927490234375, + "logps/rejected": -459.3108825683594, + "loss": 0.6918, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2208663523197174, + "rewards/margins": 0.05555524677038193, + "rewards/rejected": -0.27642157673835754, + "step": 12330 + }, + { + "epoch": 0.81, + "learning_rate": 5.441758936731772e-07, + "logits/chosen": -0.9595106840133667, + "logits/rejected": -0.6512321829795837, + "logps/chosen": -437.71051025390625, + "logps/rejected": -497.3162536621094, + "loss": 0.6902, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20236524939537048, + "rewards/margins": 0.07739603519439697, + "rewards/rejected": -0.27976128458976746, + "step": 12340 + }, + { + "epoch": 0.81, + "learning_rate": 5.406244969372273e-07, + "logits/chosen": -0.9617953300476074, + "logits/rejected": -0.7921175956726074, + "logps/chosen": -404.3094482421875, + "logps/rejected": -547.6414794921875, + "loss": 0.6864, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20458662509918213, + "rewards/margins": 0.13683012127876282, + "rewards/rejected": -0.34141674637794495, + "step": 12350 + }, + { + "epoch": 0.81, + "learning_rate": 5.370833212637122e-07, + "logits/chosen": -0.6919295191764832, + "logits/rejected": -0.6015470623970032, + "logps/chosen": -408.9327697753906, + "logps/rejected": -477.7970275878906, + "loss": 0.6905, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19042177498340607, + "rewards/margins": 0.08264943957328796, + "rewards/rejected": -0.2730712294578552, + "step": 12360 + }, + { + "epoch": 0.81, + "learning_rate": 5.335523851251392e-07, + "logits/chosen": -1.001318097114563, + "logits/rejected": -0.8748119473457336, + "logps/chosen": -415.00048828125, + "logps/rejected": -490.12945556640625, + "loss": 0.6873, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.20886659622192383, + "rewards/margins": 0.0959271788597107, + "rewards/rejected": -0.3047937750816345, + "step": 12370 + }, + { + "epoch": 0.81, + "learning_rate": 5.300317069406003e-07, + "logits/chosen": -1.0027813911437988, + "logits/rejected": -0.7950852513313293, + "logps/chosen": -341.1397399902344, + "logps/rejected": -453.4449768066406, + "loss": 0.6865, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.18039390444755554, + "rewards/margins": 0.1085384264588356, + "rewards/rejected": -0.28893235325813293, + "step": 12380 + }, + { + "epoch": 0.81, + "learning_rate": 5.265213050756782e-07, + "logits/chosen": -1.218680500984192, + "logits/rejected": -0.9439771771430969, + "logps/chosen": -411.95489501953125, + "logps/rejected": -501.7945861816406, + "loss": 0.6889, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.17990362644195557, + "rewards/margins": 0.09197081625461578, + "rewards/rejected": -0.27187445759773254, + "step": 12390 + }, + { + "epoch": 0.81, + "learning_rate": 5.230211978423477e-07, + "logits/chosen": -1.046304702758789, + "logits/rejected": -0.9308651089668274, + "logps/chosen": -416.91558837890625, + "logps/rejected": -460.021484375, + "loss": 0.6923, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.19811533391475677, + "rewards/margins": 0.05827472731471062, + "rewards/rejected": -0.2563900351524353, + "step": 12400 + }, + { + "epoch": 0.81, + "eval_logits/chosen": -0.9178078770637512, + "eval_logits/rejected": -0.7956603169441223, + "eval_logps/chosen": -427.3274230957031, + "eval_logps/rejected": -489.2209777832031, + "eval_loss": 0.6894317865371704, + "eval_rewards/accuracies": 0.6700000166893005, + "eval_rewards/chosen": -0.1953224390745163, + "eval_rewards/margins": 0.08228664100170135, + "eval_rewards/rejected": -0.27760908007621765, + "eval_runtime": 710.5715, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 1.407, + "step": 12400 + }, + { + "epoch": 0.81, + "learning_rate": 5.195314034988835e-07, + "logits/chosen": -1.2224987745285034, + "logits/rejected": -1.0301649570465088, + "logps/chosen": -390.1413269042969, + "logps/rejected": -409.8642578125, + "loss": 0.6894, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.17085427045822144, + "rewards/margins": 0.08676211535930634, + "rewards/rejected": -0.2576163709163666, + "step": 12410 + }, + { + "epoch": 0.81, + "learning_rate": 5.160519402497616e-07, + "logits/chosen": -0.9702883958816528, + "logits/rejected": -0.8835731744766235, + "logps/chosen": -442.39300537109375, + "logps/rejected": -527.3919677734375, + "loss": 0.6875, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.21233896911144257, + "rewards/margins": 0.08905895054340363, + "rewards/rejected": -0.3013979196548462, + "step": 12420 + }, + { + "epoch": 0.81, + "learning_rate": 5.125828262455679e-07, + "logits/chosen": -0.9499308466911316, + "logits/rejected": -0.7601709961891174, + "logps/chosen": -452.78863525390625, + "logps/rejected": -510.4447326660156, + "loss": 0.6884, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.19707557559013367, + "rewards/margins": 0.08799558877944946, + "rewards/rejected": -0.28507116436958313, + "step": 12430 + }, + { + "epoch": 0.81, + "learning_rate": 5.091240795828992e-07, + "logits/chosen": -0.7149444818496704, + "logits/rejected": -0.6284686923027039, + "logps/chosen": -386.01531982421875, + "logps/rejected": -501.17694091796875, + "loss": 0.6904, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18352492153644562, + "rewards/margins": 0.09533097594976425, + "rewards/rejected": -0.2788558602333069, + "step": 12440 + }, + { + "epoch": 0.81, + "learning_rate": 5.056757183042732e-07, + "logits/chosen": -0.9699877500534058, + "logits/rejected": -0.785503089427948, + "logps/chosen": -440.580810546875, + "logps/rejected": -510.8658752441406, + "loss": 0.6892, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.20702461898326874, + "rewards/margins": 0.0901625007390976, + "rewards/rejected": -0.2971871495246887, + "step": 12450 + }, + { + "epoch": 0.82, + "learning_rate": 5.022377603980308e-07, + "logits/chosen": -1.19623601436615, + "logits/rejected": -0.7461040019989014, + "logps/chosen": -453.72381591796875, + "logps/rejected": -481.7816467285156, + "loss": 0.6883, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2044392079114914, + "rewards/margins": 0.09452911466360092, + "rewards/rejected": -0.29896828532218933, + "step": 12460 + }, + { + "epoch": 0.82, + "learning_rate": 4.988102237982454e-07, + "logits/chosen": -1.0094448328018188, + "logits/rejected": -0.7690650224685669, + "logps/chosen": -428.43182373046875, + "logps/rejected": -440.9228515625, + "loss": 0.6922, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2062513530254364, + "rewards/margins": 0.053066302090883255, + "rewards/rejected": -0.25931766629219055, + "step": 12470 + }, + { + "epoch": 0.82, + "learning_rate": 4.953931263846251e-07, + "logits/chosen": -0.9864797592163086, + "logits/rejected": -0.7949808239936829, + "logps/chosen": -471.72576904296875, + "logps/rejected": -527.9396362304688, + "loss": 0.6891, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2100168913602829, + "rewards/margins": 0.10017456114292145, + "rewards/rejected": -0.31019148230552673, + "step": 12480 + }, + { + "epoch": 0.82, + "learning_rate": 4.919864859824266e-07, + "logits/chosen": -0.8903275728225708, + "logits/rejected": -0.9078360795974731, + "logps/chosen": -443.26885986328125, + "logps/rejected": -481.3370666503906, + "loss": 0.6905, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20583471655845642, + "rewards/margins": 0.08310334384441376, + "rewards/rejected": -0.288938045501709, + "step": 12490 + }, + { + "epoch": 0.82, + "learning_rate": 4.885903203623532e-07, + "logits/chosen": -1.361114263534546, + "logits/rejected": -0.7789111137390137, + "logps/chosen": -457.87957763671875, + "logps/rejected": -490.6852111816406, + "loss": 0.6892, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.17286573350429535, + "rewards/margins": 0.09657898545265198, + "rewards/rejected": -0.26944470405578613, + "step": 12500 + }, + { + "epoch": 0.82, + "eval_logits/chosen": -0.9396504163742065, + "eval_logits/rejected": -0.8163852691650391, + "eval_logps/chosen": -422.764892578125, + "eval_logps/rejected": -484.4884338378906, + "eval_loss": 0.6894283294677734, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -0.19075995683670044, + "eval_rewards/margins": 0.08211655169725418, + "eval_rewards/rejected": -0.2728765308856964, + "eval_runtime": 711.9412, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 1.405, + "step": 12500 + }, + { + "epoch": 0.82, + "learning_rate": 4.852046472404695e-07, + "logits/chosen": -0.9885215759277344, + "logits/rejected": -0.5774581432342529, + "logps/chosen": -446.4098205566406, + "logps/rejected": -402.09356689453125, + "loss": 0.6899, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1485210806131363, + "rewards/margins": 0.08135717362165451, + "rewards/rejected": -0.2298782616853714, + "step": 12510 + }, + { + "epoch": 0.82, + "learning_rate": 4.818294842781035e-07, + "logits/chosen": -1.1667752265930176, + "logits/rejected": -0.8311818242073059, + "logps/chosen": -395.07470703125, + "logps/rejected": -465.4454650878906, + "loss": 0.6888, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.16886630654335022, + "rewards/margins": 0.12272864580154419, + "rewards/rejected": -0.291594922542572, + "step": 12520 + }, + { + "epoch": 0.82, + "learning_rate": 4.784648490817601e-07, + "logits/chosen": -1.015737771987915, + "logits/rejected": -0.901741623878479, + "logps/chosen": -391.2193298339844, + "logps/rejected": -415.372802734375, + "loss": 0.6903, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1711125373840332, + "rewards/margins": 0.06741202622652054, + "rewards/rejected": -0.23852458596229553, + "step": 12530 + }, + { + "epoch": 0.82, + "learning_rate": 4.751107592030235e-07, + "logits/chosen": -0.9657641649246216, + "logits/rejected": -0.6886070966720581, + "logps/chosen": -327.6506652832031, + "logps/rejected": -425.15374755859375, + "loss": 0.6893, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1577988713979721, + "rewards/margins": 0.11460302025079727, + "rewards/rejected": -0.27240189909935, + "step": 12540 + }, + { + "epoch": 0.82, + "learning_rate": 4.717672321384703e-07, + "logits/chosen": -0.9216817617416382, + "logits/rejected": -0.42492127418518066, + "logps/chosen": -415.05120849609375, + "logps/rejected": -458.6930236816406, + "loss": 0.6886, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19180552661418915, + "rewards/margins": 0.08583752810955048, + "rewards/rejected": -0.277643084526062, + "step": 12550 + }, + { + "epoch": 0.82, + "learning_rate": 4.684342853295748e-07, + "logits/chosen": -0.7486587166786194, + "logits/rejected": -0.7542505860328674, + "logps/chosen": -368.861328125, + "logps/rejected": -465.25213623046875, + "loss": 0.688, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.18116456270217896, + "rewards/margins": 0.10031484067440033, + "rewards/rejected": -0.2814793884754181, + "step": 12560 + }, + { + "epoch": 0.82, + "learning_rate": 4.651119361626213e-07, + "logits/chosen": -1.3638161420822144, + "logits/rejected": -0.8579947352409363, + "logps/chosen": -395.2978515625, + "logps/rejected": -425.96185302734375, + "loss": 0.69, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.15417927503585815, + "rewards/margins": 0.07689286768436432, + "rewards/rejected": -0.23107214272022247, + "step": 12570 + }, + { + "epoch": 0.82, + "learning_rate": 4.618002019686091e-07, + "logits/chosen": -0.983599841594696, + "logits/rejected": -1.0167783498764038, + "logps/chosen": -462.54071044921875, + "logps/rejected": -489.15533447265625, + "loss": 0.6889, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18332836031913757, + "rewards/margins": 0.0832507386803627, + "rewards/rejected": -0.26657912135124207, + "step": 12580 + }, + { + "epoch": 0.82, + "learning_rate": 4.5849910002316757e-07, + "logits/chosen": -0.9545795321464539, + "logits/rejected": -0.8534967303276062, + "logps/chosen": -411.9749450683594, + "logps/rejected": -470.1524353027344, + "loss": 0.6911, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.231462761759758, + "rewards/margins": 0.09144178777933121, + "rewards/rejected": -0.3229045569896698, + "step": 12590 + }, + { + "epoch": 0.82, + "learning_rate": 4.5520864754645984e-07, + "logits/chosen": -1.293601155281067, + "logits/rejected": -1.0598740577697754, + "logps/chosen": -457.161376953125, + "logps/rejected": -484.68731689453125, + "loss": 0.6898, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.17581158876419067, + "rewards/margins": 0.07135093212127686, + "rewards/rejected": -0.24716253578662872, + "step": 12600 + }, + { + "epoch": 0.82, + "eval_logits/chosen": -0.9114385843276978, + "eval_logits/rejected": -0.7893572449684143, + "eval_logps/chosen": -430.8013916015625, + "eval_logps/rejected": -493.8382568359375, + "eval_loss": 0.6894257664680481, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -0.19879642128944397, + "eval_rewards/margins": 0.08342995494604111, + "eval_rewards/rejected": -0.2822263836860657, + "eval_runtime": 711.5076, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.405, + "step": 12600 + }, + { + "epoch": 0.83, + "learning_rate": 4.5192886170309896e-07, + "logits/chosen": -0.8391144871711731, + "logits/rejected": -0.8102821111679077, + "logps/chosen": -396.1620178222656, + "logps/rejected": -444.4454040527344, + "loss": 0.6914, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.20539355278015137, + "rewards/margins": 0.04809931665658951, + "rewards/rejected": -0.25349289178848267, + "step": 12610 + }, + { + "epoch": 0.83, + "learning_rate": 4.486597596020548e-07, + "logits/chosen": -0.7510364651679993, + "logits/rejected": -0.8711469769477844, + "logps/chosen": -448.0186462402344, + "logps/rejected": -485.5732421875, + "loss": 0.6891, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.22843733429908752, + "rewards/margins": 0.07587635517120361, + "rewards/rejected": -0.3043137192726135, + "step": 12620 + }, + { + "epoch": 0.83, + "learning_rate": 4.454013582965644e-07, + "logits/chosen": -0.673041820526123, + "logits/rejected": -0.5068280100822449, + "logps/chosen": -488.75177001953125, + "logps/rejected": -497.9751892089844, + "loss": 0.6914, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2252950668334961, + "rewards/margins": 0.05966676399111748, + "rewards/rejected": -0.2849618196487427, + "step": 12630 + }, + { + "epoch": 0.83, + "learning_rate": 4.4215367478404605e-07, + "logits/chosen": -0.7873706817626953, + "logits/rejected": -0.7081938982009888, + "logps/chosen": -487.13531494140625, + "logps/rejected": -583.9402465820312, + "loss": 0.6903, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2163754403591156, + "rewards/margins": 0.07243311405181885, + "rewards/rejected": -0.28880855441093445, + "step": 12640 + }, + { + "epoch": 0.83, + "learning_rate": 4.389167260060068e-07, + "logits/chosen": -0.8580641746520996, + "logits/rejected": -0.8711032867431641, + "logps/chosen": -384.7228088378906, + "logps/rejected": -469.16259765625, + "loss": 0.6875, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17603252828121185, + "rewards/margins": 0.12261439859867096, + "rewards/rejected": -0.2986469268798828, + "step": 12650 + }, + { + "epoch": 0.83, + "learning_rate": 4.356905288479579e-07, + "logits/chosen": -0.8820209503173828, + "logits/rejected": -0.6234656572341919, + "logps/chosen": -429.2950134277344, + "logps/rejected": -532.017333984375, + "loss": 0.6855, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20277424156665802, + "rewards/margins": 0.13148626685142517, + "rewards/rejected": -0.334260493516922, + "step": 12660 + }, + { + "epoch": 0.83, + "learning_rate": 4.3247510013932377e-07, + "logits/chosen": -0.5720213651657104, + "logits/rejected": -0.6938979029655457, + "logps/chosen": -463.2184143066406, + "logps/rejected": -558.4837036132812, + "loss": 0.6897, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.21102993190288544, + "rewards/margins": 0.09544537961483002, + "rewards/rejected": -0.30647531151771545, + "step": 12670 + }, + { + "epoch": 0.83, + "learning_rate": 4.2927045665335594e-07, + "logits/chosen": -0.3011423647403717, + "logits/rejected": -0.4939189851284027, + "logps/chosen": -386.75531005859375, + "logps/rejected": -474.6163635253906, + "loss": 0.6875, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.22187760472297668, + "rewards/margins": 0.09141628444194794, + "rewards/rejected": -0.3132938742637634, + "step": 12680 + }, + { + "epoch": 0.83, + "learning_rate": 4.260766151070439e-07, + "logits/chosen": -0.6524245142936707, + "logits/rejected": -0.6261091232299805, + "logps/chosen": -446.9039611816406, + "logps/rejected": -528.741943359375, + "loss": 0.6902, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2224704921245575, + "rewards/margins": 0.09309863299131393, + "rewards/rejected": -0.315569132566452, + "step": 12690 + }, + { + "epoch": 0.83, + "learning_rate": 4.228935921610308e-07, + "logits/chosen": -1.0410144329071045, + "logits/rejected": -0.8041863441467285, + "logps/chosen": -446.28924560546875, + "logps/rejected": -444.7435607910156, + "loss": 0.6903, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18257063627243042, + "rewards/margins": 0.06250576674938202, + "rewards/rejected": -0.24507638812065125, + "step": 12700 + }, + { + "epoch": 0.83, + "eval_logits/chosen": -0.9304318428039551, + "eval_logits/rejected": -0.8066224455833435, + "eval_logps/chosen": -433.35626220703125, + "eval_logps/rejected": -498.3890380859375, + "eval_loss": 0.6894228458404541, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -0.2013513296842575, + "eval_rewards/margins": 0.08542577177286148, + "eval_rewards/rejected": -0.286777138710022, + "eval_runtime": 713.6499, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 1.401, + "step": 12700 + }, + { + "epoch": 0.83, + "learning_rate": 4.1972140441952246e-07, + "logits/chosen": -0.8487979173660278, + "logits/rejected": -0.9116487503051758, + "logps/chosen": -430.19549560546875, + "logps/rejected": -499.740966796875, + "loss": 0.6896, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.19061222672462463, + "rewards/margins": 0.06753625720739365, + "rewards/rejected": -0.2581484913825989, + "step": 12710 + }, + { + "epoch": 0.83, + "learning_rate": 4.165600684302046e-07, + "logits/chosen": -1.0365651845932007, + "logits/rejected": -0.9350225329399109, + "logps/chosen": -358.640625, + "logps/rejected": -459.84130859375, + "loss": 0.6895, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.18285666406154633, + "rewards/margins": 0.09124276787042618, + "rewards/rejected": -0.2740994095802307, + "step": 12720 + }, + { + "epoch": 0.83, + "learning_rate": 4.13409600684154e-07, + "logits/chosen": -1.0195610523223877, + "logits/rejected": -0.8225051760673523, + "logps/chosen": -417.7220153808594, + "logps/rejected": -484.96685791015625, + "loss": 0.6882, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2027023732662201, + "rewards/margins": 0.09642815589904785, + "rewards/rejected": -0.29913052916526794, + "step": 12730 + }, + { + "epoch": 0.83, + "learning_rate": 4.102700176157548e-07, + "logits/chosen": -1.102210521697998, + "logits/rejected": -0.8111448287963867, + "logps/chosen": -552.992431640625, + "logps/rejected": -525.7896728515625, + "loss": 0.6899, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.22823718190193176, + "rewards/margins": 0.07058385014533997, + "rewards/rejected": -0.29882103204727173, + "step": 12740 + }, + { + "epoch": 0.83, + "learning_rate": 4.0714133560260884e-07, + "logits/chosen": -1.0088520050048828, + "logits/rejected": -1.0012805461883545, + "logps/chosen": -464.440185546875, + "logps/rejected": -468.52886962890625, + "loss": 0.6917, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.20490708947181702, + "rewards/margins": 0.062319546937942505, + "rewards/rejected": -0.2672266364097595, + "step": 12750 + }, + { + "epoch": 0.83, + "learning_rate": 4.0402357096545527e-07, + "logits/chosen": -0.741023063659668, + "logits/rejected": -0.9252802133560181, + "logps/chosen": -452.5013122558594, + "logps/rejected": -524.4758911132812, + "loss": 0.6897, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20073971152305603, + "rewards/margins": 0.08214665949344635, + "rewards/rejected": -0.2828863859176636, + "step": 12760 + }, + { + "epoch": 0.84, + "learning_rate": 4.0091673996808025e-07, + "logits/chosen": -1.2111819982528687, + "logits/rejected": -1.0127800703048706, + "logps/chosen": -428.5201110839844, + "logps/rejected": -490.87725830078125, + "loss": 0.6887, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.23146001994609833, + "rewards/margins": 0.0869842916727066, + "rewards/rejected": -0.31844431161880493, + "step": 12770 + }, + { + "epoch": 0.84, + "learning_rate": 3.9782085881723776e-07, + "logits/chosen": -0.9244493246078491, + "logits/rejected": -0.8639167547225952, + "logps/chosen": -360.8436584472656, + "logps/rejected": -485.7451171875, + "loss": 0.6883, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.19524605572223663, + "rewards/margins": 0.11293482780456543, + "rewards/rejected": -0.30818089842796326, + "step": 12780 + }, + { + "epoch": 0.84, + "learning_rate": 3.947359436625592e-07, + "logits/chosen": -0.9303233027458191, + "logits/rejected": -0.6964131593704224, + "logps/chosen": -436.606201171875, + "logps/rejected": -506.09381103515625, + "loss": 0.6888, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20194324851036072, + "rewards/margins": 0.10705173015594482, + "rewards/rejected": -0.30899497866630554, + "step": 12790 + }, + { + "epoch": 0.84, + "learning_rate": 3.9166201059647386e-07, + "logits/chosen": -0.9158743619918823, + "logits/rejected": -1.0071403980255127, + "logps/chosen": -471.6205139160156, + "logps/rejected": -480.96356201171875, + "loss": 0.6906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2097702920436859, + "rewards/margins": 0.04627406597137451, + "rewards/rejected": -0.2560443580150604, + "step": 12800 + }, + { + "epoch": 0.84, + "eval_logits/chosen": -0.9460026621818542, + "eval_logits/rejected": -0.8214091062545776, + "eval_logps/chosen": -435.67803955078125, + "eval_logps/rejected": -501.1642150878906, + "eval_loss": 0.6894211769104004, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -0.2036730945110321, + "eval_rewards/margins": 0.08587922900915146, + "eval_rewards/rejected": -0.28955233097076416, + "eval_runtime": 710.9115, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 1.407, + "step": 12800 + }, + { + "epoch": 0.84, + "learning_rate": 3.8859907565412194e-07, + "logits/chosen": -0.8359258770942688, + "logits/rejected": -1.1643868684768677, + "logps/chosen": -392.3347473144531, + "logps/rejected": -482.1744079589844, + "loss": 0.6884, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2126855105161667, + "rewards/margins": 0.09041811525821686, + "rewards/rejected": -0.30310362577438354, + "step": 12810 + }, + { + "epoch": 0.84, + "learning_rate": 3.8554715481327303e-07, + "logits/chosen": -0.8129196166992188, + "logits/rejected": -0.8484827280044556, + "logps/chosen": -465.0271911621094, + "logps/rejected": -534.2659912109375, + "loss": 0.6875, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.2317681610584259, + "rewards/margins": 0.10463656485080719, + "rewards/rejected": -0.3364047408103943, + "step": 12820 + }, + { + "epoch": 0.84, + "learning_rate": 3.8250626399424007e-07, + "logits/chosen": -1.0457967519760132, + "logits/rejected": -0.993150532245636, + "logps/chosen": -481.54150390625, + "logps/rejected": -543.5672607421875, + "loss": 0.6903, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.23006339371204376, + "rewards/margins": 0.08087591826915741, + "rewards/rejected": -0.31093934178352356, + "step": 12830 + }, + { + "epoch": 0.84, + "learning_rate": 3.7947641905980104e-07, + "logits/chosen": -0.8676019906997681, + "logits/rejected": -0.9785581827163696, + "logps/chosen": -386.38897705078125, + "logps/rejected": -436.10076904296875, + "loss": 0.6888, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.17907050251960754, + "rewards/margins": 0.08439258486032486, + "rewards/rejected": -0.2634630799293518, + "step": 12840 + }, + { + "epoch": 0.84, + "learning_rate": 3.764576358151098e-07, + "logits/chosen": -0.989855170249939, + "logits/rejected": -0.8942712545394897, + "logps/chosen": -364.42938232421875, + "logps/rejected": -425.1758728027344, + "loss": 0.6905, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.18961241841316223, + "rewards/margins": 0.07259075343608856, + "rewards/rejected": -0.2622031569480896, + "step": 12850 + }, + { + "epoch": 0.84, + "learning_rate": 3.7344993000761944e-07, + "logits/chosen": -0.9940598607063293, + "logits/rejected": -0.9289056062698364, + "logps/chosen": -395.99505615234375, + "logps/rejected": -533.985595703125, + "loss": 0.6892, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.21867993474006653, + "rewards/margins": 0.09501878172159195, + "rewards/rejected": -0.31369873881340027, + "step": 12860 + }, + { + "epoch": 0.84, + "learning_rate": 3.7045331732699585e-07, + "logits/chosen": -0.9832733869552612, + "logits/rejected": -0.8541062474250793, + "logps/chosen": -412.0921936035156, + "logps/rejected": -504.63653564453125, + "loss": 0.6858, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2068941295146942, + "rewards/margins": 0.12824371457099915, + "rewards/rejected": -0.33513784408569336, + "step": 12870 + }, + { + "epoch": 0.84, + "learning_rate": 3.6746781340503993e-07, + "logits/chosen": -0.9805141687393188, + "logits/rejected": -0.6995661854743958, + "logps/chosen": -416.86358642578125, + "logps/rejected": -514.7808837890625, + "loss": 0.6865, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18244779109954834, + "rewards/margins": 0.10594437271356583, + "rewards/rejected": -0.2883921265602112, + "step": 12880 + }, + { + "epoch": 0.84, + "learning_rate": 3.6449343381560116e-07, + "logits/chosen": -0.8010265231132507, + "logits/rejected": -0.6467529535293579, + "logps/chosen": -479.70025634765625, + "logps/rejected": -569.8721313476562, + "loss": 0.6886, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.248274564743042, + "rewards/margins": 0.10356147587299347, + "rewards/rejected": -0.35183608531951904, + "step": 12890 + }, + { + "epoch": 0.84, + "learning_rate": 3.615301940745017e-07, + "logits/chosen": -1.3851318359375, + "logits/rejected": -0.8564912676811218, + "logps/chosen": -510.59564208984375, + "logps/rejected": -476.83636474609375, + "loss": 0.6903, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.1934005469083786, + "rewards/margins": 0.07141765207052231, + "rewards/rejected": -0.2648181915283203, + "step": 12900 + }, + { + "epoch": 0.84, + "eval_logits/chosen": -0.9909506440162659, + "eval_logits/rejected": -0.863025426864624, + "eval_logps/chosen": -435.8428649902344, + "eval_logps/rejected": -502.7374267578125, + "eval_loss": 0.6894172430038452, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -0.20383791625499725, + "eval_rewards/margins": 0.08728757500648499, + "eval_rewards/rejected": -0.29112547636032104, + "eval_runtime": 711.8623, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 1.405, + "step": 12900 + }, + { + "epoch": 0.84, + "learning_rate": 3.5857810963945084e-07, + "logits/chosen": -0.6161023378372192, + "logits/rejected": -0.6896204352378845, + "logps/chosen": -450.0265197753906, + "logps/rejected": -523.8975830078125, + "loss": 0.69, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.23551742732524872, + "rewards/margins": 0.08869399130344391, + "rewards/rejected": -0.32421138882637024, + "step": 12910 + }, + { + "epoch": 0.85, + "learning_rate": 3.556371959099678e-07, + "logits/chosen": -1.257305383682251, + "logits/rejected": -0.9748373031616211, + "logps/chosen": -494.3572692871094, + "logps/rejected": -524.6000366210938, + "loss": 0.691, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19718293845653534, + "rewards/margins": 0.07021328061819077, + "rewards/rejected": -0.2673962116241455, + "step": 12920 + }, + { + "epoch": 0.85, + "learning_rate": 3.5270746822729797e-07, + "logits/chosen": -1.0669023990631104, + "logits/rejected": -0.8817640542984009, + "logps/chosen": -452.60992431640625, + "logps/rejected": -569.9772338867188, + "loss": 0.689, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.20512787997722626, + "rewards/margins": 0.10479208081960678, + "rewards/rejected": -0.30991998314857483, + "step": 12930 + }, + { + "epoch": 0.85, + "learning_rate": 3.4978894187433746e-07, + "logits/chosen": -0.9080924987792969, + "logits/rejected": -0.8624773025512695, + "logps/chosen": -320.5248718261719, + "logps/rejected": -369.5634460449219, + "loss": 0.6895, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.16944964230060577, + "rewards/margins": 0.05994703620672226, + "rewards/rejected": -0.22939670085906982, + "step": 12940 + }, + { + "epoch": 0.85, + "learning_rate": 3.468816320755486e-07, + "logits/chosen": -0.6681433916091919, + "logits/rejected": -0.731353759765625, + "logps/chosen": -412.3421936035156, + "logps/rejected": -438.8526306152344, + "loss": 0.6902, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.18841442465782166, + "rewards/margins": 0.0661202147603035, + "rewards/rejected": -0.25453463196754456, + "step": 12950 + }, + { + "epoch": 0.85, + "learning_rate": 3.4398555399688336e-07, + "logits/chosen": -1.000149130821228, + "logits/rejected": -0.8533943295478821, + "logps/chosen": -453.640869140625, + "logps/rejected": -463.63946533203125, + "loss": 0.6918, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.24229037761688232, + "rewards/margins": 0.030662816017866135, + "rewards/rejected": -0.27295318245887756, + "step": 12960 + }, + { + "epoch": 0.85, + "learning_rate": 3.411007227457047e-07, + "logits/chosen": -1.1260017156600952, + "logits/rejected": -0.7768393754959106, + "logps/chosen": -441.520751953125, + "logps/rejected": -516.2427978515625, + "loss": 0.687, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19375945627689362, + "rewards/margins": 0.10699689388275146, + "rewards/rejected": -0.3007563352584839, + "step": 12970 + }, + { + "epoch": 0.85, + "learning_rate": 3.382271533707043e-07, + "logits/chosen": -0.7902621030807495, + "logits/rejected": -0.7129308581352234, + "logps/chosen": -382.034423828125, + "logps/rejected": -416.2178649902344, + "loss": 0.6904, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.190429225564003, + "rewards/margins": 0.05706711858510971, + "rewards/rejected": -0.2474963366985321, + "step": 12980 + }, + { + "epoch": 0.85, + "learning_rate": 3.353648608618287e-07, + "logits/chosen": -0.9405345916748047, + "logits/rejected": -0.8393017649650574, + "logps/chosen": -348.46832275390625, + "logps/rejected": -424.5315856933594, + "loss": 0.6892, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18059708178043365, + "rewards/margins": 0.08603395521640778, + "rewards/rejected": -0.2666310667991638, + "step": 12990 + }, + { + "epoch": 0.85, + "learning_rate": 3.3251386015019676e-07, + "logits/chosen": -1.3043386936187744, + "logits/rejected": -1.0379887819290161, + "logps/chosen": -399.287841796875, + "logps/rejected": -450.4970703125, + "loss": 0.6887, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.19652841985225677, + "rewards/margins": 0.08495378494262695, + "rewards/rejected": -0.28148218989372253, + "step": 13000 + }, + { + "epoch": 0.85, + "eval_logits/chosen": -0.9750109314918518, + "eval_logits/rejected": -0.8479962348937988, + "eval_logps/chosen": -436.7467956542969, + "eval_logps/rejected": -504.1571960449219, + "eval_loss": 0.6894132494926453, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -0.20474188029766083, + "eval_rewards/margins": 0.08780339360237122, + "eval_rewards/rejected": -0.29254525899887085, + "eval_runtime": 713.1079, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 1.402, + "step": 13000 + }, + { + "epoch": 0.85, + "learning_rate": 3.296741661080255e-07, + "logits/chosen": -1.1275584697723389, + "logits/rejected": -0.9574069976806641, + "logps/chosen": -465.5220642089844, + "logps/rejected": -555.017578125, + "loss": 0.688, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.23238368332386017, + "rewards/margins": 0.09603676944971085, + "rewards/rejected": -0.3284204602241516, + "step": 13010 + }, + { + "epoch": 0.85, + "learning_rate": 3.2684579354854974e-07, + "logits/chosen": -1.1925865411758423, + "logits/rejected": -1.0892870426177979, + "logps/chosen": -534.2598266601562, + "logps/rejected": -627.32861328125, + "loss": 0.6908, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.24180126190185547, + "rewards/margins": 0.08281029760837555, + "rewards/rejected": -0.3246115744113922, + "step": 13020 + }, + { + "epoch": 0.85, + "learning_rate": 3.2402875722594653e-07, + "logits/chosen": -0.8882959485054016, + "logits/rejected": -1.0372774600982666, + "logps/chosen": -346.94049072265625, + "logps/rejected": -442.657958984375, + "loss": 0.6893, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.17789503931999207, + "rewards/margins": 0.08956596255302429, + "rewards/rejected": -0.26746100187301636, + "step": 13030 + }, + { + "epoch": 0.85, + "learning_rate": 3.212230718352566e-07, + "logits/chosen": -0.898537814617157, + "logits/rejected": -0.8267822265625, + "logps/chosen": -404.4369812011719, + "logps/rejected": -369.17401123046875, + "loss": 0.6928, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.17938710749149323, + "rewards/margins": 0.02843407727777958, + "rewards/rejected": -0.20782120525836945, + "step": 13040 + }, + { + "epoch": 0.85, + "learning_rate": 3.1842875201231025e-07, + "logits/chosen": -0.8991460800170898, + "logits/rejected": -0.7728959321975708, + "logps/chosen": -393.7642822265625, + "logps/rejected": -441.01751708984375, + "loss": 0.6896, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1720242202281952, + "rewards/margins": 0.07302843034267426, + "rewards/rejected": -0.24505265057086945, + "step": 13050 + }, + { + "epoch": 0.85, + "learning_rate": 3.156458123336478e-07, + "logits/chosen": -0.7052079439163208, + "logits/rejected": -0.6316913962364197, + "logps/chosen": -323.0262145996094, + "logps/rejected": -438.30230712890625, + "loss": 0.6885, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.16451437771320343, + "rewards/margins": 0.12542487680912018, + "rewards/rejected": -0.2899392545223236, + "step": 13060 + }, + { + "epoch": 0.86, + "learning_rate": 3.128742673164459e-07, + "logits/chosen": -1.2318341732025146, + "logits/rejected": -0.754733145236969, + "logps/chosen": -494.69354248046875, + "logps/rejected": -532.0003662109375, + "loss": 0.6904, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2117662876844406, + "rewards/margins": 0.08182285726070404, + "rewards/rejected": -0.29358917474746704, + "step": 13070 + }, + { + "epoch": 0.86, + "learning_rate": 3.101141314184414e-07, + "logits/chosen": -1.3839300870895386, + "logits/rejected": -1.1595733165740967, + "logps/chosen": -392.35955810546875, + "logps/rejected": -436.986083984375, + "loss": 0.6924, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18697500228881836, + "rewards/margins": 0.053837817162275314, + "rewards/rejected": -0.24081282317638397, + "step": 13080 + }, + { + "epoch": 0.86, + "learning_rate": 3.0736541903785526e-07, + "logits/chosen": -0.802038311958313, + "logits/rejected": -0.9247132539749146, + "logps/chosen": -411.74761962890625, + "logps/rejected": -556.392822265625, + "loss": 0.6901, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.20273271203041077, + "rewards/margins": 0.09518562257289886, + "rewards/rejected": -0.2979183495044708, + "step": 13090 + }, + { + "epoch": 0.86, + "learning_rate": 3.0462814451331704e-07, + "logits/chosen": -1.0374623537063599, + "logits/rejected": -0.762954592704773, + "logps/chosen": -451.2318420410156, + "logps/rejected": -498.00738525390625, + "loss": 0.6917, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.22055545449256897, + "rewards/margins": 0.04687047749757767, + "rewards/rejected": -0.26742592453956604, + "step": 13100 + }, + { + "epoch": 0.86, + "eval_logits/chosen": -1.0175011157989502, + "eval_logits/rejected": -0.8888563513755798, + "eval_logps/chosen": -425.418212890625, + "eval_logps/rejected": -489.5446472167969, + "eval_loss": 0.6894149780273438, + "eval_rewards/accuracies": 0.6669999957084656, + "eval_rewards/chosen": -0.19341330230236053, + "eval_rewards/margins": 0.08451951295137405, + "eval_rewards/rejected": -0.2779327929019928, + "eval_runtime": 713.3691, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 1.402, + "step": 13100 + }, + { + "epoch": 0.86, + "learning_rate": 3.019023221237927e-07, + "logits/chosen": -0.9610816240310669, + "logits/rejected": -0.8090234994888306, + "logps/chosen": -420.1961975097656, + "logps/rejected": -456.49639892578125, + "loss": 0.6895, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18346384167671204, + "rewards/margins": 0.09386763721704483, + "rewards/rejected": -0.27733147144317627, + "step": 13110 + }, + { + "epoch": 0.86, + "learning_rate": 2.991879660885058e-07, + "logits/chosen": -1.1881482601165771, + "logits/rejected": -1.1225849390029907, + "logps/chosen": -430.271240234375, + "logps/rejected": -506.01873779296875, + "loss": 0.6912, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1667243391275406, + "rewards/margins": 0.09210772812366486, + "rewards/rejected": -0.25883203744888306, + "step": 13120 + }, + { + "epoch": 0.86, + "learning_rate": 2.9648509056686786e-07, + "logits/chosen": -1.0945974588394165, + "logits/rejected": -0.9265823364257812, + "logps/chosen": -369.04547119140625, + "logps/rejected": -435.9185485839844, + "loss": 0.6872, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.19210371375083923, + "rewards/margins": 0.09244880080223083, + "rewards/rejected": -0.28455251455307007, + "step": 13130 + }, + { + "epoch": 0.86, + "learning_rate": 2.937937096584012e-07, + "logits/chosen": -1.111617088317871, + "logits/rejected": -0.7723917961120605, + "logps/chosen": -483.25146484375, + "logps/rejected": -492.22027587890625, + "loss": 0.6889, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1928504854440689, + "rewards/margins": 0.06945283710956573, + "rewards/rejected": -0.26230329275131226, + "step": 13140 + }, + { + "epoch": 0.86, + "learning_rate": 2.9111383740266756e-07, + "logits/chosen": -0.8475943803787231, + "logits/rejected": -0.8579694032669067, + "logps/chosen": -453.10394287109375, + "logps/rejected": -499.24853515625, + "loss": 0.691, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2173198163509369, + "rewards/margins": 0.05193269997835159, + "rewards/rejected": -0.2692525088787079, + "step": 13150 + }, + { + "epoch": 0.86, + "learning_rate": 2.8844548777919255e-07, + "logits/chosen": -1.0501482486724854, + "logits/rejected": -0.9597611427307129, + "logps/chosen": -372.1024169921875, + "logps/rejected": -426.0980529785156, + "loss": 0.6885, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.16821856796741486, + "rewards/margins": 0.07627587020397186, + "rewards/rejected": -0.2444944679737091, + "step": 13160 + }, + { + "epoch": 0.86, + "learning_rate": 2.8578867470739594e-07, + "logits/chosen": -0.6772990226745605, + "logits/rejected": -0.6000491380691528, + "logps/chosen": -413.3113708496094, + "logps/rejected": -483.9972229003906, + "loss": 0.6874, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.22882306575775146, + "rewards/margins": 0.10210853815078735, + "rewards/rejected": -0.3309316039085388, + "step": 13170 + }, + { + "epoch": 0.86, + "learning_rate": 2.8314341204651484e-07, + "logits/chosen": -1.3596770763397217, + "logits/rejected": -1.1702024936676025, + "logps/chosen": -441.63323974609375, + "logps/rejected": -478.5375061035156, + "loss": 0.6871, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.16559413075447083, + "rewards/margins": 0.10968685150146484, + "rewards/rejected": -0.2752809524536133, + "step": 13180 + }, + { + "epoch": 0.86, + "learning_rate": 2.805097135955362e-07, + "logits/chosen": -0.9181706309318542, + "logits/rejected": -0.7884522080421448, + "logps/chosen": -407.10137939453125, + "logps/rejected": -474.822021484375, + "loss": 0.6879, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.19457736611366272, + "rewards/margins": 0.10264654457569122, + "rewards/rejected": -0.29722392559051514, + "step": 13190 + }, + { + "epoch": 0.86, + "learning_rate": 2.778875930931213e-07, + "logits/chosen": -0.9860755801200867, + "logits/rejected": -0.741965651512146, + "logps/chosen": -424.412841796875, + "logps/rejected": -510.8443908691406, + "loss": 0.6877, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18929395079612732, + "rewards/margins": 0.10379371792078018, + "rewards/rejected": -0.2930876612663269, + "step": 13200 + }, + { + "epoch": 0.86, + "eval_logits/chosen": -1.009841799736023, + "eval_logits/rejected": -0.8810458183288574, + "eval_logps/chosen": -429.9781494140625, + "eval_logps/rejected": -496.1291198730469, + "eval_loss": 0.6894113421440125, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -0.19797320663928986, + "eval_rewards/margins": 0.08654402941465378, + "eval_rewards/rejected": -0.28451722860336304, + "eval_runtime": 711.5759, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.405, + "step": 13200 + }, + { + "epoch": 0.86, + "learning_rate": 2.7527706421753426e-07, + "logits/chosen": -1.1855487823486328, + "logits/rejected": -1.001201868057251, + "logps/chosen": -393.73419189453125, + "logps/rejected": -456.26422119140625, + "loss": 0.6906, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.19533300399780273, + "rewards/margins": 0.058041296899318695, + "rewards/rejected": -0.253374308347702, + "step": 13210 + }, + { + "epoch": 0.86, + "learning_rate": 2.726781405865736e-07, + "logits/chosen": -1.0462353229522705, + "logits/rejected": -0.8738126754760742, + "logps/chosen": -496.15081787109375, + "logps/rejected": -472.3070373535156, + "loss": 0.6897, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.19348499178886414, + "rewards/margins": 0.09192480891942978, + "rewards/rejected": -0.2854097783565521, + "step": 13220 + }, + { + "epoch": 0.87, + "learning_rate": 2.7009083575749687e-07, + "logits/chosen": -0.8790783882141113, + "logits/rejected": -0.8375867605209351, + "logps/chosen": -439.9881896972656, + "logps/rejected": -513.730712890625, + "loss": 0.6902, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19612789154052734, + "rewards/margins": 0.07537803053855896, + "rewards/rejected": -0.2715059220790863, + "step": 13230 + }, + { + "epoch": 0.87, + "learning_rate": 2.6751516322695457e-07, + "logits/chosen": -1.0426018238067627, + "logits/rejected": -1.0287061929702759, + "logps/chosen": -396.2439270019531, + "logps/rejected": -439.5823669433594, + "loss": 0.6898, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2060004025697708, + "rewards/margins": 0.04937834292650223, + "rewards/rejected": -0.25537875294685364, + "step": 13240 + }, + { + "epoch": 0.87, + "learning_rate": 2.649511364309154e-07, + "logits/chosen": -1.2703790664672852, + "logits/rejected": -1.1133968830108643, + "logps/chosen": -391.85626220703125, + "logps/rejected": -450.3577575683594, + "loss": 0.69, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.18956658244132996, + "rewards/margins": 0.07938266545534134, + "rewards/rejected": -0.2689492404460907, + "step": 13250 + }, + { + "epoch": 0.87, + "learning_rate": 2.6239876874460003e-07, + "logits/chosen": -1.2480677366256714, + "logits/rejected": -1.2331262826919556, + "logps/chosen": -475.4595642089844, + "logps/rejected": -563.0646362304688, + "loss": 0.6891, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.191044420003891, + "rewards/margins": 0.11816122382879257, + "rewards/rejected": -0.3092056214809418, + "step": 13260 + }, + { + "epoch": 0.87, + "learning_rate": 2.5985807348240744e-07, + "logits/chosen": -1.130602478981018, + "logits/rejected": -0.6957510709762573, + "logps/chosen": -423.267578125, + "logps/rejected": -497.05487060546875, + "loss": 0.6873, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1909240484237671, + "rewards/margins": 0.12173442542552948, + "rewards/rejected": -0.31265848875045776, + "step": 13270 + }, + { + "epoch": 0.87, + "learning_rate": 2.5732906389785014e-07, + "logits/chosen": -1.2089236974716187, + "logits/rejected": -1.1420354843139648, + "logps/chosen": -468.90277099609375, + "logps/rejected": -554.2119140625, + "loss": 0.686, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.1911967247724533, + "rewards/margins": 0.12345466762781143, + "rewards/rejected": -0.31465139985084534, + "step": 13280 + }, + { + "epoch": 0.87, + "learning_rate": 2.5481175318347956e-07, + "logits/chosen": -0.947446346282959, + "logits/rejected": -1.148301124572754, + "logps/chosen": -402.0899658203125, + "logps/rejected": -498.4195861816406, + "loss": 0.6894, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.17459237575531006, + "rewards/margins": 0.07843352854251862, + "rewards/rejected": -0.25302591919898987, + "step": 13290 + }, + { + "epoch": 0.87, + "learning_rate": 2.5230615447082246e-07, + "logits/chosen": -0.957288384437561, + "logits/rejected": -0.8125879168510437, + "logps/chosen": -444.80499267578125, + "logps/rejected": -516.0078735351562, + "loss": 0.6887, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19348831474781036, + "rewards/margins": 0.08598808944225311, + "rewards/rejected": -0.27947643399238586, + "step": 13300 + }, + { + "epoch": 0.87, + "eval_logits/chosen": -1.0318516492843628, + "eval_logits/rejected": -0.9020374417304993, + "eval_logps/chosen": -425.52032470703125, + "eval_logps/rejected": -491.04864501953125, + "eval_loss": 0.6894135475158691, + "eval_rewards/accuracies": 0.6664999723434448, + "eval_rewards/chosen": -0.1935153603553772, + "eval_rewards/margins": 0.08592142909765244, + "eval_rewards/rejected": -0.27943679690361023, + "eval_runtime": 712.2993, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 1.404, + "step": 13300 + }, + { + "epoch": 0.87, + "learning_rate": 2.49812280830308e-07, + "logits/chosen": -0.9962084889411926, + "logits/rejected": -0.7481353878974915, + "logps/chosen": -430.90087890625, + "logps/rejected": -566.72314453125, + "loss": 0.6842, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.20586366951465607, + "rewards/margins": 0.17067831754684448, + "rewards/rejected": -0.37654200196266174, + "step": 13310 + }, + { + "epoch": 0.87, + "learning_rate": 2.4733014527120457e-07, + "logits/chosen": -0.7532116770744324, + "logits/rejected": -0.8916665315628052, + "logps/chosen": -472.84320068359375, + "logps/rejected": -545.5331420898438, + "loss": 0.6899, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.27042829990386963, + "rewards/margins": 0.09558326005935669, + "rewards/rejected": -0.3660115599632263, + "step": 13320 + }, + { + "epoch": 0.87, + "learning_rate": 2.4485976074154565e-07, + "logits/chosen": -1.0746243000030518, + "logits/rejected": -1.1461856365203857, + "logps/chosen": -411.5790100097656, + "logps/rejected": -471.8722229003906, + "loss": 0.6911, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.20258104801177979, + "rewards/margins": 0.030249997973442078, + "rewards/rejected": -0.23283103108406067, + "step": 13330 + }, + { + "epoch": 0.87, + "learning_rate": 2.4240114012806763e-07, + "logits/chosen": -0.9970208406448364, + "logits/rejected": -1.0200673341751099, + "logps/chosen": -391.00347900390625, + "logps/rejected": -435.83270263671875, + "loss": 0.6912, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1832052767276764, + "rewards/margins": 0.06189913675189018, + "rewards/rejected": -0.24510440230369568, + "step": 13340 + }, + { + "epoch": 0.87, + "learning_rate": 2.399542962561399e-07, + "logits/chosen": -1.0039441585540771, + "logits/rejected": -0.7964978814125061, + "logps/chosen": -419.21307373046875, + "logps/rejected": -483.9776306152344, + "loss": 0.6852, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.193757101893425, + "rewards/margins": 0.11217392981052399, + "rewards/rejected": -0.305931031703949, + "step": 13350 + }, + { + "epoch": 0.87, + "learning_rate": 2.3751924188969876e-07, + "logits/chosen": -0.9888512492179871, + "logits/rejected": -0.8423534631729126, + "logps/chosen": -445.109375, + "logps/rejected": -521.4771728515625, + "loss": 0.6901, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19337087869644165, + "rewards/margins": 0.09620238840579987, + "rewards/rejected": -0.2895732820034027, + "step": 13360 + }, + { + "epoch": 0.87, + "learning_rate": 2.3509598973118024e-07, + "logits/chosen": -1.299377679824829, + "logits/rejected": -1.1307967901229858, + "logps/chosen": -387.85406494140625, + "logps/rejected": -382.24884033203125, + "loss": 0.6917, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.16684868931770325, + "rewards/margins": 0.057182587683200836, + "rewards/rejected": -0.22403128445148468, + "step": 13370 + }, + { + "epoch": 0.88, + "learning_rate": 2.326845524214555e-07, + "logits/chosen": -0.9441890716552734, + "logits/rejected": -0.9570524096488953, + "logps/chosen": -430.52069091796875, + "logps/rejected": -422.7290954589844, + "loss": 0.6922, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.19546636939048767, + "rewards/margins": 0.01833316683769226, + "rewards/rejected": -0.21379955112934113, + "step": 13380 + }, + { + "epoch": 0.88, + "learning_rate": 2.3028494253976158e-07, + "logits/chosen": -1.0607401132583618, + "logits/rejected": -0.8935171961784363, + "logps/chosen": -563.0780639648438, + "logps/rejected": -556.1412353515625, + "loss": 0.6908, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.22181260585784912, + "rewards/margins": 0.0576445572078228, + "rewards/rejected": -0.279457151889801, + "step": 13390 + }, + { + "epoch": 0.88, + "learning_rate": 2.2789717260364026e-07, + "logits/chosen": -1.0842921733856201, + "logits/rejected": -0.9010177850723267, + "logps/chosen": -335.4914245605469, + "logps/rejected": -370.0270690917969, + "loss": 0.6916, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.17038536071777344, + "rewards/margins": 0.05626339837908745, + "rewards/rejected": -0.2266487330198288, + "step": 13400 + }, + { + "epoch": 0.88, + "eval_logits/chosen": -1.0430668592453003, + "eval_logits/rejected": -0.9128531217575073, + "eval_logps/chosen": -420.7966003417969, + "eval_logps/rejected": -485.21160888671875, + "eval_loss": 0.6894127726554871, + "eval_rewards/accuracies": 0.6710000038146973, + "eval_rewards/chosen": -0.18879161775112152, + "eval_rewards/margins": 0.08480807393789291, + "eval_rewards/rejected": -0.2735997140407562, + "eval_runtime": 715.5823, + "eval_samples_per_second": 2.795, + "eval_steps_per_second": 1.397, + "step": 13400 + }, + { + "epoch": 0.88, + "learning_rate": 2.255212550688682e-07, + "logits/chosen": -1.1218197345733643, + "logits/rejected": -1.4631164073944092, + "logps/chosen": -419.88525390625, + "logps/rejected": -582.7513427734375, + "loss": 0.687, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2019301950931549, + "rewards/margins": 0.09867943078279495, + "rewards/rejected": -0.30060964822769165, + "step": 13410 + }, + { + "epoch": 0.88, + "learning_rate": 2.2315720232939598e-07, + "logits/chosen": -1.5930960178375244, + "logits/rejected": -1.086121678352356, + "logps/chosen": -431.61279296875, + "logps/rejected": -448.45965576171875, + "loss": 0.6893, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.16949909925460815, + "rewards/margins": 0.1064492017030716, + "rewards/rejected": -0.27594828605651855, + "step": 13420 + }, + { + "epoch": 0.88, + "learning_rate": 2.2080502671727956e-07, + "logits/chosen": -1.189905047416687, + "logits/rejected": -0.9950293302536011, + "logps/chosen": -352.42913818359375, + "logps/rejected": -403.2584228515625, + "loss": 0.6888, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.13234731554985046, + "rewards/margins": 0.07265688478946686, + "rewards/rejected": -0.20500421524047852, + "step": 13430 + }, + { + "epoch": 0.88, + "learning_rate": 2.1846474050262078e-07, + "logits/chosen": -1.0095980167388916, + "logits/rejected": -0.7977234125137329, + "logps/chosen": -414.53826904296875, + "logps/rejected": -418.682373046875, + "loss": 0.6904, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.16797752678394318, + "rewards/margins": 0.0699412003159523, + "rewards/rejected": -0.2379187047481537, + "step": 13440 + }, + { + "epoch": 0.88, + "learning_rate": 2.1613635589349756e-07, + "logits/chosen": -0.8134937286376953, + "logits/rejected": -0.7540784478187561, + "logps/chosen": -371.159423828125, + "logps/rejected": -508.986572265625, + "loss": 0.6888, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1739961802959442, + "rewards/margins": 0.11030639708042145, + "rewards/rejected": -0.28430259227752686, + "step": 13450 + }, + { + "epoch": 0.88, + "learning_rate": 2.1381988503590578e-07, + "logits/chosen": -0.7340242266654968, + "logits/rejected": -0.8426862955093384, + "logps/chosen": -414.74554443359375, + "logps/rejected": -516.2962646484375, + "loss": 0.6885, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.19887368381023407, + "rewards/margins": 0.10770060122013092, + "rewards/rejected": -0.306574285030365, + "step": 13460 + }, + { + "epoch": 0.88, + "learning_rate": 2.11515340013691e-07, + "logits/chosen": -1.2820839881896973, + "logits/rejected": -1.135124921798706, + "logps/chosen": -422.4356384277344, + "logps/rejected": -517.8685302734375, + "loss": 0.6882, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1912631094455719, + "rewards/margins": 0.11701309680938721, + "rewards/rejected": -0.3082761764526367, + "step": 13470 + }, + { + "epoch": 0.88, + "learning_rate": 2.092227328484897e-07, + "logits/chosen": -0.8282972574234009, + "logits/rejected": -0.8670898675918579, + "logps/chosen": -382.09368896484375, + "logps/rejected": -512.9632568359375, + "loss": 0.6875, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.17808496952056885, + "rewards/margins": 0.10021205991506577, + "rewards/rejected": -0.2782970368862152, + "step": 13480 + }, + { + "epoch": 0.88, + "learning_rate": 2.0694207549966345e-07, + "logits/chosen": -0.8017724752426147, + "logits/rejected": -0.9765009880065918, + "logps/chosen": -418.8724670410156, + "logps/rejected": -440.66070556640625, + "loss": 0.6912, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.20996403694152832, + "rewards/margins": 0.03865761682391167, + "rewards/rejected": -0.24862165749073029, + "step": 13490 + }, + { + "epoch": 0.88, + "learning_rate": 2.0467337986423864e-07, + "logits/chosen": -1.2652013301849365, + "logits/rejected": -1.0905256271362305, + "logps/chosen": -490.0810546875, + "logps/rejected": -521.456787109375, + "loss": 0.6905, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.18435756862163544, + "rewards/margins": 0.06458848714828491, + "rewards/rejected": -0.24894602596759796, + "step": 13500 + }, + { + "epoch": 0.88, + "eval_logits/chosen": -1.045881986618042, + "eval_logits/rejected": -0.9156625866889954, + "eval_logps/chosen": -419.99395751953125, + "eval_logps/rejected": -484.36981201171875, + "eval_loss": 0.6894125938415527, + "eval_rewards/accuracies": 0.6679999828338623, + "eval_rewards/chosen": -0.1879890114068985, + "eval_rewards/margins": 0.08476891368627548, + "eval_rewards/rejected": -0.2727579176425934, + "eval_runtime": 711.5302, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.405, + "step": 13500 + }, + { + "epoch": 0.88, + "learning_rate": 2.0241665777684272e-07, + "logits/chosen": -1.3042113780975342, + "logits/rejected": -1.0920075178146362, + "logps/chosen": -444.8846740722656, + "logps/rejected": -533.3680419921875, + "loss": 0.6872, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1765916794538498, + "rewards/margins": 0.13209852576255798, + "rewards/rejected": -0.30869022011756897, + "step": 13510 + }, + { + "epoch": 0.88, + "learning_rate": 2.0017192100964366e-07, + "logits/chosen": -0.8175595998764038, + "logits/rejected": -0.8133748769760132, + "logps/chosen": -421.3980407714844, + "logps/rejected": -504.69171142578125, + "loss": 0.691, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.22144213318824768, + "rewards/margins": 0.08117649704217911, + "rewards/rejected": -0.3026186227798462, + "step": 13520 + }, + { + "epoch": 0.89, + "learning_rate": 1.9793918127228777e-07, + "logits/chosen": -1.3139551877975464, + "logits/rejected": -0.9030052423477173, + "logps/chosen": -541.2322998046875, + "logps/rejected": -568.1669921875, + "loss": 0.6882, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.22493290901184082, + "rewards/margins": 0.08101674169301987, + "rewards/rejected": -0.3059496581554413, + "step": 13530 + }, + { + "epoch": 0.89, + "learning_rate": 1.9571845021184005e-07, + "logits/chosen": -0.777948260307312, + "logits/rejected": -0.833267331123352, + "logps/chosen": -427.2875061035156, + "logps/rejected": -516.5462646484375, + "loss": 0.6886, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19630160927772522, + "rewards/margins": 0.08514077961444855, + "rewards/rejected": -0.2814423739910126, + "step": 13540 + }, + { + "epoch": 0.89, + "learning_rate": 1.9350973941272027e-07, + "logits/chosen": -1.1817286014556885, + "logits/rejected": -0.9291670918464661, + "logps/chosen": -404.9017639160156, + "logps/rejected": -474.9322814941406, + "loss": 0.689, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20112669467926025, + "rewards/margins": 0.0905313715338707, + "rewards/rejected": -0.29165807366371155, + "step": 13550 + }, + { + "epoch": 0.89, + "learning_rate": 1.9131306039664676e-07, + "logits/chosen": -0.8950015306472778, + "logits/rejected": -0.7792337536811829, + "logps/chosen": -387.6864318847656, + "logps/rejected": -513.8165283203125, + "loss": 0.6877, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.18778234720230103, + "rewards/margins": 0.1017737165093422, + "rewards/rejected": -0.289556086063385, + "step": 13560 + }, + { + "epoch": 0.89, + "learning_rate": 1.8912842462257358e-07, + "logits/chosen": -0.9837632179260254, + "logits/rejected": -0.853921115398407, + "logps/chosen": -414.85821533203125, + "logps/rejected": -504.00079345703125, + "loss": 0.6874, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19468896090984344, + "rewards/margins": 0.10612950474023819, + "rewards/rejected": -0.30081844329833984, + "step": 13570 + }, + { + "epoch": 0.89, + "learning_rate": 1.869558434866303e-07, + "logits/chosen": -1.0088794231414795, + "logits/rejected": -1.1799061298370361, + "logps/chosen": -385.1492919921875, + "logps/rejected": -505.05718994140625, + "loss": 0.6871, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2040594518184662, + "rewards/margins": 0.10187806189060211, + "rewards/rejected": -0.3059375286102295, + "step": 13580 + }, + { + "epoch": 0.89, + "learning_rate": 1.847953283220652e-07, + "logits/chosen": -1.1091597080230713, + "logits/rejected": -0.862514317035675, + "logps/chosen": -449.0830993652344, + "logps/rejected": -513.0009765625, + "loss": 0.6855, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.19297336041927338, + "rewards/margins": 0.1425541192293167, + "rewards/rejected": -0.3355274498462677, + "step": 13590 + }, + { + "epoch": 0.89, + "learning_rate": 1.8264689039918265e-07, + "logits/chosen": -1.008798360824585, + "logits/rejected": -0.9485238790512085, + "logps/chosen": -476.99005126953125, + "logps/rejected": -521.00146484375, + "loss": 0.691, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.2171596735715866, + "rewards/margins": 0.0714375376701355, + "rewards/rejected": -0.2885972261428833, + "step": 13600 + }, + { + "epoch": 0.89, + "eval_logits/chosen": -1.0011253356933594, + "eval_logits/rejected": -0.8732011914253235, + "eval_logps/chosen": -428.5783386230469, + "eval_logps/rejected": -494.4617919921875, + "eval_loss": 0.6894011497497559, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -0.196573406457901, + "eval_rewards/margins": 0.08627651631832123, + "eval_rewards/rejected": -0.28284990787506104, + "eval_runtime": 712.7327, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 1.403, + "step": 13600 + }, + { + "epoch": 0.89, + "learning_rate": 1.8051054092528857e-07, + "logits/chosen": -1.0556079149246216, + "logits/rejected": -0.8579056859016418, + "logps/chosen": -450.79071044921875, + "logps/rejected": -553.0870971679688, + "loss": 0.6881, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18968930840492249, + "rewards/margins": 0.11205662786960602, + "rewards/rejected": -0.3017459213733673, + "step": 13610 + }, + { + "epoch": 0.89, + "learning_rate": 1.783862910446271e-07, + "logits/chosen": -0.7849446535110474, + "logits/rejected": -0.7896897196769714, + "logps/chosen": -367.90234375, + "logps/rejected": -482.52349853515625, + "loss": 0.6883, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.19504904747009277, + "rewards/margins": 0.12393607199192047, + "rewards/rejected": -0.31898510456085205, + "step": 13620 + }, + { + "epoch": 0.89, + "learning_rate": 1.762741518383271e-07, + "logits/chosen": -1.12753427028656, + "logits/rejected": -0.8379606008529663, + "logps/chosen": -404.31524658203125, + "logps/rejected": -460.7677307128906, + "loss": 0.6883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.18248286843299866, + "rewards/margins": 0.08411658555269241, + "rewards/rejected": -0.26659947633743286, + "step": 13630 + }, + { + "epoch": 0.89, + "learning_rate": 1.7417413432434082e-07, + "logits/chosen": -0.9677637219429016, + "logits/rejected": -0.8881511688232422, + "logps/chosen": -455.74493408203125, + "logps/rejected": -473.82879638671875, + "loss": 0.6912, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2030853033065796, + "rewards/margins": 0.07085120677947998, + "rewards/rejected": -0.27393651008605957, + "step": 13640 + }, + { + "epoch": 0.89, + "learning_rate": 1.7208624945738855e-07, + "logits/chosen": -1.2254602909088135, + "logits/rejected": -1.1744589805603027, + "logps/chosen": -401.1329650878906, + "logps/rejected": -447.900390625, + "loss": 0.6931, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18304863572120667, + "rewards/margins": 0.039330027997493744, + "rewards/rejected": -0.22237864136695862, + "step": 13650 + }, + { + "epoch": 0.89, + "learning_rate": 1.7001050812889995e-07, + "logits/chosen": -1.284170389175415, + "logits/rejected": -1.093874454498291, + "logps/chosen": -479.20928955078125, + "logps/rejected": -530.1829833984375, + "loss": 0.6896, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.22367647290229797, + "rewards/margins": 0.09122467041015625, + "rewards/rejected": -0.3149011731147766, + "step": 13660 + }, + { + "epoch": 0.89, + "learning_rate": 1.679469211669596e-07, + "logits/chosen": -1.0277354717254639, + "logits/rejected": -0.8371337056159973, + "logps/chosen": -439.98541259765625, + "logps/rejected": -501.0602111816406, + "loss": 0.6874, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21504028141498566, + "rewards/margins": 0.11267922818660736, + "rewards/rejected": -0.327719509601593, + "step": 13670 + }, + { + "epoch": 0.9, + "learning_rate": 1.6589549933624715e-07, + "logits/chosen": -1.0300480127334595, + "logits/rejected": -0.852430522441864, + "logps/chosen": -399.5941162109375, + "logps/rejected": -503.5506286621094, + "loss": 0.6854, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.15938711166381836, + "rewards/margins": 0.15544193983078003, + "rewards/rejected": -0.314829021692276, + "step": 13680 + }, + { + "epoch": 0.9, + "learning_rate": 1.638562533379845e-07, + "logits/chosen": -0.9181520342826843, + "logits/rejected": -0.8067198991775513, + "logps/chosen": -435.23541259765625, + "logps/rejected": -446.798583984375, + "loss": 0.69, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.17953279614448547, + "rewards/margins": 0.07383112609386444, + "rewards/rejected": -0.2533639073371887, + "step": 13690 + }, + { + "epoch": 0.9, + "learning_rate": 1.6182919380987676e-07, + "logits/chosen": -1.0368890762329102, + "logits/rejected": -0.9775916337966919, + "logps/chosen": -417.2001037597656, + "logps/rejected": -454.9668884277344, + "loss": 0.6911, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.19148211181163788, + "rewards/margins": 0.0531439408659935, + "rewards/rejected": -0.24462607502937317, + "step": 13700 + }, + { + "epoch": 0.9, + "eval_logits/chosen": -1.0116302967071533, + "eval_logits/rejected": -0.8832849860191345, + "eval_logps/chosen": -426.22021484375, + "eval_logps/rejected": -491.4140625, + "eval_loss": 0.6893994212150574, + "eval_rewards/accuracies": 0.6700000166893005, + "eval_rewards/chosen": -0.19421526789665222, + "eval_rewards/margins": 0.08558690547943115, + "eval_rewards/rejected": -0.279802143573761, + "eval_runtime": 710.9793, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 1.407, + "step": 13700 + }, + { + "epoch": 0.9, + "learning_rate": 1.598143313260603e-07, + "logits/chosen": -0.732464075088501, + "logits/rejected": -0.7959052920341492, + "logps/chosen": -376.18701171875, + "logps/rejected": -440.91363525390625, + "loss": 0.6901, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18813328444957733, + "rewards/margins": 0.07674112170934677, + "rewards/rejected": -0.2648743987083435, + "step": 13710 + }, + { + "epoch": 0.9, + "learning_rate": 1.5781167639704415e-07, + "logits/chosen": -1.1566427946090698, + "logits/rejected": -0.7914190888404846, + "logps/chosen": -514.2080078125, + "logps/rejected": -461.599365234375, + "loss": 0.6913, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.182041198015213, + "rewards/margins": 0.07163481414318085, + "rewards/rejected": -0.25367602705955505, + "step": 13720 + }, + { + "epoch": 0.9, + "learning_rate": 1.5582123946965787e-07, + "logits/chosen": -0.8978877067565918, + "logits/rejected": -0.6969678997993469, + "logps/chosen": -414.6393127441406, + "logps/rejected": -515.791015625, + "loss": 0.6886, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.18024428188800812, + "rewards/margins": 0.09100376069545746, + "rewards/rejected": -0.2712480425834656, + "step": 13730 + }, + { + "epoch": 0.9, + "learning_rate": 1.5384303092699504e-07, + "logits/chosen": -1.1189682483673096, + "logits/rejected": -0.7042065858840942, + "logps/chosen": -487.235595703125, + "logps/rejected": -604.0064086914062, + "loss": 0.6878, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.20025098323822021, + "rewards/margins": 0.11845190823078156, + "rewards/rejected": -0.3187028765678406, + "step": 13740 + }, + { + "epoch": 0.9, + "learning_rate": 1.518770610883613e-07, + "logits/chosen": -0.8458231687545776, + "logits/rejected": -0.7532489895820618, + "logps/chosen": -462.36895751953125, + "logps/rejected": -562.3704833984375, + "loss": 0.6888, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2478303462266922, + "rewards/margins": 0.12941531836986542, + "rewards/rejected": -0.3772456645965576, + "step": 13750 + }, + { + "epoch": 0.9, + "learning_rate": 1.4992334020921735e-07, + "logits/chosen": -1.03566575050354, + "logits/rejected": -1.0163966417312622, + "logps/chosen": -357.2666320800781, + "logps/rejected": -445.370361328125, + "loss": 0.6879, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.184294193983078, + "rewards/margins": 0.11771754920482635, + "rewards/rejected": -0.30201178789138794, + "step": 13760 + }, + { + "epoch": 0.9, + "learning_rate": 1.4798187848112905e-07, + "logits/chosen": -1.0435506105422974, + "logits/rejected": -0.6870570182800293, + "logps/chosen": -468.039794921875, + "logps/rejected": -527.7335815429688, + "loss": 0.688, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.24506907165050507, + "rewards/margins": 0.1005513072013855, + "rewards/rejected": -0.345620334148407, + "step": 13770 + }, + { + "epoch": 0.9, + "learning_rate": 1.460526860317113e-07, + "logits/chosen": -1.1585135459899902, + "logits/rejected": -1.0323517322540283, + "logps/chosen": -376.08807373046875, + "logps/rejected": -538.8499755859375, + "loss": 0.6847, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20280340313911438, + "rewards/margins": 0.1310150921344757, + "rewards/rejected": -0.3338184952735901, + "step": 13780 + }, + { + "epoch": 0.9, + "learning_rate": 1.441357729245771e-07, + "logits/chosen": -1.2246206998825073, + "logits/rejected": -0.9151955842971802, + "logps/chosen": -479.27117919921875, + "logps/rejected": -512.744140625, + "loss": 0.6893, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.23127934336662292, + "rewards/margins": 0.08946017920970917, + "rewards/rejected": -0.3207395672798157, + "step": 13790 + }, + { + "epoch": 0.9, + "learning_rate": 1.4223114915928482e-07, + "logits/chosen": -0.5556536316871643, + "logits/rejected": -0.6485458612442017, + "logps/chosen": -435.077392578125, + "logps/rejected": -520.010009765625, + "loss": 0.6892, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.20915481448173523, + "rewards/margins": 0.07816542685031891, + "rewards/rejected": -0.28732022643089294, + "step": 13800 + }, + { + "epoch": 0.9, + "eval_logits/chosen": -0.9911072850227356, + "eval_logits/rejected": -0.8638641834259033, + "eval_logps/chosen": -431.2166748046875, + "eval_logps/rejected": -497.59661865234375, + "eval_loss": 0.6893988251686096, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -0.19921176135540009, + "eval_rewards/margins": 0.08677300810813904, + "eval_rewards/rejected": -0.2859847843647003, + "eval_runtime": 712.8812, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 1.403, + "step": 13800 + }, + { + "epoch": 0.9, + "learning_rate": 1.403388246712842e-07, + "logits/chosen": -0.9798790812492371, + "logits/rejected": -0.8478490114212036, + "logps/chosen": -359.9521484375, + "logps/rejected": -412.8531188964844, + "loss": 0.6895, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.19570107758045197, + "rewards/margins": 0.0600527822971344, + "rewards/rejected": -0.25575387477874756, + "step": 13810 + }, + { + "epoch": 0.9, + "learning_rate": 1.3845880933186757e-07, + "logits/chosen": -1.1259896755218506, + "logits/rejected": -1.0273348093032837, + "logps/chosen": -453.7496643066406, + "logps/rejected": -459.85321044921875, + "loss": 0.6919, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2150108814239502, + "rewards/margins": 0.040339548140764236, + "rewards/rejected": -0.2553504407405853, + "step": 13820 + }, + { + "epoch": 0.9, + "learning_rate": 1.3659111294811457e-07, + "logits/chosen": -0.9863991737365723, + "logits/rejected": -0.9783857464790344, + "logps/chosen": -417.46417236328125, + "logps/rejected": -466.4806213378906, + "loss": 0.6905, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.22562098503112793, + "rewards/margins": 0.06691263616085052, + "rewards/rejected": -0.29253360629081726, + "step": 13830 + }, + { + "epoch": 0.91, + "learning_rate": 1.347357452628459e-07, + "logits/chosen": -1.432991623878479, + "logits/rejected": -1.2990912199020386, + "logps/chosen": -434.8438415527344, + "logps/rejected": -499.17071533203125, + "loss": 0.6908, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.18954630196094513, + "rewards/margins": 0.07182861864566803, + "rewards/rejected": -0.26137489080429077, + "step": 13840 + }, + { + "epoch": 0.91, + "learning_rate": 1.3289271595456732e-07, + "logits/chosen": -1.0181031227111816, + "logits/rejected": -0.6720232963562012, + "logps/chosen": -441.0040588378906, + "logps/rejected": -518.5680541992188, + "loss": 0.6878, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.23667487502098083, + "rewards/margins": 0.1033577173948288, + "rewards/rejected": -0.34003257751464844, + "step": 13850 + }, + { + "epoch": 0.91, + "learning_rate": 1.310620346374228e-07, + "logits/chosen": -0.9424416422843933, + "logits/rejected": -0.7790125608444214, + "logps/chosen": -452.8560485839844, + "logps/rejected": -542.755126953125, + "loss": 0.6865, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.22063413262367249, + "rewards/margins": 0.1267022341489792, + "rewards/rejected": -0.3473363518714905, + "step": 13860 + }, + { + "epoch": 0.91, + "learning_rate": 1.2924371086114274e-07, + "logits/chosen": -1.1187223196029663, + "logits/rejected": -0.7112355828285217, + "logps/chosen": -440.7926330566406, + "logps/rejected": -511.54248046875, + "loss": 0.6901, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.20473113656044006, + "rewards/margins": 0.07677672803401947, + "rewards/rejected": -0.2815078794956207, + "step": 13870 + }, + { + "epoch": 0.91, + "learning_rate": 1.274377541109953e-07, + "logits/chosen": -0.8828527331352234, + "logits/rejected": -0.9465206265449524, + "logps/chosen": -361.6195983886719, + "logps/rejected": -506.4344787597656, + "loss": 0.6886, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.19726905226707458, + "rewards/margins": 0.06689117848873138, + "rewards/rejected": -0.2641602158546448, + "step": 13880 + }, + { + "epoch": 0.91, + "learning_rate": 1.2564417380773435e-07, + "logits/chosen": -0.7460101842880249, + "logits/rejected": -0.46463337540626526, + "logps/chosen": -390.36688232421875, + "logps/rejected": -515.1717529296875, + "loss": 0.6888, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.21186986565589905, + "rewards/margins": 0.10232283920049667, + "rewards/rejected": -0.3141927123069763, + "step": 13890 + }, + { + "epoch": 0.91, + "learning_rate": 1.2386297930755436e-07, + "logits/chosen": -1.199453592300415, + "logits/rejected": -1.083516001701355, + "logps/chosen": -502.26470947265625, + "logps/rejected": -592.527099609375, + "loss": 0.6905, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.25437894463539124, + "rewards/margins": 0.09508887678384781, + "rewards/rejected": -0.34946778416633606, + "step": 13900 + }, + { + "epoch": 0.91, + "eval_logits/chosen": -0.9932417273521423, + "eval_logits/rejected": -0.8657166957855225, + "eval_logps/chosen": -430.56683349609375, + "eval_logps/rejected": -497.0989990234375, + "eval_loss": 0.6894011497497559, + "eval_rewards/accuracies": 0.6704999804496765, + "eval_rewards/chosen": -0.1985618770122528, + "eval_rewards/margins": 0.08692525327205658, + "eval_rewards/rejected": -0.2854871153831482, + "eval_runtime": 711.9181, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 1.405, + "step": 13900 + }, + { + "epoch": 0.91, + "learning_rate": 1.220941799020378e-07, + "logits/chosen": -0.9004515409469604, + "logits/rejected": -0.7649658918380737, + "logps/chosen": -409.7417907714844, + "logps/rejected": -484.17108154296875, + "loss": 0.6902, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.1906607449054718, + "rewards/margins": 0.09678211808204651, + "rewards/rejected": -0.2874428629875183, + "step": 13910 + }, + { + "epoch": 0.91, + "learning_rate": 1.2033778481810975e-07, + "logits/chosen": -1.0336154699325562, + "logits/rejected": -1.0059373378753662, + "logps/chosen": -399.5526428222656, + "logps/rejected": -472.43011474609375, + "loss": 0.6867, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.17955917119979858, + "rewards/margins": 0.11022365093231201, + "rewards/rejected": -0.2897828221321106, + "step": 13920 + }, + { + "epoch": 0.91, + "learning_rate": 1.1859380321798591e-07, + "logits/chosen": -1.0649926662445068, + "logits/rejected": -1.3081092834472656, + "logps/chosen": -397.7228698730469, + "logps/rejected": -483.80780029296875, + "loss": 0.6883, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.1951616108417511, + "rewards/margins": 0.07134003937244415, + "rewards/rejected": -0.26650166511535645, + "step": 13930 + }, + { + "epoch": 0.91, + "learning_rate": 1.1686224419912989e-07, + "logits/chosen": -0.9648985862731934, + "logits/rejected": -0.7549543380737305, + "logps/chosen": -487.0858459472656, + "logps/rejected": -567.8107299804688, + "loss": 0.6875, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2342132031917572, + "rewards/margins": 0.11001571267843246, + "rewards/rejected": -0.34422892332077026, + "step": 13940 + }, + { + "epoch": 0.91, + "learning_rate": 1.1514311679420104e-07, + "logits/chosen": -0.598174512386322, + "logits/rejected": -0.6542484164237976, + "logps/chosen": -364.93170166015625, + "logps/rejected": -530.19189453125, + "loss": 0.6867, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2097831517457962, + "rewards/margins": 0.10944052785634995, + "rewards/rejected": -0.31922370195388794, + "step": 13950 + }, + { + "epoch": 0.91, + "learning_rate": 1.1343642997101029e-07, + "logits/chosen": -1.0652467012405396, + "logits/rejected": -0.8850333094596863, + "logps/chosen": -407.06231689453125, + "logps/rejected": -484.21783447265625, + "loss": 0.6911, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2047090083360672, + "rewards/margins": 0.0903773307800293, + "rewards/rejected": -0.2950863242149353, + "step": 13960 + }, + { + "epoch": 0.91, + "learning_rate": 1.1174219263247188e-07, + "logits/chosen": -0.5526877641677856, + "logits/rejected": -0.5558839440345764, + "logps/chosen": -417.8456115722656, + "logps/rejected": -504.50421142578125, + "loss": 0.6887, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.21902808547019958, + "rewards/margins": 0.10418976843357086, + "rewards/rejected": -0.32321786880493164, + "step": 13970 + }, + { + "epoch": 0.91, + "learning_rate": 1.1006041361655839e-07, + "logits/chosen": -1.1823909282684326, + "logits/rejected": -0.7704101204872131, + "logps/chosen": -402.4588317871094, + "logps/rejected": -427.7669372558594, + "loss": 0.6895, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.194021537899971, + "rewards/margins": 0.06524568051099777, + "rewards/rejected": -0.2592672109603882, + "step": 13980 + }, + { + "epoch": 0.92, + "learning_rate": 1.0839110169625189e-07, + "logits/chosen": -0.8379222750663757, + "logits/rejected": -0.9152050018310547, + "logps/chosen": -440.62347412109375, + "logps/rejected": -559.3052368164062, + "loss": 0.6869, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.23378117382526398, + "rewards/margins": 0.1260370910167694, + "rewards/rejected": -0.3598182797431946, + "step": 13990 + }, + { + "epoch": 0.92, + "learning_rate": 1.06734265579502e-07, + "logits/chosen": -1.0023798942565918, + "logits/rejected": -0.6805317997932434, + "logps/chosen": -481.406494140625, + "logps/rejected": -506.0174865722656, + "loss": 0.6884, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2198897898197174, + "rewards/margins": 0.09111623466014862, + "rewards/rejected": -0.3110060393810272, + "step": 14000 + }, + { + "epoch": 0.92, + "eval_logits/chosen": -0.9784961938858032, + "eval_logits/rejected": -0.851710319519043, + "eval_logps/chosen": -433.59979248046875, + "eval_logps/rejected": -500.4915771484375, + "eval_loss": 0.689401388168335, + "eval_rewards/accuracies": 0.6669999957084656, + "eval_rewards/chosen": -0.20159488916397095, + "eval_rewards/margins": 0.08728481084108353, + "eval_rewards/rejected": -0.2888796925544739, + "eval_runtime": 712.0057, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 1.404, + "step": 14000 + }, + { + "epoch": 0.92, + "learning_rate": 1.050899139091771e-07, + "logits/chosen": -1.23249351978302, + "logits/rejected": -0.8209800720214844, + "logps/chosen": -479.36785888671875, + "logps/rejected": -524.4945068359375, + "loss": 0.6895, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.20074041187763214, + "rewards/margins": 0.09164474904537201, + "rewards/rejected": -0.29238516092300415, + "step": 14010 + }, + { + "epoch": 0.92, + "learning_rate": 1.0345805526302072e-07, + "logits/chosen": -1.1188102960586548, + "logits/rejected": -0.9375091791152954, + "logps/chosen": -407.9237365722656, + "logps/rejected": -487.79052734375, + "loss": 0.69, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2067614048719406, + "rewards/margins": 0.0871044397354126, + "rewards/rejected": -0.2938658595085144, + "step": 14020 + }, + { + "epoch": 0.92, + "learning_rate": 1.0183869815360764e-07, + "logits/chosen": -1.065840244293213, + "logits/rejected": -1.1328171491622925, + "logps/chosen": -385.97149658203125, + "logps/rejected": -474.182861328125, + "loss": 0.6912, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.19405806064605713, + "rewards/margins": 0.05557119846343994, + "rewards/rejected": -0.24962928891181946, + "step": 14030 + }, + { + "epoch": 0.92, + "learning_rate": 1.0023185102829763e-07, + "logits/chosen": -0.771183967590332, + "logits/rejected": -0.8472586870193481, + "logps/chosen": -448.053955078125, + "logps/rejected": -546.9066162109375, + "loss": 0.6899, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.21723172068595886, + "rewards/margins": 0.09466800838708878, + "rewards/rejected": -0.31189972162246704, + "step": 14040 + }, + { + "epoch": 0.92, + "learning_rate": 9.863752226919182e-08, + "logits/chosen": -0.8391677737236023, + "logits/rejected": -0.6935745477676392, + "logps/chosen": -425.4812927246094, + "logps/rejected": -488.382568359375, + "loss": 0.6863, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18774910271167755, + "rewards/margins": 0.12847992777824402, + "rewards/rejected": -0.31622904539108276, + "step": 14050 + }, + { + "epoch": 0.92, + "learning_rate": 9.705572019309107e-08, + "logits/chosen": -0.9686983823776245, + "logits/rejected": -0.8074959516525269, + "logps/chosen": -480.96795654296875, + "logps/rejected": -553.8460693359375, + "loss": 0.6877, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2129405289888382, + "rewards/margins": 0.1043621078133583, + "rewards/rejected": -0.3173026442527771, + "step": 14060 + }, + { + "epoch": 0.92, + "learning_rate": 9.548645305144849e-08, + "logits/chosen": -1.1921392679214478, + "logits/rejected": -0.9313928484916687, + "logps/chosen": -352.3310546875, + "logps/rejected": -439.64532470703125, + "loss": 0.6876, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1788359135389328, + "rewards/margins": 0.08334054052829742, + "rewards/rejected": -0.2621764540672302, + "step": 14070 + }, + { + "epoch": 0.92, + "learning_rate": 9.392972903033149e-08, + "logits/chosen": -0.7557094097137451, + "logits/rejected": -1.0380260944366455, + "logps/chosen": -400.652587890625, + "logps/rejected": -442.6686096191406, + "loss": 0.692, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.17752131819725037, + "rewards/margins": 0.04955955222249031, + "rewards/rejected": -0.22708086669445038, + "step": 14080 + }, + { + "epoch": 0.92, + "learning_rate": 9.238555625037449e-08, + "logits/chosen": -0.8334128260612488, + "logits/rejected": -0.7627261281013489, + "logps/chosen": -386.14227294921875, + "logps/rejected": -404.98895263671875, + "loss": 0.6913, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.19543412327766418, + "rewards/margins": 0.048528458923101425, + "rewards/rejected": -0.24396257102489471, + "step": 14090 + }, + { + "epoch": 0.92, + "learning_rate": 9.085394276673903e-08, + "logits/chosen": -0.9746842384338379, + "logits/rejected": -0.9705870747566223, + "logps/chosen": -472.040771484375, + "logps/rejected": -545.2486572265625, + "loss": 0.6892, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2038746178150177, + "rewards/margins": 0.08765744417905807, + "rewards/rejected": -0.291532039642334, + "step": 14100 + }, + { + "epoch": 0.92, + "eval_logits/chosen": -0.9989323616027832, + "eval_logits/rejected": -0.8711248636245728, + "eval_logps/chosen": -429.91204833984375, + "eval_logps/rejected": -496.2607116699219, + "eval_loss": 0.6894006133079529, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -0.19790711998939514, + "eval_rewards/margins": 0.08674175292253494, + "eval_rewards/rejected": -0.2846488356590271, + "eval_runtime": 711.9768, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 1.405, + "step": 14100 + }, + { + "epoch": 0.92, + "learning_rate": 8.933489656907157e-08, + "logits/chosen": -0.8989300727844238, + "logits/rejected": -0.9867172241210938, + "logps/chosen": -416.06744384765625, + "logps/rejected": -497.34051513671875, + "loss": 0.6907, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.19873200356960297, + "rewards/margins": 0.0563327893614769, + "rewards/rejected": -0.25506478548049927, + "step": 14110 + }, + { + "epoch": 0.92, + "learning_rate": 8.782842558146127e-08, + "logits/chosen": -0.8634630441665649, + "logits/rejected": -0.7865079641342163, + "logps/chosen": -339.5974426269531, + "logps/rejected": -452.58404541015625, + "loss": 0.6879, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.177713543176651, + "rewards/margins": 0.11469310522079468, + "rewards/rejected": -0.2924066185951233, + "step": 14120 + }, + { + "epoch": 0.92, + "learning_rate": 8.633453766239836e-08, + "logits/chosen": -1.139054536819458, + "logits/rejected": -1.0553338527679443, + "logps/chosen": -413.77044677734375, + "logps/rejected": -440.55194091796875, + "loss": 0.6915, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.17864930629730225, + "rewards/margins": 0.05863678455352783, + "rewards/rejected": -0.23728612065315247, + "step": 14130 + }, + { + "epoch": 0.93, + "learning_rate": 8.485324060473448e-08, + "logits/chosen": -1.0663942098617554, + "logits/rejected": -0.8815576434135437, + "logps/chosen": -439.49884033203125, + "logps/rejected": -488.25054931640625, + "loss": 0.6903, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.19921675324440002, + "rewards/margins": 0.06114257499575615, + "rewards/rejected": -0.26035934686660767, + "step": 14140 + }, + { + "epoch": 0.93, + "learning_rate": 8.338454213564052e-08, + "logits/chosen": -1.031376600265503, + "logits/rejected": -0.7835978269577026, + "logps/chosen": -444.68743896484375, + "logps/rejected": -538.5584106445312, + "loss": 0.689, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.21379053592681885, + "rewards/margins": 0.1110650897026062, + "rewards/rejected": -0.32485562562942505, + "step": 14150 + }, + { + "epoch": 0.93, + "learning_rate": 8.192844991656679e-08, + "logits/chosen": -0.9138960838317871, + "logits/rejected": -0.6291934847831726, + "logps/chosen": -473.10870361328125, + "logps/rejected": -506.9134826660156, + "loss": 0.6891, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.23439207673072815, + "rewards/margins": 0.07021282613277435, + "rewards/rejected": -0.3046048879623413, + "step": 14160 + }, + { + "epoch": 0.93, + "learning_rate": 8.048497154320434e-08, + "logits/chosen": -0.947569727897644, + "logits/rejected": -1.0524301528930664, + "logps/chosen": -345.71954345703125, + "logps/rejected": -428.63604736328125, + "loss": 0.6894, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2161700427532196, + "rewards/margins": 0.07381902635097504, + "rewards/rejected": -0.28998905420303345, + "step": 14170 + }, + { + "epoch": 0.93, + "learning_rate": 7.905411454544265e-08, + "logits/chosen": -1.0077670812606812, + "logits/rejected": -0.9876540303230286, + "logps/chosen": -435.50872802734375, + "logps/rejected": -503.3299865722656, + "loss": 0.6905, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.19900453090667725, + "rewards/margins": 0.06307898461818695, + "rewards/rejected": -0.262083500623703, + "step": 14180 + }, + { + "epoch": 0.93, + "learning_rate": 7.763588638733332e-08, + "logits/chosen": -0.9834734201431274, + "logits/rejected": -1.0122195482254028, + "logps/chosen": -455.09893798828125, + "logps/rejected": -530.7279052734375, + "loss": 0.6877, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.19267883896827698, + "rewards/margins": 0.09545306861400604, + "rewards/rejected": -0.2881319224834442, + "step": 14190 + }, + { + "epoch": 0.93, + "learning_rate": 7.623029446704899e-08, + "logits/chosen": -1.1731340885162354, + "logits/rejected": -1.228776454925537, + "logps/chosen": -514.779541015625, + "logps/rejected": -593.9116821289062, + "loss": 0.689, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20333810150623322, + "rewards/margins": 0.11602671444416046, + "rewards/rejected": -0.31936484575271606, + "step": 14200 + }, + { + "epoch": 0.93, + "eval_logits/chosen": -0.9908974766731262, + "eval_logits/rejected": -0.8632987141609192, + "eval_logps/chosen": -431.88525390625, + "eval_logps/rejected": -498.7848815917969, + "eval_loss": 0.6893996596336365, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -0.19988025724887848, + "eval_rewards/margins": 0.08729271590709686, + "eval_rewards/rejected": -0.28717297315597534, + "eval_runtime": 713.4147, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 1.402, + "step": 14200 + }, + { + "epoch": 0.93, + "learning_rate": 7.483734611684557e-08, + "logits/chosen": -0.7480652928352356, + "logits/rejected": -0.5744966268539429, + "logps/chosen": -455.3934020996094, + "logps/rejected": -489.6495666503906, + "loss": 0.6908, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.19850760698318481, + "rewards/margins": 0.08841712027788162, + "rewards/rejected": -0.2869247496128082, + "step": 14210 + }, + { + "epoch": 0.93, + "learning_rate": 7.345704860302366e-08, + "logits/chosen": -1.3832318782806396, + "logits/rejected": -0.9964305758476257, + "logps/chosen": -455.84136962890625, + "logps/rejected": -557.6566162109375, + "loss": 0.6895, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20758870244026184, + "rewards/margins": 0.10249364376068115, + "rewards/rejected": -0.3100823760032654, + "step": 14220 + }, + { + "epoch": 0.93, + "learning_rate": 7.208940912589224e-08, + "logits/chosen": -0.9903473854064941, + "logits/rejected": -0.7882139086723328, + "logps/chosen": -443.6026916503906, + "logps/rejected": -530.8634643554688, + "loss": 0.6858, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.23401638865470886, + "rewards/margins": 0.12463922798633575, + "rewards/rejected": -0.3586556017398834, + "step": 14230 + }, + { + "epoch": 0.93, + "learning_rate": 7.073443481972753e-08, + "logits/chosen": -0.9210270047187805, + "logits/rejected": -0.8315000534057617, + "logps/chosen": -408.10357666015625, + "logps/rejected": -515.8826293945312, + "loss": 0.6879, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.22394785284996033, + "rewards/margins": 0.08827735483646393, + "rewards/rejected": -0.31222519278526306, + "step": 14240 + }, + { + "epoch": 0.93, + "learning_rate": 6.939213275274027e-08, + "logits/chosen": -1.0639857053756714, + "logits/rejected": -1.0767017602920532, + "logps/chosen": -434.39300537109375, + "logps/rejected": -488.84893798828125, + "loss": 0.6904, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.19513921439647675, + "rewards/margins": 0.0740586444735527, + "rewards/rejected": -0.26919785141944885, + "step": 14250 + }, + { + "epoch": 0.93, + "learning_rate": 6.806250992703461e-08, + "logits/chosen": -0.936127781867981, + "logits/rejected": -0.8528211712837219, + "logps/chosen": -408.73883056640625, + "logps/rejected": -458.3291931152344, + "loss": 0.6905, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1920442134141922, + "rewards/margins": 0.0758005753159523, + "rewards/rejected": -0.2678447961807251, + "step": 14260 + }, + { + "epoch": 0.93, + "learning_rate": 6.674557327857572e-08, + "logits/chosen": -1.1486737728118896, + "logits/rejected": -1.0023722648620605, + "logps/chosen": -447.0084533691406, + "logps/rejected": -563.09375, + "loss": 0.6867, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19273777306079865, + "rewards/margins": 0.12517333030700684, + "rewards/rejected": -0.3179110884666443, + "step": 14270 + }, + { + "epoch": 0.93, + "learning_rate": 6.544132967714917e-08, + "logits/chosen": -0.7697897553443909, + "logits/rejected": -0.6848057508468628, + "logps/chosen": -498.2203063964844, + "logps/rejected": -595.4434204101562, + "loss": 0.6877, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.25136035680770874, + "rewards/margins": 0.11093559116125107, + "rewards/rejected": -0.3622959554195404, + "step": 14280 + }, + { + "epoch": 0.93, + "learning_rate": 6.414978592632932e-08, + "logits/chosen": -0.9375301599502563, + "logits/rejected": -0.8963106870651245, + "logps/chosen": -468.3148498535156, + "logps/rejected": -512.3311767578125, + "loss": 0.69, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.20721551775932312, + "rewards/margins": 0.09047175943851471, + "rewards/rejected": -0.297687292098999, + "step": 14290 + }, + { + "epoch": 0.94, + "learning_rate": 6.287094876344046e-08, + "logits/chosen": -1.2189066410064697, + "logits/rejected": -1.0829071998596191, + "logps/chosen": -325.34210205078125, + "logps/rejected": -410.9091796875, + "loss": 0.6911, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.15292184054851532, + "rewards/margins": 0.07892803847789764, + "rewards/rejected": -0.23184990882873535, + "step": 14300 + }, + { + "epoch": 0.94, + "eval_logits/chosen": -0.998076856136322, + "eval_logits/rejected": -0.8702616095542908, + "eval_logps/chosen": -430.6116638183594, + "eval_logps/rejected": -496.98193359375, + "eval_loss": 0.689400315284729, + "eval_rewards/accuracies": 0.6679999828338623, + "eval_rewards/chosen": -0.1986067146062851, + "eval_rewards/margins": 0.08676330000162125, + "eval_rewards/rejected": -0.2853700518608093, + "eval_runtime": 711.6289, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 1.405, + "step": 14300 + }, + { + "epoch": 0.94, + "learning_rate": 6.160482485952413e-08, + "logits/chosen": -1.1886036396026611, + "logits/rejected": -1.1761853694915771, + "logps/chosen": -464.11669921875, + "logps/rejected": -506.704345703125, + "loss": 0.6902, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2276669293642044, + "rewards/margins": 0.07438355684280396, + "rewards/rejected": -0.30205050110816956, + "step": 14310 + }, + { + "epoch": 0.94, + "learning_rate": 6.035142081930234e-08, + "logits/chosen": -0.9976833462715149, + "logits/rejected": -0.8096323013305664, + "logps/chosen": -493.4520568847656, + "logps/rejected": -496.55987548828125, + "loss": 0.6905, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2306557446718216, + "rewards/margins": 0.08012469112873077, + "rewards/rejected": -0.31078043580055237, + "step": 14320 + }, + { + "epoch": 0.94, + "learning_rate": 5.911074318114496e-08, + "logits/chosen": -0.91179358959198, + "logits/rejected": -0.7653204202651978, + "logps/chosen": -409.9183654785156, + "logps/rejected": -543.5857543945312, + "loss": 0.69, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.20633026957511902, + "rewards/margins": 0.09154252707958221, + "rewards/rejected": -0.2978728115558624, + "step": 14330 + }, + { + "epoch": 0.94, + "learning_rate": 5.788279841703381e-08, + "logits/chosen": -1.199209451675415, + "logits/rejected": -0.9316481351852417, + "logps/chosen": -374.29852294921875, + "logps/rejected": -461.6366271972656, + "loss": 0.6889, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.19038894772529602, + "rewards/margins": 0.09500516206026077, + "rewards/rejected": -0.2853941321372986, + "step": 14340 + }, + { + "epoch": 0.94, + "learning_rate": 5.66675929325311e-08, + "logits/chosen": -1.1369472742080688, + "logits/rejected": -0.9022246599197388, + "logps/chosen": -429.6748962402344, + "logps/rejected": -462.70355224609375, + "loss": 0.6914, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.20901048183441162, + "rewards/margins": 0.04437742009758949, + "rewards/rejected": -0.2533878982067108, + "step": 14350 + }, + { + "epoch": 0.94, + "learning_rate": 5.546513306674301e-08, + "logits/chosen": -0.8335935473442078, + "logits/rejected": -0.7389085292816162, + "logps/chosen": -486.3041076660156, + "logps/rejected": -500.9739685058594, + "loss": 0.6885, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.207787424325943, + "rewards/margins": 0.08975278586149216, + "rewards/rejected": -0.29754018783569336, + "step": 14360 + }, + { + "epoch": 0.94, + "learning_rate": 5.4275425092290004e-08, + "logits/chosen": -1.5143823623657227, + "logits/rejected": -1.290815830230713, + "logps/chosen": -443.744384765625, + "logps/rejected": -505.951416015625, + "loss": 0.6909, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18894822895526886, + "rewards/margins": 0.08064968138933182, + "rewards/rejected": -0.2695979177951813, + "step": 14370 + }, + { + "epoch": 0.94, + "learning_rate": 5.309847521527078e-08, + "logits/chosen": -0.7275829315185547, + "logits/rejected": -0.6369680166244507, + "logps/chosen": -491.9794006347656, + "logps/rejected": -519.0418701171875, + "loss": 0.689, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.20740604400634766, + "rewards/margins": 0.06999073177576065, + "rewards/rejected": -0.2773967981338501, + "step": 14380 + }, + { + "epoch": 0.94, + "learning_rate": 5.1934289575233385e-08, + "logits/chosen": -0.7574592232704163, + "logits/rejected": -0.5376136302947998, + "logps/chosen": -447.2177734375, + "logps/rejected": -517.1713256835938, + "loss": 0.6887, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2076883316040039, + "rewards/margins": 0.10432090610265732, + "rewards/rejected": -0.3120092451572418, + "step": 14390 + }, + { + "epoch": 0.94, + "learning_rate": 5.078287424513994e-08, + "logits/chosen": -1.222109079360962, + "logits/rejected": -1.0470283031463623, + "logps/chosen": -476.23614501953125, + "logps/rejected": -512.8048095703125, + "loss": 0.6898, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20618847012519836, + "rewards/margins": 0.11139892041683197, + "rewards/rejected": -0.31758740544319153, + "step": 14400 + }, + { + "epoch": 0.94, + "eval_logits/chosen": -0.9976576566696167, + "eval_logits/rejected": -0.8699551820755005, + "eval_logps/chosen": -430.67169189453125, + "eval_logps/rejected": -497.1328430175781, + "eval_loss": 0.6893981695175171, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -0.1986667662858963, + "eval_rewards/margins": 0.08685415238142014, + "eval_rewards/rejected": -0.2855209410190582, + "eval_runtime": 711.3754, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.406, + "step": 14400 + }, + { + "epoch": 0.94, + "learning_rate": 4.964423523133671e-08, + "logits/chosen": -1.3410192728042603, + "logits/rejected": -0.8826667070388794, + "logps/chosen": -393.24261474609375, + "logps/rejected": -424.50665283203125, + "loss": 0.6917, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1755111664533615, + "rewards/margins": 0.06561318039894104, + "rewards/rejected": -0.24112434685230255, + "step": 14410 + }, + { + "epoch": 0.94, + "learning_rate": 4.8518378473522976e-08, + "logits/chosen": -1.096555471420288, + "logits/rejected": -0.836302638053894, + "logps/chosen": -454.3536682128906, + "logps/rejected": -537.505126953125, + "loss": 0.6867, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.20384380221366882, + "rewards/margins": 0.08987867832183838, + "rewards/rejected": -0.2937224507331848, + "step": 14420 + }, + { + "epoch": 0.94, + "learning_rate": 4.7405309844718584e-08, + "logits/chosen": -1.0802674293518066, + "logits/rejected": -0.9114401936531067, + "logps/chosen": -414.07745361328125, + "logps/rejected": -547.9803466796875, + "loss": 0.6862, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.22157840430736542, + "rewards/margins": 0.12470661103725433, + "rewards/rejected": -0.34628504514694214, + "step": 14430 + }, + { + "epoch": 0.94, + "learning_rate": 4.630503515123508e-08, + "logits/chosen": -1.1976383924484253, + "logits/rejected": -0.8946923017501831, + "logps/chosen": -411.8882751464844, + "logps/rejected": -443.54473876953125, + "loss": 0.6885, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2110222578048706, + "rewards/margins": 0.08152283728122711, + "rewards/rejected": -0.2925451099872589, + "step": 14440 + }, + { + "epoch": 0.95, + "learning_rate": 4.5217560132644056e-08, + "logits/chosen": -0.8913286924362183, + "logits/rejected": -0.7216338515281677, + "logps/chosen": -345.64935302734375, + "logps/rejected": -428.1698303222656, + "loss": 0.6902, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.19502976536750793, + "rewards/margins": 0.06411959230899811, + "rewards/rejected": -0.25914937257766724, + "step": 14450 + }, + { + "epoch": 0.95, + "learning_rate": 4.41428904617483e-08, + "logits/chosen": -0.997122585773468, + "logits/rejected": -1.084720492362976, + "logps/chosen": -370.079345703125, + "logps/rejected": -443.838623046875, + "loss": 0.6913, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1929706335067749, + "rewards/margins": 0.0690988153219223, + "rewards/rejected": -0.262069433927536, + "step": 14460 + }, + { + "epoch": 0.95, + "learning_rate": 4.3081031744550696e-08, + "logits/chosen": -1.1552343368530273, + "logits/rejected": -1.1827692985534668, + "logps/chosen": -410.41705322265625, + "logps/rejected": -485.11676025390625, + "loss": 0.6892, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.15727174282073975, + "rewards/margins": 0.09702739119529724, + "rewards/rejected": -0.2542991042137146, + "step": 14470 + }, + { + "epoch": 0.95, + "learning_rate": 4.2031989520227025e-08, + "logits/chosen": -0.9480659365653992, + "logits/rejected": -0.8651493191719055, + "logps/chosen": -449.3002014160156, + "logps/rejected": -501.34088134765625, + "loss": 0.6905, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.2209857702255249, + "rewards/margins": 0.07392819970846176, + "rewards/rejected": -0.29491397738456726, + "step": 14480 + }, + { + "epoch": 0.95, + "learning_rate": 4.099576926109461e-08, + "logits/chosen": -1.2740567922592163, + "logits/rejected": -0.925223171710968, + "logps/chosen": -428.7998962402344, + "logps/rejected": -424.1492614746094, + "loss": 0.6902, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.18901804089546204, + "rewards/margins": 0.07450384646654129, + "rewards/rejected": -0.2635219097137451, + "step": 14490 + }, + { + "epoch": 0.95, + "learning_rate": 3.997237637258705e-08, + "logits/chosen": -1.1149488687515259, + "logits/rejected": -0.8887525796890259, + "logps/chosen": -495.614013671875, + "logps/rejected": -540.0551147460938, + "loss": 0.6909, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.178360715508461, + "rewards/margins": 0.08778423070907593, + "rewards/rejected": -0.2661449611186981, + "step": 14500 + }, + { + "epoch": 0.95, + "eval_logits/chosen": -0.9958268404006958, + "eval_logits/rejected": -0.8681316375732422, + "eval_logps/chosen": -431.3943786621094, + "eval_logps/rejected": -498.07061767578125, + "eval_loss": 0.6893977522850037, + "eval_rewards/accuracies": 0.6694999933242798, + "eval_rewards/chosen": -0.19938941299915314, + "eval_rewards/margins": 0.08706925064325333, + "eval_rewards/rejected": -0.28645867109298706, + "eval_runtime": 709.4627, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 1.41, + "step": 14500 + }, + { + "epoch": 0.95, + "learning_rate": 3.8961816193222035e-08, + "logits/chosen": -1.0719501972198486, + "logits/rejected": -0.824332058429718, + "logps/chosen": -477.8409118652344, + "logps/rejected": -475.36474609375, + "loss": 0.6912, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2419906109571457, + "rewards/margins": 0.05485706776380539, + "rewards/rejected": -0.29684773087501526, + "step": 14510 + }, + { + "epoch": 0.95, + "learning_rate": 3.79640939945769e-08, + "logits/chosen": -1.032693862915039, + "logits/rejected": -0.8485744595527649, + "logps/chosen": -428.54351806640625, + "logps/rejected": -388.8865661621094, + "loss": 0.6919, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1423225998878479, + "rewards/margins": 0.04660715162754059, + "rewards/rejected": -0.18892976641654968, + "step": 14520 + }, + { + "epoch": 0.95, + "learning_rate": 3.697921498125895e-08, + "logits/chosen": -0.861232578754425, + "logits/rejected": -1.0105615854263306, + "logps/chosen": -435.960205078125, + "logps/rejected": -542.7559204101562, + "loss": 0.6892, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.22600717842578888, + "rewards/margins": 0.10091586410999298, + "rewards/rejected": -0.32692304253578186, + "step": 14530 + }, + { + "epoch": 0.95, + "learning_rate": 3.6007184290880456e-08, + "logits/chosen": -1.1425590515136719, + "logits/rejected": -1.0225722789764404, + "logps/chosen": -445.26361083984375, + "logps/rejected": -494.39453125, + "loss": 0.6908, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.23329059779644012, + "rewards/margins": 0.06579459458589554, + "rewards/rejected": -0.29908519983291626, + "step": 14540 + }, + { + "epoch": 0.95, + "learning_rate": 3.504800699402872e-08, + "logits/chosen": -1.2578893899917603, + "logits/rejected": -1.0587711334228516, + "logps/chosen": -548.8036499023438, + "logps/rejected": -515.9398193359375, + "loss": 0.6917, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2031494677066803, + "rewards/margins": 0.04050298407673836, + "rewards/rejected": -0.24365243315696716, + "step": 14550 + }, + { + "epoch": 0.95, + "learning_rate": 3.4101688094242967e-08, + "logits/chosen": -1.0031616687774658, + "logits/rejected": -0.8348426818847656, + "logps/chosen": -522.0515747070312, + "logps/rejected": -614.2586669921875, + "loss": 0.689, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.2524906098842621, + "rewards/margins": 0.11738330125808716, + "rewards/rejected": -0.36987388134002686, + "step": 14560 + }, + { + "epoch": 0.95, + "learning_rate": 3.3168232527985564e-08, + "logits/chosen": -0.6329993009567261, + "logits/rejected": -0.6292056441307068, + "logps/chosen": -437.3837890625, + "logps/rejected": -472.7875061035156, + "loss": 0.6903, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1957651823759079, + "rewards/margins": 0.09015369415283203, + "rewards/rejected": -0.2859188914299011, + "step": 14570 + }, + { + "epoch": 0.95, + "learning_rate": 3.224764516461892e-08, + "logits/chosen": -1.0613816976547241, + "logits/rejected": -0.7705498337745667, + "logps/chosen": -434.5621032714844, + "logps/rejected": -519.8670043945312, + "loss": 0.6876, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1795632541179657, + "rewards/margins": 0.11605356633663177, + "rewards/rejected": -0.2956168055534363, + "step": 14580 + }, + { + "epoch": 0.95, + "learning_rate": 3.133993080637665e-08, + "logits/chosen": -1.1013673543930054, + "logits/rejected": -0.8421053886413574, + "logps/chosen": -413.6099548339844, + "logps/rejected": -492.38116455078125, + "loss": 0.69, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.21072909235954285, + "rewards/margins": 0.09118209034204483, + "rewards/rejected": -0.3019111752510071, + "step": 14590 + }, + { + "epoch": 0.96, + "learning_rate": 3.0445094188342186e-08, + "logits/chosen": -0.4487873613834381, + "logits/rejected": -0.3390078842639923, + "logps/chosen": -452.75299072265625, + "logps/rejected": -464.5770568847656, + "loss": 0.6889, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.199276402592659, + "rewards/margins": 0.09208828955888748, + "rewards/rejected": -0.2913646996021271, + "step": 14600 + }, + { + "epoch": 0.96, + "eval_logits/chosen": -0.9952174425125122, + "eval_logits/rejected": -0.8675841689109802, + "eval_logps/chosen": -430.40631103515625, + "eval_logps/rejected": -496.6932373046875, + "eval_loss": 0.6893989443778992, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -0.19840139150619507, + "eval_rewards/margins": 0.08667998015880585, + "eval_rewards/rejected": -0.2850813567638397, + "eval_runtime": 711.9516, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 1.405, + "step": 14600 + }, + { + "epoch": 0.96, + "learning_rate": 2.9563139978421028e-08, + "logits/chosen": -0.8672275543212891, + "logits/rejected": -0.9306055903434753, + "logps/chosen": -410.693115234375, + "logps/rejected": -462.60345458984375, + "loss": 0.6902, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.1848212331533432, + "rewards/margins": 0.05969489365816116, + "rewards/rejected": -0.24451613426208496, + "step": 14610 + }, + { + "epoch": 0.96, + "learning_rate": 2.869407277731939e-08, + "logits/chosen": -0.6720192432403564, + "logits/rejected": -0.7131026983261108, + "logps/chosen": -373.59527587890625, + "logps/rejected": -424.0103454589844, + "loss": 0.6897, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.18833813071250916, + "rewards/margins": 0.07358469069004059, + "rewards/rejected": -0.26192283630371094, + "step": 14620 + }, + { + "epoch": 0.96, + "learning_rate": 2.783789711851642e-08, + "logits/chosen": -1.121572732925415, + "logits/rejected": -0.8326706886291504, + "logps/chosen": -361.41058349609375, + "logps/rejected": -448.26629638671875, + "loss": 0.6882, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.19955962896347046, + "rewards/margins": 0.10928840935230255, + "rewards/rejected": -0.3088480532169342, + "step": 14630 + }, + { + "epoch": 0.96, + "learning_rate": 2.6994617468244778e-08, + "logits/chosen": -1.0026696920394897, + "logits/rejected": -0.8741849660873413, + "logps/chosen": -406.95697021484375, + "logps/rejected": -448.57568359375, + "loss": 0.6889, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.1939363181591034, + "rewards/margins": 0.10553745925426483, + "rewards/rejected": -0.29947376251220703, + "step": 14640 + }, + { + "epoch": 0.96, + "learning_rate": 2.6164238225463155e-08, + "logits/chosen": -0.8640907406806946, + "logits/rejected": -0.5929582118988037, + "logps/chosen": -480.9214782714844, + "logps/rejected": -498.310546875, + "loss": 0.6901, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1984943449497223, + "rewards/margins": 0.09589709341526031, + "rewards/rejected": -0.294391393661499, + "step": 14650 + }, + { + "epoch": 0.96, + "learning_rate": 2.534676372183742e-08, + "logits/chosen": -0.7579206228256226, + "logits/rejected": -0.7912012338638306, + "logps/chosen": -489.6610412597656, + "logps/rejected": -507.6478576660156, + "loss": 0.6901, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.20570676028728485, + "rewards/margins": 0.07031337171792984, + "rewards/rejected": -0.2760201096534729, + "step": 14660 + }, + { + "epoch": 0.96, + "learning_rate": 2.4542198221714218e-08, + "logits/chosen": -0.6668469309806824, + "logits/rejected": -0.5565620064735413, + "logps/chosen": -329.5209655761719, + "logps/rejected": -421.29571533203125, + "loss": 0.6883, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1914140284061432, + "rewards/margins": 0.09154781699180603, + "rewards/rejected": -0.28296181559562683, + "step": 14670 + }, + { + "epoch": 0.96, + "learning_rate": 2.3750545922101854e-08, + "logits/chosen": -1.431932806968689, + "logits/rejected": -0.8189393877983093, + "logps/chosen": -505.0044860839844, + "logps/rejected": -524.7289428710938, + "loss": 0.6901, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.1956133097410202, + "rewards/margins": 0.08811850845813751, + "rewards/rejected": -0.2837317883968353, + "step": 14680 + }, + { + "epoch": 0.96, + "learning_rate": 2.2971810952646112e-08, + "logits/chosen": -1.1525204181671143, + "logits/rejected": -0.9542962908744812, + "logps/chosen": -463.877197265625, + "logps/rejected": -472.92974853515625, + "loss": 0.6906, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.20551447570323944, + "rewards/margins": 0.05675836652517319, + "rewards/rejected": -0.26227283477783203, + "step": 14690 + }, + { + "epoch": 0.96, + "learning_rate": 2.2205997375610576e-08, + "logits/chosen": -0.7429525852203369, + "logits/rejected": -0.7011960744857788, + "logps/chosen": -341.0621032714844, + "logps/rejected": -451.4022521972656, + "loss": 0.6902, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.15555623173713684, + "rewards/margins": 0.10518001019954681, + "rewards/rejected": -0.26073622703552246, + "step": 14700 + }, + { + "epoch": 0.96, + "eval_logits/chosen": -0.9974082112312317, + "eval_logits/rejected": -0.8696686029434204, + "eval_logps/chosen": -430.09259033203125, + "eval_logps/rejected": -496.2928771972656, + "eval_loss": 0.6893972158432007, + "eval_rewards/accuracies": 0.6690000295639038, + "eval_rewards/chosen": -0.1980876475572586, + "eval_rewards/margins": 0.0865933746099472, + "eval_rewards/rejected": -0.2846809923648834, + "eval_runtime": 711.6144, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 1.405, + "step": 14700 + }, + { + "epoch": 0.96, + "learning_rate": 2.1453109185853304e-08, + "logits/chosen": -1.0774486064910889, + "logits/rejected": -1.0813241004943848, + "logps/chosen": -369.2832336425781, + "logps/rejected": -461.7610778808594, + "loss": 0.6884, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.16720476746559143, + "rewards/margins": 0.09073060750961304, + "rewards/rejected": -0.2579353451728821, + "step": 14710 + }, + { + "epoch": 0.96, + "learning_rate": 2.0713150310808784e-08, + "logits/chosen": -1.082183599472046, + "logits/rejected": -1.0008739233016968, + "logps/chosen": -428.96868896484375, + "logps/rejected": -478.0359802246094, + "loss": 0.6914, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.20348091423511505, + "rewards/margins": 0.04330389201641083, + "rewards/rejected": -0.24678480625152588, + "step": 14720 + }, + { + "epoch": 0.96, + "learning_rate": 1.9986124610464064e-08, + "logits/chosen": -0.7365717887878418, + "logits/rejected": -0.5972979664802551, + "logps/chosen": -514.8629760742188, + "logps/rejected": -576.7749633789062, + "loss": 0.6877, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.22763288021087646, + "rewards/margins": 0.13172422349452972, + "rewards/rejected": -0.35935714840888977, + "step": 14730 + }, + { + "epoch": 0.96, + "learning_rate": 1.927203587734211e-08, + "logits/chosen": -0.6570479273796082, + "logits/rejected": -0.6569720506668091, + "logps/chosen": -454.4698791503906, + "logps/rejected": -490.32122802734375, + "loss": 0.6891, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.20147821307182312, + "rewards/margins": 0.08437781035900116, + "rewards/rejected": -0.2858560085296631, + "step": 14740 + }, + { + "epoch": 0.97, + "learning_rate": 1.8570887836479034e-08, + "logits/chosen": -0.90229731798172, + "logits/rejected": -0.6604552268981934, + "logps/chosen": -403.8556823730469, + "logps/rejected": -527.2491455078125, + "loss": 0.6896, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.21023252606391907, + "rewards/margins": 0.07057814300060272, + "rewards/rejected": -0.2808106541633606, + "step": 14750 + }, + { + "epoch": 0.97, + "learning_rate": 1.7882684145406616e-08, + "logits/chosen": -1.0498579740524292, + "logits/rejected": -1.024839162826538, + "logps/chosen": -483.9183654785156, + "logps/rejected": -557.7072143554688, + "loss": 0.6876, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.18749774992465973, + "rewards/margins": 0.07567773759365082, + "rewards/rejected": -0.26317542791366577, + "step": 14760 + }, + { + "epoch": 0.97, + "learning_rate": 1.7207428394132865e-08, + "logits/chosen": -1.2564983367919922, + "logits/rejected": -0.9558451771736145, + "logps/chosen": -474.35345458984375, + "logps/rejected": -536.0001220703125, + "loss": 0.6865, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.2045731544494629, + "rewards/margins": 0.11633388698101044, + "rewards/rejected": -0.3209070563316345, + "step": 14770 + }, + { + "epoch": 0.97, + "learning_rate": 1.654512410512177e-08, + "logits/chosen": -1.035137414932251, + "logits/rejected": -0.7679761648178101, + "logps/chosen": -451.58837890625, + "logps/rejected": -457.88275146484375, + "loss": 0.6907, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.19801142811775208, + "rewards/margins": 0.06868289411067963, + "rewards/rejected": -0.2666943073272705, + "step": 14780 + }, + { + "epoch": 0.97, + "learning_rate": 1.5895774733277468e-08, + "logits/chosen": -0.9306725263595581, + "logits/rejected": -0.8101316690444946, + "logps/chosen": -483.390380859375, + "logps/rejected": -517.47900390625, + "loss": 0.6894, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20357339084148407, + "rewards/margins": 0.08432519435882568, + "rewards/rejected": -0.28789860010147095, + "step": 14790 + }, + { + "epoch": 0.97, + "learning_rate": 1.5259383665924e-08, + "logits/chosen": -1.5006357431411743, + "logits/rejected": -1.146349549293518, + "logps/chosen": -514.8986206054688, + "logps/rejected": -501.13482666015625, + "loss": 0.6894, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.17280684411525726, + "rewards/margins": 0.0802449956536293, + "rewards/rejected": -0.2530518174171448, + "step": 14800 + }, + { + "epoch": 0.97, + "eval_logits/chosen": -0.9956094622612, + "eval_logits/rejected": -0.8681559562683105, + "eval_logps/chosen": -430.40167236328125, + "eval_logps/rejected": -496.6893615722656, + "eval_loss": 0.6893979907035828, + "eval_rewards/accuracies": 0.6675000190734863, + "eval_rewards/chosen": -0.1983967423439026, + "eval_rewards/margins": 0.08668076992034912, + "eval_rewards/rejected": -0.2850775122642517, + "eval_runtime": 712.7525, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 1.403, + "step": 14800 + }, + { + "epoch": 0.48, + "step": 14801, + "total_flos": 0.0, + "train_loss": 4.683178136915897e-05, + "train_runtime": 5.5433, + "train_samples_per_second": 11.029, + "train_steps_per_second": 5.592 + } + ], + "logging_steps": 10, + "max_steps": 31, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}