{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.998972954467648, "eval_steps": 100, "global_step": 6570, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.5662100456621e-10, "logits/chosen": -1.5087523460388184, "logits/rejected": -1.5035094022750854, "logps/chosen": -43.74905014038086, "logps/rejected": -77.60680389404297, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.5662100456621e-09, "logits/chosen": -1.545806646347046, "logits/rejected": -1.455157995223999, "logps/chosen": -55.80635452270508, "logps/rejected": -55.2034912109375, "loss": 0.6932, "rewards/accuracies": 0.4305555522441864, "rewards/chosen": 0.014816111885011196, "rewards/margins": -0.003535893280059099, "rewards/rejected": 0.018352005630731583, "step": 10 }, { "epoch": 0.01, "learning_rate": 9.1324200913242e-09, "logits/chosen": -1.5560485124588013, "logits/rejected": -1.4880516529083252, "logps/chosen": -54.789825439453125, "logps/rejected": -61.110069274902344, "loss": 0.6964, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.001987707568332553, "rewards/margins": 0.002111446810886264, "rewards/rejected": -0.00012373924255371094, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.36986301369863e-08, "logits/chosen": -1.5476592779159546, "logits/rejected": -1.460582971572876, "logps/chosen": -53.77582931518555, "logps/rejected": -60.30269241333008, "loss": 0.7011, "rewards/accuracies": 0.4375, "rewards/chosen": -0.00331380357965827, "rewards/margins": -0.014126944355666637, "rewards/rejected": 0.010813141241669655, "step": 30 }, { "epoch": 0.02, "learning_rate": 1.82648401826484e-08, "logits/chosen": -1.5262000560760498, "logits/rejected": -1.4483455419540405, "logps/chosen": -59.4591064453125, "logps/rejected": -60.53277587890625, "loss": 0.6914, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.01075730286538601, "rewards/margins": 0.009579157456755638, "rewards/rejected": -0.020336460322141647, "step": 40 }, { "epoch": 0.02, "learning_rate": 2.28310502283105e-08, "logits/chosen": -1.5269324779510498, "logits/rejected": -1.4619429111480713, "logps/chosen": -48.80940628051758, "logps/rejected": -54.6522102355957, "loss": 0.6844, "rewards/accuracies": 0.6875, "rewards/chosen": 0.007998323068022728, "rewards/margins": 0.05441845580935478, "rewards/rejected": -0.0464201346039772, "step": 50 }, { "epoch": 0.03, "learning_rate": 2.73972602739726e-08, "logits/chosen": -1.5598713159561157, "logits/rejected": -1.4761707782745361, "logps/chosen": -56.5130615234375, "logps/rejected": -59.2298698425293, "loss": 0.6669, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0005574465030804276, "rewards/margins": 0.07874791324138641, "rewards/rejected": -0.07930536568164825, "step": 60 }, { "epoch": 0.03, "learning_rate": 3.19634703196347e-08, "logits/chosen": -1.5540889501571655, "logits/rejected": -1.4865127801895142, "logps/chosen": -55.98405838012695, "logps/rejected": -60.471275329589844, "loss": 0.6547, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.005887323524802923, "rewards/margins": 0.08136871457099915, "rewards/rejected": -0.0872560366988182, "step": 70 }, { "epoch": 0.04, "learning_rate": 3.65296803652968e-08, "logits/chosen": -1.5670634508132935, "logits/rejected": -1.5008996725082397, "logps/chosen": -52.0002555847168, "logps/rejected": -56.97963333129883, "loss": 0.6275, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.018660223111510277, "rewards/margins": 0.12297725677490234, "rewards/rejected": -0.14163747429847717, "step": 80 }, { "epoch": 0.04, "learning_rate": 4.10958904109589e-08, "logits/chosen": -1.5634491443634033, "logits/rejected": -1.4947015047073364, "logps/chosen": -50.369789123535156, "logps/rejected": -58.014564514160156, "loss": 0.587, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.005921304225921631, "rewards/margins": 0.2610793709754944, "rewards/rejected": -0.25515809655189514, "step": 90 }, { "epoch": 0.05, "learning_rate": 4.5662100456621e-08, "logits/chosen": -1.5410099029541016, "logits/rejected": -1.460561990737915, "logps/chosen": -54.32032012939453, "logps/rejected": -58.245704650878906, "loss": 0.5577, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.007982229813933372, "rewards/margins": 0.3241470456123352, "rewards/rejected": -0.3321292996406555, "step": 100 }, { "epoch": 0.05, "eval_logits/chosen": -1.3715848922729492, "eval_logits/rejected": -1.307630181312561, "eval_logps/chosen": -76.15989685058594, "eval_logps/rejected": -60.609840393066406, "eval_loss": 0.5743366479873657, "eval_rewards/accuracies": 0.9022346138954163, "eval_rewards/chosen": -0.08896368741989136, "eval_rewards/margins": 0.2638399302959442, "eval_rewards/rejected": -0.35280361771583557, "eval_runtime": 185.6917, "eval_samples_per_second": 15.413, "eval_steps_per_second": 0.964, "step": 100 }, { "epoch": 0.05, "learning_rate": 5.02283105022831e-08, "logits/chosen": -1.5511906147003174, "logits/rejected": -1.4669945240020752, "logps/chosen": -55.648841857910156, "logps/rejected": -60.24755859375, "loss": 0.5325, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.00021359921083785594, "rewards/margins": 0.4047005772590637, "rewards/rejected": -0.4044870436191559, "step": 110 }, { "epoch": 0.05, "learning_rate": 5.47945205479452e-08, "logits/chosen": -1.5656968355178833, "logits/rejected": -1.4793637990951538, "logps/chosen": -56.9489631652832, "logps/rejected": -60.153228759765625, "loss": 0.4736, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.05813983827829361, "rewards/margins": 0.514919638633728, "rewards/rejected": -0.5730594396591187, "step": 120 }, { "epoch": 0.06, "learning_rate": 5.93607305936073e-08, "logits/chosen": -1.582545280456543, "logits/rejected": -1.4928141832351685, "logps/chosen": -57.66066360473633, "logps/rejected": -60.691490173339844, "loss": 0.3961, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.06978967040777206, "rewards/margins": 0.7058674693107605, "rewards/rejected": -0.7756571769714355, "step": 130 }, { "epoch": 0.06, "learning_rate": 6.39269406392694e-08, "logits/chosen": -1.6073917150497437, "logits/rejected": -1.529598593711853, "logps/chosen": -55.62971878051758, "logps/rejected": -63.5817985534668, "loss": 0.3421, "rewards/accuracies": 1.0, "rewards/chosen": -0.046302586793899536, "rewards/margins": 0.9670504331588745, "rewards/rejected": -1.0133531093597412, "step": 140 }, { "epoch": 0.07, "learning_rate": 6.84931506849315e-08, "logits/chosen": -1.5868146419525146, "logits/rejected": -1.5070278644561768, "logps/chosen": -55.41338348388672, "logps/rejected": -60.58258819580078, "loss": 0.311, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.06830114126205444, "rewards/margins": 1.1008093357086182, "rewards/rejected": -1.1691104173660278, "step": 150 }, { "epoch": 0.07, "learning_rate": 7.30593607305936e-08, "logits/chosen": -1.568574070930481, "logits/rejected": -1.4987123012542725, "logps/chosen": -54.731719970703125, "logps/rejected": -63.13419723510742, "loss": 0.2781, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.09268207848072052, "rewards/margins": 1.240229845046997, "rewards/rejected": -1.3329120874404907, "step": 160 }, { "epoch": 0.08, "learning_rate": 7.76255707762557e-08, "logits/chosen": -1.583616018295288, "logits/rejected": -1.5008718967437744, "logps/chosen": -58.21760940551758, "logps/rejected": -61.46492385864258, "loss": 0.2618, "rewards/accuracies": 1.0, "rewards/chosen": -0.1754956990480423, "rewards/margins": 1.261420488357544, "rewards/rejected": -1.4369161128997803, "step": 170 }, { "epoch": 0.08, "learning_rate": 8.21917808219178e-08, "logits/chosen": -1.6093565225601196, "logits/rejected": -1.516689658164978, "logps/chosen": -54.5604248046875, "logps/rejected": -61.80358123779297, "loss": 0.233, "rewards/accuracies": 1.0, "rewards/chosen": -0.13866546750068665, "rewards/margins": 1.441631555557251, "rewards/rejected": -1.5802969932556152, "step": 180 }, { "epoch": 0.09, "learning_rate": 8.67579908675799e-08, "logits/chosen": -1.571337342262268, "logits/rejected": -1.4844709634780884, "logps/chosen": -52.31999969482422, "logps/rejected": -58.7751350402832, "loss": 0.1923, "rewards/accuracies": 1.0, "rewards/chosen": -0.13308748602867126, "rewards/margins": 1.6981117725372314, "rewards/rejected": -1.831199288368225, "step": 190 }, { "epoch": 0.09, "learning_rate": 9.1324200913242e-08, "logits/chosen": -1.5900229215621948, "logits/rejected": -1.516791820526123, "logps/chosen": -49.58915710449219, "logps/rejected": -60.3437385559082, "loss": 0.1502, "rewards/accuracies": 1.0, "rewards/chosen": -0.15844210982322693, "rewards/margins": 2.0099329948425293, "rewards/rejected": -2.168375015258789, "step": 200 }, { "epoch": 0.09, "eval_logits/chosen": -1.4090580940246582, "eval_logits/rejected": -1.3397127389907837, "eval_logps/chosen": -77.1548080444336, "eval_logps/rejected": -64.89435577392578, "eval_loss": 0.17612037062644958, "eval_rewards/accuracies": 0.9804469347000122, "eval_rewards/chosen": -0.5864141583442688, "eval_rewards/margins": 1.9086493253707886, "eval_rewards/rejected": -2.495063543319702, "eval_runtime": 171.9625, "eval_samples_per_second": 16.643, "eval_steps_per_second": 1.041, "step": 200 }, { "epoch": 0.1, "learning_rate": 9.58904109589041e-08, "logits/chosen": -1.5708708763122559, "logits/rejected": -1.4990869760513306, "logps/chosen": -54.06782150268555, "logps/rejected": -65.75396728515625, "loss": 0.1042, "rewards/accuracies": 1.0, "rewards/chosen": -0.27950865030288696, "rewards/margins": 2.414898157119751, "rewards/rejected": -2.694406747817993, "step": 210 }, { "epoch": 0.1, "learning_rate": 1.004566210045662e-07, "logits/chosen": -1.6292283535003662, "logits/rejected": -1.5556135177612305, "logps/chosen": -53.032798767089844, "logps/rejected": -62.6429328918457, "loss": 0.0878, "rewards/accuracies": 1.0, "rewards/chosen": -0.28332996368408203, "rewards/margins": 2.6439812183380127, "rewards/rejected": -2.927311420440674, "step": 220 }, { "epoch": 0.1, "learning_rate": 1.050228310502283e-07, "logits/chosen": -1.6413395404815674, "logits/rejected": -1.5558950901031494, "logps/chosen": -60.82377243041992, "logps/rejected": -66.6085205078125, "loss": 0.0685, "rewards/accuracies": 1.0, "rewards/chosen": -0.32246023416519165, "rewards/margins": 3.061707019805908, "rewards/rejected": -3.384167194366455, "step": 230 }, { "epoch": 0.11, "learning_rate": 1.095890410958904e-07, "logits/chosen": -1.6124732494354248, "logits/rejected": -1.5318291187286377, "logps/chosen": -55.524559020996094, "logps/rejected": -63.96097946166992, "loss": 0.0697, "rewards/accuracies": 1.0, "rewards/chosen": -0.33981430530548096, "rewards/margins": 3.162278175354004, "rewards/rejected": -3.5020923614501953, "step": 240 }, { "epoch": 0.11, "learning_rate": 1.141552511415525e-07, "logits/chosen": -1.6445833444595337, "logits/rejected": -1.5500088930130005, "logps/chosen": -54.410560607910156, "logps/rejected": -62.597938537597656, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": -0.3053959012031555, "rewards/margins": 3.180203914642334, "rewards/rejected": -3.485599994659424, "step": 250 }, { "epoch": 0.12, "learning_rate": 1.187214611872146e-07, "logits/chosen": -1.634615182876587, "logits/rejected": -1.548903226852417, "logps/chosen": -58.29247283935547, "logps/rejected": -65.41529846191406, "loss": 0.0517, "rewards/accuracies": 1.0, "rewards/chosen": -0.42053937911987305, "rewards/margins": 3.6800742149353027, "rewards/rejected": -4.100613594055176, "step": 260 }, { "epoch": 0.12, "learning_rate": 1.232876712328767e-07, "logits/chosen": -1.6498243808746338, "logits/rejected": -1.5349724292755127, "logps/chosen": -57.144081115722656, "logps/rejected": -66.8178939819336, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": -0.36212849617004395, "rewards/margins": 3.6991469860076904, "rewards/rejected": -4.061275482177734, "step": 270 }, { "epoch": 0.13, "learning_rate": 1.278538812785388e-07, "logits/chosen": -1.639831304550171, "logits/rejected": -1.569330096244812, "logps/chosen": -54.149169921875, "logps/rejected": -68.57421112060547, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -0.41847267746925354, "rewards/margins": 4.1883344650268555, "rewards/rejected": -4.606807708740234, "step": 280 }, { "epoch": 0.13, "learning_rate": 1.324200913242009e-07, "logits/chosen": -1.6218324899673462, "logits/rejected": -1.5499851703643799, "logps/chosen": -56.719825744628906, "logps/rejected": -68.20590209960938, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.4609605371952057, "rewards/margins": 4.013735771179199, "rewards/rejected": -4.474696636199951, "step": 290 }, { "epoch": 0.14, "learning_rate": 1.36986301369863e-07, "logits/chosen": -1.6696264743804932, "logits/rejected": -1.5758763551712036, "logps/chosen": -54.80878829956055, "logps/rejected": -65.65101623535156, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": -0.43391790986061096, "rewards/margins": 3.7488112449645996, "rewards/rejected": -4.182730197906494, "step": 300 }, { "epoch": 0.14, "eval_logits/chosen": -1.44279146194458, "eval_logits/rejected": -1.368519902229309, "eval_logps/chosen": -78.34496307373047, "eval_logps/rejected": -69.59746551513672, "eval_loss": 0.06399906426668167, "eval_rewards/accuracies": 0.9860334992408752, "eval_rewards/chosen": -1.1814894676208496, "eval_rewards/margins": 3.665130138397217, "eval_rewards/rejected": -4.846619606018066, "eval_runtime": 135.8833, "eval_samples_per_second": 21.062, "eval_steps_per_second": 1.317, "step": 300 }, { "epoch": 0.14, "learning_rate": 1.415525114155251e-07, "logits/chosen": -1.6536544561386108, "logits/rejected": -1.5657002925872803, "logps/chosen": -52.22998046875, "logps/rejected": -65.63001251220703, "loss": 0.0392, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.43238696455955505, "rewards/margins": 4.57238245010376, "rewards/rejected": -5.004769325256348, "step": 310 }, { "epoch": 0.15, "learning_rate": 1.461187214611872e-07, "logits/chosen": -1.6720914840698242, "logits/rejected": -1.5804774761199951, "logps/chosen": -55.632659912109375, "logps/rejected": -70.00082397460938, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": -0.4426132142543793, "rewards/margins": 4.479395866394043, "rewards/rejected": -4.922008991241455, "step": 320 }, { "epoch": 0.15, "learning_rate": 1.506849315068493e-07, "logits/chosen": -1.6406399011611938, "logits/rejected": -1.5606629848480225, "logps/chosen": -54.38983154296875, "logps/rejected": -70.45909881591797, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": -0.520280659198761, "rewards/margins": 4.549806594848633, "rewards/rejected": -5.070087432861328, "step": 330 }, { "epoch": 0.16, "learning_rate": 1.552511415525114e-07, "logits/chosen": -1.6501238346099854, "logits/rejected": -1.5617876052856445, "logps/chosen": -53.52082443237305, "logps/rejected": -66.49567413330078, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -0.5217021703720093, "rewards/margins": 4.813857555389404, "rewards/rejected": -5.335558891296387, "step": 340 }, { "epoch": 0.16, "learning_rate": 1.598173515981735e-07, "logits/chosen": -1.6586627960205078, "logits/rejected": -1.579021692276001, "logps/chosen": -53.56574249267578, "logps/rejected": -70.02999114990234, "loss": 0.0247, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5866273641586304, "rewards/margins": 4.987410068511963, "rewards/rejected": -5.574038505554199, "step": 350 }, { "epoch": 0.16, "learning_rate": 1.643835616438356e-07, "logits/chosen": -1.6780459880828857, "logits/rejected": -1.596045732498169, "logps/chosen": -54.865150451660156, "logps/rejected": -70.90138244628906, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -0.6191946864128113, "rewards/margins": 4.894677639007568, "rewards/rejected": -5.513872146606445, "step": 360 }, { "epoch": 0.17, "learning_rate": 1.689497716894977e-07, "logits/chosen": -1.6677402257919312, "logits/rejected": -1.57552170753479, "logps/chosen": -55.3704833984375, "logps/rejected": -69.22010803222656, "loss": 0.0296, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.7166622281074524, "rewards/margins": 4.735496520996094, "rewards/rejected": -5.452158451080322, "step": 370 }, { "epoch": 0.17, "learning_rate": 1.735159817351598e-07, "logits/chosen": -1.6523818969726562, "logits/rejected": -1.5386160612106323, "logps/chosen": -55.9133186340332, "logps/rejected": -67.37703704833984, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.5371387004852295, "rewards/margins": 5.164801597595215, "rewards/rejected": -5.701941013336182, "step": 380 }, { "epoch": 0.18, "learning_rate": 1.780821917808219e-07, "logits/chosen": -1.6681104898452759, "logits/rejected": -1.5768417119979858, "logps/chosen": -54.644615173339844, "logps/rejected": -68.48939514160156, "loss": 0.0166, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5451260805130005, "rewards/margins": 5.735184669494629, "rewards/rejected": -6.280310153961182, "step": 390 }, { "epoch": 0.18, "learning_rate": 1.82648401826484e-07, "logits/chosen": -1.6817266941070557, "logits/rejected": -1.6053158044815063, "logps/chosen": -50.52559280395508, "logps/rejected": -69.53555297851562, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.5777836441993713, "rewards/margins": 5.324347019195557, "rewards/rejected": -5.902129650115967, "step": 400 }, { "epoch": 0.18, "eval_logits/chosen": -1.4647938013076782, "eval_logits/rejected": -1.387487769126892, "eval_logps/chosen": -79.24311828613281, "eval_logps/rejected": -72.7348403930664, "eval_loss": 0.0418909415602684, "eval_rewards/accuracies": 0.9832402467727661, "eval_rewards/chosen": -1.6305665969848633, "eval_rewards/margins": 4.784738063812256, "eval_rewards/rejected": -6.415303707122803, "eval_runtime": 177.0186, "eval_samples_per_second": 16.168, "eval_steps_per_second": 1.011, "step": 400 }, { "epoch": 0.19, "learning_rate": 1.872146118721461e-07, "logits/chosen": -1.7017066478729248, "logits/rejected": -1.5788252353668213, "logps/chosen": -61.74540328979492, "logps/rejected": -69.6572036743164, "loss": 0.0174, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7025364637374878, "rewards/margins": 5.482548713684082, "rewards/rejected": -6.185084342956543, "step": 410 }, { "epoch": 0.19, "learning_rate": 1.917808219178082e-07, "logits/chosen": -1.6518733501434326, "logits/rejected": -1.574467420578003, "logps/chosen": -53.831336975097656, "logps/rejected": -72.37210083007812, "loss": 0.0178, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8208284378051758, "rewards/margins": 5.8398590087890625, "rewards/rejected": -6.660687446594238, "step": 420 }, { "epoch": 0.2, "learning_rate": 1.963470319634703e-07, "logits/chosen": -1.6632076501846313, "logits/rejected": -1.5873596668243408, "logps/chosen": -53.70698928833008, "logps/rejected": -68.99622344970703, "loss": 0.0207, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.964774489402771, "rewards/margins": 5.707071304321289, "rewards/rejected": -6.67184591293335, "step": 430 }, { "epoch": 0.2, "learning_rate": 2.009132420091324e-07, "logits/chosen": -1.6617752313613892, "logits/rejected": -1.5743049383163452, "logps/chosen": -53.155479431152344, "logps/rejected": -70.70321655273438, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.732166051864624, "rewards/margins": 5.958271026611328, "rewards/rejected": -6.690437316894531, "step": 440 }, { "epoch": 0.21, "learning_rate": 2.054794520547945e-07, "logits/chosen": -1.7175381183624268, "logits/rejected": -1.6154544353485107, "logps/chosen": -58.01020431518555, "logps/rejected": -69.6778564453125, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.8646949529647827, "rewards/margins": 6.164088249206543, "rewards/rejected": -7.028783321380615, "step": 450 }, { "epoch": 0.21, "learning_rate": 2.100456621004566e-07, "logits/chosen": -1.6831591129302979, "logits/rejected": -1.5844804048538208, "logps/chosen": -60.1660270690918, "logps/rejected": -73.73701477050781, "loss": 0.0165, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0043270587921143, "rewards/margins": 6.397038459777832, "rewards/rejected": -7.401365756988525, "step": 460 }, { "epoch": 0.21, "learning_rate": 2.146118721461187e-07, "logits/chosen": -1.6989357471466064, "logits/rejected": -1.5939372777938843, "logps/chosen": -59.41900634765625, "logps/rejected": -74.23500061035156, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.7950853109359741, "rewards/margins": 6.795047760009766, "rewards/rejected": -7.5901336669921875, "step": 470 }, { "epoch": 0.22, "learning_rate": 2.191780821917808e-07, "logits/chosen": -1.6680141687393188, "logits/rejected": -1.578650951385498, "logps/chosen": -56.234466552734375, "logps/rejected": -71.89026641845703, "loss": 0.0156, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2601807117462158, "rewards/margins": 6.4187421798706055, "rewards/rejected": -7.6789231300354, "step": 480 }, { "epoch": 0.22, "learning_rate": 2.237442922374429e-07, "logits/chosen": -1.6936092376708984, "logits/rejected": -1.59926438331604, "logps/chosen": -56.20717239379883, "logps/rejected": -73.23368072509766, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.8995521664619446, "rewards/margins": 6.879524230957031, "rewards/rejected": -7.77907657623291, "step": 490 }, { "epoch": 0.23, "learning_rate": 2.28310502283105e-07, "logits/chosen": -1.698009729385376, "logits/rejected": -1.5847851037979126, "logps/chosen": -57.49848175048828, "logps/rejected": -72.68367004394531, "loss": 0.0128, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.065097451210022, "rewards/margins": 6.5358781814575195, "rewards/rejected": -7.600974082946777, "step": 500 }, { "epoch": 0.23, "eval_logits/chosen": -1.4846999645233154, "eval_logits/rejected": -1.4044886827468872, "eval_logps/chosen": -80.25221252441406, "eval_logps/rejected": -75.9832534790039, "eval_loss": 0.03207468241453171, "eval_rewards/accuracies": 0.9860334992408752, "eval_rewards/chosen": -2.1351206302642822, "eval_rewards/margins": 5.904390811920166, "eval_rewards/rejected": -8.039511680603027, "eval_runtime": 165.0524, "eval_samples_per_second": 17.34, "eval_steps_per_second": 1.085, "step": 500 }, { "epoch": 0.23, "learning_rate": 2.328767123287671e-07, "logits/chosen": -1.6877530813217163, "logits/rejected": -1.5861059427261353, "logps/chosen": -54.69840621948242, "logps/rejected": -72.17831420898438, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.0449682474136353, "rewards/margins": 6.904466152191162, "rewards/rejected": -7.949434757232666, "step": 510 }, { "epoch": 0.24, "learning_rate": 2.374429223744292e-07, "logits/chosen": -1.7049137353897095, "logits/rejected": -1.5926499366760254, "logps/chosen": -59.61528778076172, "logps/rejected": -75.29037475585938, "loss": 0.009, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9810761213302612, "rewards/margins": 7.4216814041137695, "rewards/rejected": -8.40275764465332, "step": 520 }, { "epoch": 0.24, "learning_rate": 2.420091324200913e-07, "logits/chosen": -1.6999527215957642, "logits/rejected": -1.6045364141464233, "logps/chosen": -57.0565185546875, "logps/rejected": -77.7638168334961, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.02046537399292, "rewards/margins": 7.695591926574707, "rewards/rejected": -8.716057777404785, "step": 530 }, { "epoch": 0.25, "learning_rate": 2.465753424657534e-07, "logits/chosen": -1.6926349401474, "logits/rejected": -1.5947034358978271, "logps/chosen": -59.54913330078125, "logps/rejected": -80.18620300292969, "loss": 0.0087, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0159438848495483, "rewards/margins": 7.910098075866699, "rewards/rejected": -8.926042556762695, "step": 540 }, { "epoch": 0.25, "learning_rate": 2.511415525114155e-07, "logits/chosen": -1.684308648109436, "logits/rejected": -1.5966044664382935, "logps/chosen": -60.59437942504883, "logps/rejected": -73.6009292602539, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125141859054565, "rewards/margins": 7.294568061828613, "rewards/rejected": -8.607081413269043, "step": 550 }, { "epoch": 0.26, "learning_rate": 2.557077625570776e-07, "logits/chosen": -1.7317125797271729, "logits/rejected": -1.647165298461914, "logps/chosen": -56.182151794433594, "logps/rejected": -75.43659973144531, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.1344718933105469, "rewards/margins": 7.353419303894043, "rewards/rejected": -8.487890243530273, "step": 560 }, { "epoch": 0.26, "learning_rate": 2.602739726027397e-07, "logits/chosen": -1.7006349563598633, "logits/rejected": -1.6112979650497437, "logps/chosen": -55.806793212890625, "logps/rejected": -76.87861633300781, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.0782018899917603, "rewards/margins": 7.8718671798706055, "rewards/rejected": -8.950068473815918, "step": 570 }, { "epoch": 0.26, "learning_rate": 2.648401826484018e-07, "logits/chosen": -1.7162210941314697, "logits/rejected": -1.6245304346084595, "logps/chosen": -55.56931686401367, "logps/rejected": -73.07711791992188, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.2571735382080078, "rewards/margins": 7.612096309661865, "rewards/rejected": -8.869270324707031, "step": 580 }, { "epoch": 0.27, "learning_rate": 2.694063926940639e-07, "logits/chosen": -1.6970678567886353, "logits/rejected": -1.6222326755523682, "logps/chosen": -56.684051513671875, "logps/rejected": -79.91156005859375, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.2310670614242554, "rewards/margins": 8.0743989944458, "rewards/rejected": -9.305466651916504, "step": 590 }, { "epoch": 0.27, "learning_rate": 2.73972602739726e-07, "logits/chosen": -1.7427692413330078, "logits/rejected": -1.6350624561309814, "logps/chosen": -55.236839294433594, "logps/rejected": -76.8140869140625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.1100609302520752, "rewards/margins": 7.973969459533691, "rewards/rejected": -9.084030151367188, "step": 600 }, { "epoch": 0.27, "eval_logits/chosen": -1.4985913038253784, "eval_logits/rejected": -1.4162646532058716, "eval_logps/chosen": -81.62905883789062, "eval_logps/rejected": -79.30266571044922, "eval_loss": 0.029448172077536583, "eval_rewards/accuracies": 0.9860334992408752, "eval_rewards/chosen": -2.8235411643981934, "eval_rewards/margins": 6.87567663192749, "eval_rewards/rejected": -9.699217796325684, "eval_runtime": 162.0935, "eval_samples_per_second": 17.656, "eval_steps_per_second": 1.104, "step": 600 }, { "epoch": 0.28, "learning_rate": 2.785388127853881e-07, "logits/chosen": -1.7095897197723389, "logits/rejected": -1.6108310222625732, "logps/chosen": -57.466880798339844, "logps/rejected": -76.33137512207031, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -1.2477577924728394, "rewards/margins": 8.338712692260742, "rewards/rejected": -9.586469650268555, "step": 610 }, { "epoch": 0.28, "learning_rate": 2.831050228310502e-07, "logits/chosen": -1.694031000137329, "logits/rejected": -1.6111032962799072, "logps/chosen": -54.303466796875, "logps/rejected": -79.16754150390625, "loss": 0.0112, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.403235673904419, "rewards/margins": 8.552541732788086, "rewards/rejected": -9.955777168273926, "step": 620 }, { "epoch": 0.29, "learning_rate": 2.876712328767123e-07, "logits/chosen": -1.7127506732940674, "logits/rejected": -1.619024634361267, "logps/chosen": -56.50312423706055, "logps/rejected": -79.96643829345703, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.282273530960083, "rewards/margins": 8.758012771606445, "rewards/rejected": -10.040285110473633, "step": 630 }, { "epoch": 0.29, "learning_rate": 2.922374429223744e-07, "logits/chosen": -1.7468054294586182, "logits/rejected": -1.6339282989501953, "logps/chosen": -58.642433166503906, "logps/rejected": -79.05645751953125, "loss": 0.0137, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4662420749664307, "rewards/margins": 8.245123863220215, "rewards/rejected": -9.711365699768066, "step": 640 }, { "epoch": 0.3, "learning_rate": 2.968036529680365e-07, "logits/chosen": -1.731302261352539, "logits/rejected": -1.627869963645935, "logps/chosen": -58.152130126953125, "logps/rejected": -76.38700866699219, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.5161360502243042, "rewards/margins": 8.61723518371582, "rewards/rejected": -10.133370399475098, "step": 650 }, { "epoch": 0.3, "learning_rate": 2.998477929984779e-07, "logits/chosen": -1.7117973566055298, "logits/rejected": -1.6044315099716187, "logps/chosen": -58.26856231689453, "logps/rejected": -81.69371032714844, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.4945389032363892, "rewards/margins": 9.201311111450195, "rewards/rejected": -10.69584846496582, "step": 660 }, { "epoch": 0.31, "learning_rate": 2.993404363267377e-07, "logits/chosen": -1.7394225597381592, "logits/rejected": -1.6316182613372803, "logps/chosen": -57.263755798339844, "logps/rejected": -75.65992736816406, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.1733144521713257, "rewards/margins": 8.887094497680664, "rewards/rejected": -10.060407638549805, "step": 670 }, { "epoch": 0.31, "learning_rate": 2.9883307965499743e-07, "logits/chosen": -1.6869474649429321, "logits/rejected": -1.5928945541381836, "logps/chosen": -57.73834228515625, "logps/rejected": -81.07805633544922, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.3280141353607178, "rewards/margins": 9.038291931152344, "rewards/rejected": -10.36630630493164, "step": 680 }, { "epoch": 0.31, "learning_rate": 2.983257229832572e-07, "logits/chosen": -1.7139816284179688, "logits/rejected": -1.6298580169677734, "logps/chosen": -55.35132598876953, "logps/rejected": -78.6609878540039, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.2809817790985107, "rewards/margins": 8.990970611572266, "rewards/rejected": -10.271952629089355, "step": 690 }, { "epoch": 0.32, "learning_rate": 2.9781836631151696e-07, "logits/chosen": -1.7303400039672852, "logits/rejected": -1.6253429651260376, "logps/chosen": -59.9201545715332, "logps/rejected": -81.42010498046875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.3820278644561768, "rewards/margins": 9.210229873657227, "rewards/rejected": -10.592256546020508, "step": 700 }, { "epoch": 0.32, "eval_logits/chosen": -1.5078723430633545, "eval_logits/rejected": -1.4251151084899902, "eval_logps/chosen": -81.525634765625, "eval_logps/rejected": -81.45867919921875, "eval_loss": 0.017705164849758148, "eval_rewards/accuracies": 0.9832402467727661, "eval_rewards/chosen": -2.7718329429626465, "eval_rewards/margins": 8.005391120910645, "eval_rewards/rejected": -10.777223587036133, "eval_runtime": 165.2049, "eval_samples_per_second": 17.324, "eval_steps_per_second": 1.084, "step": 700 }, { "epoch": 0.32, "learning_rate": 2.9731100963977676e-07, "logits/chosen": -1.715698003768921, "logits/rejected": -1.6322933435440063, "logps/chosen": -57.885459899902344, "logps/rejected": -84.03385925292969, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.5874685049057007, "rewards/margins": 9.31218433380127, "rewards/rejected": -10.899652481079102, "step": 710 }, { "epoch": 0.33, "learning_rate": 2.968036529680365e-07, "logits/chosen": -1.7445329427719116, "logits/rejected": -1.6722770929336548, "logps/chosen": -54.62650680541992, "logps/rejected": -83.03822326660156, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.637279748916626, "rewards/margins": 9.309844017028809, "rewards/rejected": -10.947123527526855, "step": 720 }, { "epoch": 0.33, "learning_rate": 2.962962962962963e-07, "logits/chosen": -1.7035659551620483, "logits/rejected": -1.6001815795898438, "logps/chosen": -55.35626983642578, "logps/rejected": -77.52360534667969, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.8972182273864746, "rewards/margins": 9.760783195495605, "rewards/rejected": -10.658002853393555, "step": 730 }, { "epoch": 0.34, "learning_rate": 2.9578893962455603e-07, "logits/chosen": -1.7120774984359741, "logits/rejected": -1.6191877126693726, "logps/chosen": -56.72313690185547, "logps/rejected": -78.64081573486328, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.2180434465408325, "rewards/margins": 9.50749397277832, "rewards/rejected": -10.72553825378418, "step": 740 }, { "epoch": 0.34, "learning_rate": 2.952815829528158e-07, "logits/chosen": -1.7385778427124023, "logits/rejected": -1.6423228979110718, "logps/chosen": -56.050682067871094, "logps/rejected": -79.77486419677734, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.8531892895698547, "rewards/margins": 9.420397758483887, "rewards/rejected": -10.273588180541992, "step": 750 }, { "epoch": 0.35, "learning_rate": 2.9477422628107556e-07, "logits/chosen": -1.7303050756454468, "logits/rejected": -1.6287094354629517, "logps/chosen": -61.40581512451172, "logps/rejected": -78.7645263671875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.1273159980773926, "rewards/margins": 9.553644180297852, "rewards/rejected": -10.680959701538086, "step": 760 }, { "epoch": 0.35, "learning_rate": 2.9426686960933536e-07, "logits/chosen": -1.7399718761444092, "logits/rejected": -1.6434978246688843, "logps/chosen": -53.90864181518555, "logps/rejected": -78.7162094116211, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.8057798147201538, "rewards/margins": 10.01326847076416, "rewards/rejected": -10.819047927856445, "step": 770 }, { "epoch": 0.36, "learning_rate": 2.937595129375951e-07, "logits/chosen": -1.727130651473999, "logits/rejected": -1.6336214542388916, "logps/chosen": -59.56471633911133, "logps/rejected": -78.61094665527344, "loss": 0.0065, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0801204442977905, "rewards/margins": 9.437311172485352, "rewards/rejected": -10.51743221282959, "step": 780 }, { "epoch": 0.36, "learning_rate": 2.932521562658549e-07, "logits/chosen": -1.7236578464508057, "logits/rejected": -1.637286901473999, "logps/chosen": -58.87115478515625, "logps/rejected": -81.21770477294922, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.2567155361175537, "rewards/margins": 10.11281967163086, "rewards/rejected": -11.369535446166992, "step": 790 }, { "epoch": 0.37, "learning_rate": 2.9274479959411463e-07, "logits/chosen": -1.7196662425994873, "logits/rejected": -1.6370540857315063, "logps/chosen": -58.21757888793945, "logps/rejected": -84.54280090332031, "loss": 0.0051, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.156782865524292, "rewards/margins": 10.224605560302734, "rewards/rejected": -11.381387710571289, "step": 800 }, { "epoch": 0.37, "eval_logits/chosen": -1.518147587776184, "eval_logits/rejected": -1.4352678060531616, "eval_logps/chosen": -80.94294738769531, "eval_logps/rejected": -82.54000091552734, "eval_loss": 0.014408529736101627, "eval_rewards/accuracies": 0.9832402467727661, "eval_rewards/chosen": -2.4804840087890625, "eval_rewards/margins": 8.837401390075684, "eval_rewards/rejected": -11.317886352539062, "eval_runtime": 169.3625, "eval_samples_per_second": 16.899, "eval_steps_per_second": 1.057, "step": 800 }, { "epoch": 0.37, "learning_rate": 2.922374429223744e-07, "logits/chosen": -1.708937644958496, "logits/rejected": -1.6160999536514282, "logps/chosen": -52.5054931640625, "logps/rejected": -78.92777252197266, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -0.7234916090965271, "rewards/margins": 10.119041442871094, "rewards/rejected": -10.842533111572266, "step": 810 }, { "epoch": 0.37, "learning_rate": 2.9173008625063416e-07, "logits/chosen": -1.722770094871521, "logits/rejected": -1.6324989795684814, "logps/chosen": -53.77677536010742, "logps/rejected": -81.64833068847656, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.8361567258834839, "rewards/margins": 10.269185066223145, "rewards/rejected": -11.105340957641602, "step": 820 }, { "epoch": 0.38, "learning_rate": 2.9122272957889396e-07, "logits/chosen": -1.7464574575424194, "logits/rejected": -1.641235589981079, "logps/chosen": -54.421730041503906, "logps/rejected": -77.16957092285156, "loss": 0.0067, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5370277166366577, "rewards/margins": 10.162129402160645, "rewards/rejected": -10.69915771484375, "step": 830 }, { "epoch": 0.38, "learning_rate": 2.907153729071537e-07, "logits/chosen": -1.7367918491363525, "logits/rejected": -1.635994553565979, "logps/chosen": -54.7552375793457, "logps/rejected": -81.75818634033203, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.014847755432129, "rewards/margins": 10.362683296203613, "rewards/rejected": -11.377530097961426, "step": 840 }, { "epoch": 0.39, "learning_rate": 2.902080162354135e-07, "logits/chosen": -1.7175086736679077, "logits/rejected": -1.6312010288238525, "logps/chosen": -55.76726150512695, "logps/rejected": -80.70665740966797, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.0785677433013916, "rewards/margins": 10.594701766967773, "rewards/rejected": -11.673269271850586, "step": 850 }, { "epoch": 0.39, "learning_rate": 2.8970065956367323e-07, "logits/chosen": -1.7384506464004517, "logits/rejected": -1.6417887210845947, "logps/chosen": -59.0673713684082, "logps/rejected": -82.13581848144531, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.1794620752334595, "rewards/margins": 10.776390075683594, "rewards/rejected": -11.955851554870605, "step": 860 }, { "epoch": 0.4, "learning_rate": 2.89193302891933e-07, "logits/chosen": -1.7018225193023682, "logits/rejected": -1.6194498538970947, "logps/chosen": -57.1175651550293, "logps/rejected": -85.069091796875, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.2594497203826904, "rewards/margins": 11.165175437927246, "rewards/rejected": -12.424626350402832, "step": 870 }, { "epoch": 0.4, "learning_rate": 2.8868594622019276e-07, "logits/chosen": -1.7552284002304077, "logits/rejected": -1.6478898525238037, "logps/chosen": -60.715362548828125, "logps/rejected": -81.6158676147461, "loss": 0.0072, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4402903318405151, "rewards/margins": 10.465791702270508, "rewards/rejected": -11.906082153320312, "step": 880 }, { "epoch": 0.41, "learning_rate": 2.8817858954845256e-07, "logits/chosen": -1.710700273513794, "logits/rejected": -1.6423050165176392, "logps/chosen": -53.98700714111328, "logps/rejected": -82.07879638671875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.2296615839004517, "rewards/margins": 10.90865707397461, "rewards/rejected": -12.138318061828613, "step": 890 }, { "epoch": 0.41, "learning_rate": 2.876712328767123e-07, "logits/chosen": -1.7296634912490845, "logits/rejected": -1.6472947597503662, "logps/chosen": -55.60344314575195, "logps/rejected": -81.44587707519531, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.296541690826416, "rewards/margins": 11.358396530151367, "rewards/rejected": -12.654939651489258, "step": 900 }, { "epoch": 0.41, "eval_logits/chosen": -1.526100516319275, "eval_logits/rejected": -1.4421030282974243, "eval_logps/chosen": -81.6524658203125, "eval_logps/rejected": -84.46768951416016, "eval_loss": 0.016024084761738777, "eval_rewards/accuracies": 0.9860334992408752, "eval_rewards/chosen": -2.835247278213501, "eval_rewards/margins": 9.446483612060547, "eval_rewards/rejected": -12.281729698181152, "eval_runtime": 149.6015, "eval_samples_per_second": 19.131, "eval_steps_per_second": 1.197, "step": 900 }, { "epoch": 0.42, "learning_rate": 2.871638762049721e-07, "logits/chosen": -1.7292686700820923, "logits/rejected": -1.6218318939208984, "logps/chosen": -55.28586959838867, "logps/rejected": -83.52758026123047, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8380565643310547, "rewards/margins": 11.400309562683105, "rewards/rejected": -12.238367080688477, "step": 910 }, { "epoch": 0.42, "learning_rate": 2.8665651953323183e-07, "logits/chosen": -1.7416365146636963, "logits/rejected": -1.6355617046356201, "logps/chosen": -57.684844970703125, "logps/rejected": -81.35444641113281, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.8555625677108765, "rewards/margins": 11.20235538482666, "rewards/rejected": -12.057916641235352, "step": 920 }, { "epoch": 0.42, "learning_rate": 2.861491628614916e-07, "logits/chosen": -1.7450590133666992, "logits/rejected": -1.6474605798721313, "logps/chosen": -58.1208381652832, "logps/rejected": -82.04862976074219, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.2038339376449585, "rewards/margins": 10.757715225219727, "rewards/rejected": -11.961549758911133, "step": 930 }, { "epoch": 0.43, "learning_rate": 2.8564180618975136e-07, "logits/chosen": -1.756439208984375, "logits/rejected": -1.6617294549942017, "logps/chosen": -59.64165496826172, "logps/rejected": -86.30843353271484, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.3430993556976318, "rewards/margins": 11.532323837280273, "rewards/rejected": -12.8754243850708, "step": 940 }, { "epoch": 0.43, "learning_rate": 2.8513444951801116e-07, "logits/chosen": -1.732020378112793, "logits/rejected": -1.643958330154419, "logps/chosen": -55.513648986816406, "logps/rejected": -84.36043548583984, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.1080124378204346, "rewards/margins": 11.76207160949707, "rewards/rejected": -12.870083808898926, "step": 950 }, { "epoch": 0.44, "learning_rate": 2.846270928462709e-07, "logits/chosen": -1.7512563467025757, "logits/rejected": -1.6647357940673828, "logps/chosen": -57.02693557739258, "logps/rejected": -85.93479919433594, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.300955057144165, "rewards/margins": 11.752635955810547, "rewards/rejected": -13.053593635559082, "step": 960 }, { "epoch": 0.44, "learning_rate": 2.841197361745307e-07, "logits/chosen": -1.7503582239151, "logits/rejected": -1.6578338146209717, "logps/chosen": -54.23548126220703, "logps/rejected": -82.74885559082031, "loss": 0.0069, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0630229711532593, "rewards/margins": 11.578569412231445, "rewards/rejected": -12.641592025756836, "step": 970 }, { "epoch": 0.45, "learning_rate": 2.8361237950279043e-07, "logits/chosen": -1.7624849081039429, "logits/rejected": -1.6492741107940674, "logps/chosen": -59.6468391418457, "logps/rejected": -83.25942993164062, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -1.0746952295303345, "rewards/margins": 12.109354972839355, "rewards/rejected": -13.184049606323242, "step": 980 }, { "epoch": 0.45, "learning_rate": 2.831050228310502e-07, "logits/chosen": -1.7709052562713623, "logits/rejected": -1.6669490337371826, "logps/chosen": -58.214073181152344, "logps/rejected": -80.26509094238281, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.2640316486358643, "rewards/margins": 10.607865333557129, "rewards/rejected": -11.871896743774414, "step": 990 }, { "epoch": 0.46, "learning_rate": 2.8259766615930996e-07, "logits/chosen": -1.7690212726593018, "logits/rejected": -1.664385199546814, "logps/chosen": -58.22309494018555, "logps/rejected": -80.5656967163086, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.4453684091567993, "rewards/margins": 11.335880279541016, "rewards/rejected": -12.781248092651367, "step": 1000 }, { "epoch": 0.46, "eval_logits/chosen": -1.5345439910888672, "eval_logits/rejected": -1.4514362812042236, "eval_logps/chosen": -81.75653076171875, "eval_logps/rejected": -85.9760971069336, "eval_loss": 0.012221959419548512, "eval_rewards/accuracies": 0.9860334992408752, "eval_rewards/chosen": -2.887273073196411, "eval_rewards/margins": 10.148656845092773, "eval_rewards/rejected": -13.035929679870605, "eval_runtime": 144.624, "eval_samples_per_second": 19.789, "eval_steps_per_second": 1.238, "step": 1000 }, { "epoch": 0.46, "learning_rate": 2.8209030948756976e-07, "logits/chosen": -1.755025863647461, "logits/rejected": -1.6674007177352905, "logps/chosen": -57.46387481689453, "logps/rejected": -87.47645568847656, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.1511553525924683, "rewards/margins": 12.040731430053711, "rewards/rejected": -13.191886901855469, "step": 1010 }, { "epoch": 0.47, "learning_rate": 2.815829528158295e-07, "logits/chosen": -1.7087234258651733, "logits/rejected": -1.6328926086425781, "logps/chosen": -51.928794860839844, "logps/rejected": -79.58134460449219, "loss": 0.0057, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.155023217201233, "rewards/margins": 11.272875785827637, "rewards/rejected": -12.427899360656738, "step": 1020 }, { "epoch": 0.47, "learning_rate": 2.810755961440893e-07, "logits/chosen": -1.7401186227798462, "logits/rejected": -1.653926134109497, "logps/chosen": -54.856727600097656, "logps/rejected": -85.71138000488281, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.2455310821533203, "rewards/margins": 11.82475757598877, "rewards/rejected": -13.070286750793457, "step": 1030 }, { "epoch": 0.47, "learning_rate": 2.8056823947234903e-07, "logits/chosen": -1.7602002620697021, "logits/rejected": -1.6599136590957642, "logps/chosen": -57.8088493347168, "logps/rejected": -83.03340911865234, "loss": 0.0082, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2695609331130981, "rewards/margins": 11.275420188903809, "rewards/rejected": -12.544981002807617, "step": 1040 }, { "epoch": 0.48, "learning_rate": 2.800608828006088e-07, "logits/chosen": -1.7425765991210938, "logits/rejected": -1.6693546772003174, "logps/chosen": -54.41240310668945, "logps/rejected": -84.59600830078125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.3567270040512085, "rewards/margins": 11.407785415649414, "rewards/rejected": -12.76451301574707, "step": 1050 }, { "epoch": 0.48, "learning_rate": 2.7955352612886856e-07, "logits/chosen": -1.7381328344345093, "logits/rejected": -1.6626803874969482, "logps/chosen": -55.707252502441406, "logps/rejected": -82.98629760742188, "loss": 0.0086, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.7733808755874634, "rewards/margins": 11.287147521972656, "rewards/rejected": -13.060528755187988, "step": 1060 }, { "epoch": 0.49, "learning_rate": 2.7904616945712836e-07, "logits/chosen": -1.7508115768432617, "logits/rejected": -1.6619138717651367, "logps/chosen": -57.57696533203125, "logps/rejected": -82.67385864257812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.5290658473968506, "rewards/margins": 12.146486282348633, "rewards/rejected": -13.675552368164062, "step": 1070 }, { "epoch": 0.49, "learning_rate": 2.785388127853881e-07, "logits/chosen": -1.738328218460083, "logits/rejected": -1.6436212062835693, "logps/chosen": -54.58185577392578, "logps/rejected": -80.33912658691406, "loss": 0.0073, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.134696364402771, "rewards/margins": 11.768366813659668, "rewards/rejected": -12.90306282043457, "step": 1080 }, { "epoch": 0.5, "learning_rate": 2.780314561136479e-07, "logits/chosen": -1.7240383625030518, "logits/rejected": -1.6409051418304443, "logps/chosen": -55.646934509277344, "logps/rejected": -87.40071105957031, "loss": 0.0027, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4618747234344482, "rewards/margins": 12.264945983886719, "rewards/rejected": -13.726821899414062, "step": 1090 }, { "epoch": 0.5, "learning_rate": 2.7752409944190763e-07, "logits/chosen": -1.7554023265838623, "logits/rejected": -1.6604242324829102, "logps/chosen": -57.77976608276367, "logps/rejected": -83.35955810546875, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.751634955406189, "rewards/margins": 11.482076644897461, "rewards/rejected": -13.233711242675781, "step": 1100 }, { "epoch": 0.5, "eval_logits/chosen": -1.5333517789840698, "eval_logits/rejected": -1.4505860805511475, "eval_logps/chosen": -81.65863037109375, "eval_logps/rejected": -86.0610580444336, "eval_loss": 0.011035408824682236, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -2.838331937789917, "eval_rewards/margins": 10.240079879760742, "eval_rewards/rejected": -13.078413009643555, "eval_runtime": 146.4725, "eval_samples_per_second": 19.54, "eval_steps_per_second": 1.222, "step": 1100 }, { "epoch": 0.51, "learning_rate": 2.770167427701674e-07, "logits/chosen": -1.749803900718689, "logits/rejected": -1.6486237049102783, "logps/chosen": -58.17445755004883, "logps/rejected": -84.91439056396484, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.9937120676040649, "rewards/margins": 11.985600471496582, "rewards/rejected": -12.9793119430542, "step": 1110 }, { "epoch": 0.51, "learning_rate": 2.7650938609842716e-07, "logits/chosen": -1.7548671960830688, "logits/rejected": -1.6473737955093384, "logps/chosen": -56.917808532714844, "logps/rejected": -81.3094711303711, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.2224924564361572, "rewards/margins": 11.221006393432617, "rewards/rejected": -12.443498611450195, "step": 1120 }, { "epoch": 0.52, "learning_rate": 2.7600202942668696e-07, "logits/chosen": -1.757595419883728, "logits/rejected": -1.6543792486190796, "logps/chosen": -62.02849578857422, "logps/rejected": -84.46827697753906, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.3594145774841309, "rewards/margins": 11.177923202514648, "rewards/rejected": -12.537336349487305, "step": 1130 }, { "epoch": 0.52, "learning_rate": 2.754946727549467e-07, "logits/chosen": -1.7476240396499634, "logits/rejected": -1.6613715887069702, "logps/chosen": -58.78446578979492, "logps/rejected": -89.38286590576172, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.4613271951675415, "rewards/margins": 11.705554008483887, "rewards/rejected": -13.16688060760498, "step": 1140 }, { "epoch": 0.52, "learning_rate": 2.749873160832065e-07, "logits/chosen": -1.7503944635391235, "logits/rejected": -1.6537967920303345, "logps/chosen": -54.70173263549805, "logps/rejected": -82.47148132324219, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.9892008900642395, "rewards/margins": 12.35951042175293, "rewards/rejected": -13.348711013793945, "step": 1150 }, { "epoch": 0.53, "learning_rate": 2.7447995941146623e-07, "logits/chosen": -1.7634122371673584, "logits/rejected": -1.6714951992034912, "logps/chosen": -60.8097038269043, "logps/rejected": -87.31422424316406, "loss": 0.0108, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.9673036336898804, "rewards/margins": 12.482806205749512, "rewards/rejected": -14.450109481811523, "step": 1160 }, { "epoch": 0.53, "learning_rate": 2.73972602739726e-07, "logits/chosen": -1.7594560384750366, "logits/rejected": -1.6858173608779907, "logps/chosen": -55.721031188964844, "logps/rejected": -90.5, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.6576344966888428, "rewards/margins": 12.7664213180542, "rewards/rejected": -14.424057006835938, "step": 1170 }, { "epoch": 0.54, "learning_rate": 2.7346524606798576e-07, "logits/chosen": -1.7588996887207031, "logits/rejected": -1.6747125387191772, "logps/chosen": -57.83014678955078, "logps/rejected": -88.86564636230469, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.9523566961288452, "rewards/margins": 12.802734375, "rewards/rejected": -14.755091667175293, "step": 1180 }, { "epoch": 0.54, "learning_rate": 2.7295788939624556e-07, "logits/chosen": -1.7592281103134155, "logits/rejected": -1.6504974365234375, "logps/chosen": -58.18562698364258, "logps/rejected": -86.49019622802734, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.525193452835083, "rewards/margins": 12.773050308227539, "rewards/rejected": -14.298243522644043, "step": 1190 }, { "epoch": 0.55, "learning_rate": 2.724505327245053e-07, "logits/chosen": -1.7617738246917725, "logits/rejected": -1.666863203048706, "logps/chosen": -59.43085861206055, "logps/rejected": -85.90283203125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.331127405166626, "rewards/margins": 12.19595718383789, "rewards/rejected": -13.527084350585938, "step": 1200 }, { "epoch": 0.55, "eval_logits/chosen": -1.544122576713562, "eval_logits/rejected": -1.4603493213653564, "eval_logps/chosen": -82.71837615966797, "eval_logps/rejected": -87.875732421875, "eval_loss": 0.013027088716626167, "eval_rewards/accuracies": 0.9860334992408752, "eval_rewards/chosen": -3.368196725845337, "eval_rewards/margins": 10.61755084991455, "eval_rewards/rejected": -13.985747337341309, "eval_runtime": 148.8985, "eval_samples_per_second": 19.221, "eval_steps_per_second": 1.202, "step": 1200 }, { "epoch": 0.55, "learning_rate": 2.719431760527651e-07, "logits/chosen": -1.7558351755142212, "logits/rejected": -1.6655076742172241, "logps/chosen": -56.12122344970703, "logps/rejected": -84.51030731201172, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.5430134534835815, "rewards/margins": 12.190717697143555, "rewards/rejected": -13.733731269836426, "step": 1210 }, { "epoch": 0.56, "learning_rate": 2.7143581938102483e-07, "logits/chosen": -1.7553596496582031, "logits/rejected": -1.6564127206802368, "logps/chosen": -56.277931213378906, "logps/rejected": -83.83023834228516, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.0945639610290527, "rewards/margins": 12.596158981323242, "rewards/rejected": -13.69072151184082, "step": 1220 }, { "epoch": 0.56, "learning_rate": 2.709284627092846e-07, "logits/chosen": -1.742206335067749, "logits/rejected": -1.6503639221191406, "logps/chosen": -58.675819396972656, "logps/rejected": -89.0281982421875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.533455491065979, "rewards/margins": 13.204513549804688, "rewards/rejected": -14.737970352172852, "step": 1230 }, { "epoch": 0.57, "learning_rate": 2.7042110603754436e-07, "logits/chosen": -1.7599554061889648, "logits/rejected": -1.6664212942123413, "logps/chosen": -62.36484909057617, "logps/rejected": -86.59840393066406, "loss": 0.0068, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.790294885635376, "rewards/margins": 12.219128608703613, "rewards/rejected": -14.009424209594727, "step": 1240 }, { "epoch": 0.57, "learning_rate": 2.6991374936580416e-07, "logits/chosen": -1.7607377767562866, "logits/rejected": -1.6740095615386963, "logps/chosen": -58.570457458496094, "logps/rejected": -90.27938842773438, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.5715006589889526, "rewards/margins": 13.148752212524414, "rewards/rejected": -14.720251083374023, "step": 1250 }, { "epoch": 0.58, "learning_rate": 2.694063926940639e-07, "logits/chosen": -1.7583293914794922, "logits/rejected": -1.656664252281189, "logps/chosen": -61.866050720214844, "logps/rejected": -85.34736633300781, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.522429347038269, "rewards/margins": 12.270319938659668, "rewards/rejected": -13.792750358581543, "step": 1260 }, { "epoch": 0.58, "learning_rate": 2.688990360223237e-07, "logits/chosen": -1.7451664209365845, "logits/rejected": -1.6569017171859741, "logps/chosen": -54.42702102661133, "logps/rejected": -87.1197509765625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.1746667623519897, "rewards/margins": 12.838618278503418, "rewards/rejected": -14.013284683227539, "step": 1270 }, { "epoch": 0.58, "learning_rate": 2.6839167935058343e-07, "logits/chosen": -1.7624620199203491, "logits/rejected": -1.6860120296478271, "logps/chosen": -56.553749084472656, "logps/rejected": -87.63340759277344, "loss": 0.0103, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.627769112586975, "rewards/margins": 13.027348518371582, "rewards/rejected": -14.655117988586426, "step": 1280 }, { "epoch": 0.59, "learning_rate": 2.678843226788432e-07, "logits/chosen": -1.7679738998413086, "logits/rejected": -1.6850473880767822, "logps/chosen": -55.26356887817383, "logps/rejected": -90.05108642578125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.4258941411972046, "rewards/margins": 13.58891487121582, "rewards/rejected": -15.014808654785156, "step": 1290 }, { "epoch": 0.59, "learning_rate": 2.6737696600710296e-07, "logits/chosen": -1.7583335638046265, "logits/rejected": -1.6575883626937866, "logps/chosen": -59.30186080932617, "logps/rejected": -90.12716674804688, "loss": 0.0054, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.90740168094635, "rewards/margins": 13.73097038269043, "rewards/rejected": -15.638374328613281, "step": 1300 }, { "epoch": 0.59, "eval_logits/chosen": -1.5403498411178589, "eval_logits/rejected": -1.4576407670974731, "eval_logps/chosen": -83.19163513183594, "eval_logps/rejected": -89.70408630371094, "eval_loss": 0.0123166274279356, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -3.604830503463745, "eval_rewards/margins": 11.295095443725586, "eval_rewards/rejected": -14.899927139282227, "eval_runtime": 166.689, "eval_samples_per_second": 17.17, "eval_steps_per_second": 1.074, "step": 1300 }, { "epoch": 0.6, "learning_rate": 2.6686960933536276e-07, "logits/chosen": -1.7511463165283203, "logits/rejected": -1.6742098331451416, "logps/chosen": -56.27610397338867, "logps/rejected": -87.4818344116211, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.8156147003173828, "rewards/margins": 13.021186828613281, "rewards/rejected": -14.836801528930664, "step": 1310 }, { "epoch": 0.6, "learning_rate": 2.663622526636225e-07, "logits/chosen": -1.749943733215332, "logits/rejected": -1.6831376552581787, "logps/chosen": -55.67545700073242, "logps/rejected": -88.80065155029297, "loss": 0.0103, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.1307406425476074, "rewards/margins": 12.877218246459961, "rewards/rejected": -15.007959365844727, "step": 1320 }, { "epoch": 0.61, "learning_rate": 2.658548959918823e-07, "logits/chosen": -1.7757232189178467, "logits/rejected": -1.6820189952850342, "logps/chosen": -62.95960235595703, "logps/rejected": -90.3966293334961, "loss": 0.0057, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.706180214881897, "rewards/margins": 13.415826797485352, "rewards/rejected": -15.1220064163208, "step": 1330 }, { "epoch": 0.61, "learning_rate": 2.6534753932014203e-07, "logits/chosen": -1.7566606998443604, "logits/rejected": -1.6727615594863892, "logps/chosen": -55.65472412109375, "logps/rejected": -86.75607299804688, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.4121570587158203, "rewards/margins": 13.099316596984863, "rewards/rejected": -14.511472702026367, "step": 1340 }, { "epoch": 0.62, "learning_rate": 2.648401826484018e-07, "logits/chosen": -1.7660220861434937, "logits/rejected": -1.6494724750518799, "logps/chosen": -60.838462829589844, "logps/rejected": -88.93028259277344, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.3854892253875732, "rewards/margins": 13.644729614257812, "rewards/rejected": -15.030218124389648, "step": 1350 }, { "epoch": 0.62, "learning_rate": 2.6433282597666156e-07, "logits/chosen": -1.7497011423110962, "logits/rejected": -1.646283507347107, "logps/chosen": -60.607208251953125, "logps/rejected": -86.80403900146484, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.3235530853271484, "rewards/margins": 13.664321899414062, "rewards/rejected": -14.987874031066895, "step": 1360 }, { "epoch": 0.63, "learning_rate": 2.6382546930492135e-07, "logits/chosen": -1.7312263250350952, "logits/rejected": -1.6483138799667358, "logps/chosen": -55.373199462890625, "logps/rejected": -89.51940155029297, "loss": 0.0078, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6450560092926025, "rewards/margins": 13.383737564086914, "rewards/rejected": -15.02879524230957, "step": 1370 }, { "epoch": 0.63, "learning_rate": 2.633181126331811e-07, "logits/chosen": -1.7593389749526978, "logits/rejected": -1.690319299697876, "logps/chosen": -52.8946647644043, "logps/rejected": -86.19906616210938, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.703823447227478, "rewards/margins": 12.947980880737305, "rewards/rejected": -14.651806831359863, "step": 1380 }, { "epoch": 0.63, "learning_rate": 2.628107559614409e-07, "logits/chosen": -1.7741191387176514, "logits/rejected": -1.6717126369476318, "logps/chosen": -59.35906982421875, "logps/rejected": -84.93653869628906, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.5824410915374756, "rewards/margins": 12.609251022338867, "rewards/rejected": -14.191693305969238, "step": 1390 }, { "epoch": 0.64, "learning_rate": 2.6230339928970063e-07, "logits/chosen": -1.7360626459121704, "logits/rejected": -1.667319655418396, "logps/chosen": -55.236122131347656, "logps/rejected": -87.0063705444336, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.5939099788665771, "rewards/margins": 13.322172164916992, "rewards/rejected": -14.916082382202148, "step": 1400 }, { "epoch": 0.64, "eval_logits/chosen": -1.54180109500885, "eval_logits/rejected": -1.4597798585891724, "eval_logps/chosen": -82.61723327636719, "eval_logps/rejected": -90.00525665283203, "eval_loss": 0.009081164374947548, "eval_rewards/accuracies": 0.9860334992408752, "eval_rewards/chosen": -3.3176283836364746, "eval_rewards/margins": 11.732884407043457, "eval_rewards/rejected": -15.050512313842773, "eval_runtime": 149.6939, "eval_samples_per_second": 19.119, "eval_steps_per_second": 1.196, "step": 1400 }, { "epoch": 0.64, "learning_rate": 2.617960426179604e-07, "logits/chosen": -1.77938973903656, "logits/rejected": -1.6862428188323975, "logps/chosen": -60.312339782714844, "logps/rejected": -90.48917388916016, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.3172271251678467, "rewards/margins": 13.71961784362793, "rewards/rejected": -15.036844253540039, "step": 1410 }, { "epoch": 0.65, "learning_rate": 2.6128868594622016e-07, "logits/chosen": -1.7564280033111572, "logits/rejected": -1.6784827709197998, "logps/chosen": -56.887779235839844, "logps/rejected": -87.00199890136719, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.7040510177612305, "rewards/margins": 13.034492492675781, "rewards/rejected": -14.738543510437012, "step": 1420 }, { "epoch": 0.65, "learning_rate": 2.6078132927447995e-07, "logits/chosen": -1.7650423049926758, "logits/rejected": -1.6766407489776611, "logps/chosen": -57.2898063659668, "logps/rejected": -85.12565612792969, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.521803617477417, "rewards/margins": 13.181356430053711, "rewards/rejected": -14.703161239624023, "step": 1430 }, { "epoch": 0.66, "learning_rate": 2.602739726027397e-07, "logits/chosen": -1.7596886157989502, "logits/rejected": -1.654207468032837, "logps/chosen": -56.0603141784668, "logps/rejected": -87.854736328125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.3883931636810303, "rewards/margins": 13.825471878051758, "rewards/rejected": -15.21386432647705, "step": 1440 }, { "epoch": 0.66, "learning_rate": 2.597666159309995e-07, "logits/chosen": -1.7648146152496338, "logits/rejected": -1.67717707157135, "logps/chosen": -56.35142135620117, "logps/rejected": -87.81778717041016, "loss": 0.0092, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6936101913452148, "rewards/margins": 13.936601638793945, "rewards/rejected": -15.630210876464844, "step": 1450 }, { "epoch": 0.67, "learning_rate": 2.5925925925925923e-07, "logits/chosen": -1.7566111087799072, "logits/rejected": -1.6657116413116455, "logps/chosen": -58.238059997558594, "logps/rejected": -87.4339370727539, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.4842593669891357, "rewards/margins": 12.764392852783203, "rewards/rejected": -14.248652458190918, "step": 1460 }, { "epoch": 0.67, "learning_rate": 2.58751902587519e-07, "logits/chosen": -1.777064561843872, "logits/rejected": -1.6611353158950806, "logps/chosen": -58.111549377441406, "logps/rejected": -84.71275329589844, "loss": 0.0059, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0342987775802612, "rewards/margins": 13.157496452331543, "rewards/rejected": -14.191795349121094, "step": 1470 }, { "epoch": 0.68, "learning_rate": 2.5824454591577876e-07, "logits/chosen": -1.7620245218276978, "logits/rejected": -1.662798523902893, "logps/chosen": -57.60813522338867, "logps/rejected": -85.7889175415039, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.4033831357955933, "rewards/margins": 13.611114501953125, "rewards/rejected": -15.014495849609375, "step": 1480 }, { "epoch": 0.68, "learning_rate": 2.5773718924403855e-07, "logits/chosen": -1.7630466222763062, "logits/rejected": -1.6711034774780273, "logps/chosen": -55.47661590576172, "logps/rejected": -86.72578430175781, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.5360097885131836, "rewards/margins": 13.896036148071289, "rewards/rejected": -15.432044982910156, "step": 1490 }, { "epoch": 0.68, "learning_rate": 2.572298325722983e-07, "logits/chosen": -1.7507346868515015, "logits/rejected": -1.6613788604736328, "logps/chosen": -60.52473068237305, "logps/rejected": -88.97389221191406, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.761684775352478, "rewards/margins": 13.575399398803711, "rewards/rejected": -15.33708667755127, "step": 1500 }, { "epoch": 0.68, "eval_logits/chosen": -1.549391269683838, "eval_logits/rejected": -1.467103362083435, "eval_logps/chosen": -82.59818267822266, "eval_logps/rejected": -91.0326919555664, "eval_loss": 0.008652918040752411, "eval_rewards/accuracies": 0.9860334992408752, "eval_rewards/chosen": -3.308105707168579, "eval_rewards/margins": 12.256126403808594, "eval_rewards/rejected": -15.564231872558594, "eval_runtime": 155.0871, "eval_samples_per_second": 18.454, "eval_steps_per_second": 1.154, "step": 1500 }, { "epoch": 0.69, "learning_rate": 2.567224759005581e-07, "logits/chosen": -1.7494996786117554, "logits/rejected": -1.6485868692398071, "logps/chosen": -58.14558792114258, "logps/rejected": -89.62889862060547, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.5420677661895752, "rewards/margins": 14.456304550170898, "rewards/rejected": -15.998372077941895, "step": 1510 }, { "epoch": 0.69, "learning_rate": 2.5621511922881783e-07, "logits/chosen": -1.7610740661621094, "logits/rejected": -1.6540189981460571, "logps/chosen": -57.69807052612305, "logps/rejected": -91.73440551757812, "loss": 0.0105, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6863176822662354, "rewards/margins": 14.371050834655762, "rewards/rejected": -16.057369232177734, "step": 1520 }, { "epoch": 0.7, "learning_rate": 2.557077625570776e-07, "logits/chosen": -1.7764087915420532, "logits/rejected": -1.6975984573364258, "logps/chosen": -54.03931427001953, "logps/rejected": -88.61404418945312, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.407801866531372, "rewards/margins": 13.469766616821289, "rewards/rejected": -14.877568244934082, "step": 1530 }, { "epoch": 0.7, "learning_rate": 2.5520040588533736e-07, "logits/chosen": -1.7674833536148071, "logits/rejected": -1.6577078104019165, "logps/chosen": -59.08375930786133, "logps/rejected": -85.60472106933594, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.8764028549194336, "rewards/margins": 13.601232528686523, "rewards/rejected": -14.477635383605957, "step": 1540 }, { "epoch": 0.71, "learning_rate": 2.5469304921359715e-07, "logits/chosen": -1.7571929693222046, "logits/rejected": -1.6718075275421143, "logps/chosen": -56.97377395629883, "logps/rejected": -88.74199676513672, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.215511679649353, "rewards/margins": 13.31176471710205, "rewards/rejected": -14.527276992797852, "step": 1550 }, { "epoch": 0.71, "learning_rate": 2.541856925418569e-07, "logits/chosen": -1.7460705041885376, "logits/rejected": -1.6669387817382812, "logps/chosen": -55.67962646484375, "logps/rejected": -87.57090759277344, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.260811686515808, "rewards/margins": 13.849576950073242, "rewards/rejected": -15.110387802124023, "step": 1560 }, { "epoch": 0.72, "learning_rate": 2.536783358701167e-07, "logits/chosen": -1.7633936405181885, "logits/rejected": -1.656254529953003, "logps/chosen": -57.3261604309082, "logps/rejected": -88.50245666503906, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.2457377910614014, "rewards/margins": 13.728734970092773, "rewards/rejected": -14.974472045898438, "step": 1570 }, { "epoch": 0.72, "learning_rate": 2.5317097919837643e-07, "logits/chosen": -1.745391845703125, "logits/rejected": -1.672279715538025, "logps/chosen": -57.052711486816406, "logps/rejected": -92.32408142089844, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.403147578239441, "rewards/margins": 14.200845718383789, "rewards/rejected": -15.60399341583252, "step": 1580 }, { "epoch": 0.73, "learning_rate": 2.526636225266362e-07, "logits/chosen": -1.7615629434585571, "logits/rejected": -1.6586211919784546, "logps/chosen": -58.931182861328125, "logps/rejected": -93.81259155273438, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.3059899806976318, "rewards/margins": 14.414894104003906, "rewards/rejected": -15.720884323120117, "step": 1590 }, { "epoch": 0.73, "learning_rate": 2.5215626585489596e-07, "logits/chosen": -1.7530238628387451, "logits/rejected": -1.6644384860992432, "logps/chosen": -54.5128173828125, "logps/rejected": -87.1929702758789, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.7006027698516846, "rewards/margins": 13.703947067260742, "rewards/rejected": -15.404550552368164, "step": 1600 }, { "epoch": 0.73, "eval_logits/chosen": -1.5559909343719482, "eval_logits/rejected": -1.4721708297729492, "eval_logps/chosen": -83.0450668334961, "eval_logps/rejected": -92.46697235107422, "eval_loss": 0.00909368135035038, "eval_rewards/accuracies": 0.9860334992408752, "eval_rewards/chosen": -3.5315442085266113, "eval_rewards/margins": 12.749824523925781, "eval_rewards/rejected": -16.2813663482666, "eval_runtime": 153.9516, "eval_samples_per_second": 18.59, "eval_steps_per_second": 1.163, "step": 1600 }, { "epoch": 0.73, "learning_rate": 2.5164890918315575e-07, "logits/chosen": -1.752454400062561, "logits/rejected": -1.6705706119537354, "logps/chosen": -57.991455078125, "logps/rejected": -91.4581527709961, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.0661113262176514, "rewards/margins": 14.307818412780762, "rewards/rejected": -16.373929977416992, "step": 1610 }, { "epoch": 0.74, "learning_rate": 2.511415525114155e-07, "logits/chosen": -1.7848793268203735, "logits/rejected": -1.7035505771636963, "logps/chosen": -56.839599609375, "logps/rejected": -88.28500366210938, "loss": 0.0037, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.018306255340576, "rewards/margins": 13.947771072387695, "rewards/rejected": -15.96607780456543, "step": 1620 }, { "epoch": 0.74, "learning_rate": 2.506341958396753e-07, "logits/chosen": -1.776012659072876, "logits/rejected": -1.6706695556640625, "logps/chosen": -58.133758544921875, "logps/rejected": -88.62023162841797, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.8454831838607788, "rewards/margins": 14.76240062713623, "rewards/rejected": -16.60788345336914, "step": 1630 }, { "epoch": 0.75, "learning_rate": 2.5012683916793503e-07, "logits/chosen": -1.780495047569275, "logits/rejected": -1.6756473779678345, "logps/chosen": -63.57946014404297, "logps/rejected": -92.46826171875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.026796579360962, "rewards/margins": 14.724100112915039, "rewards/rejected": -16.75089454650879, "step": 1640 }, { "epoch": 0.75, "learning_rate": 2.496194824961948e-07, "logits/chosen": -1.759849190711975, "logits/rejected": -1.6756088733673096, "logps/chosen": -56.996368408203125, "logps/rejected": -93.39848327636719, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.910292625427246, "rewards/margins": 15.01073932647705, "rewards/rejected": -16.921031951904297, "step": 1650 }, { "epoch": 0.76, "learning_rate": 2.4911212582445456e-07, "logits/chosen": -1.7654517889022827, "logits/rejected": -1.6813764572143555, "logps/chosen": -55.60560989379883, "logps/rejected": -92.54562377929688, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.3576942682266235, "rewards/margins": 15.633010864257812, "rewards/rejected": -16.990703582763672, "step": 1660 }, { "epoch": 0.76, "learning_rate": 2.4860476915271435e-07, "logits/chosen": -1.767221450805664, "logits/rejected": -1.6690009832382202, "logps/chosen": -54.5999870300293, "logps/rejected": -93.59965515136719, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -0.9985862970352173, "rewards/margins": 15.221014022827148, "rewards/rejected": -16.219600677490234, "step": 1670 }, { "epoch": 0.77, "learning_rate": 2.480974124809741e-07, "logits/chosen": -1.7726835012435913, "logits/rejected": -1.6497215032577515, "logps/chosen": -58.1947021484375, "logps/rejected": -90.62200164794922, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.9589114189147949, "rewards/margins": 14.714686393737793, "rewards/rejected": -15.673599243164062, "step": 1680 }, { "epoch": 0.77, "learning_rate": 2.475900558092339e-07, "logits/chosen": -1.7549190521240234, "logits/rejected": -1.6590404510498047, "logps/chosen": -58.07526779174805, "logps/rejected": -87.36026763916016, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.1645383834838867, "rewards/margins": 14.172067642211914, "rewards/rejected": -15.3366060256958, "step": 1690 }, { "epoch": 0.78, "learning_rate": 2.4708269913749363e-07, "logits/chosen": -1.768355131149292, "logits/rejected": -1.6747773885726929, "logps/chosen": -57.80949783325195, "logps/rejected": -87.95934295654297, "loss": 0.0035, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4355525970458984, "rewards/margins": 14.236753463745117, "rewards/rejected": -15.672307014465332, "step": 1700 }, { "epoch": 0.78, "eval_logits/chosen": -1.5480554103851318, "eval_logits/rejected": -1.466413140296936, "eval_logps/chosen": -82.27859497070312, "eval_logps/rejected": -91.71222686767578, "eval_loss": 0.007809256669133902, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -3.1483092308044434, "eval_rewards/margins": 12.755687713623047, "eval_rewards/rejected": -15.903995513916016, "eval_runtime": 150.663, "eval_samples_per_second": 18.996, "eval_steps_per_second": 1.188, "step": 1700 }, { "epoch": 0.78, "learning_rate": 2.465753424657534e-07, "logits/chosen": -1.7380459308624268, "logits/rejected": -1.644565224647522, "logps/chosen": -56.472679138183594, "logps/rejected": -91.74127960205078, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.2027267217636108, "rewards/margins": 14.890820503234863, "rewards/rejected": -16.093547821044922, "step": 1710 }, { "epoch": 0.79, "learning_rate": 2.4606798579401316e-07, "logits/chosen": -1.7520453929901123, "logits/rejected": -1.6536376476287842, "logps/chosen": -61.30470657348633, "logps/rejected": -90.20903778076172, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.4567862749099731, "rewards/margins": 14.035242080688477, "rewards/rejected": -15.492027282714844, "step": 1720 }, { "epoch": 0.79, "learning_rate": 2.4556062912227295e-07, "logits/chosen": -1.7642204761505127, "logits/rejected": -1.6550449132919312, "logps/chosen": -56.6153450012207, "logps/rejected": -85.96258544921875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.2349016666412354, "rewards/margins": 14.066560745239258, "rewards/rejected": -15.301460266113281, "step": 1730 }, { "epoch": 0.79, "learning_rate": 2.450532724505327e-07, "logits/chosen": -1.7625477313995361, "logits/rejected": -1.6494197845458984, "logps/chosen": -58.4631462097168, "logps/rejected": -90.04280853271484, "loss": 0.0056, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.411668300628662, "rewards/margins": 14.800588607788086, "rewards/rejected": -16.21225929260254, "step": 1740 }, { "epoch": 0.8, "learning_rate": 2.445459157787925e-07, "logits/chosen": -1.7740110158920288, "logits/rejected": -1.6718931198120117, "logps/chosen": -56.41655731201172, "logps/rejected": -88.89642333984375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.9772757291793823, "rewards/margins": 14.739130020141602, "rewards/rejected": -15.716405868530273, "step": 1750 }, { "epoch": 0.8, "learning_rate": 2.4403855910705223e-07, "logits/chosen": -1.7601970434188843, "logits/rejected": -1.6815147399902344, "logps/chosen": -55.41786575317383, "logps/rejected": -90.36604309082031, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.3811933994293213, "rewards/margins": 14.588228225708008, "rewards/rejected": -15.96942138671875, "step": 1760 }, { "epoch": 0.81, "learning_rate": 2.43531202435312e-07, "logits/chosen": -1.773404836654663, "logits/rejected": -1.6569006443023682, "logps/chosen": -57.115379333496094, "logps/rejected": -87.33353424072266, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.131402850151062, "rewards/margins": 14.441953659057617, "rewards/rejected": -15.573358535766602, "step": 1770 }, { "epoch": 0.81, "learning_rate": 2.4302384576357176e-07, "logits/chosen": -1.7684681415557861, "logits/rejected": -1.6918935775756836, "logps/chosen": -59.896827697753906, "logps/rejected": -94.4011001586914, "loss": 0.0013, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6811656951904297, "rewards/margins": 15.493830680847168, "rewards/rejected": -17.174999237060547, "step": 1780 }, { "epoch": 0.82, "learning_rate": 2.4251648909183155e-07, "logits/chosen": -1.7643531560897827, "logits/rejected": -1.678773283958435, "logps/chosen": -57.124267578125, "logps/rejected": -93.88394927978516, "loss": 0.0052, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.4721136093139648, "rewards/margins": 15.75408935546875, "rewards/rejected": -17.2262020111084, "step": 1790 }, { "epoch": 0.82, "learning_rate": 2.420091324200913e-07, "logits/chosen": -1.7517344951629639, "logits/rejected": -1.6720161437988281, "logps/chosen": -60.31678009033203, "logps/rejected": -93.91047668457031, "loss": 0.0094, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.1854450702667236, "rewards/margins": 14.294087409973145, "rewards/rejected": -16.479534149169922, "step": 1800 }, { "epoch": 0.82, "eval_logits/chosen": -1.5389713048934937, "eval_logits/rejected": -1.4571911096572876, "eval_logps/chosen": -81.96665954589844, "eval_logps/rejected": -91.73914337158203, "eval_loss": 0.0071415891870856285, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -2.992342710494995, "eval_rewards/margins": 12.925115585327148, "eval_rewards/rejected": -15.917457580566406, "eval_runtime": 165.3945, "eval_samples_per_second": 17.304, "eval_steps_per_second": 1.082, "step": 1800 }, { "epoch": 0.83, "learning_rate": 2.415017757483511e-07, "logits/chosen": -1.7610645294189453, "logits/rejected": -1.6734730005264282, "logps/chosen": -58.1474494934082, "logps/rejected": -91.43634033203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.2573091983795166, "rewards/margins": 15.121930122375488, "rewards/rejected": -16.37923812866211, "step": 1810 }, { "epoch": 0.83, "learning_rate": 2.409944190766108e-07, "logits/chosen": -1.7458274364471436, "logits/rejected": -1.6527506113052368, "logps/chosen": -57.40966796875, "logps/rejected": -93.22349548339844, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.624754548072815, "rewards/margins": 15.039660453796387, "rewards/rejected": -16.66441535949707, "step": 1820 }, { "epoch": 0.84, "learning_rate": 2.404870624048706e-07, "logits/chosen": -1.7511749267578125, "logits/rejected": -1.666269302368164, "logps/chosen": -58.3302001953125, "logps/rejected": -90.82698059082031, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.2160614728927612, "rewards/margins": 14.482437133789062, "rewards/rejected": -15.69849681854248, "step": 1830 }, { "epoch": 0.84, "learning_rate": 2.3997970573313036e-07, "logits/chosen": -1.7466942071914673, "logits/rejected": -1.659889578819275, "logps/chosen": -55.64778518676758, "logps/rejected": -86.6668701171875, "loss": 0.0026, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3703393936157227, "rewards/margins": 14.488919258117676, "rewards/rejected": -15.859257698059082, "step": 1840 }, { "epoch": 0.84, "learning_rate": 2.3947234906139015e-07, "logits/chosen": -1.7588704824447632, "logits/rejected": -1.664973497390747, "logps/chosen": -58.49451446533203, "logps/rejected": -91.96329498291016, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.37942636013031, "rewards/margins": 15.162734985351562, "rewards/rejected": -16.542163848876953, "step": 1850 }, { "epoch": 0.85, "learning_rate": 2.389649923896499e-07, "logits/chosen": -1.7728341817855835, "logits/rejected": -1.6813457012176514, "logps/chosen": -55.9604377746582, "logps/rejected": -89.51544952392578, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.612262487411499, "rewards/margins": 14.787466049194336, "rewards/rejected": -16.399730682373047, "step": 1860 }, { "epoch": 0.85, "learning_rate": 2.384576357179097e-07, "logits/chosen": -1.7437846660614014, "logits/rejected": -1.6534225940704346, "logps/chosen": -56.951820373535156, "logps/rejected": -91.35940551757812, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.487562894821167, "rewards/margins": 14.857897758483887, "rewards/rejected": -16.345458984375, "step": 1870 }, { "epoch": 0.86, "learning_rate": 2.3795027904616943e-07, "logits/chosen": -1.7715113162994385, "logits/rejected": -1.6715120077133179, "logps/chosen": -59.91661834716797, "logps/rejected": -89.1376953125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.7439546585083008, "rewards/margins": 15.043264389038086, "rewards/rejected": -16.787220001220703, "step": 1880 }, { "epoch": 0.86, "learning_rate": 2.374429223744292e-07, "logits/chosen": -1.7696430683135986, "logits/rejected": -1.689231514930725, "logps/chosen": -56.52924346923828, "logps/rejected": -90.79057312011719, "loss": 0.0051, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.5264157056808472, "rewards/margins": 14.896077156066895, "rewards/rejected": -16.4224910736084, "step": 1890 }, { "epoch": 0.87, "learning_rate": 2.3693556570268896e-07, "logits/chosen": -1.769017219543457, "logits/rejected": -1.684442162513733, "logps/chosen": -56.181922912597656, "logps/rejected": -93.59736633300781, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.1465259790420532, "rewards/margins": 15.11158561706543, "rewards/rejected": -16.258113861083984, "step": 1900 }, { "epoch": 0.87, "eval_logits/chosen": -1.551081657409668, "eval_logits/rejected": -1.4689964056015015, "eval_logps/chosen": -81.9542007446289, "eval_logps/rejected": -92.96185302734375, "eval_loss": 0.006640641484409571, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -2.986114025115967, "eval_rewards/margins": 13.542698860168457, "eval_rewards/rejected": -16.5288143157959, "eval_runtime": 132.2326, "eval_samples_per_second": 21.644, "eval_steps_per_second": 1.354, "step": 1900 }, { "epoch": 0.87, "learning_rate": 2.3642820903094873e-07, "logits/chosen": -1.7713333368301392, "logits/rejected": -1.6674282550811768, "logps/chosen": -58.21760177612305, "logps/rejected": -97.25029754638672, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.0102754831314087, "rewards/margins": 16.225215911865234, "rewards/rejected": -17.235490798950195, "step": 1910 }, { "epoch": 0.88, "learning_rate": 2.359208523592085e-07, "logits/chosen": -1.7865272760391235, "logits/rejected": -1.6899009943008423, "logps/chosen": -57.2186164855957, "logps/rejected": -89.74848175048828, "loss": 0.0134, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.849109411239624, "rewards/margins": 14.458150863647461, "rewards/rejected": -16.307260513305664, "step": 1920 }, { "epoch": 0.88, "learning_rate": 2.3541349568746826e-07, "logits/chosen": -1.7519928216934204, "logits/rejected": -1.6507072448730469, "logps/chosen": -54.916954040527344, "logps/rejected": -85.49049377441406, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.1282603740692139, "rewards/margins": 14.200277328491211, "rewards/rejected": -15.328536987304688, "step": 1930 }, { "epoch": 0.89, "learning_rate": 2.3490613901572803e-07, "logits/chosen": -1.7461013793945312, "logits/rejected": -1.6764867305755615, "logps/chosen": -55.79955291748047, "logps/rejected": -89.12510681152344, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.656272292137146, "rewards/margins": 13.735760688781738, "rewards/rejected": -15.392033576965332, "step": 1940 }, { "epoch": 0.89, "learning_rate": 2.343987823439878e-07, "logits/chosen": -1.750739336013794, "logits/rejected": -1.6435902118682861, "logps/chosen": -57.835601806640625, "logps/rejected": -88.36417388916016, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.1759008169174194, "rewards/margins": 14.037012100219727, "rewards/rejected": -15.212911605834961, "step": 1950 }, { "epoch": 0.89, "learning_rate": 2.3389142567224756e-07, "logits/chosen": -1.7469285726547241, "logits/rejected": -1.675733208656311, "logps/chosen": -59.03168487548828, "logps/rejected": -93.41059112548828, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.538738489151001, "rewards/margins": 14.39984130859375, "rewards/rejected": -15.938581466674805, "step": 1960 }, { "epoch": 0.9, "learning_rate": 2.3338406900050733e-07, "logits/chosen": -1.7523949146270752, "logits/rejected": -1.6582969427108765, "logps/chosen": -57.25901412963867, "logps/rejected": -92.3466796875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.8285577893257141, "rewards/margins": 15.199548721313477, "rewards/rejected": -16.028104782104492, "step": 1970 }, { "epoch": 0.9, "learning_rate": 2.328767123287671e-07, "logits/chosen": -1.7480812072753906, "logits/rejected": -1.6603069305419922, "logps/chosen": -58.4476203918457, "logps/rejected": -91.93983459472656, "loss": 0.004, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3551121950149536, "rewards/margins": 14.631512641906738, "rewards/rejected": -15.986623764038086, "step": 1980 }, { "epoch": 0.91, "learning_rate": 2.3236935565702686e-07, "logits/chosen": -1.7778419256210327, "logits/rejected": -1.6857637166976929, "logps/chosen": -59.96107864379883, "logps/rejected": -92.36823272705078, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.472414255142212, "rewards/margins": 14.517046928405762, "rewards/rejected": -15.989463806152344, "step": 1990 }, { "epoch": 0.91, "learning_rate": 2.3186199898528663e-07, "logits/chosen": -1.750738501548767, "logits/rejected": -1.6730880737304688, "logps/chosen": -56.4324951171875, "logps/rejected": -91.2624740600586, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.4734715223312378, "rewards/margins": 14.515901565551758, "rewards/rejected": -15.989372253417969, "step": 2000 }, { "epoch": 0.91, "eval_logits/chosen": -1.539143681526184, "eval_logits/rejected": -1.4576748609542847, "eval_logps/chosen": -82.55216217041016, "eval_logps/rejected": -91.96436309814453, "eval_loss": 0.007588522508740425, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -3.2850937843322754, "eval_rewards/margins": 12.74497127532959, "eval_rewards/rejected": -16.030065536499023, "eval_runtime": 169.8211, "eval_samples_per_second": 16.853, "eval_steps_per_second": 1.054, "step": 2000 }, { "epoch": 0.92, "learning_rate": 2.313546423135464e-07, "logits/chosen": -1.787705659866333, "logits/rejected": -1.6910228729248047, "logps/chosen": -60.171051025390625, "logps/rejected": -88.08454132080078, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.567155361175537, "rewards/margins": 13.747461318969727, "rewards/rejected": -15.314616203308105, "step": 2010 }, { "epoch": 0.92, "learning_rate": 2.3084728564180616e-07, "logits/chosen": -1.7449241876602173, "logits/rejected": -1.6786353588104248, "logps/chosen": -55.51226043701172, "logps/rejected": -90.74034881591797, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.3218858242034912, "rewards/margins": 14.683317184448242, "rewards/rejected": -16.005203247070312, "step": 2020 }, { "epoch": 0.93, "learning_rate": 2.3033992897006593e-07, "logits/chosen": -1.757450819015503, "logits/rejected": -1.6583503484725952, "logps/chosen": -62.15929412841797, "logps/rejected": -90.96791076660156, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.4464218616485596, "rewards/margins": 14.97315502166748, "rewards/rejected": -16.41957664489746, "step": 2030 }, { "epoch": 0.93, "learning_rate": 2.298325722983257e-07, "logits/chosen": -1.7604230642318726, "logits/rejected": -1.6536738872528076, "logps/chosen": -59.402137756347656, "logps/rejected": -92.2740707397461, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.6296403408050537, "rewards/margins": 15.20836353302002, "rewards/rejected": -16.838003158569336, "step": 2040 }, { "epoch": 0.94, "learning_rate": 2.2932521562658546e-07, "logits/chosen": -1.7548086643218994, "logits/rejected": -1.657857894897461, "logps/chosen": -56.833946228027344, "logps/rejected": -89.49751281738281, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.196375846862793, "rewards/margins": 14.8095064163208, "rewards/rejected": -16.00588035583496, "step": 2050 }, { "epoch": 0.94, "learning_rate": 2.2881785895484523e-07, "logits/chosen": -1.747240424156189, "logits/rejected": -1.6511905193328857, "logps/chosen": -58.68681716918945, "logps/rejected": -88.55622863769531, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.3744940757751465, "rewards/margins": 14.411099433898926, "rewards/rejected": -15.785593032836914, "step": 2060 }, { "epoch": 0.94, "learning_rate": 2.28310502283105e-07, "logits/chosen": -1.7425874471664429, "logits/rejected": -1.6658105850219727, "logps/chosen": -54.89007568359375, "logps/rejected": -89.45921325683594, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7904685735702515, "rewards/margins": 14.850664138793945, "rewards/rejected": -16.641132354736328, "step": 2070 }, { "epoch": 0.95, "learning_rate": 2.2780314561136476e-07, "logits/chosen": -1.7678744792938232, "logits/rejected": -1.6782909631729126, "logps/chosen": -56.872894287109375, "logps/rejected": -89.02281951904297, "loss": 0.0018, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.518134355545044, "rewards/margins": 15.083781242370605, "rewards/rejected": -16.601917266845703, "step": 2080 }, { "epoch": 0.95, "learning_rate": 2.2729578893962453e-07, "logits/chosen": -1.7603305578231812, "logits/rejected": -1.6793501377105713, "logps/chosen": -59.30863571166992, "logps/rejected": -93.47074890136719, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.8892009258270264, "rewards/margins": 15.660491943359375, "rewards/rejected": -17.549694061279297, "step": 2090 }, { "epoch": 0.96, "learning_rate": 2.267884322678843e-07, "logits/chosen": -1.7693204879760742, "logits/rejected": -1.6628926992416382, "logps/chosen": -54.2353630065918, "logps/rejected": -87.59960174560547, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.0670645236968994, "rewards/margins": 15.38727855682373, "rewards/rejected": -16.454341888427734, "step": 2100 }, { "epoch": 0.96, "eval_logits/chosen": -1.5439651012420654, "eval_logits/rejected": -1.4616644382476807, "eval_logps/chosen": -82.66470336914062, "eval_logps/rejected": -93.65453338623047, "eval_loss": 0.006382639519870281, "eval_rewards/accuracies": 0.994413435459137, "eval_rewards/chosen": -3.3413615226745605, "eval_rewards/margins": 13.533792495727539, "eval_rewards/rejected": -16.87515640258789, "eval_runtime": 153.9099, "eval_samples_per_second": 18.595, "eval_steps_per_second": 1.163, "step": 2100 }, { "epoch": 0.96, "learning_rate": 2.2628107559614406e-07, "logits/chosen": -1.7645127773284912, "logits/rejected": -1.6967523097991943, "logps/chosen": -59.54228973388672, "logps/rejected": -93.48818969726562, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.765673279762268, "rewards/margins": 15.079645156860352, "rewards/rejected": -16.845317840576172, "step": 2110 }, { "epoch": 0.97, "learning_rate": 2.2577371892440383e-07, "logits/chosen": -1.7425806522369385, "logits/rejected": -1.658601999282837, "logps/chosen": -55.3408088684082, "logps/rejected": -92.50639343261719, "loss": 0.0019, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3979063034057617, "rewards/margins": 15.3561429977417, "rewards/rejected": -16.75404930114746, "step": 2120 }, { "epoch": 0.97, "learning_rate": 2.252663622526636e-07, "logits/chosen": -1.7674560546875, "logits/rejected": -1.6785967350006104, "logps/chosen": -59.449562072753906, "logps/rejected": -94.57528686523438, "loss": 0.0058, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4636263847351074, "rewards/margins": 15.344259262084961, "rewards/rejected": -16.807886123657227, "step": 2130 }, { "epoch": 0.98, "learning_rate": 2.2475900558092336e-07, "logits/chosen": -1.7418906688690186, "logits/rejected": -1.6623594760894775, "logps/chosen": -57.31889724731445, "logps/rejected": -90.8071060180664, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.710715889930725, "rewards/margins": 14.743158340454102, "rewards/rejected": -16.453876495361328, "step": 2140 }, { "epoch": 0.98, "learning_rate": 2.2425164890918313e-07, "logits/chosen": -1.7725855112075806, "logits/rejected": -1.6637465953826904, "logps/chosen": -64.91510772705078, "logps/rejected": -95.10658264160156, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.4892213344573975, "rewards/margins": 15.396344184875488, "rewards/rejected": -16.885562896728516, "step": 2150 }, { "epoch": 0.99, "learning_rate": 2.237442922374429e-07, "logits/chosen": -1.7567058801651, "logits/rejected": -1.6639419794082642, "logps/chosen": -55.096824645996094, "logps/rejected": -89.47212219238281, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.8129856586456299, "rewards/margins": 14.918745040893555, "rewards/rejected": -16.731731414794922, "step": 2160 }, { "epoch": 0.99, "learning_rate": 2.2323693556570266e-07, "logits/chosen": -1.7617263793945312, "logits/rejected": -1.658855676651001, "logps/chosen": -58.96918869018555, "logps/rejected": -95.55270385742188, "loss": 0.0048, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.275040626525879, "rewards/margins": 15.53173828125, "rewards/rejected": -16.806777954101562, "step": 2170 }, { "epoch": 1.0, "learning_rate": 2.2272957889396242e-07, "logits/chosen": -1.7455657720565796, "logits/rejected": -1.6588319540023804, "logps/chosen": -57.32484817504883, "logps/rejected": -94.15316009521484, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.5650216341018677, "rewards/margins": 15.224810600280762, "rewards/rejected": -16.789833068847656, "step": 2180 }, { "epoch": 1.0, "learning_rate": 2.222222222222222e-07, "logits/chosen": -1.7567503452301025, "logits/rejected": -1.6721036434173584, "logps/chosen": -55.82500457763672, "logps/rejected": -87.40864562988281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.7859405279159546, "rewards/margins": 14.830865859985352, "rewards/rejected": -16.616806030273438, "step": 2190 }, { "epoch": 1.0, "learning_rate": 2.2171486555048196e-07, "logits/chosen": -1.780106544494629, "logits/rejected": -1.6833308935165405, "logps/chosen": -59.21906661987305, "logps/rejected": -97.84840393066406, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.1965326070785522, "rewards/margins": 16.255271911621094, "rewards/rejected": -17.45180320739746, "step": 2200 }, { "epoch": 1.0, "eval_logits/chosen": -1.5444055795669556, "eval_logits/rejected": -1.4629565477371216, "eval_logps/chosen": -82.37531280517578, "eval_logps/rejected": -93.55461883544922, "eval_loss": 0.005960206501185894, "eval_rewards/accuracies": 0.994413435459137, "eval_rewards/chosen": -3.1966702938079834, "eval_rewards/margins": 13.628522872924805, "eval_rewards/rejected": -16.825193405151367, "eval_runtime": 141.3335, "eval_samples_per_second": 20.25, "eval_steps_per_second": 1.267, "step": 2200 }, { "epoch": 1.01, "learning_rate": 2.2120750887874172e-07, "logits/chosen": -1.7576357126235962, "logits/rejected": -1.6710838079452515, "logps/chosen": -59.391929626464844, "logps/rejected": -92.55592346191406, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.5112824440002441, "rewards/margins": 15.496170043945312, "rewards/rejected": -17.0074520111084, "step": 2210 }, { "epoch": 1.01, "learning_rate": 2.207001522070015e-07, "logits/chosen": -1.7698043584823608, "logits/rejected": -1.681131362915039, "logps/chosen": -56.185142517089844, "logps/rejected": -92.93355560302734, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.5311224460601807, "rewards/margins": 15.489564895629883, "rewards/rejected": -17.02068519592285, "step": 2220 }, { "epoch": 1.02, "learning_rate": 2.2019279553526126e-07, "logits/chosen": -1.749076247215271, "logits/rejected": -1.6408894062042236, "logps/chosen": -58.44233322143555, "logps/rejected": -85.44766998291016, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.5151691436767578, "rewards/margins": 14.953633308410645, "rewards/rejected": -16.468799591064453, "step": 2230 }, { "epoch": 1.02, "learning_rate": 2.1968543886352102e-07, "logits/chosen": -1.7423725128173828, "logits/rejected": -1.660238265991211, "logps/chosen": -56.360084533691406, "logps/rejected": -90.83768463134766, "loss": 0.0036, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.8068774938583374, "rewards/margins": 15.062307357788086, "rewards/rejected": -16.869184494018555, "step": 2240 }, { "epoch": 1.03, "learning_rate": 2.191780821917808e-07, "logits/chosen": -1.745189905166626, "logits/rejected": -1.669600248336792, "logps/chosen": -56.39533615112305, "logps/rejected": -91.84968566894531, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.8725141286849976, "rewards/margins": 14.959602355957031, "rewards/rejected": -16.832117080688477, "step": 2250 }, { "epoch": 1.03, "learning_rate": 2.1867072552004056e-07, "logits/chosen": -1.7463161945343018, "logits/rejected": -1.6642965078353882, "logps/chosen": -57.425537109375, "logps/rejected": -91.81517028808594, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.558890461921692, "rewards/margins": 15.99829387664795, "rewards/rejected": -17.557186126708984, "step": 2260 }, { "epoch": 1.04, "learning_rate": 2.1816336884830032e-07, "logits/chosen": -1.7645819187164307, "logits/rejected": -1.6817333698272705, "logps/chosen": -55.57514190673828, "logps/rejected": -91.8698501586914, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1787233352661133, "rewards/margins": 16.134780883789062, "rewards/rejected": -17.31350326538086, "step": 2270 }, { "epoch": 1.04, "learning_rate": 2.176560121765601e-07, "logits/chosen": -1.7538105249404907, "logits/rejected": -1.6650365591049194, "logps/chosen": -59.37980270385742, "logps/rejected": -94.99974060058594, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.2272608280181885, "rewards/margins": 15.5308198928833, "rewards/rejected": -16.758081436157227, "step": 2280 }, { "epoch": 1.05, "learning_rate": 2.1714865550481986e-07, "logits/chosen": -1.7885656356811523, "logits/rejected": -1.6814218759536743, "logps/chosen": -55.93397903442383, "logps/rejected": -94.24886322021484, "loss": 0.0045, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3392460346221924, "rewards/margins": 16.647480010986328, "rewards/rejected": -17.98672866821289, "step": 2290 }, { "epoch": 1.05, "learning_rate": 2.1664129883307962e-07, "logits/chosen": -1.728960633277893, "logits/rejected": -1.662274718284607, "logps/chosen": -57.202308654785156, "logps/rejected": -98.20774841308594, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.6883776187896729, "rewards/margins": 16.555070877075195, "rewards/rejected": -18.24344825744629, "step": 2300 }, { "epoch": 1.05, "eval_logits/chosen": -1.5466911792755127, "eval_logits/rejected": -1.4644973278045654, "eval_logps/chosen": -83.1010513305664, "eval_logps/rejected": -95.12531280517578, "eval_loss": 0.006303800735622644, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -3.5595388412475586, "eval_rewards/margins": 14.051005363464355, "eval_rewards/rejected": -17.610544204711914, "eval_runtime": 151.7854, "eval_samples_per_second": 18.856, "eval_steps_per_second": 1.179, "step": 2300 }, { "epoch": 1.05, "learning_rate": 2.161339421613394e-07, "logits/chosen": -1.7632097005844116, "logits/rejected": -1.6761329174041748, "logps/chosen": -57.0994873046875, "logps/rejected": -96.46761322021484, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.760319709777832, "rewards/margins": 16.308773040771484, "rewards/rejected": -18.069091796875, "step": 2310 }, { "epoch": 1.06, "learning_rate": 2.1562658548959916e-07, "logits/chosen": -1.7615985870361328, "logits/rejected": -1.6536455154418945, "logps/chosen": -61.092079162597656, "logps/rejected": -94.53724670410156, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.4904835224151611, "rewards/margins": 16.616823196411133, "rewards/rejected": -18.1073055267334, "step": 2320 }, { "epoch": 1.06, "learning_rate": 2.1511922881785892e-07, "logits/chosen": -1.7435195446014404, "logits/rejected": -1.6646251678466797, "logps/chosen": -55.811553955078125, "logps/rejected": -95.2570571899414, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7704877853393555, "rewards/margins": 15.985902786254883, "rewards/rejected": -17.756389617919922, "step": 2330 }, { "epoch": 1.07, "learning_rate": 2.146118721461187e-07, "logits/chosen": -1.7589401006698608, "logits/rejected": -1.663800835609436, "logps/chosen": -60.44816207885742, "logps/rejected": -98.11687469482422, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.8989307880401611, "rewards/margins": 16.624099731445312, "rewards/rejected": -18.523029327392578, "step": 2340 }, { "epoch": 1.07, "learning_rate": 2.1410451547437846e-07, "logits/chosen": -1.753797173500061, "logits/rejected": -1.6788421869277954, "logps/chosen": -56.67557907104492, "logps/rejected": -98.96099090576172, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.7312290668487549, "rewards/margins": 16.829326629638672, "rewards/rejected": -18.560556411743164, "step": 2350 }, { "epoch": 1.08, "learning_rate": 2.1359715880263822e-07, "logits/chosen": -1.7726614475250244, "logits/rejected": -1.6805782318115234, "logps/chosen": -57.83234786987305, "logps/rejected": -95.62266540527344, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.7165930271148682, "rewards/margins": 16.315860748291016, "rewards/rejected": -18.032451629638672, "step": 2360 }, { "epoch": 1.08, "learning_rate": 2.13089802130898e-07, "logits/chosen": -1.759373664855957, "logits/rejected": -1.6703014373779297, "logps/chosen": -54.686668395996094, "logps/rejected": -95.24959564208984, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.6667108535766602, "rewards/margins": 17.19411849975586, "rewards/rejected": -18.860828399658203, "step": 2370 }, { "epoch": 1.09, "learning_rate": 2.1258244545915776e-07, "logits/chosen": -1.7582851648330688, "logits/rejected": -1.6722888946533203, "logps/chosen": -58.01994705200195, "logps/rejected": -97.08244323730469, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7737096548080444, "rewards/margins": 16.389488220214844, "rewards/rejected": -18.163196563720703, "step": 2380 }, { "epoch": 1.09, "learning_rate": 2.1207508878741752e-07, "logits/chosen": -1.7361366748809814, "logits/rejected": -1.6514666080474854, "logps/chosen": -56.37322998046875, "logps/rejected": -87.77371978759766, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.5976836681365967, "rewards/margins": 15.749250411987305, "rewards/rejected": -18.346933364868164, "step": 2390 }, { "epoch": 1.1, "learning_rate": 2.115677321156773e-07, "logits/chosen": -1.7475221157073975, "logits/rejected": -1.6648136377334595, "logps/chosen": -60.138282775878906, "logps/rejected": -98.83917236328125, "loss": 0.0055, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.728782057762146, "rewards/margins": 17.035816192626953, "rewards/rejected": -18.764598846435547, "step": 2400 }, { "epoch": 1.1, "eval_logits/chosen": -1.5441038608551025, "eval_logits/rejected": -1.4605567455291748, "eval_logps/chosen": -84.07401275634766, "eval_logps/rejected": -97.23653411865234, "eval_loss": 0.007000993005931377, "eval_rewards/accuracies": 0.994413435459137, "eval_rewards/chosen": -4.046020030975342, "eval_rewards/margins": 14.620133399963379, "eval_rewards/rejected": -18.666152954101562, "eval_runtime": 146.16, "eval_samples_per_second": 19.581, "eval_steps_per_second": 1.225, "step": 2400 }, { "epoch": 1.1, "learning_rate": 2.1106037544393706e-07, "logits/chosen": -1.764243483543396, "logits/rejected": -1.6860980987548828, "logps/chosen": -56.706016540527344, "logps/rejected": -101.9083251953125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.575025200843811, "rewards/margins": 17.899951934814453, "rewards/rejected": -19.474979400634766, "step": 2410 }, { "epoch": 1.1, "learning_rate": 2.1055301877219682e-07, "logits/chosen": -1.787570595741272, "logits/rejected": -1.6699540615081787, "logps/chosen": -62.099884033203125, "logps/rejected": -95.29823303222656, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.684003472328186, "rewards/margins": 17.041250228881836, "rewards/rejected": -18.725255966186523, "step": 2420 }, { "epoch": 1.11, "learning_rate": 2.100456621004566e-07, "logits/chosen": -1.7343931198120117, "logits/rejected": -1.6694958209991455, "logps/chosen": -53.398345947265625, "logps/rejected": -93.80493927001953, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.8752937316894531, "rewards/margins": 17.16379165649414, "rewards/rejected": -19.03908348083496, "step": 2430 }, { "epoch": 1.11, "learning_rate": 2.0953830542871636e-07, "logits/chosen": -1.7479832172393799, "logits/rejected": -1.6645724773406982, "logps/chosen": -59.2391242980957, "logps/rejected": -97.51466369628906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8753879070281982, "rewards/margins": 16.62053871154785, "rewards/rejected": -18.495925903320312, "step": 2440 }, { "epoch": 1.12, "learning_rate": 2.0903094875697612e-07, "logits/chosen": -1.7523056268692017, "logits/rejected": -1.642364263534546, "logps/chosen": -62.688987731933594, "logps/rejected": -97.38086700439453, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.8174155950546265, "rewards/margins": 17.393173217773438, "rewards/rejected": -19.210586547851562, "step": 2450 }, { "epoch": 1.12, "learning_rate": 2.085235920852359e-07, "logits/chosen": -1.7349144220352173, "logits/rejected": -1.6552202701568604, "logps/chosen": -58.827049255371094, "logps/rejected": -96.5774917602539, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -2.0927205085754395, "rewards/margins": 17.134923934936523, "rewards/rejected": -19.22764778137207, "step": 2460 }, { "epoch": 1.13, "learning_rate": 2.0801623541349566e-07, "logits/chosen": -1.7906172275543213, "logits/rejected": -1.6924254894256592, "logps/chosen": -55.4560661315918, "logps/rejected": -94.42463684082031, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2818658351898193, "rewards/margins": 16.96918296813965, "rewards/rejected": -18.251049041748047, "step": 2470 }, { "epoch": 1.13, "learning_rate": 2.0750887874175542e-07, "logits/chosen": -1.7605549097061157, "logits/rejected": -1.677120566368103, "logps/chosen": -55.66670608520508, "logps/rejected": -99.32123565673828, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1595937013626099, "rewards/margins": 17.582664489746094, "rewards/rejected": -18.742259979248047, "step": 2480 }, { "epoch": 1.14, "learning_rate": 2.070015220700152e-07, "logits/chosen": -1.7633788585662842, "logits/rejected": -1.6667747497558594, "logps/chosen": -57.516387939453125, "logps/rejected": -93.65599822998047, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4830613136291504, "rewards/margins": 16.72182846069336, "rewards/rejected": -18.204891204833984, "step": 2490 }, { "epoch": 1.14, "learning_rate": 2.0649416539827496e-07, "logits/chosen": -1.7710567712783813, "logits/rejected": -1.6819353103637695, "logps/chosen": -57.67155075073242, "logps/rejected": -93.34293365478516, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.1700657606124878, "rewards/margins": 16.193981170654297, "rewards/rejected": -17.364046096801758, "step": 2500 }, { "epoch": 1.14, "eval_logits/chosen": -1.550737977027893, "eval_logits/rejected": -1.4679088592529297, "eval_logps/chosen": -82.61907196044922, "eval_logps/rejected": -95.11019134521484, "eval_loss": 0.006657297257333994, "eval_rewards/accuracies": 0.994413435459137, "eval_rewards/chosen": -3.3185434341430664, "eval_rewards/margins": 14.284436225891113, "eval_rewards/rejected": -17.602981567382812, "eval_runtime": 155.6342, "eval_samples_per_second": 18.389, "eval_steps_per_second": 1.15, "step": 2500 }, { "epoch": 1.15, "learning_rate": 2.0598680872653472e-07, "logits/chosen": -1.7823295593261719, "logits/rejected": -1.68827223777771, "logps/chosen": -58.17741775512695, "logps/rejected": -93.47938537597656, "loss": 0.0039, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.1728184223175049, "rewards/margins": 16.308216094970703, "rewards/rejected": -17.48103141784668, "step": 2510 }, { "epoch": 1.15, "learning_rate": 2.054794520547945e-07, "logits/chosen": -1.7661288976669312, "logits/rejected": -1.6686477661132812, "logps/chosen": -54.59541702270508, "logps/rejected": -92.54632568359375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.8793942332267761, "rewards/margins": 16.468664169311523, "rewards/rejected": -17.34805679321289, "step": 2520 }, { "epoch": 1.15, "learning_rate": 2.0497209538305426e-07, "logits/chosen": -1.7487903833389282, "logits/rejected": -1.6530921459197998, "logps/chosen": -56.20011520385742, "logps/rejected": -93.74838256835938, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.4012267589569092, "rewards/margins": 17.030271530151367, "rewards/rejected": -18.431495666503906, "step": 2530 }, { "epoch": 1.16, "learning_rate": 2.0446473871131402e-07, "logits/chosen": -1.7581230401992798, "logits/rejected": -1.6596683263778687, "logps/chosen": -60.82426071166992, "logps/rejected": -99.79011535644531, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3703958988189697, "rewards/margins": 16.5430965423584, "rewards/rejected": -17.91349220275879, "step": 2540 }, { "epoch": 1.16, "learning_rate": 2.039573820395738e-07, "logits/chosen": -1.7701380252838135, "logits/rejected": -1.6606664657592773, "logps/chosen": -59.037353515625, "logps/rejected": -94.49665069580078, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0363215208053589, "rewards/margins": 16.670988082885742, "rewards/rejected": -17.70730972290039, "step": 2550 }, { "epoch": 1.17, "learning_rate": 2.0345002536783356e-07, "logits/chosen": -1.749725341796875, "logits/rejected": -1.6626144647598267, "logps/chosen": -55.888938903808594, "logps/rejected": -90.47606658935547, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.187700629234314, "rewards/margins": 16.13033676147461, "rewards/rejected": -17.318037033081055, "step": 2560 }, { "epoch": 1.17, "learning_rate": 2.0294266869609332e-07, "logits/chosen": -1.7732551097869873, "logits/rejected": -1.6830087900161743, "logps/chosen": -56.351524353027344, "logps/rejected": -92.32308197021484, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.5203639268875122, "rewards/margins": 16.521289825439453, "rewards/rejected": -18.04165267944336, "step": 2570 }, { "epoch": 1.18, "learning_rate": 2.024353120243531e-07, "logits/chosen": -1.7540162801742554, "logits/rejected": -1.673253059387207, "logps/chosen": -52.99707794189453, "logps/rejected": -97.1292724609375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.9613077044487, "rewards/margins": 16.9852237701416, "rewards/rejected": -17.946529388427734, "step": 2580 }, { "epoch": 1.18, "learning_rate": 2.0192795535261286e-07, "logits/chosen": -1.7799053192138672, "logits/rejected": -1.6866953372955322, "logps/chosen": -57.35761642456055, "logps/rejected": -97.79578399658203, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.8344005346298218, "rewards/margins": 18.256929397583008, "rewards/rejected": -19.091327667236328, "step": 2590 }, { "epoch": 1.19, "learning_rate": 2.0142059868087262e-07, "logits/chosen": -1.7495887279510498, "logits/rejected": -1.670689344406128, "logps/chosen": -57.3239860534668, "logps/rejected": -94.37218475341797, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.2658514976501465, "rewards/margins": 16.88041114807129, "rewards/rejected": -18.14626121520996, "step": 2600 }, { "epoch": 1.19, "eval_logits/chosen": -1.5500638484954834, "eval_logits/rejected": -1.466713309288025, "eval_logps/chosen": -82.79623413085938, "eval_logps/rejected": -96.38542175292969, "eval_loss": 0.006424016784876585, "eval_rewards/accuracies": 0.994413435459137, "eval_rewards/chosen": -3.4071311950683594, "eval_rewards/margins": 14.833463668823242, "eval_rewards/rejected": -18.240596771240234, "eval_runtime": 153.5802, "eval_samples_per_second": 18.635, "eval_steps_per_second": 1.166, "step": 2600 }, { "epoch": 1.19, "learning_rate": 2.009132420091324e-07, "logits/chosen": -1.7762126922607422, "logits/rejected": -1.6902281045913696, "logps/chosen": -53.2180290222168, "logps/rejected": -96.89413452148438, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.0934245586395264, "rewards/margins": 17.65363883972168, "rewards/rejected": -18.74706268310547, "step": 2610 }, { "epoch": 1.2, "learning_rate": 2.0040588533739216e-07, "logits/chosen": -1.7808773517608643, "logits/rejected": -1.6828012466430664, "logps/chosen": -59.0372428894043, "logps/rejected": -96.43038177490234, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.2977206707000732, "rewards/margins": 17.016761779785156, "rewards/rejected": -18.31447982788086, "step": 2620 }, { "epoch": 1.2, "learning_rate": 1.9989852866565192e-07, "logits/chosen": -1.734175443649292, "logits/rejected": -1.6448099613189697, "logps/chosen": -57.11977005004883, "logps/rejected": -95.9421615600586, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.2607675790786743, "rewards/margins": 17.131710052490234, "rewards/rejected": -18.39247703552246, "step": 2630 }, { "epoch": 1.21, "learning_rate": 1.993911719939117e-07, "logits/chosen": -1.7802501916885376, "logits/rejected": -1.6809139251708984, "logps/chosen": -55.82944869995117, "logps/rejected": -91.70013427734375, "loss": 0.0042, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4456136226654053, "rewards/margins": 16.31406021118164, "rewards/rejected": -17.759674072265625, "step": 2640 }, { "epoch": 1.21, "learning_rate": 1.9888381532217146e-07, "logits/chosen": -1.7335536479949951, "logits/rejected": -1.661350965499878, "logps/chosen": -53.10137939453125, "logps/rejected": -95.90177154541016, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.789583444595337, "rewards/margins": 17.921833038330078, "rewards/rejected": -19.711416244506836, "step": 2650 }, { "epoch": 1.21, "learning_rate": 1.9837645865043122e-07, "logits/chosen": -1.750737190246582, "logits/rejected": -1.6568177938461304, "logps/chosen": -55.086997985839844, "logps/rejected": -95.12551879882812, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.4792072772979736, "rewards/margins": 18.116641998291016, "rewards/rejected": -19.595848083496094, "step": 2660 }, { "epoch": 1.22, "learning_rate": 1.97869101978691e-07, "logits/chosen": -1.7495743036270142, "logits/rejected": -1.6731303930282593, "logps/chosen": -55.61684036254883, "logps/rejected": -96.10287475585938, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.7537901401519775, "rewards/margins": 17.751415252685547, "rewards/rejected": -19.505207061767578, "step": 2670 }, { "epoch": 1.22, "learning_rate": 1.9736174530695076e-07, "logits/chosen": -1.7520612478256226, "logits/rejected": -1.6678498983383179, "logps/chosen": -55.18487548828125, "logps/rejected": -96.980224609375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.197737455368042, "rewards/margins": 18.08832550048828, "rewards/rejected": -19.286062240600586, "step": 2680 }, { "epoch": 1.23, "learning_rate": 1.9685438863521052e-07, "logits/chosen": -1.7610679864883423, "logits/rejected": -1.6806461811065674, "logps/chosen": -55.430091857910156, "logps/rejected": -97.28132629394531, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.306195855140686, "rewards/margins": 18.163005828857422, "rewards/rejected": -19.469202041625977, "step": 2690 }, { "epoch": 1.23, "learning_rate": 1.963470319634703e-07, "logits/chosen": -1.8031642436981201, "logits/rejected": -1.7114074230194092, "logps/chosen": -59.132850646972656, "logps/rejected": -95.17094421386719, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.488271951675415, "rewards/margins": 17.14899253845215, "rewards/rejected": -18.637264251708984, "step": 2700 }, { "epoch": 1.23, "eval_logits/chosen": -1.5495989322662354, "eval_logits/rejected": -1.4647303819656372, "eval_logps/chosen": -84.65057373046875, "eval_logps/rejected": -99.30116271972656, "eval_loss": 0.00904226116836071, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -4.334300518035889, "eval_rewards/margins": 15.36416244506836, "eval_rewards/rejected": -19.69846534729004, "eval_runtime": 147.385, "eval_samples_per_second": 19.419, "eval_steps_per_second": 1.215, "step": 2700 }, { "epoch": 1.24, "learning_rate": 1.9583967529173006e-07, "logits/chosen": -1.7910597324371338, "logits/rejected": -1.6921383142471313, "logps/chosen": -58.234352111816406, "logps/rejected": -97.64838409423828, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.6721891164779663, "rewards/margins": 17.89757537841797, "rewards/rejected": -19.56976318359375, "step": 2710 }, { "epoch": 1.24, "learning_rate": 1.9533231861998982e-07, "logits/chosen": -1.7883247137069702, "logits/rejected": -1.6830103397369385, "logps/chosen": -61.363319396972656, "logps/rejected": -100.30858612060547, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.0055155754089355, "rewards/margins": 18.509708404541016, "rewards/rejected": -20.51522445678711, "step": 2720 }, { "epoch": 1.25, "learning_rate": 1.948249619482496e-07, "logits/chosen": -1.751837968826294, "logits/rejected": -1.6661630868911743, "logps/chosen": -55.377769470214844, "logps/rejected": -95.15009307861328, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.795588731765747, "rewards/margins": 18.258634567260742, "rewards/rejected": -20.054224014282227, "step": 2730 }, { "epoch": 1.25, "learning_rate": 1.9431760527650936e-07, "logits/chosen": -1.7812420129776, "logits/rejected": -1.6635338068008423, "logps/chosen": -62.19695281982422, "logps/rejected": -100.60261535644531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6461451053619385, "rewards/margins": 18.79757308959961, "rewards/rejected": -20.44371795654297, "step": 2740 }, { "epoch": 1.26, "learning_rate": 1.9381024860476912e-07, "logits/chosen": -1.7579082250595093, "logits/rejected": -1.6530691385269165, "logps/chosen": -57.74163818359375, "logps/rejected": -98.6023178100586, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.6989234685897827, "rewards/margins": 19.26529312133789, "rewards/rejected": -20.964216232299805, "step": 2750 }, { "epoch": 1.26, "learning_rate": 1.933028919330289e-07, "logits/chosen": -1.7814481258392334, "logits/rejected": -1.6904300451278687, "logps/chosen": -57.6041374206543, "logps/rejected": -98.92882537841797, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.9272903203964233, "rewards/margins": 19.180126190185547, "rewards/rejected": -21.107418060302734, "step": 2760 }, { "epoch": 1.26, "learning_rate": 1.9279553526128866e-07, "logits/chosen": -1.7679035663604736, "logits/rejected": -1.6667778491973877, "logps/chosen": -59.31825637817383, "logps/rejected": -99.1368408203125, "loss": 0.0043, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.5422855615615845, "rewards/margins": 19.345226287841797, "rewards/rejected": -20.88751220703125, "step": 2770 }, { "epoch": 1.27, "learning_rate": 1.9228817858954842e-07, "logits/chosen": -1.770992636680603, "logits/rejected": -1.6852118968963623, "logps/chosen": -56.10600662231445, "logps/rejected": -98.84684753417969, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.3405952453613281, "rewards/margins": 18.240341186523438, "rewards/rejected": -19.580936431884766, "step": 2780 }, { "epoch": 1.27, "learning_rate": 1.917808219178082e-07, "logits/chosen": -1.748457908630371, "logits/rejected": -1.6733148097991943, "logps/chosen": -53.041015625, "logps/rejected": -100.11251068115234, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.6683629751205444, "rewards/margins": 18.404541015625, "rewards/rejected": -20.072906494140625, "step": 2790 }, { "epoch": 1.28, "learning_rate": 1.9127346524606796e-07, "logits/chosen": -1.778543472290039, "logits/rejected": -1.6833875179290771, "logps/chosen": -57.812828063964844, "logps/rejected": -96.30428314208984, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.4683773517608643, "rewards/margins": 17.575725555419922, "rewards/rejected": -19.04410171508789, "step": 2800 }, { "epoch": 1.28, "eval_logits/chosen": -1.5407549142837524, "eval_logits/rejected": -1.4569265842437744, "eval_logps/chosen": -85.26310729980469, "eval_logps/rejected": -99.38053131103516, "eval_loss": 0.011327545158565044, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -4.640564918518066, "eval_rewards/margins": 15.097585678100586, "eval_rewards/rejected": -19.73814582824707, "eval_runtime": 149.334, "eval_samples_per_second": 19.165, "eval_steps_per_second": 1.199, "step": 2800 }, { "epoch": 1.28, "learning_rate": 1.9076610857432772e-07, "logits/chosen": -1.7536699771881104, "logits/rejected": -1.6619449853897095, "logps/chosen": -56.8665771484375, "logps/rejected": -99.80836486816406, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7938029766082764, "rewards/margins": 18.09140396118164, "rewards/rejected": -19.885204315185547, "step": 2810 }, { "epoch": 1.29, "learning_rate": 1.902587519025875e-07, "logits/chosen": -1.7796272039413452, "logits/rejected": -1.673616647720337, "logps/chosen": -61.139610290527344, "logps/rejected": -97.94349670410156, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.6869022846221924, "rewards/margins": 18.66249656677246, "rewards/rejected": -20.34939956665039, "step": 2820 }, { "epoch": 1.29, "learning_rate": 1.8975139523084726e-07, "logits/chosen": -1.7451549768447876, "logits/rejected": -1.665014624595642, "logps/chosen": -59.975379943847656, "logps/rejected": -101.70437622070312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.056490421295166, "rewards/margins": 18.601909637451172, "rewards/rejected": -20.658397674560547, "step": 2830 }, { "epoch": 1.3, "learning_rate": 1.8924403855910702e-07, "logits/chosen": -1.7664998769760132, "logits/rejected": -1.6723295450210571, "logps/chosen": -57.23393630981445, "logps/rejected": -94.91930389404297, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.884436011314392, "rewards/margins": 17.427194595336914, "rewards/rejected": -19.311628341674805, "step": 2840 }, { "epoch": 1.3, "learning_rate": 1.887366818873668e-07, "logits/chosen": -1.7805742025375366, "logits/rejected": -1.6844866275787354, "logps/chosen": -58.0041618347168, "logps/rejected": -97.0829849243164, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.111764907836914, "rewards/margins": 17.889118194580078, "rewards/rejected": -19.00088119506836, "step": 2850 }, { "epoch": 1.31, "learning_rate": 1.8822932521562656e-07, "logits/chosen": -1.7825946807861328, "logits/rejected": -1.6918452978134155, "logps/chosen": -56.925636291503906, "logps/rejected": -97.35308074951172, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.9460649490356445, "rewards/margins": 17.18486976623535, "rewards/rejected": -18.130931854248047, "step": 2860 }, { "epoch": 1.31, "learning_rate": 1.8772196854388632e-07, "logits/chosen": -1.7390912771224976, "logits/rejected": -1.6564744710922241, "logps/chosen": -57.98912811279297, "logps/rejected": -96.44007873535156, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.5826243162155151, "rewards/margins": 16.839244842529297, "rewards/rejected": -18.4218692779541, "step": 2870 }, { "epoch": 1.31, "learning_rate": 1.872146118721461e-07, "logits/chosen": -1.7849754095077515, "logits/rejected": -1.672852873802185, "logps/chosen": -61.340476989746094, "logps/rejected": -93.8963851928711, "loss": 0.0011, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.613435983657837, "rewards/margins": 17.021175384521484, "rewards/rejected": -18.634611129760742, "step": 2880 }, { "epoch": 1.32, "learning_rate": 1.8670725520040586e-07, "logits/chosen": -1.750396728515625, "logits/rejected": -1.6511199474334717, "logps/chosen": -58.67247772216797, "logps/rejected": -96.80357360839844, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.338128685951233, "rewards/margins": 17.583751678466797, "rewards/rejected": -18.9218807220459, "step": 2890 }, { "epoch": 1.32, "learning_rate": 1.8619989852866562e-07, "logits/chosen": -1.7634893655776978, "logits/rejected": -1.6787548065185547, "logps/chosen": -55.73259353637695, "logps/rejected": -94.75193786621094, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.5130577087402344, "rewards/margins": 18.2349853515625, "rewards/rejected": -19.748043060302734, "step": 2900 }, { "epoch": 1.32, "eval_logits/chosen": -1.544886589050293, "eval_logits/rejected": -1.4612011909484863, "eval_logps/chosen": -83.85013580322266, "eval_logps/rejected": -98.73179626464844, "eval_loss": 0.006982807535678148, "eval_rewards/accuracies": 0.994413435459137, "eval_rewards/chosen": -3.9340834617614746, "eval_rewards/margins": 15.479698181152344, "eval_rewards/rejected": -19.413782119750977, "eval_runtime": 144.0378, "eval_samples_per_second": 19.87, "eval_steps_per_second": 1.243, "step": 2900 }, { "epoch": 1.33, "learning_rate": 1.856925418569254e-07, "logits/chosen": -1.7577152252197266, "logits/rejected": -1.6612701416015625, "logps/chosen": -61.57184982299805, "logps/rejected": -98.47500610351562, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.2696821689605713, "rewards/margins": 18.28770637512207, "rewards/rejected": -19.557388305664062, "step": 2910 }, { "epoch": 1.33, "learning_rate": 1.8518518518518516e-07, "logits/chosen": -1.744296669960022, "logits/rejected": -1.671971321105957, "logps/chosen": -54.99235153198242, "logps/rejected": -90.80326843261719, "loss": 0.0068, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0770174264907837, "rewards/margins": 15.502540588378906, "rewards/rejected": -16.579557418823242, "step": 2920 }, { "epoch": 1.34, "learning_rate": 1.8467782851344492e-07, "logits/chosen": -1.7668946981430054, "logits/rejected": -1.6761682033538818, "logps/chosen": -54.510459899902344, "logps/rejected": -91.04711151123047, "loss": 0.0019, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2146661281585693, "rewards/margins": 15.932766914367676, "rewards/rejected": -17.147432327270508, "step": 2930 }, { "epoch": 1.34, "learning_rate": 1.841704718417047e-07, "logits/chosen": -1.7573566436767578, "logits/rejected": -1.654294729232788, "logps/chosen": -58.834869384765625, "logps/rejected": -94.41200256347656, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.412942886352539, "rewards/margins": 16.644380569458008, "rewards/rejected": -18.057323455810547, "step": 2940 }, { "epoch": 1.35, "learning_rate": 1.8366311516996446e-07, "logits/chosen": -1.7762553691864014, "logits/rejected": -1.682163953781128, "logps/chosen": -54.3316764831543, "logps/rejected": -90.17029571533203, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1894292831420898, "rewards/margins": 16.16569709777832, "rewards/rejected": -17.355127334594727, "step": 2950 }, { "epoch": 1.35, "learning_rate": 1.8315575849822422e-07, "logits/chosen": -1.7757704257965088, "logits/rejected": -1.684913992881775, "logps/chosen": -56.261322021484375, "logps/rejected": -95.15665435791016, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1488529443740845, "rewards/margins": 16.14897346496582, "rewards/rejected": -17.297826766967773, "step": 2960 }, { "epoch": 1.36, "learning_rate": 1.82648401826484e-07, "logits/chosen": -1.7451854944229126, "logits/rejected": -1.6552518606185913, "logps/chosen": -57.798240661621094, "logps/rejected": -90.9861068725586, "loss": 0.0034, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4594552516937256, "rewards/margins": 16.58235740661621, "rewards/rejected": -18.041812896728516, "step": 2970 }, { "epoch": 1.36, "learning_rate": 1.8214104515474375e-07, "logits/chosen": -1.7445427179336548, "logits/rejected": -1.6590522527694702, "logps/chosen": -59.61603927612305, "logps/rejected": -95.45381164550781, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.726776361465454, "rewards/margins": 17.064167022705078, "rewards/rejected": -18.790943145751953, "step": 2980 }, { "epoch": 1.36, "learning_rate": 1.8163368848300352e-07, "logits/chosen": -1.7461668252944946, "logits/rejected": -1.6662800312042236, "logps/chosen": -56.46452713012695, "logps/rejected": -97.12699890136719, "loss": 0.0076, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.519303560256958, "rewards/margins": 16.730981826782227, "rewards/rejected": -18.25028419494629, "step": 2990 }, { "epoch": 1.37, "learning_rate": 1.811263318112633e-07, "logits/chosen": -1.761304259300232, "logits/rejected": -1.683823585510254, "logps/chosen": -56.9975700378418, "logps/rejected": -94.61572265625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.3946937322616577, "rewards/margins": 16.722742080688477, "rewards/rejected": -18.117435455322266, "step": 3000 }, { "epoch": 1.37, "eval_logits/chosen": -1.539894461631775, "eval_logits/rejected": -1.4586848020553589, "eval_logps/chosen": -83.39830780029297, "eval_logps/rejected": -96.94595336914062, "eval_loss": 0.006562211085110903, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -3.708167791366577, "eval_rewards/margins": 14.81269645690918, "eval_rewards/rejected": -18.520864486694336, "eval_runtime": 151.5133, "eval_samples_per_second": 18.889, "eval_steps_per_second": 1.181, "step": 3000 }, { "epoch": 1.37, "learning_rate": 1.8061897513952305e-07, "logits/chosen": -1.7640758752822876, "logits/rejected": -1.6685714721679688, "logps/chosen": -59.41301727294922, "logps/rejected": -95.84724426269531, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.2242387533187866, "rewards/margins": 17.188039779663086, "rewards/rejected": -18.412277221679688, "step": 3010 }, { "epoch": 1.38, "learning_rate": 1.8011161846778282e-07, "logits/chosen": -1.7586253881454468, "logits/rejected": -1.6649078130722046, "logps/chosen": -58.984031677246094, "logps/rejected": -94.05524444580078, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.5664620399475098, "rewards/margins": 16.676197052001953, "rewards/rejected": -18.242660522460938, "step": 3020 }, { "epoch": 1.38, "learning_rate": 1.796042617960426e-07, "logits/chosen": -1.773471474647522, "logits/rejected": -1.6692659854888916, "logps/chosen": -61.39658737182617, "logps/rejected": -95.90094757080078, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.9589765071868896, "rewards/margins": 17.034753799438477, "rewards/rejected": -18.993728637695312, "step": 3030 }, { "epoch": 1.39, "learning_rate": 1.7909690512430235e-07, "logits/chosen": -1.759385108947754, "logits/rejected": -1.662372350692749, "logps/chosen": -60.836090087890625, "logps/rejected": -96.40751647949219, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.8023784160614014, "rewards/margins": 17.685691833496094, "rewards/rejected": -19.488067626953125, "step": 3040 }, { "epoch": 1.39, "learning_rate": 1.7858954845256212e-07, "logits/chosen": -1.7491194009780884, "logits/rejected": -1.6780580282211304, "logps/chosen": -56.577880859375, "logps/rejected": -98.60438537597656, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.453829050064087, "rewards/margins": 17.57693862915039, "rewards/rejected": -19.03076934814453, "step": 3050 }, { "epoch": 1.4, "learning_rate": 1.780821917808219e-07, "logits/chosen": -1.760709524154663, "logits/rejected": -1.6798269748687744, "logps/chosen": -58.53704833984375, "logps/rejected": -99.60615539550781, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.9645904302597046, "rewards/margins": 17.165122985839844, "rewards/rejected": -19.129714965820312, "step": 3060 }, { "epoch": 1.4, "learning_rate": 1.7757483510908165e-07, "logits/chosen": -1.7557331323623657, "logits/rejected": -1.6769014596939087, "logps/chosen": -55.544944763183594, "logps/rejected": -99.79969787597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5912768840789795, "rewards/margins": 18.37584114074707, "rewards/rejected": -19.96711540222168, "step": 3070 }, { "epoch": 1.41, "learning_rate": 1.7706747843734142e-07, "logits/chosen": -1.7606086730957031, "logits/rejected": -1.6541544198989868, "logps/chosen": -56.68511199951172, "logps/rejected": -91.82845306396484, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.4311389923095703, "rewards/margins": 17.00944709777832, "rewards/rejected": -18.44058609008789, "step": 3080 }, { "epoch": 1.41, "learning_rate": 1.765601217656012e-07, "logits/chosen": -1.7437763214111328, "logits/rejected": -1.653381586074829, "logps/chosen": -58.844627380371094, "logps/rejected": -94.74607849121094, "loss": 0.0056, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6268081665039062, "rewards/margins": 16.87070083618164, "rewards/rejected": -18.497509002685547, "step": 3090 }, { "epoch": 1.42, "learning_rate": 1.7605276509386095e-07, "logits/chosen": -1.7557626962661743, "logits/rejected": -1.670015573501587, "logps/chosen": -56.841880798339844, "logps/rejected": -94.67625427246094, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.3221409320831299, "rewards/margins": 17.122955322265625, "rewards/rejected": -18.445096969604492, "step": 3100 }, { "epoch": 1.42, "eval_logits/chosen": -1.5296785831451416, "eval_logits/rejected": -1.4479854106903076, "eval_logps/chosen": -83.32076263427734, "eval_logps/rejected": -97.17174530029297, "eval_loss": 0.006353565026074648, "eval_rewards/accuracies": 0.9972066879272461, "eval_rewards/chosen": -3.6693906784057617, "eval_rewards/margins": 14.964364051818848, "eval_rewards/rejected": -18.633752822875977, "eval_runtime": 170.5936, "eval_samples_per_second": 16.777, "eval_steps_per_second": 1.049, "step": 3100 }, { "epoch": 1.42, "learning_rate": 1.7554540842212072e-07, "logits/chosen": -1.7814935445785522, "logits/rejected": -1.6783158779144287, "logps/chosen": -60.32609939575195, "logps/rejected": -93.3768081665039, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.453685998916626, "rewards/margins": 16.54983901977539, "rewards/rejected": -18.00352668762207, "step": 3110 }, { "epoch": 1.42, "learning_rate": 1.750380517503805e-07, "logits/chosen": -1.7732906341552734, "logits/rejected": -1.6722043752670288, "logps/chosen": -58.196807861328125, "logps/rejected": -94.85267639160156, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.4471790790557861, "rewards/margins": 17.081787109375, "rewards/rejected": -18.52896499633789, "step": 3120 }, { "epoch": 1.43, "learning_rate": 1.7453069507864025e-07, "logits/chosen": -1.7158924341201782, "logits/rejected": -1.6260392665863037, "logps/chosen": -57.551734924316406, "logps/rejected": -91.12532043457031, "loss": 0.0088, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.9693899154663086, "rewards/margins": 16.378599166870117, "rewards/rejected": -18.347990036010742, "step": 3130 }, { "epoch": 1.43, "learning_rate": 1.7402333840690002e-07, "logits/chosen": -1.7346522808074951, "logits/rejected": -1.6474847793579102, "logps/chosen": -57.03557205200195, "logps/rejected": -95.57264709472656, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.7559372186660767, "rewards/margins": 17.30267906188965, "rewards/rejected": -19.05861473083496, "step": 3140 }, { "epoch": 1.44, "learning_rate": 1.735159817351598e-07, "logits/chosen": -1.7555586099624634, "logits/rejected": -1.6755212545394897, "logps/chosen": -55.34453201293945, "logps/rejected": -100.09938049316406, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.3288910388946533, "rewards/margins": 18.491540908813477, "rewards/rejected": -19.820430755615234, "step": 3150 }, { "epoch": 1.44, "learning_rate": 1.7300862506341955e-07, "logits/chosen": -1.772899866104126, "logits/rejected": -1.6719576120376587, "logps/chosen": -60.80910110473633, "logps/rejected": -97.62386322021484, "loss": 0.0065, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.793237328529358, "rewards/margins": 18.663257598876953, "rewards/rejected": -20.456493377685547, "step": 3160 }, { "epoch": 1.45, "learning_rate": 1.7250126839167932e-07, "logits/chosen": -1.7909915447235107, "logits/rejected": -1.6983429193496704, "logps/chosen": -61.983421325683594, "logps/rejected": -97.15614318847656, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.7546970844268799, "rewards/margins": 17.051715850830078, "rewards/rejected": -18.806411743164062, "step": 3170 }, { "epoch": 1.45, "learning_rate": 1.719939117199391e-07, "logits/chosen": -1.7587168216705322, "logits/rejected": -1.6700305938720703, "logps/chosen": -55.97304153442383, "logps/rejected": -97.49143981933594, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.573572039604187, "rewards/margins": 17.24924087524414, "rewards/rejected": -18.82281494140625, "step": 3180 }, { "epoch": 1.46, "learning_rate": 1.7148655504819885e-07, "logits/chosen": -1.72994065284729, "logits/rejected": -1.6405225992202759, "logps/chosen": -56.26521682739258, "logps/rejected": -98.17828369140625, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3642914295196533, "rewards/margins": 18.429710388183594, "rewards/rejected": -19.794002532958984, "step": 3190 }, { "epoch": 1.46, "learning_rate": 1.7097919837645862e-07, "logits/chosen": -1.7546837329864502, "logits/rejected": -1.6676174402236938, "logps/chosen": -59.02223587036133, "logps/rejected": -97.63322448730469, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.8880170583724976, "rewards/margins": 18.1130313873291, "rewards/rejected": -20.001047134399414, "step": 3200 }, { "epoch": 1.46, "eval_logits/chosen": -1.5306702852249146, "eval_logits/rejected": -1.4482632875442505, "eval_logps/chosen": -83.45709991455078, "eval_logps/rejected": -98.23886108398438, "eval_loss": 0.005945264827460051, "eval_rewards/accuracies": 0.994413435459137, "eval_rewards/chosen": -3.737560510635376, "eval_rewards/margins": 15.429756164550781, "eval_rewards/rejected": -19.167316436767578, "eval_runtime": 153.8728, "eval_samples_per_second": 18.6, "eval_steps_per_second": 1.163, "step": 3200 }, { "epoch": 1.47, "learning_rate": 1.704718417047184e-07, "logits/chosen": -1.7227180004119873, "logits/rejected": -1.6549056768417358, "logps/chosen": -55.83765411376953, "logps/rejected": -93.55619812011719, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.1712257862091064, "rewards/margins": 16.469789505004883, "rewards/rejected": -18.641014099121094, "step": 3210 }, { "epoch": 1.47, "learning_rate": 1.6996448503297815e-07, "logits/chosen": -1.7486251592636108, "logits/rejected": -1.669952154159546, "logps/chosen": -53.65233612060547, "logps/rejected": -96.1492691040039, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.5661886930465698, "rewards/margins": 17.999963760375977, "rewards/rejected": -19.566152572631836, "step": 3220 }, { "epoch": 1.47, "learning_rate": 1.6945712836123792e-07, "logits/chosen": -1.7423101663589478, "logits/rejected": -1.652051329612732, "logps/chosen": -55.4273681640625, "logps/rejected": -98.6847915649414, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.360779881477356, "rewards/margins": 18.79489517211914, "rewards/rejected": -20.155675888061523, "step": 3230 }, { "epoch": 1.48, "learning_rate": 1.689497716894977e-07, "logits/chosen": -1.7299396991729736, "logits/rejected": -1.6439214944839478, "logps/chosen": -57.0401611328125, "logps/rejected": -98.83965301513672, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.5103360414505005, "rewards/margins": 18.032917022705078, "rewards/rejected": -19.543251037597656, "step": 3240 }, { "epoch": 1.48, "learning_rate": 1.6844241501775745e-07, "logits/chosen": -1.7509998083114624, "logits/rejected": -1.6693347692489624, "logps/chosen": -59.036094665527344, "logps/rejected": -98.47119903564453, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.9223308563232422, "rewards/margins": 17.635066986083984, "rewards/rejected": -19.557395935058594, "step": 3250 }, { "epoch": 1.49, "learning_rate": 1.6793505834601722e-07, "logits/chosen": -1.738722562789917, "logits/rejected": -1.6587598323822021, "logps/chosen": -53.109886169433594, "logps/rejected": -97.91802978515625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.6686608791351318, "rewards/margins": 18.069690704345703, "rewards/rejected": -19.73834991455078, "step": 3260 }, { "epoch": 1.49, "learning_rate": 1.67427701674277e-07, "logits/chosen": -1.7291982173919678, "logits/rejected": -1.6407372951507568, "logps/chosen": -54.598365783691406, "logps/rejected": -95.91632080078125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.455635905265808, "rewards/margins": 17.710803985595703, "rewards/rejected": -19.166439056396484, "step": 3270 }, { "epoch": 1.5, "learning_rate": 1.6692034500253675e-07, "logits/chosen": -1.761309027671814, "logits/rejected": -1.6706691980361938, "logps/chosen": -56.527313232421875, "logps/rejected": -98.2834243774414, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.34516179561615, "rewards/margins": 18.49872589111328, "rewards/rejected": -19.843887329101562, "step": 3280 }, { "epoch": 1.5, "learning_rate": 1.6641298833079652e-07, "logits/chosen": -1.741758942604065, "logits/rejected": -1.6503000259399414, "logps/chosen": -57.42247772216797, "logps/rejected": -94.12330627441406, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4599244594573975, "rewards/margins": 17.578405380249023, "rewards/rejected": -19.038328170776367, "step": 3290 }, { "epoch": 1.51, "learning_rate": 1.659056316590563e-07, "logits/chosen": -1.7459741830825806, "logits/rejected": -1.6614172458648682, "logps/chosen": -53.6883659362793, "logps/rejected": -90.2201156616211, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.5786361694335938, "rewards/margins": 17.19680404663086, "rewards/rejected": -18.775440216064453, "step": 3300 }, { "epoch": 1.51, "eval_logits/chosen": -1.5285367965698242, "eval_logits/rejected": -1.4458637237548828, "eval_logps/chosen": -83.92906188964844, "eval_logps/rejected": -99.31780242919922, "eval_loss": 0.006135547533631325, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -3.9735474586486816, "eval_rewards/margins": 15.733240127563477, "eval_rewards/rejected": -19.706789016723633, "eval_runtime": 144.8765, "eval_samples_per_second": 19.755, "eval_steps_per_second": 1.236, "step": 3300 }, { "epoch": 1.51, "learning_rate": 1.6539827498731605e-07, "logits/chosen": -1.741248369216919, "logits/rejected": -1.6535362005233765, "logps/chosen": -58.149566650390625, "logps/rejected": -93.27287292480469, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4163057804107666, "rewards/margins": 18.03692054748535, "rewards/rejected": -19.453222274780273, "step": 3310 }, { "epoch": 1.52, "learning_rate": 1.6489091831557582e-07, "logits/chosen": -1.7589938640594482, "logits/rejected": -1.67257559299469, "logps/chosen": -58.156166076660156, "logps/rejected": -98.0214614868164, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.6777223348617554, "rewards/margins": 17.951839447021484, "rewards/rejected": -19.629560470581055, "step": 3320 }, { "epoch": 1.52, "learning_rate": 1.643835616438356e-07, "logits/chosen": -1.7588249444961548, "logits/rejected": -1.677903175354004, "logps/chosen": -54.59656524658203, "logps/rejected": -98.17218780517578, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.640683889389038, "rewards/margins": 18.058780670166016, "rewards/rejected": -19.699462890625, "step": 3330 }, { "epoch": 1.52, "learning_rate": 1.6387620497209535e-07, "logits/chosen": -1.750461220741272, "logits/rejected": -1.669263482093811, "logps/chosen": -55.287437438964844, "logps/rejected": -98.08964538574219, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.6613962650299072, "rewards/margins": 18.753292083740234, "rewards/rejected": -20.414690017700195, "step": 3340 }, { "epoch": 1.53, "learning_rate": 1.6336884830035512e-07, "logits/chosen": -1.737192153930664, "logits/rejected": -1.6521022319793701, "logps/chosen": -56.506439208984375, "logps/rejected": -98.63439178466797, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.836482048034668, "rewards/margins": 18.692378997802734, "rewards/rejected": -20.528860092163086, "step": 3350 }, { "epoch": 1.53, "learning_rate": 1.6286149162861489e-07, "logits/chosen": -1.745640754699707, "logits/rejected": -1.648800253868103, "logps/chosen": -57.18326950073242, "logps/rejected": -99.80830383300781, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.6478908061981201, "rewards/margins": 19.097484588623047, "rewards/rejected": -20.74537467956543, "step": 3360 }, { "epoch": 1.54, "learning_rate": 1.6235413495687465e-07, "logits/chosen": -1.7396637201309204, "logits/rejected": -1.6458467245101929, "logps/chosen": -55.3143310546875, "logps/rejected": -93.59313201904297, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.841915488243103, "rewards/margins": 18.101560592651367, "rewards/rejected": -19.943477630615234, "step": 3370 }, { "epoch": 1.54, "learning_rate": 1.6184677828513442e-07, "logits/chosen": -1.7592235803604126, "logits/rejected": -1.6574302911758423, "logps/chosen": -60.5203971862793, "logps/rejected": -97.92893981933594, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.1750316619873047, "rewards/margins": 17.71821403503418, "rewards/rejected": -19.89324378967285, "step": 3380 }, { "epoch": 1.55, "learning_rate": 1.613394216133942e-07, "logits/chosen": -1.7654197216033936, "logits/rejected": -1.6703176498413086, "logps/chosen": -58.678627014160156, "logps/rejected": -97.8316650390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7995555400848389, "rewards/margins": 18.441692352294922, "rewards/rejected": -20.241247177124023, "step": 3390 }, { "epoch": 1.55, "learning_rate": 1.6083206494165398e-07, "logits/chosen": -1.7394158840179443, "logits/rejected": -1.6462080478668213, "logps/chosen": -56.487518310546875, "logps/rejected": -99.09591674804688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.0049827098846436, "rewards/margins": 18.279010772705078, "rewards/rejected": -20.28399658203125, "step": 3400 }, { "epoch": 1.55, "eval_logits/chosen": -1.524535059928894, "eval_logits/rejected": -1.4411815404891968, "eval_logps/chosen": -84.63044738769531, "eval_logps/rejected": -100.86541748046875, "eval_loss": 0.006590413860976696, "eval_rewards/accuracies": 0.994413435459137, "eval_rewards/chosen": -4.324239730834961, "eval_rewards/margins": 16.15635871887207, "eval_rewards/rejected": -20.480600357055664, "eval_runtime": 144.5579, "eval_samples_per_second": 19.798, "eval_steps_per_second": 1.238, "step": 3400 }, { "epoch": 1.56, "learning_rate": 1.6032470826991375e-07, "logits/chosen": -1.7354533672332764, "logits/rejected": -1.6529903411865234, "logps/chosen": -54.403541564941406, "logps/rejected": -96.82051849365234, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.3602700233459473, "rewards/margins": 17.785980224609375, "rewards/rejected": -20.146251678466797, "step": 3410 }, { "epoch": 1.56, "learning_rate": 1.598173515981735e-07, "logits/chosen": -1.739316701889038, "logits/rejected": -1.6420605182647705, "logps/chosen": -57.6550407409668, "logps/rejected": -97.14595794677734, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.8596147298812866, "rewards/margins": 17.877527236938477, "rewards/rejected": -19.73714256286621, "step": 3420 }, { "epoch": 1.57, "learning_rate": 1.5930999492643328e-07, "logits/chosen": -1.7393652200698853, "logits/rejected": -1.6525812149047852, "logps/chosen": -59.345985412597656, "logps/rejected": -100.73359680175781, "loss": 0.0027, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.462760329246521, "rewards/margins": 19.496845245361328, "rewards/rejected": -20.959606170654297, "step": 3430 }, { "epoch": 1.57, "learning_rate": 1.5880263825469305e-07, "logits/chosen": -1.7402441501617432, "logits/rejected": -1.6511075496673584, "logps/chosen": -58.53057098388672, "logps/rejected": -99.9593734741211, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.192598819732666, "rewards/margins": 19.186471939086914, "rewards/rejected": -21.379072189331055, "step": 3440 }, { "epoch": 1.57, "learning_rate": 1.582952815829528e-07, "logits/chosen": -1.757459044456482, "logits/rejected": -1.6511247158050537, "logps/chosen": -60.21623611450195, "logps/rejected": -100.17884826660156, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -2.050049304962158, "rewards/margins": 18.79250717163086, "rewards/rejected": -20.842559814453125, "step": 3450 }, { "epoch": 1.58, "learning_rate": 1.5778792491121258e-07, "logits/chosen": -1.716695785522461, "logits/rejected": -1.6268866062164307, "logps/chosen": -60.640830993652344, "logps/rejected": -101.121337890625, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.0517373085021973, "rewards/margins": 19.40340232849121, "rewards/rejected": -21.45513916015625, "step": 3460 }, { "epoch": 1.58, "learning_rate": 1.5728056823947235e-07, "logits/chosen": -1.7543067932128906, "logits/rejected": -1.6606168746948242, "logps/chosen": -55.8422737121582, "logps/rejected": -100.81498718261719, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.2675364017486572, "rewards/margins": 19.677331924438477, "rewards/rejected": -21.944866180419922, "step": 3470 }, { "epoch": 1.59, "learning_rate": 1.567732115677321e-07, "logits/chosen": -1.7218773365020752, "logits/rejected": -1.645035982131958, "logps/chosen": -56.04047393798828, "logps/rejected": -106.46551513671875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.6016311645507812, "rewards/margins": 20.23841667175293, "rewards/rejected": -21.84004783630371, "step": 3480 }, { "epoch": 1.59, "learning_rate": 1.5626585489599188e-07, "logits/chosen": -1.7585750818252563, "logits/rejected": -1.667877197265625, "logps/chosen": -60.819419860839844, "logps/rejected": -102.30345153808594, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.7390552759170532, "rewards/margins": 19.786039352416992, "rewards/rejected": -21.525094985961914, "step": 3490 }, { "epoch": 1.6, "learning_rate": 1.5575849822425165e-07, "logits/chosen": -1.7323825359344482, "logits/rejected": -1.6479371786117554, "logps/chosen": -55.760154724121094, "logps/rejected": -97.13644409179688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.751079797744751, "rewards/margins": 18.547962188720703, "rewards/rejected": -20.299041748046875, "step": 3500 }, { "epoch": 1.6, "eval_logits/chosen": -1.5145334005355835, "eval_logits/rejected": -1.4307856559753418, "eval_logps/chosen": -85.55133819580078, "eval_logps/rejected": -101.94500732421875, "eval_loss": 0.009321879595518112, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -4.784679889678955, "eval_rewards/margins": 16.235702514648438, "eval_rewards/rejected": -21.0203857421875, "eval_runtime": 162.7265, "eval_samples_per_second": 17.588, "eval_steps_per_second": 1.1, "step": 3500 }, { "epoch": 1.6, "learning_rate": 1.552511415525114e-07, "logits/chosen": -1.7493646144866943, "logits/rejected": -1.6622288227081299, "logps/chosen": -57.598350524902344, "logps/rejected": -92.99654388427734, "loss": 0.0046, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.8141578435897827, "rewards/margins": 17.225921630859375, "rewards/rejected": -19.040081024169922, "step": 3510 }, { "epoch": 1.61, "learning_rate": 1.5474378488077118e-07, "logits/chosen": -1.7772926092147827, "logits/rejected": -1.686802864074707, "logps/chosen": -58.4640007019043, "logps/rejected": -99.5447769165039, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -2.02579927444458, "rewards/margins": 18.582260131835938, "rewards/rejected": -20.60805892944336, "step": 3520 }, { "epoch": 1.61, "learning_rate": 1.5423642820903095e-07, "logits/chosen": -1.7540311813354492, "logits/rejected": -1.6697025299072266, "logps/chosen": -58.2379150390625, "logps/rejected": -98.92530822753906, "loss": 0.0035, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.79413640499115, "rewards/margins": 18.22900390625, "rewards/rejected": -20.02313995361328, "step": 3530 }, { "epoch": 1.62, "learning_rate": 1.537290715372907e-07, "logits/chosen": -1.7464195489883423, "logits/rejected": -1.656534194946289, "logps/chosen": -55.99895095825195, "logps/rejected": -97.24131774902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6266868114471436, "rewards/margins": 18.616073608398438, "rewards/rejected": -20.242759704589844, "step": 3540 }, { "epoch": 1.62, "learning_rate": 1.5322171486555048e-07, "logits/chosen": -1.7702693939208984, "logits/rejected": -1.6523029804229736, "logps/chosen": -62.9988899230957, "logps/rejected": -101.58716583251953, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.6101036071777344, "rewards/margins": 19.229171752929688, "rewards/rejected": -20.83927345275879, "step": 3550 }, { "epoch": 1.63, "learning_rate": 1.5271435819381025e-07, "logits/chosen": -1.7666019201278687, "logits/rejected": -1.6752361059188843, "logps/chosen": -62.15509796142578, "logps/rejected": -101.52202606201172, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.9190824031829834, "rewards/margins": 18.65341567993164, "rewards/rejected": -20.572498321533203, "step": 3560 }, { "epoch": 1.63, "learning_rate": 1.5220700152207e-07, "logits/chosen": -1.7496618032455444, "logits/rejected": -1.6386051177978516, "logps/chosen": -59.79640579223633, "logps/rejected": -99.09815216064453, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.0094802379608154, "rewards/margins": 19.474361419677734, "rewards/rejected": -21.483840942382812, "step": 3570 }, { "epoch": 1.63, "learning_rate": 1.5169964485032978e-07, "logits/chosen": -1.7477951049804688, "logits/rejected": -1.6757123470306396, "logps/chosen": -54.9462890625, "logps/rejected": -98.96672058105469, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.8885055780410767, "rewards/margins": 18.964462280273438, "rewards/rejected": -20.85296630859375, "step": 3580 }, { "epoch": 1.64, "learning_rate": 1.5119228817858955e-07, "logits/chosen": -1.7456642389297485, "logits/rejected": -1.6490224599838257, "logps/chosen": -59.340980529785156, "logps/rejected": -104.44132995605469, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.894884467124939, "rewards/margins": 19.993732452392578, "rewards/rejected": -21.888614654541016, "step": 3590 }, { "epoch": 1.64, "learning_rate": 1.506849315068493e-07, "logits/chosen": -1.7418420314788818, "logits/rejected": -1.6429531574249268, "logps/chosen": -60.49907684326172, "logps/rejected": -99.40533447265625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.947064757347107, "rewards/margins": 18.43744468688965, "rewards/rejected": -20.384510040283203, "step": 3600 }, { "epoch": 1.64, "eval_logits/chosen": -1.5208775997161865, "eval_logits/rejected": -1.4372811317443848, "eval_logps/chosen": -85.1228256225586, "eval_logps/rejected": -101.82320404052734, "eval_loss": 0.00761109683662653, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -4.570422649383545, "eval_rewards/margins": 16.38906478881836, "eval_rewards/rejected": -20.95948600769043, "eval_runtime": 160.6097, "eval_samples_per_second": 17.82, "eval_steps_per_second": 1.115, "step": 3600 }, { "epoch": 1.65, "learning_rate": 1.5017757483510908e-07, "logits/chosen": -1.744821548461914, "logits/rejected": -1.6511716842651367, "logps/chosen": -57.97832107543945, "logps/rejected": -104.1939468383789, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.5856506824493408, "rewards/margins": 20.175537109375, "rewards/rejected": -21.761188507080078, "step": 3610 }, { "epoch": 1.65, "learning_rate": 1.4967021816336885e-07, "logits/chosen": -1.7192729711532593, "logits/rejected": -1.6354973316192627, "logps/chosen": -52.93940353393555, "logps/rejected": -93.718994140625, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6425870656967163, "rewards/margins": 18.601680755615234, "rewards/rejected": -20.244266510009766, "step": 3620 }, { "epoch": 1.66, "learning_rate": 1.491628614916286e-07, "logits/chosen": -1.7456947565078735, "logits/rejected": -1.662951111793518, "logps/chosen": -60.57215118408203, "logps/rejected": -105.87451171875, "loss": 0.0055, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.069582223892212, "rewards/margins": 19.797704696655273, "rewards/rejected": -21.86728858947754, "step": 3630 }, { "epoch": 1.66, "learning_rate": 1.4865550481988838e-07, "logits/chosen": -1.7416263818740845, "logits/rejected": -1.659208059310913, "logps/chosen": -57.92128372192383, "logps/rejected": -102.81998443603516, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.9102433919906616, "rewards/margins": 19.905513763427734, "rewards/rejected": -21.81575584411621, "step": 3640 }, { "epoch": 1.67, "learning_rate": 1.4814814814814815e-07, "logits/chosen": -1.7522609233856201, "logits/rejected": -1.6549276113510132, "logps/chosen": -61.67973709106445, "logps/rejected": -102.40510559082031, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.2504985332489014, "rewards/margins": 18.94046401977539, "rewards/rejected": -21.19096565246582, "step": 3650 }, { "epoch": 1.67, "learning_rate": 1.476407914764079e-07, "logits/chosen": -1.7534595727920532, "logits/rejected": -1.6623141765594482, "logps/chosen": -63.60465621948242, "logps/rejected": -101.19085693359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.382418394088745, "rewards/margins": 18.830825805664062, "rewards/rejected": -21.21324348449707, "step": 3660 }, { "epoch": 1.68, "learning_rate": 1.4713343480466768e-07, "logits/chosen": -1.755079984664917, "logits/rejected": -1.6616798639297485, "logps/chosen": -57.2696418762207, "logps/rejected": -102.25415802001953, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.1452555656433105, "rewards/margins": 19.918466567993164, "rewards/rejected": -22.063720703125, "step": 3670 }, { "epoch": 1.68, "learning_rate": 1.4662607813292745e-07, "logits/chosen": -1.758702278137207, "logits/rejected": -1.657207727432251, "logps/chosen": -58.540130615234375, "logps/rejected": -99.57029724121094, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.671474814414978, "rewards/margins": 19.523405075073242, "rewards/rejected": -21.19487953186035, "step": 3680 }, { "epoch": 1.68, "learning_rate": 1.461187214611872e-07, "logits/chosen": -1.7256863117218018, "logits/rejected": -1.639664888381958, "logps/chosen": -58.728057861328125, "logps/rejected": -101.06281280517578, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.20875883102417, "rewards/margins": 19.420764923095703, "rewards/rejected": -21.629526138305664, "step": 3690 }, { "epoch": 1.69, "learning_rate": 1.4561136478944698e-07, "logits/chosen": -1.7542873620986938, "logits/rejected": -1.6643635034561157, "logps/chosen": -57.53455352783203, "logps/rejected": -104.0309066772461, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.9398162364959717, "rewards/margins": 20.248397827148438, "rewards/rejected": -22.188215255737305, "step": 3700 }, { "epoch": 1.69, "eval_logits/chosen": -1.5147947072982788, "eval_logits/rejected": -1.4300434589385986, "eval_logps/chosen": -85.57495880126953, "eval_logps/rejected": -103.20862579345703, "eval_loss": 0.008725001476705074, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -4.79649019241333, "eval_rewards/margins": 16.855709075927734, "eval_rewards/rejected": -21.652198791503906, "eval_runtime": 152.7457, "eval_samples_per_second": 18.737, "eval_steps_per_second": 1.172, "step": 3700 }, { "epoch": 1.69, "learning_rate": 1.4510400811770675e-07, "logits/chosen": -1.727120041847229, "logits/rejected": -1.611891746520996, "logps/chosen": -63.399688720703125, "logps/rejected": -101.63905334472656, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.4259730577468872, "rewards/margins": 19.92839241027832, "rewards/rejected": -21.35436248779297, "step": 3710 }, { "epoch": 1.7, "learning_rate": 1.445966514459665e-07, "logits/chosen": -1.731498122215271, "logits/rejected": -1.6396141052246094, "logps/chosen": -57.56293869018555, "logps/rejected": -103.0668716430664, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.1499602794647217, "rewards/margins": 19.873432159423828, "rewards/rejected": -22.023391723632812, "step": 3720 }, { "epoch": 1.7, "learning_rate": 1.4408929477422628e-07, "logits/chosen": -1.7379181385040283, "logits/rejected": -1.645946741104126, "logps/chosen": -58.8451042175293, "logps/rejected": -103.89896392822266, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.7194750308990479, "rewards/margins": 20.302326202392578, "rewards/rejected": -22.021800994873047, "step": 3730 }, { "epoch": 1.71, "learning_rate": 1.4358193810248604e-07, "logits/chosen": -1.7280566692352295, "logits/rejected": -1.6349445581436157, "logps/chosen": -60.61736297607422, "logps/rejected": -102.3194351196289, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.244107484817505, "rewards/margins": 20.2021541595459, "rewards/rejected": -22.446266174316406, "step": 3740 }, { "epoch": 1.71, "learning_rate": 1.430745814307458e-07, "logits/chosen": -1.7542724609375, "logits/rejected": -1.6561269760131836, "logps/chosen": -61.44940948486328, "logps/rejected": -101.87255096435547, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.5370864868164062, "rewards/margins": 20.14695930480957, "rewards/rejected": -22.68404769897461, "step": 3750 }, { "epoch": 1.72, "learning_rate": 1.4256722475900558e-07, "logits/chosen": -1.7171341180801392, "logits/rejected": -1.6243823766708374, "logps/chosen": -55.97332000732422, "logps/rejected": -101.05335998535156, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.7775256633758545, "rewards/margins": 20.30779457092285, "rewards/rejected": -22.08531951904297, "step": 3760 }, { "epoch": 1.72, "learning_rate": 1.4205986808726534e-07, "logits/chosen": -1.7553892135620117, "logits/rejected": -1.6701765060424805, "logps/chosen": -58.353965759277344, "logps/rejected": -108.2108383178711, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.4113121032714844, "rewards/margins": 20.59480094909668, "rewards/rejected": -23.006113052368164, "step": 3770 }, { "epoch": 1.73, "learning_rate": 1.415525114155251e-07, "logits/chosen": -1.7134288549423218, "logits/rejected": -1.6168136596679688, "logps/chosen": -61.3768424987793, "logps/rejected": -104.5031509399414, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.53882098197937, "rewards/margins": 20.721431732177734, "rewards/rejected": -23.260251998901367, "step": 3780 }, { "epoch": 1.73, "learning_rate": 1.4104515474378488e-07, "logits/chosen": -1.7239723205566406, "logits/rejected": -1.6319774389266968, "logps/chosen": -61.295196533203125, "logps/rejected": -101.477783203125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.6111342906951904, "rewards/margins": 19.920166015625, "rewards/rejected": -22.531299591064453, "step": 3790 }, { "epoch": 1.73, "learning_rate": 1.4053779807204464e-07, "logits/chosen": -1.7131726741790771, "logits/rejected": -1.6239140033721924, "logps/chosen": -58.66071701049805, "logps/rejected": -100.33702087402344, "loss": 0.0056, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.3244271278381348, "rewards/margins": 19.755664825439453, "rewards/rejected": -22.08009147644043, "step": 3800 }, { "epoch": 1.73, "eval_logits/chosen": -1.505843162536621, "eval_logits/rejected": -1.4213422536849976, "eval_logps/chosen": -86.23440551757812, "eval_logps/rejected": -104.42259216308594, "eval_loss": 0.009252488613128662, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -5.1262125968933105, "eval_rewards/margins": 17.132970809936523, "eval_rewards/rejected": -22.25918197631836, "eval_runtime": 143.7786, "eval_samples_per_second": 19.906, "eval_steps_per_second": 1.245, "step": 3800 }, { "epoch": 1.74, "learning_rate": 1.400304414003044e-07, "logits/chosen": -1.7435963153839111, "logits/rejected": -1.6586761474609375, "logps/chosen": -57.895790100097656, "logps/rejected": -104.03873443603516, "loss": 0.0012, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.4536798000335693, "rewards/margins": 20.30872917175293, "rewards/rejected": -22.762409210205078, "step": 3810 }, { "epoch": 1.74, "learning_rate": 1.3952308472856418e-07, "logits/chosen": -1.7410093545913696, "logits/rejected": -1.6523460149765015, "logps/chosen": -56.3589973449707, "logps/rejected": -98.3576431274414, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9067256450653076, "rewards/margins": 19.53439712524414, "rewards/rejected": -21.441125869750977, "step": 3820 }, { "epoch": 1.75, "learning_rate": 1.3901572805682394e-07, "logits/chosen": -1.7533388137817383, "logits/rejected": -1.6592824459075928, "logps/chosen": -60.925758361816406, "logps/rejected": -102.49827575683594, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.274437427520752, "rewards/margins": 19.64658546447754, "rewards/rejected": -21.921022415161133, "step": 3830 }, { "epoch": 1.75, "learning_rate": 1.385083713850837e-07, "logits/chosen": -1.7348659038543701, "logits/rejected": -1.6244924068450928, "logps/chosen": -58.76271438598633, "logps/rejected": -102.5107192993164, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.965003252029419, "rewards/margins": 20.968101501464844, "rewards/rejected": -22.93310546875, "step": 3840 }, { "epoch": 1.76, "learning_rate": 1.3800101471334348e-07, "logits/chosen": -1.7477362155914307, "logits/rejected": -1.6386569738388062, "logps/chosen": -60.78839874267578, "logps/rejected": -95.99488067626953, "loss": 0.0056, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.53136944770813, "rewards/margins": 18.4868221282959, "rewards/rejected": -21.018192291259766, "step": 3850 }, { "epoch": 1.76, "learning_rate": 1.3749365804160324e-07, "logits/chosen": -1.7221263647079468, "logits/rejected": -1.623956322669983, "logps/chosen": -59.84740447998047, "logps/rejected": -100.80842590332031, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.1374335289001465, "rewards/margins": 19.245466232299805, "rewards/rejected": -21.38290023803711, "step": 3860 }, { "epoch": 1.77, "learning_rate": 1.36986301369863e-07, "logits/chosen": -1.7168346643447876, "logits/rejected": -1.6347538232803345, "logps/chosen": -59.019287109375, "logps/rejected": -106.46458435058594, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.109846591949463, "rewards/margins": 20.929542541503906, "rewards/rejected": -23.03938865661621, "step": 3870 }, { "epoch": 1.77, "learning_rate": 1.3647894469812278e-07, "logits/chosen": -1.7261329889297485, "logits/rejected": -1.631757378578186, "logps/chosen": -61.130950927734375, "logps/rejected": -105.9852523803711, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.3457083702087402, "rewards/margins": 21.277233123779297, "rewards/rejected": -23.622940063476562, "step": 3880 }, { "epoch": 1.78, "learning_rate": 1.3597158802638254e-07, "logits/chosen": -1.7206604480743408, "logits/rejected": -1.634374976158142, "logps/chosen": -56.732521057128906, "logps/rejected": -104.9426498413086, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.531989097595215, "rewards/margins": 20.72577476501465, "rewards/rejected": -23.25776481628418, "step": 3890 }, { "epoch": 1.78, "learning_rate": 1.354642313546423e-07, "logits/chosen": -1.710542917251587, "logits/rejected": -1.6287927627563477, "logps/chosen": -60.89078903198242, "logps/rejected": -100.89933013916016, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.0867960453033447, "rewards/margins": 19.906475067138672, "rewards/rejected": -22.993270874023438, "step": 3900 }, { "epoch": 1.78, "eval_logits/chosen": -1.4855576753616333, "eval_logits/rejected": -1.4014440774917603, "eval_logps/chosen": -87.70219421386719, "eval_logps/rejected": -107.43189239501953, "eval_loss": 0.011312047950923443, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -5.860104560852051, "eval_rewards/margins": 17.90372085571289, "eval_rewards/rejected": -23.76382827758789, "eval_runtime": 140.5169, "eval_samples_per_second": 20.368, "eval_steps_per_second": 1.274, "step": 3900 }, { "epoch": 1.78, "learning_rate": 1.3495687468290208e-07, "logits/chosen": -1.7028968334197998, "logits/rejected": -1.6030282974243164, "logps/chosen": -59.007225036621094, "logps/rejected": -101.65214538574219, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.518394947052002, "rewards/margins": 20.77285385131836, "rewards/rejected": -23.291250228881836, "step": 3910 }, { "epoch": 1.79, "learning_rate": 1.3444951801116184e-07, "logits/chosen": -1.7411220073699951, "logits/rejected": -1.6353130340576172, "logps/chosen": -60.95904541015625, "logps/rejected": -99.79039001464844, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.8324027061462402, "rewards/margins": 20.231990814208984, "rewards/rejected": -23.06439208984375, "step": 3920 }, { "epoch": 1.79, "learning_rate": 1.339421613394216e-07, "logits/chosen": -1.739633560180664, "logits/rejected": -1.6502149105072021, "logps/chosen": -61.77033233642578, "logps/rejected": -108.11653137207031, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.709038734436035, "rewards/margins": 21.357595443725586, "rewards/rejected": -24.066635131835938, "step": 3930 }, { "epoch": 1.8, "learning_rate": 1.3343480466768138e-07, "logits/chosen": -1.713693380355835, "logits/rejected": -1.620425820350647, "logps/chosen": -57.366050720214844, "logps/rejected": -103.63282775878906, "loss": 0.0073, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.4770348072052, "rewards/margins": 20.998950958251953, "rewards/rejected": -23.475988388061523, "step": 3940 }, { "epoch": 1.8, "learning_rate": 1.3292744799594114e-07, "logits/chosen": -1.7242887020111084, "logits/rejected": -1.635079026222229, "logps/chosen": -54.93780517578125, "logps/rejected": -101.4769515991211, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.7494869232177734, "rewards/margins": 20.059358596801758, "rewards/rejected": -21.80884552001953, "step": 3950 }, { "epoch": 1.81, "learning_rate": 1.324200913242009e-07, "logits/chosen": -1.7211713790893555, "logits/rejected": -1.635608434677124, "logps/chosen": -59.47595977783203, "logps/rejected": -102.97447204589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3539652824401855, "rewards/margins": 20.05697250366211, "rewards/rejected": -22.41093635559082, "step": 3960 }, { "epoch": 1.81, "learning_rate": 1.3191273465246068e-07, "logits/chosen": -1.712767243385315, "logits/rejected": -1.6329208612442017, "logps/chosen": -57.88202667236328, "logps/rejected": -103.71693420410156, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.4641976356506348, "rewards/margins": 20.41353416442871, "rewards/rejected": -22.87773323059082, "step": 3970 }, { "epoch": 1.82, "learning_rate": 1.3140537798072044e-07, "logits/chosen": -1.7149088382720947, "logits/rejected": -1.6296100616455078, "logps/chosen": -56.60906982421875, "logps/rejected": -98.81111907958984, "loss": 0.0045, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.384639263153076, "rewards/margins": 19.819398880004883, "rewards/rejected": -22.204036712646484, "step": 3980 }, { "epoch": 1.82, "learning_rate": 1.308980213089802e-07, "logits/chosen": -1.7182649374008179, "logits/rejected": -1.6282594203948975, "logps/chosen": -59.917518615722656, "logps/rejected": -104.40673828125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.4817817211151123, "rewards/margins": 20.250804901123047, "rewards/rejected": -22.732585906982422, "step": 3990 }, { "epoch": 1.83, "learning_rate": 1.3039066463723998e-07, "logits/chosen": -1.7276928424835205, "logits/rejected": -1.6250627040863037, "logps/chosen": -61.18939971923828, "logps/rejected": -102.0118408203125, "loss": 0.0034, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.5209269523620605, "rewards/margins": 20.70689582824707, "rewards/rejected": -23.22782325744629, "step": 4000 }, { "epoch": 1.83, "eval_logits/chosen": -1.5083593130111694, "eval_logits/rejected": -1.4252426624298096, "eval_logps/chosen": -85.39736938476562, "eval_logps/rejected": -104.9570083618164, "eval_loss": 0.005603944417089224, "eval_rewards/accuracies": 0.994413435459137, "eval_rewards/chosen": -4.707695484161377, "eval_rewards/margins": 17.818696975708008, "eval_rewards/rejected": -22.526391983032227, "eval_runtime": 146.6093, "eval_samples_per_second": 19.521, "eval_steps_per_second": 1.221, "step": 4000 }, { "epoch": 1.83, "learning_rate": 1.2988330796549974e-07, "logits/chosen": -1.7435764074325562, "logits/rejected": -1.6490623950958252, "logps/chosen": -59.32027053833008, "logps/rejected": -98.74386596679688, "loss": 0.0066, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -2.657484769821167, "rewards/margins": 18.735923767089844, "rewards/rejected": -21.393407821655273, "step": 4010 }, { "epoch": 1.83, "learning_rate": 1.293759512937595e-07, "logits/chosen": -1.711301565170288, "logits/rejected": -1.632124900817871, "logps/chosen": -59.0091667175293, "logps/rejected": -103.40666198730469, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.394582509994507, "rewards/margins": 19.536239624023438, "rewards/rejected": -21.93082618713379, "step": 4020 }, { "epoch": 1.84, "learning_rate": 1.2886859462201928e-07, "logits/chosen": -1.7019872665405273, "logits/rejected": -1.6248067617416382, "logps/chosen": -59.6156120300293, "logps/rejected": -102.67716979980469, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.609593629837036, "rewards/margins": 19.72539710998535, "rewards/rejected": -22.334989547729492, "step": 4030 }, { "epoch": 1.84, "learning_rate": 1.2836123795027904e-07, "logits/chosen": -1.707863450050354, "logits/rejected": -1.6396872997283936, "logps/chosen": -55.025291442871094, "logps/rejected": -102.29816436767578, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.0787899494171143, "rewards/margins": 19.629016876220703, "rewards/rejected": -21.707805633544922, "step": 4040 }, { "epoch": 1.85, "learning_rate": 1.278538812785388e-07, "logits/chosen": -1.72417414188385, "logits/rejected": -1.6330257654190063, "logps/chosen": -61.2284049987793, "logps/rejected": -106.95091247558594, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.1726553440093994, "rewards/margins": 20.560657501220703, "rewards/rejected": -22.733312606811523, "step": 4050 }, { "epoch": 1.85, "learning_rate": 1.2734652460679858e-07, "logits/chosen": -1.750223159790039, "logits/rejected": -1.658272385597229, "logps/chosen": -61.496177673339844, "logps/rejected": -100.18705749511719, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.0673086643218994, "rewards/margins": 18.649917602539062, "rewards/rejected": -20.717227935791016, "step": 4060 }, { "epoch": 1.86, "learning_rate": 1.2683916793505834e-07, "logits/chosen": -1.6931970119476318, "logits/rejected": -1.624407410621643, "logps/chosen": -55.50725173950195, "logps/rejected": -103.68687438964844, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.344270944595337, "rewards/margins": 20.012542724609375, "rewards/rejected": -22.356815338134766, "step": 4070 }, { "epoch": 1.86, "learning_rate": 1.263318112633181e-07, "logits/chosen": -1.7359850406646729, "logits/rejected": -1.6276299953460693, "logps/chosen": -61.17974090576172, "logps/rejected": -101.77490234375, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.0892181396484375, "rewards/margins": 18.614036560058594, "rewards/rejected": -20.70325469970703, "step": 4080 }, { "epoch": 1.87, "learning_rate": 1.2582445459157788e-07, "logits/chosen": -1.7262541055679321, "logits/rejected": -1.6360960006713867, "logps/chosen": -60.631103515625, "logps/rejected": -103.457275390625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.3985886573791504, "rewards/margins": 19.82919692993164, "rewards/rejected": -22.227787017822266, "step": 4090 }, { "epoch": 1.87, "learning_rate": 1.2531709791983764e-07, "logits/chosen": -1.7521579265594482, "logits/rejected": -1.6497102975845337, "logps/chosen": -58.76811599731445, "logps/rejected": -104.64798736572266, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.8879072666168213, "rewards/margins": 19.89638328552246, "rewards/rejected": -21.784290313720703, "step": 4100 }, { "epoch": 1.87, "eval_logits/chosen": -1.516519546508789, "eval_logits/rejected": -1.4342458248138428, "eval_logps/chosen": -84.54878234863281, "eval_logps/rejected": -103.28939056396484, "eval_loss": 0.005533331073820591, "eval_rewards/accuracies": 0.9972066879272461, "eval_rewards/chosen": -4.283407688140869, "eval_rewards/margins": 17.4091796875, "eval_rewards/rejected": -21.69258689880371, "eval_runtime": 131.2471, "eval_samples_per_second": 21.806, "eval_steps_per_second": 1.364, "step": 4100 }, { "epoch": 1.88, "learning_rate": 1.248097412480974e-07, "logits/chosen": -1.7417571544647217, "logits/rejected": -1.6413519382476807, "logps/chosen": -60.33240509033203, "logps/rejected": -102.51177978515625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.6107101440429688, "rewards/margins": 19.836740493774414, "rewards/rejected": -21.447450637817383, "step": 4110 }, { "epoch": 1.88, "learning_rate": 1.2430238457635718e-07, "logits/chosen": -1.7408193349838257, "logits/rejected": -1.6514530181884766, "logps/chosen": -59.55634307861328, "logps/rejected": -103.1875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.4238922595977783, "rewards/margins": 20.01588249206543, "rewards/rejected": -22.43977165222168, "step": 4120 }, { "epoch": 1.89, "learning_rate": 1.2379502790461694e-07, "logits/chosen": -1.7198892831802368, "logits/rejected": -1.6175673007965088, "logps/chosen": -57.6531982421875, "logps/rejected": -95.41826629638672, "loss": 0.0054, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.287550449371338, "rewards/margins": 18.77586555480957, "rewards/rejected": -21.06341552734375, "step": 4130 }, { "epoch": 1.89, "learning_rate": 1.232876712328767e-07, "logits/chosen": -1.7502750158309937, "logits/rejected": -1.6409406661987305, "logps/chosen": -58.13480758666992, "logps/rejected": -101.2226791381836, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.8727871179580688, "rewards/margins": 20.0989990234375, "rewards/rejected": -21.971782684326172, "step": 4140 }, { "epoch": 1.89, "learning_rate": 1.2278031456113648e-07, "logits/chosen": -1.759542465209961, "logits/rejected": -1.6358810663223267, "logps/chosen": -66.03750610351562, "logps/rejected": -101.20765686035156, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.005901575088501, "rewards/margins": 19.78945541381836, "rewards/rejected": -21.79535675048828, "step": 4150 }, { "epoch": 1.9, "learning_rate": 1.2227295788939624e-07, "logits/chosen": -1.7395435571670532, "logits/rejected": -1.6474955081939697, "logps/chosen": -59.45258331298828, "logps/rejected": -103.9498062133789, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.4537131786346436, "rewards/margins": 20.302892684936523, "rewards/rejected": -22.75660514831543, "step": 4160 }, { "epoch": 1.9, "learning_rate": 1.21765601217656e-07, "logits/chosen": -1.7050672769546509, "logits/rejected": -1.6305850744247437, "logps/chosen": -55.50453567504883, "logps/rejected": -102.51220703125, "loss": 0.0034, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.9435291290283203, "rewards/margins": 19.749547958374023, "rewards/rejected": -21.693078994750977, "step": 4170 }, { "epoch": 1.91, "learning_rate": 1.2125824454591578e-07, "logits/chosen": -1.7227487564086914, "logits/rejected": -1.6449857950210571, "logps/chosen": -60.23795700073242, "logps/rejected": -103.9252700805664, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.536226511001587, "rewards/margins": 20.490659713745117, "rewards/rejected": -23.02688980102539, "step": 4180 }, { "epoch": 1.91, "learning_rate": 1.2075088787417554e-07, "logits/chosen": -1.7252013683319092, "logits/rejected": -1.6343914270401, "logps/chosen": -58.06959915161133, "logps/rejected": -102.8636703491211, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.519500732421875, "rewards/margins": 20.217092514038086, "rewards/rejected": -22.73659324645996, "step": 4190 }, { "epoch": 1.92, "learning_rate": 1.202435312024353e-07, "logits/chosen": -1.7138278484344482, "logits/rejected": -1.6251713037490845, "logps/chosen": -59.78468704223633, "logps/rejected": -101.74442291259766, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3976869583129883, "rewards/margins": 19.762615203857422, "rewards/rejected": -22.160301208496094, "step": 4200 }, { "epoch": 1.92, "eval_logits/chosen": -1.5051515102386475, "eval_logits/rejected": -1.4219218492507935, "eval_logps/chosen": -86.4904556274414, "eval_logps/rejected": -106.72370147705078, "eval_loss": 0.006755765061825514, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -5.254235744476318, "eval_rewards/margins": 18.15550422668457, "eval_rewards/rejected": -23.409738540649414, "eval_runtime": 167.7976, "eval_samples_per_second": 17.056, "eval_steps_per_second": 1.067, "step": 4200 }, { "epoch": 1.92, "learning_rate": 1.1973617453069508e-07, "logits/chosen": -1.7196204662322998, "logits/rejected": -1.6182119846343994, "logps/chosen": -57.03583908081055, "logps/rejected": -105.01580810546875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -2.0129735469818115, "rewards/margins": 22.04193687438965, "rewards/rejected": -24.05491065979004, "step": 4210 }, { "epoch": 1.93, "learning_rate": 1.1922881785895484e-07, "logits/chosen": -1.7210935354232788, "logits/rejected": -1.6417878866195679, "logps/chosen": -59.10405349731445, "logps/rejected": -108.11570739746094, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.621901035308838, "rewards/margins": 21.172082901000977, "rewards/rejected": -23.79398536682129, "step": 4220 }, { "epoch": 1.93, "learning_rate": 1.187214611872146e-07, "logits/chosen": -1.6935851573944092, "logits/rejected": -1.6083828210830688, "logps/chosen": -60.400779724121094, "logps/rejected": -110.29762268066406, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.7976603507995605, "rewards/margins": 22.258953094482422, "rewards/rejected": -25.05661392211914, "step": 4230 }, { "epoch": 1.94, "learning_rate": 1.1821410451547436e-07, "logits/chosen": -1.731496810913086, "logits/rejected": -1.6574420928955078, "logps/chosen": -56.34038162231445, "logps/rejected": -107.848876953125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.164360523223877, "rewards/margins": 21.40178680419922, "rewards/rejected": -23.56614875793457, "step": 4240 }, { "epoch": 1.94, "learning_rate": 1.1770674784373413e-07, "logits/chosen": -1.6939624547958374, "logits/rejected": -1.6362526416778564, "logps/chosen": -56.355072021484375, "logps/rejected": -105.94290924072266, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.9071943759918213, "rewards/margins": 21.0939884185791, "rewards/rejected": -24.001184463500977, "step": 4250 }, { "epoch": 1.94, "learning_rate": 1.171993911719939e-07, "logits/chosen": -1.716327428817749, "logits/rejected": -1.6449180841445923, "logps/chosen": -57.47583770751953, "logps/rejected": -104.41899108886719, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.8491833209991455, "rewards/margins": 20.430660247802734, "rewards/rejected": -23.279842376708984, "step": 4260 }, { "epoch": 1.95, "learning_rate": 1.1669203450025366e-07, "logits/chosen": -1.721514105796814, "logits/rejected": -1.6436516046524048, "logps/chosen": -58.92315673828125, "logps/rejected": -103.16642761230469, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -2.3837337493896484, "rewards/margins": 20.6968936920166, "rewards/rejected": -23.080623626708984, "step": 4270 }, { "epoch": 1.95, "learning_rate": 1.1618467782851343e-07, "logits/chosen": -1.7137353420257568, "logits/rejected": -1.6262519359588623, "logps/chosen": -57.92986297607422, "logps/rejected": -106.00459289550781, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.288856029510498, "rewards/margins": 21.330066680908203, "rewards/rejected": -23.61892318725586, "step": 4280 }, { "epoch": 1.96, "learning_rate": 1.156773211567732e-07, "logits/chosen": -1.6893755197525024, "logits/rejected": -1.5906169414520264, "logps/chosen": -59.3648796081543, "logps/rejected": -101.29659271240234, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.7958602905273438, "rewards/margins": 20.5319881439209, "rewards/rejected": -23.327848434448242, "step": 4290 }, { "epoch": 1.96, "learning_rate": 1.1516996448503296e-07, "logits/chosen": -1.6958662271499634, "logits/rejected": -1.611880898475647, "logps/chosen": -59.040428161621094, "logps/rejected": -108.2059326171875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.1087820529937744, "rewards/margins": 21.529190063476562, "rewards/rejected": -23.63797378540039, "step": 4300 }, { "epoch": 1.96, "eval_logits/chosen": -1.4921066761016846, "eval_logits/rejected": -1.4097992181777954, "eval_logps/chosen": -86.48042297363281, "eval_logps/rejected": -106.46904754638672, "eval_loss": 0.007482536602765322, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -5.2492241859436035, "eval_rewards/margins": 18.03318214416504, "eval_rewards/rejected": -23.28240394592285, "eval_runtime": 152.1579, "eval_samples_per_second": 18.809, "eval_steps_per_second": 1.176, "step": 4300 }, { "epoch": 1.97, "learning_rate": 1.1466260781329273e-07, "logits/chosen": -1.7025010585784912, "logits/rejected": -1.6234486103057861, "logps/chosen": -58.45012283325195, "logps/rejected": -104.7151870727539, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.6991915702819824, "rewards/margins": 20.52659034729004, "rewards/rejected": -23.22578239440918, "step": 4310 }, { "epoch": 1.97, "learning_rate": 1.141552511415525e-07, "logits/chosen": -1.720330834388733, "logits/rejected": -1.6162750720977783, "logps/chosen": -61.7385139465332, "logps/rejected": -103.19317626953125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.2113752365112305, "rewards/margins": 20.69656753540039, "rewards/rejected": -22.907943725585938, "step": 4320 }, { "epoch": 1.98, "learning_rate": 1.1364789446981226e-07, "logits/chosen": -1.7113571166992188, "logits/rejected": -1.620640516281128, "logps/chosen": -65.27436828613281, "logps/rejected": -107.57862854003906, "loss": 0.0048, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.797924518585205, "rewards/margins": 20.757877349853516, "rewards/rejected": -23.555797576904297, "step": 4330 }, { "epoch": 1.98, "learning_rate": 1.1314053779807203e-07, "logits/chosen": -1.7076358795166016, "logits/rejected": -1.6240257024765015, "logps/chosen": -54.62898635864258, "logps/rejected": -103.197509765625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.441157341003418, "rewards/margins": 20.72103500366211, "rewards/rejected": -23.162189483642578, "step": 4340 }, { "epoch": 1.99, "learning_rate": 1.126331811263318e-07, "logits/chosen": -1.733656644821167, "logits/rejected": -1.6405130624771118, "logps/chosen": -62.487953186035156, "logps/rejected": -102.05244445800781, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.0095598697662354, "rewards/margins": 19.754032135009766, "rewards/rejected": -21.763591766357422, "step": 4350 }, { "epoch": 1.99, "learning_rate": 1.1212582445459156e-07, "logits/chosen": -1.7253767251968384, "logits/rejected": -1.6205635070800781, "logps/chosen": -63.746116638183594, "logps/rejected": -108.1665267944336, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.041841506958008, "rewards/margins": 20.789043426513672, "rewards/rejected": -23.830883026123047, "step": 4360 }, { "epoch": 1.99, "learning_rate": 1.1161846778285133e-07, "logits/chosen": -1.7052295207977295, "logits/rejected": -1.6197770833969116, "logps/chosen": -53.97678756713867, "logps/rejected": -105.70039367675781, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.049503803253174, "rewards/margins": 21.83470916748047, "rewards/rejected": -23.884212493896484, "step": 4370 }, { "epoch": 2.0, "learning_rate": 1.111111111111111e-07, "logits/chosen": -1.7089096307754517, "logits/rejected": -1.6089298725128174, "logps/chosen": -60.19716262817383, "logps/rejected": -108.61988830566406, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.3397135734558105, "rewards/margins": 22.69954490661621, "rewards/rejected": -25.03925895690918, "step": 4380 }, { "epoch": 2.0, "learning_rate": 1.1060375443937086e-07, "logits/chosen": -1.7114375829696655, "logits/rejected": -1.6113615036010742, "logps/chosen": -59.75043487548828, "logps/rejected": -101.7315673828125, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.9044723510742188, "rewards/margins": 20.202335357666016, "rewards/rejected": -23.106807708740234, "step": 4390 }, { "epoch": 2.01, "learning_rate": 1.1009639776763063e-07, "logits/chosen": -1.7055928707122803, "logits/rejected": -1.6417690515518188, "logps/chosen": -56.58637619018555, "logps/rejected": -108.21183013916016, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.384255886077881, "rewards/margins": 21.54519271850586, "rewards/rejected": -23.9294490814209, "step": 4400 }, { "epoch": 2.01, "eval_logits/chosen": -1.49055814743042, "eval_logits/rejected": -1.4087382555007935, "eval_logps/chosen": -87.22195434570312, "eval_logps/rejected": -107.77252960205078, "eval_loss": 0.008222967386245728, "eval_rewards/accuracies": 0.994413435459137, "eval_rewards/chosen": -5.619990348815918, "eval_rewards/margins": 18.314165115356445, "eval_rewards/rejected": -23.93415641784668, "eval_runtime": 162.4003, "eval_samples_per_second": 17.623, "eval_steps_per_second": 1.102, "step": 4400 }, { "epoch": 2.01, "learning_rate": 1.095890410958904e-07, "logits/chosen": -1.6955058574676514, "logits/rejected": -1.615828514099121, "logps/chosen": -61.4653205871582, "logps/rejected": -108.96707916259766, "loss": 0.0043, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.2798190116882324, "rewards/margins": 20.68682098388672, "rewards/rejected": -23.966638565063477, "step": 4410 }, { "epoch": 2.02, "learning_rate": 1.0908168442415016e-07, "logits/chosen": -1.7015609741210938, "logits/rejected": -1.6179053783416748, "logps/chosen": -60.08527755737305, "logps/rejected": -104.529296875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -3.195782423019409, "rewards/margins": 21.137939453125, "rewards/rejected": -24.333721160888672, "step": 4420 }, { "epoch": 2.02, "learning_rate": 1.0857432775240993e-07, "logits/chosen": -1.6842849254608154, "logits/rejected": -1.6135200262069702, "logps/chosen": -59.04445266723633, "logps/rejected": -106.99456787109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.8437340259552, "rewards/margins": 21.239042282104492, "rewards/rejected": -24.082775115966797, "step": 4430 }, { "epoch": 2.03, "learning_rate": 1.080669710806697e-07, "logits/chosen": -1.7014793157577515, "logits/rejected": -1.6122283935546875, "logps/chosen": -57.35608673095703, "logps/rejected": -107.4156723022461, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.6503283977508545, "rewards/margins": 22.468738555908203, "rewards/rejected": -25.119068145751953, "step": 4440 }, { "epoch": 2.03, "learning_rate": 1.0755961440892946e-07, "logits/chosen": -1.6996396780014038, "logits/rejected": -1.6209256649017334, "logps/chosen": -56.53656005859375, "logps/rejected": -108.75687408447266, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.6461291313171387, "rewards/margins": 22.431716918945312, "rewards/rejected": -25.07784652709961, "step": 4450 }, { "epoch": 2.04, "learning_rate": 1.0705225773718923e-07, "logits/chosen": -1.6660305261611938, "logits/rejected": -1.5947883129119873, "logps/chosen": -58.48649215698242, "logps/rejected": -106.2381591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.893306016921997, "rewards/margins": 21.230649948120117, "rewards/rejected": -24.123958587646484, "step": 4460 }, { "epoch": 2.04, "learning_rate": 1.06544901065449e-07, "logits/chosen": -1.702347993850708, "logits/rejected": -1.608782410621643, "logps/chosen": -62.32471466064453, "logps/rejected": -104.51692199707031, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.5780274868011475, "rewards/margins": 20.563426971435547, "rewards/rejected": -23.14145278930664, "step": 4470 }, { "epoch": 2.04, "learning_rate": 1.0603754439370876e-07, "logits/chosen": -1.7274497747421265, "logits/rejected": -1.617231011390686, "logps/chosen": -61.200843811035156, "logps/rejected": -107.774169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7115633487701416, "rewards/margins": 22.002866744995117, "rewards/rejected": -24.714427947998047, "step": 4480 }, { "epoch": 2.05, "learning_rate": 1.0553018772196853e-07, "logits/chosen": -1.6974376440048218, "logits/rejected": -1.6113479137420654, "logps/chosen": -58.778778076171875, "logps/rejected": -106.168701171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.8068671226501465, "rewards/margins": 21.432415008544922, "rewards/rejected": -24.239282608032227, "step": 4490 }, { "epoch": 2.05, "learning_rate": 1.050228310502283e-07, "logits/chosen": -1.6986557245254517, "logits/rejected": -1.6170848608016968, "logps/chosen": -56.63262176513672, "logps/rejected": -107.97099304199219, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.7931902408599854, "rewards/margins": 20.993011474609375, "rewards/rejected": -23.78619956970215, "step": 4500 }, { "epoch": 2.05, "eval_logits/chosen": -1.485693097114563, "eval_logits/rejected": -1.403558373451233, "eval_logps/chosen": -87.8786849975586, "eval_logps/rejected": -109.02555084228516, "eval_loss": 0.009082186035811901, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -5.948355197906494, "eval_rewards/margins": 18.612306594848633, "eval_rewards/rejected": -24.56066131591797, "eval_runtime": 165.5502, "eval_samples_per_second": 17.288, "eval_steps_per_second": 1.081, "step": 4500 }, { "epoch": 2.06, "learning_rate": 1.0451547437848806e-07, "logits/chosen": -1.7064087390899658, "logits/rejected": -1.6112060546875, "logps/chosen": -61.651649475097656, "logps/rejected": -103.2201919555664, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.2393550872802734, "rewards/margins": 21.777130126953125, "rewards/rejected": -24.01648712158203, "step": 4510 }, { "epoch": 2.06, "learning_rate": 1.0400811770674783e-07, "logits/chosen": -1.6898605823516846, "logits/rejected": -1.6058692932128906, "logps/chosen": -54.533302307128906, "logps/rejected": -103.07552337646484, "loss": 0.0065, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.6751835346221924, "rewards/margins": 21.617483139038086, "rewards/rejected": -24.292667388916016, "step": 4520 }, { "epoch": 2.07, "learning_rate": 1.035007610350076e-07, "logits/chosen": -1.7024545669555664, "logits/rejected": -1.6204640865325928, "logps/chosen": -60.74576950073242, "logps/rejected": -104.13387298583984, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.8922746181488037, "rewards/margins": 20.929811477661133, "rewards/rejected": -23.822086334228516, "step": 4530 }, { "epoch": 2.07, "learning_rate": 1.0299340436326736e-07, "logits/chosen": -1.6919069290161133, "logits/rejected": -1.6228523254394531, "logps/chosen": -58.68035125732422, "logps/rejected": -105.86567687988281, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.017421007156372, "rewards/margins": 20.6372013092041, "rewards/rejected": -23.65462303161621, "step": 4540 }, { "epoch": 2.08, "learning_rate": 1.0248604769152713e-07, "logits/chosen": -1.6782621145248413, "logits/rejected": -1.6088836193084717, "logps/chosen": -56.09760665893555, "logps/rejected": -108.61997985839844, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.794865131378174, "rewards/margins": 21.76509666442871, "rewards/rejected": -24.559961318969727, "step": 4550 }, { "epoch": 2.08, "learning_rate": 1.019786910197869e-07, "logits/chosen": -1.705232858657837, "logits/rejected": -1.6264293193817139, "logps/chosen": -61.69176483154297, "logps/rejected": -104.30326843261719, "loss": 0.0065, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.840217113494873, "rewards/margins": 20.118412017822266, "rewards/rejected": -23.958629608154297, "step": 4560 }, { "epoch": 2.09, "learning_rate": 1.0147133434804666e-07, "logits/chosen": -1.6915639638900757, "logits/rejected": -1.5887486934661865, "logps/chosen": -60.4460334777832, "logps/rejected": -107.79622650146484, "loss": 0.0054, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.5117955207824707, "rewards/margins": 22.500534057617188, "rewards/rejected": -25.0123291015625, "step": 4570 }, { "epoch": 2.09, "learning_rate": 1.0096397767630643e-07, "logits/chosen": -1.692584753036499, "logits/rejected": -1.5952820777893066, "logps/chosen": -60.45515060424805, "logps/rejected": -107.60456848144531, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.6986634731292725, "rewards/margins": 22.19002914428711, "rewards/rejected": -24.888689041137695, "step": 4580 }, { "epoch": 2.1, "learning_rate": 1.004566210045662e-07, "logits/chosen": -1.6760507822036743, "logits/rejected": -1.6011130809783936, "logps/chosen": -57.7957763671875, "logps/rejected": -107.33087158203125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.696836471557617, "rewards/margins": 21.473825454711914, "rewards/rejected": -24.1706600189209, "step": 4590 }, { "epoch": 2.1, "learning_rate": 9.994926433282596e-08, "logits/chosen": -1.6986204385757446, "logits/rejected": -1.613415002822876, "logps/chosen": -60.96204376220703, "logps/rejected": -112.97148132324219, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.995871067047119, "rewards/margins": 22.890216827392578, "rewards/rejected": -25.88608741760254, "step": 4600 }, { "epoch": 2.1, "eval_logits/chosen": -1.480384349822998, "eval_logits/rejected": -1.3979898691177368, "eval_logps/chosen": -88.09606170654297, "eval_logps/rejected": -109.98899841308594, "eval_loss": 0.009054499678313732, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -6.057043552398682, "eval_rewards/margins": 18.985332489013672, "eval_rewards/rejected": -25.04237937927246, "eval_runtime": 145.1115, "eval_samples_per_second": 19.723, "eval_steps_per_second": 1.234, "step": 4600 }, { "epoch": 2.1, "learning_rate": 9.944190766108573e-08, "logits/chosen": -1.7015920877456665, "logits/rejected": -1.6078838109970093, "logps/chosen": -60.95823287963867, "logps/rejected": -107.05985260009766, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.7135543823242188, "rewards/margins": 21.817249298095703, "rewards/rejected": -24.530803680419922, "step": 4610 }, { "epoch": 2.11, "learning_rate": 9.89345509893455e-08, "logits/chosen": -1.6830532550811768, "logits/rejected": -1.6148555278778076, "logps/chosen": -57.418052673339844, "logps/rejected": -108.89680480957031, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.040402412414551, "rewards/margins": 22.17599105834961, "rewards/rejected": -25.21639060974121, "step": 4620 }, { "epoch": 2.11, "learning_rate": 9.842719431760526e-08, "logits/chosen": -1.6680667400360107, "logits/rejected": -1.575258493423462, "logps/chosen": -61.173545837402344, "logps/rejected": -110.5925521850586, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.2073662281036377, "rewards/margins": 22.849517822265625, "rewards/rejected": -26.056884765625, "step": 4630 }, { "epoch": 2.12, "learning_rate": 9.791983764586503e-08, "logits/chosen": -1.6535594463348389, "logits/rejected": -1.560339331626892, "logps/chosen": -58.0858154296875, "logps/rejected": -102.92674255371094, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.0235629081726074, "rewards/margins": 21.722715377807617, "rewards/rejected": -24.746280670166016, "step": 4640 }, { "epoch": 2.12, "learning_rate": 9.74124809741248e-08, "logits/chosen": -1.686806082725525, "logits/rejected": -1.5998756885528564, "logps/chosen": -58.31231689453125, "logps/rejected": -111.29328918457031, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -2.88480544090271, "rewards/margins": 23.29157257080078, "rewards/rejected": -26.176376342773438, "step": 4650 }, { "epoch": 2.13, "learning_rate": 9.690512430238456e-08, "logits/chosen": -1.6860908269882202, "logits/rejected": -1.591294527053833, "logps/chosen": -60.230491638183594, "logps/rejected": -108.77268981933594, "loss": 0.0043, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.4025626182556152, "rewards/margins": 22.170820236206055, "rewards/rejected": -25.57338523864746, "step": 4660 }, { "epoch": 2.13, "learning_rate": 9.639776763064433e-08, "logits/chosen": -1.6946262121200562, "logits/rejected": -1.6074960231781006, "logps/chosen": -62.180450439453125, "logps/rejected": -111.79603576660156, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.5009357929229736, "rewards/margins": 22.072834014892578, "rewards/rejected": -25.57377052307129, "step": 4670 }, { "epoch": 2.14, "learning_rate": 9.58904109589041e-08, "logits/chosen": -1.692679762840271, "logits/rejected": -1.5875680446624756, "logps/chosen": -60.60527420043945, "logps/rejected": -106.55836486816406, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.20111083984375, "rewards/margins": 22.19775390625, "rewards/rejected": -25.398862838745117, "step": 4680 }, { "epoch": 2.14, "learning_rate": 9.538305428716386e-08, "logits/chosen": -1.677522897720337, "logits/rejected": -1.5916025638580322, "logps/chosen": -64.4634017944336, "logps/rejected": -106.9753189086914, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.455249786376953, "rewards/margins": 21.74876594543457, "rewards/rejected": -25.204017639160156, "step": 4690 }, { "epoch": 2.15, "learning_rate": 9.487569761542363e-08, "logits/chosen": -1.680101752281189, "logits/rejected": -1.5896421670913696, "logps/chosen": -60.176551818847656, "logps/rejected": -102.62559509277344, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.885277271270752, "rewards/margins": 21.611709594726562, "rewards/rejected": -24.49698829650879, "step": 4700 }, { "epoch": 2.15, "eval_logits/chosen": -1.4732141494750977, "eval_logits/rejected": -1.3907400369644165, "eval_logps/chosen": -88.74838256835938, "eval_logps/rejected": -111.12362670898438, "eval_loss": 0.01002542581409216, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -6.383204460144043, "eval_rewards/margins": 19.226491928100586, "eval_rewards/rejected": -25.609697341918945, "eval_runtime": 175.6871, "eval_samples_per_second": 16.29, "eval_steps_per_second": 1.019, "step": 4700 }, { "epoch": 2.15, "learning_rate": 9.43683409436834e-08, "logits/chosen": -1.672640085220337, "logits/rejected": -1.5956499576568604, "logps/chosen": -60.055267333984375, "logps/rejected": -114.6338119506836, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.480705738067627, "rewards/margins": 23.14187240600586, "rewards/rejected": -26.62257957458496, "step": 4710 }, { "epoch": 2.15, "learning_rate": 9.386098427194316e-08, "logits/chosen": -1.6616106033325195, "logits/rejected": -1.575148105621338, "logps/chosen": -61.344154357910156, "logps/rejected": -107.2572250366211, "loss": 0.0043, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.7119224071502686, "rewards/margins": 21.411645889282227, "rewards/rejected": -25.12356948852539, "step": 4720 }, { "epoch": 2.16, "learning_rate": 9.335362760020293e-08, "logits/chosen": -1.6699577569961548, "logits/rejected": -1.5892879962921143, "logps/chosen": -58.083099365234375, "logps/rejected": -106.17130279541016, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -3.9335007667541504, "rewards/margins": 21.795217514038086, "rewards/rejected": -25.72871971130371, "step": 4730 }, { "epoch": 2.16, "learning_rate": 9.28462709284627e-08, "logits/chosen": -1.6922982931137085, "logits/rejected": -1.59556245803833, "logps/chosen": -60.62724685668945, "logps/rejected": -108.93511962890625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.105750322341919, "rewards/margins": 22.99467658996582, "rewards/rejected": -26.10042381286621, "step": 4740 }, { "epoch": 2.17, "learning_rate": 9.233891425672246e-08, "logits/chosen": -1.7342430353164673, "logits/rejected": -1.6406952142715454, "logps/chosen": -60.60820388793945, "logps/rejected": -106.60917663574219, "loss": 0.0054, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.723804235458374, "rewards/margins": 21.95320701599121, "rewards/rejected": -24.677011489868164, "step": 4750 }, { "epoch": 2.17, "learning_rate": 9.183155758498223e-08, "logits/chosen": -1.6949208974838257, "logits/rejected": -1.6064071655273438, "logps/chosen": -59.212013244628906, "logps/rejected": -105.79365539550781, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.6465213298797607, "rewards/margins": 21.635541915893555, "rewards/rejected": -24.28206443786621, "step": 4760 }, { "epoch": 2.18, "learning_rate": 9.1324200913242e-08, "logits/chosen": -1.725602388381958, "logits/rejected": -1.6477651596069336, "logps/chosen": -61.2446174621582, "logps/rejected": -111.3396987915039, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.967522621154785, "rewards/margins": 22.574275970458984, "rewards/rejected": -25.541797637939453, "step": 4770 }, { "epoch": 2.18, "learning_rate": 9.081684424150176e-08, "logits/chosen": -1.6925163269042969, "logits/rejected": -1.6027542352676392, "logps/chosen": -59.8698844909668, "logps/rejected": -109.35646057128906, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -2.84021258354187, "rewards/margins": 22.394512176513672, "rewards/rejected": -25.234722137451172, "step": 4780 }, { "epoch": 2.19, "learning_rate": 9.030948756976153e-08, "logits/chosen": -1.6867091655731201, "logits/rejected": -1.5830605030059814, "logps/chosen": -61.847023010253906, "logps/rejected": -107.93465423583984, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.9739956855773926, "rewards/margins": 22.876094818115234, "rewards/rejected": -25.850088119506836, "step": 4790 }, { "epoch": 2.19, "learning_rate": 8.98021308980213e-08, "logits/chosen": -1.7063744068145752, "logits/rejected": -1.6185452938079834, "logps/chosen": -60.459068298339844, "logps/rejected": -110.65385437011719, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -2.613921880722046, "rewards/margins": 23.015316009521484, "rewards/rejected": -25.629236221313477, "step": 4800 }, { "epoch": 2.19, "eval_logits/chosen": -1.482709288597107, "eval_logits/rejected": -1.4006223678588867, "eval_logps/chosen": -87.56159210205078, "eval_logps/rejected": -110.17630767822266, "eval_loss": 0.007255359552800655, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -5.789804935455322, "eval_rewards/margins": 19.346235275268555, "eval_rewards/rejected": -25.13603973388672, "eval_runtime": 148.1957, "eval_samples_per_second": 19.312, "eval_steps_per_second": 1.208, "step": 4800 }, { "epoch": 2.2, "learning_rate": 8.929477422628106e-08, "logits/chosen": -1.7169044017791748, "logits/rejected": -1.6037685871124268, "logps/chosen": -66.4024429321289, "logps/rejected": -109.7977294921875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.555539608001709, "rewards/margins": 21.82910919189453, "rewards/rejected": -25.3846492767334, "step": 4810 }, { "epoch": 2.2, "learning_rate": 8.878741755454083e-08, "logits/chosen": -1.7067817449569702, "logits/rejected": -1.6083142757415771, "logps/chosen": -62.67310333251953, "logps/rejected": -111.0739517211914, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.06396746635437, "rewards/margins": 22.700790405273438, "rewards/rejected": -25.764759063720703, "step": 4820 }, { "epoch": 2.2, "learning_rate": 8.82800608828006e-08, "logits/chosen": -1.7074460983276367, "logits/rejected": -1.624869704246521, "logps/chosen": -62.67158889770508, "logps/rejected": -105.26263427734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.1344337463378906, "rewards/margins": 21.621511459350586, "rewards/rejected": -24.75594711303711, "step": 4830 }, { "epoch": 2.21, "learning_rate": 8.777270421106036e-08, "logits/chosen": -1.704567313194275, "logits/rejected": -1.6185743808746338, "logps/chosen": -57.15745162963867, "logps/rejected": -110.69510650634766, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.8284034729003906, "rewards/margins": 23.138578414916992, "rewards/rejected": -25.966983795166016, "step": 4840 }, { "epoch": 2.21, "learning_rate": 8.726534753932013e-08, "logits/chosen": -1.6907163858413696, "logits/rejected": -1.6202579736709595, "logps/chosen": -56.169822692871094, "logps/rejected": -107.8416748046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.6681342124938965, "rewards/margins": 22.314682006835938, "rewards/rejected": -24.98281478881836, "step": 4850 }, { "epoch": 2.22, "learning_rate": 8.67579908675799e-08, "logits/chosen": -1.6673482656478882, "logits/rejected": -1.5913320779800415, "logps/chosen": -57.061622619628906, "logps/rejected": -108.7569351196289, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.0472772121429443, "rewards/margins": 22.588138580322266, "rewards/rejected": -25.63541603088379, "step": 4860 }, { "epoch": 2.22, "learning_rate": 8.625063419583966e-08, "logits/chosen": -1.6885020732879639, "logits/rejected": -1.6073684692382812, "logps/chosen": -58.192726135253906, "logps/rejected": -104.97102355957031, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.7794158458709717, "rewards/margins": 21.73690414428711, "rewards/rejected": -24.516321182250977, "step": 4870 }, { "epoch": 2.23, "learning_rate": 8.574327752409943e-08, "logits/chosen": -1.6919105052947998, "logits/rejected": -1.6105105876922607, "logps/chosen": -54.97904586791992, "logps/rejected": -108.90132141113281, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.928136110305786, "rewards/margins": 22.817455291748047, "rewards/rejected": -25.745594024658203, "step": 4880 }, { "epoch": 2.23, "learning_rate": 8.52359208523592e-08, "logits/chosen": -1.7173068523406982, "logits/rejected": -1.6142711639404297, "logps/chosen": -58.7863883972168, "logps/rejected": -109.75135803222656, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.4348838329315186, "rewards/margins": 23.071659088134766, "rewards/rejected": -25.506546020507812, "step": 4890 }, { "epoch": 2.24, "learning_rate": 8.472856418061896e-08, "logits/chosen": -1.698009729385376, "logits/rejected": -1.5966373682022095, "logps/chosen": -62.571998596191406, "logps/rejected": -109.10028076171875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.0530714988708496, "rewards/margins": 22.67581558227539, "rewards/rejected": -25.7288875579834, "step": 4900 }, { "epoch": 2.24, "eval_logits/chosen": -1.473187804222107, "eval_logits/rejected": -1.3907272815704346, "eval_logps/chosen": -88.25776672363281, "eval_logps/rejected": -111.77100372314453, "eval_loss": 0.009084388613700867, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -6.137899875640869, "eval_rewards/margins": 19.795488357543945, "eval_rewards/rejected": -25.933391571044922, "eval_runtime": 177.6801, "eval_samples_per_second": 16.108, "eval_steps_per_second": 1.007, "step": 4900 }, { "epoch": 2.24, "learning_rate": 8.422120750887873e-08, "logits/chosen": -1.6893600225448608, "logits/rejected": -1.5956366062164307, "logps/chosen": -60.0640869140625, "logps/rejected": -111.0639877319336, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.929564952850342, "rewards/margins": 22.824703216552734, "rewards/rejected": -25.7542667388916, "step": 4910 }, { "epoch": 2.25, "learning_rate": 8.37138508371385e-08, "logits/chosen": -1.700185775756836, "logits/rejected": -1.6091382503509521, "logps/chosen": -61.7586784362793, "logps/rejected": -114.5706558227539, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.8398773670196533, "rewards/margins": 23.557147979736328, "rewards/rejected": -26.397024154663086, "step": 4920 }, { "epoch": 2.25, "learning_rate": 8.320649416539826e-08, "logits/chosen": -1.6746511459350586, "logits/rejected": -1.587571382522583, "logps/chosen": -62.784088134765625, "logps/rejected": -110.8236312866211, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.5711326599121094, "rewards/margins": 22.47551155090332, "rewards/rejected": -26.046642303466797, "step": 4930 }, { "epoch": 2.25, "learning_rate": 8.269913749365803e-08, "logits/chosen": -1.7054109573364258, "logits/rejected": -1.6170246601104736, "logps/chosen": -64.3830337524414, "logps/rejected": -114.76332092285156, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -3.3856863975524902, "rewards/margins": 23.24374771118164, "rewards/rejected": -26.629430770874023, "step": 4940 }, { "epoch": 2.26, "learning_rate": 8.21917808219178e-08, "logits/chosen": -1.696123480796814, "logits/rejected": -1.6094690561294556, "logps/chosen": -61.13164138793945, "logps/rejected": -114.35484313964844, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.768718957901001, "rewards/margins": 24.020790100097656, "rewards/rejected": -26.789508819580078, "step": 4950 }, { "epoch": 2.26, "learning_rate": 8.168442415017756e-08, "logits/chosen": -1.6941993236541748, "logits/rejected": -1.6031553745269775, "logps/chosen": -59.2587890625, "logps/rejected": -110.17204284667969, "loss": 0.0043, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.780848741531372, "rewards/margins": 22.9312744140625, "rewards/rejected": -25.71212387084961, "step": 4960 }, { "epoch": 2.27, "learning_rate": 8.117706747843733e-08, "logits/chosen": -1.6594960689544678, "logits/rejected": -1.5845571756362915, "logps/chosen": -59.62993621826172, "logps/rejected": -109.40238952636719, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.7585835456848145, "rewards/margins": 22.709556579589844, "rewards/rejected": -25.468141555786133, "step": 4970 }, { "epoch": 2.27, "learning_rate": 8.06697108066971e-08, "logits/chosen": -1.6639039516448975, "logits/rejected": -1.5775517225265503, "logps/chosen": -59.787498474121094, "logps/rejected": -110.11994934082031, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.1483092308044434, "rewards/margins": 23.41049575805664, "rewards/rejected": -26.558801651000977, "step": 4980 }, { "epoch": 2.28, "learning_rate": 8.016235413495687e-08, "logits/chosen": -1.6792118549346924, "logits/rejected": -1.6143678426742554, "logps/chosen": -58.59711456298828, "logps/rejected": -112.262939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5294151306152344, "rewards/margins": 23.087398529052734, "rewards/rejected": -26.6168155670166, "step": 4990 }, { "epoch": 2.28, "learning_rate": 7.965499746321664e-08, "logits/chosen": -1.6774799823760986, "logits/rejected": -1.5791685581207275, "logps/chosen": -61.466697692871094, "logps/rejected": -119.5277328491211, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.4334099292755127, "rewards/margins": 24.16744613647461, "rewards/rejected": -27.600854873657227, "step": 5000 }, { "epoch": 2.28, "eval_logits/chosen": -1.4564272165298462, "eval_logits/rejected": -1.3738222122192383, "eval_logps/chosen": -90.7276611328125, "eval_logps/rejected": -115.12030792236328, "eval_loss": 0.014685509726405144, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -7.372840881347656, "eval_rewards/margins": 20.23519515991211, "eval_rewards/rejected": -27.608036041259766, "eval_runtime": 159.2397, "eval_samples_per_second": 17.973, "eval_steps_per_second": 1.124, "step": 5000 }, { "epoch": 2.29, "learning_rate": 7.91476407914764e-08, "logits/chosen": -1.6650855541229248, "logits/rejected": -1.5862457752227783, "logps/chosen": -61.99729537963867, "logps/rejected": -112.56124114990234, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.6777865886688232, "rewards/margins": 23.270479202270508, "rewards/rejected": -26.94826316833496, "step": 5010 }, { "epoch": 2.29, "learning_rate": 7.864028411973617e-08, "logits/chosen": -1.6611073017120361, "logits/rejected": -1.579276204109192, "logps/chosen": -61.78557586669922, "logps/rejected": -111.4010009765625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -3.6312317848205566, "rewards/margins": 23.880470275878906, "rewards/rejected": -27.511699676513672, "step": 5020 }, { "epoch": 2.3, "learning_rate": 7.813292744799594e-08, "logits/chosen": -1.6593017578125, "logits/rejected": -1.576297640800476, "logps/chosen": -61.967369079589844, "logps/rejected": -114.94383239746094, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.7899672985076904, "rewards/margins": 24.285675048828125, "rewards/rejected": -28.075641632080078, "step": 5030 }, { "epoch": 2.3, "learning_rate": 7.76255707762557e-08, "logits/chosen": -1.6597697734832764, "logits/rejected": -1.576095461845398, "logps/chosen": -60.80706787109375, "logps/rejected": -112.26513671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.6342098712921143, "rewards/margins": 23.36855697631836, "rewards/rejected": -27.002765655517578, "step": 5040 }, { "epoch": 2.31, "learning_rate": 7.711821410451547e-08, "logits/chosen": -1.6522419452667236, "logits/rejected": -1.563639760017395, "logps/chosen": -63.43516159057617, "logps/rejected": -111.27690124511719, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.502595901489258, "rewards/margins": 23.448060989379883, "rewards/rejected": -26.95065689086914, "step": 5050 }, { "epoch": 2.31, "learning_rate": 7.661085743277524e-08, "logits/chosen": -1.6515191793441772, "logits/rejected": -1.5625699758529663, "logps/chosen": -64.24688720703125, "logps/rejected": -117.6348876953125, "loss": 0.0011, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.006891250610352, "rewards/margins": 24.36283302307129, "rewards/rejected": -28.369726181030273, "step": 5060 }, { "epoch": 2.31, "learning_rate": 7.6103500761035e-08, "logits/chosen": -1.6504627466201782, "logits/rejected": -1.5722054243087769, "logps/chosen": -59.683631896972656, "logps/rejected": -113.41046142578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.637004852294922, "rewards/margins": 24.63381004333496, "rewards/rejected": -28.270816802978516, "step": 5070 }, { "epoch": 2.32, "learning_rate": 7.559614408929477e-08, "logits/chosen": -1.6522117853164673, "logits/rejected": -1.560707926750183, "logps/chosen": -62.32175827026367, "logps/rejected": -116.25910949707031, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.994751214981079, "rewards/margins": 25.31358528137207, "rewards/rejected": -28.308338165283203, "step": 5080 }, { "epoch": 2.32, "learning_rate": 7.508878741755454e-08, "logits/chosen": -1.6741764545440674, "logits/rejected": -1.5819628238677979, "logps/chosen": -61.825042724609375, "logps/rejected": -112.41102600097656, "loss": 0.0054, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.4560928344726562, "rewards/margins": 24.269001007080078, "rewards/rejected": -27.725093841552734, "step": 5090 }, { "epoch": 2.33, "learning_rate": 7.45814307458143e-08, "logits/chosen": -1.678259253501892, "logits/rejected": -1.5955337285995483, "logps/chosen": -62.3753547668457, "logps/rejected": -110.7182846069336, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.586951732635498, "rewards/margins": 23.020858764648438, "rewards/rejected": -26.60780906677246, "step": 5100 }, { "epoch": 2.33, "eval_logits/chosen": -1.4603537321090698, "eval_logits/rejected": -1.3779700994491577, "eval_logps/chosen": -89.79312896728516, "eval_logps/rejected": -114.51570129394531, "eval_loss": 0.011963811703026295, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -6.905576705932617, "eval_rewards/margins": 20.400161743164062, "eval_rewards/rejected": -27.305734634399414, "eval_runtime": 137.8862, "eval_samples_per_second": 20.756, "eval_steps_per_second": 1.298, "step": 5100 }, { "epoch": 2.33, "learning_rate": 7.407407407407407e-08, "logits/chosen": -1.6667464971542358, "logits/rejected": -1.590036392211914, "logps/chosen": -62.1322021484375, "logps/rejected": -112.29280090332031, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.8729019165039062, "rewards/margins": 23.566265106201172, "rewards/rejected": -27.43916893005371, "step": 5110 }, { "epoch": 2.34, "learning_rate": 7.356671740233384e-08, "logits/chosen": -1.6505603790283203, "logits/rejected": -1.551260232925415, "logps/chosen": -60.57634735107422, "logps/rejected": -110.75213623046875, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6815459728240967, "rewards/margins": 23.88407325744629, "rewards/rejected": -27.56561851501465, "step": 5120 }, { "epoch": 2.34, "learning_rate": 7.30593607305936e-08, "logits/chosen": -1.6690067052841187, "logits/rejected": -1.5922582149505615, "logps/chosen": -63.007774353027344, "logps/rejected": -111.46229553222656, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.688744068145752, "rewards/margins": 22.96770477294922, "rewards/rejected": -26.656448364257812, "step": 5130 }, { "epoch": 2.35, "learning_rate": 7.255200405885337e-08, "logits/chosen": -1.669487714767456, "logits/rejected": -1.5800920724868774, "logps/chosen": -60.822509765625, "logps/rejected": -113.46736907958984, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.038383722305298, "rewards/margins": 23.952011108398438, "rewards/rejected": -26.990392684936523, "step": 5140 }, { "epoch": 2.35, "learning_rate": 7.204464738711314e-08, "logits/chosen": -1.6490485668182373, "logits/rejected": -1.565785527229309, "logps/chosen": -61.78581619262695, "logps/rejected": -113.68495178222656, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.121267318725586, "rewards/margins": 24.656448364257812, "rewards/rejected": -27.7777156829834, "step": 5150 }, { "epoch": 2.36, "learning_rate": 7.15372907153729e-08, "logits/chosen": -1.689867615699768, "logits/rejected": -1.5955573320388794, "logps/chosen": -61.59331130981445, "logps/rejected": -114.4730224609375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -3.735996723175049, "rewards/margins": 24.27120590209961, "rewards/rejected": -28.0072021484375, "step": 5160 }, { "epoch": 2.36, "learning_rate": 7.102993404363267e-08, "logits/chosen": -1.6986421346664429, "logits/rejected": -1.6016403436660767, "logps/chosen": -62.81281661987305, "logps/rejected": -111.57080078125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.166194438934326, "rewards/margins": 24.02010726928711, "rewards/rejected": -27.18630599975586, "step": 5170 }, { "epoch": 2.36, "learning_rate": 7.052257737189244e-08, "logits/chosen": -1.664933443069458, "logits/rejected": -1.5854079723358154, "logps/chosen": -60.731178283691406, "logps/rejected": -115.13871765136719, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.7711570262908936, "rewards/margins": 24.53057861328125, "rewards/rejected": -28.301733016967773, "step": 5180 }, { "epoch": 2.37, "learning_rate": 7.00152207001522e-08, "logits/chosen": -1.676743507385254, "logits/rejected": -1.5855991840362549, "logps/chosen": -60.43587112426758, "logps/rejected": -114.07183837890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.224400758743286, "rewards/margins": 23.96244239807129, "rewards/rejected": -27.186847686767578, "step": 5190 }, { "epoch": 2.37, "learning_rate": 6.950786402841197e-08, "logits/chosen": -1.651254415512085, "logits/rejected": -1.5747812986373901, "logps/chosen": -64.29228210449219, "logps/rejected": -112.44258117675781, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.9219841957092285, "rewards/margins": 23.39896011352539, "rewards/rejected": -27.32094383239746, "step": 5200 }, { "epoch": 2.37, "eval_logits/chosen": -1.4592654705047607, "eval_logits/rejected": -1.377195954322815, "eval_logps/chosen": -89.17169189453125, "eval_logps/rejected": -115.13497161865234, "eval_loss": 0.009668777696788311, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -6.594855308532715, "eval_rewards/margins": 21.02051544189453, "eval_rewards/rejected": -27.615367889404297, "eval_runtime": 161.4474, "eval_samples_per_second": 17.727, "eval_steps_per_second": 1.109, "step": 5200 }, { "epoch": 2.38, "learning_rate": 6.900050735667174e-08, "logits/chosen": -1.6961199045181274, "logits/rejected": -1.6116136312484741, "logps/chosen": -61.264503479003906, "logps/rejected": -113.85749816894531, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.226783275604248, "rewards/margins": 24.32256317138672, "rewards/rejected": -27.549346923828125, "step": 5210 }, { "epoch": 2.38, "learning_rate": 6.84931506849315e-08, "logits/chosen": -1.6688201427459717, "logits/rejected": -1.5917457342147827, "logps/chosen": -60.019065856933594, "logps/rejected": -117.906005859375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.143035411834717, "rewards/margins": 25.059246063232422, "rewards/rejected": -28.202281951904297, "step": 5220 }, { "epoch": 2.39, "learning_rate": 6.798579401319127e-08, "logits/chosen": -1.6733934879302979, "logits/rejected": -1.5905344486236572, "logps/chosen": -60.477500915527344, "logps/rejected": -115.54353332519531, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.429901599884033, "rewards/margins": 24.198131561279297, "rewards/rejected": -27.628036499023438, "step": 5230 }, { "epoch": 2.39, "learning_rate": 6.747843734145104e-08, "logits/chosen": -1.6754209995269775, "logits/rejected": -1.5958062410354614, "logps/chosen": -57.3133430480957, "logps/rejected": -111.15318298339844, "loss": 0.0034, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.7517170906066895, "rewards/margins": 24.133460998535156, "rewards/rejected": -26.885177612304688, "step": 5240 }, { "epoch": 2.4, "learning_rate": 6.69710806697108e-08, "logits/chosen": -1.6434913873672485, "logits/rejected": -1.5607589483261108, "logps/chosen": -61.31553268432617, "logps/rejected": -114.73616027832031, "loss": 0.0043, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.1473183631896973, "rewards/margins": 24.592918395996094, "rewards/rejected": -27.740238189697266, "step": 5250 }, { "epoch": 2.4, "learning_rate": 6.646372399797057e-08, "logits/chosen": -1.679810881614685, "logits/rejected": -1.5854202508926392, "logps/chosen": -60.2682991027832, "logps/rejected": -108.10420227050781, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.6345019340515137, "rewards/margins": 23.013126373291016, "rewards/rejected": -26.647632598876953, "step": 5260 }, { "epoch": 2.41, "learning_rate": 6.595636732623034e-08, "logits/chosen": -1.6663167476654053, "logits/rejected": -1.583310842514038, "logps/chosen": -61.53295135498047, "logps/rejected": -111.26729583740234, "loss": 0.0011, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6133663654327393, "rewards/margins": 23.47526741027832, "rewards/rejected": -27.0886287689209, "step": 5270 }, { "epoch": 2.41, "learning_rate": 6.54490106544901e-08, "logits/chosen": -1.6865384578704834, "logits/rejected": -1.6003679037094116, "logps/chosen": -60.403114318847656, "logps/rejected": -114.53379821777344, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.2604928016662598, "rewards/margins": 25.53091049194336, "rewards/rejected": -28.79140281677246, "step": 5280 }, { "epoch": 2.41, "learning_rate": 6.494165398274987e-08, "logits/chosen": -1.6446622610092163, "logits/rejected": -1.5624706745147705, "logps/chosen": -62.8109130859375, "logps/rejected": -112.4520263671875, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6601951122283936, "rewards/margins": 23.925983428955078, "rewards/rejected": -27.5861759185791, "step": 5290 }, { "epoch": 2.42, "learning_rate": 6.443429731100964e-08, "logits/chosen": -1.6465861797332764, "logits/rejected": -1.563546061515808, "logps/chosen": -62.8968505859375, "logps/rejected": -117.574951171875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.694208860397339, "rewards/margins": 25.211589813232422, "rewards/rejected": -28.905797958374023, "step": 5300 }, { "epoch": 2.42, "eval_logits/chosen": -1.4465004205703735, "eval_logits/rejected": -1.3646644353866577, "eval_logps/chosen": -91.00645446777344, "eval_logps/rejected": -117.21986389160156, "eval_loss": 0.015215998515486717, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -7.512238502502441, "eval_rewards/margins": 21.14558219909668, "eval_rewards/rejected": -28.657821655273438, "eval_runtime": 149.4468, "eval_samples_per_second": 19.151, "eval_steps_per_second": 1.198, "step": 5300 }, { "epoch": 2.42, "learning_rate": 6.39269406392694e-08, "logits/chosen": -1.641343355178833, "logits/rejected": -1.5607117414474487, "logps/chosen": -62.00952911376953, "logps/rejected": -118.48858642578125, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.9190986156463623, "rewards/margins": 25.884418487548828, "rewards/rejected": -29.80352210998535, "step": 5310 }, { "epoch": 2.43, "learning_rate": 6.341958396752917e-08, "logits/chosen": -1.6571009159088135, "logits/rejected": -1.5537234544754028, "logps/chosen": -63.40632247924805, "logps/rejected": -119.00166320800781, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.7866835594177246, "rewards/margins": 25.64019775390625, "rewards/rejected": -29.4268798828125, "step": 5320 }, { "epoch": 2.43, "learning_rate": 6.291222729578894e-08, "logits/chosen": -1.67231023311615, "logits/rejected": -1.5877420902252197, "logps/chosen": -63.801239013671875, "logps/rejected": -115.84706115722656, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.1915388107299805, "rewards/margins": 24.520641326904297, "rewards/rejected": -28.71217918395996, "step": 5330 }, { "epoch": 2.44, "learning_rate": 6.24048706240487e-08, "logits/chosen": -1.6602236032485962, "logits/rejected": -1.552490472793579, "logps/chosen": -67.17897033691406, "logps/rejected": -118.0072021484375, "loss": 0.0054, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.39713191986084, "rewards/margins": 24.628887176513672, "rewards/rejected": -29.02602195739746, "step": 5340 }, { "epoch": 2.44, "learning_rate": 6.189751395230847e-08, "logits/chosen": -1.6501667499542236, "logits/rejected": -1.5565497875213623, "logps/chosen": -62.2840690612793, "logps/rejected": -115.02555847167969, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.614900588989258, "rewards/margins": 25.629772186279297, "rewards/rejected": -29.244674682617188, "step": 5350 }, { "epoch": 2.45, "learning_rate": 6.139015728056824e-08, "logits/chosen": -1.6319433450698853, "logits/rejected": -1.5535074472427368, "logps/chosen": -60.017303466796875, "logps/rejected": -115.55560302734375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.5083460807800293, "rewards/margins": 24.990381240844727, "rewards/rejected": -28.498727798461914, "step": 5360 }, { "epoch": 2.45, "learning_rate": 6.0882800608828e-08, "logits/chosen": -1.6377484798431396, "logits/rejected": -1.567611575126648, "logps/chosen": -59.57817840576172, "logps/rejected": -112.98590087890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.69633412361145, "rewards/margins": 25.142475128173828, "rewards/rejected": -28.83881187438965, "step": 5370 }, { "epoch": 2.46, "learning_rate": 6.037544393708777e-08, "logits/chosen": -1.6721665859222412, "logits/rejected": -1.5867160558700562, "logps/chosen": -62.4495735168457, "logps/rejected": -119.78460693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2411720752716064, "rewards/margins": 25.996212005615234, "rewards/rejected": -29.237384796142578, "step": 5380 }, { "epoch": 2.46, "learning_rate": 5.986808726534754e-08, "logits/chosen": -1.6507494449615479, "logits/rejected": -1.5618906021118164, "logps/chosen": -62.99871826171875, "logps/rejected": -118.33638763427734, "loss": 0.0057, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.6402747631073, "rewards/margins": 26.580303192138672, "rewards/rejected": -30.2205810546875, "step": 5390 }, { "epoch": 2.46, "learning_rate": 5.93607305936073e-08, "logits/chosen": -1.6390947103500366, "logits/rejected": -1.5543744564056396, "logps/chosen": -59.22716522216797, "logps/rejected": -118.3710708618164, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.3072409629821777, "rewards/margins": 26.537982940673828, "rewards/rejected": -29.845226287841797, "step": 5400 }, { "epoch": 2.46, "eval_logits/chosen": -1.4331419467926025, "eval_logits/rejected": -1.3514832258224487, "eval_logps/chosen": -91.3964614868164, "eval_logps/rejected": -118.7977066040039, "eval_loss": 0.014859071001410484, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -7.707246780395508, "eval_rewards/margins": 21.739484786987305, "eval_rewards/rejected": -29.446731567382812, "eval_runtime": 148.897, "eval_samples_per_second": 19.221, "eval_steps_per_second": 1.202, "step": 5400 }, { "epoch": 2.47, "learning_rate": 5.8853373921867065e-08, "logits/chosen": -1.6490676403045654, "logits/rejected": -1.5844876766204834, "logps/chosen": -60.4987907409668, "logps/rejected": -123.09349060058594, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.7090137004852295, "rewards/margins": 26.693166732788086, "rewards/rejected": -30.40218162536621, "step": 5410 }, { "epoch": 2.47, "learning_rate": 5.834601725012683e-08, "logits/chosen": -1.6431608200073242, "logits/rejected": -1.556091547012329, "logps/chosen": -60.43384552001953, "logps/rejected": -116.97358703613281, "loss": 0.0011, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6738152503967285, "rewards/margins": 25.790332794189453, "rewards/rejected": -29.464147567749023, "step": 5420 }, { "epoch": 2.48, "learning_rate": 5.78386605783866e-08, "logits/chosen": -1.6185407638549805, "logits/rejected": -1.5349016189575195, "logps/chosen": -61.325828552246094, "logps/rejected": -115.5124740600586, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.4237639904022217, "rewards/margins": 26.009695053100586, "rewards/rejected": -29.433462142944336, "step": 5430 }, { "epoch": 2.48, "learning_rate": 5.7331303906646365e-08, "logits/chosen": -1.6398437023162842, "logits/rejected": -1.5696852207183838, "logps/chosen": -60.71503829956055, "logps/rejected": -117.88394927978516, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.8025786876678467, "rewards/margins": 26.124065399169922, "rewards/rejected": -29.926645278930664, "step": 5440 }, { "epoch": 2.49, "learning_rate": 5.682394723490613e-08, "logits/chosen": -1.642251968383789, "logits/rejected": -1.5555260181427002, "logps/chosen": -65.87081909179688, "logps/rejected": -118.69813537597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.45894718170166, "rewards/margins": 24.65566635131836, "rewards/rejected": -29.114614486694336, "step": 5450 }, { "epoch": 2.49, "learning_rate": 5.63165905631659e-08, "logits/chosen": -1.626678466796875, "logits/rejected": -1.54049551486969, "logps/chosen": -65.37655639648438, "logps/rejected": -124.11677551269531, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.931981325149536, "rewards/margins": 27.34768295288086, "rewards/rejected": -31.2796573638916, "step": 5460 }, { "epoch": 2.5, "learning_rate": 5.5809233891425665e-08, "logits/chosen": -1.622873067855835, "logits/rejected": -1.536217212677002, "logps/chosen": -60.86760711669922, "logps/rejected": -113.6754379272461, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.9700331687927246, "rewards/margins": 25.673553466796875, "rewards/rejected": -29.643585205078125, "step": 5470 }, { "epoch": 2.5, "learning_rate": 5.530187721968543e-08, "logits/chosen": -1.6449658870697021, "logits/rejected": -1.557802677154541, "logps/chosen": -65.7143783569336, "logps/rejected": -118.18770599365234, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.456841945648193, "rewards/margins": 25.29494285583496, "rewards/rejected": -29.751785278320312, "step": 5480 }, { "epoch": 2.51, "learning_rate": 5.47945205479452e-08, "logits/chosen": -1.6212965250015259, "logits/rejected": -1.5396636724472046, "logps/chosen": -60.645843505859375, "logps/rejected": -120.55546569824219, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6203675270080566, "rewards/margins": 26.316967010498047, "rewards/rejected": -29.937335968017578, "step": 5490 }, { "epoch": 2.51, "learning_rate": 5.4287163876204964e-08, "logits/chosen": -1.640913963317871, "logits/rejected": -1.5578389167785645, "logps/chosen": -61.227508544921875, "logps/rejected": -118.57939147949219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.417339324951172, "rewards/margins": 26.617385864257812, "rewards/rejected": -30.034725189208984, "step": 5500 }, { "epoch": 2.51, "eval_logits/chosen": -1.4292577505111694, "eval_logits/rejected": -1.3483442068099976, "eval_logps/chosen": -91.32805633544922, "eval_logps/rejected": -118.79887390136719, "eval_loss": 0.013684802688658237, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -7.673043251037598, "eval_rewards/margins": 21.774276733398438, "eval_rewards/rejected": -29.44732093811035, "eval_runtime": 165.9448, "eval_samples_per_second": 17.247, "eval_steps_per_second": 1.079, "step": 5500 }, { "epoch": 2.52, "learning_rate": 5.377980720446473e-08, "logits/chosen": -1.6323438882827759, "logits/rejected": -1.5507663488388062, "logps/chosen": -61.16468048095703, "logps/rejected": -113.951904296875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.240023612976074, "rewards/margins": 25.353132247924805, "rewards/rejected": -29.593158721923828, "step": 5510 }, { "epoch": 2.52, "learning_rate": 5.32724505327245e-08, "logits/chosen": -1.6588122844696045, "logits/rejected": -1.5675216913223267, "logps/chosen": -61.83917999267578, "logps/rejected": -117.0645751953125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.4707107543945312, "rewards/margins": 25.591796875, "rewards/rejected": -29.062509536743164, "step": 5520 }, { "epoch": 2.52, "learning_rate": 5.2765093860984264e-08, "logits/chosen": -1.6362457275390625, "logits/rejected": -1.5562450885772705, "logps/chosen": -62.32781219482422, "logps/rejected": -117.09416198730469, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.09111213684082, "rewards/margins": 25.149341583251953, "rewards/rejected": -29.24045753479004, "step": 5530 }, { "epoch": 2.53, "learning_rate": 5.225773718924403e-08, "logits/chosen": -1.6216081380844116, "logits/rejected": -1.5349209308624268, "logps/chosen": -59.554840087890625, "logps/rejected": -116.4613265991211, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.461890459060669, "rewards/margins": 26.002004623413086, "rewards/rejected": -29.46389389038086, "step": 5540 }, { "epoch": 2.53, "learning_rate": 5.17503805175038e-08, "logits/chosen": -1.647017478942871, "logits/rejected": -1.570915699005127, "logps/chosen": -59.53449249267578, "logps/rejected": -115.95018005371094, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.120753049850464, "rewards/margins": 26.011632919311523, "rewards/rejected": -29.13239097595215, "step": 5550 }, { "epoch": 2.54, "learning_rate": 5.1243023845763564e-08, "logits/chosen": -1.635332465171814, "logits/rejected": -1.5419440269470215, "logps/chosen": -62.53186798095703, "logps/rejected": -117.53709411621094, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.5881824493408203, "rewards/margins": 25.784774780273438, "rewards/rejected": -29.372955322265625, "step": 5560 }, { "epoch": 2.54, "learning_rate": 5.073566717402333e-08, "logits/chosen": -1.6254523992538452, "logits/rejected": -1.5545673370361328, "logps/chosen": -58.76667404174805, "logps/rejected": -119.00218200683594, "loss": 0.0076, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.654752731323242, "rewards/margins": 26.570575714111328, "rewards/rejected": -30.225322723388672, "step": 5570 }, { "epoch": 2.55, "learning_rate": 5.02283105022831e-08, "logits/chosen": -1.6368452310562134, "logits/rejected": -1.53933584690094, "logps/chosen": -65.74815368652344, "logps/rejected": -116.06086730957031, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -4.214093208312988, "rewards/margins": 25.33750343322754, "rewards/rejected": -29.55159568786621, "step": 5580 }, { "epoch": 2.55, "learning_rate": 4.9720953830542864e-08, "logits/chosen": -1.6262636184692383, "logits/rejected": -1.5578850507736206, "logps/chosen": -62.19173049926758, "logps/rejected": -117.39012145996094, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.056756496429443, "rewards/margins": 25.51288414001465, "rewards/rejected": -29.56964111328125, "step": 5590 }, { "epoch": 2.56, "learning_rate": 4.921359715880263e-08, "logits/chosen": -1.6535956859588623, "logits/rejected": -1.5675952434539795, "logps/chosen": -60.3506965637207, "logps/rejected": -115.2103271484375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.7057414054870605, "rewards/margins": 24.840530395507812, "rewards/rejected": -28.546274185180664, "step": 5600 }, { "epoch": 2.56, "eval_logits/chosen": -1.4299277067184448, "eval_logits/rejected": -1.3484517335891724, "eval_logps/chosen": -91.37982177734375, "eval_logps/rejected": -119.24150085449219, "eval_loss": 0.013339003548026085, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -7.698925495147705, "eval_rewards/margins": 21.969703674316406, "eval_rewards/rejected": -29.668628692626953, "eval_runtime": 149.7582, "eval_samples_per_second": 19.111, "eval_steps_per_second": 1.195, "step": 5600 }, { "epoch": 2.56, "learning_rate": 4.87062404870624e-08, "logits/chosen": -1.6384315490722656, "logits/rejected": -1.5487849712371826, "logps/chosen": -63.22198486328125, "logps/rejected": -117.51546478271484, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.881399154663086, "rewards/margins": 26.307037353515625, "rewards/rejected": -30.18843650817871, "step": 5610 }, { "epoch": 2.57, "learning_rate": 4.8198883815322164e-08, "logits/chosen": -1.6279752254486084, "logits/rejected": -1.5550925731658936, "logps/chosen": -62.881736755371094, "logps/rejected": -119.9935302734375, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -4.314352989196777, "rewards/margins": 25.769826889038086, "rewards/rejected": -30.084178924560547, "step": 5620 }, { "epoch": 2.57, "learning_rate": 4.769152714358193e-08, "logits/chosen": -1.6415107250213623, "logits/rejected": -1.5622230768203735, "logps/chosen": -60.204978942871094, "logps/rejected": -118.11802673339844, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.6665031909942627, "rewards/margins": 26.238367080688477, "rewards/rejected": -29.904865264892578, "step": 5630 }, { "epoch": 2.57, "learning_rate": 4.71841704718417e-08, "logits/chosen": -1.624925971031189, "logits/rejected": -1.5330023765563965, "logps/chosen": -58.52467727661133, "logps/rejected": -111.4828872680664, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.563800096511841, "rewards/margins": 24.683853149414062, "rewards/rejected": -28.24765396118164, "step": 5640 }, { "epoch": 2.58, "learning_rate": 4.6676813800101464e-08, "logits/chosen": -1.630906343460083, "logits/rejected": -1.5402790307998657, "logps/chosen": -60.6591682434082, "logps/rejected": -115.6097183227539, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.623584032058716, "rewards/margins": 26.047557830810547, "rewards/rejected": -29.671142578125, "step": 5650 }, { "epoch": 2.58, "learning_rate": 4.616945712836123e-08, "logits/chosen": -1.6545413732528687, "logits/rejected": -1.5670019388198853, "logps/chosen": -63.90033721923828, "logps/rejected": -112.67887878417969, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.858156681060791, "rewards/margins": 24.520545959472656, "rewards/rejected": -28.378704071044922, "step": 5660 }, { "epoch": 2.59, "learning_rate": 4.5662100456621e-08, "logits/chosen": -1.6373335123062134, "logits/rejected": -1.5495936870574951, "logps/chosen": -60.8790397644043, "logps/rejected": -119.51078796386719, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.7924132347106934, "rewards/margins": 27.08707618713379, "rewards/rejected": -30.87948989868164, "step": 5670 }, { "epoch": 2.59, "learning_rate": 4.5154743784880764e-08, "logits/chosen": -1.6513662338256836, "logits/rejected": -1.583567500114441, "logps/chosen": -59.11711883544922, "logps/rejected": -120.10084533691406, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.343040943145752, "rewards/margins": 26.122615814208984, "rewards/rejected": -29.46565818786621, "step": 5680 }, { "epoch": 2.6, "learning_rate": 4.464738711314053e-08, "logits/chosen": -1.6662410497665405, "logits/rejected": -1.5812619924545288, "logps/chosen": -61.8986930847168, "logps/rejected": -110.57454681396484, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.756959915161133, "rewards/margins": 23.829944610595703, "rewards/rejected": -27.586902618408203, "step": 5690 }, { "epoch": 2.6, "learning_rate": 4.41400304414003e-08, "logits/chosen": -1.6431344747543335, "logits/rejected": -1.5499104261398315, "logps/chosen": -62.566795349121094, "logps/rejected": -118.3983383178711, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.171024799346924, "rewards/margins": 27.057300567626953, "rewards/rejected": -30.22832679748535, "step": 5700 }, { "epoch": 2.6, "eval_logits/chosen": -1.4365618228912354, "eval_logits/rejected": -1.3552583456039429, "eval_logps/chosen": -89.70028686523438, "eval_logps/rejected": -117.83853149414062, "eval_loss": 0.009456491097807884, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -6.859156131744385, "eval_rewards/margins": 22.108001708984375, "eval_rewards/rejected": -28.9671573638916, "eval_runtime": 162.7705, "eval_samples_per_second": 17.583, "eval_steps_per_second": 1.1, "step": 5700 }, { "epoch": 2.61, "learning_rate": 4.3632673769660064e-08, "logits/chosen": -1.6342830657958984, "logits/rejected": -1.5472590923309326, "logps/chosen": -63.207054138183594, "logps/rejected": -120.1373291015625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -3.573979616165161, "rewards/margins": 25.88643455505371, "rewards/rejected": -29.46041488647461, "step": 5710 }, { "epoch": 2.61, "learning_rate": 4.312531709791983e-08, "logits/chosen": -1.681544303894043, "logits/rejected": -1.6141388416290283, "logps/chosen": -63.15888214111328, "logps/rejected": -116.94087982177734, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5882973670959473, "rewards/margins": 24.538061141967773, "rewards/rejected": -28.126361846923828, "step": 5720 }, { "epoch": 2.62, "learning_rate": 4.26179604261796e-08, "logits/chosen": -1.655426263809204, "logits/rejected": -1.5624696016311646, "logps/chosen": -56.862220764160156, "logps/rejected": -109.0649185180664, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.455822467803955, "rewards/margins": 25.6435489654541, "rewards/rejected": -28.0993709564209, "step": 5730 }, { "epoch": 2.62, "learning_rate": 4.2110603754439363e-08, "logits/chosen": -1.6815840005874634, "logits/rejected": -1.5926878452301025, "logps/chosen": -59.546897888183594, "logps/rejected": -115.58638763427734, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.8910863399505615, "rewards/margins": 25.475738525390625, "rewards/rejected": -28.3668270111084, "step": 5740 }, { "epoch": 2.62, "learning_rate": 4.160324708269913e-08, "logits/chosen": -1.6645225286483765, "logits/rejected": -1.5795118808746338, "logps/chosen": -60.3840446472168, "logps/rejected": -118.16209411621094, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.7638068199157715, "rewards/margins": 26.152246475219727, "rewards/rejected": -28.91605567932129, "step": 5750 }, { "epoch": 2.63, "learning_rate": 4.10958904109589e-08, "logits/chosen": -1.6419804096221924, "logits/rejected": -1.5658048391342163, "logps/chosen": -55.76360321044922, "logps/rejected": -111.81380462646484, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.067939043045044, "rewards/margins": 24.284969329833984, "rewards/rejected": -27.352909088134766, "step": 5760 }, { "epoch": 2.63, "learning_rate": 4.0588533739218663e-08, "logits/chosen": -1.6323308944702148, "logits/rejected": -1.5415849685668945, "logps/chosen": -57.9756965637207, "logps/rejected": -110.98262786865234, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.81278920173645, "rewards/margins": 25.500110626220703, "rewards/rejected": -28.312902450561523, "step": 5770 }, { "epoch": 2.64, "learning_rate": 4.0081177067478437e-08, "logits/chosen": -1.6647393703460693, "logits/rejected": -1.5759618282318115, "logps/chosen": -62.323204040527344, "logps/rejected": -111.48100280761719, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.236069917678833, "rewards/margins": 24.6293888092041, "rewards/rejected": -27.86545753479004, "step": 5780 }, { "epoch": 2.64, "learning_rate": 3.95738203957382e-08, "logits/chosen": -1.6411035060882568, "logits/rejected": -1.5813742876052856, "logps/chosen": -60.639862060546875, "logps/rejected": -116.2388916015625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -4.28511381149292, "rewards/margins": 24.081039428710938, "rewards/rejected": -28.36614990234375, "step": 5790 }, { "epoch": 2.65, "learning_rate": 3.906646372399797e-08, "logits/chosen": -1.678734540939331, "logits/rejected": -1.5850093364715576, "logps/chosen": -61.819984436035156, "logps/rejected": -111.3331527709961, "loss": 0.0054, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.188535213470459, "rewards/margins": 24.102962493896484, "rewards/rejected": -27.291500091552734, "step": 5800 }, { "epoch": 2.65, "eval_logits/chosen": -1.445048213005066, "eval_logits/rejected": -1.3636606931686401, "eval_logps/chosen": -88.80926513671875, "eval_logps/rejected": -116.7530746459961, "eval_loss": 0.00770636135712266, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -6.413644313812256, "eval_rewards/margins": 22.010774612426758, "eval_rewards/rejected": -28.424423217773438, "eval_runtime": 172.6366, "eval_samples_per_second": 16.578, "eval_steps_per_second": 1.037, "step": 5800 }, { "epoch": 2.65, "learning_rate": 3.8559107052257736e-08, "logits/chosen": -1.634439468383789, "logits/rejected": -1.5573443174362183, "logps/chosen": -57.23991775512695, "logps/rejected": -114.6131591796875, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.1852035522460938, "rewards/margins": 25.9045352935791, "rewards/rejected": -29.089736938476562, "step": 5810 }, { "epoch": 2.66, "learning_rate": 3.80517503805175e-08, "logits/chosen": -1.6653354167938232, "logits/rejected": -1.5760657787322998, "logps/chosen": -63.47956466674805, "logps/rejected": -118.31854248046875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.382948637008667, "rewards/margins": 25.50372886657715, "rewards/rejected": -28.886676788330078, "step": 5820 }, { "epoch": 2.66, "learning_rate": 3.754439370877727e-08, "logits/chosen": -1.6479460000991821, "logits/rejected": -1.5641560554504395, "logps/chosen": -61.220802307128906, "logps/rejected": -117.40779876708984, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.3633739948272705, "rewards/margins": 25.617687225341797, "rewards/rejected": -28.981060028076172, "step": 5830 }, { "epoch": 2.67, "learning_rate": 3.7037037037037036e-08, "logits/chosen": -1.6705583333969116, "logits/rejected": -1.5847426652908325, "logps/chosen": -58.475196838378906, "logps/rejected": -117.71073913574219, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.079245090484619, "rewards/margins": 26.204748153686523, "rewards/rejected": -29.28399658203125, "step": 5840 }, { "epoch": 2.67, "learning_rate": 3.65296803652968e-08, "logits/chosen": -1.6485599279403687, "logits/rejected": -1.56297767162323, "logps/chosen": -60.04380416870117, "logps/rejected": -109.9533920288086, "loss": 0.0055, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.4925427436828613, "rewards/margins": 23.410324096679688, "rewards/rejected": -26.902868270874023, "step": 5850 }, { "epoch": 2.67, "learning_rate": 3.602232369355657e-08, "logits/chosen": -1.6266456842422485, "logits/rejected": -1.5563054084777832, "logps/chosen": -59.59053421020508, "logps/rejected": -114.15860748291016, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.7324161529541016, "rewards/margins": 25.718669891357422, "rewards/rejected": -29.45108985900879, "step": 5860 }, { "epoch": 2.68, "learning_rate": 3.5514967021816336e-08, "logits/chosen": -1.6728761196136475, "logits/rejected": -1.5769294500350952, "logps/chosen": -62.21794509887695, "logps/rejected": -121.44730377197266, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6361191272735596, "rewards/margins": 26.636241912841797, "rewards/rejected": -30.27235984802246, "step": 5870 }, { "epoch": 2.68, "learning_rate": 3.50076103500761e-08, "logits/chosen": -1.6521060466766357, "logits/rejected": -1.5682947635650635, "logps/chosen": -64.18666076660156, "logps/rejected": -119.81550598144531, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.000080585479736, "rewards/margins": 25.79050064086914, "rewards/rejected": -29.790578842163086, "step": 5880 }, { "epoch": 2.69, "learning_rate": 3.450025367833587e-08, "logits/chosen": -1.6358667612075806, "logits/rejected": -1.5582091808319092, "logps/chosen": -63.92961502075195, "logps/rejected": -118.2952880859375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -3.9304556846618652, "rewards/margins": 26.11741065979004, "rewards/rejected": -30.047870635986328, "step": 5890 }, { "epoch": 2.69, "learning_rate": 3.3992897006595636e-08, "logits/chosen": -1.6305748224258423, "logits/rejected": -1.5501872301101685, "logps/chosen": -62.05702590942383, "logps/rejected": -121.21634674072266, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.6642932891845703, "rewards/margins": 26.954849243164062, "rewards/rejected": -30.6191463470459, "step": 5900 }, { "epoch": 2.69, "eval_logits/chosen": -1.4208369255065918, "eval_logits/rejected": -1.3400059938430786, "eval_logps/chosen": -91.27997589111328, "eval_logps/rejected": -120.20849609375, "eval_loss": 0.011545187793672085, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -7.648996829986572, "eval_rewards/margins": 22.503141403198242, "eval_rewards/rejected": -30.152135848999023, "eval_runtime": 161.2749, "eval_samples_per_second": 17.746, "eval_steps_per_second": 1.11, "step": 5900 }, { "epoch": 2.7, "learning_rate": 3.34855403348554e-08, "logits/chosen": -1.6295617818832397, "logits/rejected": -1.5396859645843506, "logps/chosen": -68.30640411376953, "logps/rejected": -116.1508560180664, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.036105155944824, "rewards/margins": 25.045873641967773, "rewards/rejected": -29.081979751586914, "step": 5910 }, { "epoch": 2.7, "learning_rate": 3.297818366311517e-08, "logits/chosen": -1.6219854354858398, "logits/rejected": -1.55465829372406, "logps/chosen": -62.98735427856445, "logps/rejected": -118.68704986572266, "loss": 0.0043, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.5867815017700195, "rewards/margins": 25.48343849182129, "rewards/rejected": -30.07021713256836, "step": 5920 }, { "epoch": 2.71, "learning_rate": 3.2470826991374936e-08, "logits/chosen": -1.637002944946289, "logits/rejected": -1.551239252090454, "logps/chosen": -65.75910949707031, "logps/rejected": -118.2501449584961, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.5694169998168945, "rewards/margins": 25.793041229248047, "rewards/rejected": -30.362462997436523, "step": 5930 }, { "epoch": 2.71, "learning_rate": 3.19634703196347e-08, "logits/chosen": -1.6292927265167236, "logits/rejected": -1.5572412014007568, "logps/chosen": -62.1704216003418, "logps/rejected": -121.44587707519531, "loss": 0.0011, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.114914894104004, "rewards/margins": 26.280200958251953, "rewards/rejected": -30.395116806030273, "step": 5940 }, { "epoch": 2.72, "learning_rate": 3.145611364789447e-08, "logits/chosen": -1.6549896001815796, "logits/rejected": -1.567518949508667, "logps/chosen": -62.098243713378906, "logps/rejected": -116.75553131103516, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.8145833015441895, "rewards/margins": 25.686513900756836, "rewards/rejected": -29.5010986328125, "step": 5950 }, { "epoch": 2.72, "learning_rate": 3.0948756976154236e-08, "logits/chosen": -1.6517982482910156, "logits/rejected": -1.5621912479400635, "logps/chosen": -61.54521560668945, "logps/rejected": -115.05535888671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.3965892791748047, "rewards/margins": 25.722614288330078, "rewards/rejected": -29.11920166015625, "step": 5960 }, { "epoch": 2.73, "learning_rate": 3.0441400304414e-08, "logits/chosen": -1.6520181894302368, "logits/rejected": -1.576278805732727, "logps/chosen": -63.23960494995117, "logps/rejected": -118.0584487915039, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.735790967941284, "rewards/margins": 25.305919647216797, "rewards/rejected": -29.04170799255371, "step": 5970 }, { "epoch": 2.73, "learning_rate": 2.993404363267377e-08, "logits/chosen": -1.630038857460022, "logits/rejected": -1.5673866271972656, "logps/chosen": -54.789329528808594, "logps/rejected": -119.5348129272461, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.0025506019592285, "rewards/margins": 27.198843002319336, "rewards/rejected": -30.20139503479004, "step": 5980 }, { "epoch": 2.73, "learning_rate": 2.9426686960933532e-08, "logits/chosen": -1.6585489511489868, "logits/rejected": -1.5535162687301636, "logps/chosen": -66.03738403320312, "logps/rejected": -121.28465270996094, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.421753406524658, "rewards/margins": 26.9661922454834, "rewards/rejected": -30.3879451751709, "step": 5990 }, { "epoch": 2.74, "learning_rate": 2.89193302891933e-08, "logits/chosen": -1.670650839805603, "logits/rejected": -1.5895731449127197, "logps/chosen": -65.10139465332031, "logps/rejected": -117.02482604980469, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.6346237659454346, "rewards/margins": 25.722705841064453, "rewards/rejected": -29.35733413696289, "step": 6000 }, { "epoch": 2.74, "eval_logits/chosen": -1.4316580295562744, "eval_logits/rejected": -1.3509966135025024, "eval_logps/chosen": -89.68942260742188, "eval_logps/rejected": -118.18567657470703, "eval_loss": 0.008585677482187748, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -6.853721618652344, "eval_rewards/margins": 22.28700065612793, "eval_rewards/rejected": -29.140722274780273, "eval_runtime": 143.5018, "eval_samples_per_second": 19.944, "eval_steps_per_second": 1.247, "step": 6000 }, { "epoch": 2.74, "learning_rate": 2.8411973617453066e-08, "logits/chosen": -1.6392771005630493, "logits/rejected": -1.5661704540252686, "logps/chosen": -59.24951171875, "logps/rejected": -116.7738265991211, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.125824213027954, "rewards/margins": 26.19602394104004, "rewards/rejected": -29.321847915649414, "step": 6010 }, { "epoch": 2.75, "learning_rate": 2.7904616945712832e-08, "logits/chosen": -1.6743838787078857, "logits/rejected": -1.567811369895935, "logps/chosen": -65.45565795898438, "logps/rejected": -115.02339172363281, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -4.311161994934082, "rewards/margins": 24.980754852294922, "rewards/rejected": -29.291919708251953, "step": 6020 }, { "epoch": 2.75, "learning_rate": 2.73972602739726e-08, "logits/chosen": -1.6575504541397095, "logits/rejected": -1.5639588832855225, "logps/chosen": -61.6038818359375, "logps/rejected": -113.43119812011719, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.067666530609131, "rewards/margins": 25.01435661315918, "rewards/rejected": -29.0820255279541, "step": 6030 }, { "epoch": 2.76, "learning_rate": 2.6889903602232366e-08, "logits/chosen": -1.6164219379425049, "logits/rejected": -1.5363751649856567, "logps/chosen": -62.669212341308594, "logps/rejected": -122.5501480102539, "loss": 0.0043, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.584050416946411, "rewards/margins": 26.209972381591797, "rewards/rejected": -29.794025421142578, "step": 6040 }, { "epoch": 2.76, "learning_rate": 2.6382546930492132e-08, "logits/chosen": -1.6212742328643799, "logits/rejected": -1.531227946281433, "logps/chosen": -63.161460876464844, "logps/rejected": -114.44987487792969, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.7137718200683594, "rewards/margins": 25.035747528076172, "rewards/rejected": -28.7495174407959, "step": 6050 }, { "epoch": 2.77, "learning_rate": 2.58751902587519e-08, "logits/chosen": -1.6605415344238281, "logits/rejected": -1.5648448467254639, "logps/chosen": -61.281028747558594, "logps/rejected": -115.41572570800781, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.519256591796875, "rewards/margins": 25.869674682617188, "rewards/rejected": -29.388927459716797, "step": 6060 }, { "epoch": 2.77, "learning_rate": 2.5367833587011665e-08, "logits/chosen": -1.6297121047973633, "logits/rejected": -1.5470812320709229, "logps/chosen": -64.13361358642578, "logps/rejected": -118.23075103759766, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.01121187210083, "rewards/margins": 26.0483341217041, "rewards/rejected": -30.059545516967773, "step": 6070 }, { "epoch": 2.78, "learning_rate": 2.4860476915271432e-08, "logits/chosen": -1.6354061365127563, "logits/rejected": -1.5343222618103027, "logps/chosen": -64.9552230834961, "logps/rejected": -117.0967788696289, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.105961561203003, "rewards/margins": 26.618520736694336, "rewards/rejected": -29.724477767944336, "step": 6080 }, { "epoch": 2.78, "learning_rate": 2.43531202435312e-08, "logits/chosen": -1.6334095001220703, "logits/rejected": -1.559619665145874, "logps/chosen": -55.2518196105957, "logps/rejected": -114.39505767822266, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.2193074226379395, "rewards/margins": 25.336313247680664, "rewards/rejected": -28.555622100830078, "step": 6090 }, { "epoch": 2.78, "learning_rate": 2.3845763571790965e-08, "logits/chosen": -1.6395479440689087, "logits/rejected": -1.560088872909546, "logps/chosen": -62.160560607910156, "logps/rejected": -113.92082214355469, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.746182918548584, "rewards/margins": 24.4161434173584, "rewards/rejected": -28.162328720092773, "step": 6100 }, { "epoch": 2.78, "eval_logits/chosen": -1.4256998300552368, "eval_logits/rejected": -1.345211386680603, "eval_logps/chosen": -90.22210693359375, "eval_logps/rejected": -119.16900634765625, "eval_loss": 0.009460356086492538, "eval_rewards/accuracies": 0.9888268113136292, "eval_rewards/chosen": -7.120062828063965, "eval_rewards/margins": 22.5123233795166, "eval_rewards/rejected": -29.632389068603516, "eval_runtime": 175.1008, "eval_samples_per_second": 16.345, "eval_steps_per_second": 1.022, "step": 6100 }, { "epoch": 2.79, "learning_rate": 2.3338406900050732e-08, "logits/chosen": -1.6284040212631226, "logits/rejected": -1.541839361190796, "logps/chosen": -60.50825119018555, "logps/rejected": -111.8641128540039, "loss": 0.0043, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.3182640075683594, "rewards/margins": 24.865325927734375, "rewards/rejected": -28.1835880279541, "step": 6110 }, { "epoch": 2.79, "learning_rate": 2.28310502283105e-08, "logits/chosen": -1.6222755908966064, "logits/rejected": -1.5430351495742798, "logps/chosen": -62.3880729675293, "logps/rejected": -120.25767517089844, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.532604694366455, "rewards/margins": 26.64389419555664, "rewards/rejected": -30.176494598388672, "step": 6120 }, { "epoch": 2.8, "learning_rate": 2.2323693556570265e-08, "logits/chosen": -1.6467183828353882, "logits/rejected": -1.5764975547790527, "logps/chosen": -58.1132698059082, "logps/rejected": -115.12342834472656, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.736299991607666, "rewards/margins": 25.90152359008789, "rewards/rejected": -29.6378231048584, "step": 6130 }, { "epoch": 2.8, "learning_rate": 2.1816336884830032e-08, "logits/chosen": -1.6268088817596436, "logits/rejected": -1.5481709241867065, "logps/chosen": -61.39824295043945, "logps/rejected": -114.4950942993164, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.7331886291503906, "rewards/margins": 24.886098861694336, "rewards/rejected": -28.619287490844727, "step": 6140 }, { "epoch": 2.81, "learning_rate": 2.13089802130898e-08, "logits/chosen": -1.6436065435409546, "logits/rejected": -1.532901406288147, "logps/chosen": -64.28096008300781, "logps/rejected": -116.76171875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.1694931983947754, "rewards/margins": 26.952239990234375, "rewards/rejected": -30.121734619140625, "step": 6150 }, { "epoch": 2.81, "learning_rate": 2.0801623541349565e-08, "logits/chosen": -1.6422874927520752, "logits/rejected": -1.574110984802246, "logps/chosen": -63.52398681640625, "logps/rejected": -121.7143325805664, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.430600643157959, "rewards/margins": 25.789005279541016, "rewards/rejected": -29.219608306884766, "step": 6160 }, { "epoch": 2.82, "learning_rate": 2.0294266869609332e-08, "logits/chosen": -1.656306266784668, "logits/rejected": -1.5762187242507935, "logps/chosen": -64.04621887207031, "logps/rejected": -115.48970031738281, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.133012294769287, "rewards/margins": 25.261951446533203, "rewards/rejected": -29.394962310791016, "step": 6170 }, { "epoch": 2.82, "learning_rate": 1.97869101978691e-08, "logits/chosen": -1.622867226600647, "logits/rejected": -1.537571668624878, "logps/chosen": -62.96330642700195, "logps/rejected": -118.26654052734375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.150825262069702, "rewards/margins": 26.657018661499023, "rewards/rejected": -29.807846069335938, "step": 6180 }, { "epoch": 2.83, "learning_rate": 1.9279553526128868e-08, "logits/chosen": -1.657539963722229, "logits/rejected": -1.5573053359985352, "logps/chosen": -61.9826774597168, "logps/rejected": -121.66966247558594, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.1908092498779297, "rewards/margins": 27.3090877532959, "rewards/rejected": -30.499902725219727, "step": 6190 }, { "epoch": 2.83, "learning_rate": 1.8772196854388635e-08, "logits/chosen": -1.6334965229034424, "logits/rejected": -1.5571749210357666, "logps/chosen": -58.16144943237305, "logps/rejected": -113.46431732177734, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.1828455924987793, "rewards/margins": 24.975894927978516, "rewards/rejected": -28.158737182617188, "step": 6200 }, { "epoch": 2.83, "eval_logits/chosen": -1.4334616661071777, "eval_logits/rejected": -1.3530946969985962, "eval_logps/chosen": -89.77032470703125, "eval_logps/rejected": -118.23873901367188, "eval_loss": 0.008627377450466156, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -6.894176006317139, "eval_rewards/margins": 22.2730770111084, "eval_rewards/rejected": -29.167253494262695, "eval_runtime": 167.5336, "eval_samples_per_second": 17.083, "eval_steps_per_second": 1.068, "step": 6200 }, { "epoch": 2.83, "learning_rate": 1.82648401826484e-08, "logits/chosen": -1.630913496017456, "logits/rejected": -1.5411865711212158, "logps/chosen": -63.88469314575195, "logps/rejected": -117.57832336425781, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -3.7570133209228516, "rewards/margins": 26.40604591369629, "rewards/rejected": -30.16305923461914, "step": 6210 }, { "epoch": 2.84, "learning_rate": 1.7757483510908168e-08, "logits/chosen": -1.659031867980957, "logits/rejected": -1.5792479515075684, "logps/chosen": -57.74346923828125, "logps/rejected": -116.78248596191406, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.2035319805145264, "rewards/margins": 25.624927520751953, "rewards/rejected": -28.828460693359375, "step": 6220 }, { "epoch": 2.84, "learning_rate": 1.7250126839167935e-08, "logits/chosen": -1.6171165704727173, "logits/rejected": -1.5536924600601196, "logps/chosen": -56.93315505981445, "logps/rejected": -116.37396240234375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.59100604057312, "rewards/margins": 25.44740867614746, "rewards/rejected": -29.03841781616211, "step": 6230 }, { "epoch": 2.85, "learning_rate": 1.67427701674277e-08, "logits/chosen": -1.6382725238800049, "logits/rejected": -1.5492085218429565, "logps/chosen": -60.00202178955078, "logps/rejected": -117.9487075805664, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.313509702682495, "rewards/margins": 26.29458236694336, "rewards/rejected": -29.608089447021484, "step": 6240 }, { "epoch": 2.85, "learning_rate": 1.6235413495687468e-08, "logits/chosen": -1.6389650106430054, "logits/rejected": -1.5683645009994507, "logps/chosen": -61.87580490112305, "logps/rejected": -117.61279296875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.534771680831909, "rewards/margins": 25.421100616455078, "rewards/rejected": -28.95587158203125, "step": 6250 }, { "epoch": 2.86, "learning_rate": 1.5728056823947235e-08, "logits/chosen": -1.6355327367782593, "logits/rejected": -1.5543670654296875, "logps/chosen": -61.89397048950195, "logps/rejected": -118.14009857177734, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.3398423194885254, "rewards/margins": 25.748271942138672, "rewards/rejected": -29.088115692138672, "step": 6260 }, { "epoch": 2.86, "learning_rate": 1.5220700152207e-08, "logits/chosen": -1.6374504566192627, "logits/rejected": -1.5645629167556763, "logps/chosen": -57.43635940551758, "logps/rejected": -117.78645324707031, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.0963313579559326, "rewards/margins": 26.56585693359375, "rewards/rejected": -29.662189483642578, "step": 6270 }, { "epoch": 2.87, "learning_rate": 1.4713343480466766e-08, "logits/chosen": -1.6323999166488647, "logits/rejected": -1.5513784885406494, "logps/chosen": -63.46538162231445, "logps/rejected": -117.1316909790039, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.105284690856934, "rewards/margins": 25.079740524291992, "rewards/rejected": -29.18502426147461, "step": 6280 }, { "epoch": 2.87, "learning_rate": 1.4205986808726533e-08, "logits/chosen": -1.6455732583999634, "logits/rejected": -1.5496267080307007, "logps/chosen": -63.40629196166992, "logps/rejected": -116.4112777709961, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.5596485137939453, "rewards/margins": 26.04227638244629, "rewards/rejected": -29.601924896240234, "step": 6290 }, { "epoch": 2.88, "learning_rate": 1.36986301369863e-08, "logits/chosen": -1.6346752643585205, "logits/rejected": -1.5575860738754272, "logps/chosen": -60.238128662109375, "logps/rejected": -114.30989837646484, "loss": 0.0013, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.088661193847656, "rewards/margins": 24.86785888671875, "rewards/rejected": -28.956518173217773, "step": 6300 }, { "epoch": 2.88, "eval_logits/chosen": -1.4349452257156372, "eval_logits/rejected": -1.3543336391448975, "eval_logps/chosen": -89.65511322021484, "eval_logps/rejected": -117.97098541259766, "eval_loss": 0.00864337757229805, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -6.83656644821167, "eval_rewards/margins": 22.196807861328125, "eval_rewards/rejected": -29.033374786376953, "eval_runtime": 154.2565, "eval_samples_per_second": 18.554, "eval_steps_per_second": 1.16, "step": 6300 }, { "epoch": 2.88, "learning_rate": 1.3191273465246066e-08, "logits/chosen": -1.637908935546875, "logits/rejected": -1.5516226291656494, "logps/chosen": -60.553466796875, "logps/rejected": -114.1862564086914, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.289581775665283, "rewards/margins": 24.721349716186523, "rewards/rejected": -28.010934829711914, "step": 6310 }, { "epoch": 2.88, "learning_rate": 1.2683916793505833e-08, "logits/chosen": -1.662811279296875, "logits/rejected": -1.5722968578338623, "logps/chosen": -63.365997314453125, "logps/rejected": -122.7975082397461, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.5013763904571533, "rewards/margins": 27.253204345703125, "rewards/rejected": -30.754581451416016, "step": 6320 }, { "epoch": 2.89, "learning_rate": 1.21765601217656e-08, "logits/chosen": -1.6395689249038696, "logits/rejected": -1.5583178997039795, "logps/chosen": -57.32929611206055, "logps/rejected": -112.72383117675781, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -3.334056854248047, "rewards/margins": 25.49137306213379, "rewards/rejected": -28.8254337310791, "step": 6330 }, { "epoch": 2.89, "learning_rate": 1.1669203450025366e-08, "logits/chosen": -1.6697086095809937, "logits/rejected": -1.5900932550430298, "logps/chosen": -62.0523796081543, "logps/rejected": -119.71087646484375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.4745171070098877, "rewards/margins": 25.79860496520996, "rewards/rejected": -29.273122787475586, "step": 6340 }, { "epoch": 2.9, "learning_rate": 1.1161846778285133e-08, "logits/chosen": -1.6300337314605713, "logits/rejected": -1.5525095462799072, "logps/chosen": -61.092987060546875, "logps/rejected": -115.71488189697266, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.913750410079956, "rewards/margins": 25.886764526367188, "rewards/rejected": -29.800512313842773, "step": 6350 }, { "epoch": 2.9, "learning_rate": 1.06544901065449e-08, "logits/chosen": -1.643493413925171, "logits/rejected": -1.5590834617614746, "logps/chosen": -60.048858642578125, "logps/rejected": -115.4676513671875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.94218111038208, "rewards/margins": 26.0200138092041, "rewards/rejected": -28.962194442749023, "step": 6360 }, { "epoch": 2.91, "learning_rate": 1.0147133434804666e-08, "logits/chosen": -1.641506552696228, "logits/rejected": -1.548807144165039, "logps/chosen": -63.835044860839844, "logps/rejected": -116.1264877319336, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -3.6523032188415527, "rewards/margins": 25.005321502685547, "rewards/rejected": -28.657623291015625, "step": 6370 }, { "epoch": 2.91, "learning_rate": 9.639776763064434e-09, "logits/chosen": -1.6641569137573242, "logits/rejected": -1.5705759525299072, "logps/chosen": -58.545570373535156, "logps/rejected": -114.12187194824219, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -3.1781678199768066, "rewards/margins": 26.338916778564453, "rewards/rejected": -29.5170841217041, "step": 6380 }, { "epoch": 2.92, "learning_rate": 9.1324200913242e-09, "logits/chosen": -1.6400539875030518, "logits/rejected": -1.5575604438781738, "logps/chosen": -60.43939208984375, "logps/rejected": -114.89385986328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.559497833251953, "rewards/margins": 25.163372039794922, "rewards/rejected": -28.72286605834961, "step": 6390 }, { "epoch": 2.92, "learning_rate": 8.625063419583967e-09, "logits/chosen": -1.6212533712387085, "logits/rejected": -1.5395368337631226, "logps/chosen": -59.00303268432617, "logps/rejected": -113.10612487792969, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.4267821311950684, "rewards/margins": 25.679149627685547, "rewards/rejected": -29.105932235717773, "step": 6400 }, { "epoch": 2.92, "eval_logits/chosen": -1.4303452968597412, "eval_logits/rejected": -1.3493685722351074, "eval_logps/chosen": -89.9966049194336, "eval_logps/rejected": -118.4869155883789, "eval_loss": 0.009599537588655949, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -7.007308483123779, "eval_rewards/margins": 22.284032821655273, "eval_rewards/rejected": -29.291339874267578, "eval_runtime": 148.4489, "eval_samples_per_second": 19.279, "eval_steps_per_second": 1.206, "step": 6400 }, { "epoch": 2.93, "learning_rate": 8.117706747843734e-09, "logits/chosen": -1.6490615606307983, "logits/rejected": -1.5711501836776733, "logps/chosen": -61.677642822265625, "logps/rejected": -119.9886703491211, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.11264705657959, "rewards/margins": 27.286340713500977, "rewards/rejected": -30.398983001708984, "step": 6410 }, { "epoch": 2.93, "learning_rate": 7.6103500761035e-09, "logits/chosen": -1.631784439086914, "logits/rejected": -1.5532324314117432, "logps/chosen": -59.434417724609375, "logps/rejected": -115.86344909667969, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.2527592182159424, "rewards/margins": 26.518869400024414, "rewards/rejected": -29.77162742614746, "step": 6420 }, { "epoch": 2.94, "learning_rate": 7.1029934043632664e-09, "logits/chosen": -1.6500422954559326, "logits/rejected": -1.5633658170700073, "logps/chosen": -61.51234817504883, "logps/rejected": -113.0541763305664, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.301517963409424, "rewards/margins": 25.33908462524414, "rewards/rejected": -28.64060401916504, "step": 6430 }, { "epoch": 2.94, "learning_rate": 6.595636732623033e-09, "logits/chosen": -1.6449449062347412, "logits/rejected": -1.5771965980529785, "logps/chosen": -54.834747314453125, "logps/rejected": -118.65677642822266, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.671351671218872, "rewards/margins": 26.696313858032227, "rewards/rejected": -29.367666244506836, "step": 6440 }, { "epoch": 2.94, "learning_rate": 6.0882800608828e-09, "logits/chosen": -1.62326979637146, "logits/rejected": -1.5562350749969482, "logps/chosen": -59.994285583496094, "logps/rejected": -118.3582992553711, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.617910861968994, "rewards/margins": 25.71333885192871, "rewards/rejected": -29.331247329711914, "step": 6450 }, { "epoch": 2.95, "learning_rate": 5.580923389142566e-09, "logits/chosen": -1.66298508644104, "logits/rejected": -1.575870394706726, "logps/chosen": -58.43715286254883, "logps/rejected": -119.3067855834961, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.6894166469573975, "rewards/margins": 27.552875518798828, "rewards/rejected": -30.242290496826172, "step": 6460 }, { "epoch": 2.95, "learning_rate": 5.073566717402333e-09, "logits/chosen": -1.6346296072006226, "logits/rejected": -1.5523085594177246, "logps/chosen": -62.90636444091797, "logps/rejected": -115.53157043457031, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.4161829948425293, "rewards/margins": 24.956478118896484, "rewards/rejected": -28.372661590576172, "step": 6470 }, { "epoch": 2.96, "learning_rate": 4.5662100456621e-09, "logits/chosen": -1.6323788166046143, "logits/rejected": -1.5435467958450317, "logps/chosen": -61.47710418701172, "logps/rejected": -117.0919418334961, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.35223388671875, "rewards/margins": 26.055648803710938, "rewards/rejected": -29.407886505126953, "step": 6480 }, { "epoch": 2.96, "learning_rate": 4.058853373921867e-09, "logits/chosen": -1.6424974203109741, "logits/rejected": -1.547910451889038, "logps/chosen": -63.641624450683594, "logps/rejected": -116.34974670410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9447073936462402, "rewards/margins": 26.343042373657227, "rewards/rejected": -29.28774642944336, "step": 6490 }, { "epoch": 2.97, "learning_rate": 3.5514967021816332e-09, "logits/chosen": -1.6433073282241821, "logits/rejected": -1.5683605670928955, "logps/chosen": -58.81917190551758, "logps/rejected": -115.32568359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.370504856109619, "rewards/margins": 25.766164779663086, "rewards/rejected": -29.136669158935547, "step": 6500 }, { "epoch": 2.97, "eval_logits/chosen": -1.4297113418579102, "eval_logits/rejected": -1.3493608236312866, "eval_logps/chosen": -89.93760681152344, "eval_logps/rejected": -118.577392578125, "eval_loss": 0.009225493296980858, "eval_rewards/accuracies": 0.9916201233863831, "eval_rewards/chosen": -6.977811813354492, "eval_rewards/margins": 22.3587703704834, "eval_rewards/rejected": -29.336578369140625, "eval_runtime": 145.0292, "eval_samples_per_second": 19.734, "eval_steps_per_second": 1.234, "step": 6500 }, { "epoch": 2.97, "learning_rate": 3.0441400304414e-09, "logits/chosen": -1.641736626625061, "logits/rejected": -1.5471439361572266, "logps/chosen": -61.745521545410156, "logps/rejected": -118.31556701660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6029326915740967, "rewards/margins": 26.154644012451172, "rewards/rejected": -29.757577896118164, "step": 6510 }, { "epoch": 2.98, "learning_rate": 2.5367833587011665e-09, "logits/chosen": -1.6372482776641846, "logits/rejected": -1.5472410917282104, "logps/chosen": -64.36847686767578, "logps/rejected": -118.65006256103516, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.840372085571289, "rewards/margins": 26.441211700439453, "rewards/rejected": -30.28158950805664, "step": 6520 }, { "epoch": 2.98, "learning_rate": 2.0294266869609335e-09, "logits/chosen": -1.622945785522461, "logits/rejected": -1.5590341091156006, "logps/chosen": -57.7742919921875, "logps/rejected": -116.918212890625, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.9200615882873535, "rewards/margins": 24.792682647705078, "rewards/rejected": -28.712743759155273, "step": 6530 }, { "epoch": 2.99, "learning_rate": 1.5220700152207e-09, "logits/chosen": -1.6107476949691772, "logits/rejected": -1.5307183265686035, "logps/chosen": -57.80412673950195, "logps/rejected": -114.65202331542969, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.344606399536133, "rewards/margins": 25.22000503540039, "rewards/rejected": -28.56460952758789, "step": 6540 }, { "epoch": 2.99, "learning_rate": 1.0147133434804667e-09, "logits/chosen": -1.650368332862854, "logits/rejected": -1.5417366027832031, "logps/chosen": -62.948486328125, "logps/rejected": -114.2308578491211, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.576667070388794, "rewards/margins": 25.144075393676758, "rewards/rejected": -28.720739364624023, "step": 6550 }, { "epoch": 2.99, "learning_rate": 5.073566717402334e-10, "logits/chosen": -1.6579023599624634, "logits/rejected": -1.574951410293579, "logps/chosen": -62.673072814941406, "logps/rejected": -113.37846374511719, "loss": 0.0011, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.956240177154541, "rewards/margins": 25.087797164916992, "rewards/rejected": -29.04403305053711, "step": 6560 }, { "epoch": 3.0, "learning_rate": 0.0, "logits/chosen": -1.6318897008895874, "logits/rejected": -1.5588319301605225, "logps/chosen": -58.1288948059082, "logps/rejected": -115.969482421875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.4750983715057373, "rewards/margins": 26.056133270263672, "rewards/rejected": -29.531230926513672, "step": 6570 }, { "epoch": 3.0, "step": 6570, "total_flos": 0.0, "train_loss": 0.019671504644591626, "train_runtime": 60672.7973, "train_samples_per_second": 6.932, "train_steps_per_second": 0.108 } ], "logging_steps": 10, "max_steps": 6570, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }