{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 7642, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.535947712418301e-09, "logits/chosen": -2.771364688873291, "logits/rejected": -2.0475902557373047, "logps/chosen": -350.8045654296875, "logps/rejected": -232.34600830078125, "loss": 0.0246, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 6.535947712418302e-08, "logits/chosen": -2.4147045612335205, "logits/rejected": -2.214167356491089, "logps/chosen": -275.91546630859375, "logps/rejected": -221.3582763671875, "loss": 0.0435, "rewards/accuracies": 0.4166666567325592, "rewards/chosen": -6.692952592857182e-05, "rewards/margins": -4.810280370293185e-05, "rewards/rejected": -1.8826718587661162e-05, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.3071895424836603e-07, "logits/chosen": -2.3138043880462646, "logits/rejected": -1.998386025428772, "logps/chosen": -183.78628540039062, "logps/rejected": -185.7834014892578, "loss": 0.0581, "rewards/accuracies": 0.375, "rewards/chosen": 6.9359957706183195e-06, "rewards/margins": -1.1799385902122594e-05, "rewards/rejected": 1.873539076768793e-05, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.9607843137254904e-07, "logits/chosen": -2.408761739730835, "logits/rejected": -2.3481807708740234, "logps/chosen": -221.2811737060547, "logps/rejected": -207.22256469726562, "loss": 0.0492, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0001152217882918194, "rewards/margins": 4.462666038307361e-05, "rewards/rejected": 7.059513154672459e-05, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.6143790849673207e-07, "logits/chosen": -2.2558865547180176, "logits/rejected": -2.2246384620666504, "logps/chosen": -189.4458770751953, "logps/rejected": -171.353515625, "loss": 0.0585, "rewards/accuracies": 0.5, "rewards/chosen": 0.0001129482188844122, "rewards/margins": 0.0001205944427056238, "rewards/rejected": -7.646233825653326e-06, "step": 40 }, { "epoch": 0.01, "learning_rate": 3.267973856209151e-07, "logits/chosen": -2.456951141357422, "logits/rejected": -2.2887561321258545, "logps/chosen": -278.3084411621094, "logps/rejected": -233.02688598632812, "loss": 0.0696, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.00021237425971776247, "rewards/margins": 1.0886736163229216e-05, "rewards/rejected": 0.00020148752082604915, "step": 50 }, { "epoch": 0.01, "learning_rate": 3.921568627450981e-07, "logits/chosen": -2.452667236328125, "logits/rejected": -2.3211636543273926, "logps/chosen": -299.87713623046875, "logps/rejected": -243.0287322998047, "loss": 0.0811, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0004027537943329662, "rewards/margins": 0.00023468179279007018, "rewards/rejected": 0.00016807201609481126, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.5751633986928105e-07, "logits/chosen": -2.2944846153259277, "logits/rejected": -2.1996867656707764, "logps/chosen": -256.50616455078125, "logps/rejected": -251.6355438232422, "loss": 0.0398, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0004902628134004772, "rewards/margins": 2.103743281622883e-05, "rewards/rejected": 0.0004692253714893013, "step": 70 }, { "epoch": 0.01, "learning_rate": 5.228758169934641e-07, "logits/chosen": -2.4491403102874756, "logits/rejected": -2.2229251861572266, "logps/chosen": -251.4347381591797, "logps/rejected": -224.22293090820312, "loss": 0.0398, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0005534522933885455, "rewards/margins": 0.00024414055224042386, "rewards/rejected": 0.0003093117265962064, "step": 80 }, { "epoch": 0.01, "learning_rate": 5.882352941176471e-07, "logits/chosen": -2.3732728958129883, "logits/rejected": -2.1753273010253906, "logps/chosen": -209.80862426757812, "logps/rejected": -172.71231079101562, "loss": 0.0691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0009153633145615458, "rewards/margins": 0.0003029147337656468, "rewards/rejected": 0.0006124485516920686, "step": 90 }, { "epoch": 0.01, "learning_rate": 6.535947712418302e-07, "logits/chosen": -2.309413433074951, "logits/rejected": -2.144484281539917, "logps/chosen": -208.6510467529297, "logps/rejected": -211.52490234375, "loss": 0.0532, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.0008663847111165524, "rewards/margins": 8.283840725198388e-05, "rewards/rejected": 0.0007835463620722294, "step": 100 }, { "epoch": 0.01, "eval_logits/chosen": -2.3285820484161377, "eval_logits/rejected": -2.1099746227264404, "eval_logps/chosen": -227.70619201660156, "eval_logps/rejected": -195.34722900390625, "eval_loss": 0.05353359878063202, "eval_rewards/accuracies": 0.5529999732971191, "eval_rewards/chosen": 0.0015157524030655622, "eval_rewards/margins": 0.00045084880548529327, "eval_rewards/rejected": 0.0010649036848917603, "eval_runtime": 1442.2999, "eval_samples_per_second": 1.387, "eval_steps_per_second": 0.347, "step": 100 }, { "epoch": 0.01, "learning_rate": 7.189542483660131e-07, "logits/chosen": -2.199138641357422, "logits/rejected": -2.3579840660095215, "logps/chosen": -235.41043090820312, "logps/rejected": -250.78732299804688, "loss": 0.0456, "rewards/accuracies": 0.375, "rewards/chosen": 0.0016705368179827929, "rewards/margins": -0.00019449429237283766, "rewards/rejected": 0.0018650311976671219, "step": 110 }, { "epoch": 0.02, "learning_rate": 7.843137254901962e-07, "logits/chosen": -2.1194636821746826, "logits/rejected": -2.130894660949707, "logps/chosen": -208.68984985351562, "logps/rejected": -208.67202758789062, "loss": 0.0539, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.0016094299498945475, "rewards/margins": -0.0002141711302101612, "rewards/rejected": 0.0018236007308587432, "step": 120 }, { "epoch": 0.02, "learning_rate": 8.496732026143792e-07, "logits/chosen": -2.211329460144043, "logits/rejected": -2.0572669506073, "logps/chosen": -233.6269073486328, "logps/rejected": -180.12249755859375, "loss": 0.0587, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0022293583024293184, "rewards/margins": 0.0006621202919632196, "rewards/rejected": 0.0015672380104660988, "step": 130 }, { "epoch": 0.02, "learning_rate": 9.150326797385621e-07, "logits/chosen": -2.255624771118164, "logits/rejected": -2.1116433143615723, "logps/chosen": -176.47323608398438, "logps/rejected": -158.57904052734375, "loss": 0.0637, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0026094545610249043, "rewards/margins": 0.0007819056627340615, "rewards/rejected": 0.0018275491893291473, "step": 140 }, { "epoch": 0.02, "learning_rate": 9.80392156862745e-07, "logits/chosen": -2.173933506011963, "logits/rejected": -2.0644004344940186, "logps/chosen": -193.16531372070312, "logps/rejected": -217.70556640625, "loss": 0.0643, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.002999431686475873, "rewards/margins": 0.0004859448818024248, "rewards/rejected": 0.0025134864263236523, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.0457516339869283e-06, "logits/chosen": -2.0049774646759033, "logits/rejected": -2.14156436920166, "logps/chosen": -196.28433227539062, "logps/rejected": -215.7510223388672, "loss": 0.0621, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.004374385345727205, "rewards/margins": 0.0014288431266322732, "rewards/rejected": 0.0029455421026796103, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.111111111111111e-06, "logits/chosen": -2.4489009380340576, "logits/rejected": -2.1065969467163086, "logps/chosen": -238.6536407470703, "logps/rejected": -195.6304168701172, "loss": 0.0526, "rewards/accuracies": 0.625, "rewards/chosen": 0.00491450447589159, "rewards/margins": 0.0018129239324480295, "rewards/rejected": 0.0031015807762742043, "step": 170 }, { "epoch": 0.02, "learning_rate": 1.1764705882352942e-06, "logits/chosen": -2.4028241634368896, "logits/rejected": -2.051755428314209, "logps/chosen": -144.17684936523438, "logps/rejected": -146.40402221679688, "loss": 0.0638, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.003161213593557477, "rewards/margins": 0.0006372106727212667, "rewards/rejected": 0.0025240029208362103, "step": 180 }, { "epoch": 0.02, "learning_rate": 1.2418300653594772e-06, "logits/chosen": -2.391371011734009, "logits/rejected": -2.1699657440185547, "logps/chosen": -217.9038543701172, "logps/rejected": -168.83421325683594, "loss": 0.0444, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.004153423942625523, "rewards/margins": 0.0016645189607515931, "rewards/rejected": 0.002488905331119895, "step": 190 }, { "epoch": 0.03, "learning_rate": 1.3071895424836604e-06, "logits/chosen": -2.3824551105499268, "logits/rejected": -2.099884510040283, "logps/chosen": -203.17013549804688, "logps/rejected": -164.29214477539062, "loss": 0.0625, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.004522558301687241, "rewards/margins": 0.0029251843225210905, "rewards/rejected": 0.001597374677658081, "step": 200 }, { "epoch": 0.03, "eval_logits/chosen": -2.330930233001709, "eval_logits/rejected": -2.112360954284668, "eval_logps/chosen": -225.17840576171875, "eval_logps/rejected": -194.7161407470703, "eval_loss": 0.05265128239989281, "eval_rewards/accuracies": 0.6079999804496765, "eval_rewards/chosen": 0.004043539520353079, "eval_rewards/margins": 0.0023475452326238155, "eval_rewards/rejected": 0.0016959939384832978, "eval_runtime": 1442.7463, "eval_samples_per_second": 1.386, "eval_steps_per_second": 0.347, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.3725490196078434e-06, "logits/chosen": -2.2161953449249268, "logits/rejected": -2.2658512592315674, "logps/chosen": -217.46646118164062, "logps/rejected": -239.91824340820312, "loss": 0.0344, "rewards/accuracies": 0.625, "rewards/chosen": 0.003468969836831093, "rewards/margins": 0.0022426594514399767, "rewards/rejected": 0.0012263102689757943, "step": 210 }, { "epoch": 0.03, "learning_rate": 1.4379084967320261e-06, "logits/chosen": -2.4367940425872803, "logits/rejected": -2.205225706100464, "logps/chosen": -179.8951873779297, "logps/rejected": -179.15188598632812, "loss": 0.0275, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.002089735586196184, "rewards/margins": 0.0011267390800639987, "rewards/rejected": 0.0009629965061321855, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.5032679738562091e-06, "logits/chosen": -2.2942264080047607, "logits/rejected": -2.230792999267578, "logps/chosen": -210.7850799560547, "logps/rejected": -194.7191619873047, "loss": 0.0504, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0031732949428260326, "rewards/margins": 0.003322675358504057, "rewards/rejected": -0.00014938069216441363, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.5686274509803923e-06, "logits/chosen": -2.321350336074829, "logits/rejected": -2.1149497032165527, "logps/chosen": -219.54190063476562, "logps/rejected": -253.10812377929688, "loss": 0.0465, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.004185300786048174, "rewards/margins": 0.005275317933410406, "rewards/rejected": -0.0010900170309469104, "step": 240 }, { "epoch": 0.03, "learning_rate": 1.6339869281045753e-06, "logits/chosen": -2.167945146560669, "logits/rejected": -2.141470432281494, "logps/chosen": -186.19515991210938, "logps/rejected": -184.30873107910156, "loss": 0.0607, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.0037641176022589207, "rewards/margins": 0.002698666648939252, "rewards/rejected": 0.0010654507204890251, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.6993464052287585e-06, "logits/chosen": -2.341299057006836, "logits/rejected": -1.9840151071548462, "logps/chosen": -269.43170166015625, "logps/rejected": -203.65139770507812, "loss": 0.0456, "rewards/accuracies": 0.5, "rewards/chosen": 0.005779625847935677, "rewards/margins": 0.0015972151886671782, "rewards/rejected": 0.004182410426437855, "step": 260 }, { "epoch": 0.04, "learning_rate": 1.7647058823529414e-06, "logits/chosen": -2.3696370124816895, "logits/rejected": -2.374230146408081, "logps/chosen": -251.5572967529297, "logps/rejected": -241.042724609375, "loss": 0.0276, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.007571273948997259, "rewards/margins": 0.0037990030832588673, "rewards/rejected": 0.0037722710985690355, "step": 270 }, { "epoch": 0.04, "learning_rate": 1.8300653594771242e-06, "logits/chosen": -2.272646188735962, "logits/rejected": -2.035445213317871, "logps/chosen": -237.1612548828125, "logps/rejected": -169.53341674804688, "loss": 0.0596, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.007252591662108898, "rewards/margins": 0.003395236562937498, "rewards/rejected": 0.0038573560304939747, "step": 280 }, { "epoch": 0.04, "learning_rate": 1.8954248366013072e-06, "logits/chosen": -2.373527765274048, "logits/rejected": -2.2914352416992188, "logps/chosen": -213.3122100830078, "logps/rejected": -226.66067504882812, "loss": 0.0646, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.001583040109835565, "rewards/margins": 0.006263392977416515, "rewards/rejected": -0.004680351819843054, "step": 290 }, { "epoch": 0.04, "learning_rate": 1.96078431372549e-06, "logits/chosen": -2.220839262008667, "logits/rejected": -2.360034704208374, "logps/chosen": -267.57769775390625, "logps/rejected": -283.5144958496094, "loss": 0.0485, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.006979311816394329, "rewards/margins": 0.00745069095864892, "rewards/rejected": -0.014430004172027111, "step": 300 }, { "epoch": 0.04, "eval_logits/chosen": -2.323570966720581, "eval_logits/rejected": -2.1050167083740234, "eval_logps/chosen": -237.7423553466797, "eval_logps/rejected": -214.5470733642578, "eval_loss": 0.04963809624314308, "eval_rewards/accuracies": 0.5889999866485596, "eval_rewards/chosen": -0.008520414121448994, "eval_rewards/margins": 0.009614524431526661, "eval_rewards/rejected": -0.018134936690330505, "eval_runtime": 1442.5595, "eval_samples_per_second": 1.386, "eval_steps_per_second": 0.347, "step": 300 }, { "epoch": 0.04, "learning_rate": 2.0261437908496734e-06, "logits/chosen": -2.207132339477539, "logits/rejected": -1.9097169637680054, "logps/chosen": -252.839111328125, "logps/rejected": -199.76547241210938, "loss": 0.0452, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.014440001919865608, "rewards/margins": 0.008956280536949635, "rewards/rejected": -0.023396281525492668, "step": 310 }, { "epoch": 0.04, "learning_rate": 2.0915032679738565e-06, "logits/chosen": -2.26574969291687, "logits/rejected": -2.1446757316589355, "logps/chosen": -198.3191680908203, "logps/rejected": -209.6562957763672, "loss": 0.0531, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.008327952586114407, "rewards/margins": 0.015999721363186836, "rewards/rejected": -0.02432767115533352, "step": 320 }, { "epoch": 0.04, "learning_rate": 2.1568627450980393e-06, "logits/chosen": -2.3831756114959717, "logits/rejected": -2.3971455097198486, "logps/chosen": -276.6793518066406, "logps/rejected": -228.6278076171875, "loss": 0.0517, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0005464582936838269, "rewards/margins": 0.01151005458086729, "rewards/rejected": -0.01096359547227621, "step": 330 }, { "epoch": 0.04, "learning_rate": 2.222222222222222e-06, "logits/chosen": -2.4563076496124268, "logits/rejected": -1.999681830406189, "logps/chosen": -280.4342956542969, "logps/rejected": -198.6059112548828, "loss": 0.0388, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.005417247768491507, "rewards/margins": 0.012641100212931633, "rewards/rejected": -0.018058348447084427, "step": 340 }, { "epoch": 0.05, "learning_rate": 2.2875816993464053e-06, "logits/chosen": -2.3461554050445557, "logits/rejected": -2.3123745918273926, "logps/chosen": -206.0986328125, "logps/rejected": -226.97412109375, "loss": 0.0511, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.026498574763536453, "rewards/margins": 0.02056313492357731, "rewards/rejected": -0.04706170782446861, "step": 350 }, { "epoch": 0.05, "learning_rate": 2.3529411764705885e-06, "logits/chosen": -2.330589532852173, "logits/rejected": -2.1479365825653076, "logps/chosen": -273.8431396484375, "logps/rejected": -248.7611541748047, "loss": 0.0634, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.027227917686104774, "rewards/margins": 0.02004820853471756, "rewards/rejected": -0.047276128083467484, "step": 360 }, { "epoch": 0.05, "learning_rate": 2.4183006535947716e-06, "logits/chosen": -2.193765163421631, "logits/rejected": -2.45023250579834, "logps/chosen": -143.6634979248047, "logps/rejected": -311.2171936035156, "loss": 0.0394, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.012883084826171398, "rewards/margins": 0.008494162932038307, "rewards/rejected": -0.02137724682688713, "step": 370 }, { "epoch": 0.05, "learning_rate": 2.4836601307189544e-06, "logits/chosen": -2.365126609802246, "logits/rejected": -2.4081246852874756, "logps/chosen": -183.79946899414062, "logps/rejected": -189.6509246826172, "loss": 0.0491, "rewards/accuracies": 0.625, "rewards/chosen": -0.015578614547848701, "rewards/margins": 0.023884663358330727, "rewards/rejected": -0.03946327418088913, "step": 380 }, { "epoch": 0.05, "learning_rate": 2.549019607843137e-06, "logits/chosen": -2.3143937587738037, "logits/rejected": -1.9923683404922485, "logps/chosen": -280.3426818847656, "logps/rejected": -271.82958984375, "loss": 0.0526, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0271341260522604, "rewards/margins": 0.02181713469326496, "rewards/rejected": -0.04895126074552536, "step": 390 }, { "epoch": 0.05, "learning_rate": 2.6143790849673208e-06, "logits/chosen": -2.2406041622161865, "logits/rejected": -2.3530118465423584, "logps/chosen": -267.77569580078125, "logps/rejected": -281.50048828125, "loss": 0.0361, "rewards/accuracies": 0.625, "rewards/chosen": -0.012241894379258156, "rewards/margins": 0.0220674779266119, "rewards/rejected": -0.034309376031160355, "step": 400 }, { "epoch": 0.05, "eval_logits/chosen": -2.3719825744628906, "eval_logits/rejected": -2.149343490600586, "eval_logps/chosen": -251.4062957763672, "eval_logps/rejected": -239.94168090820312, "eval_loss": 0.04466630890965462, "eval_rewards/accuracies": 0.5989999771118164, "eval_rewards/chosen": -0.022184353321790695, "eval_rewards/margins": 0.021345192566514015, "eval_rewards/rejected": -0.04352954775094986, "eval_runtime": 1443.3038, "eval_samples_per_second": 1.386, "eval_steps_per_second": 0.346, "step": 400 }, { "epoch": 0.05, "learning_rate": 2.6797385620915036e-06, "logits/chosen": -2.4779820442199707, "logits/rejected": -2.141010284423828, "logps/chosen": -242.4623260498047, "logps/rejected": -268.7309265136719, "loss": 0.018, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02627101168036461, "rewards/margins": 0.028306175023317337, "rewards/rejected": -0.054577190428972244, "step": 410 }, { "epoch": 0.05, "learning_rate": 2.7450980392156867e-06, "logits/chosen": -2.3489201068878174, "logits/rejected": -2.229403018951416, "logps/chosen": -345.72723388671875, "logps/rejected": -308.21630859375, "loss": 0.0527, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.045316558331251144, "rewards/margins": 0.00618447782471776, "rewards/rejected": -0.05150103569030762, "step": 420 }, { "epoch": 0.06, "learning_rate": 2.8104575163398695e-06, "logits/chosen": -2.3438632488250732, "logits/rejected": -2.246393918991089, "logps/chosen": -290.4280090332031, "logps/rejected": -294.6549072265625, "loss": 0.0546, "rewards/accuracies": 0.625, "rewards/chosen": -0.03901774808764458, "rewards/margins": 0.0222849752753973, "rewards/rejected": -0.06130272150039673, "step": 430 }, { "epoch": 0.06, "learning_rate": 2.8758169934640523e-06, "logits/chosen": -2.5370495319366455, "logits/rejected": -2.246542453765869, "logps/chosen": -333.0699462890625, "logps/rejected": -308.8509216308594, "loss": 0.0466, "rewards/accuracies": 0.625, "rewards/chosen": -0.05149116367101669, "rewards/margins": 0.011986413970589638, "rewards/rejected": -0.06347757577896118, "step": 440 }, { "epoch": 0.06, "learning_rate": 2.9411764705882355e-06, "logits/chosen": -2.256922721862793, "logits/rejected": -2.279179096221924, "logps/chosen": -222.6754608154297, "logps/rejected": -259.6260681152344, "loss": 0.041, "rewards/accuracies": 0.75, "rewards/chosen": -0.03808627650141716, "rewards/margins": 0.028404083102941513, "rewards/rejected": -0.06649035960435867, "step": 450 }, { "epoch": 0.06, "learning_rate": 3.0065359477124182e-06, "logits/chosen": -2.428729295730591, "logits/rejected": -1.8685014247894287, "logps/chosen": -323.7979431152344, "logps/rejected": -251.86865234375, "loss": 0.0505, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.047601353377103806, "rewards/margins": 0.028006980195641518, "rewards/rejected": -0.07560833543539047, "step": 460 }, { "epoch": 0.06, "learning_rate": 3.071895424836602e-06, "logits/chosen": -2.2810492515563965, "logits/rejected": -2.073118209838867, "logps/chosen": -265.302001953125, "logps/rejected": -265.73822021484375, "loss": 0.0375, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05743040516972542, "rewards/margins": 0.024930477142333984, "rewards/rejected": -0.0823608785867691, "step": 470 }, { "epoch": 0.06, "learning_rate": 3.1372549019607846e-06, "logits/chosen": -2.216215133666992, "logits/rejected": -2.1602706909179688, "logps/chosen": -319.3440856933594, "logps/rejected": -367.47821044921875, "loss": 0.0436, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05836561322212219, "rewards/margins": 0.03132264316082001, "rewards/rejected": -0.0896882563829422, "step": 480 }, { "epoch": 0.06, "learning_rate": 3.2026143790849674e-06, "logits/chosen": -2.3500797748565674, "logits/rejected": -2.0155441761016846, "logps/chosen": -270.86224365234375, "logps/rejected": -262.1877746582031, "loss": 0.0339, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.053871434181928635, "rewards/margins": 0.03459259867668152, "rewards/rejected": -0.08846403658390045, "step": 490 }, { "epoch": 0.07, "learning_rate": 3.2679738562091506e-06, "logits/chosen": -2.3256402015686035, "logits/rejected": -2.2846150398254395, "logps/chosen": -219.56997680664062, "logps/rejected": -250.40908813476562, "loss": 0.0375, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04319126158952713, "rewards/margins": 0.045080628246068954, "rewards/rejected": -0.08827189356088638, "step": 500 }, { "epoch": 0.07, "eval_logits/chosen": -2.1960113048553467, "eval_logits/rejected": -1.9821337461471558, "eval_logps/chosen": -282.6957702636719, "eval_logps/rejected": -281.328857421875, "eval_loss": 0.04168427363038063, "eval_rewards/accuracies": 0.5889999866485596, "eval_rewards/chosen": -0.053473833948373795, "eval_rewards/margins": 0.03144287317991257, "eval_rewards/rejected": -0.08491671830415726, "eval_runtime": 1440.5515, "eval_samples_per_second": 1.388, "eval_steps_per_second": 0.347, "step": 500 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -2.3372669219970703, "logits/rejected": -2.0579018592834473, "logps/chosen": -387.23651123046875, "logps/rejected": -321.8766784667969, "loss": 0.0369, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.060873858630657196, "rewards/margins": 0.029842043295502663, "rewards/rejected": -0.09071590006351471, "step": 510 }, { "epoch": 0.07, "learning_rate": 3.398692810457517e-06, "logits/chosen": -2.3675150871276855, "logits/rejected": -2.1596837043762207, "logps/chosen": -315.5011291503906, "logps/rejected": -302.6841735839844, "loss": 0.0343, "rewards/accuracies": 0.625, "rewards/chosen": -0.06973306834697723, "rewards/margins": 0.030995529145002365, "rewards/rejected": -0.1007285937666893, "step": 520 }, { "epoch": 0.07, "learning_rate": 3.4640522875816997e-06, "logits/chosen": -2.0667433738708496, "logits/rejected": -1.9001293182373047, "logps/chosen": -321.52752685546875, "logps/rejected": -310.5598449707031, "loss": 0.0567, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.05219079926609993, "rewards/margins": 0.025245213881134987, "rewards/rejected": -0.07743600755929947, "step": 530 }, { "epoch": 0.07, "learning_rate": 3.529411764705883e-06, "logits/chosen": -2.101945400238037, "logits/rejected": -2.064239740371704, "logps/chosen": -284.4571838378906, "logps/rejected": -292.69781494140625, "loss": 0.0239, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04057732969522476, "rewards/margins": 0.016831254586577415, "rewards/rejected": -0.05740858241915703, "step": 540 }, { "epoch": 0.07, "learning_rate": 3.5947712418300657e-06, "logits/chosen": -1.988926887512207, "logits/rejected": -1.6984357833862305, "logps/chosen": -287.50714111328125, "logps/rejected": -260.623291015625, "loss": 0.0384, "rewards/accuracies": 0.625, "rewards/chosen": -0.04455497860908508, "rewards/margins": 0.03517274186015129, "rewards/rejected": -0.07972772419452667, "step": 550 }, { "epoch": 0.07, "learning_rate": 3.6601307189542484e-06, "logits/chosen": -1.844098687171936, "logits/rejected": -1.8456268310546875, "logps/chosen": -269.31488037109375, "logps/rejected": -283.35833740234375, "loss": 0.0366, "rewards/accuracies": 0.625, "rewards/chosen": -0.04365937411785126, "rewards/margins": 0.03285984694957733, "rewards/rejected": -0.07651921361684799, "step": 560 }, { "epoch": 0.07, "learning_rate": 3.7254901960784316e-06, "logits/chosen": -1.7108196020126343, "logits/rejected": -1.5092352628707886, "logps/chosen": -300.17010498046875, "logps/rejected": -268.1441650390625, "loss": 0.0626, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.054984550923109055, "rewards/margins": 0.05392267554998398, "rewards/rejected": -0.10890723764896393, "step": 570 }, { "epoch": 0.08, "learning_rate": 3.7908496732026144e-06, "logits/chosen": -1.59710693359375, "logits/rejected": -1.3931443691253662, "logps/chosen": -384.5987548828125, "logps/rejected": -411.75543212890625, "loss": 0.0391, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1424575299024582, "rewards/margins": 0.06394679844379425, "rewards/rejected": -0.20640432834625244, "step": 580 }, { "epoch": 0.08, "learning_rate": 3.856209150326798e-06, "logits/chosen": -1.6260541677474976, "logits/rejected": -1.5756090879440308, "logps/chosen": -455.5960388183594, "logps/rejected": -484.4208068847656, "loss": 0.0187, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22536692023277283, "rewards/margins": 0.037786681205034256, "rewards/rejected": -0.2631535530090332, "step": 590 }, { "epoch": 0.08, "learning_rate": 3.92156862745098e-06, "logits/chosen": -1.6431983709335327, "logits/rejected": -1.5343234539031982, "logps/chosen": -412.9248962402344, "logps/rejected": -452.26123046875, "loss": 0.0522, "rewards/accuracies": 0.625, "rewards/chosen": -0.18326084315776825, "rewards/margins": 0.04902006313204765, "rewards/rejected": -0.2322808802127838, "step": 600 }, { "epoch": 0.08, "eval_logits/chosen": -1.543282151222229, "eval_logits/rejected": -1.3696963787078857, "eval_logps/chosen": -398.6434326171875, "eval_logps/rejected": -395.07135009765625, "eval_loss": 0.04322844743728638, "eval_rewards/accuracies": 0.5920000076293945, "eval_rewards/chosen": -0.1694214940071106, "eval_rewards/margins": 0.029237719252705574, "eval_rewards/rejected": -0.19865919649600983, "eval_runtime": 1442.1284, "eval_samples_per_second": 1.387, "eval_steps_per_second": 0.347, "step": 600 }, { "epoch": 0.08, "learning_rate": 3.986928104575164e-06, "logits/chosen": -1.5562020540237427, "logits/rejected": -1.4817330837249756, "logps/chosen": -438.84893798828125, "logps/rejected": -425.55291748046875, "loss": 0.0428, "rewards/accuracies": 0.5, "rewards/chosen": -0.15865842998027802, "rewards/margins": 0.030415236949920654, "rewards/rejected": -0.18907368183135986, "step": 610 }, { "epoch": 0.08, "learning_rate": 4.052287581699347e-06, "logits/chosen": -1.5809907913208008, "logits/rejected": -1.4904913902282715, "logps/chosen": -343.4562072753906, "logps/rejected": -360.49688720703125, "loss": 0.041, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13616259396076202, "rewards/margins": 0.044137731194496155, "rewards/rejected": -0.18030032515525818, "step": 620 }, { "epoch": 0.08, "learning_rate": 4.11764705882353e-06, "logits/chosen": -1.6854896545410156, "logits/rejected": -1.5019071102142334, "logps/chosen": -343.8302001953125, "logps/rejected": -372.08160400390625, "loss": 0.0619, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1621018797159195, "rewards/margins": 0.04577670991420746, "rewards/rejected": -0.20787855982780457, "step": 630 }, { "epoch": 0.08, "learning_rate": 4.183006535947713e-06, "logits/chosen": -1.8720099925994873, "logits/rejected": -1.6704416275024414, "logps/chosen": -352.1206970214844, "logps/rejected": -401.9758605957031, "loss": 0.0299, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08925464749336243, "rewards/margins": 0.047790177166461945, "rewards/rejected": -0.13704481720924377, "step": 640 }, { "epoch": 0.09, "learning_rate": 4.2483660130718954e-06, "logits/chosen": -1.858128547668457, "logits/rejected": -1.6746127605438232, "logps/chosen": -382.6357727050781, "logps/rejected": -382.96856689453125, "loss": 0.0419, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11193374544382095, "rewards/margins": 0.04092060774564743, "rewards/rejected": -0.15285435318946838, "step": 650 }, { "epoch": 0.09, "learning_rate": 4.313725490196079e-06, "logits/chosen": -1.919390320777893, "logits/rejected": -1.8492457866668701, "logps/chosen": -251.84130859375, "logps/rejected": -260.2295227050781, "loss": 0.0455, "rewards/accuracies": 0.625, "rewards/chosen": -0.07586243003606796, "rewards/margins": 0.030439767986536026, "rewards/rejected": -0.1063021868467331, "step": 660 }, { "epoch": 0.09, "learning_rate": 4.379084967320262e-06, "logits/chosen": -1.8406785726547241, "logits/rejected": -1.662712812423706, "logps/chosen": -301.2870788574219, "logps/rejected": -314.25634765625, "loss": 0.0305, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08839339017868042, "rewards/margins": 0.03664010763168335, "rewards/rejected": -0.12503348290920258, "step": 670 }, { "epoch": 0.09, "learning_rate": 4.444444444444444e-06, "logits/chosen": -1.7664210796356201, "logits/rejected": -1.671459436416626, "logps/chosen": -286.22589111328125, "logps/rejected": -287.08258056640625, "loss": 0.0587, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08919215947389603, "rewards/margins": 0.03164225071668625, "rewards/rejected": -0.12083441019058228, "step": 680 }, { "epoch": 0.09, "learning_rate": 4.509803921568628e-06, "logits/chosen": -1.9861505031585693, "logits/rejected": -1.7048215866088867, "logps/chosen": -246.3960418701172, "logps/rejected": -274.64788818359375, "loss": 0.0582, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09032480418682098, "rewards/margins": 0.04387999698519707, "rewards/rejected": -0.13420480489730835, "step": 690 }, { "epoch": 0.09, "learning_rate": 4.5751633986928105e-06, "logits/chosen": -1.708458662033081, "logits/rejected": -1.5508501529693604, "logps/chosen": -397.4908447265625, "logps/rejected": -426.00238037109375, "loss": 0.0453, "rewards/accuracies": 0.625, "rewards/chosen": -0.13721361756324768, "rewards/margins": 0.05807988718152046, "rewards/rejected": -0.19529351592063904, "step": 700 }, { "epoch": 0.09, "eval_logits/chosen": -1.913664698600769, "eval_logits/rejected": -1.720253825187683, "eval_logps/chosen": -295.2010803222656, "eval_logps/rejected": -297.8420104980469, "eval_loss": 0.036652155220508575, "eval_rewards/accuracies": 0.578000009059906, "eval_rewards/chosen": -0.06597913801670074, "eval_rewards/margins": 0.03545072674751282, "eval_rewards/rejected": -0.10142985731363297, "eval_runtime": 1439.0401, "eval_samples_per_second": 1.39, "eval_steps_per_second": 0.347, "step": 700 }, { "epoch": 0.09, "learning_rate": 4.640522875816994e-06, "logits/chosen": -1.906247854232788, "logits/rejected": -1.518561601638794, "logps/chosen": -363.5531311035156, "logps/rejected": -340.15576171875, "loss": 0.0493, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06743152439594269, "rewards/margins": 0.04517130181193352, "rewards/rejected": -0.1126028299331665, "step": 710 }, { "epoch": 0.09, "learning_rate": 4.705882352941177e-06, "logits/chosen": -1.8197141885757446, "logits/rejected": -1.843785285949707, "logps/chosen": -263.13616943359375, "logps/rejected": -338.6583557128906, "loss": 0.0308, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.053458016365766525, "rewards/margins": 0.059612225741147995, "rewards/rejected": -0.11307024955749512, "step": 720 }, { "epoch": 0.1, "learning_rate": 4.77124183006536e-06, "logits/chosen": -2.073086977005005, "logits/rejected": -1.8402042388916016, "logps/chosen": -275.48406982421875, "logps/rejected": -269.55206298828125, "loss": 0.0303, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05122733116149902, "rewards/margins": 0.03283599764108658, "rewards/rejected": -0.0840633362531662, "step": 730 }, { "epoch": 0.1, "learning_rate": 4.836601307189543e-06, "logits/chosen": -2.1057915687561035, "logits/rejected": -1.833142876625061, "logps/chosen": -252.7379913330078, "logps/rejected": -284.5447692871094, "loss": 0.0547, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04275885596871376, "rewards/margins": 0.04886358603835106, "rewards/rejected": -0.09162244200706482, "step": 740 }, { "epoch": 0.1, "learning_rate": 4.901960784313726e-06, "logits/chosen": -1.849066138267517, "logits/rejected": -1.7588335275650024, "logps/chosen": -321.33642578125, "logps/rejected": -316.1219177246094, "loss": 0.0409, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07632167637348175, "rewards/margins": 0.045088060200214386, "rewards/rejected": -0.12140975147485733, "step": 750 }, { "epoch": 0.1, "learning_rate": 4.967320261437909e-06, "logits/chosen": -1.7766132354736328, "logits/rejected": -1.5358455181121826, "logps/chosen": -321.90472412109375, "logps/rejected": -320.8185729980469, "loss": 0.0324, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.116396464407444, "rewards/margins": 0.049995969980955124, "rewards/rejected": -0.16639243066310883, "step": 760 }, { "epoch": 0.1, "learning_rate": 4.99999347843947e-06, "logits/chosen": -1.7069709300994873, "logits/rejected": -1.5624696016311646, "logps/chosen": -318.8409423828125, "logps/rejected": -326.75592041015625, "loss": 0.0303, "rewards/accuracies": 0.5, "rewards/chosen": -0.11583375930786133, "rewards/margins": 0.03832734376192093, "rewards/rejected": -0.15416111052036285, "step": 770 }, { "epoch": 0.1, "learning_rate": 4.999941306159375e-06, "logits/chosen": -1.9203029870986938, "logits/rejected": -1.57107675075531, "logps/chosen": -395.36517333984375, "logps/rejected": -420.06805419921875, "loss": 0.0375, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.13647016882896423, "rewards/margins": 0.027954230085015297, "rewards/rejected": -0.16442438960075378, "step": 780 }, { "epoch": 0.1, "learning_rate": 4.999836962687967e-06, "logits/chosen": -1.6315686702728271, "logits/rejected": -1.6286077499389648, "logps/chosen": -376.17401123046875, "logps/rejected": -505.39215087890625, "loss": 0.0463, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15708838403224945, "rewards/margins": 0.09491723030805588, "rewards/rejected": -0.25200560688972473, "step": 790 }, { "epoch": 0.1, "learning_rate": 4.999680450202786e-06, "logits/chosen": -1.5963799953460693, "logits/rejected": -1.3815653324127197, "logps/chosen": -450.90875244140625, "logps/rejected": -442.9236755371094, "loss": 0.0293, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1555173099040985, "rewards/margins": 0.05233161896467209, "rewards/rejected": -0.2078489363193512, "step": 800 }, { "epoch": 0.1, "eval_logits/chosen": -1.6149731874465942, "eval_logits/rejected": -1.4339385032653809, "eval_logps/chosen": -400.8042297363281, "eval_logps/rejected": -417.92388916015625, "eval_loss": 0.036732107400894165, "eval_rewards/accuracies": 0.5929999947547913, "eval_rewards/chosen": -0.17158228158950806, "eval_rewards/margins": 0.04992944374680519, "eval_rewards/rejected": -0.22151173651218414, "eval_runtime": 1440.7969, "eval_samples_per_second": 1.388, "eval_steps_per_second": 0.347, "step": 800 }, { "epoch": 0.11, "learning_rate": 4.999471771970087e-06, "logits/chosen": -1.705596923828125, "logits/rejected": -1.6261297464370728, "logps/chosen": -342.60693359375, "logps/rejected": -366.328857421875, "loss": 0.0526, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.18038082122802734, "rewards/margins": 0.04092780128121376, "rewards/rejected": -0.2213086187839508, "step": 810 }, { "epoch": 0.11, "learning_rate": 4.999210932344767e-06, "logits/chosen": -1.823772668838501, "logits/rejected": -1.6082426309585571, "logps/chosen": -384.11773681640625, "logps/rejected": -382.7273254394531, "loss": 0.0207, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13001203536987305, "rewards/margins": 0.03671548515558243, "rewards/rejected": -0.16672751307487488, "step": 820 }, { "epoch": 0.11, "learning_rate": 4.998897936770281e-06, "logits/chosen": -1.5681250095367432, "logits/rejected": -1.2824045419692993, "logps/chosen": -402.26470947265625, "logps/rejected": -407.77325439453125, "loss": 0.0417, "rewards/accuracies": 0.625, "rewards/chosen": -0.18893824517726898, "rewards/margins": 0.027542661875486374, "rewards/rejected": -0.21648092567920685, "step": 830 }, { "epoch": 0.11, "learning_rate": 4.998532791778521e-06, "logits/chosen": -1.6705471277236938, "logits/rejected": -1.455125093460083, "logps/chosen": -416.44256591796875, "logps/rejected": -387.32745361328125, "loss": 0.0257, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18690225481987, "rewards/margins": 0.033063314855098724, "rewards/rejected": -0.21996554732322693, "step": 840 }, { "epoch": 0.11, "learning_rate": 4.9981155049896885e-06, "logits/chosen": -1.6054198741912842, "logits/rejected": -1.5043563842773438, "logps/chosen": -479.97418212890625, "logps/rejected": -479.62744140625, "loss": 0.0373, "rewards/accuracies": 0.625, "rewards/chosen": -0.22980158030986786, "rewards/margins": 0.019741864874958992, "rewards/rejected": -0.2495434284210205, "step": 850 }, { "epoch": 0.11, "learning_rate": 4.997646085112126e-06, "logits/chosen": -1.805079698562622, "logits/rejected": -1.661866545677185, "logps/chosen": -368.40509033203125, "logps/rejected": -385.2732238769531, "loss": 0.0196, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.14316768944263458, "rewards/margins": 0.04519253969192505, "rewards/rejected": -0.18836024403572083, "step": 860 }, { "epoch": 0.11, "learning_rate": 4.997124541942141e-06, "logits/chosen": -1.7849347591400146, "logits/rejected": -1.5895960330963135, "logps/chosen": -354.07965087890625, "logps/rejected": -437.007080078125, "loss": 0.0246, "rewards/accuracies": 0.75, "rewards/chosen": -0.11094705015420914, "rewards/margins": 0.07083548605442047, "rewards/rejected": -0.181782528758049, "step": 870 }, { "epoch": 0.12, "learning_rate": 4.996550886363801e-06, "logits/chosen": -2.149895191192627, "logits/rejected": -1.9984849691390991, "logps/chosen": -333.11785888671875, "logps/rejected": -338.1785888671875, "loss": 0.0345, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07574061304330826, "rewards/margins": 0.024002335965633392, "rewards/rejected": -0.09974294900894165, "step": 880 }, { "epoch": 0.12, "learning_rate": 4.995925130348706e-06, "logits/chosen": -1.9792038202285767, "logits/rejected": -1.534693956375122, "logps/chosen": -342.51165771484375, "logps/rejected": -287.411376953125, "loss": 0.0586, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0806887149810791, "rewards/margins": 0.02763846516609192, "rewards/rejected": -0.10832718759775162, "step": 890 }, { "epoch": 0.12, "learning_rate": 4.995247286955734e-06, "logits/chosen": -1.6908349990844727, "logits/rejected": -1.4713640213012695, "logps/chosen": -348.95599365234375, "logps/rejected": -420.2156677246094, "loss": 0.0241, "rewards/accuracies": 0.625, "rewards/chosen": -0.11906653642654419, "rewards/margins": 0.07349709421396255, "rewards/rejected": -0.19256362318992615, "step": 900 }, { "epoch": 0.12, "learning_rate": 4.994517370330779e-06, "logits/chosen": -1.4035985469818115, "logits/rejected": -1.4106028079986572, "logps/chosen": -345.45013427734375, "logps/rejected": -391.02325439453125, "loss": 0.0404, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.15998545289039612, "rewards/margins": 0.01910357177257538, "rewards/rejected": -0.1790890246629715, "step": 910 }, { "epoch": 0.12, "learning_rate": 4.993735395706446e-06, "logits/chosen": -1.485642671585083, "logits/rejected": -1.1201026439666748, "logps/chosen": -413.4599609375, "logps/rejected": -430.8916931152344, "loss": 0.036, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12864863872528076, "rewards/margins": 0.05424485355615616, "rewards/rejected": -0.18289348483085632, "step": 920 }, { "epoch": 0.12, "learning_rate": 4.992901379401737e-06, "logits/chosen": -1.4833259582519531, "logits/rejected": -1.2941524982452393, "logps/chosen": -289.5825500488281, "logps/rejected": -385.73004150390625, "loss": 0.0387, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11567602306604385, "rewards/margins": 0.0963902622461319, "rewards/rejected": -0.21206626296043396, "step": 930 }, { "epoch": 0.12, "learning_rate": 4.992015338821711e-06, "logits/chosen": -1.4885437488555908, "logits/rejected": -1.1388311386108398, "logps/chosen": -402.7807312011719, "logps/rejected": -380.0301208496094, "loss": 0.0407, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18832194805145264, "rewards/margins": 0.04915202781558037, "rewards/rejected": -0.2374739646911621, "step": 940 }, { "epoch": 0.12, "learning_rate": 4.991077292457117e-06, "logits/chosen": -1.579649806022644, "logits/rejected": -1.3794090747833252, "logps/chosen": -329.3354797363281, "logps/rejected": -379.41485595703125, "loss": 0.0326, "rewards/accuracies": 0.75, "rewards/chosen": -0.1469370424747467, "rewards/margins": 0.0816231220960617, "rewards/rejected": -0.2285601794719696, "step": 950 }, { "epoch": 0.13, "learning_rate": 4.990087259884016e-06, "logits/chosen": -1.5824581384658813, "logits/rejected": -1.4533617496490479, "logps/chosen": -362.3585510253906, "logps/rejected": -411.91851806640625, "loss": 0.029, "rewards/accuracies": 0.625, "rewards/chosen": -0.13144941627979279, "rewards/margins": 0.04948444664478302, "rewards/rejected": -0.1809338480234146, "step": 960 }, { "epoch": 0.13, "learning_rate": 4.989045261763362e-06, "logits/chosen": -1.6257492303848267, "logits/rejected": -1.4243541955947876, "logps/chosen": -384.20977783203125, "logps/rejected": -451.828369140625, "loss": 0.0237, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1589529812335968, "rewards/margins": 0.07473161071538925, "rewards/rejected": -0.23368458449840546, "step": 970 }, { "epoch": 0.13, "learning_rate": 4.98795131984058e-06, "logits/chosen": -1.3598047494888306, "logits/rejected": -1.2197717428207397, "logps/chosen": -463.22418212890625, "logps/rejected": -492.2627868652344, "loss": 0.0335, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21916159987449646, "rewards/margins": 0.07268805801868439, "rewards/rejected": -0.29184961318969727, "step": 980 }, { "epoch": 0.13, "learning_rate": 4.986805456945107e-06, "logits/chosen": -1.4510526657104492, "logits/rejected": -1.3373464345932007, "logps/chosen": -411.4930725097656, "logps/rejected": -502.29949951171875, "loss": 0.0464, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17068354785442352, "rewards/margins": 0.10880188643932343, "rewards/rejected": -0.27948540449142456, "step": 990 }, { "epoch": 0.13, "learning_rate": 4.985607696989919e-06, "logits/chosen": -1.6246265172958374, "logits/rejected": -1.4347529411315918, "logps/chosen": -413.3321228027344, "logps/rejected": -440.0856018066406, "loss": 0.0315, "rewards/accuracies": 0.625, "rewards/chosen": -0.13195686042308807, "rewards/margins": 0.04154813662171364, "rewards/rejected": -0.17350497841835022, "step": 1000 }, { "epoch": 0.13, "learning_rate": 4.984358064971026e-06, "logits/chosen": -1.6722373962402344, "logits/rejected": -1.495236873626709, "logps/chosen": -372.73748779296875, "logps/rejected": -423.34722900390625, "loss": 0.0223, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11866787821054459, "rewards/margins": 0.07718131691217422, "rewards/rejected": -0.19584921002388, "step": 1010 }, { "epoch": 0.13, "learning_rate": 4.983056586966958e-06, "logits/chosen": -1.5427746772766113, "logits/rejected": -1.3719583749771118, "logps/chosen": -477.5443420410156, "logps/rejected": -532.8753051757812, "loss": 0.0258, "rewards/accuracies": 0.625, "rewards/chosen": -0.21644172072410583, "rewards/margins": 0.0712512657046318, "rewards/rejected": -0.28769299387931824, "step": 1020 }, { "epoch": 0.13, "learning_rate": 4.981703290138215e-06, "logits/chosen": -1.6714264154434204, "logits/rejected": -1.6751312017440796, "logps/chosen": -413.89111328125, "logps/rejected": -477.0470275878906, "loss": 0.0294, "rewards/accuracies": 0.625, "rewards/chosen": -0.1675490289926529, "rewards/margins": 0.07263518124818802, "rewards/rejected": -0.2401842325925827, "step": 1030 }, { "epoch": 0.14, "learning_rate": 4.980298202726706e-06, "logits/chosen": -1.7971988916397095, "logits/rejected": -1.7383168935775757, "logps/chosen": -371.4641418457031, "logps/rejected": -371.2574768066406, "loss": 0.0383, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.11729536950588226, "rewards/margins": 0.010803603567183018, "rewards/rejected": -0.1280989795923233, "step": 1040 }, { "epoch": 0.14, "learning_rate": 4.978841354055148e-06, "logits/chosen": -1.7846040725708008, "logits/rejected": -1.6594231128692627, "logps/chosen": -304.92608642578125, "logps/rejected": -339.9076232910156, "loss": 0.0473, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11667434871196747, "rewards/margins": 0.052279382944107056, "rewards/rejected": -0.16895373165607452, "step": 1050 }, { "epoch": 0.14, "learning_rate": 4.977332774526471e-06, "logits/chosen": -1.429678201675415, "logits/rejected": -1.5469611883163452, "logps/chosen": -360.56683349609375, "logps/rejected": -457.0309143066406, "loss": 0.0376, "rewards/accuracies": 0.75, "rewards/chosen": -0.14282606542110443, "rewards/margins": 0.07488436251878738, "rewards/rejected": -0.21771040558815002, "step": 1060 }, { "epoch": 0.14, "learning_rate": 4.97577249562317e-06, "logits/chosen": -1.576290488243103, "logits/rejected": -1.553577184677124, "logps/chosen": -473.0867614746094, "logps/rejected": -547.1744995117188, "loss": 0.0359, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23830334842205048, "rewards/margins": 0.049412429332733154, "rewards/rejected": -0.28771576285362244, "step": 1070 }, { "epoch": 0.14, "learning_rate": 4.974160549906652e-06, "logits/chosen": -1.705106496810913, "logits/rejected": -1.5367138385772705, "logps/chosen": -476.2462463378906, "logps/rejected": -533.9549560546875, "loss": 0.0264, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18861953914165497, "rewards/margins": 0.09369265288114548, "rewards/rejected": -0.28231221437454224, "step": 1080 }, { "epoch": 0.14, "learning_rate": 4.972496971016559e-06, "logits/chosen": -1.363030195236206, "logits/rejected": -1.2015846967697144, "logps/chosen": -455.76190185546875, "logps/rejected": -489.392578125, "loss": 0.0184, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23590807616710663, "rewards/margins": 0.06948628276586533, "rewards/rejected": -0.30539435148239136, "step": 1090 }, { "epoch": 0.14, "learning_rate": 4.9707817936700635e-06, "logits/chosen": -1.6423311233520508, "logits/rejected": -1.3507311344146729, "logps/chosen": -496.7262268066406, "logps/rejected": -511.8069763183594, "loss": 0.0264, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.26126766204833984, "rewards/margins": 0.049921147525310516, "rewards/rejected": -0.31118884682655334, "step": 1100 }, { "epoch": 0.15, "learning_rate": 4.969015053661142e-06, "logits/chosen": -1.4872214794158936, "logits/rejected": -1.2768076658248901, "logps/chosen": -513.9466552734375, "logps/rejected": -542.5833129882812, "loss": 0.0264, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2550693154335022, "rewards/margins": 0.0754098892211914, "rewards/rejected": -0.3304792046546936, "step": 1110 }, { "epoch": 0.15, "learning_rate": 4.967196787859835e-06, "logits/chosen": -1.291974663734436, "logits/rejected": -1.210644006729126, "logps/chosen": -585.2589111328125, "logps/rejected": -605.524169921875, "loss": 0.0302, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2933773994445801, "rewards/margins": 0.0462600514292717, "rewards/rejected": -0.3396374583244324, "step": 1120 }, { "epoch": 0.15, "learning_rate": 4.965327034211469e-06, "logits/chosen": -1.5257681608200073, "logits/rejected": -1.2594645023345947, "logps/chosen": -451.5184631347656, "logps/rejected": -486.90106201171875, "loss": 0.0249, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23999419808387756, "rewards/margins": 0.07712169736623764, "rewards/rejected": -0.3171158730983734, "step": 1130 }, { "epoch": 0.15, "learning_rate": 4.96340583173587e-06, "logits/chosen": -1.6421066522598267, "logits/rejected": -1.514484167098999, "logps/chosen": -499.04534912109375, "logps/rejected": -516.4204711914062, "loss": 0.0295, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2360762655735016, "rewards/margins": 0.044597141444683075, "rewards/rejected": -0.2806733548641205, "step": 1140 }, { "epoch": 0.15, "learning_rate": 4.96143322052655e-06, "logits/chosen": -1.6334667205810547, "logits/rejected": -1.4992587566375732, "logps/chosen": -452.73309326171875, "logps/rejected": -560.7843627929688, "loss": 0.0387, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23054762184619904, "rewards/margins": 0.08870735764503479, "rewards/rejected": -0.31925496459007263, "step": 1150 }, { "epoch": 0.15, "learning_rate": 4.959409241749864e-06, "logits/chosen": -1.4871851205825806, "logits/rejected": -1.441433072090149, "logps/chosen": -430.04681396484375, "logps/rejected": -512.469970703125, "loss": 0.0234, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20715589821338654, "rewards/margins": 0.08203965425491333, "rewards/rejected": -0.28919556736946106, "step": 1160 }, { "epoch": 0.15, "learning_rate": 4.957333937644159e-06, "logits/chosen": -1.6897249221801758, "logits/rejected": -1.5073974132537842, "logps/chosen": -444.21759033203125, "logps/rejected": -495.2613220214844, "loss": 0.024, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24953778088092804, "rewards/margins": 0.06193218380212784, "rewards/rejected": -0.3114699721336365, "step": 1170 }, { "epoch": 0.15, "learning_rate": 4.955207351518885e-06, "logits/chosen": -1.5480334758758545, "logits/rejected": -1.4697473049163818, "logps/chosen": -444.0738220214844, "logps/rejected": -484.5289001464844, "loss": 0.0415, "rewards/accuracies": 0.5, "rewards/chosen": -0.24453409016132355, "rewards/margins": 0.05837539955973625, "rewards/rejected": -0.3029094934463501, "step": 1180 }, { "epoch": 0.16, "learning_rate": 4.953029527753699e-06, "logits/chosen": -1.5584051609039307, "logits/rejected": -1.3973197937011719, "logps/chosen": -479.6776428222656, "logps/rejected": -499.9449157714844, "loss": 0.0258, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23742356896400452, "rewards/margins": 0.03735958784818649, "rewards/rejected": -0.274783194065094, "step": 1190 }, { "epoch": 0.16, "learning_rate": 4.95080051179753e-06, "logits/chosen": -1.706897497177124, "logits/rejected": -1.5994467735290527, "logps/chosen": -428.048828125, "logps/rejected": -485.77484130859375, "loss": 0.0433, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22842660546302795, "rewards/margins": 0.05509146302938461, "rewards/rejected": -0.28351807594299316, "step": 1200 }, { "epoch": 0.16, "learning_rate": 4.948520350167637e-06, "logits/chosen": -1.6655988693237305, "logits/rejected": -1.3038753271102905, "logps/chosen": -553.6268920898438, "logps/rejected": -579.7947998046875, "loss": 0.0286, "rewards/accuracies": 0.75, "rewards/chosen": -0.25176820158958435, "rewards/margins": 0.08280982077121735, "rewards/rejected": -0.3345780074596405, "step": 1210 }, { "epoch": 0.16, "learning_rate": 4.946189090448639e-06, "logits/chosen": -1.7785139083862305, "logits/rejected": -1.4575735330581665, "logps/chosen": -575.2974853515625, "logps/rejected": -596.218994140625, "loss": 0.0325, "rewards/accuracies": 0.625, "rewards/chosen": -0.267650842666626, "rewards/margins": 0.060673050582408905, "rewards/rejected": -0.3283239006996155, "step": 1220 }, { "epoch": 0.16, "learning_rate": 4.943806781291515e-06, "logits/chosen": -1.4108306169509888, "logits/rejected": -1.3590025901794434, "logps/chosen": -536.73974609375, "logps/rejected": -646.03564453125, "loss": 0.0208, "rewards/accuracies": 0.75, "rewards/chosen": -0.28589382767677307, "rewards/margins": 0.08323682844638824, "rewards/rejected": -0.3691306710243225, "step": 1230 }, { "epoch": 0.16, "learning_rate": 4.941373472412595e-06, "logits/chosen": -1.4176275730133057, "logits/rejected": -1.099277138710022, "logps/chosen": -666.0406494140625, "logps/rejected": -683.25732421875, "loss": 0.0249, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3972935080528259, "rewards/margins": 0.09708867967128754, "rewards/rejected": -0.49438220262527466, "step": 1240 }, { "epoch": 0.16, "learning_rate": 4.938889214592521e-06, "logits/chosen": -0.8007619976997375, "logits/rejected": -0.8756643533706665, "logps/chosen": -701.1500854492188, "logps/rejected": -785.47509765625, "loss": 0.0421, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5342551469802856, "rewards/margins": 0.05736679583787918, "rewards/rejected": -0.5916219353675842, "step": 1250 }, { "epoch": 0.16, "learning_rate": 4.936354059675186e-06, "logits/chosen": -0.9256356954574585, "logits/rejected": -0.7004662752151489, "logps/chosen": -707.5337524414062, "logps/rejected": -772.3132934570312, "loss": 0.0261, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.49646010994911194, "rewards/margins": 0.06648706644773483, "rewards/rejected": -0.562947154045105, "step": 1260 }, { "epoch": 0.17, "learning_rate": 4.933768060566654e-06, "logits/chosen": -0.9167349934577942, "logits/rejected": -0.5076829195022583, "logps/chosen": -597.2662353515625, "logps/rejected": -646.2260131835938, "loss": 0.0286, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.37925904989242554, "rewards/margins": 0.08603726327419281, "rewards/rejected": -0.46529620885849, "step": 1270 }, { "epoch": 0.17, "learning_rate": 4.931131271234052e-06, "logits/chosen": -1.0448763370513916, "logits/rejected": -0.955971896648407, "logps/chosen": -576.7291259765625, "logps/rejected": -602.1259155273438, "loss": 0.0272, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3331785798072815, "rewards/margins": 0.03749316558241844, "rewards/rejected": -0.37067174911499023, "step": 1280 }, { "epoch": 0.17, "learning_rate": 4.928443746704448e-06, "logits/chosen": -1.3860580921173096, "logits/rejected": -0.961715817451477, "logps/chosen": -480.794189453125, "logps/rejected": -501.71533203125, "loss": 0.037, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2662775218486786, "rewards/margins": 0.06484885513782501, "rewards/rejected": -0.3311263918876648, "step": 1290 }, { "epoch": 0.17, "learning_rate": 4.925705543063703e-06, "logits/chosen": -1.3405392169952393, "logits/rejected": -1.417043924331665, "logps/chosen": -486.39019775390625, "logps/rejected": -610.2715454101562, "loss": 0.0271, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2749415338039398, "rewards/margins": 0.08056138455867767, "rewards/rejected": -0.3555029332637787, "step": 1300 }, { "epoch": 0.17, "learning_rate": 4.922916717455297e-06, "logits/chosen": -1.174640417098999, "logits/rejected": -1.264804482460022, "logps/chosen": -490.49859619140625, "logps/rejected": -569.15576171875, "loss": 0.0467, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2704632580280304, "rewards/margins": 0.07853202521800995, "rewards/rejected": -0.34899526834487915, "step": 1310 }, { "epoch": 0.17, "learning_rate": 4.920077328079136e-06, "logits/chosen": -1.4887348413467407, "logits/rejected": -1.2705087661743164, "logps/chosen": -557.512939453125, "logps/rejected": -601.0181884765625, "loss": 0.0241, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2937076985836029, "rewards/margins": 0.08439463376998901, "rewards/rejected": -0.37810230255126953, "step": 1320 }, { "epoch": 0.17, "learning_rate": 4.9171874341903445e-06, "logits/chosen": -1.3629595041275024, "logits/rejected": -1.1199499368667603, "logps/chosen": -478.30938720703125, "logps/rejected": -541.1429443359375, "loss": 0.0545, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27040910720825195, "rewards/margins": 0.08212222903966904, "rewards/rejected": -0.352531373500824, "step": 1330 }, { "epoch": 0.18, "learning_rate": 4.914247096098019e-06, "logits/chosen": -1.4378149509429932, "logits/rejected": -1.3526177406311035, "logps/chosen": -428.2542419433594, "logps/rejected": -478.46258544921875, "loss": 0.0311, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21419191360473633, "rewards/margins": 0.06993551552295685, "rewards/rejected": -0.284127414226532, "step": 1340 }, { "epoch": 0.18, "learning_rate": 4.911256375163977e-06, "logits/chosen": -1.6823108196258545, "logits/rejected": -1.413735032081604, "logps/chosen": -429.1358337402344, "logps/rejected": -421.41314697265625, "loss": 0.0326, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17538395524024963, "rewards/margins": 0.0550597682595253, "rewards/rejected": -0.23044368624687195, "step": 1350 }, { "epoch": 0.18, "learning_rate": 4.908215333801474e-06, "logits/chosen": -2.0725080966949463, "logits/rejected": -1.7535841464996338, "logps/chosen": -449.07574462890625, "logps/rejected": -431.6366271972656, "loss": 0.0289, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16928282380104065, "rewards/margins": 0.05664173886179924, "rewards/rejected": -0.22592458128929138, "step": 1360 }, { "epoch": 0.18, "learning_rate": 4.9051240354739004e-06, "logits/chosen": -1.919226050376892, "logits/rejected": -1.7065900564193726, "logps/chosen": -440.53509521484375, "logps/rejected": -461.86859130859375, "loss": 0.0229, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1572817862033844, "rewards/margins": 0.03301681950688362, "rewards/rejected": -0.19029861688613892, "step": 1370 }, { "epoch": 0.18, "learning_rate": 4.901982544693457e-06, "logits/chosen": -1.7934865951538086, "logits/rejected": -1.782326102256775, "logps/chosen": -299.3521423339844, "logps/rejected": -408.4831237792969, "loss": 0.0475, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15632550418376923, "rewards/margins": 0.054883696138858795, "rewards/rejected": -0.21120920777320862, "step": 1380 }, { "epoch": 0.18, "learning_rate": 4.898790927019809e-06, "logits/chosen": -1.8103210926055908, "logits/rejected": -1.8971868753433228, "logps/chosen": -365.04541015625, "logps/rejected": -415.27081298828125, "loss": 0.0301, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1553061306476593, "rewards/margins": 0.0542365200817585, "rewards/rejected": -0.2095426619052887, "step": 1390 }, { "epoch": 0.18, "learning_rate": 4.895549249058718e-06, "logits/chosen": -1.8186776638031006, "logits/rejected": -1.4331042766571045, "logps/chosen": -465.67742919921875, "logps/rejected": -492.672119140625, "loss": 0.0242, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19692638516426086, "rewards/margins": 0.09286610037088394, "rewards/rejected": -0.2897924780845642, "step": 1400 }, { "epoch": 0.18, "learning_rate": 4.892257578460656e-06, "logits/chosen": -1.6929572820663452, "logits/rejected": -1.64870285987854, "logps/chosen": -414.07781982421875, "logps/rejected": -508.06951904296875, "loss": 0.0281, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25049352645874023, "rewards/margins": 0.09370686113834381, "rewards/rejected": -0.34420040249824524, "step": 1410 }, { "epoch": 0.19, "learning_rate": 4.888915983919383e-06, "logits/chosen": -1.7767932415008545, "logits/rejected": -1.725423812866211, "logps/chosen": -399.81024169921875, "logps/rejected": -501.4336853027344, "loss": 0.0237, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2222074717283249, "rewards/margins": 0.09464211016893387, "rewards/rejected": -0.31684961915016174, "step": 1420 }, { "epoch": 0.19, "learning_rate": 4.885524535170525e-06, "logits/chosen": -1.7155097723007202, "logits/rejected": -1.3688064813613892, "logps/chosen": -456.03289794921875, "logps/rejected": -557.8484497070312, "loss": 0.0305, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2546646296977997, "rewards/margins": 0.1224946603178978, "rewards/rejected": -0.3771592676639557, "step": 1430 }, { "epoch": 0.19, "learning_rate": 4.882083302990113e-06, "logits/chosen": -1.7398130893707275, "logits/rejected": -1.6435096263885498, "logps/chosen": -479.67724609375, "logps/rejected": -534.8831176757812, "loss": 0.0193, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.26064735651016235, "rewards/margins": 0.06385330110788345, "rewards/rejected": -0.3245006203651428, "step": 1440 }, { "epoch": 0.19, "learning_rate": 4.878592359193104e-06, "logits/chosen": -1.9220349788665771, "logits/rejected": -1.4295719861984253, "logps/chosen": -535.4910278320312, "logps/rejected": -584.61083984375, "loss": 0.0351, "rewards/accuracies": 0.75, "rewards/chosen": -0.3067534863948822, "rewards/margins": 0.10750001668930054, "rewards/rejected": -0.4142535328865051, "step": 1450 }, { "epoch": 0.19, "learning_rate": 4.875051776631888e-06, "logits/chosen": -1.6607071161270142, "logits/rejected": -1.691383719444275, "logps/chosen": -569.18310546875, "logps/rejected": -715.18896484375, "loss": 0.0263, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.317788302898407, "rewards/margins": 0.0920090302824974, "rewards/rejected": -0.409797340631485, "step": 1460 }, { "epoch": 0.19, "learning_rate": 4.871461629194764e-06, "logits/chosen": -1.9914582967758179, "logits/rejected": -1.7897332906723022, "logps/chosen": -530.7254638671875, "logps/rejected": -543.0486450195312, "loss": 0.0248, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23011811077594757, "rewards/margins": 0.05637926980853081, "rewards/rejected": -0.28649741411209106, "step": 1470 }, { "epoch": 0.19, "learning_rate": 4.8678219918043984e-06, "logits/chosen": -1.9362154006958008, "logits/rejected": -1.8457800149917603, "logps/chosen": -456.870849609375, "logps/rejected": -537.7033081054688, "loss": 0.0182, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.217064768075943, "rewards/margins": 0.058478035032749176, "rewards/rejected": -0.2755427956581116, "step": 1480 }, { "epoch": 0.19, "learning_rate": 4.864132940416262e-06, "logits/chosen": -1.9576715230941772, "logits/rejected": -1.652612328529358, "logps/chosen": -404.54388427734375, "logps/rejected": -444.6311950683594, "loss": 0.024, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23168055713176727, "rewards/margins": 0.046421170234680176, "rewards/rejected": -0.27810171246528625, "step": 1490 }, { "epoch": 0.2, "learning_rate": 4.860394552017044e-06, "logits/chosen": -1.8545684814453125, "logits/rejected": -1.750740647315979, "logps/chosen": -519.2760009765625, "logps/rejected": -541.0926513671875, "loss": 0.0208, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24759700894355774, "rewards/margins": 0.05812455341219902, "rewards/rejected": -0.30572155117988586, "step": 1500 }, { "epoch": 0.2, "learning_rate": 4.856606904623047e-06, "logits/chosen": -1.9752925634384155, "logits/rejected": -1.880743384361267, "logps/chosen": -484.6067810058594, "logps/rejected": -532.9691162109375, "loss": 0.0309, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.26403746008872986, "rewards/margins": 0.08272302895784378, "rewards/rejected": -0.34676045179367065, "step": 1510 }, { "epoch": 0.2, "learning_rate": 4.852770077278557e-06, "logits/chosen": -1.7194467782974243, "logits/rejected": -1.3660582304000854, "logps/chosen": -500.27288818359375, "logps/rejected": -488.1220703125, "loss": 0.0299, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2599658668041229, "rewards/margins": 0.049370136111974716, "rewards/rejected": -0.30933600664138794, "step": 1520 }, { "epoch": 0.2, "learning_rate": 4.848884150054196e-06, "logits/chosen": -1.3870841264724731, "logits/rejected": -1.4338102340698242, "logps/chosen": -483.5819396972656, "logps/rejected": -598.3069458007812, "loss": 0.0304, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2766098380088806, "rewards/margins": 0.07799626886844635, "rewards/rejected": -0.35460609197616577, "step": 1530 }, { "epoch": 0.2, "learning_rate": 4.8449492040452495e-06, "logits/chosen": -1.6025880575180054, "logits/rejected": -1.4526028633117676, "logps/chosen": -582.9497680664062, "logps/rejected": -625.2781982421875, "loss": 0.027, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3081502318382263, "rewards/margins": 0.07421085983514786, "rewards/rejected": -0.38236111402511597, "step": 1540 }, { "epoch": 0.2, "learning_rate": 4.840965321369973e-06, "logits/chosen": -1.6838009357452393, "logits/rejected": -1.7545547485351562, "logps/chosen": -545.9749145507812, "logps/rejected": -636.2731323242188, "loss": 0.041, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.28291061520576477, "rewards/margins": 0.08299050480127335, "rewards/rejected": -0.3659011125564575, "step": 1550 }, { "epoch": 0.2, "learning_rate": 4.8369325851678795e-06, "logits/chosen": -1.7689971923828125, "logits/rejected": -1.4405544996261597, "logps/chosen": -497.4356384277344, "logps/rejected": -542.1776123046875, "loss": 0.018, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25298741459846497, "rewards/margins": 0.09371381998062134, "rewards/rejected": -0.3467012047767639, "step": 1560 }, { "epoch": 0.21, "learning_rate": 4.832851079598007e-06, "logits/chosen": -1.7749683856964111, "logits/rejected": -1.641271948814392, "logps/chosen": -448.66693115234375, "logps/rejected": -474.647705078125, "loss": 0.0347, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24436303973197937, "rewards/margins": 0.04413959011435509, "rewards/rejected": -0.28850263357162476, "step": 1570 }, { "epoch": 0.21, "learning_rate": 4.828720889837158e-06, "logits/chosen": -1.956264853477478, "logits/rejected": -1.7235233783721924, "logps/chosen": -536.6216430664062, "logps/rejected": -541.3743286132812, "loss": 0.0425, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.25714707374572754, "rewards/margins": 0.03778282552957535, "rewards/rejected": -0.2949298620223999, "step": 1580 }, { "epoch": 0.21, "learning_rate": 4.824542102078125e-06, "logits/chosen": -1.8579609394073486, "logits/rejected": -1.6114164590835571, "logps/chosen": -391.9680480957031, "logps/rejected": -420.4110412597656, "loss": 0.031, "rewards/accuracies": 0.75, "rewards/chosen": -0.16827771067619324, "rewards/margins": 0.08321143686771393, "rewards/rejected": -0.25148916244506836, "step": 1590 }, { "epoch": 0.21, "learning_rate": 4.820314803527888e-06, "logits/chosen": -1.9021852016448975, "logits/rejected": -1.8077495098114014, "logps/chosen": -357.4749755859375, "logps/rejected": -429.97650146484375, "loss": 0.0314, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1697140634059906, "rewards/margins": 0.061640460044145584, "rewards/rejected": -0.2313545197248459, "step": 1600 }, { "epoch": 0.21, "learning_rate": 4.816039082405799e-06, "logits/chosen": -1.6327106952667236, "logits/rejected": -1.6976954936981201, "logps/chosen": -423.43048095703125, "logps/rejected": -547.1001586914062, "loss": 0.0311, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24598722159862518, "rewards/margins": 0.09015451371669769, "rewards/rejected": -0.33614176511764526, "step": 1610 }, { "epoch": 0.21, "learning_rate": 4.81171502794174e-06, "logits/chosen": -1.66916024684906, "logits/rejected": -1.5378539562225342, "logps/chosen": -506.19635009765625, "logps/rejected": -581.7791748046875, "loss": 0.0208, "rewards/accuracies": 0.75, "rewards/chosen": -0.27827128767967224, "rewards/margins": 0.09809643775224686, "rewards/rejected": -0.3763677477836609, "step": 1620 }, { "epoch": 0.21, "learning_rate": 4.8073427303742584e-06, "logits/chosen": -1.7626116275787354, "logits/rejected": -1.553881287574768, "logps/chosen": -516.6483154296875, "logps/rejected": -498.420166015625, "loss": 0.0344, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.27453574538230896, "rewards/margins": 0.05228264257311821, "rewards/rejected": -0.32681840658187866, "step": 1630 }, { "epoch": 0.21, "learning_rate": 4.802922280948685e-06, "logits/chosen": -1.7148334980010986, "logits/rejected": -1.5481488704681396, "logps/chosen": -483.62457275390625, "logps/rejected": -519.8236083984375, "loss": 0.0229, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2525627911090851, "rewards/margins": 0.06651215255260468, "rewards/rejected": -0.31907492876052856, "step": 1640 }, { "epoch": 0.22, "learning_rate": 4.798453771915231e-06, "logits/chosen": -1.9586117267608643, "logits/rejected": -1.7584108114242554, "logps/chosen": -392.68023681640625, "logps/rejected": -416.2002868652344, "loss": 0.0379, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1924244910478592, "rewards/margins": 0.055614493787288666, "rewards/rejected": -0.24803900718688965, "step": 1650 }, { "epoch": 0.22, "learning_rate": 4.793937296527062e-06, "logits/chosen": -2.0236639976501465, "logits/rejected": -1.7089784145355225, "logps/chosen": -432.085693359375, "logps/rejected": -505.3924255371094, "loss": 0.0283, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16688159108161926, "rewards/margins": 0.10469367355108261, "rewards/rejected": -0.27157527208328247, "step": 1660 }, { "epoch": 0.22, "learning_rate": 4.78937294903835e-06, "logits/chosen": -2.0606894493103027, "logits/rejected": -1.7672550678253174, "logps/chosen": -512.7127075195312, "logps/rejected": -511.38885498046875, "loss": 0.0209, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19819924235343933, "rewards/margins": 0.06016005203127861, "rewards/rejected": -0.25835928320884705, "step": 1670 }, { "epoch": 0.22, "learning_rate": 4.78476082470231e-06, "logits/chosen": -1.9348901510238647, "logits/rejected": -1.9231693744659424, "logps/chosen": -417.9811096191406, "logps/rejected": -467.3179626464844, "loss": 0.0243, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19126436114311218, "rewards/margins": 0.053105421364307404, "rewards/rejected": -0.24436978995800018, "step": 1680 }, { "epoch": 0.22, "learning_rate": 4.780101019769212e-06, "logits/chosen": -1.9238088130950928, "logits/rejected": -1.7408262491226196, "logps/chosen": -373.9527587890625, "logps/rejected": -410.34417724609375, "loss": 0.0173, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17050370573997498, "rewards/margins": 0.051046222448349, "rewards/rejected": -0.22154991328716278, "step": 1690 }, { "epoch": 0.22, "learning_rate": 4.775393631484368e-06, "logits/chosen": -2.0598666667938232, "logits/rejected": -1.823992133140564, "logps/chosen": -535.9193115234375, "logps/rejected": -628.5404663085938, "loss": 0.027, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21464088559150696, "rewards/margins": 0.08915476500988007, "rewards/rejected": -0.30379563570022583, "step": 1700 }, { "epoch": 0.22, "learning_rate": 4.770638758086105e-06, "logits/chosen": -2.0537681579589844, "logits/rejected": -1.927835464477539, "logps/chosen": -368.2241516113281, "logps/rejected": -378.3135070800781, "loss": 0.0323, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1430542767047882, "rewards/margins": 0.03868260234594345, "rewards/rejected": -0.18173687160015106, "step": 1710 }, { "epoch": 0.23, "learning_rate": 4.7658364988037184e-06, "logits/chosen": -2.0646414756774902, "logits/rejected": -1.9343478679656982, "logps/chosen": -310.01153564453125, "logps/rejected": -428.88818359375, "loss": 0.0283, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13848961889743805, "rewards/margins": 0.09961990267038345, "rewards/rejected": -0.2381094992160797, "step": 1720 }, { "epoch": 0.23, "learning_rate": 4.760986953855395e-06, "logits/chosen": -2.019463062286377, "logits/rejected": -1.7855058908462524, "logps/chosen": -395.25616455078125, "logps/rejected": -376.6690368652344, "loss": 0.0311, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.13688886165618896, "rewards/margins": 0.013272324576973915, "rewards/rejected": -0.15016120672225952, "step": 1730 }, { "epoch": 0.23, "learning_rate": 4.756090224446127e-06, "logits/chosen": -1.9321496486663818, "logits/rejected": -1.702087163925171, "logps/chosen": -372.3777770996094, "logps/rejected": -402.9239807128906, "loss": 0.036, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13679590821266174, "rewards/margins": 0.08134065568447113, "rewards/rejected": -0.21813654899597168, "step": 1740 }, { "epoch": 0.23, "learning_rate": 4.7511464127655945e-06, "logits/chosen": -1.7360193729400635, "logits/rejected": -1.7117149829864502, "logps/chosen": -460.23626708984375, "logps/rejected": -568.3236694335938, "loss": 0.0285, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2555629014968872, "rewards/margins": 0.08793105185031891, "rewards/rejected": -0.34349390864372253, "step": 1750 }, { "epoch": 0.23, "learning_rate": 4.74615562198604e-06, "logits/chosen": -1.9758737087249756, "logits/rejected": -1.7570078372955322, "logps/chosen": -331.95977783203125, "logps/rejected": -342.317626953125, "loss": 0.0222, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1414758712053299, "rewards/margins": 0.056294094771146774, "rewards/rejected": -0.19776996970176697, "step": 1760 }, { "epoch": 0.23, "learning_rate": 4.741117956260107e-06, "logits/chosen": -2.1880033016204834, "logits/rejected": -2.02624773979187, "logps/chosen": -412.73516845703125, "logps/rejected": -436.26715087890625, "loss": 0.0234, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12030048668384552, "rewards/margins": 0.055233072489500046, "rewards/rejected": -0.17553356289863586, "step": 1770 }, { "epoch": 0.23, "learning_rate": 4.736033520718672e-06, "logits/chosen": -2.026815891265869, "logits/rejected": -1.8514766693115234, "logps/chosen": -287.23846435546875, "logps/rejected": -331.16925048828125, "loss": 0.0234, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11404535919427872, "rewards/margins": 0.07378792762756348, "rewards/rejected": -0.1878332495689392, "step": 1780 }, { "epoch": 0.23, "learning_rate": 4.730902421468652e-06, "logits/chosen": -2.1646840572357178, "logits/rejected": -2.077221155166626, "logps/chosen": -350.36798095703125, "logps/rejected": -382.8150939941406, "loss": 0.0395, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12857167422771454, "rewards/margins": 0.0784262865781784, "rewards/rejected": -0.20699796080589294, "step": 1790 }, { "epoch": 0.24, "learning_rate": 4.7257247655907854e-06, "logits/chosen": -2.1851401329040527, "logits/rejected": -1.7716038227081299, "logps/chosen": -369.41815185546875, "logps/rejected": -374.6217346191406, "loss": 0.0333, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11951597034931183, "rewards/margins": 0.05485420301556587, "rewards/rejected": -0.1743701696395874, "step": 1800 }, { "epoch": 0.24, "learning_rate": 4.720500661137397e-06, "logits/chosen": -1.9286584854125977, "logits/rejected": -2.1050808429718018, "logps/chosen": -266.3800354003906, "logps/rejected": -395.1708984375, "loss": 0.0261, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10980401188135147, "rewards/margins": 0.10229452699422836, "rewards/rejected": -0.21209852397441864, "step": 1810 }, { "epoch": 0.24, "learning_rate": 4.71523021713015e-06, "logits/chosen": -2.213164806365967, "logits/rejected": -2.015493869781494, "logps/chosen": -420.3340759277344, "logps/rejected": -426.08673095703125, "loss": 0.0297, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1583167314529419, "rewards/margins": 0.049007922410964966, "rewards/rejected": -0.20732466876506805, "step": 1820 }, { "epoch": 0.24, "learning_rate": 4.709913543557761e-06, "logits/chosen": -2.0623435974121094, "logits/rejected": -1.8618663549423218, "logps/chosen": -477.73114013671875, "logps/rejected": -519.5567626953125, "loss": 0.0231, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20417337119579315, "rewards/margins": 0.0879688486456871, "rewards/rejected": -0.29214224219322205, "step": 1830 }, { "epoch": 0.24, "learning_rate": 4.704550751373715e-06, "logits/chosen": -2.1619040966033936, "logits/rejected": -2.092615842819214, "logps/chosen": -377.7481994628906, "logps/rejected": -447.01483154296875, "loss": 0.0338, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.18411315977573395, "rewards/margins": 0.0370350107550621, "rewards/rejected": -0.22114817798137665, "step": 1840 }, { "epoch": 0.24, "learning_rate": 4.699141952493941e-06, "logits/chosen": -2.0571186542510986, "logits/rejected": -2.0089094638824463, "logps/chosen": -400.6955261230469, "logps/rejected": -421.14239501953125, "loss": 0.0286, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15329428017139435, "rewards/margins": 0.08874006569385529, "rewards/rejected": -0.24203434586524963, "step": 1850 }, { "epoch": 0.24, "learning_rate": 4.6936872597944814e-06, "logits/chosen": -1.934605598449707, "logits/rejected": -1.6609560251235962, "logps/chosen": -484.6827087402344, "logps/rejected": -568.5869140625, "loss": 0.0279, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2339555323123932, "rewards/margins": 0.09535963833332062, "rewards/rejected": -0.3293151557445526, "step": 1860 }, { "epoch": 0.24, "learning_rate": 4.688186787109136e-06, "logits/chosen": -1.7950502634048462, "logits/rejected": -1.7118091583251953, "logps/chosen": -497.66485595703125, "logps/rejected": -588.157958984375, "loss": 0.0291, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3197007477283478, "rewards/margins": 0.10076627880334854, "rewards/rejected": -0.4204670786857605, "step": 1870 }, { "epoch": 0.25, "learning_rate": 4.682640649227085e-06, "logits/chosen": -1.7749334573745728, "logits/rejected": -1.3422480821609497, "logps/chosen": -497.0650939941406, "logps/rejected": -546.1441040039062, "loss": 0.0325, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2521812915802002, "rewards/margins": 0.07832489907741547, "rewards/rejected": -0.33050617575645447, "step": 1880 }, { "epoch": 0.25, "learning_rate": 4.677048961890492e-06, "logits/chosen": -1.8528788089752197, "logits/rejected": -1.7184594869613647, "logps/chosen": -415.6058654785156, "logps/rejected": -478.2289123535156, "loss": 0.0405, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20940780639648438, "rewards/margins": 0.07339660823345184, "rewards/rejected": -0.2828044295310974, "step": 1890 }, { "epoch": 0.25, "learning_rate": 4.671411841792096e-06, "logits/chosen": -2.135364294052124, "logits/rejected": -1.7172048091888428, "logps/chosen": -539.20947265625, "logps/rejected": -566.8685302734375, "loss": 0.0218, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22130024433135986, "rewards/margins": 0.10534927994012833, "rewards/rejected": -0.3266495168209076, "step": 1900 }, { "epoch": 0.25, "learning_rate": 4.665729406572764e-06, "logits/chosen": -1.6798137426376343, "logits/rejected": -1.5157406330108643, "logps/chosen": -387.11602783203125, "logps/rejected": -461.33233642578125, "loss": 0.0323, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22341910004615784, "rewards/margins": 0.08191484212875366, "rewards/rejected": -0.3053339421749115, "step": 1910 }, { "epoch": 0.25, "learning_rate": 4.660001774819048e-06, "logits/chosen": -1.7033799886703491, "logits/rejected": -1.7451461553573608, "logps/chosen": -341.09649658203125, "logps/rejected": -428.5357971191406, "loss": 0.0189, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19849085807800293, "rewards/margins": 0.05713418126106262, "rewards/rejected": -0.25562506914138794, "step": 1920 }, { "epoch": 0.25, "learning_rate": 4.654229066060702e-06, "logits/chosen": -1.5551412105560303, "logits/rejected": -1.6741459369659424, "logps/chosen": -441.34124755859375, "logps/rejected": -620.9329833984375, "loss": 0.0287, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2283930480480194, "rewards/margins": 0.0852317065000534, "rewards/rejected": -0.313624769449234, "step": 1930 }, { "epoch": 0.25, "learning_rate": 4.648411400768193e-06, "logits/chosen": -1.7304413318634033, "logits/rejected": -1.6656568050384521, "logps/chosen": -443.6297912597656, "logps/rejected": -532.1062622070312, "loss": 0.0171, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21100816130638123, "rewards/margins": 0.061530113220214844, "rewards/rejected": -0.27253827452659607, "step": 1940 }, { "epoch": 0.26, "learning_rate": 4.642548900350182e-06, "logits/chosen": -1.7242944240570068, "logits/rejected": -1.59256112575531, "logps/chosen": -377.0804138183594, "logps/rejected": -457.90997314453125, "loss": 0.0386, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14849740266799927, "rewards/margins": 0.10453041642904282, "rewards/rejected": -0.2530278265476227, "step": 1950 }, { "epoch": 0.26, "learning_rate": 4.636641687150994e-06, "logits/chosen": -2.036111354827881, "logits/rejected": -1.890380620956421, "logps/chosen": -356.2049255371094, "logps/rejected": -338.0189208984375, "loss": 0.0203, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1230909675359726, "rewards/margins": 0.023669157177209854, "rewards/rejected": -0.14676013588905334, "step": 1960 }, { "epoch": 0.26, "learning_rate": 4.6306898844480615e-06, "logits/chosen": -1.8678182363510132, "logits/rejected": -1.7673766613006592, "logps/chosen": -297.96588134765625, "logps/rejected": -384.05517578125, "loss": 0.0215, "rewards/accuracies": 0.625, "rewards/chosen": -0.13255058228969574, "rewards/margins": 0.0716322511434555, "rewards/rejected": -0.20418281853199005, "step": 1970 }, { "epoch": 0.26, "learning_rate": 4.624693616449358e-06, "logits/chosen": -1.9337873458862305, "logits/rejected": -1.5866873264312744, "logps/chosen": -407.74786376953125, "logps/rejected": -436.3265686035156, "loss": 0.0285, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15489144623279572, "rewards/margins": 0.07904152572154999, "rewards/rejected": -0.23393294215202332, "step": 1980 }, { "epoch": 0.26, "learning_rate": 4.6186530082908e-06, "logits/chosen": -1.6662366390228271, "logits/rejected": -1.661988615989685, "logps/chosen": -442.3397521972656, "logps/rejected": -513.10400390625, "loss": 0.0194, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2055976837873459, "rewards/margins": 0.07577277719974518, "rewards/rejected": -0.28137046098709106, "step": 1990 }, { "epoch": 0.26, "learning_rate": 4.612568186033633e-06, "logits/chosen": -1.5893526077270508, "logits/rejected": -1.5049242973327637, "logps/chosen": -458.06011962890625, "logps/rejected": -476.4659118652344, "loss": 0.041, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2440434992313385, "rewards/margins": 0.08281457424163818, "rewards/rejected": -0.3268580436706543, "step": 2000 }, { "epoch": 0.26, "learning_rate": 4.6064392766618125e-06, "logits/chosen": -1.651850938796997, "logits/rejected": -1.6366875171661377, "logps/chosen": -400.959716796875, "logps/rejected": -483.428466796875, "loss": 0.0471, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.213557630777359, "rewards/margins": 0.08097215741872787, "rewards/rejected": -0.2945297360420227, "step": 2010 }, { "epoch": 0.26, "learning_rate": 4.60026640807934e-06, "logits/chosen": -1.7265008687973022, "logits/rejected": -1.6939365863800049, "logps/chosen": -471.2493591308594, "logps/rejected": -543.5922241210938, "loss": 0.0404, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21301202476024628, "rewards/margins": 0.07630138099193573, "rewards/rejected": -0.289313405752182, "step": 2020 }, { "epoch": 0.27, "learning_rate": 4.594049709107604e-06, "logits/chosen": -1.7392613887786865, "logits/rejected": -1.7632348537445068, "logps/chosen": -470.91925048828125, "logps/rejected": -563.3527221679688, "loss": 0.0225, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25252705812454224, "rewards/margins": 0.08756853640079498, "rewards/rejected": -0.340095579624176, "step": 2030 }, { "epoch": 0.27, "learning_rate": 4.587789309482687e-06, "logits/chosen": -1.8418395519256592, "logits/rejected": -1.7986156940460205, "logps/chosen": -431.211181640625, "logps/rejected": -576.7343139648438, "loss": 0.029, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23480498790740967, "rewards/margins": 0.12721502780914307, "rewards/rejected": -0.3620200455188751, "step": 2040 }, { "epoch": 0.27, "learning_rate": 4.581485339852659e-06, "logits/chosen": -1.8456242084503174, "logits/rejected": -1.393122673034668, "logps/chosen": -443.29388427734375, "logps/rejected": -463.262451171875, "loss": 0.0325, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.256413996219635, "rewards/margins": 0.0756414532661438, "rewards/rejected": -0.3320554494857788, "step": 2050 }, { "epoch": 0.27, "learning_rate": 4.5751379317748514e-06, "logits/chosen": -1.776293158531189, "logits/rejected": -1.5436232089996338, "logps/chosen": -528.5411987304688, "logps/rejected": -530.0343017578125, "loss": 0.0339, "rewards/accuracies": 0.625, "rewards/chosen": -0.25058040022850037, "rewards/margins": 0.08744814246892929, "rewards/rejected": -0.33802855014801025, "step": 2060 }, { "epoch": 0.27, "learning_rate": 4.56874721771311e-06, "logits/chosen": -2.0413804054260254, "logits/rejected": -1.6784255504608154, "logps/chosen": -477.5301818847656, "logps/rejected": -507.48114013671875, "loss": 0.0261, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1695762276649475, "rewards/margins": 0.07611775398254395, "rewards/rejected": -0.24569399654865265, "step": 2070 }, { "epoch": 0.27, "learning_rate": 4.562313331035032e-06, "logits/chosen": -1.8038721084594727, "logits/rejected": -1.6684455871582031, "logps/chosen": -363.0096130371094, "logps/rejected": -452.68707275390625, "loss": 0.0247, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17797474563121796, "rewards/margins": 0.0909547433257103, "rewards/rejected": -0.26892951130867004, "step": 2080 }, { "epoch": 0.27, "learning_rate": 4.555836406009183e-06, "logits/chosen": -1.9092241525650024, "logits/rejected": -1.715597152709961, "logps/chosen": -422.19610595703125, "logps/rejected": -476.93792724609375, "loss": 0.0259, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1975400447845459, "rewards/margins": 0.09823472797870636, "rewards/rejected": -0.29577475786209106, "step": 2090 }, { "epoch": 0.27, "learning_rate": 4.5493165778022945e-06, "logits/chosen": -1.5767942667007446, "logits/rejected": -1.6466798782348633, "logps/chosen": -441.18658447265625, "logps/rejected": -488.8585510253906, "loss": 0.0358, "rewards/accuracies": 0.625, "rewards/chosen": -0.22285763919353485, "rewards/margins": 0.05790979415178299, "rewards/rejected": -0.28076741099357605, "step": 2100 }, { "epoch": 0.28, "learning_rate": 4.542753982476443e-06, "logits/chosen": -1.8367239236831665, "logits/rejected": -1.640899896621704, "logps/chosen": -344.5665588378906, "logps/rejected": -532.787841796875, "loss": 0.0223, "rewards/accuracies": 0.75, "rewards/chosen": -0.18316304683685303, "rewards/margins": 0.17273655533790588, "rewards/rejected": -0.3558996021747589, "step": 2110 }, { "epoch": 0.28, "learning_rate": 4.53614875698621e-06, "logits/chosen": -2.014596462249756, "logits/rejected": -1.665479063987732, "logps/chosen": -449.51776123046875, "logps/rejected": -545.7579345703125, "loss": 0.0184, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22679471969604492, "rewards/margins": 0.08117818832397461, "rewards/rejected": -0.30797290802001953, "step": 2120 }, { "epoch": 0.28, "learning_rate": 4.529501039175824e-06, "logits/chosen": -1.8626056909561157, "logits/rejected": -1.5862220525741577, "logps/chosen": -426.05096435546875, "logps/rejected": -422.3216857910156, "loss": 0.0171, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.186855748295784, "rewards/margins": 0.055360354483127594, "rewards/rejected": -0.2422161102294922, "step": 2130 }, { "epoch": 0.28, "learning_rate": 4.522810967776287e-06, "logits/chosen": -1.802381157875061, "logits/rejected": -1.7314026355743408, "logps/chosen": -434.59930419921875, "logps/rejected": -450.11834716796875, "loss": 0.0378, "rewards/accuracies": 0.625, "rewards/chosen": -0.21993453800678253, "rewards/margins": 0.04317254200577736, "rewards/rejected": -0.2631070613861084, "step": 2140 }, { "epoch": 0.28, "learning_rate": 4.516078682402473e-06, "logits/chosen": -1.7470426559448242, "logits/rejected": -1.7412372827529907, "logps/chosen": -477.63922119140625, "logps/rejected": -509.67266845703125, "loss": 0.0222, "rewards/accuracies": 0.625, "rewards/chosen": -0.20522888004779816, "rewards/margins": 0.05774085968732834, "rewards/rejected": -0.2629697620868683, "step": 2150 }, { "epoch": 0.28, "learning_rate": 4.509304323550221e-06, "logits/chosen": -1.9978656768798828, "logits/rejected": -1.8754138946533203, "logps/chosen": -414.8675842285156, "logps/rejected": -469.20684814453125, "loss": 0.0182, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2199893444776535, "rewards/margins": 0.059581078588962555, "rewards/rejected": -0.27957040071487427, "step": 2160 }, { "epoch": 0.28, "learning_rate": 4.502488032593398e-06, "logits/chosen": -1.8610658645629883, "logits/rejected": -1.6103994846343994, "logps/chosen": -438.61956787109375, "logps/rejected": -502.69952392578125, "loss": 0.0189, "rewards/accuracies": 0.625, "rewards/chosen": -0.2141924798488617, "rewards/margins": 0.08607035875320435, "rewards/rejected": -0.30026283860206604, "step": 2170 }, { "epoch": 0.29, "learning_rate": 4.495629951780951e-06, "logits/chosen": -1.8735036849975586, "logits/rejected": -1.7977253198623657, "logps/chosen": -410.9207458496094, "logps/rejected": -541.6500854492188, "loss": 0.0274, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2333689033985138, "rewards/margins": 0.10300314426422119, "rewards/rejected": -0.336372047662735, "step": 2180 }, { "epoch": 0.29, "learning_rate": 4.488730224233941e-06, "logits/chosen": -1.5623286962509155, "logits/rejected": -1.3899695873260498, "logps/chosen": -454.39599609375, "logps/rejected": -538.2545166015625, "loss": 0.053, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2773645222187042, "rewards/margins": 0.06378518044948578, "rewards/rejected": -0.3411497175693512, "step": 2190 }, { "epoch": 0.29, "learning_rate": 4.481788993942547e-06, "logits/chosen": -1.7548974752426147, "logits/rejected": -1.6386677026748657, "logps/chosen": -432.77545166015625, "logps/rejected": -536.7062377929688, "loss": 0.0155, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2100006639957428, "rewards/margins": 0.11079144477844238, "rewards/rejected": -0.3207921087741852, "step": 2200 }, { "epoch": 0.29, "learning_rate": 4.474806405763076e-06, "logits/chosen": -1.6667753458023071, "logits/rejected": -1.5982141494750977, "logps/chosen": -451.031005859375, "logps/rejected": -577.8094482421875, "loss": 0.0241, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2053256779909134, "rewards/margins": 0.09365935623645782, "rewards/rejected": -0.2989850342273712, "step": 2210 }, { "epoch": 0.29, "learning_rate": 4.4677826054149235e-06, "logits/chosen": -1.7733885049819946, "logits/rejected": -1.6308481693267822, "logps/chosen": -475.9578552246094, "logps/rejected": -536.2483520507812, "loss": 0.0275, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2246747761964798, "rewards/margins": 0.05896454304456711, "rewards/rejected": -0.2836392819881439, "step": 2220 }, { "epoch": 0.29, "learning_rate": 4.460717739477543e-06, "logits/chosen": -1.9974079132080078, "logits/rejected": -1.7053706645965576, "logps/chosen": -463.2173767089844, "logps/rejected": -466.25537109375, "loss": 0.02, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2159685641527176, "rewards/margins": 0.037511665374040604, "rewards/rejected": -0.2534802556037903, "step": 2230 }, { "epoch": 0.29, "learning_rate": 4.4536119553873866e-06, "logits/chosen": -1.7966111898422241, "logits/rejected": -1.7337009906768799, "logps/chosen": -400.27728271484375, "logps/rejected": -539.5341186523438, "loss": 0.0334, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.20263293385505676, "rewards/margins": 0.09285394102334976, "rewards/rejected": -0.2954868674278259, "step": 2240 }, { "epoch": 0.29, "learning_rate": 4.446465401434824e-06, "logits/chosen": -2.0680744647979736, "logits/rejected": -2.1116702556610107, "logps/chosen": -447.0508728027344, "logps/rejected": -494.86334228515625, "loss": 0.0283, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18393829464912415, "rewards/margins": 0.06616505235433578, "rewards/rejected": -0.2501033842563629, "step": 2250 }, { "epoch": 0.3, "learning_rate": 4.43927822676105e-06, "logits/chosen": -2.0033762454986572, "logits/rejected": -1.7921524047851562, "logps/chosen": -449.127197265625, "logps/rejected": -488.2967834472656, "loss": 0.0289, "rewards/accuracies": 0.625, "rewards/chosen": -0.21589410305023193, "rewards/margins": 0.07737429440021515, "rewards/rejected": -0.2932683825492859, "step": 2260 }, { "epoch": 0.3, "learning_rate": 4.432050581354972e-06, "logits/chosen": -1.8215572834014893, "logits/rejected": -1.5815279483795166, "logps/chosen": -389.57464599609375, "logps/rejected": -434.3033142089844, "loss": 0.0214, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20798125863075256, "rewards/margins": 0.07210813462734222, "rewards/rejected": -0.2800893783569336, "step": 2270 }, { "epoch": 0.3, "learning_rate": 4.424782616050078e-06, "logits/chosen": -1.7252633571624756, "logits/rejected": -1.6086317300796509, "logps/chosen": -446.84552001953125, "logps/rejected": -523.4686889648438, "loss": 0.0237, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2335268259048462, "rewards/margins": 0.09612774103879929, "rewards/rejected": -0.3296545445919037, "step": 2280 }, { "epoch": 0.3, "learning_rate": 4.4174744825212954e-06, "logits/chosen": -1.9347927570343018, "logits/rejected": -1.7474849224090576, "logps/chosen": -506.05963134765625, "logps/rejected": -575.9285888671875, "loss": 0.022, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24362215399742126, "rewards/margins": 0.0921451598405838, "rewards/rejected": -0.33576732873916626, "step": 2290 }, { "epoch": 0.3, "learning_rate": 4.410126333281815e-06, "logits/chosen": -1.7309291362762451, "logits/rejected": -1.5879218578338623, "logps/chosen": -512.5667114257812, "logps/rejected": -527.5018310546875, "loss": 0.033, "rewards/accuracies": 0.75, "rewards/chosen": -0.25268226861953735, "rewards/margins": 0.09238190948963165, "rewards/rejected": -0.3450641334056854, "step": 2300 }, { "epoch": 0.3, "learning_rate": 4.402738321679918e-06, "logits/chosen": -1.670026183128357, "logits/rejected": -1.761228322982788, "logps/chosen": -348.04974365234375, "logps/rejected": -443.8878479003906, "loss": 0.0283, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17478026449680328, "rewards/margins": 0.0867806226015091, "rewards/rejected": -0.2615608870983124, "step": 2310 }, { "epoch": 0.3, "learning_rate": 4.395310601895772e-06, "logits/chosen": -1.9855763912200928, "logits/rejected": -1.6330562829971313, "logps/chosen": -406.68670654296875, "logps/rejected": -417.7669982910156, "loss": 0.0173, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14078545570373535, "rewards/margins": 0.07669827342033386, "rewards/rejected": -0.21748371422290802, "step": 2320 }, { "epoch": 0.3, "learning_rate": 4.38784332893821e-06, "logits/chosen": -1.9733412265777588, "logits/rejected": -1.8503671884536743, "logps/chosen": -491.7334899902344, "logps/rejected": -502.209716796875, "loss": 0.0317, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22381806373596191, "rewards/margins": 0.06034940481185913, "rewards/rejected": -0.28416746854782104, "step": 2330 }, { "epoch": 0.31, "learning_rate": 4.380336658641503e-06, "logits/chosen": -1.9059604406356812, "logits/rejected": -1.6476118564605713, "logps/chosen": -435.6214294433594, "logps/rejected": -533.3345947265625, "loss": 0.0307, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2410147488117218, "rewards/margins": 0.11430881172418594, "rewards/rejected": -0.35532355308532715, "step": 2340 }, { "epoch": 0.31, "learning_rate": 4.372790747662101e-06, "logits/chosen": -1.8285706043243408, "logits/rejected": -1.7125866413116455, "logps/chosen": -471.718017578125, "logps/rejected": -567.15869140625, "loss": 0.019, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24902674555778503, "rewards/margins": 0.07921990007162094, "rewards/rejected": -0.3282466530799866, "step": 2350 }, { "epoch": 0.31, "learning_rate": 4.365205753475367e-06, "logits/chosen": -2.015669822692871, "logits/rejected": -1.7391248941421509, "logps/chosen": -395.1049499511719, "logps/rejected": -434.15570068359375, "loss": 0.0269, "rewards/accuracies": 0.625, "rewards/chosen": -0.15337860584259033, "rewards/margins": 0.0979638621211052, "rewards/rejected": -0.2513424754142761, "step": 2360 }, { "epoch": 0.31, "learning_rate": 4.35758183437229e-06, "logits/chosen": -2.215122938156128, "logits/rejected": -1.8627878427505493, "logps/chosen": -394.71429443359375, "logps/rejected": -400.0760192871094, "loss": 0.0246, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15606388449668884, "rewards/margins": 0.048857349902391434, "rewards/rejected": -0.20492124557495117, "step": 2370 }, { "epoch": 0.31, "learning_rate": 4.3499191494561835e-06, "logits/chosen": -1.851022720336914, "logits/rejected": -1.783961296081543, "logps/chosen": -426.6841735839844, "logps/rejected": -496.6563415527344, "loss": 0.0255, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1743614822626114, "rewards/margins": 0.10733374208211899, "rewards/rejected": -0.2816952168941498, "step": 2380 }, { "epoch": 0.31, "learning_rate": 4.3422178586393615e-06, "logits/chosen": -1.890279769897461, "logits/rejected": -1.7761573791503906, "logps/chosen": -464.600830078125, "logps/rejected": -534.0846557617188, "loss": 0.0165, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22872360050678253, "rewards/margins": 0.10886083543300629, "rewards/rejected": -0.3375844359397888, "step": 2390 }, { "epoch": 0.31, "learning_rate": 4.334478122639804e-06, "logits/chosen": -1.9677358865737915, "logits/rejected": -1.6048587560653687, "logps/chosen": -549.1671142578125, "logps/rejected": -462.8221130371094, "loss": 0.0419, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.28963416814804077, "rewards/margins": 0.019320717081427574, "rewards/rejected": -0.3089548945426941, "step": 2400 }, { "epoch": 0.32, "learning_rate": 4.3267001029778015e-06, "logits/chosen": -1.8243286609649658, "logits/rejected": -1.7490968704223633, "logps/chosen": -473.9647521972656, "logps/rejected": -526.09033203125, "loss": 0.037, "rewards/accuracies": 0.75, "rewards/chosen": -0.2106451541185379, "rewards/margins": 0.10477238893508911, "rewards/rejected": -0.3154175579547882, "step": 2410 }, { "epoch": 0.32, "learning_rate": 4.318883961972585e-06, "logits/chosen": -2.0151243209838867, "logits/rejected": -1.893967866897583, "logps/chosen": -356.76239013671875, "logps/rejected": -394.2031555175781, "loss": 0.0277, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18662713468074799, "rewards/margins": 0.046176038682460785, "rewards/rejected": -0.23280318081378937, "step": 2420 }, { "epoch": 0.32, "learning_rate": 4.311029862738942e-06, "logits/chosen": -1.8836452960968018, "logits/rejected": -1.717961072921753, "logps/chosen": -386.47406005859375, "logps/rejected": -504.1248474121094, "loss": 0.0175, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21776695549488068, "rewards/margins": 0.09177577495574951, "rewards/rejected": -0.309542715549469, "step": 2430 }, { "epoch": 0.32, "learning_rate": 4.303137969183804e-06, "logits/chosen": -1.941748023033142, "logits/rejected": -1.7688930034637451, "logps/chosen": -442.46868896484375, "logps/rejected": -561.3958740234375, "loss": 0.0283, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18605726957321167, "rewards/margins": 0.11203690618276596, "rewards/rejected": -0.29809415340423584, "step": 2440 }, { "epoch": 0.32, "learning_rate": 4.295208446002832e-06, "logits/chosen": -2.0674407482147217, "logits/rejected": -1.8563096523284912, "logps/chosen": -388.8973693847656, "logps/rejected": -459.39495849609375, "loss": 0.033, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20003589987754822, "rewards/margins": 0.09308059513568878, "rewards/rejected": -0.2931164801120758, "step": 2450 }, { "epoch": 0.32, "learning_rate": 4.287241458676981e-06, "logits/chosen": -1.9666210412979126, "logits/rejected": -1.6701295375823975, "logps/chosen": -398.9001770019531, "logps/rejected": -474.994384765625, "loss": 0.0214, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1540244221687317, "rewards/margins": 0.11552529036998749, "rewards/rejected": -0.269549697637558, "step": 2460 }, { "epoch": 0.32, "learning_rate": 4.279237173469043e-06, "logits/chosen": -1.8836891651153564, "logits/rejected": -1.6282813549041748, "logps/chosen": -482.57147216796875, "logps/rejected": -560.1978149414062, "loss": 0.0158, "rewards/accuracies": 0.75, "rewards/chosen": -0.2128007709980011, "rewards/margins": 0.1130170226097107, "rewards/rejected": -0.3258178234100342, "step": 2470 }, { "epoch": 0.32, "learning_rate": 4.271195757420177e-06, "logits/chosen": -1.9587551355361938, "logits/rejected": -1.7986056804656982, "logps/chosen": -456.47161865234375, "logps/rejected": -548.9180297851562, "loss": 0.0221, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23430156707763672, "rewards/margins": 0.1185712069272995, "rewards/rejected": -0.3528727889060974, "step": 2480 }, { "epoch": 0.33, "learning_rate": 4.263117378346425e-06, "logits/chosen": -1.819907784461975, "logits/rejected": -1.6789543628692627, "logps/chosen": -454.332275390625, "logps/rejected": -502.52130126953125, "loss": 0.015, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2230617254972458, "rewards/margins": 0.07935784757137299, "rewards/rejected": -0.3024195730686188, "step": 2490 }, { "epoch": 0.33, "learning_rate": 4.255002204835208e-06, "logits/chosen": -1.9170949459075928, "logits/rejected": -1.673644781112671, "logps/chosen": -503.7950134277344, "logps/rejected": -573.0311279296875, "loss": 0.0195, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2700875997543335, "rewards/margins": 0.10103525966405869, "rewards/rejected": -0.3711228370666504, "step": 2500 }, { "epoch": 0.33, "learning_rate": 4.246850406241812e-06, "logits/chosen": -1.8298994302749634, "logits/rejected": -1.565895676612854, "logps/chosen": -584.307861328125, "logps/rejected": -674.6881103515625, "loss": 0.0284, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27168023586273193, "rewards/margins": 0.09993793815374374, "rewards/rejected": -0.3716181814670563, "step": 2510 }, { "epoch": 0.33, "learning_rate": 4.2386621526858465e-06, "logits/chosen": -1.7684259414672852, "logits/rejected": -1.7138373851776123, "logps/chosen": -439.82733154296875, "logps/rejected": -488.1190490722656, "loss": 0.0277, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23590664565563202, "rewards/margins": 0.05192587897181511, "rewards/rejected": -0.2878325581550598, "step": 2520 }, { "epoch": 0.33, "learning_rate": 4.2304376150477015e-06, "logits/chosen": -1.867626428604126, "logits/rejected": -1.6936473846435547, "logps/chosen": -400.4134826660156, "logps/rejected": -444.52447509765625, "loss": 0.033, "rewards/accuracies": 0.5, "rewards/chosen": -0.19006365537643433, "rewards/margins": 0.05945818871259689, "rewards/rejected": -0.24952185153961182, "step": 2530 }, { "epoch": 0.33, "learning_rate": 4.222176964964977e-06, "logits/chosen": -1.8515548706054688, "logits/rejected": -1.620330810546875, "logps/chosen": -416.956298828125, "logps/rejected": -489.68634033203125, "loss": 0.0272, "rewards/accuracies": 0.625, "rewards/chosen": -0.1836603283882141, "rewards/margins": 0.08848662674427032, "rewards/rejected": -0.27214694023132324, "step": 2540 }, { "epoch": 0.33, "learning_rate": 4.213880374828903e-06, "logits/chosen": -1.8032068014144897, "logits/rejected": -1.7320334911346436, "logps/chosen": -474.12335205078125, "logps/rejected": -495.7640075683594, "loss": 0.0235, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21940049529075623, "rewards/margins": 0.04905577376484871, "rewards/rejected": -0.26845625042915344, "step": 2550 }, { "epoch": 0.33, "learning_rate": 4.2055480177807406e-06, "logits/chosen": -1.665143609046936, "logits/rejected": -1.5036962032318115, "logps/chosen": -434.1664123535156, "logps/rejected": -500.8665466308594, "loss": 0.0311, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22254256904125214, "rewards/margins": 0.10014110803604126, "rewards/rejected": -0.3226836621761322, "step": 2560 }, { "epoch": 0.34, "learning_rate": 4.1971800677081696e-06, "logits/chosen": -1.6747596263885498, "logits/rejected": -1.4174727201461792, "logps/chosen": -421.354248046875, "logps/rejected": -491.0271911621094, "loss": 0.0342, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23044414818286896, "rewards/margins": 0.08503113687038422, "rewards/rejected": -0.3154752850532532, "step": 2570 }, { "epoch": 0.34, "learning_rate": 4.188776699241661e-06, "logits/chosen": -1.4612760543823242, "logits/rejected": -1.4553321599960327, "logps/chosen": -458.9996643066406, "logps/rejected": -573.1510009765625, "loss": 0.0323, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2871035933494568, "rewards/margins": 0.07828949391841888, "rewards/rejected": -0.36539310216903687, "step": 2580 }, { "epoch": 0.34, "learning_rate": 4.180338087750827e-06, "logits/chosen": -1.6774513721466064, "logits/rejected": -1.3070108890533447, "logps/chosen": -561.6486206054688, "logps/rejected": -629.3992919921875, "loss": 0.0264, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2835440933704376, "rewards/margins": 0.10050519555807114, "rewards/rejected": -0.38404932618141174, "step": 2590 }, { "epoch": 0.34, "learning_rate": 4.1718644093407704e-06, "logits/chosen": -1.70647394657135, "logits/rejected": -1.6027628183364868, "logps/chosen": -442.23583984375, "logps/rejected": -534.0610961914062, "loss": 0.0197, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23742584884166718, "rewards/margins": 0.08286769688129425, "rewards/rejected": -0.3202935457229614, "step": 2600 }, { "epoch": 0.34, "learning_rate": 4.163355840848401e-06, "logits/chosen": -1.7119226455688477, "logits/rejected": -1.6578342914581299, "logps/chosen": -454.8663635253906, "logps/rejected": -536.91943359375, "loss": 0.0232, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22818918526172638, "rewards/margins": 0.0926157757639885, "rewards/rejected": -0.3208049237728119, "step": 2610 }, { "epoch": 0.34, "learning_rate": 4.154812559838748e-06, "logits/chosen": -1.783673644065857, "logits/rejected": -1.587224006652832, "logps/chosen": -426.52020263671875, "logps/rejected": -453.7071228027344, "loss": 0.0178, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17788371443748474, "rewards/margins": 0.07864318788051605, "rewards/rejected": -0.2565268874168396, "step": 2620 }, { "epoch": 0.34, "learning_rate": 4.146234744601259e-06, "logits/chosen": -1.6520817279815674, "logits/rejected": -1.3121957778930664, "logps/chosen": -498.17529296875, "logps/rejected": -541.5598754882812, "loss": 0.0449, "rewards/accuracies": 0.625, "rewards/chosen": -0.2449866533279419, "rewards/margins": 0.0944221243262291, "rewards/rejected": -0.3394087553024292, "step": 2630 }, { "epoch": 0.35, "learning_rate": 4.137622574146071e-06, "logits/chosen": -1.8784363269805908, "logits/rejected": -1.7260299921035767, "logps/chosen": -400.75726318359375, "logps/rejected": -431.58782958984375, "loss": 0.0383, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1871858537197113, "rewards/margins": 0.06336455047130585, "rewards/rejected": -0.25055041909217834, "step": 2640 }, { "epoch": 0.35, "learning_rate": 4.12897622820028e-06, "logits/chosen": -1.7684128284454346, "logits/rejected": -1.816655158996582, "logps/chosen": -439.8097229003906, "logps/rejected": -501.459228515625, "loss": 0.0234, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15390966832637787, "rewards/margins": 0.06625259667634964, "rewards/rejected": -0.22016224265098572, "step": 2650 }, { "epoch": 0.35, "learning_rate": 4.120295887204191e-06, "logits/chosen": -1.5292326211929321, "logits/rejected": -1.4526147842407227, "logps/chosen": -459.69622802734375, "logps/rejected": -443.5069885253906, "loss": 0.0241, "rewards/accuracies": 0.5, "rewards/chosen": -0.20146799087524414, "rewards/margins": 0.02798052504658699, "rewards/rejected": -0.22944851219654083, "step": 2660 }, { "epoch": 0.35, "learning_rate": 4.111581732307548e-06, "logits/chosen": -1.6952488422393799, "logits/rejected": -1.4364745616912842, "logps/chosen": -520.1705932617188, "logps/rejected": -526.3663330078125, "loss": 0.0208, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24517397582530975, "rewards/margins": 0.07558010518550873, "rewards/rejected": -0.3207540512084961, "step": 2670 }, { "epoch": 0.35, "learning_rate": 4.1028339453657595e-06, "logits/chosen": -1.5877403020858765, "logits/rejected": -1.4068609476089478, "logps/chosen": -478.41546630859375, "logps/rejected": -542.8192138671875, "loss": 0.0227, "rewards/accuracies": 0.625, "rewards/chosen": -0.24401327967643738, "rewards/margins": 0.09986021369695663, "rewards/rejected": -0.3438734710216522, "step": 2680 }, { "epoch": 0.35, "learning_rate": 4.094052708936096e-06, "logits/chosen": -1.764606237411499, "logits/rejected": -1.5982327461242676, "logps/chosen": -442.99664306640625, "logps/rejected": -494.39892578125, "loss": 0.0246, "rewards/accuracies": 0.625, "rewards/chosen": -0.248723104596138, "rewards/margins": 0.0774948000907898, "rewards/rejected": -0.326217919588089, "step": 2690 }, { "epoch": 0.35, "learning_rate": 4.0852382062738874e-06, "logits/chosen": -1.3868590593338013, "logits/rejected": -1.315887689590454, "logps/chosen": -410.35186767578125, "logps/rejected": -470.20556640625, "loss": 0.0244, "rewards/accuracies": 0.5, "rewards/chosen": -0.24316677451133728, "rewards/margins": 0.06980231404304504, "rewards/rejected": -0.3129690885543823, "step": 2700 }, { "epoch": 0.35, "learning_rate": 4.076390621328693e-06, "logits/chosen": -1.4941400289535522, "logits/rejected": -1.2799028158187866, "logps/chosen": -583.5787353515625, "logps/rejected": -585.2474365234375, "loss": 0.0311, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.36681923270225525, "rewards/margins": 0.04977092519402504, "rewards/rejected": -0.41659015417099, "step": 2710 }, { "epoch": 0.36, "learning_rate": 4.067510138740467e-06, "logits/chosen": -1.2457807064056396, "logits/rejected": -1.1327399015426636, "logps/chosen": -576.9419555664062, "logps/rejected": -656.8292236328125, "loss": 0.019, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36136874556541443, "rewards/margins": 0.08126135170459747, "rewards/rejected": -0.4426301419734955, "step": 2720 }, { "epoch": 0.36, "learning_rate": 4.058596943835703e-06, "logits/chosen": -1.3685424327850342, "logits/rejected": -1.1996796131134033, "logps/chosen": -542.9622192382812, "logps/rejected": -635.43896484375, "loss": 0.035, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.33202752470970154, "rewards/margins": 0.08833317458629608, "rewards/rejected": -0.42036065459251404, "step": 2730 }, { "epoch": 0.36, "learning_rate": 4.049651222623568e-06, "logits/chosen": -1.3233853578567505, "logits/rejected": -1.1292164325714111, "logps/chosen": -591.0294799804688, "logps/rejected": -589.8373413085938, "loss": 0.0291, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3421989381313324, "rewards/margins": 0.06017078831791878, "rewards/rejected": -0.4023696780204773, "step": 2740 }, { "epoch": 0.36, "learning_rate": 4.040673161792014e-06, "logits/chosen": -1.3762229681015015, "logits/rejected": -1.3930509090423584, "logps/chosen": -540.7244873046875, "logps/rejected": -700.24658203125, "loss": 0.0249, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34776386618614197, "rewards/margins": 0.1587856411933899, "rewards/rejected": -0.5065494775772095, "step": 2750 }, { "epoch": 0.36, "learning_rate": 4.031662948703896e-06, "logits/chosen": -1.3948943614959717, "logits/rejected": -1.2206356525421143, "logps/chosen": -580.3343505859375, "logps/rejected": -633.843017578125, "loss": 0.0245, "rewards/accuracies": 0.625, "rewards/chosen": -0.3342733681201935, "rewards/margins": 0.09769748896360397, "rewards/rejected": -0.43197083473205566, "step": 2760 }, { "epoch": 0.36, "learning_rate": 4.022620771393047e-06, "logits/chosen": -1.2887545824050903, "logits/rejected": -1.1180346012115479, "logps/chosen": -455.7023010253906, "logps/rejected": -522.9749755859375, "loss": 0.0377, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.274341344833374, "rewards/margins": 0.08734793961048126, "rewards/rejected": -0.3616892695426941, "step": 2770 }, { "epoch": 0.36, "learning_rate": 4.013546818560362e-06, "logits/chosen": -1.277212142944336, "logits/rejected": -1.143895149230957, "logps/chosen": -429.0113220214844, "logps/rejected": -407.8331298828125, "loss": 0.0219, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.21917875111103058, "rewards/margins": 0.05816282704472542, "rewards/rejected": -0.2773415446281433, "step": 2780 }, { "epoch": 0.37, "learning_rate": 4.00444127956986e-06, "logits/chosen": -1.4734269380569458, "logits/rejected": -1.2990522384643555, "logps/chosen": -466.5022888183594, "logps/rejected": -478.3050231933594, "loss": 0.0315, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.2548750042915344, "rewards/margins": 0.03684788569808006, "rewards/rejected": -0.2917229235172272, "step": 2790 }, { "epoch": 0.37, "learning_rate": 3.9953043444447255e-06, "logits/chosen": -1.2454700469970703, "logits/rejected": -1.2405586242675781, "logps/chosen": -561.666015625, "logps/rejected": -618.0407104492188, "loss": 0.0287, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2790631949901581, "rewards/margins": 0.03939159959554672, "rewards/rejected": -0.3184548318386078, "step": 2800 }, { "epoch": 0.37, "learning_rate": 3.986136203863355e-06, "logits/chosen": -1.3038129806518555, "logits/rejected": -1.367506742477417, "logps/chosen": -515.1031494140625, "logps/rejected": -544.586181640625, "loss": 0.0224, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.276677668094635, "rewards/margins": 0.03499032184481621, "rewards/rejected": -0.3116679787635803, "step": 2810 }, { "epoch": 0.37, "learning_rate": 3.976937049155365e-06, "logits/chosen": -1.4343550205230713, "logits/rejected": -1.3405755758285522, "logps/chosen": -471.4578552246094, "logps/rejected": -566.8021850585938, "loss": 0.0267, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2356349527835846, "rewards/margins": 0.06042511388659477, "rewards/rejected": -0.2960600256919861, "step": 2820 }, { "epoch": 0.37, "learning_rate": 3.967707072297608e-06, "logits/chosen": -1.3231853246688843, "logits/rejected": -1.2007641792297363, "logps/chosen": -500.8212890625, "logps/rejected": -607.4046630859375, "loss": 0.0192, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29319027066230774, "rewards/margins": 0.11779715120792389, "rewards/rejected": -0.41098737716674805, "step": 2830 }, { "epoch": 0.37, "learning_rate": 3.958446465910159e-06, "logits/chosen": -1.3309224843978882, "logits/rejected": -1.4673199653625488, "logps/chosen": -482.66705322265625, "logps/rejected": -582.6072998046875, "loss": 0.0265, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2789314389228821, "rewards/margins": 0.07025317847728729, "rewards/rejected": -0.3491845726966858, "step": 2840 }, { "epoch": 0.37, "learning_rate": 3.9491554232523066e-06, "logits/chosen": -1.1617060899734497, "logits/rejected": -1.1193522214889526, "logps/chosen": -520.6838989257812, "logps/rejected": -621.5158081054688, "loss": 0.0226, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3350999057292938, "rewards/margins": 0.11317549645900726, "rewards/rejected": -0.4482753872871399, "step": 2850 }, { "epoch": 0.37, "learning_rate": 3.939834138218505e-06, "logits/chosen": -1.310098648071289, "logits/rejected": -1.2673050165176392, "logps/chosen": -478.2703552246094, "logps/rejected": -547.5897827148438, "loss": 0.022, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27902334928512573, "rewards/margins": 0.06650176644325256, "rewards/rejected": -0.3455251157283783, "step": 2860 }, { "epoch": 0.38, "learning_rate": 3.930482805334339e-06, "logits/chosen": -1.5085766315460205, "logits/rejected": -1.4009966850280762, "logps/chosen": -403.2984313964844, "logps/rejected": -477.52056884765625, "loss": 0.0295, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24465203285217285, "rewards/margins": 0.07953041046857834, "rewards/rejected": -0.3241824507713318, "step": 2870 }, { "epoch": 0.38, "learning_rate": 3.921101619752464e-06, "logits/chosen": -1.4011389017105103, "logits/rejected": -1.5107542276382446, "logps/chosen": -450.69342041015625, "logps/rejected": -470.6482849121094, "loss": 0.0408, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23339705169200897, "rewards/margins": 0.036589961498975754, "rewards/rejected": -0.26998698711395264, "step": 2880 }, { "epoch": 0.38, "learning_rate": 3.911690777248525e-06, "logits/chosen": -1.512414574623108, "logits/rejected": -1.5040571689605713, "logps/chosen": -437.159423828125, "logps/rejected": -494.37811279296875, "loss": 0.0274, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2510322332382202, "rewards/margins": 0.05385801941156387, "rewards/rejected": -0.3048902451992035, "step": 2890 }, { "epoch": 0.38, "learning_rate": 3.902250474217079e-06, "logits/chosen": -1.4019505977630615, "logits/rejected": -1.314363956451416, "logps/chosen": -383.010009765625, "logps/rejected": -559.7865600585938, "loss": 0.0438, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.250358521938324, "rewards/margins": 0.13500240445137024, "rewards/rejected": -0.3853609561920166, "step": 2900 }, { "epoch": 0.38, "learning_rate": 3.892780907667495e-06, "logits/chosen": -1.744741678237915, "logits/rejected": -1.4366378784179688, "logps/chosen": -484.03057861328125, "logps/rejected": -522.0039672851562, "loss": 0.029, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2130521535873413, "rewards/margins": 0.060403358191251755, "rewards/rejected": -0.27345550060272217, "step": 2910 }, { "epoch": 0.38, "learning_rate": 3.883282275219837e-06, "logits/chosen": -1.42691969871521, "logits/rejected": -1.3678151369094849, "logps/chosen": -447.2662658691406, "logps/rejected": -526.8746337890625, "loss": 0.0254, "rewards/accuracies": 0.625, "rewards/chosen": -0.25842684507369995, "rewards/margins": 0.07644257694482803, "rewards/rejected": -0.3348694443702698, "step": 2920 }, { "epoch": 0.38, "learning_rate": 3.873754775100751e-06, "logits/chosen": -1.4811619520187378, "logits/rejected": -1.4468374252319336, "logps/chosen": -435.4991149902344, "logps/rejected": -573.3851318359375, "loss": 0.018, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.25239571928977966, "rewards/margins": 0.09442947804927826, "rewards/rejected": -0.34682518243789673, "step": 2930 }, { "epoch": 0.38, "learning_rate": 3.8641986061393145e-06, "logits/chosen": -1.8592326641082764, "logits/rejected": -1.5195543766021729, "logps/chosen": -474.525146484375, "logps/rejected": -491.6029357910156, "loss": 0.0233, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2184789478778839, "rewards/margins": 0.07690483331680298, "rewards/rejected": -0.2953837513923645, "step": 2940 }, { "epoch": 0.39, "learning_rate": 3.854613967762898e-06, "logits/chosen": -1.5112121105194092, "logits/rejected": -1.4672622680664062, "logps/chosen": -457.9198303222656, "logps/rejected": -558.0857543945312, "loss": 0.019, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24388387799263, "rewards/margins": 0.08239175379276276, "rewards/rejected": -0.32627564668655396, "step": 2950 }, { "epoch": 0.39, "learning_rate": 3.845001059992999e-06, "logits/chosen": -1.5748988389968872, "logits/rejected": -1.3156006336212158, "logps/chosen": -547.559814453125, "logps/rejected": -659.1094970703125, "loss": 0.0149, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.27472323179244995, "rewards/margins": 0.13340029120445251, "rewards/rejected": -0.4081234931945801, "step": 2960 }, { "epoch": 0.39, "learning_rate": 3.835360083441067e-06, "logits/chosen": -1.7014272212982178, "logits/rejected": -1.6057565212249756, "logps/chosen": -526.9740600585938, "logps/rejected": -604.8302001953125, "loss": 0.0146, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.300466924905777, "rewards/margins": 0.0719136968255043, "rewards/rejected": -0.3723805546760559, "step": 2970 }, { "epoch": 0.39, "learning_rate": 3.825691239304318e-06, "logits/chosen": -1.6480176448822021, "logits/rejected": -1.4222185611724854, "logps/chosen": -486.6483459472656, "logps/rejected": -619.3688354492188, "loss": 0.0348, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2511606812477112, "rewards/margins": 0.13785335421562195, "rewards/rejected": -0.3890140652656555, "step": 2980 }, { "epoch": 0.39, "learning_rate": 3.8159947293615385e-06, "logits/chosen": -1.6466913223266602, "logits/rejected": -1.3495409488677979, "logps/chosen": -494.6392517089844, "logps/rejected": -514.5824584960938, "loss": 0.0163, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23486237227916718, "rewards/margins": 0.05418325588107109, "rewards/rejected": -0.28904566168785095, "step": 2990 }, { "epoch": 0.39, "learning_rate": 3.806270755968866e-06, "logits/chosen": -1.5945298671722412, "logits/rejected": -1.6403144598007202, "logps/chosen": -357.9747619628906, "logps/rejected": -462.56817626953125, "loss": 0.0181, "rewards/accuracies": 0.625, "rewards/chosen": -0.20649829506874084, "rewards/margins": 0.07846391201019287, "rewards/rejected": -0.2849622070789337, "step": 3000 }, { "epoch": 0.39, "learning_rate": 3.7965195220555784e-06, "logits/chosen": -1.5004541873931885, "logits/rejected": -1.4311333894729614, "logps/chosen": -392.80352783203125, "logps/rejected": -504.016357421875, "loss": 0.0303, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17788691818714142, "rewards/margins": 0.09681157022714615, "rewards/rejected": -0.27469852566719055, "step": 3010 }, { "epoch": 0.4, "learning_rate": 3.786741231119847e-06, "logits/chosen": -1.5867496728897095, "logits/rejected": -1.5139662027359009, "logps/chosen": -441.8441467285156, "logps/rejected": -573.4630737304688, "loss": 0.025, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24398484826087952, "rewards/margins": 0.10547931492328644, "rewards/rejected": -0.34946417808532715, "step": 3020 }, { "epoch": 0.4, "learning_rate": 3.7769360872244992e-06, "logits/chosen": -1.4943944215774536, "logits/rejected": -1.5490363836288452, "logps/chosen": -508.37335205078125, "logps/rejected": -570.640625, "loss": 0.0205, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2434680461883545, "rewards/margins": 0.07497527450323105, "rewards/rejected": -0.3184433579444885, "step": 3030 }, { "epoch": 0.4, "learning_rate": 3.767104294992754e-06, "logits/chosen": -1.6485742330551147, "logits/rejected": -1.439035415649414, "logps/chosen": -509.4322204589844, "logps/rejected": -577.4638671875, "loss": 0.0238, "rewards/accuracies": 0.625, "rewards/chosen": -0.3061564564704895, "rewards/margins": 0.07664833962917328, "rewards/rejected": -0.382804811000824, "step": 3040 }, { "epoch": 0.4, "learning_rate": 3.7572460596039524e-06, "logits/chosen": -1.4903205633163452, "logits/rejected": -1.4283571243286133, "logps/chosen": -551.2159423828125, "logps/rejected": -646.5234375, "loss": 0.0444, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.34560102224349976, "rewards/margins": 0.08969350159168243, "rewards/rejected": -0.4352944791316986, "step": 3050 }, { "epoch": 0.4, "learning_rate": 3.74736158678928e-06, "logits/chosen": -1.4189043045043945, "logits/rejected": -1.264514684677124, "logps/chosen": -380.6353454589844, "logps/rejected": -457.75537109375, "loss": 0.036, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22212731838226318, "rewards/margins": 0.08710966259241104, "rewards/rejected": -0.30923694372177124, "step": 3060 }, { "epoch": 0.4, "learning_rate": 3.7374510828274673e-06, "logits/chosen": -1.455428957939148, "logits/rejected": -1.5315929651260376, "logps/chosen": -361.54803466796875, "logps/rejected": -505.62060546875, "loss": 0.0343, "rewards/accuracies": 0.625, "rewards/chosen": -0.20948748290538788, "rewards/margins": 0.09494461119174957, "rewards/rejected": -0.30443209409713745, "step": 3070 }, { "epoch": 0.4, "learning_rate": 3.72751475454049e-06, "logits/chosen": -1.5700876712799072, "logits/rejected": -1.3799183368682861, "logps/chosen": -463.9649963378906, "logps/rejected": -513.8907470703125, "loss": 0.0281, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21600162982940674, "rewards/margins": 0.08006380498409271, "rewards/rejected": -0.29606547951698303, "step": 3080 }, { "epoch": 0.4, "learning_rate": 3.7175528092892503e-06, "logits/chosen": -1.6064115762710571, "logits/rejected": -1.3781477212905884, "logps/chosen": -387.56402587890625, "logps/rejected": -408.89117431640625, "loss": 0.0349, "rewards/accuracies": 0.5, "rewards/chosen": -0.19931992888450623, "rewards/margins": 0.06138339638710022, "rewards/rejected": -0.26070332527160645, "step": 3090 }, { "epoch": 0.41, "learning_rate": 3.7075654549692498e-06, "logits/chosen": -1.4929149150848389, "logits/rejected": -1.313826560974121, "logps/chosen": -473.07904052734375, "logps/rejected": -517.1908569335938, "loss": 0.0303, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23562940955162048, "rewards/margins": 0.05551863834261894, "rewards/rejected": -0.29114800691604614, "step": 3100 }, { "epoch": 0.41, "learning_rate": 3.697552900006249e-06, "logits/chosen": -1.7409900426864624, "logits/rejected": -1.6835981607437134, "logps/chosen": -367.76690673828125, "logps/rejected": -462.6327209472656, "loss": 0.0171, "rewards/accuracies": 0.5, "rewards/chosen": -0.18092992901802063, "rewards/margins": 0.06359394639730453, "rewards/rejected": -0.24452385306358337, "step": 3110 }, { "epoch": 0.41, "learning_rate": 3.6875153533519244e-06, "logits/chosen": -1.8172905445098877, "logits/rejected": -1.4347440004348755, "logps/chosen": -450.7483825683594, "logps/rejected": -505.62530517578125, "loss": 0.0226, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20507276058197021, "rewards/margins": 0.08976466953754425, "rewards/rejected": -0.29483744502067566, "step": 3120 }, { "epoch": 0.41, "learning_rate": 3.6774530244794992e-06, "logits/chosen": -1.6782619953155518, "logits/rejected": -1.558455467224121, "logps/chosen": -513.4165649414062, "logps/rejected": -545.1614379882812, "loss": 0.0265, "rewards/accuracies": 0.625, "rewards/chosen": -0.2603208124637604, "rewards/margins": 0.0918203815817833, "rewards/rejected": -0.35214120149612427, "step": 3130 }, { "epoch": 0.41, "learning_rate": 3.667366123379378e-06, "logits/chosen": -1.3462660312652588, "logits/rejected": -1.3467267751693726, "logps/chosen": -433.2908630371094, "logps/rejected": -553.0076293945312, "loss": 0.0233, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22309264540672302, "rewards/margins": 0.10725726187229156, "rewards/rejected": -0.3303499221801758, "step": 3140 }, { "epoch": 0.41, "learning_rate": 3.6572548605547607e-06, "logits/chosen": -1.6897525787353516, "logits/rejected": -1.2931480407714844, "logps/chosen": -476.80987548828125, "logps/rejected": -513.6519165039062, "loss": 0.0156, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23292949795722961, "rewards/margins": 0.08971043676137924, "rewards/rejected": -0.32263994216918945, "step": 3150 }, { "epoch": 0.41, "learning_rate": 3.6471194470172538e-06, "logits/chosen": -1.6105201244354248, "logits/rejected": -1.409613013267517, "logps/chosen": -582.4307861328125, "logps/rejected": -648.6765747070312, "loss": 0.0225, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.27965396642684937, "rewards/margins": 0.10067589581012726, "rewards/rejected": -0.38032984733581543, "step": 3160 }, { "epoch": 0.41, "learning_rate": 3.636960094282461e-06, "logits/chosen": -1.6837307214736938, "logits/rejected": -1.5798778533935547, "logps/chosen": -438.977783203125, "logps/rejected": -540.5154418945312, "loss": 0.0275, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23820741474628448, "rewards/margins": 0.09993481636047363, "rewards/rejected": -0.3381422162055969, "step": 3170 }, { "epoch": 0.42, "learning_rate": 3.6267770143655743e-06, "logits/chosen": -1.7468640804290771, "logits/rejected": -1.6485341787338257, "logps/chosen": -426.53192138671875, "logps/rejected": -463.8035583496094, "loss": 0.0183, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20808923244476318, "rewards/margins": 0.0463956817984581, "rewards/rejected": -0.2544848918914795, "step": 3180 }, { "epoch": 0.42, "learning_rate": 3.6165704197769484e-06, "logits/chosen": -1.7614538669586182, "logits/rejected": -1.7924140691757202, "logps/chosen": -312.37261962890625, "logps/rejected": -424.40203857421875, "loss": 0.0221, "rewards/accuracies": 0.625, "rewards/chosen": -0.13243715465068817, "rewards/margins": 0.11160604655742645, "rewards/rejected": -0.24404320120811462, "step": 3190 }, { "epoch": 0.42, "learning_rate": 3.606340523517663e-06, "logits/chosen": -2.0063328742980957, "logits/rejected": -1.8009140491485596, "logps/chosen": -376.8255920410156, "logps/rejected": -452.60699462890625, "loss": 0.0213, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.144984170794487, "rewards/margins": 0.08352089673280716, "rewards/rejected": -0.22850506007671356, "step": 3200 }, { "epoch": 0.42, "learning_rate": 3.5960875390750793e-06, "logits/chosen": -1.8453142642974854, "logits/rejected": -1.6005035638809204, "logps/chosen": -446.85675048828125, "logps/rejected": -556.7806396484375, "loss": 0.0474, "rewards/accuracies": 0.625, "rewards/chosen": -0.21962828934192657, "rewards/margins": 0.08978879451751709, "rewards/rejected": -0.30941709876060486, "step": 3210 }, { "epoch": 0.42, "learning_rate": 3.585811680418386e-06, "logits/chosen": -1.7096410989761353, "logits/rejected": -1.5049703121185303, "logps/chosen": -413.1253356933594, "logps/rejected": -426.44561767578125, "loss": 0.0215, "rewards/accuracies": 0.75, "rewards/chosen": -0.18407495319843292, "rewards/margins": 0.0769776925444603, "rewards/rejected": -0.2610526382923126, "step": 3220 }, { "epoch": 0.42, "learning_rate": 3.5755131619941347e-06, "logits/chosen": -1.8558601140975952, "logits/rejected": -1.671087622642517, "logps/chosen": -536.2061157226562, "logps/rejected": -596.7044067382812, "loss": 0.0345, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22868017852306366, "rewards/margins": 0.09786845743656158, "rewards/rejected": -0.32654863595962524, "step": 3230 }, { "epoch": 0.42, "learning_rate": 3.565192198721759e-06, "logits/chosen": -1.7334047555923462, "logits/rejected": -1.2770370244979858, "logps/chosen": -447.2215270996094, "logps/rejected": -460.78924560546875, "loss": 0.0212, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21503373980522156, "rewards/margins": 0.09746576100587845, "rewards/rejected": -0.3124995231628418, "step": 3240 }, { "epoch": 0.43, "learning_rate": 3.5548490059890965e-06, "logits/chosen": -1.7922292947769165, "logits/rejected": -1.681229829788208, "logps/chosen": -414.5247497558594, "logps/rejected": -453.2389221191406, "loss": 0.0244, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19199016690254211, "rewards/margins": 0.061304427683353424, "rewards/rejected": -0.25329458713531494, "step": 3250 }, { "epoch": 0.43, "learning_rate": 3.5444837996478903e-06, "logits/chosen": -1.6504888534545898, "logits/rejected": -1.5916399955749512, "logps/chosen": -407.19537353515625, "logps/rejected": -481.0166931152344, "loss": 0.0381, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19847889244556427, "rewards/margins": 0.05682044476270676, "rewards/rejected": -0.25529932975769043, "step": 3260 }, { "epoch": 0.43, "learning_rate": 3.534096796009282e-06, "logits/chosen": -1.7954851388931274, "logits/rejected": -1.8284984827041626, "logps/chosen": -327.8110046386719, "logps/rejected": -373.6348876953125, "loss": 0.0306, "rewards/accuracies": 0.5, "rewards/chosen": -0.16501004993915558, "rewards/margins": 0.03539814427495003, "rewards/rejected": -0.2004081904888153, "step": 3270 }, { "epoch": 0.43, "learning_rate": 3.5236882118393046e-06, "logits/chosen": -1.7820394039154053, "logits/rejected": -1.6226108074188232, "logps/chosen": -422.36505126953125, "logps/rejected": -514.1890869140625, "loss": 0.0231, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19812330603599548, "rewards/margins": 0.10547544062137604, "rewards/rejected": -0.3035987317562103, "step": 3280 }, { "epoch": 0.43, "learning_rate": 3.5132582643543513e-06, "logits/chosen": -1.7644245624542236, "logits/rejected": -1.3841527700424194, "logps/chosen": -505.01629638671875, "logps/rejected": -548.0068969726562, "loss": 0.0285, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2590879797935486, "rewards/margins": 0.0976809710264206, "rewards/rejected": -0.35676896572113037, "step": 3290 }, { "epoch": 0.43, "learning_rate": 3.5028071712166456e-06, "logits/chosen": -1.6618616580963135, "logits/rejected": -1.4068950414657593, "logps/chosen": -468.48419189453125, "logps/rejected": -525.5357666015625, "loss": 0.0368, "rewards/accuracies": 0.75, "rewards/chosen": -0.22081026434898376, "rewards/margins": 0.09167324751615524, "rewards/rejected": -0.312483549118042, "step": 3300 }, { "epoch": 0.43, "learning_rate": 3.4923351505297008e-06, "logits/chosen": -1.805572509765625, "logits/rejected": -1.5034650564193726, "logps/chosen": -505.39398193359375, "logps/rejected": -472.99041748046875, "loss": 0.0461, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2428407371044159, "rewards/margins": 0.05200044438242912, "rewards/rejected": -0.2948412299156189, "step": 3310 }, { "epoch": 0.43, "learning_rate": 3.481842420833766e-06, "logits/chosen": -1.912627935409546, "logits/rejected": -1.681044340133667, "logps/chosen": -495.08544921875, "logps/rejected": -584.6039428710938, "loss": 0.0189, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24733643233776093, "rewards/margins": 0.109983429312706, "rewards/rejected": -0.3573199212551117, "step": 3320 }, { "epoch": 0.44, "learning_rate": 3.4713292011012645e-06, "logits/chosen": -1.7308298349380493, "logits/rejected": -1.6717321872711182, "logps/chosen": -364.91351318359375, "logps/rejected": -413.93768310546875, "loss": 0.0197, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19749896228313446, "rewards/margins": 0.07182800769805908, "rewards/rejected": -0.26932698488235474, "step": 3330 }, { "epoch": 0.44, "learning_rate": 3.4607957107322277e-06, "logits/chosen": -1.5342094898223877, "logits/rejected": -1.5572571754455566, "logps/chosen": -384.7530212402344, "logps/rejected": -516.9837646484375, "loss": 0.0169, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2340409755706787, "rewards/margins": 0.10289822518825531, "rewards/rejected": -0.3369391858577728, "step": 3340 }, { "epoch": 0.44, "learning_rate": 3.4502421695497112e-06, "logits/chosen": -1.814679503440857, "logits/rejected": -1.7517423629760742, "logps/chosen": -478.49676513671875, "logps/rejected": -526.2501220703125, "loss": 0.0241, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23059411346912384, "rewards/margins": 0.07972148805856705, "rewards/rejected": -0.3103155493736267, "step": 3350 }, { "epoch": 0.44, "learning_rate": 3.4396687977952137e-06, "logits/chosen": -1.850203275680542, "logits/rejected": -1.494214415550232, "logps/chosen": -451.4457092285156, "logps/rejected": -529.9976806640625, "loss": 0.0188, "rewards/accuracies": 0.75, "rewards/chosen": -0.2504863142967224, "rewards/margins": 0.09670194983482361, "rewards/rejected": -0.347188264131546, "step": 3360 }, { "epoch": 0.44, "learning_rate": 3.429075816124075e-06, "logits/chosen": -1.939391851425171, "logits/rejected": -1.4765293598175049, "logps/chosen": -536.4401245117188, "logps/rejected": -520.0018920898438, "loss": 0.0188, "rewards/accuracies": 0.625, "rewards/chosen": -0.22204503417015076, "rewards/margins": 0.08225461095571518, "rewards/rejected": -0.30429965257644653, "step": 3370 }, { "epoch": 0.44, "learning_rate": 3.418463445600874e-06, "logits/chosen": -1.9319158792495728, "logits/rejected": -1.568086862564087, "logps/chosen": -537.13232421875, "logps/rejected": -494.2122497558594, "loss": 0.0276, "rewards/accuracies": 0.5, "rewards/chosen": -0.2699492871761322, "rewards/margins": 0.038329653441905975, "rewards/rejected": -0.3082789480686188, "step": 3380 }, { "epoch": 0.44, "learning_rate": 3.4078319076948173e-06, "logits/chosen": -1.8316503763198853, "logits/rejected": -1.5290436744689941, "logps/chosen": -471.3053283691406, "logps/rejected": -508.49566650390625, "loss": 0.0191, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24652762711048126, "rewards/margins": 0.06418335437774658, "rewards/rejected": -0.31071096658706665, "step": 3390 }, { "epoch": 0.44, "learning_rate": 3.3971814242751123e-06, "logits/chosen": -1.6582746505737305, "logits/rejected": -1.5354890823364258, "logps/chosen": -440.11517333984375, "logps/rejected": -526.5018310546875, "loss": 0.0231, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2067064791917801, "rewards/margins": 0.07915418595075607, "rewards/rejected": -0.28586068749427795, "step": 3400 }, { "epoch": 0.45, "learning_rate": 3.386512217606339e-06, "logits/chosen": -1.8133357763290405, "logits/rejected": -1.633396863937378, "logps/chosen": -483.3251953125, "logps/rejected": -544.7848510742188, "loss": 0.0229, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2441612184047699, "rewards/margins": 0.06827167421579361, "rewards/rejected": -0.3124328553676605, "step": 3410 }, { "epoch": 0.45, "learning_rate": 3.375824510343816e-06, "logits/chosen": -1.6016031503677368, "logits/rejected": -1.3881438970565796, "logps/chosen": -462.1622009277344, "logps/rejected": -522.3944091796875, "loss": 0.034, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25817808508872986, "rewards/margins": 0.09379847347736359, "rewards/rejected": -0.35197657346725464, "step": 3420 }, { "epoch": 0.45, "learning_rate": 3.3651185255289466e-06, "logits/chosen": -1.8316404819488525, "logits/rejected": -1.5879504680633545, "logps/chosen": -467.69427490234375, "logps/rejected": -491.70733642578125, "loss": 0.0288, "rewards/accuracies": 0.625, "rewards/chosen": -0.21627011895179749, "rewards/margins": 0.06159573048353195, "rewards/rejected": -0.27786585688591003, "step": 3430 }, { "epoch": 0.45, "learning_rate": 3.354394486584568e-06, "logits/chosen": -1.3282498121261597, "logits/rejected": -1.349848985671997, "logps/chosen": -456.355712890625, "logps/rejected": -520.9228515625, "loss": 0.027, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2578386962413788, "rewards/margins": 0.06478948146104813, "rewards/rejected": -0.3226282000541687, "step": 3440 }, { "epoch": 0.45, "learning_rate": 3.3436526173102913e-06, "logits/chosen": -1.744478464126587, "logits/rejected": -1.539102554321289, "logps/chosen": -516.9254150390625, "logps/rejected": -546.52197265625, "loss": 0.0393, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.262891948223114, "rewards/margins": 0.06755396723747253, "rewards/rejected": -0.33044594526290894, "step": 3450 }, { "epoch": 0.45, "learning_rate": 3.3328931418778254e-06, "logits/chosen": -1.7492382526397705, "logits/rejected": -1.425809621810913, "logps/chosen": -463.23138427734375, "logps/rejected": -501.52325439453125, "loss": 0.0278, "rewards/accuracies": 0.625, "rewards/chosen": -0.27157261967658997, "rewards/margins": 0.060871053487062454, "rewards/rejected": -0.3324436843395233, "step": 3460 }, { "epoch": 0.45, "learning_rate": 3.3221162848263028e-06, "logits/chosen": -1.6769005060195923, "logits/rejected": -1.568414330482483, "logps/chosen": -493.3720703125, "logps/rejected": -502.56390380859375, "loss": 0.0269, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27238377928733826, "rewards/margins": 0.06392760574817657, "rewards/rejected": -0.33631137013435364, "step": 3470 }, { "epoch": 0.46, "learning_rate": 3.3113222710575914e-06, "logits/chosen": -1.8256422281265259, "logits/rejected": -1.6403229236602783, "logps/chosen": -442.6636657714844, "logps/rejected": -473.293212890625, "loss": 0.0314, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2143537551164627, "rewards/margins": 0.02805102802813053, "rewards/rejected": -0.2424047738313675, "step": 3480 }, { "epoch": 0.46, "learning_rate": 3.300511325831603e-06, "logits/chosen": -1.8307218551635742, "logits/rejected": -1.7159074544906616, "logps/chosen": -517.7831420898438, "logps/rejected": -533.2928466796875, "loss": 0.0267, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.26110875606536865, "rewards/margins": 0.06851954013109207, "rewards/rejected": -0.3296282887458801, "step": 3490 }, { "epoch": 0.46, "learning_rate": 3.289683674761592e-06, "logits/chosen": -1.8645591735839844, "logits/rejected": -1.7138818502426147, "logps/chosen": -517.3194580078125, "logps/rejected": -542.5574951171875, "loss": 0.0307, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.26969239115715027, "rewards/margins": 0.06960771977901459, "rewards/rejected": -0.33930009603500366, "step": 3500 }, { "epoch": 0.46, "learning_rate": 3.2788395438094444e-06, "logits/chosen": -1.6032047271728516, "logits/rejected": -1.4881914854049683, "logps/chosen": -454.68963623046875, "logps/rejected": -497.9525451660156, "loss": 0.0186, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2439115345478058, "rewards/margins": 0.034994177520275116, "rewards/rejected": -0.2789056897163391, "step": 3510 }, { "epoch": 0.46, "learning_rate": 3.2679791592809653e-06, "logits/chosen": -1.7976545095443726, "logits/rejected": -1.6705448627471924, "logps/chosen": -444.757080078125, "logps/rejected": -513.5965576171875, "loss": 0.0241, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2211049497127533, "rewards/margins": 0.06046595051884651, "rewards/rejected": -0.2815709114074707, "step": 3520 }, { "epoch": 0.46, "learning_rate": 3.257102747821157e-06, "logits/chosen": -1.6195249557495117, "logits/rejected": -1.4758068323135376, "logps/chosen": -467.68194580078125, "logps/rejected": -495.48980712890625, "loss": 0.0281, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2174040526151657, "rewards/margins": 0.0670911967754364, "rewards/rejected": -0.2844952642917633, "step": 3530 }, { "epoch": 0.46, "learning_rate": 3.246210536409484e-06, "logits/chosen": -1.7049881219863892, "logits/rejected": -1.6321277618408203, "logps/chosen": -382.57464599609375, "logps/rejected": -440.7767028808594, "loss": 0.0274, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22493591904640198, "rewards/margins": 0.07922857254743576, "rewards/rejected": -0.30416446924209595, "step": 3540 }, { "epoch": 0.46, "learning_rate": 3.235302752355142e-06, "logits/chosen": -1.3347362279891968, "logits/rejected": -1.2995057106018066, "logps/chosen": -357.0577392578125, "logps/rejected": -402.48480224609375, "loss": 0.037, "rewards/accuracies": 0.5, "rewards/chosen": -0.19129280745983124, "rewards/margins": 0.047142066061496735, "rewards/rejected": -0.23843488097190857, "step": 3550 }, { "epoch": 0.47, "learning_rate": 3.2243796232923097e-06, "logits/chosen": -1.768776297569275, "logits/rejected": -1.7733237743377686, "logps/chosen": -374.0507507324219, "logps/rejected": -424.15679931640625, "loss": 0.0148, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.21344757080078125, "rewards/margins": 0.038538042455911636, "rewards/rejected": -0.2519856095314026, "step": 3560 }, { "epoch": 0.47, "learning_rate": 3.2134413771754037e-06, "logits/chosen": -1.5099389553070068, "logits/rejected": -1.3366999626159668, "logps/chosen": -445.18927001953125, "logps/rejected": -481.36187744140625, "loss": 0.0187, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2308485060930252, "rewards/margins": 0.07678955793380737, "rewards/rejected": -0.3076380491256714, "step": 3570 }, { "epoch": 0.47, "learning_rate": 3.2024882422743118e-06, "logits/chosen": -1.684107780456543, "logits/rejected": -1.4487898349761963, "logps/chosen": -451.0792541503906, "logps/rejected": -508.5023498535156, "loss": 0.0307, "rewards/accuracies": 0.625, "rewards/chosen": -0.2644755244255066, "rewards/margins": 0.07611201703548431, "rewards/rejected": -0.3405875563621521, "step": 3580 }, { "epoch": 0.47, "learning_rate": 3.1915204471696425e-06, "logits/chosen": -1.6601011753082275, "logits/rejected": -1.5120861530303955, "logps/chosen": -581.9293823242188, "logps/rejected": -621.4324951171875, "loss": 0.0334, "rewards/accuracies": 0.75, "rewards/chosen": -0.324591726064682, "rewards/margins": 0.06923629343509674, "rewards/rejected": -0.39382803440093994, "step": 3590 }, { "epoch": 0.47, "learning_rate": 3.180538220747943e-06, "logits/chosen": -1.5336710214614868, "logits/rejected": -1.3869249820709229, "logps/chosen": -501.361572265625, "logps/rejected": -524.8993530273438, "loss": 0.03, "rewards/accuracies": 0.5, "rewards/chosen": -0.33136993646621704, "rewards/margins": 0.0689396858215332, "rewards/rejected": -0.40030962228775024, "step": 3600 }, { "epoch": 0.47, "learning_rate": 3.1695417921969287e-06, "logits/chosen": -1.6701656579971313, "logits/rejected": -1.5971901416778564, "logps/chosen": -521.0940551757812, "logps/rejected": -608.44580078125, "loss": 0.0225, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3268844187259674, "rewards/margins": 0.10305018723011017, "rewards/rejected": -0.4299345910549164, "step": 3610 }, { "epoch": 0.47, "learning_rate": 3.158531391000697e-06, "logits/chosen": -1.6981074810028076, "logits/rejected": -1.5071054697036743, "logps/chosen": -566.3934326171875, "logps/rejected": -606.4476318359375, "loss": 0.0176, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.28605085611343384, "rewards/margins": 0.09975703060626984, "rewards/rejected": -0.38580790162086487, "step": 3620 }, { "epoch": 0.48, "learning_rate": 3.147507246934943e-06, "logits/chosen": -1.927480697631836, "logits/rejected": -1.7554702758789062, "logps/chosen": -465.91607666015625, "logps/rejected": -525.5009765625, "loss": 0.0293, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2357490509748459, "rewards/margins": 0.08434184640645981, "rewards/rejected": -0.3200909197330475, "step": 3630 }, { "epoch": 0.48, "learning_rate": 3.136469590062158e-06, "logits/chosen": -1.7100019454956055, "logits/rejected": -1.497292160987854, "logps/chosen": -430.9881896972656, "logps/rejected": -473.24285888671875, "loss": 0.0278, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.24274256825447083, "rewards/margins": 0.08607254922389984, "rewards/rejected": -0.3288151025772095, "step": 3640 }, { "epoch": 0.48, "learning_rate": 3.1254186507268354e-06, "logits/chosen": -1.873304009437561, "logits/rejected": -1.638899564743042, "logps/chosen": -552.6870727539062, "logps/rejected": -595.4931640625, "loss": 0.0197, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2840507924556732, "rewards/margins": 0.0846860259771347, "rewards/rejected": -0.3687368333339691, "step": 3650 }, { "epoch": 0.48, "learning_rate": 3.114354659550656e-06, "logits/chosen": -1.782984972000122, "logits/rejected": -1.8160291910171509, "logps/chosen": -484.53802490234375, "logps/rejected": -617.17138671875, "loss": 0.0474, "rewards/accuracies": 0.625, "rewards/chosen": -0.25991731882095337, "rewards/margins": 0.0726139098405838, "rewards/rejected": -0.3325311541557312, "step": 3660 }, { "epoch": 0.48, "learning_rate": 3.1032778474276816e-06, "logits/chosen": -1.9407377243041992, "logits/rejected": -1.5795496702194214, "logps/chosen": -439.5630798339844, "logps/rejected": -509.26953125, "loss": 0.0264, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21727442741394043, "rewards/margins": 0.0863426923751831, "rewards/rejected": -0.30361711978912354, "step": 3670 }, { "epoch": 0.48, "learning_rate": 3.092188445519532e-06, "logits/chosen": -1.8346776962280273, "logits/rejected": -1.7408864498138428, "logps/chosen": -428.6434631347656, "logps/rejected": -484.744140625, "loss": 0.0237, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19090434908866882, "rewards/margins": 0.08148179948329926, "rewards/rejected": -0.2723861634731293, "step": 3680 }, { "epoch": 0.48, "learning_rate": 3.081086685250565e-06, "logits/chosen": -2.0212202072143555, "logits/rejected": -1.9480583667755127, "logps/chosen": -428.998779296875, "logps/rejected": -475.50079345703125, "loss": 0.0408, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20856909453868866, "rewards/margins": 0.07064095884561539, "rewards/rejected": -0.27921006083488464, "step": 3690 }, { "epoch": 0.48, "learning_rate": 3.0699727983030434e-06, "logits/chosen": -1.9696426391601562, "logits/rejected": -1.8022600412368774, "logps/chosen": -456.12640380859375, "logps/rejected": -522.7034912109375, "loss": 0.0336, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20959241688251495, "rewards/margins": 0.08178629726171494, "rewards/rejected": -0.2913787066936493, "step": 3700 }, { "epoch": 0.49, "learning_rate": 3.058847016612301e-06, "logits/chosen": -1.824035882949829, "logits/rejected": -1.60663640499115, "logps/chosen": -604.5587768554688, "logps/rejected": -632.459716796875, "loss": 0.023, "rewards/accuracies": 0.75, "rewards/chosen": -0.2821849584579468, "rewards/margins": 0.09114620089530945, "rewards/rejected": -0.3733311593532562, "step": 3710 }, { "epoch": 0.49, "learning_rate": 3.0477095723619034e-06, "logits/chosen": -1.8234065771102905, "logits/rejected": -1.6101405620574951, "logps/chosen": -513.341796875, "logps/rejected": -645.3538818359375, "loss": 0.0211, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.28968682885169983, "rewards/margins": 0.12813854217529297, "rewards/rejected": -0.4178254008293152, "step": 3720 }, { "epoch": 0.49, "learning_rate": 3.0365606979788003e-06, "logits/chosen": -1.5860755443572998, "logits/rejected": -1.6987440586090088, "logps/chosen": -454.96197509765625, "logps/rejected": -528.0737915039062, "loss": 0.0292, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.26307860016822815, "rewards/margins": 0.05534951761364937, "rewards/rejected": -0.318428099155426, "step": 3730 }, { "epoch": 0.49, "learning_rate": 3.0254006261284786e-06, "logits/chosen": -2.1023459434509277, "logits/rejected": -1.9115941524505615, "logps/chosen": -444.1775817871094, "logps/rejected": -460.3389587402344, "loss": 0.0194, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18086031079292297, "rewards/margins": 0.04658311977982521, "rewards/rejected": -0.2274434119462967, "step": 3740 }, { "epoch": 0.49, "learning_rate": 3.0142295897101032e-06, "logits/chosen": -1.849963903427124, "logits/rejected": -1.6758321523666382, "logps/chosen": -464.70941162109375, "logps/rejected": -479.88824462890625, "loss": 0.0314, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22201187908649445, "rewards/margins": 0.06475654244422913, "rewards/rejected": -0.28676837682724, "step": 3750 }, { "epoch": 0.49, "learning_rate": 3.0030478218516578e-06, "logits/chosen": -1.8443362712860107, "logits/rejected": -1.4853614568710327, "logps/chosen": -517.84716796875, "logps/rejected": -568.94091796875, "loss": 0.0265, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2580469250679016, "rewards/margins": 0.16223561763763428, "rewards/rejected": -0.4202825129032135, "step": 3760 }, { "epoch": 0.49, "learning_rate": 2.9918555559050826e-06, "logits/chosen": -1.839974045753479, "logits/rejected": -1.866651177406311, "logps/chosen": -476.24395751953125, "logps/rejected": -606.966552734375, "loss": 0.0326, "rewards/accuracies": 0.625, "rewards/chosen": -0.2671697735786438, "rewards/margins": 0.09309405833482742, "rewards/rejected": -0.360263854265213, "step": 3770 }, { "epoch": 0.49, "learning_rate": 2.980653025441399e-06, "logits/chosen": -2.0247726440429688, "logits/rejected": -2.0207698345184326, "logps/chosen": -381.8411865234375, "logps/rejected": -484.6162109375, "loss": 0.0266, "rewards/accuracies": 0.625, "rewards/chosen": -0.20929519832134247, "rewards/margins": 0.07083339989185333, "rewards/rejected": -0.2801285684108734, "step": 3780 }, { "epoch": 0.5, "learning_rate": 2.969440464245841e-06, "logits/chosen": -1.819759726524353, "logits/rejected": -1.9021791219711304, "logps/chosen": -367.9759216308594, "logps/rejected": -426.68646240234375, "loss": 0.0233, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.20591673254966736, "rewards/margins": 0.04149980470538139, "rewards/rejected": -0.24741652607917786, "step": 3790 }, { "epoch": 0.5, "learning_rate": 2.95821810631297e-06, "logits/chosen": -1.8347660303115845, "logits/rejected": -1.882627248764038, "logps/chosen": -403.18524169921875, "logps/rejected": -482.39599609375, "loss": 0.0254, "rewards/accuracies": 0.625, "rewards/chosen": -0.2047998011112213, "rewards/margins": 0.05898011848330498, "rewards/rejected": -0.2637799382209778, "step": 3800 }, { "epoch": 0.5, "learning_rate": 2.946986185841801e-06, "logits/chosen": -1.923988938331604, "logits/rejected": -1.7287712097167969, "logps/chosen": -442.3211975097656, "logps/rejected": -507.741455078125, "loss": 0.0207, "rewards/accuracies": 0.625, "rewards/chosen": -0.19284003973007202, "rewards/margins": 0.08564140647649765, "rewards/rejected": -0.2784814238548279, "step": 3810 }, { "epoch": 0.5, "learning_rate": 2.935744937230903e-06, "logits/chosen": -1.9581371545791626, "logits/rejected": -1.7871599197387695, "logps/chosen": -452.6080627441406, "logps/rejected": -473.64886474609375, "loss": 0.0341, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22355417907238007, "rewards/margins": 0.06519726663827896, "rewards/rejected": -0.28875142335891724, "step": 3820 }, { "epoch": 0.5, "learning_rate": 2.924494595073517e-06, "logits/chosen": -1.7134824991226196, "logits/rejected": -1.6709381341934204, "logps/chosen": -408.0823974609375, "logps/rejected": -472.41168212890625, "loss": 0.0308, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21899013221263885, "rewards/margins": 0.07624353468418121, "rewards/rejected": -0.29523366689682007, "step": 3830 }, { "epoch": 0.5, "learning_rate": 2.9132353941526575e-06, "logits/chosen": -2.0410828590393066, "logits/rejected": -1.753594994544983, "logps/chosen": -463.1409606933594, "logps/rejected": -499.73175048828125, "loss": 0.0399, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17987537384033203, "rewards/margins": 0.1088109239935875, "rewards/rejected": -0.2886863052845001, "step": 3840 }, { "epoch": 0.5, "learning_rate": 2.901967569436209e-06, "logits/chosen": -1.8085641860961914, "logits/rejected": -1.7082746028900146, "logps/chosen": -412.5797424316406, "logps/rejected": -463.7660217285156, "loss": 0.0248, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1930643916130066, "rewards/margins": 0.06069738790392876, "rewards/rejected": -0.25376179814338684, "step": 3850 }, { "epoch": 0.51, "learning_rate": 2.89069135607203e-06, "logits/chosen": -1.862959623336792, "logits/rejected": -1.5761888027191162, "logps/chosen": -482.17529296875, "logps/rejected": -534.7391967773438, "loss": 0.029, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22728033363819122, "rewards/margins": 0.09412405639886856, "rewards/rejected": -0.3214043974876404, "step": 3860 }, { "epoch": 0.51, "learning_rate": 2.8794069893830386e-06, "logits/chosen": -1.577389121055603, "logits/rejected": -1.6044118404388428, "logps/chosen": -409.6336975097656, "logps/rejected": -537.7545776367188, "loss": 0.0231, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20168235898017883, "rewards/margins": 0.08728457987308502, "rewards/rejected": -0.28896695375442505, "step": 3870 }, { "epoch": 0.51, "learning_rate": 2.8681147048623038e-06, "logits/chosen": -1.872521162033081, "logits/rejected": -1.8206942081451416, "logps/chosen": -411.75714111328125, "logps/rejected": -440.4037170410156, "loss": 0.0254, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.20198896527290344, "rewards/margins": 0.049792829900979996, "rewards/rejected": -0.25178176164627075, "step": 3880 }, { "epoch": 0.51, "learning_rate": 2.8568147381681333e-06, "logits/chosen": -1.9868106842041016, "logits/rejected": -1.529101848602295, "logps/chosen": -437.37646484375, "logps/rejected": -485.68011474609375, "loss": 0.0129, "rewards/accuracies": 0.75, "rewards/chosen": -0.17643964290618896, "rewards/margins": 0.10629074275493622, "rewards/rejected": -0.2827304005622864, "step": 3890 }, { "epoch": 0.51, "learning_rate": 2.8455073251191533e-06, "logits/chosen": -1.9317123889923096, "logits/rejected": -1.657210111618042, "logps/chosen": -434.2438049316406, "logps/rejected": -530.1087646484375, "loss": 0.0172, "rewards/accuracies": 0.625, "rewards/chosen": -0.20733916759490967, "rewards/margins": 0.10539014637470245, "rewards/rejected": -0.3127292990684509, "step": 3900 }, { "epoch": 0.51, "learning_rate": 2.8341927016893887e-06, "logits/chosen": -1.8347209692001343, "logits/rejected": -1.6418960094451904, "logps/chosen": -421.93328857421875, "logps/rejected": -514.156494140625, "loss": 0.0267, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19104665517807007, "rewards/margins": 0.10307135432958603, "rewards/rejected": -0.2941179871559143, "step": 3910 }, { "epoch": 0.51, "learning_rate": 2.822871104003335e-06, "logits/chosen": -1.7689571380615234, "logits/rejected": -1.7831192016601562, "logps/chosen": -439.4349060058594, "logps/rejected": -500.2786560058594, "loss": 0.0187, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23783540725708008, "rewards/margins": 0.07082591950893402, "rewards/rejected": -0.3086613714694977, "step": 3920 }, { "epoch": 0.51, "learning_rate": 2.8115427683310355e-06, "logits/chosen": -1.7057605981826782, "logits/rejected": -1.811344861984253, "logps/chosen": -486.764404296875, "logps/rejected": -532.9198608398438, "loss": 0.0239, "rewards/accuracies": 0.625, "rewards/chosen": -0.25383487343788147, "rewards/margins": 0.05473923683166504, "rewards/rejected": -0.3085741400718689, "step": 3930 }, { "epoch": 0.52, "learning_rate": 2.8002079310831477e-06, "logits/chosen": -1.8792072534561157, "logits/rejected": -1.5005006790161133, "logps/chosen": -429.49945068359375, "logps/rejected": -489.650390625, "loss": 0.0198, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21709856390953064, "rewards/margins": 0.1154472827911377, "rewards/rejected": -0.33254584670066833, "step": 3940 }, { "epoch": 0.52, "learning_rate": 2.7888668288060095e-06, "logits/chosen": -1.7971118688583374, "logits/rejected": -1.7154830694198608, "logps/chosen": -399.40338134765625, "logps/rejected": -502.49090576171875, "loss": 0.0304, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20770153403282166, "rewards/margins": 0.08903868496417999, "rewards/rejected": -0.29674020409584045, "step": 3950 }, { "epoch": 0.52, "learning_rate": 2.7775196981767044e-06, "logits/chosen": -1.9662151336669922, "logits/rejected": -1.6054153442382812, "logps/chosen": -392.71270751953125, "logps/rejected": -463.1542053222656, "loss": 0.0258, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2093251645565033, "rewards/margins": 0.10495827347040176, "rewards/rejected": -0.31428343057632446, "step": 3960 }, { "epoch": 0.52, "learning_rate": 2.7661667759981213e-06, "logits/chosen": -1.5571118593215942, "logits/rejected": -1.6845260858535767, "logps/chosen": -384.57293701171875, "logps/rejected": -467.65606689453125, "loss": 0.0195, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24344149231910706, "rewards/margins": 0.06183134391903877, "rewards/rejected": -0.3052728474140167, "step": 3970 }, { "epoch": 0.52, "learning_rate": 2.7548082991940137e-06, "logits/chosen": -1.879895806312561, "logits/rejected": -1.6696151494979858, "logps/chosen": -470.91455078125, "logps/rejected": -492.08135986328125, "loss": 0.0218, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20541563630104065, "rewards/margins": 0.09316332638263702, "rewards/rejected": -0.2985789477825165, "step": 3980 }, { "epoch": 0.52, "learning_rate": 2.743444504804051e-06, "logits/chosen": -1.7343305349349976, "logits/rejected": -1.5158665180206299, "logps/chosen": -464.01715087890625, "logps/rejected": -480.1866760253906, "loss": 0.022, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25514039397239685, "rewards/margins": 0.06958375871181488, "rewards/rejected": -0.3247241973876953, "step": 3990 }, { "epoch": 0.52, "learning_rate": 2.7320756299788788e-06, "logits/chosen": -1.6907501220703125, "logits/rejected": -1.6015201807022095, "logps/chosen": -474.698486328125, "logps/rejected": -565.966064453125, "loss": 0.0226, "rewards/accuracies": 0.625, "rewards/chosen": -0.24437180161476135, "rewards/margins": 0.08970580995082855, "rewards/rejected": -0.3340775966644287, "step": 4000 }, { "epoch": 0.52, "learning_rate": 2.7207019119751644e-06, "logits/chosen": -1.6006886959075928, "logits/rejected": -1.4601367712020874, "logps/chosen": -418.631103515625, "logps/rejected": -462.9296875, "loss": 0.0273, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20333543419837952, "rewards/margins": 0.09906761348247528, "rewards/rejected": -0.3024030327796936, "step": 4010 }, { "epoch": 0.53, "learning_rate": 2.7093235881506474e-06, "logits/chosen": -1.7386598587036133, "logits/rejected": -1.631439208984375, "logps/chosen": -464.40020751953125, "logps/rejected": -537.2156372070312, "loss": 0.0218, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21815907955169678, "rewards/margins": 0.09343260526657104, "rewards/rejected": -0.3115917146205902, "step": 4020 }, { "epoch": 0.53, "learning_rate": 2.6979408959591863e-06, "logits/chosen": -1.7486827373504639, "logits/rejected": -1.508710265159607, "logps/chosen": -461.79876708984375, "logps/rejected": -527.3321533203125, "loss": 0.0151, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19339005649089813, "rewards/margins": 0.125021830201149, "rewards/rejected": -0.3184118866920471, "step": 4030 }, { "epoch": 0.53, "learning_rate": 2.6865540729458034e-06, "logits/chosen": -1.742926836013794, "logits/rejected": -1.6564674377441406, "logps/chosen": -366.6788635253906, "logps/rejected": -420.30780029296875, "loss": 0.0338, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.14146068692207336, "rewards/margins": 0.04987145587801933, "rewards/rejected": -0.1913321316242218, "step": 4040 }, { "epoch": 0.53, "learning_rate": 2.675163356741726e-06, "logits/chosen": -1.7827094793319702, "logits/rejected": -1.6591606140136719, "logps/chosen": -372.85089111328125, "logps/rejected": -396.1568603515625, "loss": 0.0254, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14776013791561127, "rewards/margins": 0.0655493214726448, "rewards/rejected": -0.21330948173999786, "step": 4050 }, { "epoch": 0.53, "learning_rate": 2.6637689850594285e-06, "logits/chosen": -1.4729732275009155, "logits/rejected": -1.2420446872711182, "logps/chosen": -552.0250244140625, "logps/rejected": -658.884765625, "loss": 0.0321, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2546490728855133, "rewards/margins": 0.10681060701608658, "rewards/rejected": -0.3614596724510193, "step": 4060 }, { "epoch": 0.53, "learning_rate": 2.652371195687671e-06, "logits/chosen": -1.7226966619491577, "logits/rejected": -1.6044594049453735, "logps/chosen": -482.7759704589844, "logps/rejected": -555.4845581054688, "loss": 0.0393, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21687380969524384, "rewards/margins": 0.08411475270986557, "rewards/rejected": -0.3009885847568512, "step": 4070 }, { "epoch": 0.53, "learning_rate": 2.64097022648654e-06, "logits/chosen": -1.5568745136260986, "logits/rejected": -1.2326549291610718, "logps/chosen": -436.64923095703125, "logps/rejected": -430.7582092285156, "loss": 0.0167, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.234510138630867, "rewards/margins": 0.030786100775003433, "rewards/rejected": -0.26529622077941895, "step": 4080 }, { "epoch": 0.54, "learning_rate": 2.6295663153824774e-06, "logits/chosen": -1.7773542404174805, "logits/rejected": -1.462713599205017, "logps/chosen": -501.14630126953125, "logps/rejected": -487.55841064453125, "loss": 0.0271, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23411674797534943, "rewards/margins": 0.07479636371135712, "rewards/rejected": -0.30891311168670654, "step": 4090 }, { "epoch": 0.54, "learning_rate": 2.6181597003633218e-06, "logits/chosen": -1.5608381032943726, "logits/rejected": -1.4907993078231812, "logps/chosen": -400.27740478515625, "logps/rejected": -460.11773681640625, "loss": 0.0197, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23818571865558624, "rewards/margins": 0.07261364161968231, "rewards/rejected": -0.31079936027526855, "step": 4100 }, { "epoch": 0.54, "learning_rate": 2.606750619473342e-06, "logits/chosen": -1.3995827436447144, "logits/rejected": -1.3887090682983398, "logps/chosen": -431.75531005859375, "logps/rejected": -520.8568115234375, "loss": 0.0318, "rewards/accuracies": 0.625, "rewards/chosen": -0.22923071682453156, "rewards/margins": 0.07589320093393326, "rewards/rejected": -0.3051239252090454, "step": 4110 }, { "epoch": 0.54, "learning_rate": 2.595339310808262e-06, "logits/chosen": -1.5573954582214355, "logits/rejected": -1.250608205795288, "logps/chosen": -493.65411376953125, "logps/rejected": -508.92034912109375, "loss": 0.023, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2405034601688385, "rewards/margins": 0.08133454620838165, "rewards/rejected": -0.32183799147605896, "step": 4120 }, { "epoch": 0.54, "learning_rate": 2.5839260125103004e-06, "logits/chosen": -1.2536404132843018, "logits/rejected": -1.2843748331069946, "logps/chosen": -428.8235778808594, "logps/rejected": -518.3291625976562, "loss": 0.0353, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2670718729496002, "rewards/margins": 0.07417013496160507, "rewards/rejected": -0.3412419855594635, "step": 4130 }, { "epoch": 0.54, "learning_rate": 2.5725109627631984e-06, "logits/chosen": -1.685089111328125, "logits/rejected": -1.5624825954437256, "logps/chosen": -454.55303955078125, "logps/rejected": -452.01348876953125, "loss": 0.0253, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19030128419399261, "rewards/margins": 0.05792864412069321, "rewards/rejected": -0.24822993576526642, "step": 4140 }, { "epoch": 0.54, "learning_rate": 2.5610943997872443e-06, "logits/chosen": -1.5616363286972046, "logits/rejected": -1.4265216588974, "logps/chosen": -380.36407470703125, "logps/rejected": -440.4208984375, "loss": 0.0426, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19018259644508362, "rewards/margins": 0.059229712933301926, "rewards/rejected": -0.24941229820251465, "step": 4150 }, { "epoch": 0.54, "learning_rate": 2.5496765618343096e-06, "logits/chosen": -1.6420665979385376, "logits/rejected": -1.3375904560089111, "logps/chosen": -434.129638671875, "logps/rejected": -474.74896240234375, "loss": 0.0228, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20237000286579132, "rewards/margins": 0.06872677803039551, "rewards/rejected": -0.27109676599502563, "step": 4160 }, { "epoch": 0.55, "learning_rate": 2.538257687182871e-06, "logits/chosen": -1.6899299621582031, "logits/rejected": -1.5516376495361328, "logps/chosen": -391.97076416015625, "logps/rejected": -479.993896484375, "loss": 0.0215, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1851247102022171, "rewards/margins": 0.09358541667461395, "rewards/rejected": -0.27871015667915344, "step": 4170 }, { "epoch": 0.55, "learning_rate": 2.526838014133041e-06, "logits/chosen": -1.5125585794448853, "logits/rejected": -1.4651882648468018, "logps/chosen": -460.04168701171875, "logps/rejected": -533.2202758789062, "loss": 0.02, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21069243550300598, "rewards/margins": 0.07955484092235565, "rewards/rejected": -0.29024726152420044, "step": 4180 }, { "epoch": 0.55, "learning_rate": 2.515417781001594e-06, "logits/chosen": -1.6838356256484985, "logits/rejected": -1.5009909868240356, "logps/chosen": -407.412353515625, "logps/rejected": -432.92095947265625, "loss": 0.016, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15886293351650238, "rewards/margins": 0.06116022914648056, "rewards/rejected": -0.22002318501472473, "step": 4190 }, { "epoch": 0.55, "learning_rate": 2.503997226116992e-06, "logits/chosen": -1.540334701538086, "logits/rejected": -1.31317937374115, "logps/chosen": -507.1998596191406, "logps/rejected": -586.198974609375, "loss": 0.0379, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2449527233839035, "rewards/margins": 0.1364816427230835, "rewards/rejected": -0.3814343810081482, "step": 4200 }, { "epoch": 0.55, "learning_rate": 2.4925765878144115e-06, "logits/chosen": -1.5158265829086304, "logits/rejected": -1.3035396337509155, "logps/chosen": -396.3337707519531, "logps/rejected": -544.6439208984375, "loss": 0.0227, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20970037579536438, "rewards/margins": 0.11930962651968002, "rewards/rejected": -0.329010009765625, "step": 4210 }, { "epoch": 0.55, "learning_rate": 2.4811561044307727e-06, "logits/chosen": -1.694092035293579, "logits/rejected": -1.4198188781738281, "logps/chosen": -432.31689453125, "logps/rejected": -528.2352905273438, "loss": 0.0227, "rewards/accuracies": 0.625, "rewards/chosen": -0.1862529069185257, "rewards/margins": 0.11565999686717987, "rewards/rejected": -0.3019128739833832, "step": 4220 }, { "epoch": 0.55, "learning_rate": 2.469736014299758e-06, "logits/chosen": -1.8983396291732788, "logits/rejected": -1.4605172872543335, "logps/chosen": -442.47393798828125, "logps/rejected": -453.75006103515625, "loss": 0.0215, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16222409904003143, "rewards/margins": 0.08797114342451096, "rewards/rejected": -0.2501952648162842, "step": 4230 }, { "epoch": 0.55, "learning_rate": 2.458316555746846e-06, "logits/chosen": -1.7895050048828125, "logits/rejected": -1.5404589176177979, "logps/chosen": -400.5735778808594, "logps/rejected": -468.3433532714844, "loss": 0.0254, "rewards/accuracies": 0.75, "rewards/chosen": -0.16227874159812927, "rewards/margins": 0.08485525101423264, "rewards/rejected": -0.24713397026062012, "step": 4240 }, { "epoch": 0.56, "learning_rate": 2.446897967084334e-06, "logits/chosen": -1.7049553394317627, "logits/rejected": -1.6142890453338623, "logps/chosen": -446.55078125, "logps/rejected": -508.743408203125, "loss": 0.022, "rewards/accuracies": 0.625, "rewards/chosen": -0.19081057608127594, "rewards/margins": 0.07758510857820511, "rewards/rejected": -0.26839572191238403, "step": 4250 }, { "epoch": 0.56, "learning_rate": 2.4354804866063684e-06, "logits/chosen": -1.788198471069336, "logits/rejected": -1.5201570987701416, "logps/chosen": -454.056640625, "logps/rejected": -495.20367431640625, "loss": 0.0413, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22993293404579163, "rewards/margins": 0.051151882857084274, "rewards/rejected": -0.281084805727005, "step": 4260 }, { "epoch": 0.56, "learning_rate": 2.424064352583964e-06, "logits/chosen": -1.4209312200546265, "logits/rejected": -1.2128381729125977, "logps/chosen": -418.61181640625, "logps/rejected": -477.3053283691406, "loss": 0.0209, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19294168055057526, "rewards/margins": 0.10017367452383041, "rewards/rejected": -0.29311537742614746, "step": 4270 }, { "epoch": 0.56, "learning_rate": 2.4126498032600403e-06, "logits/chosen": -1.5372164249420166, "logits/rejected": -1.537858486175537, "logps/chosen": -389.9130859375, "logps/rejected": -496.35723876953125, "loss": 0.0224, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19452248513698578, "rewards/margins": 0.09162701666355133, "rewards/rejected": -0.2861495316028595, "step": 4280 }, { "epoch": 0.56, "learning_rate": 2.401237076844445e-06, "logits/chosen": -1.3520879745483398, "logits/rejected": -1.1993756294250488, "logps/chosen": -426.945068359375, "logps/rejected": -472.2967224121094, "loss": 0.0281, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21869483590126038, "rewards/margins": 0.04853721708059311, "rewards/rejected": -0.26723209023475647, "step": 4290 }, { "epoch": 0.56, "learning_rate": 2.38982641150898e-06, "logits/chosen": -1.6982262134552002, "logits/rejected": -1.3734503984451294, "logps/chosen": -491.4610290527344, "logps/rejected": -537.25, "loss": 0.022, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2278437614440918, "rewards/margins": 0.07478948682546616, "rewards/rejected": -0.30263322591781616, "step": 4300 }, { "epoch": 0.56, "learning_rate": 2.3784180453824414e-06, "logits/chosen": -1.6156953573226929, "logits/rejected": -1.5192720890045166, "logps/chosen": -428.35693359375, "logps/rejected": -511.0567321777344, "loss": 0.0222, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18561260402202606, "rewards/margins": 0.09769701957702637, "rewards/rejected": -0.2833096385002136, "step": 4310 }, { "epoch": 0.57, "learning_rate": 2.367012216545638e-06, "logits/chosen": -1.7509912252426147, "logits/rejected": -1.4216718673706055, "logps/chosen": -443.70001220703125, "logps/rejected": -451.0489196777344, "loss": 0.0303, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1952689290046692, "rewards/margins": 0.06825876235961914, "rewards/rejected": -0.26352769136428833, "step": 4320 }, { "epoch": 0.57, "learning_rate": 2.3556091630264294e-06, "logits/chosen": -1.5806615352630615, "logits/rejected": -1.4713026285171509, "logps/chosen": -513.8214111328125, "logps/rejected": -581.2058715820312, "loss": 0.0282, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23938843607902527, "rewards/margins": 0.06749831140041351, "rewards/rejected": -0.3068867325782776, "step": 4330 }, { "epoch": 0.57, "learning_rate": 2.344209122794757e-06, "logits/chosen": -1.4990651607513428, "logits/rejected": -1.3438913822174072, "logps/chosen": -462.8631286621094, "logps/rejected": -485.54412841796875, "loss": 0.0249, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21740731596946716, "rewards/margins": 0.07030390202999115, "rewards/rejected": -0.2877112329006195, "step": 4340 }, { "epoch": 0.57, "learning_rate": 2.3328123337576787e-06, "logits/chosen": -1.5248218774795532, "logits/rejected": -1.4321211576461792, "logps/chosen": -511.95611572265625, "logps/rejected": -601.123779296875, "loss": 0.0274, "rewards/accuracies": 0.625, "rewards/chosen": -0.26373380422592163, "rewards/margins": 0.07318225502967834, "rewards/rejected": -0.3369160294532776, "step": 4350 }, { "epoch": 0.57, "learning_rate": 2.3214190337544017e-06, "logits/chosen": -1.6629645824432373, "logits/rejected": -1.4849364757537842, "logps/chosen": -424.27581787109375, "logps/rejected": -464.45074462890625, "loss": 0.0301, "rewards/accuracies": 0.625, "rewards/chosen": -0.22247524559497833, "rewards/margins": 0.0703267902135849, "rewards/rejected": -0.29280200600624084, "step": 4360 }, { "epoch": 0.57, "learning_rate": 2.310029460551323e-06, "logits/chosen": -1.8954870700836182, "logits/rejected": -1.580891728401184, "logps/chosen": -437.32275390625, "logps/rejected": -496.6338806152344, "loss": 0.0208, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20409438014030457, "rewards/margins": 0.08132465183734894, "rewards/rejected": -0.2854190468788147, "step": 4370 }, { "epoch": 0.57, "learning_rate": 2.2986438518370645e-06, "logits/chosen": -1.6273301839828491, "logits/rejected": -1.6872777938842773, "logps/chosen": -431.04425048828125, "logps/rejected": -482.32293701171875, "loss": 0.0173, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22521671652793884, "rewards/margins": 0.062195055186748505, "rewards/rejected": -0.28741174936294556, "step": 4380 }, { "epoch": 0.57, "learning_rate": 2.2872624452175123e-06, "logits/chosen": -1.7207715511322021, "logits/rejected": -1.5806845426559448, "logps/chosen": -434.0201110839844, "logps/rejected": -481.0724182128906, "loss": 0.0279, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20164403319358826, "rewards/margins": 0.05903003737330437, "rewards/rejected": -0.2606740891933441, "step": 4390 }, { "epoch": 0.58, "learning_rate": 2.2758854782108584e-06, "logits/chosen": -1.548923134803772, "logits/rejected": -1.5041494369506836, "logps/chosen": -417.203857421875, "logps/rejected": -507.90582275390625, "loss": 0.027, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2142319232225418, "rewards/margins": 0.08471399545669556, "rewards/rejected": -0.29894593358039856, "step": 4400 }, { "epoch": 0.58, "learning_rate": 2.2645131882426458e-06, "logits/chosen": -1.7802507877349854, "logits/rejected": -1.4019224643707275, "logps/chosen": -468.563720703125, "logps/rejected": -490.57708740234375, "loss": 0.0216, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22909097373485565, "rewards/margins": 0.1022447720170021, "rewards/rejected": -0.33133575320243835, "step": 4410 }, { "epoch": 0.58, "learning_rate": 2.2531458126408154e-06, "logits/chosen": -1.6049638986587524, "logits/rejected": -1.3213298320770264, "logps/chosen": -379.1090393066406, "logps/rejected": -478.8014221191406, "loss": 0.0329, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1762361377477646, "rewards/margins": 0.14133073389530182, "rewards/rejected": -0.3175669014453888, "step": 4420 }, { "epoch": 0.58, "learning_rate": 2.2417835886307452e-06, "logits/chosen": -1.4309465885162354, "logits/rejected": -1.2265818119049072, "logps/chosen": -439.0750427246094, "logps/rejected": -505.5755310058594, "loss": 0.0303, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2087368220090866, "rewards/margins": 0.1353384554386139, "rewards/rejected": -0.3440752923488617, "step": 4430 }, { "epoch": 0.58, "learning_rate": 2.2304267533303075e-06, "logits/chosen": -1.3964816331863403, "logits/rejected": -1.242711067199707, "logps/chosen": -557.1407470703125, "logps/rejected": -587.2622680664062, "loss": 0.04, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2703414857387543, "rewards/margins": 0.06430923938751221, "rewards/rejected": -0.33465075492858887, "step": 4440 }, { "epoch": 0.58, "learning_rate": 2.219075543744918e-06, "logits/chosen": -1.5047862529754639, "logits/rejected": -1.2050893306732178, "logps/chosen": -474.2640075683594, "logps/rejected": -498.3106384277344, "loss": 0.0165, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2143983393907547, "rewards/margins": 0.07695254683494568, "rewards/rejected": -0.2913508713245392, "step": 4450 }, { "epoch": 0.58, "learning_rate": 2.207730196762589e-06, "logits/chosen": -1.5303689241409302, "logits/rejected": -1.2933293581008911, "logps/chosen": -487.30743408203125, "logps/rejected": -537.5446166992188, "loss": 0.0413, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2514573037624359, "rewards/margins": 0.08375079184770584, "rewards/rejected": -0.33520805835723877, "step": 4460 }, { "epoch": 0.58, "learning_rate": 2.1963909491489846e-06, "logits/chosen": -1.6777637004852295, "logits/rejected": -1.7184629440307617, "logps/chosen": -413.19580078125, "logps/rejected": -464.3396911621094, "loss": 0.0392, "rewards/accuracies": 0.5, "rewards/chosen": -0.2414092719554901, "rewards/margins": 0.06929554790258408, "rewards/rejected": -0.3107048571109772, "step": 4470 }, { "epoch": 0.59, "learning_rate": 2.185058037542486e-06, "logits/chosen": -1.5463758707046509, "logits/rejected": -1.2479265928268433, "logps/chosen": -518.1327514648438, "logps/rejected": -562.65380859375, "loss": 0.0252, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.25115349888801575, "rewards/margins": 0.11522521823644638, "rewards/rejected": -0.3663787245750427, "step": 4480 }, { "epoch": 0.59, "learning_rate": 2.173731698449244e-06, "logits/chosen": -1.4406402111053467, "logits/rejected": -1.059444785118103, "logps/chosen": -539.0560302734375, "logps/rejected": -574.791259765625, "loss": 0.029, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.267414391040802, "rewards/margins": 0.11316857486963272, "rewards/rejected": -0.3805829882621765, "step": 4490 }, { "epoch": 0.59, "learning_rate": 2.1624121682382495e-06, "logits/chosen": -1.3759723901748657, "logits/rejected": -1.2170668840408325, "logps/chosen": -504.7023010253906, "logps/rejected": -614.5403442382812, "loss": 0.0278, "rewards/accuracies": 0.75, "rewards/chosen": -0.28002482652664185, "rewards/margins": 0.10902640968561172, "rewards/rejected": -0.38905128836631775, "step": 4500 }, { "epoch": 0.59, "learning_rate": 2.1510996831363993e-06, "logits/chosen": -1.644968032836914, "logits/rejected": -1.3946508169174194, "logps/chosen": -536.4176025390625, "logps/rejected": -593.7776489257812, "loss": 0.0264, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25945937633514404, "rewards/margins": 0.0572463758289814, "rewards/rejected": -0.31670576333999634, "step": 4510 }, { "epoch": 0.59, "learning_rate": 2.139794479223565e-06, "logits/chosen": -1.5176206827163696, "logits/rejected": -1.252367377281189, "logps/chosen": -457.44683837890625, "logps/rejected": -522.0448608398438, "loss": 0.0336, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24093282222747803, "rewards/margins": 0.07370787113904953, "rewards/rejected": -0.31464070081710815, "step": 4520 }, { "epoch": 0.59, "learning_rate": 2.128496792427669e-06, "logits/chosen": -1.8894401788711548, "logits/rejected": -1.7518160343170166, "logps/chosen": -424.028076171875, "logps/rejected": -528.4948120117188, "loss": 0.0211, "rewards/accuracies": 0.625, "rewards/chosen": -0.1635584980249405, "rewards/margins": 0.0953235924243927, "rewards/rejected": -0.2588821053504944, "step": 4530 }, { "epoch": 0.59, "learning_rate": 2.117206858519758e-06, "logits/chosen": -1.793152093887329, "logits/rejected": -1.6433145999908447, "logps/chosen": -481.6566467285156, "logps/rejected": -526.556640625, "loss": 0.0238, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2006797045469284, "rewards/margins": 0.07764464616775513, "rewards/rejected": -0.27832433581352234, "step": 4540 }, { "epoch": 0.6, "learning_rate": 2.1059249131090844e-06, "logits/chosen": -1.4656394720077515, "logits/rejected": -1.3645011186599731, "logps/chosen": -448.1487731933594, "logps/rejected": -506.11444091796875, "loss": 0.0173, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22689059376716614, "rewards/margins": 0.08436138927936554, "rewards/rejected": -0.31125199794769287, "step": 4550 }, { "epoch": 0.6, "learning_rate": 2.094651191638189e-06, "logits/chosen": -1.6243807077407837, "logits/rejected": -1.4395637512207031, "logps/chosen": -427.7439880371094, "logps/rejected": -471.2557678222656, "loss": 0.0282, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1894945651292801, "rewards/margins": 0.0754246711730957, "rewards/rejected": -0.2649192214012146, "step": 4560 }, { "epoch": 0.6, "learning_rate": 2.0833859293779867e-06, "logits/chosen": -1.6144850254058838, "logits/rejected": -1.5711697340011597, "logps/chosen": -450.52642822265625, "logps/rejected": -514.2506103515625, "loss": 0.019, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2052551507949829, "rewards/margins": 0.08380991965532303, "rewards/rejected": -0.28906506299972534, "step": 4570 }, { "epoch": 0.6, "learning_rate": 2.0721293614228568e-06, "logits/chosen": -1.5417252779006958, "logits/rejected": -1.3488690853118896, "logps/chosen": -432.2200622558594, "logps/rejected": -466.6581115722656, "loss": 0.0235, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2289007157087326, "rewards/margins": 0.05619456246495247, "rewards/rejected": -0.2850952744483948, "step": 4580 }, { "epoch": 0.6, "learning_rate": 2.060881722685742e-06, "logits/chosen": -1.5724356174468994, "logits/rejected": -1.2952988147735596, "logps/chosen": -435.21075439453125, "logps/rejected": -483.70526123046875, "loss": 0.0294, "rewards/accuracies": 0.75, "rewards/chosen": -0.18673992156982422, "rewards/margins": 0.1105538010597229, "rewards/rejected": -0.2972937524318695, "step": 4590 }, { "epoch": 0.6, "learning_rate": 2.049643247893235e-06, "logits/chosen": -1.7986335754394531, "logits/rejected": -1.4598957300186157, "logps/chosen": -457.9772033691406, "logps/rejected": -456.84033203125, "loss": 0.025, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17091234028339386, "rewards/margins": 0.06817115843296051, "rewards/rejected": -0.23908352851867676, "step": 4600 }, { "epoch": 0.6, "learning_rate": 2.0384141715806903e-06, "logits/chosen": -1.5617355108261108, "logits/rejected": -1.5490385293960571, "logps/chosen": -379.4814453125, "logps/rejected": -440.04254150390625, "loss": 0.0204, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16015496850013733, "rewards/margins": 0.045989301055669785, "rewards/rejected": -0.20614425837993622, "step": 4610 }, { "epoch": 0.6, "learning_rate": 2.0271947280873255e-06, "logits/chosen": -1.6132984161376953, "logits/rejected": -1.4674255847930908, "logps/chosen": -348.9981689453125, "logps/rejected": -502.19091796875, "loss": 0.0185, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17968395352363586, "rewards/margins": 0.12029703706502914, "rewards/rejected": -0.2999809682369232, "step": 4620 }, { "epoch": 0.61, "learning_rate": 2.0159851515513302e-06, "logits/chosen": -1.3583838939666748, "logits/rejected": -1.344406247138977, "logps/chosen": -388.41876220703125, "logps/rejected": -437.25811767578125, "loss": 0.0202, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19703704118728638, "rewards/margins": 0.0540815070271492, "rewards/rejected": -0.2511185109615326, "step": 4630 }, { "epoch": 0.61, "learning_rate": 2.004785675904982e-06, "logits/chosen": -1.490714192390442, "logits/rejected": -1.4939876794815063, "logps/chosen": -345.9848937988281, "logps/rejected": -453.45684814453125, "loss": 0.0287, "rewards/accuracies": 0.625, "rewards/chosen": -0.17589840292930603, "rewards/margins": 0.09860150516033173, "rewards/rejected": -0.27449989318847656, "step": 4640 }, { "epoch": 0.61, "learning_rate": 1.9935965348697624e-06, "logits/chosen": -1.526310682296753, "logits/rejected": -1.5772511959075928, "logps/chosen": -408.08843994140625, "logps/rejected": -526.1033935546875, "loss": 0.0295, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17842058837413788, "rewards/margins": 0.09581470489501953, "rewards/rejected": -0.2742353081703186, "step": 4650 }, { "epoch": 0.61, "learning_rate": 1.9824179619514807e-06, "logits/chosen": -1.3917975425720215, "logits/rejected": -1.6485751867294312, "logps/chosen": -324.16485595703125, "logps/rejected": -428.88397216796875, "loss": 0.029, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19104906916618347, "rewards/margins": 0.07186642289161682, "rewards/rejected": -0.2629155218601227, "step": 4660 }, { "epoch": 0.61, "learning_rate": 1.9712501904354004e-06, "logits/chosen": -1.708351731300354, "logits/rejected": -1.3460270166397095, "logps/chosen": -428.37371826171875, "logps/rejected": -460.4834899902344, "loss": 0.0289, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22374172508716583, "rewards/margins": 0.06903581321239471, "rewards/rejected": -0.29277750849723816, "step": 4670 }, { "epoch": 0.61, "learning_rate": 1.960093453381369e-06, "logits/chosen": -1.6553242206573486, "logits/rejected": -1.6477683782577515, "logps/chosen": -286.5013122558594, "logps/rejected": -361.59619140625, "loss": 0.0228, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.15842024981975555, "rewards/margins": 0.06222587078809738, "rewards/rejected": -0.22064614295959473, "step": 4680 }, { "epoch": 0.61, "learning_rate": 1.948947983618962e-06, "logits/chosen": -1.6140121221542358, "logits/rejected": -1.4784574508666992, "logps/chosen": -422.51165771484375, "logps/rejected": -475.551513671875, "loss": 0.0191, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1707344651222229, "rewards/margins": 0.07597699016332626, "rewards/rejected": -0.24671144783496857, "step": 4690 }, { "epoch": 0.62, "learning_rate": 1.937814013742611e-06, "logits/chosen": -1.7896366119384766, "logits/rejected": -1.4367586374282837, "logps/chosen": -499.7083435058594, "logps/rejected": -533.1226806640625, "loss": 0.0237, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18204855918884277, "rewards/margins": 0.09838353097438812, "rewards/rejected": -0.2804320752620697, "step": 4700 }, { "epoch": 0.62, "learning_rate": 1.9266917761067617e-06, "logits/chosen": -1.4915893077850342, "logits/rejected": -1.1812177896499634, "logps/chosen": -387.476806640625, "logps/rejected": -429.5155334472656, "loss": 0.0278, "rewards/accuracies": 0.625, "rewards/chosen": -0.16393427550792694, "rewards/margins": 0.0778748169541359, "rewards/rejected": -0.24180908501148224, "step": 4710 }, { "epoch": 0.62, "learning_rate": 1.915581502821017e-06, "logits/chosen": -1.4793461561203003, "logits/rejected": -1.3449960947036743, "logps/chosen": -334.1965026855469, "logps/rejected": -368.3381042480469, "loss": 0.0536, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.14853033423423767, "rewards/margins": 0.0649092048406601, "rewards/rejected": -0.21343955397605896, "step": 4720 }, { "epoch": 0.62, "learning_rate": 1.9044834257452997e-06, "logits/chosen": -1.8198268413543701, "logits/rejected": -1.5682449340820312, "logps/chosen": -373.09259033203125, "logps/rejected": -394.75177001953125, "loss": 0.0205, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15750274062156677, "rewards/margins": 0.06791599839925766, "rewards/rejected": -0.22541876137256622, "step": 4730 }, { "epoch": 0.62, "learning_rate": 1.893397776485006e-06, "logits/chosen": -1.7474422454833984, "logits/rejected": -1.504546880722046, "logps/chosen": -339.1938171386719, "logps/rejected": -431.856689453125, "loss": 0.0314, "rewards/accuracies": 0.625, "rewards/chosen": -0.14679184556007385, "rewards/margins": 0.10970643907785416, "rewards/rejected": -0.2564982771873474, "step": 4740 }, { "epoch": 0.62, "learning_rate": 1.8823247863861804e-06, "logits/chosen": -1.6589701175689697, "logits/rejected": -1.621392011642456, "logps/chosen": -404.7550964355469, "logps/rejected": -460.41143798828125, "loss": 0.0211, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17128518223762512, "rewards/margins": 0.07399027049541473, "rewards/rejected": -0.24527546763420105, "step": 4750 }, { "epoch": 0.62, "learning_rate": 1.8712646865306822e-06, "logits/chosen": -1.5093224048614502, "logits/rejected": -1.4263262748718262, "logps/chosen": -462.394287109375, "logps/rejected": -484.19415283203125, "loss": 0.0351, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16870293021202087, "rewards/margins": 0.08714812994003296, "rewards/rejected": -0.2558510899543762, "step": 4760 }, { "epoch": 0.62, "learning_rate": 1.8602177077313631e-06, "logits/chosen": -1.4509001970291138, "logits/rejected": -1.4139875173568726, "logps/chosen": -415.69921875, "logps/rejected": -492.8407287597656, "loss": 0.0176, "rewards/accuracies": 0.625, "rewards/chosen": -0.1922876238822937, "rewards/margins": 0.07129409164190292, "rewards/rejected": -0.2635817229747772, "step": 4770 }, { "epoch": 0.63, "learning_rate": 1.8491840805272546e-06, "logits/chosen": -1.5857927799224854, "logits/rejected": -1.3974844217300415, "logps/chosen": -418.29217529296875, "logps/rejected": -474.0970764160156, "loss": 0.0268, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1814996898174286, "rewards/margins": 0.09302203357219696, "rewards/rejected": -0.27452173829078674, "step": 4780 }, { "epoch": 0.63, "learning_rate": 1.8381640351787516e-06, "logits/chosen": -1.6603059768676758, "logits/rejected": -1.5743963718414307, "logps/chosen": -419.96563720703125, "logps/rejected": -484.13177490234375, "loss": 0.0292, "rewards/accuracies": 0.625, "rewards/chosen": -0.18491405248641968, "rewards/margins": 0.06687320023775101, "rewards/rejected": -0.2517872452735901, "step": 4790 }, { "epoch": 0.63, "learning_rate": 1.8271578016628122e-06, "logits/chosen": -1.4773046970367432, "logits/rejected": -1.4587136507034302, "logps/chosen": -371.9123229980469, "logps/rejected": -448.15777587890625, "loss": 0.0205, "rewards/accuracies": 0.625, "rewards/chosen": -0.1796925961971283, "rewards/margins": 0.07964633405208588, "rewards/rejected": -0.259338915348053, "step": 4800 }, { "epoch": 0.63, "learning_rate": 1.8161656096681546e-06, "logits/chosen": -1.5090692043304443, "logits/rejected": -1.2741395235061646, "logps/chosen": -350.93194580078125, "logps/rejected": -480.82171630859375, "loss": 0.0358, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17646178603172302, "rewards/margins": 0.13238736987113953, "rewards/rejected": -0.30884915590286255, "step": 4810 }, { "epoch": 0.63, "learning_rate": 1.8051876885904645e-06, "logits/chosen": -1.36701500415802, "logits/rejected": -1.3117094039916992, "logps/chosen": -421.5575256347656, "logps/rejected": -501.64691162109375, "loss": 0.0361, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1858542114496231, "rewards/margins": 0.08887975662946701, "rewards/rejected": -0.2747339606285095, "step": 4820 }, { "epoch": 0.63, "learning_rate": 1.7942242675276098e-06, "logits/chosen": -1.4270477294921875, "logits/rejected": -1.5564621686935425, "logps/chosen": -362.71478271484375, "logps/rejected": -495.60162353515625, "loss": 0.0208, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2082078754901886, "rewards/margins": 0.08840437233448029, "rewards/rejected": -0.2966122329235077, "step": 4830 }, { "epoch": 0.63, "learning_rate": 1.783275575274856e-06, "logits/chosen": -1.607736349105835, "logits/rejected": -1.419236421585083, "logps/chosen": -492.0704040527344, "logps/rejected": -484.59344482421875, "loss": 0.0165, "rewards/accuracies": 0.75, "rewards/chosen": -0.20354600250720978, "rewards/margins": 0.08249841630458832, "rewards/rejected": -0.2860444188117981, "step": 4840 }, { "epoch": 0.63, "learning_rate": 1.7723418403200943e-06, "logits/chosen": -1.6304285526275635, "logits/rejected": -1.2252190113067627, "logps/chosen": -516.8814086914062, "logps/rejected": -528.237060546875, "loss": 0.0263, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.222385972738266, "rewards/margins": 0.08664709329605103, "rewards/rejected": -0.30903303623199463, "step": 4850 }, { "epoch": 0.64, "learning_rate": 1.7614232908390748e-06, "logits/chosen": -1.4483563899993896, "logits/rejected": -1.3192527294158936, "logps/chosen": -398.49603271484375, "logps/rejected": -473.6939392089844, "loss": 0.0244, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16964933276176453, "rewards/margins": 0.07871166616678238, "rewards/rejected": -0.24836096167564392, "step": 4860 }, { "epoch": 0.64, "learning_rate": 1.7505201546906398e-06, "logits/chosen": -1.4854518175125122, "logits/rejected": -1.2704298496246338, "logps/chosen": -463.33636474609375, "logps/rejected": -425.34796142578125, "loss": 0.0333, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1838013082742691, "rewards/margins": 0.04110676050186157, "rewards/rejected": -0.22490806877613068, "step": 4870 }, { "epoch": 0.64, "learning_rate": 1.7396326594119717e-06, "logits/chosen": -1.7042782306671143, "logits/rejected": -1.613613486289978, "logps/chosen": -417.82550048828125, "logps/rejected": -494.0665588378906, "loss": 0.0364, "rewards/accuracies": 0.625, "rewards/chosen": -0.1753154844045639, "rewards/margins": 0.10930249840021133, "rewards/rejected": -0.28461796045303345, "step": 4880 }, { "epoch": 0.64, "learning_rate": 1.7287610322138449e-06, "logits/chosen": -1.5676114559173584, "logits/rejected": -1.3253037929534912, "logps/chosen": -419.602294921875, "logps/rejected": -503.919677734375, "loss": 0.0215, "rewards/accuracies": 0.625, "rewards/chosen": -0.18550562858581543, "rewards/margins": 0.12120529264211655, "rewards/rejected": -0.3067108988761902, "step": 4890 }, { "epoch": 0.64, "learning_rate": 1.7179054999758817e-06, "logits/chosen": -1.8627017736434937, "logits/rejected": -1.529758334159851, "logps/chosen": -439.87420654296875, "logps/rejected": -486.83099365234375, "loss": 0.0203, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20357398688793182, "rewards/margins": 0.07500387728214264, "rewards/rejected": -0.27857786417007446, "step": 4900 }, { "epoch": 0.64, "learning_rate": 1.7070662892418225e-06, "logits/chosen": -1.5751526355743408, "logits/rejected": -1.45789635181427, "logps/chosen": -357.0823669433594, "logps/rejected": -422.3937072753906, "loss": 0.042, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18973052501678467, "rewards/margins": 0.07508702576160431, "rewards/rejected": -0.26481756567955017, "step": 4910 }, { "epoch": 0.64, "learning_rate": 1.6962436262147913e-06, "logits/chosen": -1.4677342176437378, "logits/rejected": -1.4380252361297607, "logps/chosen": -410.12890625, "logps/rejected": -524.4754638671875, "loss": 0.0522, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2252027541399002, "rewards/margins": 0.09915554523468018, "rewards/rejected": -0.3243583142757416, "step": 4920 }, { "epoch": 0.65, "learning_rate": 1.6854377367525814e-06, "logits/chosen": -1.6399227380752563, "logits/rejected": -1.5074961185455322, "logps/chosen": -495.7207946777344, "logps/rejected": -519.4228515625, "loss": 0.029, "rewards/accuracies": 0.625, "rewards/chosen": -0.212543323636055, "rewards/margins": 0.08628206700086594, "rewards/rejected": -0.29882535338401794, "step": 4930 }, { "epoch": 0.65, "learning_rate": 1.6746488463629362e-06, "logits/chosen": -1.7351045608520508, "logits/rejected": -1.6972219944000244, "logps/chosen": -488.1156311035156, "logps/rejected": -578.7838745117188, "loss": 0.0268, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21219182014465332, "rewards/margins": 0.0840751901268959, "rewards/rejected": -0.29626700282096863, "step": 4940 }, { "epoch": 0.65, "learning_rate": 1.6638771801988483e-06, "logits/chosen": -1.4572616815567017, "logits/rejected": -1.4984290599822998, "logps/chosen": -481.2704162597656, "logps/rejected": -533.4957885742188, "loss": 0.0236, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20552797615528107, "rewards/margins": 0.06888245046138763, "rewards/rejected": -0.2744104266166687, "step": 4950 }, { "epoch": 0.65, "learning_rate": 1.653122963053857e-06, "logits/chosen": -1.4090168476104736, "logits/rejected": -1.22544264793396, "logps/chosen": -391.96661376953125, "logps/rejected": -467.94232177734375, "loss": 0.033, "rewards/accuracies": 0.625, "rewards/chosen": -0.1839529573917389, "rewards/margins": 0.07347510755062103, "rewards/rejected": -0.25742802023887634, "step": 4960 }, { "epoch": 0.65, "learning_rate": 1.6423864193573606e-06, "logits/chosen": -1.399701476097107, "logits/rejected": -1.2479718923568726, "logps/chosen": -403.88641357421875, "logps/rejected": -505.8567810058594, "loss": 0.0297, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19615277647972107, "rewards/margins": 0.10045097768306732, "rewards/rejected": -0.2966037690639496, "step": 4970 }, { "epoch": 0.65, "learning_rate": 1.6316677731699286e-06, "logits/chosen": -1.6854263544082642, "logits/rejected": -1.4302657842636108, "logps/chosen": -468.148193359375, "logps/rejected": -521.8663330078125, "loss": 0.0408, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22639694809913635, "rewards/margins": 0.09691077470779419, "rewards/rejected": -0.32330775260925293, "step": 4980 }, { "epoch": 0.65, "learning_rate": 1.6209672481786302e-06, "logits/chosen": -1.4107683897018433, "logits/rejected": -1.3036644458770752, "logps/chosen": -397.0565185546875, "logps/rejected": -478.081298828125, "loss": 0.0232, "rewards/accuracies": 0.75, "rewards/chosen": -0.16772839426994324, "rewards/margins": 0.1289522349834442, "rewards/rejected": -0.29668062925338745, "step": 4990 }, { "epoch": 0.65, "learning_rate": 1.6102850676923616e-06, "logits/chosen": -1.5386617183685303, "logits/rejected": -1.5371997356414795, "logps/chosen": -410.09564208984375, "logps/rejected": -492.2012634277344, "loss": 0.018, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19269494712352753, "rewards/margins": 0.0963241308927536, "rewards/rejected": -0.2890191078186035, "step": 5000 }, { "epoch": 0.66, "learning_rate": 1.5996214546371888e-06, "logits/chosen": -1.8242378234863281, "logits/rejected": -1.4758179187774658, "logps/chosen": -380.41973876953125, "logps/rejected": -392.49261474609375, "loss": 0.02, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.16637901961803436, "rewards/margins": 0.057704634964466095, "rewards/rejected": -0.22408363223075867, "step": 5010 }, { "epoch": 0.66, "learning_rate": 1.588976631551697e-06, "logits/chosen": -1.6556527614593506, "logits/rejected": -1.4670960903167725, "logps/chosen": -387.4090881347656, "logps/rejected": -469.97760009765625, "loss": 0.029, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14826735854148865, "rewards/margins": 0.09724099189043045, "rewards/rejected": -0.2455083429813385, "step": 5020 }, { "epoch": 0.66, "learning_rate": 1.5783508205823412e-06, "logits/chosen": -1.5352144241333008, "logits/rejected": -1.4616471529006958, "logps/chosen": -400.0191955566406, "logps/rejected": -461.9366760253906, "loss": 0.0229, "rewards/accuracies": 0.75, "rewards/chosen": -0.18305253982543945, "rewards/margins": 0.08117742836475372, "rewards/rejected": -0.264229953289032, "step": 5030 }, { "epoch": 0.66, "learning_rate": 1.5677442434788143e-06, "logits/chosen": -1.7859392166137695, "logits/rejected": -1.488284945487976, "logps/chosen": -407.81317138671875, "logps/rejected": -449.358154296875, "loss": 0.0242, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16020144522190094, "rewards/margins": 0.08070135116577148, "rewards/rejected": -0.24090275168418884, "step": 5040 }, { "epoch": 0.66, "learning_rate": 1.5571571215894181e-06, "logits/chosen": -1.7393693923950195, "logits/rejected": -1.6400400400161743, "logps/chosen": -395.0425720214844, "logps/rejected": -480.1275329589844, "loss": 0.0173, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1880541741847992, "rewards/margins": 0.057797182351350784, "rewards/rejected": -0.24585136771202087, "step": 5050 }, { "epoch": 0.66, "learning_rate": 1.5465896758564452e-06, "logits/chosen": -1.8525689840316772, "logits/rejected": -1.6848907470703125, "logps/chosen": -430.5870666503906, "logps/rejected": -511.40728759765625, "loss": 0.0364, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1730785071849823, "rewards/margins": 0.07160644978284836, "rewards/rejected": -0.24468496441841125, "step": 5060 }, { "epoch": 0.66, "learning_rate": 1.5360421268115653e-06, "logits/chosen": -1.8298113346099854, "logits/rejected": -1.4987479448318481, "logps/chosen": -458.18658447265625, "logps/rejected": -492.65789794921875, "loss": 0.0301, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19494257867336273, "rewards/margins": 0.09265542030334473, "rewards/rejected": -0.28759801387786865, "step": 5070 }, { "epoch": 0.66, "learning_rate": 1.5255146945712267e-06, "logits/chosen": -1.5927072763442993, "logits/rejected": -1.3620411157608032, "logps/chosen": -451.4266662597656, "logps/rejected": -485.79571533203125, "loss": 0.0289, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19076094031333923, "rewards/margins": 0.09270092099905014, "rewards/rejected": -0.28346189856529236, "step": 5080 }, { "epoch": 0.67, "learning_rate": 1.5150075988320594e-06, "logits/chosen": -1.6711536645889282, "logits/rejected": -1.5017945766448975, "logps/chosen": -375.4448547363281, "logps/rejected": -468.86480712890625, "loss": 0.034, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1778108775615692, "rewards/margins": 0.09814232587814331, "rewards/rejected": -0.2759532332420349, "step": 5090 }, { "epoch": 0.67, "learning_rate": 1.5045210588662929e-06, "logits/chosen": -1.5476630926132202, "logits/rejected": -1.4029957056045532, "logps/chosen": -377.67327880859375, "logps/rejected": -451.4478454589844, "loss": 0.0246, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1601109504699707, "rewards/margins": 0.09873531013727188, "rewards/rejected": -0.258846253156662, "step": 5100 }, { "epoch": 0.67, "learning_rate": 1.4940552935171781e-06, "logits/chosen": -1.680654764175415, "logits/rejected": -1.5256980657577515, "logps/chosen": -431.8291015625, "logps/rejected": -547.5233154296875, "loss": 0.0237, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20115765929222107, "rewards/margins": 0.09922701865434647, "rewards/rejected": -0.3003847002983093, "step": 5110 }, { "epoch": 0.67, "learning_rate": 1.483610521194419e-06, "logits/chosen": -1.598397970199585, "logits/rejected": -1.1381256580352783, "logps/chosen": -504.94744873046875, "logps/rejected": -565.5025024414062, "loss": 0.0293, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.216828852891922, "rewards/margins": 0.14259278774261475, "rewards/rejected": -0.35942161083221436, "step": 5120 }, { "epoch": 0.67, "learning_rate": 1.4731869598696226e-06, "logits/chosen": -1.4274873733520508, "logits/rejected": -1.5127485990524292, "logps/chosen": -427.0562438964844, "logps/rejected": -522.8704833984375, "loss": 0.0259, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22581632435321808, "rewards/margins": 0.06540516763925552, "rewards/rejected": -0.2912214994430542, "step": 5130 }, { "epoch": 0.67, "learning_rate": 1.4627848270717387e-06, "logits/chosen": -1.633857011795044, "logits/rejected": -1.3590513467788696, "logps/chosen": -425.37115478515625, "logps/rejected": -518.7385864257812, "loss": 0.0256, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20596785843372345, "rewards/margins": 0.11591050773859024, "rewards/rejected": -0.3218783438205719, "step": 5140 }, { "epoch": 0.67, "learning_rate": 1.4524043398825277e-06, "logits/chosen": -1.4400713443756104, "logits/rejected": -1.1214375495910645, "logps/chosen": -394.29144287109375, "logps/rejected": -386.66168212890625, "loss": 0.0196, "rewards/accuracies": 0.625, "rewards/chosen": -0.14006850123405457, "rewards/margins": 0.0656704306602478, "rewards/rejected": -0.20573893189430237, "step": 5150 }, { "epoch": 0.68, "learning_rate": 1.4420457149320299e-06, "logits/chosen": -1.5667462348937988, "logits/rejected": -1.5604121685028076, "logps/chosen": -375.1114196777344, "logps/rejected": -447.48223876953125, "loss": 0.019, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1627887487411499, "rewards/margins": 0.049822621047496796, "rewards/rejected": -0.2126113921403885, "step": 5160 }, { "epoch": 0.68, "learning_rate": 1.431709168394042e-06, "logits/chosen": -1.7921355962753296, "logits/rejected": -1.5176485776901245, "logps/chosen": -432.16754150390625, "logps/rejected": -388.26953125, "loss": 0.0245, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.14249520003795624, "rewards/margins": 0.041745856404304504, "rewards/rejected": -0.18424105644226074, "step": 5170 }, { "epoch": 0.68, "learning_rate": 1.4213949159816059e-06, "logits/chosen": -1.621297836303711, "logits/rejected": -1.374531865119934, "logps/chosen": -340.29486083984375, "logps/rejected": -422.07275390625, "loss": 0.0272, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12620839476585388, "rewards/margins": 0.1090429425239563, "rewards/rejected": -0.2352513074874878, "step": 5180 }, { "epoch": 0.68, "learning_rate": 1.4111031729425103e-06, "logits/chosen": -1.6758848428726196, "logits/rejected": -1.4807888269424438, "logps/chosen": -440.89263916015625, "logps/rejected": -510.248046875, "loss": 0.0202, "rewards/accuracies": 0.75, "rewards/chosen": -0.17388439178466797, "rewards/margins": 0.11208424717187881, "rewards/rejected": -0.2859686017036438, "step": 5190 }, { "epoch": 0.68, "learning_rate": 1.4008341540547965e-06, "logits/chosen": -1.525407075881958, "logits/rejected": -1.5021107196807861, "logps/chosen": -427.673828125, "logps/rejected": -486.7311096191406, "loss": 0.023, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19444535672664642, "rewards/margins": 0.05377691239118576, "rewards/rejected": -0.24822227656841278, "step": 5200 }, { "epoch": 0.68, "learning_rate": 1.3905880736222737e-06, "logits/chosen": -1.5743075609207153, "logits/rejected": -1.6213937997817993, "logps/chosen": -369.836181640625, "logps/rejected": -453.7099609375, "loss": 0.0361, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18054161965847015, "rewards/margins": 0.06503504514694214, "rewards/rejected": -0.2455766499042511, "step": 5210 }, { "epoch": 0.68, "learning_rate": 1.3803651454700531e-06, "logits/chosen": -1.5111993551254272, "logits/rejected": -1.3026028871536255, "logps/chosen": -458.8428649902344, "logps/rejected": -492.923095703125, "loss": 0.0259, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20958459377288818, "rewards/margins": 0.0841468870639801, "rewards/rejected": -0.2937314510345459, "step": 5220 }, { "epoch": 0.68, "learning_rate": 1.3701655829400773e-06, "logits/chosen": -1.6716458797454834, "logits/rejected": -1.4183886051177979, "logps/chosen": -525.5261840820312, "logps/rejected": -505.20965576171875, "loss": 0.022, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21298542618751526, "rewards/margins": 0.0640968531370163, "rewards/rejected": -0.27708229422569275, "step": 5230 }, { "epoch": 0.69, "learning_rate": 1.3599895988866756e-06, "logits/chosen": -1.4628162384033203, "logits/rejected": -1.3058449029922485, "logps/chosen": -388.9469909667969, "logps/rejected": -414.37957763671875, "loss": 0.0168, "rewards/accuracies": 0.5, "rewards/chosen": -0.18021324276924133, "rewards/margins": 0.039642706513404846, "rewards/rejected": -0.21985594928264618, "step": 5240 }, { "epoch": 0.69, "learning_rate": 1.3498374056721198e-06, "logits/chosen": -1.696189522743225, "logits/rejected": -1.6554816961288452, "logps/chosen": -349.71197509765625, "logps/rejected": -488.245361328125, "loss": 0.0165, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16485311090946198, "rewards/margins": 0.09726069867610931, "rewards/rejected": -0.2621137797832489, "step": 5250 }, { "epoch": 0.69, "learning_rate": 1.3397092151621883e-06, "logits/chosen": -1.6548722982406616, "logits/rejected": -1.3580074310302734, "logps/chosen": -429.25, "logps/rejected": -467.40155029296875, "loss": 0.0365, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2029763013124466, "rewards/margins": 0.08197561651468277, "rewards/rejected": -0.28495192527770996, "step": 5260 }, { "epoch": 0.69, "learning_rate": 1.3296052387217484e-06, "logits/chosen": -1.462677240371704, "logits/rejected": -1.449013352394104, "logps/chosen": -362.41571044921875, "logps/rejected": -415.59051513671875, "loss": 0.0155, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1855693757534027, "rewards/margins": 0.08781188726425171, "rewards/rejected": -0.2733812630176544, "step": 5270 }, { "epoch": 0.69, "learning_rate": 1.3195256872103476e-06, "logits/chosen": -1.488548994064331, "logits/rejected": -1.5530840158462524, "logps/chosen": -410.396728515625, "logps/rejected": -510.2646484375, "loss": 0.0188, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16777709126472473, "rewards/margins": 0.06479448080062866, "rewards/rejected": -0.2325715571641922, "step": 5280 }, { "epoch": 0.69, "learning_rate": 1.3094707709778068e-06, "logits/chosen": -1.3897769451141357, "logits/rejected": -1.3290399312973022, "logps/chosen": -398.8644714355469, "logps/rejected": -455.2787170410156, "loss": 0.0193, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20112545788288116, "rewards/margins": 0.09017898887395859, "rewards/rejected": -0.29130443930625916, "step": 5290 }, { "epoch": 0.69, "learning_rate": 1.2994406998598364e-06, "logits/chosen": -1.6049206256866455, "logits/rejected": -1.458370566368103, "logps/chosen": -367.20831298828125, "logps/rejected": -526.95166015625, "loss": 0.0272, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2139579802751541, "rewards/margins": 0.16104961931705475, "rewards/rejected": -0.3750075697898865, "step": 5300 }, { "epoch": 0.69, "learning_rate": 1.2894356831736558e-06, "logits/chosen": -1.5055642127990723, "logits/rejected": -1.3894039392471313, "logps/chosen": -393.8443298339844, "logps/rejected": -445.658935546875, "loss": 0.0301, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19631923735141754, "rewards/margins": 0.08201192319393158, "rewards/rejected": -0.2783311903476715, "step": 5310 }, { "epoch": 0.7, "learning_rate": 1.2794559297136203e-06, "logits/chosen": -1.3348758220672607, "logits/rejected": -1.2198129892349243, "logps/chosen": -498.6011657714844, "logps/rejected": -608.5252075195312, "loss": 0.0198, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25773555040359497, "rewards/margins": 0.09582929313182831, "rewards/rejected": -0.3535648286342621, "step": 5320 }, { "epoch": 0.7, "learning_rate": 1.2695016477468724e-06, "logits/chosen": -1.562260389328003, "logits/rejected": -1.5694024562835693, "logps/chosen": -445.9269104003906, "logps/rejected": -454.0824279785156, "loss": 0.0368, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.20634634792804718, "rewards/margins": 0.051405757665634155, "rewards/rejected": -0.25775209069252014, "step": 5330 }, { "epoch": 0.7, "learning_rate": 1.2595730450089874e-06, "logits/chosen": -1.8299586772918701, "logits/rejected": -1.5711266994476318, "logps/chosen": -522.7340087890625, "logps/rejected": -592.4933471679688, "loss": 0.0245, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24600999057292938, "rewards/margins": 0.10581465065479279, "rewards/rejected": -0.35182467103004456, "step": 5340 }, { "epoch": 0.7, "learning_rate": 1.2496703286996433e-06, "logits/chosen": -1.7908226251602173, "logits/rejected": -1.3918564319610596, "logps/chosen": -522.5734252929688, "logps/rejected": -556.00439453125, "loss": 0.015, "rewards/accuracies": 0.75, "rewards/chosen": -0.217800572514534, "rewards/margins": 0.1189211830496788, "rewards/rejected": -0.3367217481136322, "step": 5350 }, { "epoch": 0.7, "learning_rate": 1.2397937054782961e-06, "logits/chosen": -1.5083521604537964, "logits/rejected": -1.432520866394043, "logps/chosen": -523.9688720703125, "logps/rejected": -547.6058959960938, "loss": 0.0206, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23229670524597168, "rewards/margins": 0.07556265592575073, "rewards/rejected": -0.3078593611717224, "step": 5360 }, { "epoch": 0.7, "learning_rate": 1.2299433814598635e-06, "logits/chosen": -1.6189830303192139, "logits/rejected": -1.5186855792999268, "logps/chosen": -429.11181640625, "logps/rejected": -455.6077575683594, "loss": 0.0412, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19660572707653046, "rewards/margins": 0.058229874819517136, "rewards/rejected": -0.2548356056213379, "step": 5370 }, { "epoch": 0.7, "learning_rate": 1.2201195622104265e-06, "logits/chosen": -1.6549122333526611, "logits/rejected": -1.5803096294403076, "logps/chosen": -481.8655700683594, "logps/rejected": -576.1634521484375, "loss": 0.0196, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23913542926311493, "rewards/margins": 0.13378915190696716, "rewards/rejected": -0.3729245662689209, "step": 5380 }, { "epoch": 0.71, "learning_rate": 1.2103224527429417e-06, "logits/chosen": -1.5333130359649658, "logits/rejected": -1.2944916486740112, "logps/chosen": -447.95330810546875, "logps/rejected": -433.03582763671875, "loss": 0.046, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2052149474620819, "rewards/margins": 0.07002494484186172, "rewards/rejected": -0.27523988485336304, "step": 5390 }, { "epoch": 0.71, "learning_rate": 1.2005522575129559e-06, "logits/chosen": -1.7821900844573975, "logits/rejected": -1.429947018623352, "logps/chosen": -551.6270751953125, "logps/rejected": -515.8822021484375, "loss": 0.0256, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20926909148693085, "rewards/margins": 0.08802861720323563, "rewards/rejected": -0.2972976863384247, "step": 5400 }, { "epoch": 0.71, "learning_rate": 1.1908091804143469e-06, "logits/chosen": -1.4563171863555908, "logits/rejected": -1.3154704570770264, "logps/chosen": -382.73455810546875, "logps/rejected": -472.9546813964844, "loss": 0.0198, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20601502060890198, "rewards/margins": 0.0962318703532219, "rewards/rejected": -0.30224689841270447, "step": 5410 }, { "epoch": 0.71, "learning_rate": 1.1810934247750649e-06, "logits/chosen": -1.5964233875274658, "logits/rejected": -1.1891409158706665, "logps/chosen": -484.681640625, "logps/rejected": -531.6754150390625, "loss": 0.0209, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19162070751190186, "rewards/margins": 0.12779280543327332, "rewards/rejected": -0.3194134831428528, "step": 5420 }, { "epoch": 0.71, "learning_rate": 1.1714051933528881e-06, "logits/chosen": -1.791672945022583, "logits/rejected": -1.4877759218215942, "logps/chosen": -417.502685546875, "logps/rejected": -434.6937561035156, "loss": 0.0175, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15925352275371552, "rewards/margins": 0.06685891002416611, "rewards/rejected": -0.22611241042613983, "step": 5430 }, { "epoch": 0.71, "learning_rate": 1.161744688331192e-06, "logits/chosen": -1.5497604608535767, "logits/rejected": -1.3743586540222168, "logps/chosen": -411.5484313964844, "logps/rejected": -510.011962890625, "loss": 0.0135, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19434738159179688, "rewards/margins": 0.06442542374134064, "rewards/rejected": -0.2587727904319763, "step": 5440 }, { "epoch": 0.71, "learning_rate": 1.152112111314733e-06, "logits/chosen": -1.5756428241729736, "logits/rejected": -1.354800820350647, "logps/chosen": -347.12139892578125, "logps/rejected": -366.8716125488281, "loss": 0.0183, "rewards/accuracies": 0.625, "rewards/chosen": -0.15612633526325226, "rewards/margins": 0.054203521460294724, "rewards/rejected": -0.2103298455476761, "step": 5450 }, { "epoch": 0.71, "learning_rate": 1.142507663325439e-06, "logits/chosen": -1.4416675567626953, "logits/rejected": -1.3867011070251465, "logps/chosen": -422.52947998046875, "logps/rejected": -504.47296142578125, "loss": 0.0354, "rewards/accuracies": 0.75, "rewards/chosen": -0.1803435981273651, "rewards/margins": 0.10444513708353043, "rewards/rejected": -0.28478875756263733, "step": 5460 }, { "epoch": 0.72, "learning_rate": 1.132931544798211e-06, "logits/chosen": -1.5146865844726562, "logits/rejected": -1.2480287551879883, "logps/chosen": -467.4181213378906, "logps/rejected": -529.2227783203125, "loss": 0.019, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20075412094593048, "rewards/margins": 0.106613889336586, "rewards/rejected": -0.3073680102825165, "step": 5470 }, { "epoch": 0.72, "learning_rate": 1.1233839555767482e-06, "logits/chosen": -1.466223955154419, "logits/rejected": -1.2259441614151, "logps/chosen": -426.2166442871094, "logps/rejected": -446.619140625, "loss": 0.0325, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18078255653381348, "rewards/margins": 0.09439133107662201, "rewards/rejected": -0.2751738727092743, "step": 5480 }, { "epoch": 0.72, "learning_rate": 1.1138650949093668e-06, "logits/chosen": -1.7423839569091797, "logits/rejected": -1.4779974222183228, "logps/chosen": -417.0690002441406, "logps/rejected": -422.07049560546875, "loss": 0.0251, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20100681483745575, "rewards/margins": 0.053999461233615875, "rewards/rejected": -0.25500625371932983, "step": 5490 }, { "epoch": 0.72, "learning_rate": 1.1043751614448543e-06, "logits/chosen": -1.5876578092575073, "logits/rejected": -1.2621053457260132, "logps/chosen": -394.9156799316406, "logps/rejected": -432.68328857421875, "loss": 0.0269, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19612181186676025, "rewards/margins": 0.0839340090751648, "rewards/rejected": -0.28005582094192505, "step": 5500 }, { "epoch": 0.72, "learning_rate": 1.0949143532283107e-06, "logits/chosen": -1.6373828649520874, "logits/rejected": -1.4211647510528564, "logps/chosen": -440.989990234375, "logps/rejected": -500.00396728515625, "loss": 0.0249, "rewards/accuracies": 0.625, "rewards/chosen": -0.19455215334892273, "rewards/margins": 0.08070588856935501, "rewards/rejected": -0.27525803446769714, "step": 5510 }, { "epoch": 0.72, "learning_rate": 1.0854828676970275e-06, "logits/chosen": -1.4828943014144897, "logits/rejected": -1.4317944049835205, "logps/chosen": -427.9281311035156, "logps/rejected": -534.10400390625, "loss": 0.0218, "rewards/accuracies": 0.625, "rewards/chosen": -0.221365287899971, "rewards/margins": 0.12023405730724335, "rewards/rejected": -0.34159931540489197, "step": 5520 }, { "epoch": 0.72, "learning_rate": 1.076080901676361e-06, "logits/chosen": -1.632274866104126, "logits/rejected": -1.5328576564788818, "logps/chosen": -380.6434020996094, "logps/rejected": -445.2096252441406, "loss": 0.021, "rewards/accuracies": 0.625, "rewards/chosen": -0.18433251976966858, "rewards/margins": 0.08061722666025162, "rewards/rejected": -0.2649497389793396, "step": 5530 }, { "epoch": 0.72, "learning_rate": 1.0667086513756234e-06, "logits/chosen": -1.6440751552581787, "logits/rejected": -1.3497045040130615, "logps/chosen": -420.45806884765625, "logps/rejected": -460.8369140625, "loss": 0.0245, "rewards/accuracies": 0.75, "rewards/chosen": -0.18254700303077698, "rewards/margins": 0.09349583089351654, "rewards/rejected": -0.2760428190231323, "step": 5540 }, { "epoch": 0.73, "learning_rate": 1.0573663123839912e-06, "logits/chosen": -1.7530996799468994, "logits/rejected": -1.4101498126983643, "logps/chosen": -465.41015625, "logps/rejected": -476.9146423339844, "loss": 0.0158, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.225990891456604, "rewards/margins": 0.08158402889966965, "rewards/rejected": -0.30757492780685425, "step": 5550 }, { "epoch": 0.73, "learning_rate": 1.0480540796664251e-06, "logits/chosen": -1.5448033809661865, "logits/rejected": -1.368115782737732, "logps/chosen": -503.0091247558594, "logps/rejected": -597.8097534179688, "loss": 0.0157, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.25012126564979553, "rewards/margins": 0.08918876945972443, "rewards/rejected": -0.3393099904060364, "step": 5560 }, { "epoch": 0.73, "learning_rate": 1.0387721475595978e-06, "logits/chosen": -1.4915783405303955, "logits/rejected": -1.3935407400131226, "logps/chosen": -372.75262451171875, "logps/rejected": -428.7457580566406, "loss": 0.0417, "rewards/accuracies": 0.625, "rewards/chosen": -0.21398456394672394, "rewards/margins": 0.05937281250953674, "rewards/rejected": -0.2733573615550995, "step": 5570 }, { "epoch": 0.73, "learning_rate": 1.0295207097678378e-06, "logits/chosen": -1.4784311056137085, "logits/rejected": -1.4672610759735107, "logps/chosen": -366.3025207519531, "logps/rejected": -546.9857177734375, "loss": 0.0361, "rewards/accuracies": 0.625, "rewards/chosen": -0.19802804291248322, "rewards/margins": 0.11691056191921234, "rewards/rejected": -0.31493860483169556, "step": 5580 }, { "epoch": 0.73, "learning_rate": 1.0202999593590924e-06, "logits/chosen": -1.523376226425171, "logits/rejected": -1.3750369548797607, "logps/chosen": -420.4945373535156, "logps/rejected": -550.7835693359375, "loss": 0.0296, "rewards/accuracies": 0.75, "rewards/chosen": -0.2096870243549347, "rewards/margins": 0.1410565823316574, "rewards/rejected": -0.3507435917854309, "step": 5590 }, { "epoch": 0.73, "learning_rate": 1.011110088760891e-06, "logits/chosen": -1.4990277290344238, "logits/rejected": -1.411941409111023, "logps/chosen": -419.41851806640625, "logps/rejected": -543.7576904296875, "loss": 0.0296, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23063719272613525, "rewards/margins": 0.12043901532888412, "rewards/rejected": -0.3510761857032776, "step": 5600 }, { "epoch": 0.73, "learning_rate": 1.0019512897563347e-06, "logits/chosen": -1.6697190999984741, "logits/rejected": -1.3446718454360962, "logps/chosen": -505.65313720703125, "logps/rejected": -560.1145629882812, "loss": 0.0325, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2524965703487396, "rewards/margins": 0.09622282534837723, "rewards/rejected": -0.34871941804885864, "step": 5610 }, { "epoch": 0.74, "learning_rate": 9.928237534800935e-07, "logits/chosen": -1.6215341091156006, "logits/rejected": -1.5561894178390503, "logps/chosen": -405.2314453125, "logps/rejected": -478.72247314453125, "loss": 0.0217, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20122945308685303, "rewards/margins": 0.09568652510643005, "rewards/rejected": -0.2969159781932831, "step": 5620 }, { "epoch": 0.74, "learning_rate": 9.837276704144174e-07, "logits/chosen": -1.5506407022476196, "logits/rejected": -1.3838660717010498, "logps/chosen": -423.6056213378906, "logps/rejected": -450.9071350097656, "loss": 0.0273, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2144496887922287, "rewards/margins": 0.07349663227796555, "rewards/rejected": -0.28794634342193604, "step": 5630 }, { "epoch": 0.74, "learning_rate": 9.746632303851569e-07, "logits/chosen": -1.7482116222381592, "logits/rejected": -1.3927713632583618, "logps/chosen": -425.24053955078125, "logps/rejected": -488.3824768066406, "loss": 0.0194, "rewards/accuracies": 0.625, "rewards/chosen": -0.20241156220436096, "rewards/margins": 0.10810136795043945, "rewards/rejected": -0.3105129301548004, "step": 5640 }, { "epoch": 0.74, "learning_rate": 9.65630622557809e-07, "logits/chosen": -1.3741400241851807, "logits/rejected": -1.094609022140503, "logps/chosen": -459.25537109375, "logps/rejected": -542.5413818359375, "loss": 0.0318, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2083849161863327, "rewards/margins": 0.09790316969156265, "rewards/rejected": -0.30628806352615356, "step": 5650 }, { "epoch": 0.74, "learning_rate": 9.56630035433561e-07, "logits/chosen": -1.7187912464141846, "logits/rejected": -1.4406051635742188, "logps/chosen": -451.6206970214844, "logps/rejected": -473.7144470214844, "loss": 0.03, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1990150511264801, "rewards/margins": 0.08933638036251068, "rewards/rejected": -0.2883513867855072, "step": 5660 }, { "epoch": 0.74, "learning_rate": 9.476616568453659e-07, "logits/chosen": -1.617304801940918, "logits/rejected": -1.2837506532669067, "logps/chosen": -501.55615234375, "logps/rejected": -495.8614196777344, "loss": 0.0241, "rewards/accuracies": 0.625, "rewards/chosen": -0.23391124606132507, "rewards/margins": 0.058008432388305664, "rewards/rejected": -0.29191964864730835, "step": 5670 }, { "epoch": 0.74, "learning_rate": 9.387256739540162e-07, "logits/chosen": -1.5672932863235474, "logits/rejected": -1.3743022680282593, "logps/chosen": -403.78558349609375, "logps/rejected": -425.3617248535156, "loss": 0.025, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1623322069644928, "rewards/margins": 0.05934957414865494, "rewards/rejected": -0.22168178856372833, "step": 5680 }, { "epoch": 0.74, "learning_rate": 9.298222732442377e-07, "logits/chosen": -1.8293750286102295, "logits/rejected": -1.4852283000946045, "logps/chosen": -422.79888916015625, "logps/rejected": -495.72381591796875, "loss": 0.0247, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1667879819869995, "rewards/margins": 0.12689343094825745, "rewards/rejected": -0.29368144273757935, "step": 5690 }, { "epoch": 0.75, "learning_rate": 9.20951640520803e-07, "logits/chosen": -1.6043968200683594, "logits/rejected": -1.4415172338485718, "logps/chosen": -422.46337890625, "logps/rejected": -482.3023376464844, "loss": 0.0219, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20696571469306946, "rewards/margins": 0.08833520114421844, "rewards/rejected": -0.2953009009361267, "step": 5700 }, { "epoch": 0.75, "learning_rate": 9.121139609046484e-07, "logits/chosen": -1.5958956480026245, "logits/rejected": -1.451616883277893, "logps/chosen": -351.499755859375, "logps/rejected": -456.39324951171875, "loss": 0.0204, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1530417501926422, "rewards/margins": 0.120171919465065, "rewards/rejected": -0.273213654756546, "step": 5710 }, { "epoch": 0.75, "learning_rate": 9.033094188290121e-07, "logits/chosen": -1.4508789777755737, "logits/rejected": -1.4076393842697144, "logps/chosen": -400.70465087890625, "logps/rejected": -447.3128967285156, "loss": 0.0188, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17952290177345276, "rewards/margins": 0.08391942083835602, "rewards/rejected": -0.2634423077106476, "step": 5720 }, { "epoch": 0.75, "learning_rate": 8.945381980355889e-07, "logits/chosen": -1.697596788406372, "logits/rejected": -1.5488923788070679, "logps/chosen": -357.16522216796875, "logps/rejected": -423.3173828125, "loss": 0.0284, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1615738719701767, "rewards/margins": 0.06780795753002167, "rewards/rejected": -0.22938182950019836, "step": 5730 }, { "epoch": 0.75, "learning_rate": 8.858004815706919e-07, "logits/chosen": -1.5258272886276245, "logits/rejected": -1.4233239889144897, "logps/chosen": -403.95599365234375, "logps/rejected": -502.761474609375, "loss": 0.0237, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2067147195339203, "rewards/margins": 0.10103969275951385, "rewards/rejected": -0.30775442719459534, "step": 5740 }, { "epoch": 0.75, "learning_rate": 8.77096451781432e-07, "logits/chosen": -1.431740164756775, "logits/rejected": -1.1918470859527588, "logps/chosen": -425.783203125, "logps/rejected": -505.8761291503906, "loss": 0.0361, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23915116488933563, "rewards/margins": 0.1044241189956665, "rewards/rejected": -0.34357529878616333, "step": 5750 }, { "epoch": 0.75, "learning_rate": 8.684262903119165e-07, "logits/chosen": -1.505095362663269, "logits/rejected": -1.574194312095642, "logps/chosen": -411.2688903808594, "logps/rejected": -521.2644653320312, "loss": 0.0195, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18665559589862823, "rewards/margins": 0.09668277949094772, "rewards/rejected": -0.28333839774131775, "step": 5760 }, { "epoch": 0.76, "learning_rate": 8.597901780994525e-07, "logits/chosen": -1.814569115638733, "logits/rejected": -1.554092526435852, "logps/chosen": -465.39495849609375, "logps/rejected": -522.8751831054688, "loss": 0.0175, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2231772392988205, "rewards/margins": 0.11268983036279678, "rewards/rejected": -0.33586710691452026, "step": 5770 }, { "epoch": 0.76, "learning_rate": 8.511882953707773e-07, "logits/chosen": -1.669830322265625, "logits/rejected": -1.4222438335418701, "logps/chosen": -502.70806884765625, "logps/rejected": -509.10498046875, "loss": 0.0165, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22161705791950226, "rewards/margins": 0.05758953094482422, "rewards/rejected": -0.27920660376548767, "step": 5780 }, { "epoch": 0.76, "learning_rate": 8.426208216382944e-07, "logits/chosen": -1.5124791860580444, "logits/rejected": -1.4276105165481567, "logps/chosen": -387.97723388671875, "logps/rejected": -436.249755859375, "loss": 0.0323, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2093040645122528, "rewards/margins": 0.06647123396396637, "rewards/rejected": -0.27577531337738037, "step": 5790 }, { "epoch": 0.76, "learning_rate": 8.340879356963245e-07, "logits/chosen": -1.4797133207321167, "logits/rejected": -1.360034465789795, "logps/chosen": -422.455078125, "logps/rejected": -495.0018005371094, "loss": 0.0232, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19471265375614166, "rewards/margins": 0.08114586770534515, "rewards/rejected": -0.2758585512638092, "step": 5800 }, { "epoch": 0.76, "learning_rate": 8.255898156173777e-07, "logits/chosen": -1.696480393409729, "logits/rejected": -1.5143259763717651, "logps/chosen": -396.93707275390625, "logps/rejected": -459.56396484375, "loss": 0.0194, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2079300880432129, "rewards/margins": 0.07973875105381012, "rewards/rejected": -0.2876688838005066, "step": 5810 }, { "epoch": 0.76, "learning_rate": 8.171266387484389e-07, "logits/chosen": -1.74526047706604, "logits/rejected": -1.593643069267273, "logps/chosen": -409.3708190917969, "logps/rejected": -455.1100158691406, "loss": 0.019, "rewards/accuracies": 0.75, "rewards/chosen": -0.17666839063167572, "rewards/margins": 0.09848718345165253, "rewards/rejected": -0.27515554428100586, "step": 5820 }, { "epoch": 0.76, "learning_rate": 8.086985817072604e-07, "logits/chosen": -1.6020183563232422, "logits/rejected": -1.3858546018600464, "logps/chosen": -407.660400390625, "logps/rejected": -462.2608337402344, "loss": 0.0212, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1779371052980423, "rewards/margins": 0.09084297716617584, "rewards/rejected": -0.2687801122665405, "step": 5830 }, { "epoch": 0.76, "learning_rate": 8.003058203786835e-07, "logits/chosen": -1.6491711139678955, "logits/rejected": -1.342855453491211, "logps/chosen": -497.6109313964844, "logps/rejected": -524.0183715820312, "loss": 0.0152, "rewards/accuracies": 0.75, "rewards/chosen": -0.23766927421092987, "rewards/margins": 0.07575170695781708, "rewards/rejected": -0.31342101097106934, "step": 5840 }, { "epoch": 0.77, "learning_rate": 7.91948529910963e-07, "logits/chosen": -1.6227118968963623, "logits/rejected": -1.4864352941513062, "logps/chosen": -431.8373107910156, "logps/rejected": -506.4502868652344, "loss": 0.0212, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19953256845474243, "rewards/margins": 0.10467538982629776, "rewards/rejected": -0.3042079508304596, "step": 5850 }, { "epoch": 0.77, "learning_rate": 7.836268847121126e-07, "logits/chosen": -1.4191184043884277, "logits/rejected": -1.314880132675171, "logps/chosen": -410.31903076171875, "logps/rejected": -478.3172302246094, "loss": 0.0254, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18665172159671783, "rewards/margins": 0.06229304522275925, "rewards/rejected": -0.24894475936889648, "step": 5860 }, { "epoch": 0.77, "learning_rate": 7.753410584462681e-07, "logits/chosen": -1.6070563793182373, "logits/rejected": -1.327772855758667, "logps/chosen": -436.433349609375, "logps/rejected": -533.3255615234375, "loss": 0.0309, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20444254577159882, "rewards/margins": 0.144081711769104, "rewards/rejected": -0.3485243022441864, "step": 5870 }, { "epoch": 0.77, "learning_rate": 7.670912240300596e-07, "logits/chosen": -1.6220155954360962, "logits/rejected": -1.4926598072052002, "logps/chosen": -392.23870849609375, "logps/rejected": -450.91741943359375, "loss": 0.0204, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17535719275474548, "rewards/margins": 0.08097521215677261, "rewards/rejected": -0.2563324272632599, "step": 5880 }, { "epoch": 0.77, "learning_rate": 7.588775536290035e-07, "logits/chosen": -1.7251522541046143, "logits/rejected": -1.3986337184906006, "logps/chosen": -521.7364501953125, "logps/rejected": -563.6209106445312, "loss": 0.0187, "rewards/accuracies": 0.75, "rewards/chosen": -0.26620036363601685, "rewards/margins": 0.08622214198112488, "rewards/rejected": -0.35242247581481934, "step": 5890 }, { "epoch": 0.77, "learning_rate": 7.507002186539147e-07, "logits/chosen": -1.4981155395507812, "logits/rejected": -1.524047613143921, "logps/chosen": -369.7276306152344, "logps/rejected": -434.8815002441406, "loss": 0.0349, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18315860629081726, "rewards/margins": 0.05764635652303696, "rewards/rejected": -0.24080495536327362, "step": 5900 }, { "epoch": 0.77, "learning_rate": 7.425593897573216e-07, "logits/chosen": -1.459442377090454, "logits/rejected": -1.323828101158142, "logps/chosen": -468.89044189453125, "logps/rejected": -514.7048950195312, "loss": 0.0299, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20316457748413086, "rewards/margins": 0.08581452071666718, "rewards/rejected": -0.28897911310195923, "step": 5910 }, { "epoch": 0.77, "learning_rate": 7.344552368299088e-07, "logits/chosen": -1.554771900177002, "logits/rejected": -1.4254438877105713, "logps/chosen": -480.82427978515625, "logps/rejected": -497.3819274902344, "loss": 0.0306, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21433106064796448, "rewards/margins": 0.033910952508449554, "rewards/rejected": -0.24824202060699463, "step": 5920 }, { "epoch": 0.78, "learning_rate": 7.26387928996973e-07, "logits/chosen": -1.3242512941360474, "logits/rejected": -1.1937670707702637, "logps/chosen": -440.6521911621094, "logps/rejected": -535.5496215820312, "loss": 0.032, "rewards/accuracies": 0.75, "rewards/chosen": -0.21807590126991272, "rewards/margins": 0.13068453967571259, "rewards/rejected": -0.3487604558467865, "step": 5930 }, { "epoch": 0.78, "learning_rate": 7.183576346148899e-07, "logits/chosen": -1.731610894203186, "logits/rejected": -1.6200708150863647, "logps/chosen": -431.1158752441406, "logps/rejected": -541.3234252929688, "loss": 0.0172, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19807052612304688, "rewards/margins": 0.11446140706539154, "rewards/rejected": -0.31253188848495483, "step": 5940 }, { "epoch": 0.78, "learning_rate": 7.103645212676044e-07, "logits/chosen": -1.4820683002471924, "logits/rejected": -1.403683066368103, "logps/chosen": -437.85186767578125, "logps/rejected": -493.6400451660156, "loss": 0.0328, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2281392514705658, "rewards/margins": 0.09585168957710266, "rewards/rejected": -0.32399097084999084, "step": 5950 }, { "epoch": 0.78, "learning_rate": 7.024087557631318e-07, "logits/chosen": -1.226434350013733, "logits/rejected": -1.1703420877456665, "logps/chosen": -409.635498046875, "logps/rejected": -446.46649169921875, "loss": 0.0246, "rewards/accuracies": 0.5, "rewards/chosen": -0.2522982954978943, "rewards/margins": 0.03306723013520241, "rewards/rejected": -0.2853655517101288, "step": 5960 }, { "epoch": 0.78, "learning_rate": 6.944905041300739e-07, "logits/chosen": -1.5336253643035889, "logits/rejected": -1.2892582416534424, "logps/chosen": -508.7168884277344, "logps/rejected": -546.019287109375, "loss": 0.0253, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.234015554189682, "rewards/margins": 0.10266806930303574, "rewards/rejected": -0.3366836607456207, "step": 5970 }, { "epoch": 0.78, "learning_rate": 6.866099316141606e-07, "logits/chosen": -1.583622932434082, "logits/rejected": -1.4463317394256592, "logps/chosen": -449.3282775878906, "logps/rejected": -516.5638427734375, "loss": 0.0204, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23067812621593475, "rewards/margins": 0.06670000404119492, "rewards/rejected": -0.2973781228065491, "step": 5980 }, { "epoch": 0.78, "learning_rate": 6.787672026747946e-07, "logits/chosen": -1.2984622716903687, "logits/rejected": -1.5075492858886719, "logps/chosen": -436.43182373046875, "logps/rejected": -516.0398559570312, "loss": 0.0235, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22933240234851837, "rewards/margins": 0.05532126501202583, "rewards/rejected": -0.2846536338329315, "step": 5990 }, { "epoch": 0.79, "learning_rate": 6.709624809816223e-07, "logits/chosen": -1.6339048147201538, "logits/rejected": -1.5467860698699951, "logps/chosen": -497.2613830566406, "logps/rejected": -573.0549926757812, "loss": 0.0228, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22815433144569397, "rewards/margins": 0.0838376060128212, "rewards/rejected": -0.31199193000793457, "step": 6000 }, { "epoch": 0.79, "learning_rate": 6.6319592941112e-07, "logits/chosen": -1.5801079273223877, "logits/rejected": -1.3842319250106812, "logps/chosen": -549.0311889648438, "logps/rejected": -553.92919921875, "loss": 0.0259, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24139074981212616, "rewards/margins": 0.0593680813908577, "rewards/rejected": -0.30075883865356445, "step": 6010 }, { "epoch": 0.79, "learning_rate": 6.554677100431927e-07, "logits/chosen": -1.6321079730987549, "logits/rejected": -1.3281104564666748, "logps/chosen": -444.17315673828125, "logps/rejected": -489.5956115722656, "loss": 0.0236, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20192117989063263, "rewards/margins": 0.08705952018499374, "rewards/rejected": -0.28898072242736816, "step": 6020 }, { "epoch": 0.79, "learning_rate": 6.4777798415779e-07, "logits/chosen": -1.4961564540863037, "logits/rejected": -1.4459128379821777, "logps/chosen": -424.9244689941406, "logps/rejected": -489.59375, "loss": 0.0188, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2145252227783203, "rewards/margins": 0.0739036574959755, "rewards/rejected": -0.2884288728237152, "step": 6030 }, { "epoch": 0.79, "learning_rate": 6.401269122315451e-07, "logits/chosen": -1.317834734916687, "logits/rejected": -1.0378650426864624, "logps/chosen": -363.69427490234375, "logps/rejected": -459.9253845214844, "loss": 0.0399, "rewards/accuracies": 0.625, "rewards/chosen": -0.20702803134918213, "rewards/margins": 0.11154347658157349, "rewards/rejected": -0.3185714781284332, "step": 6040 }, { "epoch": 0.79, "learning_rate": 6.325146539344196e-07, "logits/chosen": -1.5062226057052612, "logits/rejected": -1.369683861732483, "logps/chosen": -503.24725341796875, "logps/rejected": -545.2264404296875, "loss": 0.0213, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2399510145187378, "rewards/margins": 0.07239175587892532, "rewards/rejected": -0.3123427629470825, "step": 6050 }, { "epoch": 0.79, "learning_rate": 6.249413681263782e-07, "logits/chosen": -1.7546535730361938, "logits/rejected": -1.7641979455947876, "logps/chosen": -422.50714111328125, "logps/rejected": -490.16949462890625, "loss": 0.0251, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2074483335018158, "rewards/margins": 0.07991499453783035, "rewards/rejected": -0.28736335039138794, "step": 6060 }, { "epoch": 0.79, "learning_rate": 6.174072128540686e-07, "logits/chosen": -1.5013649463653564, "logits/rejected": -1.378302812576294, "logps/chosen": -438.6622009277344, "logps/rejected": -487.98590087890625, "loss": 0.0333, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2310091257095337, "rewards/margins": 0.07826980948448181, "rewards/rejected": -0.3092789351940155, "step": 6070 }, { "epoch": 0.8, "learning_rate": 6.099123453475245e-07, "logits/chosen": -1.586003065109253, "logits/rejected": -1.3171608448028564, "logps/chosen": -483.73797607421875, "logps/rejected": -525.4342041015625, "loss": 0.0275, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.24406981468200684, "rewards/margins": 0.09159289300441742, "rewards/rejected": -0.3356626629829407, "step": 6080 }, { "epoch": 0.8, "learning_rate": 6.024569220168836e-07, "logits/chosen": -1.653754472732544, "logits/rejected": -1.596088171005249, "logps/chosen": -432.75506591796875, "logps/rejected": -490.79156494140625, "loss": 0.0139, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20881927013397217, "rewards/margins": 0.08285851776599884, "rewards/rejected": -0.2916777729988098, "step": 6090 }, { "epoch": 0.8, "learning_rate": 5.950410984491268e-07, "logits/chosen": -1.8049776554107666, "logits/rejected": -1.6777689456939697, "logps/chosen": -529.035888671875, "logps/rejected": -556.14111328125, "loss": 0.0267, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21315769851207733, "rewards/margins": 0.07590596377849579, "rewards/rejected": -0.2890636622905731, "step": 6100 }, { "epoch": 0.8, "learning_rate": 5.876650294048262e-07, "logits/chosen": -1.376012921333313, "logits/rejected": -1.3910853862762451, "logps/chosen": -446.28985595703125, "logps/rejected": -570.401123046875, "loss": 0.0286, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21063914895057678, "rewards/margins": 0.11243287473917007, "rewards/rejected": -0.32307201623916626, "step": 6110 }, { "epoch": 0.8, "learning_rate": 5.8032886881492e-07, "logits/chosen": -1.8189210891723633, "logits/rejected": -1.6391546726226807, "logps/chosen": -418.68084716796875, "logps/rejected": -466.744873046875, "loss": 0.023, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.18586881458759308, "rewards/margins": 0.06752636283636093, "rewards/rejected": -0.2533951699733734, "step": 6120 }, { "epoch": 0.8, "learning_rate": 5.730327697774988e-07, "logits/chosen": -1.5780233144760132, "logits/rejected": -1.5865637063980103, "logps/chosen": -437.9036560058594, "logps/rejected": -494.39501953125, "loss": 0.0268, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20080581307411194, "rewards/margins": 0.06650952249765396, "rewards/rejected": -0.2673153281211853, "step": 6130 }, { "epoch": 0.8, "learning_rate": 5.657768845546068e-07, "logits/chosen": -1.2442691326141357, "logits/rejected": -1.501525640487671, "logps/chosen": -462.56494140625, "logps/rejected": -573.1702880859375, "loss": 0.0177, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.21378342807292938, "rewards/margins": 0.05830472707748413, "rewards/rejected": -0.2720881700515747, "step": 6140 }, { "epoch": 0.8, "learning_rate": 5.585613645690713e-07, "logits/chosen": -1.5363595485687256, "logits/rejected": -1.3994860649108887, "logps/chosen": -445.9325256347656, "logps/rejected": -556.3574829101562, "loss": 0.0247, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23880818486213684, "rewards/margins": 0.1059606522321701, "rewards/rejected": -0.34476882219314575, "step": 6150 }, { "epoch": 0.81, "learning_rate": 5.513863604013355e-07, "logits/chosen": -1.6287044286727905, "logits/rejected": -1.1260802745819092, "logps/chosen": -432.9693908691406, "logps/rejected": -435.9732971191406, "loss": 0.0187, "rewards/accuracies": 0.625, "rewards/chosen": -0.17502713203430176, "rewards/margins": 0.07979585230350494, "rewards/rejected": -0.2548229694366455, "step": 6160 }, { "epoch": 0.81, "learning_rate": 5.442520217863215e-07, "logits/chosen": -1.768366813659668, "logits/rejected": -1.4605776071548462, "logps/chosen": -414.16473388671875, "logps/rejected": -461.45965576171875, "loss": 0.0124, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19283291697502136, "rewards/margins": 0.07525144517421722, "rewards/rejected": -0.2680843472480774, "step": 6170 }, { "epoch": 0.81, "learning_rate": 5.371584976103034e-07, "logits/chosen": -1.7616106271743774, "logits/rejected": -1.3792388439178467, "logps/chosen": -463.5787048339844, "logps/rejected": -509.7967834472656, "loss": 0.0191, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2153579294681549, "rewards/margins": 0.08694064617156982, "rewards/rejected": -0.30229857563972473, "step": 6180 }, { "epoch": 0.81, "learning_rate": 5.301059359077987e-07, "logits/chosen": -1.5925132036209106, "logits/rejected": -1.495607614517212, "logps/chosen": -379.4404602050781, "logps/rejected": -434.323486328125, "loss": 0.0307, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.18784675002098083, "rewards/margins": 0.06654896587133408, "rewards/rejected": -0.2543957233428955, "step": 6190 }, { "epoch": 0.81, "learning_rate": 5.230944838584806e-07, "logits/chosen": -1.6353609561920166, "logits/rejected": -1.5216481685638428, "logps/chosen": -482.3740234375, "logps/rejected": -572.2147216796875, "loss": 0.0193, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.217765212059021, "rewards/margins": 0.12146315723657608, "rewards/rejected": -0.3392283320426941, "step": 6200 }, { "epoch": 0.81, "learning_rate": 5.161242877841083e-07, "logits/chosen": -1.6601533889770508, "logits/rejected": -1.3861908912658691, "logps/chosen": -473.46685791015625, "logps/rejected": -509.176025390625, "loss": 0.0211, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2064732313156128, "rewards/margins": 0.07329900562763214, "rewards/rejected": -0.27977222204208374, "step": 6210 }, { "epoch": 0.81, "learning_rate": 5.091954931454682e-07, "logits/chosen": -1.485788106918335, "logits/rejected": -1.4958043098449707, "logps/chosen": -372.6158142089844, "logps/rejected": -446.5315856933594, "loss": 0.0279, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1747020035982132, "rewards/margins": 0.07266520708799362, "rewards/rejected": -0.24736721813678741, "step": 6220 }, { "epoch": 0.82, "learning_rate": 5.023082445393446e-07, "logits/chosen": -1.6031299829483032, "logits/rejected": -1.3771857023239136, "logps/chosen": -426.8971252441406, "logps/rejected": -490.07373046875, "loss": 0.033, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21284392476081848, "rewards/margins": 0.08204887807369232, "rewards/rejected": -0.2948927879333496, "step": 6230 }, { "epoch": 0.82, "learning_rate": 4.95462685695498e-07, "logits/chosen": -1.5788943767547607, "logits/rejected": -1.547645926475525, "logps/chosen": -380.37451171875, "logps/rejected": -482.9818420410156, "loss": 0.033, "rewards/accuracies": 0.625, "rewards/chosen": -0.17835983633995056, "rewards/margins": 0.08886045962572098, "rewards/rejected": -0.26722028851509094, "step": 6240 }, { "epoch": 0.82, "learning_rate": 4.88658959473666e-07, "logits/chosen": -1.4899132251739502, "logits/rejected": -1.4642289876937866, "logps/chosen": -448.7754821777344, "logps/rejected": -518.4555053710938, "loss": 0.0164, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1989007145166397, "rewards/margins": 0.07370701432228088, "rewards/rejected": -0.272607684135437, "step": 6250 }, { "epoch": 0.82, "learning_rate": 4.818972078605821e-07, "logits/chosen": -1.478088617324829, "logits/rejected": -1.3369762897491455, "logps/chosen": -493.56195068359375, "logps/rejected": -521.9366455078125, "loss": 0.0234, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24244217574596405, "rewards/margins": 0.06204763054847717, "rewards/rejected": -0.30448979139328003, "step": 6260 }, { "epoch": 0.82, "learning_rate": 4.7517757196701514e-07, "logits/chosen": -1.6222178936004639, "logits/rejected": -1.6087877750396729, "logps/chosen": -391.06536865234375, "logps/rejected": -491.53961181640625, "loss": 0.0192, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22878026962280273, "rewards/margins": 0.0832093134522438, "rewards/rejected": -0.31198957562446594, "step": 6270 }, { "epoch": 0.82, "learning_rate": 4.6850019202482193e-07, "logits/chosen": -1.5310463905334473, "logits/rejected": -1.6684789657592773, "logps/chosen": -413.8614807128906, "logps/rejected": -568.8370971679688, "loss": 0.0167, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20737600326538086, "rewards/margins": 0.11540031433105469, "rewards/rejected": -0.32277631759643555, "step": 6280 }, { "epoch": 0.82, "learning_rate": 4.618652073840188e-07, "logits/chosen": -1.7567546367645264, "logits/rejected": -1.4541746377944946, "logps/chosen": -443.84698486328125, "logps/rejected": -487.93255615234375, "loss": 0.0194, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17903652787208557, "rewards/margins": 0.06564389914274216, "rewards/rejected": -0.24468043446540833, "step": 6290 }, { "epoch": 0.82, "learning_rate": 4.5527275650987965e-07, "logits/chosen": -1.5819406509399414, "logits/rejected": -1.5654951333999634, "logps/chosen": -420.19915771484375, "logps/rejected": -540.7142333984375, "loss": 0.0167, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19606199860572815, "rewards/margins": 0.10539436340332031, "rewards/rejected": -0.30145636200904846, "step": 6300 }, { "epoch": 0.83, "learning_rate": 4.487229769800394e-07, "logits/chosen": -1.520179271697998, "logits/rejected": -1.4008495807647705, "logps/chosen": -459.998046875, "logps/rejected": -561.9788818359375, "loss": 0.0343, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22858110070228577, "rewards/margins": 0.10780701786279678, "rewards/rejected": -0.33638817071914673, "step": 6310 }, { "epoch": 0.83, "learning_rate": 4.422160054816285e-07, "logits/chosen": -1.5911762714385986, "logits/rejected": -1.3901503086090088, "logps/chosen": -423.17327880859375, "logps/rejected": -499.7627868652344, "loss": 0.0233, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18377675116062164, "rewards/margins": 0.10875866562128067, "rewards/rejected": -0.2925353944301605, "step": 6320 }, { "epoch": 0.83, "learning_rate": 4.35751977808416e-07, "logits/chosen": -1.634953260421753, "logits/rejected": -1.3580422401428223, "logps/chosen": -472.6122131347656, "logps/rejected": -592.571533203125, "loss": 0.0157, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17626197636127472, "rewards/margins": 0.13492093980312347, "rewards/rejected": -0.3111829161643982, "step": 6330 }, { "epoch": 0.83, "learning_rate": 4.293310288579794e-07, "logits/chosen": -1.6954553127288818, "logits/rejected": -1.4459668397903442, "logps/chosen": -377.73321533203125, "logps/rejected": -393.33935546875, "loss": 0.0228, "rewards/accuracies": 0.625, "rewards/chosen": -0.16442880034446716, "rewards/margins": 0.04790250584483147, "rewards/rejected": -0.21233132481575012, "step": 6340 }, { "epoch": 0.83, "learning_rate": 4.2295329262888733e-07, "logits/chosen": -1.386487603187561, "logits/rejected": -1.4061570167541504, "logps/chosen": -367.64630126953125, "logps/rejected": -486.550048828125, "loss": 0.0237, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17982135713100433, "rewards/margins": 0.08798646926879883, "rewards/rejected": -0.26780781149864197, "step": 6350 }, { "epoch": 0.83, "learning_rate": 4.1661890221790316e-07, "logits/chosen": -1.5857356786727905, "logits/rejected": -1.472039818763733, "logps/chosen": -412.272705078125, "logps/rejected": -433.59014892578125, "loss": 0.0134, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17613378167152405, "rewards/margins": 0.06455505639314651, "rewards/rejected": -0.24068883061408997, "step": 6360 }, { "epoch": 0.83, "learning_rate": 4.103279898172072e-07, "logits/chosen": -1.6553398370742798, "logits/rejected": -1.4700231552124023, "logps/chosen": -406.8631286621094, "logps/rejected": -437.97943115234375, "loss": 0.0176, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17570190131664276, "rewards/margins": 0.09338720887899399, "rewards/rejected": -0.26908910274505615, "step": 6370 }, { "epoch": 0.83, "learning_rate": 4.040806867116401e-07, "logits/chosen": -1.8268280029296875, "logits/rejected": -1.6583318710327148, "logps/chosen": -479.39544677734375, "logps/rejected": -525.3782348632812, "loss": 0.0394, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20694401860237122, "rewards/margins": 0.06560108065605164, "rewards/rejected": -0.27254509925842285, "step": 6380 }, { "epoch": 0.84, "learning_rate": 3.978771232759615e-07, "logits/chosen": -1.6186943054199219, "logits/rejected": -1.7581040859222412, "logps/chosen": -389.00115966796875, "logps/rejected": -422.4768981933594, "loss": 0.0385, "rewards/accuracies": 0.625, "rewards/chosen": -0.196413055062294, "rewards/margins": 0.051200706511735916, "rewards/rejected": -0.247613787651062, "step": 6390 }, { "epoch": 0.84, "learning_rate": 3.917174289721276e-07, "logits/chosen": -1.4655689001083374, "logits/rejected": -1.2271219491958618, "logps/chosen": -406.14678955078125, "logps/rejected": -524.12646484375, "loss": 0.0225, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18133914470672607, "rewards/margins": 0.14622202515602112, "rewards/rejected": -0.3275611996650696, "step": 6400 }, { "epoch": 0.84, "learning_rate": 3.856017323465938e-07, "logits/chosen": -1.6540682315826416, "logits/rejected": -1.288861632347107, "logps/chosen": -414.47930908203125, "logps/rejected": -440.4425354003906, "loss": 0.0256, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19330580532550812, "rewards/margins": 0.07383783161640167, "rewards/rejected": -0.2671436369419098, "step": 6410 }, { "epoch": 0.84, "learning_rate": 3.7953016102762695e-07, "logits/chosen": -1.617789626121521, "logits/rejected": -1.2891263961791992, "logps/chosen": -440.29608154296875, "logps/rejected": -442.4244689941406, "loss": 0.0335, "rewards/accuracies": 0.625, "rewards/chosen": -0.17672431468963623, "rewards/margins": 0.08600305020809174, "rewards/rejected": -0.2627273499965668, "step": 6420 }, { "epoch": 0.84, "learning_rate": 3.7350284172264493e-07, "logits/chosen": -1.6066081523895264, "logits/rejected": -1.3126106262207031, "logps/chosen": -422.9794006347656, "logps/rejected": -471.2784729003906, "loss": 0.0225, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1922755241394043, "rewards/margins": 0.10332103073596954, "rewards/rejected": -0.29559653997421265, "step": 6430 }, { "epoch": 0.84, "learning_rate": 3.67519900215573e-07, "logits/chosen": -1.6866018772125244, "logits/rejected": -1.497786283493042, "logps/chosen": -432.73126220703125, "logps/rejected": -473.71728515625, "loss": 0.0249, "rewards/accuracies": 0.625, "rewards/chosen": -0.19587557017803192, "rewards/margins": 0.08827323466539383, "rewards/rejected": -0.2841488718986511, "step": 6440 }, { "epoch": 0.84, "learning_rate": 3.615814613642174e-07, "logits/chosen": -1.5590837001800537, "logits/rejected": -1.405241847038269, "logps/chosen": -453.08221435546875, "logps/rejected": -543.8556518554688, "loss": 0.0191, "rewards/accuracies": 0.625, "rewards/chosen": -0.2221020758152008, "rewards/margins": 0.0864696279168129, "rewards/rejected": -0.3085716664791107, "step": 6450 }, { "epoch": 0.85, "learning_rate": 3.5568764909765795e-07, "logits/chosen": -1.6449756622314453, "logits/rejected": -1.4965819120407104, "logps/chosen": -443.61346435546875, "logps/rejected": -442.5621032714844, "loss": 0.0181, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18187734484672546, "rewards/margins": 0.0648706704378128, "rewards/rejected": -0.24674801528453827, "step": 6460 }, { "epoch": 0.85, "learning_rate": 3.498385864136672e-07, "logits/chosen": -1.6285717487335205, "logits/rejected": -1.4089804887771606, "logps/chosen": -402.15045166015625, "logps/rejected": -461.0576171875, "loss": 0.0187, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16795262694358826, "rewards/margins": 0.08682878315448761, "rewards/rejected": -0.2547813951969147, "step": 6470 }, { "epoch": 0.85, "learning_rate": 3.440343953761363e-07, "logits/chosen": -1.4728256464004517, "logits/rejected": -1.2075586318969727, "logps/chosen": -387.43408203125, "logps/rejected": -426.7442932128906, "loss": 0.0247, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17750981450080872, "rewards/margins": 0.09418913722038269, "rewards/rejected": -0.2716989517211914, "step": 6480 }, { "epoch": 0.85, "learning_rate": 3.382751971125345e-07, "logits/chosen": -1.6317641735076904, "logits/rejected": -1.608093023300171, "logps/chosen": -419.4491271972656, "logps/rejected": -532.7672729492188, "loss": 0.0142, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1842859834432602, "rewards/margins": 0.09859196841716766, "rewards/rejected": -0.28287798166275024, "step": 6490 }, { "epoch": 0.85, "learning_rate": 3.3256111181137753e-07, "logits/chosen": -1.5633186101913452, "logits/rejected": -1.536211371421814, "logps/chosen": -362.38861083984375, "logps/rejected": -454.27459716796875, "loss": 0.0185, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.18105900287628174, "rewards/margins": 0.06541544198989868, "rewards/rejected": -0.2464744597673416, "step": 6500 }, { "epoch": 0.85, "learning_rate": 3.2689225871971905e-07, "logits/chosen": -1.4011424779891968, "logits/rejected": -1.3848562240600586, "logps/chosen": -448.1383361816406, "logps/rejected": -477.1025390625, "loss": 0.0248, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2206324338912964, "rewards/margins": 0.05015076324343681, "rewards/rejected": -0.2707832157611847, "step": 6510 }, { "epoch": 0.85, "learning_rate": 3.2126875614066523e-07, "logits/chosen": -1.6511691808700562, "logits/rejected": -1.4052765369415283, "logps/chosen": -386.79364013671875, "logps/rejected": -483.07855224609375, "loss": 0.0164, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16033944487571716, "rewards/margins": 0.12655356526374817, "rewards/rejected": -0.2868930399417877, "step": 6520 }, { "epoch": 0.85, "learning_rate": 3.156907214309024e-07, "logits/chosen": -1.4176801443099976, "logits/rejected": -1.4613978862762451, "logps/chosen": -389.1749267578125, "logps/rejected": -454.427001953125, "loss": 0.0328, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20327451825141907, "rewards/margins": 0.07178395241498947, "rewards/rejected": -0.2750585079193115, "step": 6530 }, { "epoch": 0.86, "learning_rate": 3.1015827099824923e-07, "logits/chosen": -1.6079105138778687, "logits/rejected": -1.4893054962158203, "logps/chosen": -437.54998779296875, "logps/rejected": -476.2743225097656, "loss": 0.0159, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16807910799980164, "rewards/margins": 0.05767925828695297, "rewards/rejected": -0.2257583886384964, "step": 6540 }, { "epoch": 0.86, "learning_rate": 3.0467152029922926e-07, "logits/chosen": -1.4525275230407715, "logits/rejected": -1.3955967426300049, "logps/chosen": -413.958251953125, "logps/rejected": -460.4651794433594, "loss": 0.0283, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19624033570289612, "rewards/margins": 0.07971431314945221, "rewards/rejected": -0.2759546637535095, "step": 6550 }, { "epoch": 0.86, "learning_rate": 2.992305838366591e-07, "logits/chosen": -1.7086737155914307, "logits/rejected": -1.487250566482544, "logps/chosen": -346.0322265625, "logps/rejected": -405.02288818359375, "loss": 0.0179, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18641534447669983, "rewards/margins": 0.06775184720754623, "rewards/rejected": -0.25416722893714905, "step": 6560 }, { "epoch": 0.86, "learning_rate": 2.938355751572583e-07, "logits/chosen": -1.563066840171814, "logits/rejected": -1.3573358058929443, "logps/chosen": -377.8486633300781, "logps/rejected": -448.5704040527344, "loss": 0.0256, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19250357151031494, "rewards/margins": 0.08367094397544861, "rewards/rejected": -0.27617448568344116, "step": 6570 }, { "epoch": 0.86, "learning_rate": 2.8848660684928307e-07, "logits/chosen": -1.5894094705581665, "logits/rejected": -1.4691380262374878, "logps/chosen": -450.65362548828125, "logps/rejected": -520.8355712890625, "loss": 0.0331, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24818184971809387, "rewards/margins": 0.04456399381160736, "rewards/rejected": -0.2927458584308624, "step": 6580 }, { "epoch": 0.86, "learning_rate": 2.8318379054017383e-07, "logits/chosen": -1.5910918712615967, "logits/rejected": -1.3723212480545044, "logps/chosen": -463.10626220703125, "logps/rejected": -579.238525390625, "loss": 0.0279, "rewards/accuracies": 0.625, "rewards/chosen": -0.2031777799129486, "rewards/margins": 0.08301732689142227, "rewards/rejected": -0.2861950993537903, "step": 6590 }, { "epoch": 0.86, "learning_rate": 2.779272368942246e-07, "logits/chosen": -1.600366234779358, "logits/rejected": -1.260945200920105, "logps/chosen": -440.1941833496094, "logps/rejected": -498.590576171875, "loss": 0.0229, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21485555171966553, "rewards/margins": 0.0972893014550209, "rewards/rejected": -0.3121448755264282, "step": 6600 }, { "epoch": 0.86, "learning_rate": 2.7271705561027986e-07, "logits/chosen": -1.7694162130355835, "logits/rejected": -1.7384599447250366, "logps/chosen": -469.52288818359375, "logps/rejected": -558.3441162109375, "loss": 0.0209, "rewards/accuracies": 0.75, "rewards/chosen": -0.19003179669380188, "rewards/margins": 0.09676986187696457, "rewards/rejected": -0.28680163621902466, "step": 6610 }, { "epoch": 0.87, "learning_rate": 2.6755335541943677e-07, "logits/chosen": -1.4450665712356567, "logits/rejected": -1.4400370121002197, "logps/chosen": -432.9559020996094, "logps/rejected": -513.572021484375, "loss": 0.0355, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22987909615039825, "rewards/margins": 0.0817764475941658, "rewards/rejected": -0.31165555119514465, "step": 6620 }, { "epoch": 0.87, "learning_rate": 2.62436244082781e-07, "logits/chosen": -1.533942461013794, "logits/rejected": -1.4904354810714722, "logps/chosen": -390.4458923339844, "logps/rejected": -407.4669189453125, "loss": 0.021, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19528919458389282, "rewards/margins": 0.04443785548210144, "rewards/rejected": -0.23972702026367188, "step": 6630 }, { "epoch": 0.87, "learning_rate": 2.5736582838913836e-07, "logits/chosen": -1.6789026260375977, "logits/rejected": -1.5278362035751343, "logps/chosen": -457.1402282714844, "logps/rejected": -579.2009887695312, "loss": 0.0249, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20158009231090546, "rewards/margins": 0.14046378433704376, "rewards/rejected": -0.3420438766479492, "step": 6640 }, { "epoch": 0.87, "learning_rate": 2.5234221415284363e-07, "logits/chosen": -1.5976431369781494, "logits/rejected": -1.490487813949585, "logps/chosen": -486.6568908691406, "logps/rejected": -523.069091796875, "loss": 0.0235, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22602316737174988, "rewards/margins": 0.07399384677410126, "rewards/rejected": -0.3000170588493347, "step": 6650 }, { "epoch": 0.87, "learning_rate": 2.4736550621153375e-07, "logits/chosen": -1.599687933921814, "logits/rejected": -1.4537866115570068, "logps/chosen": -510.0804748535156, "logps/rejected": -571.4901123046875, "loss": 0.0309, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22016914188861847, "rewards/margins": 0.09033988416194916, "rewards/rejected": -0.3105090260505676, "step": 6660 }, { "epoch": 0.87, "learning_rate": 2.424358084239609e-07, "logits/chosen": -1.657153844833374, "logits/rejected": -1.5969769954681396, "logps/chosen": -568.629150390625, "logps/rejected": -559.4827880859375, "loss": 0.0208, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20487694442272186, "rewards/margins": 0.09001322090625763, "rewards/rejected": -0.29489022493362427, "step": 6670 }, { "epoch": 0.87, "learning_rate": 2.3755322366782158e-07, "logits/chosen": -1.545088768005371, "logits/rejected": -1.4435375928878784, "logps/chosen": -465.56329345703125, "logps/rejected": -519.2607421875, "loss": 0.0189, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21758349239826202, "rewards/margins": 0.09756701439619064, "rewards/rejected": -0.31515049934387207, "step": 6680 }, { "epoch": 0.88, "learning_rate": 2.3271785383761431e-07, "logits/chosen": -1.6613857746124268, "logits/rejected": -1.3951141834259033, "logps/chosen": -481.0204162597656, "logps/rejected": -505.3641662597656, "loss": 0.0349, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.21575649082660675, "rewards/margins": 0.057221878319978714, "rewards/rejected": -0.2729783356189728, "step": 6690 }, { "epoch": 0.88, "learning_rate": 2.2792979984250978e-07, "logits/chosen": -1.5365406274795532, "logits/rejected": -1.3203694820404053, "logps/chosen": -432.1142578125, "logps/rejected": -446.89166259765625, "loss": 0.0278, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21476109325885773, "rewards/margins": 0.06200176477432251, "rewards/rejected": -0.27676287293434143, "step": 6700 }, { "epoch": 0.88, "learning_rate": 2.231891616042453e-07, "logits/chosen": -1.4919780492782593, "logits/rejected": -1.5763249397277832, "logps/chosen": -375.32763671875, "logps/rejected": -487.92877197265625, "loss": 0.0309, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.19331511855125427, "rewards/margins": 0.07069431990385056, "rewards/rejected": -0.26400941610336304, "step": 6710 }, { "epoch": 0.88, "learning_rate": 2.1849603805504328e-07, "logits/chosen": -1.490634560585022, "logits/rejected": -1.3624677658081055, "logps/chosen": -448.1121520996094, "logps/rejected": -482.7945251464844, "loss": 0.0164, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19230246543884277, "rewards/margins": 0.08405870199203491, "rewards/rejected": -0.2763611674308777, "step": 6720 }, { "epoch": 0.88, "learning_rate": 2.1385052713554066e-07, "logits/chosen": -1.6195634603500366, "logits/rejected": -1.5923945903778076, "logps/chosen": -392.4721984863281, "logps/rejected": -516.0805053710938, "loss": 0.0323, "rewards/accuracies": 0.625, "rewards/chosen": -0.22640562057495117, "rewards/margins": 0.09060530364513397, "rewards/rejected": -0.31701093912124634, "step": 6730 }, { "epoch": 0.88, "learning_rate": 2.0925272579274873e-07, "logits/chosen": -1.567509412765503, "logits/rejected": -1.4040048122406006, "logps/chosen": -463.7437438964844, "logps/rejected": -498.69232177734375, "loss": 0.0221, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19629953801631927, "rewards/margins": 0.04697941988706589, "rewards/rejected": -0.24327898025512695, "step": 6740 }, { "epoch": 0.88, "learning_rate": 2.047027299780302e-07, "logits/chosen": -1.7769540548324585, "logits/rejected": -1.7264270782470703, "logps/chosen": -445.7354431152344, "logps/rejected": -509.4366760253906, "loss": 0.0163, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1864301860332489, "rewards/margins": 0.07413545250892639, "rewards/rejected": -0.2605656385421753, "step": 6750 }, { "epoch": 0.88, "learning_rate": 2.0020063464509492e-07, "logits/chosen": -1.6714115142822266, "logits/rejected": -1.4131503105163574, "logps/chosen": -438.7445373535156, "logps/rejected": -464.24896240234375, "loss": 0.0182, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1970597803592682, "rewards/margins": 0.10940859466791153, "rewards/rejected": -0.3064683973789215, "step": 6760 }, { "epoch": 0.89, "learning_rate": 1.957465337480191e-07, "logits/chosen": -1.6414451599121094, "logits/rejected": -1.5069279670715332, "logps/chosen": -452.56182861328125, "logps/rejected": -525.6574096679688, "loss": 0.0256, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21758043766021729, "rewards/margins": 0.11510293185710907, "rewards/rejected": -0.33268335461616516, "step": 6770 }, { "epoch": 0.89, "learning_rate": 1.9134052023928622e-07, "logits/chosen": -1.4718319177627563, "logits/rejected": -1.4121534824371338, "logps/chosen": -386.81781005859375, "logps/rejected": -468.6263122558594, "loss": 0.0364, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23575875163078308, "rewards/margins": 0.08290183544158936, "rewards/rejected": -0.3186606168746948, "step": 6780 }, { "epoch": 0.89, "learning_rate": 1.8698268606784392e-07, "logits/chosen": -1.5553295612335205, "logits/rejected": -1.3655126094818115, "logps/chosen": -382.2786865234375, "logps/rejected": -487.64190673828125, "loss": 0.0152, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18006733059883118, "rewards/margins": 0.09453471750020981, "rewards/rejected": -0.2746020555496216, "step": 6790 }, { "epoch": 0.89, "learning_rate": 1.826731221771866e-07, "logits/chosen": -1.6402775049209595, "logits/rejected": -1.5354700088500977, "logps/chosen": -488.4889221191406, "logps/rejected": -551.7385864257812, "loss": 0.0279, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23139257729053497, "rewards/margins": 0.09490464627742767, "rewards/rejected": -0.32629722356796265, "step": 6800 }, { "epoch": 0.89, "learning_rate": 1.7841191850345967e-07, "logits/chosen": -1.6131150722503662, "logits/rejected": -1.555279016494751, "logps/chosen": -468.02337646484375, "logps/rejected": -504.90057373046875, "loss": 0.0272, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21480710804462433, "rewards/margins": 0.065223328769207, "rewards/rejected": -0.28003042936325073, "step": 6810 }, { "epoch": 0.89, "learning_rate": 1.7419916397357905e-07, "logits/chosen": -1.6005849838256836, "logits/rejected": -1.4733082056045532, "logps/chosen": -443.8605041503906, "logps/rejected": -516.1515502929688, "loss": 0.0189, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24963009357452393, "rewards/margins": 0.06734304130077362, "rewards/rejected": -0.31697314977645874, "step": 6820 }, { "epoch": 0.89, "learning_rate": 1.700349465033782e-07, "logits/chosen": -1.6979738473892212, "logits/rejected": -1.4578403234481812, "logps/chosen": -476.15228271484375, "logps/rejected": -494.53143310546875, "loss": 0.0305, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22806552052497864, "rewards/margins": 0.0760088786482811, "rewards/rejected": -0.30407437682151794, "step": 6830 }, { "epoch": 0.9, "learning_rate": 1.6591935299577227e-07, "logits/chosen": -1.4906421899795532, "logits/rejected": -1.3485311269760132, "logps/chosen": -521.2755737304688, "logps/rejected": -579.0121459960938, "loss": 0.015, "rewards/accuracies": 0.75, "rewards/chosen": -0.23313705623149872, "rewards/margins": 0.08736772835254669, "rewards/rejected": -0.3205048143863678, "step": 6840 }, { "epoch": 0.9, "learning_rate": 1.6185246933894338e-07, "logits/chosen": -1.469611644744873, "logits/rejected": -1.4063249826431274, "logps/chosen": -421.91156005859375, "logps/rejected": -511.09649658203125, "loss": 0.0277, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.19816365838050842, "rewards/margins": 0.08645348250865936, "rewards/rejected": -0.2846171259880066, "step": 6850 }, { "epoch": 0.9, "learning_rate": 1.5783438040455097e-07, "logits/chosen": -1.4442344903945923, "logits/rejected": -1.5077521800994873, "logps/chosen": -396.4654846191406, "logps/rejected": -461.05987548828125, "loss": 0.0138, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1838027685880661, "rewards/margins": 0.07608558237552643, "rewards/rejected": -0.25988835096359253, "step": 6860 }, { "epoch": 0.9, "learning_rate": 1.538651700459576e-07, "logits/chosen": -1.653641700744629, "logits/rejected": -1.412660002708435, "logps/chosen": -494.48052978515625, "logps/rejected": -557.4576416015625, "loss": 0.0247, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23249347507953644, "rewards/margins": 0.11307159811258316, "rewards/rejected": -0.3455651104450226, "step": 6870 }, { "epoch": 0.9, "learning_rate": 1.4994492109648151e-07, "logits/chosen": -1.343875527381897, "logits/rejected": -1.3370107412338257, "logps/chosen": -377.4173889160156, "logps/rejected": -463.6302795410156, "loss": 0.0264, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2058386355638504, "rewards/margins": 0.07137282937765121, "rewards/rejected": -0.277211457490921, "step": 6880 }, { "epoch": 0.9, "learning_rate": 1.4607371536766695e-07, "logits/chosen": -1.5182969570159912, "logits/rejected": -1.6659351587295532, "logps/chosen": -428.3271484375, "logps/rejected": -498.54742431640625, "loss": 0.0131, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.238141730427742, "rewards/margins": 0.04850930720567703, "rewards/rejected": -0.28665101528167725, "step": 6890 }, { "epoch": 0.9, "learning_rate": 1.4225163364757655e-07, "logits/chosen": -1.699549913406372, "logits/rejected": -1.3660337924957275, "logps/chosen": -476.3060607910156, "logps/rejected": -480.1463317871094, "loss": 0.0284, "rewards/accuracies": 0.625, "rewards/chosen": -0.19354431331157684, "rewards/margins": 0.07588140666484833, "rewards/rejected": -0.2694256901741028, "step": 6900 }, { "epoch": 0.9, "learning_rate": 1.3847875569910462e-07, "logits/chosen": -1.5795233249664307, "logits/rejected": -1.5374621152877808, "logps/chosen": -381.10736083984375, "logps/rejected": -439.749267578125, "loss": 0.0169, "rewards/accuracies": 0.625, "rewards/chosen": -0.20478907227516174, "rewards/margins": 0.06565740704536438, "rewards/rejected": -0.2704464793205261, "step": 6910 }, { "epoch": 0.91, "learning_rate": 1.3475516025831552e-07, "logits/chosen": -1.602575659751892, "logits/rejected": -1.544237732887268, "logps/chosen": -419.572265625, "logps/rejected": -511.387451171875, "loss": 0.0191, "rewards/accuracies": 0.625, "rewards/chosen": -0.22955676913261414, "rewards/margins": 0.07430396229028702, "rewards/rejected": -0.30386072397232056, "step": 6920 }, { "epoch": 0.91, "learning_rate": 1.310809250327974e-07, "logits/chosen": -1.6102867126464844, "logits/rejected": -1.5081886053085327, "logps/chosen": -413.7611389160156, "logps/rejected": -500.97564697265625, "loss": 0.014, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2218031883239746, "rewards/margins": 0.09341904520988464, "rewards/rejected": -0.31522220373153687, "step": 6930 }, { "epoch": 0.91, "learning_rate": 1.2745612670004153e-07, "logits/chosen": -1.676377296447754, "logits/rejected": -1.4056346416473389, "logps/chosen": -471.49420166015625, "logps/rejected": -564.6023559570312, "loss": 0.0312, "rewards/accuracies": 0.625, "rewards/chosen": -0.2545742988586426, "rewards/margins": 0.10350040346384048, "rewards/rejected": -0.3580746650695801, "step": 6940 }, { "epoch": 0.91, "learning_rate": 1.2388084090584395e-07, "logits/chosen": -1.647878646850586, "logits/rejected": -1.442943811416626, "logps/chosen": -424.6617736816406, "logps/rejected": -457.922119140625, "loss": 0.0298, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21583659946918488, "rewards/margins": 0.07665933668613434, "rewards/rejected": -0.2924959361553192, "step": 6950 }, { "epoch": 0.91, "learning_rate": 1.2035514226272305e-07, "logits/chosen": -1.4394546747207642, "logits/rejected": -1.2975504398345947, "logps/chosen": -486.21923828125, "logps/rejected": -533.73486328125, "loss": 0.0277, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2445685863494873, "rewards/margins": 0.0704784169793129, "rewards/rejected": -0.315047025680542, "step": 6960 }, { "epoch": 0.91, "learning_rate": 1.1687910434836607e-07, "logits/chosen": -1.247804880142212, "logits/rejected": -1.3159127235412598, "logps/chosen": -384.3349609375, "logps/rejected": -531.3505859375, "loss": 0.0365, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19616396725177765, "rewards/margins": 0.1056792140007019, "rewards/rejected": -0.30184316635131836, "step": 6970 }, { "epoch": 0.91, "learning_rate": 1.1345279970409128e-07, "logits/chosen": -1.5758659839630127, "logits/rejected": -1.5954402685165405, "logps/chosen": -456.5048828125, "logps/rejected": -535.1149291992188, "loss": 0.0277, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2219972163438797, "rewards/margins": 0.04516248777508736, "rewards/rejected": -0.26715970039367676, "step": 6980 }, { "epoch": 0.91, "learning_rate": 1.1007629983333629e-07, "logits/chosen": -1.4976396560668945, "logits/rejected": -1.319213628768921, "logps/chosen": -416.6646423339844, "logps/rejected": -548.4486083984375, "loss": 0.0279, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20675568282604218, "rewards/margins": 0.1446656733751297, "rewards/rejected": -0.35142138600349426, "step": 6990 }, { "epoch": 0.92, "learning_rate": 1.067496752001626e-07, "logits/chosen": -1.5586458444595337, "logits/rejected": -1.448335886001587, "logps/chosen": -362.5075378417969, "logps/rejected": -442.5972595214844, "loss": 0.0275, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.19385865330696106, "rewards/margins": 0.07538175582885742, "rewards/rejected": -0.2692403793334961, "step": 7000 }, { "epoch": 0.92, "learning_rate": 1.0347299522778909e-07, "logits/chosen": -1.6143827438354492, "logits/rejected": -1.6086311340332031, "logps/chosen": -423.903076171875, "logps/rejected": -492.069091796875, "loss": 0.0321, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21144139766693115, "rewards/margins": 0.05724687501788139, "rewards/rejected": -0.26868826150894165, "step": 7010 }, { "epoch": 0.92, "learning_rate": 1.0024632829713971e-07, "logits/chosen": -1.5233447551727295, "logits/rejected": -1.5699418783187866, "logps/chosen": -413.32830810546875, "logps/rejected": -533.0677490234375, "loss": 0.0224, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19785210490226746, "rewards/margins": 0.0935153067111969, "rewards/rejected": -0.29136738181114197, "step": 7020 }, { "epoch": 0.92, "learning_rate": 9.706974174541889e-08, "logits/chosen": -1.6802995204925537, "logits/rejected": -1.4303064346313477, "logps/chosen": -428.618896484375, "logps/rejected": -478.03265380859375, "loss": 0.0204, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20262300968170166, "rewards/margins": 0.07504673302173615, "rewards/rejected": -0.2776697278022766, "step": 7030 }, { "epoch": 0.92, "learning_rate": 9.39433018647043e-08, "logits/chosen": -1.6169646978378296, "logits/rejected": -1.3585525751113892, "logps/chosen": -412.26873779296875, "logps/rejected": -450.7110900878906, "loss": 0.0214, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21610252559185028, "rewards/margins": 0.05788475275039673, "rewards/rejected": -0.2739872634410858, "step": 7040 }, { "epoch": 0.92, "learning_rate": 9.086707390056543e-08, "logits/chosen": -1.4815367460250854, "logits/rejected": -1.6197248697280884, "logps/chosen": -449.9187927246094, "logps/rejected": -502.67364501953125, "loss": 0.0248, "rewards/accuracies": 0.625, "rewards/chosen": -0.2156413346529007, "rewards/margins": 0.04413525387644768, "rewards/rejected": -0.25977659225463867, "step": 7050 }, { "epoch": 0.92, "learning_rate": 8.784112205070083e-08, "logits/chosen": -1.6900125741958618, "logits/rejected": -1.6549876928329468, "logps/chosen": -351.97808837890625, "logps/rejected": -422.29638671875, "loss": 0.0208, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.16725996136665344, "rewards/margins": 0.0809854120016098, "rewards/rejected": -0.24824540317058563, "step": 7060 }, { "epoch": 0.93, "learning_rate": 8.486550946359779e-08, "logits/chosen": -1.7063417434692383, "logits/rejected": -1.6383116245269775, "logps/chosen": -350.2059326171875, "logps/rejected": -401.1691589355469, "loss": 0.0219, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16188636422157288, "rewards/margins": 0.04841512441635132, "rewards/rejected": -0.21030151844024658, "step": 7070 }, { "epoch": 0.93, "learning_rate": 8.194029823721556e-08, "logits/chosen": -1.625353217124939, "logits/rejected": -1.5545185804367065, "logps/chosen": -335.5082092285156, "logps/rejected": -378.410400390625, "loss": 0.03, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.18089623749256134, "rewards/margins": 0.05593543127179146, "rewards/rejected": -0.2368316650390625, "step": 7080 }, { "epoch": 0.93, "learning_rate": 7.906554941768896e-08, "logits/chosen": -1.2641688585281372, "logits/rejected": -1.3844702243804932, "logps/chosen": -420.99462890625, "logps/rejected": -521.883056640625, "loss": 0.0258, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20448222756385803, "rewards/margins": 0.06919074803590775, "rewards/rejected": -0.27367299795150757, "step": 7090 }, { "epoch": 0.93, "learning_rate": 7.624132299805575e-08, "logits/chosen": -1.6793031692504883, "logits/rejected": -1.448111891746521, "logps/chosen": -469.1708984375, "logps/rejected": -599.2205810546875, "loss": 0.0217, "rewards/accuracies": 0.75, "rewards/chosen": -0.2051534652709961, "rewards/margins": 0.14806172251701355, "rewards/rejected": -0.35321518778800964, "step": 7100 }, { "epoch": 0.93, "learning_rate": 7.346767791700127e-08, "logits/chosen": -1.469233512878418, "logits/rejected": -1.4481909275054932, "logps/chosen": -398.59698486328125, "logps/rejected": -483.1396484375, "loss": 0.0314, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21537312865257263, "rewards/margins": 0.09009166061878204, "rewards/rejected": -0.30546480417251587, "step": 7110 }, { "epoch": 0.93, "learning_rate": 7.07446720576327e-08, "logits/chosen": -1.6200309991836548, "logits/rejected": -1.4754276275634766, "logps/chosen": -487.5052795410156, "logps/rejected": -557.2413330078125, "loss": 0.0261, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22672314941883087, "rewards/margins": 0.09737074375152588, "rewards/rejected": -0.32409390807151794, "step": 7120 }, { "epoch": 0.93, "learning_rate": 6.807236224626701e-08, "logits/chosen": -1.3291611671447754, "logits/rejected": -1.3264832496643066, "logps/chosen": -414.73614501953125, "logps/rejected": -470.7920837402344, "loss": 0.024, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.22658607363700867, "rewards/margins": 0.05380987375974655, "rewards/rejected": -0.2803959548473358, "step": 7130 }, { "epoch": 0.93, "learning_rate": 6.545080425124888e-08, "logits/chosen": -1.567453384399414, "logits/rejected": -1.3527438640594482, "logps/chosen": -431.8515625, "logps/rejected": -490.6085510253906, "loss": 0.0325, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2253638207912445, "rewards/margins": 0.09049206227064133, "rewards/rejected": -0.3158559203147888, "step": 7140 }, { "epoch": 0.94, "learning_rate": 6.288005278178382e-08, "logits/chosen": -1.6186761856079102, "logits/rejected": -1.4696338176727295, "logps/chosen": -378.1620788574219, "logps/rejected": -439.23944091796875, "loss": 0.0243, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16763922572135925, "rewards/margins": 0.08176124095916748, "rewards/rejected": -0.24940045177936554, "step": 7150 }, { "epoch": 0.94, "learning_rate": 6.036016148679825e-08, "logits/chosen": -1.5543218851089478, "logits/rejected": -1.387766718864441, "logps/chosen": -433.41400146484375, "logps/rejected": -518.6406860351562, "loss": 0.0288, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21379122138023376, "rewards/margins": 0.09945134073495865, "rewards/rejected": -0.3132425844669342, "step": 7160 }, { "epoch": 0.94, "learning_rate": 5.7891182953819235e-08, "logits/chosen": -1.4900459051132202, "logits/rejected": -1.3624264001846313, "logps/chosen": -448.15716552734375, "logps/rejected": -467.8421936035156, "loss": 0.0206, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22588130831718445, "rewards/margins": 0.050553254783153534, "rewards/rejected": -0.27643460035324097, "step": 7170 }, { "epoch": 0.94, "learning_rate": 5.547316870787689e-08, "logits/chosen": -1.6980407238006592, "logits/rejected": -1.614473581314087, "logps/chosen": -476.68511962890625, "logps/rejected": -538.3526611328125, "loss": 0.0281, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22638268768787384, "rewards/margins": 0.0753302201628685, "rewards/rejected": -0.30171290040016174, "step": 7180 }, { "epoch": 0.94, "learning_rate": 5.310616921042927e-08, "logits/chosen": -1.5047115087509155, "logits/rejected": -1.2674446105957031, "logps/chosen": -411.44085693359375, "logps/rejected": -429.0818786621094, "loss": 0.0242, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19907724857330322, "rewards/margins": 0.0715177059173584, "rewards/rejected": -0.27059492468833923, "step": 7190 }, { "epoch": 0.94, "learning_rate": 5.079023385830939e-08, "logits/chosen": -1.571404218673706, "logits/rejected": -1.4775793552398682, "logps/chosen": -428.352783203125, "logps/rejected": -470.31854248046875, "loss": 0.0202, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1955043077468872, "rewards/margins": 0.07978837192058563, "rewards/rejected": -0.27529269456863403, "step": 7200 }, { "epoch": 0.94, "learning_rate": 4.8525410982695476e-08, "logits/chosen": -1.7943906784057617, "logits/rejected": -1.4248347282409668, "logps/chosen": -489.4849548339844, "logps/rejected": -516.9371948242188, "loss": 0.0131, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22439464926719666, "rewards/margins": 0.07225723564624786, "rewards/rejected": -0.2966518998146057, "step": 7210 }, { "epoch": 0.94, "learning_rate": 4.6311747848099e-08, "logits/chosen": -1.6961714029312134, "logits/rejected": -1.386518120765686, "logps/chosen": -448.45159912109375, "logps/rejected": -472.0159606933594, "loss": 0.0208, "rewards/accuracies": 0.625, "rewards/chosen": -0.19258160889148712, "rewards/margins": 0.055770616978406906, "rewards/rejected": -0.24835219979286194, "step": 7220 }, { "epoch": 0.95, "learning_rate": 4.4149290651382405e-08, "logits/chosen": -1.5325816869735718, "logits/rejected": -1.555955410003662, "logps/chosen": -445.8809509277344, "logps/rejected": -574.5386962890625, "loss": 0.0251, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2440822422504425, "rewards/margins": 0.09757833182811737, "rewards/rejected": -0.3416605591773987, "step": 7230 }, { "epoch": 0.95, "learning_rate": 4.203808452079211e-08, "logits/chosen": -1.6468013525009155, "logits/rejected": -1.599022626876831, "logps/chosen": -376.5355529785156, "logps/rejected": -439.79803466796875, "loss": 0.0259, "rewards/accuracies": 0.625, "rewards/chosen": -0.210208460688591, "rewards/margins": 0.05704888701438904, "rewards/rejected": -0.26725736260414124, "step": 7240 }, { "epoch": 0.95, "learning_rate": 3.9978173515018427e-08, "logits/chosen": -1.6024280786514282, "logits/rejected": -1.6201448440551758, "logps/chosen": -396.4577331542969, "logps/rejected": -474.0391540527344, "loss": 0.0224, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20827671885490417, "rewards/margins": 0.06074458360671997, "rewards/rejected": -0.26902130246162415, "step": 7250 }, { "epoch": 0.95, "learning_rate": 3.7969600622274614e-08, "logits/chosen": -1.4462767839431763, "logits/rejected": -0.992064356803894, "logps/chosen": -533.9465942382812, "logps/rejected": -521.5758056640625, "loss": 0.0328, "rewards/accuracies": 0.75, "rewards/chosen": -0.24340076744556427, "rewards/margins": 0.08966024219989777, "rewards/rejected": -0.33306097984313965, "step": 7260 }, { "epoch": 0.95, "learning_rate": 3.601240775940151e-08, "logits/chosen": -1.681206464767456, "logits/rejected": -1.5457440614700317, "logps/chosen": -512.3326416015625, "logps/rejected": -511.33551025390625, "loss": 0.0199, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19923128187656403, "rewards/margins": 0.04757522791624069, "rewards/rejected": -0.2468065321445465, "step": 7270 }, { "epoch": 0.95, "learning_rate": 3.410663577099071e-08, "logits/chosen": -1.5055510997772217, "logits/rejected": -1.2968648672103882, "logps/chosen": -445.6414489746094, "logps/rejected": -499.5888671875, "loss": 0.0204, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20774845778942108, "rewards/margins": 0.08748999238014221, "rewards/rejected": -0.2952384352684021, "step": 7280 }, { "epoch": 0.95, "learning_rate": 3.2252324428534986e-08, "logits/chosen": -1.642435073852539, "logits/rejected": -1.3985035419464111, "logps/chosen": -430.88116455078125, "logps/rejected": -523.8838500976562, "loss": 0.0211, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2069445550441742, "rewards/margins": 0.13603080809116364, "rewards/rejected": -0.34297531843185425, "step": 7290 }, { "epoch": 0.96, "learning_rate": 3.0449512429594486e-08, "logits/chosen": -1.618054986000061, "logits/rejected": -1.4266889095306396, "logps/chosen": -470.6953125, "logps/rejected": -619.3988037109375, "loss": 0.0358, "rewards/accuracies": 0.75, "rewards/chosen": -0.20840811729431152, "rewards/margins": 0.14416466653347015, "rewards/rejected": -0.3525727689266205, "step": 7300 }, { "epoch": 0.96, "learning_rate": 2.8698237396992956e-08, "logits/chosen": -1.5598580837249756, "logits/rejected": -1.4614779949188232, "logps/chosen": -452.614013671875, "logps/rejected": -567.0684204101562, "loss": 0.0177, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22577671706676483, "rewards/margins": 0.13550271093845367, "rewards/rejected": -0.3612794876098633, "step": 7310 }, { "epoch": 0.96, "learning_rate": 2.6998535878030584e-08, "logits/chosen": -1.6863279342651367, "logits/rejected": -1.4476655721664429, "logps/chosen": -444.57281494140625, "logps/rejected": -477.07830810546875, "loss": 0.0283, "rewards/accuracies": 0.625, "rewards/chosen": -0.20643453299999237, "rewards/margins": 0.11491278558969498, "rewards/rejected": -0.32134729623794556, "step": 7320 }, { "epoch": 0.96, "learning_rate": 2.535044334372072e-08, "logits/chosen": -1.5739470720291138, "logits/rejected": -1.5450502634048462, "logps/chosen": -487.28155517578125, "logps/rejected": -542.0820922851562, "loss": 0.0277, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22768394649028778, "rewards/margins": 0.08231634646654129, "rewards/rejected": -0.3100002706050873, "step": 7330 }, { "epoch": 0.96, "learning_rate": 2.3753994188051853e-08, "logits/chosen": -1.3884375095367432, "logits/rejected": -1.0182054042816162, "logps/chosen": -503.9326171875, "logps/rejected": -588.8248291015625, "loss": 0.0254, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2552822232246399, "rewards/margins": 0.114377960562706, "rewards/rejected": -0.3696601986885071, "step": 7340 }, { "epoch": 0.96, "learning_rate": 2.220922172726764e-08, "logits/chosen": -1.735327959060669, "logits/rejected": -1.519585371017456, "logps/chosen": -440.54803466796875, "logps/rejected": -469.45599365234375, "loss": 0.0218, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21124538779258728, "rewards/margins": 0.05926515534520149, "rewards/rejected": -0.27051058411598206, "step": 7350 }, { "epoch": 0.96, "learning_rate": 2.071615819917244e-08, "logits/chosen": -1.7590490579605103, "logits/rejected": -1.500055193901062, "logps/chosen": -447.4695739746094, "logps/rejected": -540.8870849609375, "loss": 0.024, "rewards/accuracies": 0.875, "rewards/chosen": -0.19787776470184326, "rewards/margins": 0.11433287709951401, "rewards/rejected": -0.3122106194496155, "step": 7360 }, { "epoch": 0.96, "learning_rate": 1.9274834762459393e-08, "logits/chosen": -1.4206875562667847, "logits/rejected": -1.2328351736068726, "logps/chosen": -481.37506103515625, "logps/rejected": -497.41717529296875, "loss": 0.0259, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23238250613212585, "rewards/margins": 0.06020934507250786, "rewards/rejected": -0.2925918698310852, "step": 7370 }, { "epoch": 0.97, "learning_rate": 1.7885281496058947e-08, "logits/chosen": -1.5303130149841309, "logits/rejected": -1.5012943744659424, "logps/chosen": -420.4921875, "logps/rejected": -477.54815673828125, "loss": 0.0151, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18356028199195862, "rewards/margins": 0.08185829222202301, "rewards/rejected": -0.2654185891151428, "step": 7380 }, { "epoch": 0.97, "learning_rate": 1.654752739851134e-08, "logits/chosen": -1.500623106956482, "logits/rejected": -1.5592161417007446, "logps/chosen": -385.9029541015625, "logps/rejected": -537.6184692382812, "loss": 0.0211, "rewards/accuracies": 0.75, "rewards/chosen": -0.2086178958415985, "rewards/margins": 0.11499868333339691, "rewards/rejected": -0.3236165940761566, "step": 7390 }, { "epoch": 0.97, "learning_rate": 1.526160038736235e-08, "logits/chosen": -1.6749951839447021, "logits/rejected": -1.4805552959442139, "logps/chosen": -433.3062438964844, "logps/rejected": -517.5592041015625, "loss": 0.0199, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2026170790195465, "rewards/margins": 0.13305865228176117, "rewards/rejected": -0.3356757164001465, "step": 7400 }, { "epoch": 0.97, "learning_rate": 1.402752729857959e-08, "logits/chosen": -1.5121300220489502, "logits/rejected": -1.529975175857544, "logps/chosen": -430.78106689453125, "logps/rejected": -469.31854248046875, "loss": 0.027, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22110477089881897, "rewards/margins": 0.037758342921733856, "rewards/rejected": -0.25886309146881104, "step": 7410 }, { "epoch": 0.97, "learning_rate": 1.2845333885992683e-08, "logits/chosen": -1.4580018520355225, "logits/rejected": -1.5121757984161377, "logps/chosen": -416.81170654296875, "logps/rejected": -495.95404052734375, "loss": 0.0262, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21470603346824646, "rewards/margins": 0.09575139731168747, "rewards/rejected": -0.31045740842819214, "step": 7420 }, { "epoch": 0.97, "learning_rate": 1.171504482075675e-08, "logits/chosen": -1.6380068063735962, "logits/rejected": -1.5240509510040283, "logps/chosen": -446.06378173828125, "logps/rejected": -507.699462890625, "loss": 0.0276, "rewards/accuracies": 0.75, "rewards/chosen": -0.23224465548992157, "rewards/margins": 0.09121891111135483, "rewards/rejected": -0.3234635591506958, "step": 7430 }, { "epoch": 0.97, "learning_rate": 1.0636683690836147e-08, "logits/chosen": -1.5630649328231812, "logits/rejected": -1.3046844005584717, "logps/chosen": -436.377197265625, "logps/rejected": -488.420654296875, "loss": 0.0212, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21501310169696808, "rewards/margins": 0.08404555171728134, "rewards/rejected": -0.2990586459636688, "step": 7440 }, { "epoch": 0.97, "learning_rate": 9.610273000513203e-09, "logits/chosen": -1.679991364479065, "logits/rejected": -1.5062822103500366, "logps/chosen": -418.0048828125, "logps/rejected": -482.8583068847656, "loss": 0.032, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22746939957141876, "rewards/margins": 0.04597032070159912, "rewards/rejected": -0.2734397351741791, "step": 7450 }, { "epoch": 0.98, "learning_rate": 8.635834169918312e-09, "logits/chosen": -1.6890833377838135, "logits/rejected": -1.4296929836273193, "logps/chosen": -463.12799072265625, "logps/rejected": -477.7140197753906, "loss": 0.0244, "rewards/accuracies": 0.625, "rewards/chosen": -0.23178067803382874, "rewards/margins": 0.04911506921052933, "rewards/rejected": -0.2808957099914551, "step": 7460 }, { "epoch": 0.98, "learning_rate": 7.713387534582506e-09, "logits/chosen": -1.589603066444397, "logits/rejected": -1.4947658777236938, "logps/chosen": -501.105712890625, "logps/rejected": -561.6420288085938, "loss": 0.0186, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22854037582874298, "rewards/margins": 0.07598869502544403, "rewards/rejected": -0.304529070854187, "step": 7470 }, { "epoch": 0.98, "learning_rate": 6.84295234501392e-09, "logits/chosen": -1.5068248510360718, "logits/rejected": -1.166831135749817, "logps/chosen": -445.83587646484375, "logps/rejected": -549.9104614257812, "loss": 0.0195, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2248276174068451, "rewards/margins": 0.15849019587039948, "rewards/rejected": -0.38331782817840576, "step": 7480 }, { "epoch": 0.98, "learning_rate": 6.024546766295325e-09, "logits/chosen": -1.8001673221588135, "logits/rejected": -1.413901925086975, "logps/chosen": -467.92889404296875, "logps/rejected": -552.5183715820312, "loss": 0.0191, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22095003724098206, "rewards/margins": 0.12218403816223145, "rewards/rejected": -0.3431340754032135, "step": 7490 }, { "epoch": 0.98, "learning_rate": 5.2581878777049895e-09, "logits/chosen": -1.6722362041473389, "logits/rejected": -1.4077531099319458, "logps/chosen": -400.8117980957031, "logps/rejected": -479.1786193847656, "loss": 0.0211, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2000255584716797, "rewards/margins": 0.10312291234731674, "rewards/rejected": -0.30314844846725464, "step": 7500 }, { "epoch": 0.98, "learning_rate": 4.543891672361411e-09, "logits/chosen": -1.7269861698150635, "logits/rejected": -1.66338312625885, "logps/chosen": -412.36102294921875, "logps/rejected": -441.78204345703125, "loss": 0.0261, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18549641966819763, "rewards/margins": 0.040950436145067215, "rewards/rejected": -0.22644683718681335, "step": 7510 }, { "epoch": 0.98, "learning_rate": 3.881673056887747e-09, "logits/chosen": -1.596295714378357, "logits/rejected": -1.443849802017212, "logps/chosen": -378.5397644042969, "logps/rejected": -499.32489013671875, "loss": 0.0167, "rewards/accuracies": 0.75, "rewards/chosen": -0.20860883593559265, "rewards/margins": 0.13590942323207855, "rewards/rejected": -0.3445182740688324, "step": 7520 }, { "epoch": 0.99, "learning_rate": 3.2715458511023425e-09, "logits/chosen": -1.5938750505447388, "logits/rejected": -1.442068099975586, "logps/chosen": -444.24951171875, "logps/rejected": -574.7880249023438, "loss": 0.0216, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21984434127807617, "rewards/margins": 0.13241305947303772, "rewards/rejected": -0.3522574007511139, "step": 7530 }, { "epoch": 0.99, "learning_rate": 2.7135227877289617e-09, "logits/chosen": -1.2030110359191895, "logits/rejected": -1.528421401977539, "logps/chosen": -356.8739318847656, "logps/rejected": -478.8106994628906, "loss": 0.0243, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2077951729297638, "rewards/margins": 0.06836696714162827, "rewards/rejected": -0.27616217732429504, "step": 7540 }, { "epoch": 0.99, "learning_rate": 2.2076155121328326e-09, "logits/chosen": -1.6814384460449219, "logits/rejected": -1.49794602394104, "logps/chosen": -455.85760498046875, "logps/rejected": -572.0209350585938, "loss": 0.0215, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20171403884887695, "rewards/margins": 0.11354148387908936, "rewards/rejected": -0.3152554929256439, "step": 7550 }, { "epoch": 0.99, "learning_rate": 1.7538345820755641e-09, "logits/chosen": -1.3514631986618042, "logits/rejected": -1.418828010559082, "logps/chosen": -439.3419494628906, "logps/rejected": -534.3173828125, "loss": 0.0318, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.24673500657081604, "rewards/margins": 0.07837876677513123, "rewards/rejected": -0.32511377334594727, "step": 7560 }, { "epoch": 0.99, "learning_rate": 1.3521894674961567e-09, "logits/chosen": -1.5621706247329712, "logits/rejected": -1.437835454940796, "logps/chosen": -406.8969421386719, "logps/rejected": -508.74749755859375, "loss": 0.0209, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21497616171836853, "rewards/margins": 0.08241794258356094, "rewards/rejected": -0.2973940968513489, "step": 7570 }, { "epoch": 0.99, "learning_rate": 1.0026885503131023e-09, "logits/chosen": -1.4738019704818726, "logits/rejected": -1.5091726779937744, "logps/chosen": -468.597900390625, "logps/rejected": -560.5963134765625, "loss": 0.0261, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2344876229763031, "rewards/margins": 0.10260176658630371, "rewards/rejected": -0.3370893895626068, "step": 7580 }, { "epoch": 0.99, "learning_rate": 7.053391242492491e-10, "logits/chosen": -1.478180170059204, "logits/rejected": -1.4109389781951904, "logps/chosen": -378.4008483886719, "logps/rejected": -479.33203125, "loss": 0.0202, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1765948385000229, "rewards/margins": 0.11604814231395721, "rewards/rejected": -0.2926430106163025, "step": 7590 }, { "epoch": 0.99, "learning_rate": 4.6014739467997725e-10, "logits/chosen": -1.3927793502807617, "logits/rejected": -1.36867356300354, "logps/chosen": -445.24481201171875, "logps/rejected": -572.3045654296875, "loss": 0.0269, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2252415120601654, "rewards/margins": 0.11578087508678436, "rewards/rejected": -0.34102240204811096, "step": 7600 }, { "epoch": 1.0, "learning_rate": 2.671184785033032e-10, "logits/chosen": -1.7575019598007202, "logits/rejected": -1.6783424615859985, "logps/chosen": -482.564453125, "logps/rejected": -552.7120361328125, "loss": 0.0201, "rewards/accuracies": 0.75, "rewards/chosen": -0.20446915924549103, "rewards/margins": 0.08912034332752228, "rewards/rejected": -0.2935895025730133, "step": 7610 }, { "epoch": 1.0, "learning_rate": 1.2625640403302054e-10, "logits/chosen": -1.4191354513168335, "logits/rejected": -1.4505354166030884, "logps/chosen": -378.90972900390625, "logps/rejected": -489.2767639160156, "loss": 0.0248, "rewards/accuracies": 0.625, "rewards/chosen": -0.22058920562267303, "rewards/margins": 0.07962705194950104, "rewards/rejected": -0.3002162575721741, "step": 7620 }, { "epoch": 1.0, "learning_rate": 3.756411091515588e-11, "logits/chosen": -1.7268844842910767, "logits/rejected": -1.4856466054916382, "logps/chosen": -463.380126953125, "logps/rejected": -495.91876220703125, "loss": 0.0199, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19431455433368683, "rewards/margins": 0.09503963589668274, "rewards/rejected": -0.2893541753292084, "step": 7630 }, { "epoch": 1.0, "learning_rate": 1.0434500657963143e-12, "logits/chosen": -1.7444950342178345, "logits/rejected": -1.388063669204712, "logps/chosen": -367.97479248046875, "logps/rejected": -386.3829650878906, "loss": 0.0318, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.17011064291000366, "rewards/margins": 0.06572172790765762, "rewards/rejected": -0.2358323633670807, "step": 7640 }, { "epoch": 1.0, "step": 7642, "total_flos": 0.0, "train_loss": 0.0048277108391146795, "train_runtime": 13417.0015, "train_samples_per_second": 4.557, "train_steps_per_second": 0.57 } ], "logging_steps": 10, "max_steps": 7642, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }