{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 200, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1e-08, "logits/chosen": -2.275148391723633, "logits/rejected": -2.252962827682495, "logps/chosen": -49.66499710083008, "logps/rejected": -67.88325500488281, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.0000000000000001e-07, "logits/chosen": -2.199190139770508, "logits/rejected": -1.5089201927185059, "logps/chosen": -138.57327270507812, "logps/rejected": -91.51111602783203, "loss": 0.6931, "rewards/accuracies": 0.4166666567325592, "rewards/chosen": 0.0012105697533115745, "rewards/margins": 0.0012005879543721676, "rewards/rejected": 9.981727998820134e-06, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.0000000000000002e-07, "logits/chosen": -2.010512590408325, "logits/rejected": -1.4925806522369385, "logps/chosen": -131.5250244140625, "logps/rejected": -79.00123596191406, "loss": 0.6932, "rewards/accuracies": 0.375, "rewards/chosen": 0.0006381148705258965, "rewards/margins": -0.00036874672514386475, "rewards/rejected": 0.0010068616829812527, "step": 20 }, { "epoch": 0.01, "learning_rate": 3.0000000000000004e-07, "logits/chosen": -2.0532355308532715, "logits/rejected": -1.5410211086273193, "logps/chosen": -103.53959655761719, "logps/rejected": -75.55322265625, "loss": 0.693, "rewards/accuracies": 0.375, "rewards/chosen": 0.0004810505488421768, "rewards/margins": -0.0005204210174269974, "rewards/rejected": 0.001001471420750022, "step": 30 }, { "epoch": 0.01, "learning_rate": 4.0000000000000003e-07, "logits/chosen": -2.1325297355651855, "logits/rejected": -1.8901160955429077, "logps/chosen": -121.27030944824219, "logps/rejected": -96.04686737060547, "loss": 0.693, "rewards/accuracies": 0.2750000059604645, "rewards/chosen": 0.0020412690937519073, "rewards/margins": 0.00025054410798475146, "rewards/rejected": 0.0017907252768054605, "step": 40 }, { "epoch": 0.01, "learning_rate": 5.000000000000001e-07, "logits/chosen": -2.1697657108306885, "logits/rejected": -1.6580966711044312, "logps/chosen": -125.24947357177734, "logps/rejected": -76.5873031616211, "loss": 0.6927, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.005478862207382917, "rewards/margins": 0.004068131558597088, "rewards/rejected": 0.0014107308816164732, "step": 50 }, { "epoch": 0.01, "learning_rate": 6.000000000000001e-07, "logits/chosen": -1.961475133895874, "logits/rejected": -1.5781526565551758, "logps/chosen": -163.24258422851562, "logps/rejected": -116.1397933959961, "loss": 0.6924, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.007361049763858318, "rewards/margins": 0.001786444685421884, "rewards/rejected": 0.005574604496359825, "step": 60 }, { "epoch": 0.01, "learning_rate": 7.000000000000001e-07, "logits/chosen": -2.043919801712036, "logits/rejected": -1.5668914318084717, "logps/chosen": -151.30075073242188, "logps/rejected": -100.92520141601562, "loss": 0.6921, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.016562381759285927, "rewards/margins": 0.010314849205315113, "rewards/rejected": 0.006247533019632101, "step": 70 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-07, "logits/chosen": -1.8576778173446655, "logits/rejected": -1.4404563903808594, "logps/chosen": -127.70096588134766, "logps/rejected": -84.46105194091797, "loss": 0.6921, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01676427200436592, "rewards/margins": 0.011969492770731449, "rewards/rejected": 0.0047947801649570465, "step": 80 }, { "epoch": 0.02, "learning_rate": 9.000000000000001e-07, "logits/chosen": -1.9256703853607178, "logits/rejected": -1.5529954433441162, "logps/chosen": -138.75009155273438, "logps/rejected": -99.7838134765625, "loss": 0.6908, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.021214038133621216, "rewards/margins": 0.017048979178071022, "rewards/rejected": 0.004165060818195343, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.0000000000000002e-06, "logits/chosen": -1.8967689275741577, "logits/rejected": -1.6510919332504272, "logps/chosen": -117.10508728027344, "logps/rejected": -79.18618774414062, "loss": 0.6907, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.017415408045053482, "rewards/margins": 0.017400022596120834, "rewards/rejected": 1.5387777239084244e-05, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.1e-06, "logits/chosen": -2.053466320037842, "logits/rejected": -1.4318517446517944, "logps/chosen": -138.87936401367188, "logps/rejected": -122.6020278930664, "loss": 0.6876, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.041586942970752716, "rewards/margins": 0.04805510863661766, "rewards/rejected": -0.006468159146606922, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.2000000000000002e-06, "logits/chosen": -1.9276115894317627, "logits/rejected": -1.5420010089874268, "logps/chosen": -140.89996337890625, "logps/rejected": -105.41324615478516, "loss": 0.6859, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03002164699137211, "rewards/margins": 0.09724549949169159, "rewards/rejected": -0.06722383201122284, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.3e-06, "logits/chosen": -1.8886677026748657, "logits/rejected": -1.346839189529419, "logps/chosen": -151.53182983398438, "logps/rejected": -103.38607025146484, "loss": 0.6823, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06169581413269043, "rewards/margins": 0.15404877066612244, "rewards/rejected": -0.21574458479881287, "step": 130 }, { "epoch": 0.03, "learning_rate": 1.4000000000000001e-06, "logits/chosen": -1.9585676193237305, "logits/rejected": -1.4493801593780518, "logps/chosen": -148.07363891601562, "logps/rejected": -135.94021606445312, "loss": 0.6828, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09941406548023224, "rewards/margins": 0.23751676082611084, "rewards/rejected": -0.3369307518005371, "step": 140 }, { "epoch": 0.03, "learning_rate": 1.5e-06, "logits/chosen": -1.8113046884536743, "logits/rejected": -1.3615756034851074, "logps/chosen": -154.11404418945312, "logps/rejected": -121.53764343261719, "loss": 0.6798, "rewards/accuracies": 0.625, "rewards/chosen": -0.17880429327487946, "rewards/margins": 0.14422708749771118, "rewards/rejected": -0.32303136587142944, "step": 150 }, { "epoch": 0.03, "learning_rate": 1.6000000000000001e-06, "logits/chosen": -2.100853443145752, "logits/rejected": -1.5646215677261353, "logps/chosen": -145.79393005371094, "logps/rejected": -147.7415313720703, "loss": 0.6788, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0376536026597023, "rewards/margins": 0.151740163564682, "rewards/rejected": -0.1893937885761261, "step": 160 }, { "epoch": 0.03, "learning_rate": 1.7000000000000002e-06, "logits/chosen": -1.9753303527832031, "logits/rejected": -1.4234099388122559, "logps/chosen": -143.33480834960938, "logps/rejected": -128.6998291015625, "loss": 0.6779, "rewards/accuracies": 0.875, "rewards/chosen": -0.2504326105117798, "rewards/margins": 0.20616090297698975, "rewards/rejected": -0.45659351348876953, "step": 170 }, { "epoch": 0.04, "learning_rate": 1.8000000000000001e-06, "logits/chosen": -1.9849954843521118, "logits/rejected": -1.3199915885925293, "logps/chosen": -183.52261352539062, "logps/rejected": -163.77528381347656, "loss": 0.677, "rewards/accuracies": 0.75, "rewards/chosen": -0.28151735663414, "rewards/margins": 0.3017168641090393, "rewards/rejected": -0.5832341909408569, "step": 180 }, { "epoch": 0.04, "learning_rate": 1.9000000000000002e-06, "logits/chosen": -1.9351110458374023, "logits/rejected": -1.253793478012085, "logps/chosen": -170.26963806152344, "logps/rejected": -149.2416229248047, "loss": 0.6788, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16827769577503204, "rewards/margins": 0.31821906566619873, "rewards/rejected": -0.4864967465400696, "step": 190 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -1.9278274774551392, "logits/rejected": -1.4175664186477661, "logps/chosen": -178.3792724609375, "logps/rejected": -136.69137573242188, "loss": 0.6748, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2405795156955719, "rewards/margins": 0.19246478378772736, "rewards/rejected": -0.43304434418678284, "step": 200 }, { "epoch": 0.04, "eval_logits/chosen": -1.9624555110931396, "eval_logits/rejected": -1.8000800609588623, "eval_logps/chosen": -299.6654052734375, "eval_logps/rejected": -284.7154541015625, "eval_loss": 0.7006981372833252, "eval_rewards/accuracies": 0.4446107745170593, "eval_rewards/chosen": -0.36754098534584045, "eval_rewards/margins": 0.013853566721081734, "eval_rewards/rejected": -0.38139453530311584, "eval_runtime": 1204.7366, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 200 }, { "epoch": 0.04, "learning_rate": 2.1000000000000002e-06, "logits/chosen": -2.2367801666259766, "logits/rejected": -1.6915652751922607, "logps/chosen": -167.20713806152344, "logps/rejected": -135.56109619140625, "loss": 0.6792, "rewards/accuracies": 0.625, "rewards/chosen": -0.24828723073005676, "rewards/margins": 0.2101193368434906, "rewards/rejected": -0.458406537771225, "step": 210 }, { "epoch": 0.04, "learning_rate": 2.2e-06, "logits/chosen": -1.9008948802947998, "logits/rejected": -1.3550198078155518, "logps/chosen": -163.49244689941406, "logps/rejected": -133.10394287109375, "loss": 0.6775, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29116111993789673, "rewards/margins": 0.1812642216682434, "rewards/rejected": -0.47242528200149536, "step": 220 }, { "epoch": 0.05, "learning_rate": 2.3000000000000004e-06, "logits/chosen": -2.1905152797698975, "logits/rejected": -1.642858862876892, "logps/chosen": -149.93695068359375, "logps/rejected": -127.42059326171875, "loss": 0.6756, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34493881464004517, "rewards/margins": 0.25581878423690796, "rewards/rejected": -0.6007575988769531, "step": 230 }, { "epoch": 0.05, "learning_rate": 2.4000000000000003e-06, "logits/chosen": -1.8767839670181274, "logits/rejected": -1.3880187273025513, "logps/chosen": -158.164306640625, "logps/rejected": -152.3143768310547, "loss": 0.6766, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21420688927173615, "rewards/margins": 0.2859117388725281, "rewards/rejected": -0.500118613243103, "step": 240 }, { "epoch": 0.05, "learning_rate": 2.5e-06, "logits/chosen": -2.0920891761779785, "logits/rejected": -1.5011050701141357, "logps/chosen": -152.09368896484375, "logps/rejected": -140.8191680908203, "loss": 0.6783, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13553202152252197, "rewards/margins": 0.20881636440753937, "rewards/rejected": -0.34434837102890015, "step": 250 }, { "epoch": 0.05, "learning_rate": 2.6e-06, "logits/chosen": -1.6099424362182617, "logits/rejected": -1.3793843984603882, "logps/chosen": -161.1884002685547, "logps/rejected": -157.10902404785156, "loss": 0.6802, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4520888328552246, "rewards/margins": 0.2142581194639206, "rewards/rejected": -0.6663470268249512, "step": 260 }, { "epoch": 0.05, "learning_rate": 2.7000000000000004e-06, "logits/chosen": -2.046205759048462, "logits/rejected": -1.4911781549453735, "logps/chosen": -156.38571166992188, "logps/rejected": -139.35513305664062, "loss": 0.6749, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.27164769172668457, "rewards/margins": 0.267218679189682, "rewards/rejected": -0.538866400718689, "step": 270 }, { "epoch": 0.06, "learning_rate": 2.8000000000000003e-06, "logits/chosen": -2.0110878944396973, "logits/rejected": -1.5444520711898804, "logps/chosen": -177.12501525878906, "logps/rejected": -157.34912109375, "loss": 0.6762, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3508060872554779, "rewards/margins": 0.2382647544145584, "rewards/rejected": -0.5890708565711975, "step": 280 }, { "epoch": 0.06, "learning_rate": 2.9e-06, "logits/chosen": -2.009817361831665, "logits/rejected": -1.6303608417510986, "logps/chosen": -161.31858825683594, "logps/rejected": -151.79132080078125, "loss": 0.6784, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.33385199308395386, "rewards/margins": 0.15410049259662628, "rewards/rejected": -0.48795247077941895, "step": 290 }, { "epoch": 0.06, "learning_rate": 3e-06, "logits/chosen": -2.037912368774414, "logits/rejected": -1.629115343093872, "logps/chosen": -149.25282287597656, "logps/rejected": -136.72433471679688, "loss": 0.6769, "rewards/accuracies": 0.75, "rewards/chosen": -0.12601105868816376, "rewards/margins": 0.19319812953472137, "rewards/rejected": -0.3192092180252075, "step": 300 }, { "epoch": 0.06, "learning_rate": 3.1000000000000004e-06, "logits/chosen": -1.986745834350586, "logits/rejected": -1.5233522653579712, "logps/chosen": -162.3162384033203, "logps/rejected": -148.5954132080078, "loss": 0.672, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2618417739868164, "rewards/margins": 0.2822086811065674, "rewards/rejected": -0.5440504550933838, "step": 310 }, { "epoch": 0.06, "learning_rate": 3.2000000000000003e-06, "logits/chosen": -1.8696820735931396, "logits/rejected": -1.4047298431396484, "logps/chosen": -167.04501342773438, "logps/rejected": -158.54757690429688, "loss": 0.6718, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4154019355773926, "rewards/margins": 0.2821538746356964, "rewards/rejected": -0.6975558400154114, "step": 320 }, { "epoch": 0.07, "learning_rate": 3.3000000000000006e-06, "logits/chosen": -1.9930431842803955, "logits/rejected": -1.613372802734375, "logps/chosen": -164.47940063476562, "logps/rejected": -129.75711059570312, "loss": 0.6811, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20519188046455383, "rewards/margins": 0.17603768408298492, "rewards/rejected": -0.38122954964637756, "step": 330 }, { "epoch": 0.07, "learning_rate": 3.4000000000000005e-06, "logits/chosen": -1.9684274196624756, "logits/rejected": -1.6611862182617188, "logps/chosen": -124.29713439941406, "logps/rejected": -142.02206420898438, "loss": 0.6793, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25787854194641113, "rewards/margins": 0.24758338928222656, "rewards/rejected": -0.5054618716239929, "step": 340 }, { "epoch": 0.07, "learning_rate": 3.5e-06, "logits/chosen": -1.9551589488983154, "logits/rejected": -1.3303110599517822, "logps/chosen": -167.13226318359375, "logps/rejected": -166.62051391601562, "loss": 0.6743, "rewards/accuracies": 0.875, "rewards/chosen": -0.2782246768474579, "rewards/margins": 0.3209804892539978, "rewards/rejected": -0.5992051362991333, "step": 350 }, { "epoch": 0.07, "learning_rate": 3.6000000000000003e-06, "logits/chosen": -1.902215600013733, "logits/rejected": -1.2647427320480347, "logps/chosen": -192.03060913085938, "logps/rejected": -203.42056274414062, "loss": 0.6734, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.4457126259803772, "rewards/margins": 0.2893184721469879, "rewards/rejected": -0.7350310683250427, "step": 360 }, { "epoch": 0.07, "learning_rate": 3.7e-06, "logits/chosen": -1.9445044994354248, "logits/rejected": -1.5625604391098022, "logps/chosen": -173.17498779296875, "logps/rejected": -177.96241760253906, "loss": 0.6771, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.42896080017089844, "rewards/margins": 0.23185841739177704, "rewards/rejected": -0.6608191728591919, "step": 370 }, { "epoch": 0.08, "learning_rate": 3.8000000000000005e-06, "logits/chosen": -1.9811357259750366, "logits/rejected": -1.4119899272918701, "logps/chosen": -188.53985595703125, "logps/rejected": -161.04202270507812, "loss": 0.6766, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3964442312717438, "rewards/margins": 0.26596054434776306, "rewards/rejected": -0.6624047756195068, "step": 380 }, { "epoch": 0.08, "learning_rate": 3.900000000000001e-06, "logits/chosen": -1.9619165658950806, "logits/rejected": -1.4057536125183105, "logps/chosen": -182.70489501953125, "logps/rejected": -164.2327117919922, "loss": 0.6733, "rewards/accuracies": 0.75, "rewards/chosen": -0.38969874382019043, "rewards/margins": 0.2608191967010498, "rewards/rejected": -0.6505179405212402, "step": 390 }, { "epoch": 0.08, "learning_rate": 4.000000000000001e-06, "logits/chosen": -1.8259871006011963, "logits/rejected": -1.2773064374923706, "logps/chosen": -158.28359985351562, "logps/rejected": -154.044921875, "loss": 0.6724, "rewards/accuracies": 0.875, "rewards/chosen": -0.35175850987434387, "rewards/margins": 0.31467652320861816, "rewards/rejected": -0.6664350032806396, "step": 400 }, { "epoch": 0.08, "eval_logits/chosen": -1.9523651599884033, "eval_logits/rejected": -1.7890278100967407, "eval_logps/chosen": -294.74749755859375, "eval_logps/rejected": -281.84820556640625, "eval_loss": 0.7026551961898804, "eval_rewards/accuracies": 0.4940119683742523, "eval_rewards/chosen": -0.3183620274066925, "eval_rewards/margins": 0.034360144287347794, "eval_rewards/rejected": -0.35272216796875, "eval_runtime": 1205.6924, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.277, "step": 400 }, { "epoch": 0.08, "learning_rate": 4.1e-06, "logits/chosen": -2.149876117706299, "logits/rejected": -1.446425199508667, "logps/chosen": -212.07675170898438, "logps/rejected": -181.53189086914062, "loss": 0.6717, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4052143692970276, "rewards/margins": 0.3267214596271515, "rewards/rejected": -0.7319357395172119, "step": 410 }, { "epoch": 0.08, "learning_rate": 4.2000000000000004e-06, "logits/chosen": -1.960097312927246, "logits/rejected": -1.3134586811065674, "logps/chosen": -178.6069793701172, "logps/rejected": -156.78665161132812, "loss": 0.6767, "rewards/accuracies": 0.75, "rewards/chosen": -0.4016164243221283, "rewards/margins": 0.3050606846809387, "rewards/rejected": -0.7066770792007446, "step": 420 }, { "epoch": 0.09, "learning_rate": 4.3e-06, "logits/chosen": -1.9331775903701782, "logits/rejected": -1.4281222820281982, "logps/chosen": -184.1750030517578, "logps/rejected": -164.83297729492188, "loss": 0.6756, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3405923545360565, "rewards/margins": 0.2659713625907898, "rewards/rejected": -0.6065636873245239, "step": 430 }, { "epoch": 0.09, "learning_rate": 4.4e-06, "logits/chosen": -1.790580153465271, "logits/rejected": -1.2552874088287354, "logps/chosen": -182.96673583984375, "logps/rejected": -145.83885192871094, "loss": 0.6734, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3612382411956787, "rewards/margins": 0.2275930643081665, "rewards/rejected": -0.5888313055038452, "step": 440 }, { "epoch": 0.09, "learning_rate": 4.5e-06, "logits/chosen": -1.998995065689087, "logits/rejected": -1.381549596786499, "logps/chosen": -186.69630432128906, "logps/rejected": -169.30868530273438, "loss": 0.6794, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3535120189189911, "rewards/margins": 0.36277759075164795, "rewards/rejected": -0.7162896394729614, "step": 450 }, { "epoch": 0.09, "learning_rate": 4.600000000000001e-06, "logits/chosen": -1.8992105722427368, "logits/rejected": -1.3617089986801147, "logps/chosen": -190.19342041015625, "logps/rejected": -147.9746551513672, "loss": 0.6752, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3215749263763428, "rewards/margins": 0.23611216247081757, "rewards/rejected": -0.5576871037483215, "step": 460 }, { "epoch": 0.09, "learning_rate": 4.7e-06, "logits/chosen": -1.9145139455795288, "logits/rejected": -1.2918580770492554, "logps/chosen": -189.08383178710938, "logps/rejected": -146.23828125, "loss": 0.6738, "rewards/accuracies": 0.75, "rewards/chosen": -0.4808422029018402, "rewards/margins": 0.28706902265548706, "rewards/rejected": -0.7679113149642944, "step": 470 }, { "epoch": 0.1, "learning_rate": 4.800000000000001e-06, "logits/chosen": -1.938701868057251, "logits/rejected": -1.4654858112335205, "logps/chosen": -165.35458374023438, "logps/rejected": -145.22390747070312, "loss": 0.6806, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3199502229690552, "rewards/margins": 0.16619114577770233, "rewards/rejected": -0.4861413836479187, "step": 480 }, { "epoch": 0.1, "learning_rate": 4.9000000000000005e-06, "logits/chosen": -1.613707184791565, "logits/rejected": -1.285510778427124, "logps/chosen": -152.36941528320312, "logps/rejected": -143.10797119140625, "loss": 0.6767, "rewards/accuracies": 0.75, "rewards/chosen": -0.27973946928977966, "rewards/margins": 0.17489340901374817, "rewards/rejected": -0.4546329081058502, "step": 490 }, { "epoch": 0.1, "learning_rate": 5e-06, "logits/chosen": -1.9260075092315674, "logits/rejected": -1.4875643253326416, "logps/chosen": -204.52841186523438, "logps/rejected": -199.9879150390625, "loss": 0.6761, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6231226921081543, "rewards/margins": 0.2070433795452118, "rewards/rejected": -0.8301660418510437, "step": 500 }, { "epoch": 0.1, "learning_rate": 4.999939076763487e-06, "logits/chosen": -1.7510411739349365, "logits/rejected": -1.2261205911636353, "logps/chosen": -206.4501190185547, "logps/rejected": -185.46054077148438, "loss": 0.6743, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.718189537525177, "rewards/margins": 0.22572274506092072, "rewards/rejected": -0.9439122080802917, "step": 510 }, { "epoch": 0.1, "learning_rate": 4.999756310023261e-06, "logits/chosen": -1.8986438512802124, "logits/rejected": -1.2370402812957764, "logps/chosen": -184.15892028808594, "logps/rejected": -202.61741638183594, "loss": 0.6735, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.506219744682312, "rewards/margins": 0.3453019857406616, "rewards/rejected": -0.8515217900276184, "step": 520 }, { "epoch": 0.11, "learning_rate": 4.999451708687114e-06, "logits/chosen": -1.9944801330566406, "logits/rejected": -1.378252625465393, "logps/chosen": -203.19119262695312, "logps/rejected": -165.0548095703125, "loss": 0.6758, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4315893054008484, "rewards/margins": 0.25390011072158813, "rewards/rejected": -0.6854894161224365, "step": 530 }, { "epoch": 0.11, "learning_rate": 4.999025287600886e-06, "logits/chosen": -1.9773871898651123, "logits/rejected": -1.5123317241668701, "logps/chosen": -167.681884765625, "logps/rejected": -149.40553283691406, "loss": 0.6795, "rewards/accuracies": 0.75, "rewards/chosen": -0.4714645743370056, "rewards/margins": 0.18777449429035187, "rewards/rejected": -0.6592391133308411, "step": 540 }, { "epoch": 0.11, "learning_rate": 4.99847706754774e-06, "logits/chosen": -1.5543482303619385, "logits/rejected": -1.2529933452606201, "logps/chosen": -183.87698364257812, "logps/rejected": -159.92874145507812, "loss": 0.6759, "rewards/accuracies": 0.75, "rewards/chosen": -0.47762006521224976, "rewards/margins": 0.1939605474472046, "rewards/rejected": -0.6715805530548096, "step": 550 }, { "epoch": 0.11, "learning_rate": 4.997807075247147e-06, "logits/chosen": -1.6196110248565674, "logits/rejected": -1.2382831573486328, "logps/chosen": -191.66799926757812, "logps/rejected": -179.5049591064453, "loss": 0.6795, "rewards/accuracies": 0.875, "rewards/chosen": -0.6134921908378601, "rewards/margins": 0.278992235660553, "rewards/rejected": -0.8924844861030579, "step": 560 }, { "epoch": 0.11, "learning_rate": 4.9970153433535855e-06, "logits/chosen": -1.9593942165374756, "logits/rejected": -1.6110328435897827, "logps/chosen": -143.80203247070312, "logps/rejected": -132.58969116210938, "loss": 0.6775, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.34907329082489014, "rewards/margins": 0.17869921028614044, "rewards/rejected": -0.5277725458145142, "step": 570 }, { "epoch": 0.12, "learning_rate": 4.996101910454953e-06, "logits/chosen": -1.8148915767669678, "logits/rejected": -1.3327760696411133, "logps/chosen": -191.04641723632812, "logps/rejected": -145.37681579589844, "loss": 0.6758, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2701118588447571, "rewards/margins": 0.24128296971321106, "rewards/rejected": -0.5113948583602905, "step": 580 }, { "epoch": 0.12, "learning_rate": 4.9950668210706795e-06, "logits/chosen": -1.8354158401489258, "logits/rejected": -1.4500545263290405, "logps/chosen": -181.51785278320312, "logps/rejected": -165.00509643554688, "loss": 0.6718, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.27173325419425964, "rewards/margins": 0.19058290123939514, "rewards/rejected": -0.4623161256313324, "step": 590 }, { "epoch": 0.12, "learning_rate": 4.993910125649561e-06, "logits/chosen": -1.7158424854278564, "logits/rejected": -1.3078532218933105, "logps/chosen": -162.95870971679688, "logps/rejected": -178.8660888671875, "loss": 0.6749, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.47113722562789917, "rewards/margins": 0.24033662676811218, "rewards/rejected": -0.711473822593689, "step": 600 }, { "epoch": 0.12, "eval_logits/chosen": -1.8358020782470703, "eval_logits/rejected": -1.6820003986358643, "eval_logps/chosen": -295.4615478515625, "eval_logps/rejected": -282.51385498046875, "eval_loss": 0.7100444436073303, "eval_rewards/accuracies": 0.47604790329933167, "eval_rewards/chosen": -0.3255026042461395, "eval_rewards/margins": 0.03387587517499924, "eval_rewards/rejected": -0.35937848687171936, "eval_runtime": 1204.6222, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 600 }, { "epoch": 0.12, "learning_rate": 4.992631880567301e-06, "logits/chosen": -1.7240911722183228, "logits/rejected": -1.3801615238189697, "logps/chosen": -132.95199584960938, "logps/rejected": -128.12086486816406, "loss": 0.6784, "rewards/accuracies": 0.75, "rewards/chosen": -0.2732979953289032, "rewards/margins": 0.31873607635498047, "rewards/rejected": -0.5920340418815613, "step": 610 }, { "epoch": 0.12, "learning_rate": 4.9912321481237616e-06, "logits/chosen": -2.0096640586853027, "logits/rejected": -1.726293921470642, "logps/chosen": -159.30947875976562, "logps/rejected": -162.3290557861328, "loss": 0.6853, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18279683589935303, "rewards/margins": 0.11771507561206818, "rewards/rejected": -0.30051189661026, "step": 620 }, { "epoch": 0.13, "learning_rate": 4.989710996539926e-06, "logits/chosen": -1.9780490398406982, "logits/rejected": -1.3797833919525146, "logps/chosen": -186.39578247070312, "logps/rejected": -185.13880920410156, "loss": 0.6733, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.439971387386322, "rewards/margins": 0.29455432295799255, "rewards/rejected": -0.7345257997512817, "step": 630 }, { "epoch": 0.13, "learning_rate": 4.988068499954578e-06, "logits/chosen": -1.4671508073806763, "logits/rejected": -1.0325344800949097, "logps/chosen": -218.70101928710938, "logps/rejected": -186.02578735351562, "loss": 0.6744, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.807999312877655, "rewards/margins": 0.2578094005584717, "rewards/rejected": -1.065808892250061, "step": 640 }, { "epoch": 0.13, "learning_rate": 4.986304738420684e-06, "logits/chosen": -1.9891719818115234, "logits/rejected": -1.5229629278182983, "logps/chosen": -200.0025634765625, "logps/rejected": -204.912109375, "loss": 0.6778, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.699751615524292, "rewards/margins": 0.165238156914711, "rewards/rejected": -0.8649896383285522, "step": 650 }, { "epoch": 0.13, "learning_rate": 4.984419797901491e-06, "logits/chosen": -1.5943307876586914, "logits/rejected": -1.0411819219589233, "logps/chosen": -233.78329467773438, "logps/rejected": -203.46670532226562, "loss": 0.675, "rewards/accuracies": 0.875, "rewards/chosen": -0.9834928512573242, "rewards/margins": 0.2832333445549011, "rewards/rejected": -1.2667262554168701, "step": 660 }, { "epoch": 0.13, "learning_rate": 4.9824137702663424e-06, "logits/chosen": -2.0208828449249268, "logits/rejected": -1.6523329019546509, "logps/chosen": -203.19168090820312, "logps/rejected": -166.45701599121094, "loss": 0.6755, "rewards/accuracies": 0.625, "rewards/chosen": -0.6380704045295715, "rewards/margins": 0.17251282930374146, "rewards/rejected": -0.810583233833313, "step": 670 }, { "epoch": 0.14, "learning_rate": 4.980286753286196e-06, "logits/chosen": -1.672461748123169, "logits/rejected": -1.2556068897247314, "logps/chosen": -183.37911987304688, "logps/rejected": -172.17251586914062, "loss": 0.6697, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6164441704750061, "rewards/margins": 0.23370078206062317, "rewards/rejected": -0.8501448631286621, "step": 680 }, { "epoch": 0.14, "learning_rate": 4.978038850628855e-06, "logits/chosen": -1.7250378131866455, "logits/rejected": -1.035827398300171, "logps/chosen": -196.78933715820312, "logps/rejected": -181.30081176757812, "loss": 0.6767, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7827109098434448, "rewards/margins": 0.34777623414993286, "rewards/rejected": -1.1304872035980225, "step": 690 }, { "epoch": 0.14, "learning_rate": 4.975670171853926e-06, "logits/chosen": -1.6087543964385986, "logits/rejected": -1.0666589736938477, "logps/chosen": -206.1292724609375, "logps/rejected": -186.0543975830078, "loss": 0.6737, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.7761890888214111, "rewards/margins": 0.3267631530761719, "rewards/rejected": -1.102952241897583, "step": 700 }, { "epoch": 0.14, "learning_rate": 4.973180832407471e-06, "logits/chosen": -1.8606401681900024, "logits/rejected": -1.5597708225250244, "logps/chosen": -197.0848388671875, "logps/rejected": -162.94955444335938, "loss": 0.6745, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5010191202163696, "rewards/margins": 0.20368461310863495, "rewards/rejected": -0.704703688621521, "step": 710 }, { "epoch": 0.14, "learning_rate": 4.970570953616383e-06, "logits/chosen": -1.6398369073867798, "logits/rejected": -1.3472591638565063, "logps/chosen": -177.24392700195312, "logps/rejected": -179.13775634765625, "loss": 0.6775, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5502495169639587, "rewards/margins": 0.2604612708091736, "rewards/rejected": -0.8107107877731323, "step": 720 }, { "epoch": 0.15, "learning_rate": 4.96784066268247e-06, "logits/chosen": -1.8312498331069946, "logits/rejected": -1.1328572034835815, "logps/chosen": -194.71649169921875, "logps/rejected": -165.8572540283203, "loss": 0.6734, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5330935716629028, "rewards/margins": 0.31923145055770874, "rewards/rejected": -0.8523250818252563, "step": 730 }, { "epoch": 0.15, "learning_rate": 4.964990092676263e-06, "logits/chosen": -1.9046881198883057, "logits/rejected": -1.3553937673568726, "logps/chosen": -190.50311279296875, "logps/rejected": -170.9064178466797, "loss": 0.6788, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5383037328720093, "rewards/margins": 0.17498290538787842, "rewards/rejected": -0.7132865786552429, "step": 740 }, { "epoch": 0.15, "learning_rate": 4.962019382530521e-06, "logits/chosen": -1.8933277130126953, "logits/rejected": -1.2457010746002197, "logps/chosen": -182.39169311523438, "logps/rejected": -156.73141479492188, "loss": 0.6738, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.34492191672325134, "rewards/margins": 0.3116303086280823, "rewards/rejected": -0.6565521955490112, "step": 750 }, { "epoch": 0.15, "learning_rate": 4.958928677033465e-06, "logits/chosen": -1.9312350749969482, "logits/rejected": -1.3866002559661865, "logps/chosen": -201.98898315429688, "logps/rejected": -155.15155029296875, "loss": 0.6774, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.38832250237464905, "rewards/margins": 0.3032090663909912, "rewards/rejected": -0.6915315389633179, "step": 760 }, { "epoch": 0.15, "learning_rate": 4.9557181268217225e-06, "logits/chosen": -1.9143766164779663, "logits/rejected": -1.3683913946151733, "logps/chosen": -190.9333953857422, "logps/rejected": -189.93711853027344, "loss": 0.673, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5070767402648926, "rewards/margins": 0.2814294695854187, "rewards/rejected": -0.7885062098503113, "step": 770 }, { "epoch": 0.16, "learning_rate": 4.9523878883729794e-06, "logits/chosen": -2.1280136108398438, "logits/rejected": -1.4883763790130615, "logps/chosen": -198.16445922851562, "logps/rejected": -171.26011657714844, "loss": 0.6758, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5425986647605896, "rewards/margins": 0.3571510910987854, "rewards/rejected": -0.8997496366500854, "step": 780 }, { "epoch": 0.16, "learning_rate": 4.94893812399836e-06, "logits/chosen": -2.0244431495666504, "logits/rejected": -1.5030972957611084, "logps/chosen": -195.46583557128906, "logps/rejected": -163.37734985351562, "loss": 0.6775, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3676135241985321, "rewards/margins": 0.23035785555839539, "rewards/rejected": -0.5979713797569275, "step": 790 }, { "epoch": 0.16, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -1.6084178686141968, "logits/rejected": -1.0610103607177734, "logps/chosen": -169.61251831054688, "logps/rejected": -154.36740112304688, "loss": 0.6719, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4508897662162781, "rewards/margins": 0.2767483592033386, "rewards/rejected": -0.7276380658149719, "step": 800 }, { "epoch": 0.16, "eval_logits/chosen": -1.8833717107772827, "eval_logits/rejected": -1.7258867025375366, "eval_logps/chosen": -293.1357421875, "eval_logps/rejected": -280.2987976074219, "eval_loss": 0.7049710750579834, "eval_rewards/accuracies": 0.477544903755188, "eval_rewards/chosen": -0.3022443950176239, "eval_rewards/margins": 0.03498363122344017, "eval_rewards/rejected": -0.33722805976867676, "eval_runtime": 1205.0663, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 800 }, { "epoch": 0.16, "learning_rate": 4.9416806958354206e-06, "logits/chosen": -1.811400055885315, "logits/rejected": -1.307423710823059, "logps/chosen": -175.14736938476562, "logps/rejected": -186.5182342529297, "loss": 0.6766, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.49655231833457947, "rewards/margins": 0.27930667996406555, "rewards/rejected": -0.775858998298645, "step": 810 }, { "epoch": 0.16, "learning_rate": 4.937873385763909e-06, "logits/chosen": -1.6264002323150635, "logits/rejected": -1.2800896167755127, "logps/chosen": -182.79551696777344, "logps/rejected": -161.4050750732422, "loss": 0.6741, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4599367082118988, "rewards/margins": 0.2940910756587982, "rewards/rejected": -0.7540278434753418, "step": 820 }, { "epoch": 0.17, "learning_rate": 4.933947257182901e-06, "logits/chosen": -1.804857611656189, "logits/rejected": -1.3498326539993286, "logps/chosen": -180.3863525390625, "logps/rejected": -169.3606719970703, "loss": 0.6733, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4786984324455261, "rewards/margins": 0.3096124827861786, "rewards/rejected": -0.7883109450340271, "step": 830 }, { "epoch": 0.17, "learning_rate": 4.9299025014463665e-06, "logits/chosen": -1.770956039428711, "logits/rejected": -1.2047390937805176, "logps/chosen": -174.56915283203125, "logps/rejected": -175.23660278320312, "loss": 0.6725, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5032157301902771, "rewards/margins": 0.28462472558021545, "rewards/rejected": -0.7878404855728149, "step": 840 }, { "epoch": 0.17, "learning_rate": 4.925739315689991e-06, "logits/chosen": -1.6953847408294678, "logits/rejected": -1.2020996809005737, "logps/chosen": -141.16734313964844, "logps/rejected": -139.93963623046875, "loss": 0.6777, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.40812236070632935, "rewards/margins": 0.22488458454608917, "rewards/rejected": -0.6330068707466125, "step": 850 }, { "epoch": 0.17, "learning_rate": 4.921457902821578e-06, "logits/chosen": -1.9823923110961914, "logits/rejected": -1.317838191986084, "logps/chosen": -184.2607421875, "logps/rejected": -162.5825958251953, "loss": 0.6738, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.38280805945396423, "rewards/margins": 0.35453933477401733, "rewards/rejected": -0.737347424030304, "step": 860 }, { "epoch": 0.17, "learning_rate": 4.917058471511149e-06, "logits/chosen": -1.9153846502304077, "logits/rejected": -1.565412998199463, "logps/chosen": -169.1078338623047, "logps/rejected": -180.0596466064453, "loss": 0.6734, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4009065628051758, "rewards/margins": 0.19822433590888977, "rewards/rejected": -0.5991309285163879, "step": 870 }, { "epoch": 0.18, "learning_rate": 4.912541236180779e-06, "logits/chosen": -1.785884141921997, "logits/rejected": -1.4236456155776978, "logps/chosen": -170.08309936523438, "logps/rejected": -147.41513061523438, "loss": 0.6765, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6248822212219238, "rewards/margins": 0.1715964525938034, "rewards/rejected": -0.7964786887168884, "step": 880 }, { "epoch": 0.18, "learning_rate": 4.907906416994146e-06, "logits/chosen": -1.9555022716522217, "logits/rejected": -1.305280089378357, "logps/chosen": -222.47885131835938, "logps/rejected": -181.51956176757812, "loss": 0.6708, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4643624424934387, "rewards/margins": 0.35030364990234375, "rewards/rejected": -0.8146661520004272, "step": 890 }, { "epoch": 0.18, "learning_rate": 4.903154239845798e-06, "logits/chosen": -1.7588160037994385, "logits/rejected": -1.1415255069732666, "logps/chosen": -194.46206665039062, "logps/rejected": -172.75965881347656, "loss": 0.6733, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.5838688015937805, "rewards/margins": 0.3039501905441284, "rewards/rejected": -0.8878189325332642, "step": 900 }, { "epoch": 0.18, "learning_rate": 4.898284936350144e-06, "logits/chosen": -1.6963226795196533, "logits/rejected": -1.2956384420394897, "logps/chosen": -156.32806396484375, "logps/rejected": -153.4464569091797, "loss": 0.6759, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.619235634803772, "rewards/margins": 0.22164157032966614, "rewards/rejected": -0.8408772349357605, "step": 910 }, { "epoch": 0.18, "learning_rate": 4.893298743830168e-06, "logits/chosen": -1.951059341430664, "logits/rejected": -1.313233733177185, "logps/chosen": -197.42349243164062, "logps/rejected": -154.0541229248047, "loss": 0.6729, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4594130516052246, "rewards/margins": 0.27485281229019165, "rewards/rejected": -0.7342658042907715, "step": 920 }, { "epoch": 0.19, "learning_rate": 4.888195905305859e-06, "logits/chosen": -1.854839563369751, "logits/rejected": -1.276881456375122, "logps/chosen": -211.1772918701172, "logps/rejected": -183.47764587402344, "loss": 0.6785, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4075341820716858, "rewards/margins": 0.2958530783653259, "rewards/rejected": -0.7033872604370117, "step": 930 }, { "epoch": 0.19, "learning_rate": 4.882976669482368e-06, "logits/chosen": -1.7859159708023071, "logits/rejected": -1.1793581247329712, "logps/chosen": -229.04531860351562, "logps/rejected": -190.11834716796875, "loss": 0.6752, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.5149229764938354, "rewards/margins": 0.31546348333358765, "rewards/rejected": -0.8303864598274231, "step": 940 }, { "epoch": 0.19, "learning_rate": 4.8776412907378845e-06, "logits/chosen": -1.853342056274414, "logits/rejected": -1.3333466053009033, "logps/chosen": -187.91098022460938, "logps/rejected": -162.39129638671875, "loss": 0.676, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6136583685874939, "rewards/margins": 0.25150948762893677, "rewards/rejected": -0.8651677966117859, "step": 950 }, { "epoch": 0.19, "learning_rate": 4.8721900291112415e-06, "logits/chosen": -2.0543694496154785, "logits/rejected": -1.4375483989715576, "logps/chosen": -212.8982391357422, "logps/rejected": -178.7359619140625, "loss": 0.6724, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.45481619238853455, "rewards/margins": 0.25803133845329285, "rewards/rejected": -0.7128475904464722, "step": 960 }, { "epoch": 0.19, "learning_rate": 4.866623150289241e-06, "logits/chosen": -1.971003770828247, "logits/rejected": -1.5686346292495728, "logps/chosen": -184.4425811767578, "logps/rejected": -157.7111053466797, "loss": 0.6753, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5632898211479187, "rewards/margins": 0.24243955314159393, "rewards/rejected": -0.8057295083999634, "step": 970 }, { "epoch": 0.2, "learning_rate": 4.860940925593703e-06, "logits/chosen": -1.852240800857544, "logits/rejected": -1.2510197162628174, "logps/chosen": -190.4782257080078, "logps/rejected": -177.23397827148438, "loss": 0.6761, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5093380808830261, "rewards/margins": 0.24473080039024353, "rewards/rejected": -0.754068911075592, "step": 980 }, { "epoch": 0.2, "learning_rate": 4.855143631968242e-06, "logits/chosen": -1.6410949230194092, "logits/rejected": -1.245324969291687, "logps/chosen": -199.9717559814453, "logps/rejected": -188.9312744140625, "loss": 0.6812, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5869057774543762, "rewards/margins": 0.2156517207622528, "rewards/rejected": -0.8025575876235962, "step": 990 }, { "epoch": 0.2, "learning_rate": 4.849231551964771e-06, "logits/chosen": -1.9986374378204346, "logits/rejected": -1.521784782409668, "logps/chosen": -182.34677124023438, "logps/rejected": -175.22216796875, "loss": 0.6777, "rewards/accuracies": 0.75, "rewards/chosen": -0.38654035329818726, "rewards/margins": 0.2029152810573578, "rewards/rejected": -0.5894556641578674, "step": 1000 }, { "epoch": 0.2, "eval_logits/chosen": -1.868129014968872, "eval_logits/rejected": -1.7123078107833862, "eval_logps/chosen": -292.3885803222656, "eval_logps/rejected": -277.9925842285156, "eval_loss": 0.7024898529052734, "eval_rewards/accuracies": 0.44610777497291565, "eval_rewards/chosen": -0.29477304220199585, "eval_rewards/margins": 0.019392510876059532, "eval_rewards/rejected": -0.31416556239128113, "eval_runtime": 1205.3887, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.277, "step": 1000 }, { "epoch": 0.2, "learning_rate": 4.84320497372973e-06, "logits/chosen": -1.676690697669983, "logits/rejected": -1.148404836654663, "logps/chosen": -192.23660278320312, "logps/rejected": -171.73147583007812, "loss": 0.6748, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.45321711897850037, "rewards/margins": 0.23036496341228485, "rewards/rejected": -0.6835820078849792, "step": 1010 }, { "epoch": 0.2, "learning_rate": 4.837064190990036e-06, "logits/chosen": -1.8550653457641602, "logits/rejected": -1.4675614833831787, "logps/chosen": -192.9080047607422, "logps/rejected": -170.35867309570312, "loss": 0.6778, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4775848984718323, "rewards/margins": 0.22671504318714142, "rewards/rejected": -0.7042999863624573, "step": 1020 }, { "epoch": 0.21, "learning_rate": 4.830809503038781e-06, "logits/chosen": -2.126335620880127, "logits/rejected": -1.5282647609710693, "logps/chosen": -183.46963500976562, "logps/rejected": -168.2025909423828, "loss": 0.6754, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3205109238624573, "rewards/margins": 0.28869763016700745, "rewards/rejected": -0.6092085242271423, "step": 1030 }, { "epoch": 0.21, "learning_rate": 4.824441214720629e-06, "logits/chosen": -2.0871081352233887, "logits/rejected": -1.7713587284088135, "logps/chosen": -163.38116455078125, "logps/rejected": -138.7725830078125, "loss": 0.6785, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.34737804532051086, "rewards/margins": 0.16627982258796692, "rewards/rejected": -0.5136578679084778, "step": 1040 }, { "epoch": 0.21, "learning_rate": 4.817959636416969e-06, "logits/chosen": -1.9491631984710693, "logits/rejected": -1.560300588607788, "logps/chosen": -176.78421020507812, "logps/rejected": -197.93646240234375, "loss": 0.674, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.339080274105072, "rewards/margins": 0.22567510604858398, "rewards/rejected": -0.5647553205490112, "step": 1050 }, { "epoch": 0.21, "learning_rate": 4.811365084030784e-06, "logits/chosen": -1.8286950588226318, "logits/rejected": -1.3668581247329712, "logps/chosen": -174.78305053710938, "logps/rejected": -165.62896728515625, "loss": 0.672, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4502977430820465, "rewards/margins": 0.2698357105255127, "rewards/rejected": -0.7201334238052368, "step": 1060 }, { "epoch": 0.21, "learning_rate": 4.804657878971252e-06, "logits/chosen": -1.7786099910736084, "logits/rejected": -1.3271209001541138, "logps/chosen": -156.68606567382812, "logps/rejected": -176.6844024658203, "loss": 0.6732, "rewards/accuracies": 0.875, "rewards/chosen": -0.49797964096069336, "rewards/margins": 0.23241038620471954, "rewards/rejected": -0.7303899526596069, "step": 1070 }, { "epoch": 0.22, "learning_rate": 4.7978383481380865e-06, "logits/chosen": -1.7390186786651611, "logits/rejected": -1.364408254623413, "logps/chosen": -182.0269775390625, "logps/rejected": -175.05148315429688, "loss": 0.677, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6945708394050598, "rewards/margins": 0.17272751033306122, "rewards/rejected": -0.8672983050346375, "step": 1080 }, { "epoch": 0.22, "learning_rate": 4.790906823905599e-06, "logits/chosen": -1.8929744958877563, "logits/rejected": -1.4090081453323364, "logps/chosen": -194.8509521484375, "logps/rejected": -164.8671875, "loss": 0.6763, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5219094753265381, "rewards/margins": 0.30347561836242676, "rewards/rejected": -0.8253852128982544, "step": 1090 }, { "epoch": 0.22, "learning_rate": 4.783863644106502e-06, "logits/chosen": -1.8908122777938843, "logits/rejected": -1.2591350078582764, "logps/chosen": -199.1201629638672, "logps/rejected": -183.76881408691406, "loss": 0.6772, "rewards/accuracies": 0.875, "rewards/chosen": -0.556411623954773, "rewards/margins": 0.29149144887924194, "rewards/rejected": -0.8479030728340149, "step": 1100 }, { "epoch": 0.22, "learning_rate": 4.776709152015443e-06, "logits/chosen": -1.950014352798462, "logits/rejected": -1.3147194385528564, "logps/chosen": -198.1878662109375, "logps/rejected": -183.84280395507812, "loss": 0.6755, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4928820729255676, "rewards/margins": 0.22216923534870148, "rewards/rejected": -0.7150512933731079, "step": 1110 }, { "epoch": 0.22, "learning_rate": 4.769443696332272e-06, "logits/chosen": -1.8085720539093018, "logits/rejected": -1.3025529384613037, "logps/chosen": -204.54324340820312, "logps/rejected": -166.63211059570312, "loss": 0.6736, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.48130378127098083, "rewards/margins": 0.2872456908226013, "rewards/rejected": -0.7685495018959045, "step": 1120 }, { "epoch": 0.23, "learning_rate": 4.762067631165049e-06, "logits/chosen": -1.7942638397216797, "logits/rejected": -1.2301878929138184, "logps/chosen": -188.95639038085938, "logps/rejected": -171.255126953125, "loss": 0.6746, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6285373568534851, "rewards/margins": 0.3076931834220886, "rewards/rejected": -0.9362305402755737, "step": 1130 }, { "epoch": 0.23, "learning_rate": 4.754581316012785e-06, "logits/chosen": -1.9091819524765015, "logits/rejected": -1.4388093948364258, "logps/chosen": -187.17684936523438, "logps/rejected": -168.9630126953125, "loss": 0.6747, "rewards/accuracies": 0.75, "rewards/chosen": -0.4695571959018707, "rewards/margins": 0.24600252509117126, "rewards/rejected": -0.7155596613883972, "step": 1140 }, { "epoch": 0.23, "learning_rate": 4.746985115747918e-06, "logits/chosen": -1.8895679712295532, "logits/rejected": -1.5251388549804688, "logps/chosen": -198.84365844726562, "logps/rejected": -183.75210571289062, "loss": 0.6696, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5533050298690796, "rewards/margins": 0.25998106598854065, "rewards/rejected": -0.8132861256599426, "step": 1150 }, { "epoch": 0.23, "learning_rate": 4.7392794005985324e-06, "logits/chosen": -1.7853431701660156, "logits/rejected": -1.2373676300048828, "logps/chosen": -187.1587677001953, "logps/rejected": -180.57931518554688, "loss": 0.6752, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6626554727554321, "rewards/margins": 0.3021981716156006, "rewards/rejected": -0.9648534655570984, "step": 1160 }, { "epoch": 0.23, "learning_rate": 4.731464546130315e-06, "logits/chosen": -1.8573204278945923, "logits/rejected": -1.1506048440933228, "logps/chosen": -167.44760131835938, "logps/rejected": -167.46066284179688, "loss": 0.6748, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5433225035667419, "rewards/margins": 0.2685738503932953, "rewards/rejected": -0.8118965029716492, "step": 1170 }, { "epoch": 0.24, "learning_rate": 4.723540933228245e-06, "logits/chosen": -1.7069810628890991, "logits/rejected": -1.2688992023468018, "logps/chosen": -184.98800659179688, "logps/rejected": -171.51011657714844, "loss": 0.6729, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6716406345367432, "rewards/margins": 0.20548729598522186, "rewards/rejected": -0.8771279454231262, "step": 1180 }, { "epoch": 0.24, "learning_rate": 4.715508948078037e-06, "logits/chosen": -2.055290937423706, "logits/rejected": -1.5707298517227173, "logps/chosen": -177.72817993164062, "logps/rejected": -153.8712615966797, "loss": 0.6768, "rewards/accuracies": 0.75, "rewards/chosen": -0.5478841066360474, "rewards/margins": 0.25464001297950745, "rewards/rejected": -0.8025240898132324, "step": 1190 }, { "epoch": 0.24, "learning_rate": 4.707368982147318e-06, "logits/chosen": -1.8850421905517578, "logits/rejected": -1.452816367149353, "logps/chosen": -173.22738647460938, "logps/rejected": -164.39637756347656, "loss": 0.6724, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5675324201583862, "rewards/margins": 0.171905055642128, "rewards/rejected": -0.739437460899353, "step": 1200 }, { "epoch": 0.24, "eval_logits/chosen": -1.893850564956665, "eval_logits/rejected": -1.734560251235962, "eval_logps/chosen": -305.4026794433594, "eval_logps/rejected": -293.7763366699219, "eval_loss": 0.7088657021522522, "eval_rewards/accuracies": 0.4865269362926483, "eval_rewards/chosen": -0.42491355538368225, "eval_rewards/margins": 0.04709017649292946, "eval_rewards/rejected": -0.4720037281513214, "eval_runtime": 1205.0907, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 1200 }, { "epoch": 0.24, "learning_rate": 4.699121432166542e-06, "logits/chosen": -1.918455719947815, "logits/rejected": -1.4015181064605713, "logps/chosen": -195.1612548828125, "logps/rejected": -187.83090209960938, "loss": 0.6757, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5902829766273499, "rewards/margins": 0.29539918899536133, "rewards/rejected": -0.8856821060180664, "step": 1210 }, { "epoch": 0.24, "learning_rate": 4.690766700109659e-06, "logits/chosen": -2.003971576690674, "logits/rejected": -1.3515846729278564, "logps/chosen": -203.52975463867188, "logps/rejected": -196.17164611816406, "loss": 0.6726, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5324481725692749, "rewards/margins": 0.28604885935783386, "rewards/rejected": -0.8184970021247864, "step": 1220 }, { "epoch": 0.25, "learning_rate": 4.682305193174524e-06, "logits/chosen": -1.8011804819107056, "logits/rejected": -1.1995935440063477, "logps/chosen": -197.48532104492188, "logps/rejected": -192.8670654296875, "loss": 0.673, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5428311824798584, "rewards/margins": 0.360750287771225, "rewards/rejected": -0.9035813212394714, "step": 1230 }, { "epoch": 0.25, "learning_rate": 4.673737323763048e-06, "logits/chosen": -1.8365986347198486, "logits/rejected": -1.225276231765747, "logps/chosen": -180.8289337158203, "logps/rejected": -184.90676879882812, "loss": 0.6704, "rewards/accuracies": 0.875, "rewards/chosen": -0.6013752222061157, "rewards/margins": 0.2983767092227936, "rewards/rejected": -0.8997519612312317, "step": 1240 }, { "epoch": 0.25, "learning_rate": 4.665063509461098e-06, "logits/chosen": -1.6209691762924194, "logits/rejected": -1.368153691291809, "logps/chosen": -187.81460571289062, "logps/rejected": -181.2181854248047, "loss": 0.6753, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6356116533279419, "rewards/margins": 0.2378757894039154, "rewards/rejected": -0.8734874725341797, "step": 1250 }, { "epoch": 0.25, "learning_rate": 4.656284173018144e-06, "logits/chosen": -1.6858351230621338, "logits/rejected": -1.3207067251205444, "logps/chosen": -192.0369110107422, "logps/rejected": -189.73312377929688, "loss": 0.6713, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.4954409599304199, "rewards/margins": 0.29821908473968506, "rewards/rejected": -0.7936599850654602, "step": 1260 }, { "epoch": 0.25, "learning_rate": 4.6473997423266615e-06, "logits/chosen": -2.0042178630828857, "logits/rejected": -1.429304838180542, "logps/chosen": -176.9885711669922, "logps/rejected": -174.02542114257812, "loss": 0.6769, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.48566824197769165, "rewards/margins": 0.24618959426879883, "rewards/rejected": -0.7318578362464905, "step": 1270 }, { "epoch": 0.26, "learning_rate": 4.638410650401267e-06, "logits/chosen": -1.8819782733917236, "logits/rejected": -1.2729324102401733, "logps/chosen": -171.9393310546875, "logps/rejected": -182.9014434814453, "loss": 0.6713, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5428531765937805, "rewards/margins": 0.299246221780777, "rewards/rejected": -0.8420994877815247, "step": 1280 }, { "epoch": 0.26, "learning_rate": 4.62931733535762e-06, "logits/chosen": -1.9484580755233765, "logits/rejected": -1.3897769451141357, "logps/chosen": -199.72267150878906, "logps/rejected": -189.85775756835938, "loss": 0.6762, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5705099701881409, "rewards/margins": 0.27716928720474243, "rewards/rejected": -0.8476793169975281, "step": 1290 }, { "epoch": 0.26, "learning_rate": 4.620120240391065e-06, "logits/chosen": -2.085118293762207, "logits/rejected": -1.556470274925232, "logps/chosen": -216.3814239501953, "logps/rejected": -182.98602294921875, "loss": 0.6735, "rewards/accuracies": 0.875, "rewards/chosen": -0.4597252905368805, "rewards/margins": 0.25358858704566956, "rewards/rejected": -0.71331387758255, "step": 1300 }, { "epoch": 0.26, "learning_rate": 4.610819813755038e-06, "logits/chosen": -1.871151328086853, "logits/rejected": -1.485316514968872, "logps/chosen": -172.58326721191406, "logps/rejected": -174.2667999267578, "loss": 0.6698, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4300549626350403, "rewards/margins": 0.2965255677700043, "rewards/rejected": -0.7265805006027222, "step": 1310 }, { "epoch": 0.26, "learning_rate": 4.601416508739211e-06, "logits/chosen": -2.105222225189209, "logits/rejected": -1.4934980869293213, "logps/chosen": -192.4054412841797, "logps/rejected": -179.7006072998047, "loss": 0.6727, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.40534812211990356, "rewards/margins": 0.31178298592567444, "rewards/rejected": -0.7171310186386108, "step": 1320 }, { "epoch": 0.27, "learning_rate": 4.591910783647405e-06, "logits/chosen": -1.9953113794326782, "logits/rejected": -1.5056662559509277, "logps/chosen": -159.1852264404297, "logps/rejected": -187.34786987304688, "loss": 0.6686, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.47505664825439453, "rewards/margins": 0.2990500032901764, "rewards/rejected": -0.7741067409515381, "step": 1330 }, { "epoch": 0.27, "learning_rate": 4.582303101775249e-06, "logits/chosen": -1.8186442852020264, "logits/rejected": -1.3767964839935303, "logps/chosen": -188.58029174804688, "logps/rejected": -165.46054077148438, "loss": 0.6712, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6385612487792969, "rewards/margins": 0.2523365914821625, "rewards/rejected": -0.8908978700637817, "step": 1340 }, { "epoch": 0.27, "learning_rate": 4.572593931387604e-06, "logits/chosen": -1.8516912460327148, "logits/rejected": -1.3371303081512451, "logps/chosen": -171.63211059570312, "logps/rejected": -167.9397735595703, "loss": 0.6733, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5888071060180664, "rewards/margins": 0.28815943002700806, "rewards/rejected": -0.8769665956497192, "step": 1350 }, { "epoch": 0.27, "learning_rate": 4.562783745695738e-06, "logits/chosen": -1.8417026996612549, "logits/rejected": -1.3612366914749146, "logps/chosen": -185.82627868652344, "logps/rejected": -210.0791015625, "loss": 0.676, "rewards/accuracies": 0.75, "rewards/chosen": -0.48426303267478943, "rewards/margins": 0.29424479603767395, "rewards/rejected": -0.7785078883171082, "step": 1360 }, { "epoch": 0.27, "learning_rate": 4.55287302283426e-06, "logits/chosen": -1.9550968408584595, "logits/rejected": -1.3119685649871826, "logps/chosen": -189.0598602294922, "logps/rejected": -182.3198699951172, "loss": 0.6729, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5813578963279724, "rewards/margins": 0.3578397333621979, "rewards/rejected": -0.9391976594924927, "step": 1370 }, { "epoch": 0.28, "learning_rate": 4.542862245837821e-06, "logits/chosen": -1.9631887674331665, "logits/rejected": -1.4077445268630981, "logps/chosen": -192.8140411376953, "logps/rejected": -179.6833953857422, "loss": 0.6742, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.48491722345352173, "rewards/margins": 0.2727327346801758, "rewards/rejected": -0.7576500177383423, "step": 1380 }, { "epoch": 0.28, "learning_rate": 4.5327519026175694e-06, "logits/chosen": -1.7780221700668335, "logits/rejected": -1.230252981185913, "logps/chosen": -185.67477416992188, "logps/rejected": -155.61874389648438, "loss": 0.6738, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39965492486953735, "rewards/margins": 0.24012355506420135, "rewards/rejected": -0.6397784948348999, "step": 1390 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -2.0115091800689697, "logits/rejected": -1.5542072057724, "logps/chosen": -196.05990600585938, "logps/rejected": -192.10707092285156, "loss": 0.6763, "rewards/accuracies": 0.875, "rewards/chosen": -0.6453132033348083, "rewards/margins": 0.2437974214553833, "rewards/rejected": -0.8891106843948364, "step": 1400 }, { "epoch": 0.28, "eval_logits/chosen": -1.8560092449188232, "eval_logits/rejected": -1.6995104551315308, "eval_logps/chosen": -300.4253845214844, "eval_logps/rejected": -288.36663818359375, "eval_loss": 0.7064846754074097, "eval_rewards/accuracies": 0.47455090284347534, "eval_rewards/chosen": -0.37514084577560425, "eval_rewards/margins": 0.04276532307267189, "eval_rewards/rejected": -0.41790616512298584, "eval_runtime": 1205.0256, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 1400 }, { "epoch": 0.28, "learning_rate": 4.512234493389785e-06, "logits/chosen": -1.8329970836639404, "logits/rejected": -1.3327699899673462, "logps/chosen": -184.90328979492188, "logps/rejected": -175.23184204101562, "loss": 0.6689, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6536052823066711, "rewards/margins": 0.3339831233024597, "rewards/rejected": -0.9875882863998413, "step": 1410 }, { "epoch": 0.28, "learning_rate": 4.501828427371834e-06, "logits/chosen": -1.804976224899292, "logits/rejected": -1.358144760131836, "logps/chosen": -214.5706787109375, "logps/rejected": -172.1238250732422, "loss": 0.6754, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5686611533164978, "rewards/margins": 0.17065295577049255, "rewards/rejected": -0.739314079284668, "step": 1420 }, { "epoch": 0.29, "learning_rate": 4.491324795060491e-06, "logits/chosen": -1.5048315525054932, "logits/rejected": -1.0389209985733032, "logps/chosen": -197.77413940429688, "logps/rejected": -161.22720336914062, "loss": 0.6738, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6495161056518555, "rewards/margins": 0.2653394043445587, "rewards/rejected": -0.9148554801940918, "step": 1430 }, { "epoch": 0.29, "learning_rate": 4.4807241083879774e-06, "logits/chosen": -1.8573486804962158, "logits/rejected": -1.4382604360580444, "logps/chosen": -195.03421020507812, "logps/rejected": -184.9160919189453, "loss": 0.6766, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.8099320530891418, "rewards/margins": 0.21673361957073212, "rewards/rejected": -1.0266656875610352, "step": 1440 }, { "epoch": 0.29, "learning_rate": 4.470026884016805e-06, "logits/chosen": -2.022935390472412, "logits/rejected": -1.5532037019729614, "logps/chosen": -273.4797058105469, "logps/rejected": -203.18983459472656, "loss": 0.6745, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4980449080467224, "rewards/margins": 0.28645652532577515, "rewards/rejected": -0.7845014333724976, "step": 1450 }, { "epoch": 0.29, "learning_rate": 4.4592336433146e-06, "logits/chosen": -1.9035594463348389, "logits/rejected": -1.5136336088180542, "logps/chosen": -170.8654327392578, "logps/rejected": -160.56967163085938, "loss": 0.6744, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6085073947906494, "rewards/margins": 0.2425815612077713, "rewards/rejected": -0.8510890007019043, "step": 1460 }, { "epoch": 0.29, "learning_rate": 4.448344912328686e-06, "logits/chosen": -1.879050850868225, "logits/rejected": -1.3904733657836914, "logps/chosen": -198.27976989746094, "logps/rejected": -184.63699340820312, "loss": 0.6728, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.8053019642829895, "rewards/margins": 0.24284525215625763, "rewards/rejected": -1.048147201538086, "step": 1470 }, { "epoch": 0.3, "learning_rate": 4.437361221760449e-06, "logits/chosen": -1.8372528553009033, "logits/rejected": -1.2644575834274292, "logps/chosen": -196.80941772460938, "logps/rejected": -183.17239379882812, "loss": 0.6749, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7457159757614136, "rewards/margins": 0.31047171354293823, "rewards/rejected": -1.0561877489089966, "step": 1480 }, { "epoch": 0.3, "learning_rate": 4.426283106939474e-06, "logits/chosen": -1.7805640697479248, "logits/rejected": -1.4001415967941284, "logps/chosen": -206.7699432373047, "logps/rejected": -172.9145050048828, "loss": 0.6771, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5146921873092651, "rewards/margins": 0.2523936927318573, "rewards/rejected": -0.7670857906341553, "step": 1490 }, { "epoch": 0.3, "learning_rate": 4.415111107797445e-06, "logits/chosen": -1.8258165121078491, "logits/rejected": -1.3966068029403687, "logps/chosen": -215.8752899169922, "logps/rejected": -175.91732788085938, "loss": 0.673, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5088901519775391, "rewards/margins": 0.21694640815258026, "rewards/rejected": -0.7258366346359253, "step": 1500 }, { "epoch": 0.3, "learning_rate": 4.403845768841842e-06, "logits/chosen": -1.8146671056747437, "logits/rejected": -1.1305155754089355, "logps/chosen": -173.45106506347656, "logps/rejected": -171.32388305664062, "loss": 0.6756, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5853989720344543, "rewards/margins": 0.37376365065574646, "rewards/rejected": -0.9591625928878784, "step": 1510 }, { "epoch": 0.3, "learning_rate": 4.3924876391293915e-06, "logits/chosen": -1.892703652381897, "logits/rejected": -1.4267150163650513, "logps/chosen": -194.65892028808594, "logps/rejected": -173.43008422851562, "loss": 0.6719, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5559982061386108, "rewards/margins": 0.2738884687423706, "rewards/rejected": -0.8298866152763367, "step": 1520 }, { "epoch": 0.31, "learning_rate": 4.381037272239311e-06, "logits/chosen": -1.7748878002166748, "logits/rejected": -1.4393236637115479, "logps/chosen": -176.77133178710938, "logps/rejected": -156.44662475585938, "loss": 0.674, "rewards/accuracies": 0.75, "rewards/chosen": -0.5596617460250854, "rewards/margins": 0.25333333015441895, "rewards/rejected": -0.8129950761795044, "step": 1530 }, { "epoch": 0.31, "learning_rate": 4.36949522624633e-06, "logits/chosen": -2.0552220344543457, "logits/rejected": -1.681679129600525, "logps/chosen": -179.95986938476562, "logps/rejected": -182.3250274658203, "loss": 0.6738, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6255384683609009, "rewards/margins": 0.29410865902900696, "rewards/rejected": -0.9196470975875854, "step": 1540 }, { "epoch": 0.31, "learning_rate": 4.357862063693486e-06, "logits/chosen": -1.6872880458831787, "logits/rejected": -1.2098948955535889, "logps/chosen": -181.95401000976562, "logps/rejected": -180.86837768554688, "loss": 0.6713, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5915932655334473, "rewards/margins": 0.30716854333877563, "rewards/rejected": -0.8987618684768677, "step": 1550 }, { "epoch": 0.31, "learning_rate": 4.346138351564711e-06, "logits/chosen": -1.7618337869644165, "logits/rejected": -1.3654136657714844, "logps/chosen": -188.86070251464844, "logps/rejected": -178.79296875, "loss": 0.6769, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6212700009346008, "rewards/margins": 0.27459949254989624, "rewards/rejected": -0.8958694338798523, "step": 1560 }, { "epoch": 0.31, "learning_rate": 4.334324661257191e-06, "logits/chosen": -2.091728687286377, "logits/rejected": -1.3397648334503174, "logps/chosen": -203.05584716796875, "logps/rejected": -179.69207763671875, "loss": 0.6779, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5271373987197876, "rewards/margins": 0.4177669584751129, "rewards/rejected": -0.9449043273925781, "step": 1570 }, { "epoch": 0.32, "learning_rate": 4.322421568553529e-06, "logits/chosen": -1.81011164188385, "logits/rejected": -1.409149408340454, "logps/chosen": -198.5722198486328, "logps/rejected": -183.05738830566406, "loss": 0.6794, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5413479208946228, "rewards/margins": 0.22018906474113464, "rewards/rejected": -0.7615369558334351, "step": 1580 }, { "epoch": 0.32, "learning_rate": 4.3104296535936695e-06, "logits/chosen": -2.020843982696533, "logits/rejected": -1.3089649677276611, "logps/chosen": -225.20901489257812, "logps/rejected": -203.9239959716797, "loss": 0.6744, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.46905359625816345, "rewards/margins": 0.3004087209701538, "rewards/rejected": -0.7694622278213501, "step": 1590 }, { "epoch": 0.32, "learning_rate": 4.2983495008466285e-06, "logits/chosen": -1.7776668071746826, "logits/rejected": -1.2039577960968018, "logps/chosen": -199.06443786621094, "logps/rejected": -171.98890686035156, "loss": 0.6729, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5918269157409668, "rewards/margins": 0.3498297929763794, "rewards/rejected": -0.9416567087173462, "step": 1600 }, { "epoch": 0.32, "eval_logits/chosen": -1.8920459747314453, "eval_logits/rejected": -1.7340258359909058, "eval_logps/chosen": -296.7008056640625, "eval_logps/rejected": -282.5754699707031, "eval_loss": 0.7084392309188843, "eval_rewards/accuracies": 0.4640718698501587, "eval_rewards/chosen": -0.33789509534835815, "eval_rewards/margins": 0.022099362686276436, "eval_rewards/rejected": -0.35999447107315063, "eval_runtime": 1205.7084, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.277, "step": 1600 }, { "epoch": 0.32, "learning_rate": 4.286181699082008e-06, "logits/chosen": -1.9618968963623047, "logits/rejected": -1.5781219005584717, "logps/chosen": -166.61546325683594, "logps/rejected": -153.8035125732422, "loss": 0.6739, "rewards/accuracies": 0.75, "rewards/chosen": -0.5266655683517456, "rewards/margins": 0.27810847759246826, "rewards/rejected": -0.8047741055488586, "step": 1610 }, { "epoch": 0.32, "learning_rate": 4.273926841341303e-06, "logits/chosen": -1.9839811325073242, "logits/rejected": -1.3000690937042236, "logps/chosen": -179.24551391601562, "logps/rejected": -168.8354949951172, "loss": 0.6736, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.45065417885780334, "rewards/margins": 0.419107049703598, "rewards/rejected": -0.8697612881660461, "step": 1620 }, { "epoch": 0.33, "learning_rate": 4.261585524908987e-06, "logits/chosen": -1.9049394130706787, "logits/rejected": -1.30315363407135, "logps/chosen": -215.952880859375, "logps/rejected": -176.99282836914062, "loss": 0.6725, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4692316949367523, "rewards/margins": 0.2636789381504059, "rewards/rejected": -0.7329106330871582, "step": 1630 }, { "epoch": 0.33, "learning_rate": 4.249158351283414e-06, "logits/chosen": -2.1853251457214355, "logits/rejected": -1.4617363214492798, "logps/chosen": -182.714111328125, "logps/rejected": -165.86322021484375, "loss": 0.6736, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.42149966955184937, "rewards/margins": 0.2859262526035309, "rewards/rejected": -0.7074259519577026, "step": 1640 }, { "epoch": 0.33, "learning_rate": 4.236645926147493e-06, "logits/chosen": -1.8542239665985107, "logits/rejected": -1.5464783906936646, "logps/chosen": -178.0828094482422, "logps/rejected": -165.61489868164062, "loss": 0.6743, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5935237407684326, "rewards/margins": 0.2646033763885498, "rewards/rejected": -0.8581271171569824, "step": 1650 }, { "epoch": 0.33, "learning_rate": 4.224048859339175e-06, "logits/chosen": -1.7522939443588257, "logits/rejected": -1.3118393421173096, "logps/chosen": -158.6810760498047, "logps/rejected": -160.56710815429688, "loss": 0.6731, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6008865833282471, "rewards/margins": 0.241220161318779, "rewards/rejected": -0.8421066999435425, "step": 1660 }, { "epoch": 0.33, "learning_rate": 4.211367764821722e-06, "logits/chosen": -1.9663273096084595, "logits/rejected": -1.23604416847229, "logps/chosen": -193.88662719726562, "logps/rejected": -190.10812377929688, "loss": 0.6743, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5975052118301392, "rewards/margins": 0.2660571336746216, "rewards/rejected": -0.8635624051094055, "step": 1670 }, { "epoch": 0.34, "learning_rate": 4.198603260653792e-06, "logits/chosen": -1.7496057748794556, "logits/rejected": -1.2725489139556885, "logps/chosen": -213.86666870117188, "logps/rejected": -201.71604919433594, "loss": 0.6751, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6049782633781433, "rewards/margins": 0.30520889163017273, "rewards/rejected": -0.9101872444152832, "step": 1680 }, { "epoch": 0.34, "learning_rate": 4.185755968959308e-06, "logits/chosen": -2.0263009071350098, "logits/rejected": -1.60398268699646, "logps/chosen": -174.05746459960938, "logps/rejected": -183.24807739257812, "loss": 0.669, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6037923693656921, "rewards/margins": 0.250485897064209, "rewards/rejected": -0.8542783856391907, "step": 1690 }, { "epoch": 0.34, "learning_rate": 4.172826515897146e-06, "logits/chosen": -1.4789354801177979, "logits/rejected": -1.1580722332000732, "logps/chosen": -175.3059844970703, "logps/rejected": -177.5846405029297, "loss": 0.6778, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7000223398208618, "rewards/margins": 0.22912335395812988, "rewards/rejected": -0.9291456341743469, "step": 1700 }, { "epoch": 0.34, "learning_rate": 4.159815531630604e-06, "logits/chosen": -1.9340108633041382, "logits/rejected": -1.4221322536468506, "logps/chosen": -242.05789184570312, "logps/rejected": -201.46099853515625, "loss": 0.6734, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4981282353401184, "rewards/margins": 0.3075557351112366, "rewards/rejected": -0.805683970451355, "step": 1710 }, { "epoch": 0.34, "learning_rate": 4.146723650296701e-06, "logits/chosen": -1.9322497844696045, "logits/rejected": -1.5705821514129639, "logps/chosen": -195.6395721435547, "logps/rejected": -182.41439819335938, "loss": 0.6748, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6157048344612122, "rewards/margins": 0.28599825501441956, "rewards/rejected": -0.9017030596733093, "step": 1720 }, { "epoch": 0.35, "learning_rate": 4.133551509975264e-06, "logits/chosen": -1.795940637588501, "logits/rejected": -1.4218677282333374, "logps/chosen": -208.96762084960938, "logps/rejected": -188.20848083496094, "loss": 0.6751, "rewards/accuracies": 0.75, "rewards/chosen": -0.7449847459793091, "rewards/margins": 0.25646236538887024, "rewards/rejected": -1.0014469623565674, "step": 1730 }, { "epoch": 0.35, "learning_rate": 4.120299752657828e-06, "logits/chosen": -1.8599283695220947, "logits/rejected": -1.3783822059631348, "logps/chosen": -213.2761688232422, "logps/rejected": -203.79995727539062, "loss": 0.6741, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.753442645072937, "rewards/margins": 0.27769041061401367, "rewards/rejected": -1.0311329364776611, "step": 1740 }, { "epoch": 0.35, "learning_rate": 4.106969024216348e-06, "logits/chosen": -1.712114691734314, "logits/rejected": -1.324386477470398, "logps/chosen": -203.6344757080078, "logps/rejected": -180.77413940429688, "loss": 0.6766, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7352558374404907, "rewards/margins": 0.2504226863384247, "rewards/rejected": -0.9856783747673035, "step": 1750 }, { "epoch": 0.35, "learning_rate": 4.093559974371725e-06, "logits/chosen": -1.7862077951431274, "logits/rejected": -1.2913178205490112, "logps/chosen": -188.83047485351562, "logps/rejected": -177.20596313476562, "loss": 0.6732, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7140417695045471, "rewards/margins": 0.31644076108932495, "rewards/rejected": -1.0304826498031616, "step": 1760 }, { "epoch": 0.35, "learning_rate": 4.080073256662128e-06, "logits/chosen": -1.7819814682006836, "logits/rejected": -1.1891093254089355, "logps/chosen": -222.55728149414062, "logps/rejected": -182.42422485351562, "loss": 0.6725, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.7930876016616821, "rewards/margins": 0.251798540353775, "rewards/rejected": -1.0448861122131348, "step": 1770 }, { "epoch": 0.36, "learning_rate": 4.066509528411151e-06, "logits/chosen": -1.9290975332260132, "logits/rejected": -1.5329827070236206, "logps/chosen": -199.80795288085938, "logps/rejected": -189.35765075683594, "loss": 0.6779, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6187854409217834, "rewards/margins": 0.24056975543498993, "rewards/rejected": -0.8593552708625793, "step": 1780 }, { "epoch": 0.36, "learning_rate": 4.052869450695776e-06, "logits/chosen": -1.87184739112854, "logits/rejected": -1.4208166599273682, "logps/chosen": -209.4506378173828, "logps/rejected": -184.01100158691406, "loss": 0.6772, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7303478121757507, "rewards/margins": 0.20910847187042236, "rewards/rejected": -0.9394562840461731, "step": 1790 }, { "epoch": 0.36, "learning_rate": 4.039153688314146e-06, "logits/chosen": -1.8769724369049072, "logits/rejected": -1.1882269382476807, "logps/chosen": -190.82577514648438, "logps/rejected": -170.71212768554688, "loss": 0.6734, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4894588589668274, "rewards/margins": 0.30739688873291016, "rewards/rejected": -0.7968557476997375, "step": 1800 }, { "epoch": 0.36, "eval_logits/chosen": -1.8648688793182373, "eval_logits/rejected": -1.7088593244552612, "eval_logps/chosen": -293.6774597167969, "eval_logps/rejected": -279.1586608886719, "eval_loss": 0.703715443611145, "eval_rewards/accuracies": 0.45209580659866333, "eval_rewards/chosen": -0.3076614737510681, "eval_rewards/margins": 0.01816486194729805, "eval_rewards/rejected": -0.3258263170719147, "eval_runtime": 1204.9802, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 1800 }, { "epoch": 0.36, "learning_rate": 4.02536290975317e-06, "logits/chosen": -1.9461946487426758, "logits/rejected": -1.4611475467681885, "logps/chosen": -199.77239990234375, "logps/rejected": -166.04483032226562, "loss": 0.6775, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4904053211212158, "rewards/margins": 0.21567514538764954, "rewards/rejected": -0.7060805559158325, "step": 1810 }, { "epoch": 0.36, "learning_rate": 4.011497787155938e-06, "logits/chosen": -1.879023790359497, "logits/rejected": -1.2723863124847412, "logps/chosen": -212.2092742919922, "logps/rejected": -183.31248474121094, "loss": 0.6748, "rewards/accuracies": 0.875, "rewards/chosen": -0.5689260959625244, "rewards/margins": 0.24944710731506348, "rewards/rejected": -0.8183733224868774, "step": 1820 }, { "epoch": 0.37, "learning_rate": 3.997558996288965e-06, "logits/chosen": -1.9276552200317383, "logits/rejected": -1.5077195167541504, "logps/chosen": -236.1685028076172, "logps/rejected": -212.502685546875, "loss": 0.6702, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.48284998536109924, "rewards/margins": 0.32556915283203125, "rewards/rejected": -0.8084191083908081, "step": 1830 }, { "epoch": 0.37, "learning_rate": 3.983547216509254e-06, "logits/chosen": -2.1525771617889404, "logits/rejected": -1.6471827030181885, "logps/chosen": -193.48208618164062, "logps/rejected": -170.1123809814453, "loss": 0.6735, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.45690760016441345, "rewards/margins": 0.2923925518989563, "rewards/rejected": -0.7493001222610474, "step": 1840 }, { "epoch": 0.37, "learning_rate": 3.969463130731183e-06, "logits/chosen": -1.897660255432129, "logits/rejected": -1.5383254289627075, "logps/chosen": -200.92848205566406, "logps/rejected": -182.51788330078125, "loss": 0.6742, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.48100757598876953, "rewards/margins": 0.23920786380767822, "rewards/rejected": -0.7202154994010925, "step": 1850 }, { "epoch": 0.37, "learning_rate": 3.955307425393224e-06, "logits/chosen": -1.9208259582519531, "logits/rejected": -1.3443113565444946, "logps/chosen": -195.31033325195312, "logps/rejected": -204.84512329101562, "loss": 0.6701, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5977071523666382, "rewards/margins": 0.31546181440353394, "rewards/rejected": -0.9131690263748169, "step": 1860 }, { "epoch": 0.37, "learning_rate": 3.941080790424483e-06, "logits/chosen": -1.7583637237548828, "logits/rejected": -1.3662809133529663, "logps/chosen": -218.2642059326172, "logps/rejected": -180.39974975585938, "loss": 0.6733, "rewards/accuracies": 0.75, "rewards/chosen": -0.47190505266189575, "rewards/margins": 0.2539021372795105, "rewards/rejected": -0.7258071303367615, "step": 1870 }, { "epoch": 0.38, "learning_rate": 3.92678391921108e-06, "logits/chosen": -1.9956004619598389, "logits/rejected": -1.4918212890625, "logps/chosen": -204.52664184570312, "logps/rejected": -170.21002197265625, "loss": 0.6709, "rewards/accuracies": 0.75, "rewards/chosen": -0.4746318459510803, "rewards/margins": 0.26508575677871704, "rewards/rejected": -0.7397176027297974, "step": 1880 }, { "epoch": 0.38, "learning_rate": 3.912417508562345e-06, "logits/chosen": -1.8673477172851562, "logits/rejected": -1.279120683670044, "logps/chosen": -199.6542510986328, "logps/rejected": -166.83973693847656, "loss": 0.6745, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5720670819282532, "rewards/margins": 0.26647061109542847, "rewards/rejected": -0.8385375738143921, "step": 1890 }, { "epoch": 0.38, "learning_rate": 3.897982258676867e-06, "logits/chosen": -2.056460380554199, "logits/rejected": -1.6172153949737549, "logps/chosen": -155.67990112304688, "logps/rejected": -159.1779327392578, "loss": 0.6741, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5010054707527161, "rewards/margins": 0.26567792892456055, "rewards/rejected": -0.7666834592819214, "step": 1900 }, { "epoch": 0.38, "learning_rate": 3.88347887310836e-06, "logits/chosen": -2.01033091545105, "logits/rejected": -1.4821187257766724, "logps/chosen": -197.7437286376953, "logps/rejected": -175.1762237548828, "loss": 0.6738, "rewards/accuracies": 0.75, "rewards/chosen": -0.530315101146698, "rewards/margins": 0.2951328158378601, "rewards/rejected": -0.8254479169845581, "step": 1910 }, { "epoch": 0.38, "learning_rate": 3.868908058731376e-06, "logits/chosen": -2.02001953125, "logits/rejected": -1.662044882774353, "logps/chosen": -185.71234130859375, "logps/rejected": -178.3129119873047, "loss": 0.6751, "rewards/accuracies": 0.75, "rewards/chosen": -0.3356937766075134, "rewards/margins": 0.2248609960079193, "rewards/rejected": -0.5605548024177551, "step": 1920 }, { "epoch": 0.39, "learning_rate": 3.85427052570685e-06, "logits/chosen": -1.9633855819702148, "logits/rejected": -1.4638346433639526, "logps/chosen": -193.20370483398438, "logps/rejected": -189.8458251953125, "loss": 0.6707, "rewards/accuracies": 0.75, "rewards/chosen": -0.46807974576950073, "rewards/margins": 0.2651185393333435, "rewards/rejected": -0.7331982851028442, "step": 1930 }, { "epoch": 0.39, "learning_rate": 3.839566987447492e-06, "logits/chosen": -1.8922216892242432, "logits/rejected": -1.533254861831665, "logps/chosen": -194.08120727539062, "logps/rejected": -175.1561279296875, "loss": 0.6757, "rewards/accuracies": 0.75, "rewards/chosen": -0.6005040407180786, "rewards/margins": 0.23022966086864471, "rewards/rejected": -0.8307337760925293, "step": 1940 }, { "epoch": 0.39, "learning_rate": 3.824798160583012e-06, "logits/chosen": -1.9295408725738525, "logits/rejected": -1.2295997142791748, "logps/chosen": -194.404052734375, "logps/rejected": -200.65675354003906, "loss": 0.6714, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5714791417121887, "rewards/margins": 0.3534068167209625, "rewards/rejected": -0.9248858690261841, "step": 1950 }, { "epoch": 0.39, "learning_rate": 3.8099647649251984e-06, "logits/chosen": -1.9119479656219482, "logits/rejected": -1.3893742561340332, "logps/chosen": -235.59255981445312, "logps/rejected": -181.63272094726562, "loss": 0.6775, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.573710024356842, "rewards/margins": 0.23748692870140076, "rewards/rejected": -0.8111969828605652, "step": 1960 }, { "epoch": 0.39, "learning_rate": 3.795067523432826e-06, "logits/chosen": -1.981358289718628, "logits/rejected": -1.3365758657455444, "logps/chosen": -208.91055297851562, "logps/rejected": -185.8983612060547, "loss": 0.6697, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5935107469558716, "rewards/margins": 0.25945204496383667, "rewards/rejected": -0.8529627919197083, "step": 1970 }, { "epoch": 0.4, "learning_rate": 3.780107162176429e-06, "logits/chosen": -1.7403614521026611, "logits/rejected": -1.3477962017059326, "logps/chosen": -199.708984375, "logps/rejected": -171.9291534423828, "loss": 0.6755, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5525250434875488, "rewards/margins": 0.2729472517967224, "rewards/rejected": -0.825472354888916, "step": 1980 }, { "epoch": 0.4, "learning_rate": 3.7650844103029093e-06, "logits/chosen": -1.8267145156860352, "logits/rejected": -1.2643405199050903, "logps/chosen": -189.97459411621094, "logps/rejected": -167.34866333007812, "loss": 0.6722, "rewards/accuracies": 0.75, "rewards/chosen": -0.5427579879760742, "rewards/margins": 0.24742421507835388, "rewards/rejected": -0.7901821732521057, "step": 1990 }, { "epoch": 0.4, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.9596168994903564, "logits/rejected": -1.4312818050384521, "logps/chosen": -207.6639862060547, "logps/rejected": -181.36734008789062, "loss": 0.6754, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5533245801925659, "rewards/margins": 0.258844792842865, "rewards/rejected": -0.8121693730354309, "step": 2000 }, { "epoch": 0.4, "eval_logits/chosen": -1.8948662281036377, "eval_logits/rejected": -1.7360830307006836, "eval_logps/chosen": -303.671875, "eval_logps/rejected": -290.7583923339844, "eval_loss": 0.7072721719741821, "eval_rewards/accuracies": 0.46706587076187134, "eval_rewards/chosen": -0.40760570764541626, "eval_rewards/margins": 0.034218017011880875, "eval_rewards/rejected": -0.44182372093200684, "eval_runtime": 1204.7941, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 2000 }, { "epoch": 0.4, "learning_rate": 3.7348546664605777e-06, "logits/chosen": -1.7654269933700562, "logits/rejected": -1.3528163433074951, "logps/chosen": -163.11282348632812, "logps/rejected": -169.49505615234375, "loss": 0.6704, "rewards/accuracies": 0.75, "rewards/chosen": -0.43457499146461487, "rewards/margins": 0.2780947685241699, "rewards/rejected": -0.7126697301864624, "step": 2010 }, { "epoch": 0.4, "learning_rate": 3.7196491478468322e-06, "logits/chosen": -1.919817328453064, "logits/rejected": -1.4470279216766357, "logps/chosen": -225.479248046875, "logps/rejected": -237.519775390625, "loss": 0.676, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.532214343547821, "rewards/margins": 0.2575102746486664, "rewards/rejected": -0.7897245287895203, "step": 2020 }, { "epoch": 0.41, "learning_rate": 3.7043841852542884e-06, "logits/chosen": -1.6937332153320312, "logits/rejected": -1.279678225517273, "logps/chosen": -177.95065307617188, "logps/rejected": -159.79324340820312, "loss": 0.674, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5285166501998901, "rewards/margins": 0.30077117681503296, "rewards/rejected": -0.8292877078056335, "step": 2030 }, { "epoch": 0.41, "learning_rate": 3.689060522675689e-06, "logits/chosen": -1.8043416738510132, "logits/rejected": -1.2424075603485107, "logps/chosen": -189.4014892578125, "logps/rejected": -177.76431274414062, "loss": 0.6769, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6172100305557251, "rewards/margins": 0.25782492756843567, "rewards/rejected": -0.8750349283218384, "step": 2040 }, { "epoch": 0.41, "learning_rate": 3.6736789069647273e-06, "logits/chosen": -1.799748182296753, "logits/rejected": -1.3809267282485962, "logps/chosen": -180.16360473632812, "logps/rejected": -165.44708251953125, "loss": 0.674, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5689660906791687, "rewards/margins": 0.23547124862670898, "rewards/rejected": -0.8044373393058777, "step": 2050 }, { "epoch": 0.41, "learning_rate": 3.658240087799655e-06, "logits/chosen": -1.8382329940795898, "logits/rejected": -1.4162254333496094, "logps/chosen": -188.5830078125, "logps/rejected": -156.00106811523438, "loss": 0.6754, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.564164400100708, "rewards/margins": 0.17808131873607635, "rewards/rejected": -0.7422457337379456, "step": 2060 }, { "epoch": 0.41, "learning_rate": 3.642744817646736e-06, "logits/chosen": -2.0069162845611572, "logits/rejected": -1.6041675806045532, "logps/chosen": -204.7095489501953, "logps/rejected": -187.70526123046875, "loss": 0.6702, "rewards/accuracies": 0.875, "rewards/chosen": -0.556923508644104, "rewards/margins": 0.29966193437576294, "rewards/rejected": -0.8565853834152222, "step": 2070 }, { "epoch": 0.42, "learning_rate": 3.627193851723577e-06, "logits/chosen": -1.856783151626587, "logits/rejected": -1.3103711605072021, "logps/chosen": -207.44473266601562, "logps/rejected": -191.138427734375, "loss": 0.6718, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5144498944282532, "rewards/margins": 0.3957799971103668, "rewards/rejected": -0.9102296829223633, "step": 2080 }, { "epoch": 0.42, "learning_rate": 3.611587947962319e-06, "logits/chosen": -1.9008525609970093, "logits/rejected": -1.574562430381775, "logps/chosen": -164.8338623046875, "logps/rejected": -168.35812377929688, "loss": 0.6767, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.518588662147522, "rewards/margins": 0.23384113609790802, "rewards/rejected": -0.7524298429489136, "step": 2090 }, { "epoch": 0.42, "learning_rate": 3.595927866972694e-06, "logits/chosen": -1.8983213901519775, "logits/rejected": -1.2817580699920654, "logps/chosen": -197.06539916992188, "logps/rejected": -169.2557830810547, "loss": 0.6726, "rewards/accuracies": 0.875, "rewards/chosen": -0.43882614374160767, "rewards/margins": 0.41299930214881897, "rewards/rejected": -0.8518254160881042, "step": 2100 }, { "epoch": 0.42, "learning_rate": 3.5802143720049565e-06, "logits/chosen": -1.7755272388458252, "logits/rejected": -1.251577377319336, "logps/chosen": -178.02650451660156, "logps/rejected": -188.2619171142578, "loss": 0.6693, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5117620229721069, "rewards/margins": 0.2963302731513977, "rewards/rejected": -0.8080922961235046, "step": 2110 }, { "epoch": 0.42, "learning_rate": 3.564448228912682e-06, "logits/chosen": -1.8733713626861572, "logits/rejected": -1.4047024250030518, "logps/chosen": -182.5292510986328, "logps/rejected": -175.87191772460938, "loss": 0.6713, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5618389844894409, "rewards/margins": 0.24943415820598602, "rewards/rejected": -0.8112730979919434, "step": 2120 }, { "epoch": 0.43, "learning_rate": 3.5486302061154433e-06, "logits/chosen": -1.8059337139129639, "logits/rejected": -1.2917284965515137, "logps/chosen": -207.5518035888672, "logps/rejected": -186.2129364013672, "loss": 0.6752, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5907793045043945, "rewards/margins": 0.26312190294265747, "rewards/rejected": -0.853901207447052, "step": 2130 }, { "epoch": 0.43, "learning_rate": 3.532761074561355e-06, "logits/chosen": -1.896323800086975, "logits/rejected": -1.4776142835617065, "logps/chosen": -189.96267700195312, "logps/rejected": -182.42514038085938, "loss": 0.6739, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.511960506439209, "rewards/margins": 0.21206624805927277, "rewards/rejected": -0.7240267992019653, "step": 2140 }, { "epoch": 0.43, "learning_rate": 3.516841607689501e-06, "logits/chosen": -1.8578612804412842, "logits/rejected": -1.186772346496582, "logps/chosen": -179.05093383789062, "logps/rejected": -162.14309692382812, "loss": 0.6746, "rewards/accuracies": 0.75, "rewards/chosen": -0.5242822766304016, "rewards/margins": 0.3754071593284607, "rewards/rejected": -0.8996893763542175, "step": 2150 }, { "epoch": 0.43, "learning_rate": 3.5008725813922383e-06, "logits/chosen": -1.8464181423187256, "logits/rejected": -1.304973840713501, "logps/chosen": -177.97738647460938, "logps/rejected": -178.04287719726562, "loss": 0.6725, "rewards/accuracies": 0.875, "rewards/chosen": -0.48996639251708984, "rewards/margins": 0.302457720041275, "rewards/rejected": -0.7924240827560425, "step": 2160 }, { "epoch": 0.43, "learning_rate": 3.4848547739773782e-06, "logits/chosen": -1.994154691696167, "logits/rejected": -1.43686044216156, "logps/chosen": -187.79061889648438, "logps/rejected": -181.30111694335938, "loss": 0.6742, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4746576249599457, "rewards/margins": 0.29306238889694214, "rewards/rejected": -0.7677199840545654, "step": 2170 }, { "epoch": 0.44, "learning_rate": 3.4687889661302577e-06, "logits/chosen": -1.6252772808074951, "logits/rejected": -1.155043363571167, "logps/chosen": -175.06283569335938, "logps/rejected": -178.09686279296875, "loss": 0.6729, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5629199743270874, "rewards/margins": 0.28032130002975464, "rewards/rejected": -0.8432413339614868, "step": 2180 }, { "epoch": 0.44, "learning_rate": 3.452675940875686e-06, "logits/chosen": -1.8366267681121826, "logits/rejected": -1.4112536907196045, "logps/chosen": -195.7451629638672, "logps/rejected": -199.21487426757812, "loss": 0.6771, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.7240464091300964, "rewards/margins": 0.2895621657371521, "rewards/rejected": -1.013608694076538, "step": 2190 }, { "epoch": 0.44, "learning_rate": 3.436516483539781e-06, "logits/chosen": -1.7391029596328735, "logits/rejected": -1.309617042541504, "logps/chosen": -202.01612854003906, "logps/rejected": -182.37445068359375, "loss": 0.679, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5508378744125366, "rewards/margins": 0.27711355686187744, "rewards/rejected": -0.8279514312744141, "step": 2200 }, { "epoch": 0.44, "eval_logits/chosen": -1.8362478017807007, "eval_logits/rejected": -1.6814324855804443, "eval_logps/chosen": -307.2497253417969, "eval_logps/rejected": -294.4463195800781, "eval_loss": 0.7075074315071106, "eval_rewards/accuracies": 0.46107783913612366, "eval_rewards/chosen": -0.44338446855545044, "eval_rewards/margins": 0.035318806767463684, "eval_rewards/rejected": -0.47870326042175293, "eval_runtime": 1205.5545, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.277, "step": 2200 }, { "epoch": 0.44, "learning_rate": 3.4203113817116955e-06, "logits/chosen": -1.698176622390747, "logits/rejected": -1.2519752979278564, "logps/chosen": -183.85934448242188, "logps/rejected": -181.77511596679688, "loss": 0.6713, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6005623936653137, "rewards/margins": 0.2891175448894501, "rewards/rejected": -0.8896799087524414, "step": 2210 }, { "epoch": 0.44, "learning_rate": 3.4040614252052305e-06, "logits/chosen": -1.8071773052215576, "logits/rejected": -1.3697012662887573, "logps/chosen": -199.5607452392578, "logps/rejected": -190.60299682617188, "loss": 0.6719, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4918552041053772, "rewards/margins": 0.30966538190841675, "rewards/rejected": -0.8015205264091492, "step": 2220 }, { "epoch": 0.45, "learning_rate": 3.387767406020343e-06, "logits/chosen": -1.7884925603866577, "logits/rejected": -1.2359635829925537, "logps/chosen": -197.8942108154297, "logps/rejected": -179.0970001220703, "loss": 0.6736, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.44637590646743774, "rewards/margins": 0.2924754023551941, "rewards/rejected": -0.7388513088226318, "step": 2230 }, { "epoch": 0.45, "learning_rate": 3.3714301183045382e-06, "logits/chosen": -2.0025973320007324, "logits/rejected": -1.5185552835464478, "logps/chosen": -197.5625457763672, "logps/rejected": -196.24801635742188, "loss": 0.6758, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5814533233642578, "rewards/margins": 0.24875466525554657, "rewards/rejected": -0.8302080035209656, "step": 2240 }, { "epoch": 0.45, "learning_rate": 3.3550503583141726e-06, "logits/chosen": -1.785057783126831, "logits/rejected": -1.159350872039795, "logps/chosen": -199.1614532470703, "logps/rejected": -178.01551818847656, "loss": 0.6748, "rewards/accuracies": 0.75, "rewards/chosen": -0.630455493927002, "rewards/margins": 0.2970593571662903, "rewards/rejected": -0.9275148510932922, "step": 2250 }, { "epoch": 0.45, "learning_rate": 3.338628924375638e-06, "logits/chosen": -1.6639362573623657, "logits/rejected": -1.2227703332901, "logps/chosen": -199.7520294189453, "logps/rejected": -192.56919860839844, "loss": 0.6737, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.5375007390975952, "rewards/margins": 0.33527350425720215, "rewards/rejected": -0.8727743029594421, "step": 2260 }, { "epoch": 0.45, "learning_rate": 3.3221666168464584e-06, "logits/chosen": -1.8195312023162842, "logits/rejected": -1.3423821926116943, "logps/chosen": -180.17807006835938, "logps/rejected": -190.94314575195312, "loss": 0.6752, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4657808840274811, "rewards/margins": 0.20476970076560974, "rewards/rejected": -0.6705506443977356, "step": 2270 }, { "epoch": 0.46, "learning_rate": 3.3056642380762783e-06, "logits/chosen": -1.7451165914535522, "logits/rejected": -1.4132225513458252, "logps/chosen": -174.05044555664062, "logps/rejected": -162.31065368652344, "loss": 0.6769, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6010884046554565, "rewards/margins": 0.2041688710451126, "rewards/rejected": -0.8052573204040527, "step": 2280 }, { "epoch": 0.46, "learning_rate": 3.2891225923677565e-06, "logits/chosen": -1.9351451396942139, "logits/rejected": -1.473539113998413, "logps/chosen": -187.1571807861328, "logps/rejected": -171.54066467285156, "loss": 0.6746, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4758825898170471, "rewards/margins": 0.20934148132801056, "rewards/rejected": -0.6852241158485413, "step": 2290 }, { "epoch": 0.46, "learning_rate": 3.272542485937369e-06, "logits/chosen": -1.9461266994476318, "logits/rejected": -1.5034528970718384, "logps/chosen": -191.15951538085938, "logps/rejected": -186.38577270507812, "loss": 0.6734, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4989898204803467, "rewards/margins": 0.20688875019550323, "rewards/rejected": -0.7058785557746887, "step": 2300 }, { "epoch": 0.46, "learning_rate": 3.2559247268761117e-06, "logits/chosen": -2.0952258110046387, "logits/rejected": -1.5746580362319946, "logps/chosen": -175.76669311523438, "logps/rejected": -167.56613159179688, "loss": 0.675, "rewards/accuracies": 0.75, "rewards/chosen": -0.27586472034454346, "rewards/margins": 0.360548198223114, "rewards/rejected": -0.6364129185676575, "step": 2310 }, { "epoch": 0.46, "learning_rate": 3.2392701251101172e-06, "logits/chosen": -1.7197048664093018, "logits/rejected": -1.0960466861724854, "logps/chosen": -182.8074188232422, "logps/rejected": -177.63833618164062, "loss": 0.6745, "rewards/accuracies": 0.875, "rewards/chosen": -0.46170035004615784, "rewards/margins": 0.34735190868377686, "rewards/rejected": -0.8090522885322571, "step": 2320 }, { "epoch": 0.47, "learning_rate": 3.222579492361179e-06, "logits/chosen": -1.669664978981018, "logits/rejected": -1.2479567527770996, "logps/chosen": -181.59669494628906, "logps/rejected": -183.80819702148438, "loss": 0.6751, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5004920363426208, "rewards/margins": 0.32195088267326355, "rewards/rejected": -0.8224430084228516, "step": 2330 }, { "epoch": 0.47, "learning_rate": 3.205853642107192e-06, "logits/chosen": -1.9154350757598877, "logits/rejected": -1.4871299266815186, "logps/chosen": -170.89712524414062, "logps/rejected": -161.75827026367188, "loss": 0.672, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.573229968547821, "rewards/margins": 0.17594358325004578, "rewards/rejected": -0.7491735816001892, "step": 2340 }, { "epoch": 0.47, "learning_rate": 3.189093389542498e-06, "logits/chosen": -1.9233745336532593, "logits/rejected": -1.4563286304473877, "logps/chosen": -169.3166046142578, "logps/rejected": -149.12149047851562, "loss": 0.6757, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40617451071739197, "rewards/margins": 0.20493952929973602, "rewards/rejected": -0.6111140251159668, "step": 2350 }, { "epoch": 0.47, "learning_rate": 3.1722995515381644e-06, "logits/chosen": -1.6577157974243164, "logits/rejected": -1.3170554637908936, "logps/chosen": -144.51303100585938, "logps/rejected": -161.37924194335938, "loss": 0.6755, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.40916332602500916, "rewards/margins": 0.23883509635925293, "rewards/rejected": -0.6479983925819397, "step": 2360 }, { "epoch": 0.47, "learning_rate": 3.155472946602162e-06, "logits/chosen": -1.8271172046661377, "logits/rejected": -1.4430568218231201, "logps/chosen": -192.534423828125, "logps/rejected": -190.32565307617188, "loss": 0.6721, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5060493350028992, "rewards/margins": 0.28192219138145447, "rewards/rejected": -0.7879716157913208, "step": 2370 }, { "epoch": 0.48, "learning_rate": 3.1386143948394764e-06, "logits/chosen": -1.7869532108306885, "logits/rejected": -1.2439748048782349, "logps/chosen": -181.59927368164062, "logps/rejected": -168.99913024902344, "loss": 0.6718, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.43046626448631287, "rewards/margins": 0.3187507092952728, "rewards/rejected": -0.7492170333862305, "step": 2380 }, { "epoch": 0.48, "learning_rate": 3.121724717912138e-06, "logits/chosen": -1.772923231124878, "logits/rejected": -1.3488881587982178, "logps/chosen": -193.0264892578125, "logps/rejected": -166.48593139648438, "loss": 0.6711, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.5263931155204773, "rewards/margins": 0.33422666788101196, "rewards/rejected": -0.8606197237968445, "step": 2390 }, { "epoch": 0.48, "learning_rate": 3.1048047389991693e-06, "logits/chosen": -1.7798055410385132, "logits/rejected": -1.1240304708480835, "logps/chosen": -211.40493774414062, "logps/rejected": -181.99197387695312, "loss": 0.6692, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.3470020890235901, "rewards/margins": 0.40799134969711304, "rewards/rejected": -0.7549934387207031, "step": 2400 }, { "epoch": 0.48, "eval_logits/chosen": -1.830475091934204, "eval_logits/rejected": -1.6761457920074463, "eval_logps/chosen": -293.5765380859375, "eval_logps/rejected": -281.3558654785156, "eval_loss": 0.7067101001739502, "eval_rewards/accuracies": 0.4715568721294403, "eval_rewards/chosen": -0.30665212869644165, "eval_rewards/margins": 0.04114661365747452, "eval_rewards/rejected": -0.34779873490333557, "eval_runtime": 1205.2014, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.277, "step": 2400 }, { "epoch": 0.48, "learning_rate": 3.087855282756475e-06, "logits/chosen": -1.6292369365692139, "logits/rejected": -1.2916814088821411, "logps/chosen": -187.1026153564453, "logps/rejected": -163.94334411621094, "loss": 0.6759, "rewards/accuracies": 0.75, "rewards/chosen": -0.48811691999435425, "rewards/margins": 0.2739902436733246, "rewards/rejected": -0.7621071338653564, "step": 2410 }, { "epoch": 0.48, "learning_rate": 3.0708771752766397e-06, "logits/chosen": -1.6488803625106812, "logits/rejected": -1.17416250705719, "logps/chosen": -172.21530151367188, "logps/rejected": -159.70388793945312, "loss": 0.6768, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5460191369056702, "rewards/margins": 0.3138882517814636, "rewards/rejected": -0.8599074482917786, "step": 2420 }, { "epoch": 0.49, "learning_rate": 3.053871244048669e-06, "logits/chosen": -1.9461822509765625, "logits/rejected": -1.5185297727584839, "logps/chosen": -211.6808624267578, "logps/rejected": -181.49661254882812, "loss": 0.6758, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5033960938453674, "rewards/margins": 0.264183908700943, "rewards/rejected": -0.767579972743988, "step": 2430 }, { "epoch": 0.49, "learning_rate": 3.0368383179176584e-06, "logits/chosen": -1.8427566289901733, "logits/rejected": -1.1894140243530273, "logps/chosen": -206.5518035888672, "logps/rejected": -205.786376953125, "loss": 0.6736, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6515196561813354, "rewards/margins": 0.34544336795806885, "rewards/rejected": -0.9969631433486938, "step": 2440 }, { "epoch": 0.49, "learning_rate": 3.019779227044398e-06, "logits/chosen": -1.8485256433486938, "logits/rejected": -1.1070412397384644, "logps/chosen": -186.2294158935547, "logps/rejected": -190.2908172607422, "loss": 0.6751, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5132230520248413, "rewards/margins": 0.34580183029174805, "rewards/rejected": -0.8590248823165894, "step": 2450 }, { "epoch": 0.49, "learning_rate": 3.002694802864912e-06, "logits/chosen": -1.8154640197753906, "logits/rejected": -1.247162938117981, "logps/chosen": -184.99708557128906, "logps/rejected": -163.02227783203125, "loss": 0.674, "rewards/accuracies": 0.875, "rewards/chosen": -0.5121213793754578, "rewards/margins": 0.26612430810928345, "rewards/rejected": -0.7782456874847412, "step": 2460 }, { "epoch": 0.49, "learning_rate": 2.98558587804993e-06, "logits/chosen": -1.7605488300323486, "logits/rejected": -1.313816785812378, "logps/chosen": -201.1320343017578, "logps/rejected": -165.52597045898438, "loss": 0.6741, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4505895674228668, "rewards/margins": 0.2570321559906006, "rewards/rejected": -0.707621693611145, "step": 2470 }, { "epoch": 0.5, "learning_rate": 2.9684532864643123e-06, "logits/chosen": -1.776493787765503, "logits/rejected": -1.3342785835266113, "logps/chosen": -167.67816162109375, "logps/rejected": -169.43775939941406, "loss": 0.6718, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5500646829605103, "rewards/margins": 0.23469237983226776, "rewards/rejected": -0.784757137298584, "step": 2480 }, { "epoch": 0.5, "learning_rate": 2.9512978631264006e-06, "logits/chosen": -1.8473625183105469, "logits/rejected": -1.4294042587280273, "logps/chosen": -181.67886352539062, "logps/rejected": -172.66140747070312, "loss": 0.6776, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4860634207725525, "rewards/margins": 0.2723037600517273, "rewards/rejected": -0.7583671808242798, "step": 2490 }, { "epoch": 0.5, "learning_rate": 2.9341204441673267e-06, "logits/chosen": -1.709730863571167, "logits/rejected": -1.2603574991226196, "logps/chosen": -205.03915405273438, "logps/rejected": -174.63916015625, "loss": 0.6711, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.45966750383377075, "rewards/margins": 0.33475756645202637, "rewards/rejected": -0.7944250106811523, "step": 2500 }, { "epoch": 0.5, "learning_rate": 2.9169218667902562e-06, "logits/chosen": -1.9564578533172607, "logits/rejected": -1.3793319463729858, "logps/chosen": -224.7890625, "logps/rejected": -186.0004425048828, "loss": 0.6728, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.39820346236228943, "rewards/margins": 0.29973819851875305, "rewards/rejected": -0.6979416608810425, "step": 2510 }, { "epoch": 0.5, "learning_rate": 2.8997029692295875e-06, "logits/chosen": -1.6908514499664307, "logits/rejected": -1.0852570533752441, "logps/chosen": -188.7340545654297, "logps/rejected": -167.9377899169922, "loss": 0.6755, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.4355719983577728, "rewards/margins": 0.32259494066238403, "rewards/rejected": -0.7581669688224792, "step": 2520 }, { "epoch": 0.51, "learning_rate": 2.8824645907100957e-06, "logits/chosen": -1.7927274703979492, "logits/rejected": -1.351239800453186, "logps/chosen": -179.49771118164062, "logps/rejected": -175.46339416503906, "loss": 0.6743, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3833231031894684, "rewards/margins": 0.25877678394317627, "rewards/rejected": -0.6420998573303223, "step": 2530 }, { "epoch": 0.51, "learning_rate": 2.8652075714060296e-06, "logits/chosen": -1.6642992496490479, "logits/rejected": -1.092292070388794, "logps/chosen": -192.80116271972656, "logps/rejected": -182.14523315429688, "loss": 0.6748, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.49458056688308716, "rewards/margins": 0.3242564797401428, "rewards/rejected": -0.8188369870185852, "step": 2540 }, { "epoch": 0.51, "learning_rate": 2.847932752400164e-06, "logits/chosen": -2.050318479537964, "logits/rejected": -1.3377444744110107, "logps/chosen": -234.1560821533203, "logps/rejected": -191.45880126953125, "loss": 0.6736, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.4466802179813385, "rewards/margins": 0.3993278443813324, "rewards/rejected": -0.8460081219673157, "step": 2550 }, { "epoch": 0.51, "learning_rate": 2.8306409756428067e-06, "logits/chosen": -1.9657310247421265, "logits/rejected": -1.483107328414917, "logps/chosen": -164.46823120117188, "logps/rejected": -173.79981994628906, "loss": 0.6684, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4485262930393219, "rewards/margins": 0.3190024793148041, "rewards/rejected": -0.767528772354126, "step": 2560 }, { "epoch": 0.51, "learning_rate": 2.813333083910761e-06, "logits/chosen": -1.7239017486572266, "logits/rejected": -1.1839874982833862, "logps/chosen": -183.20346069335938, "logps/rejected": -164.28932189941406, "loss": 0.6749, "rewards/accuracies": 0.75, "rewards/chosen": -0.5019615888595581, "rewards/margins": 0.330681174993515, "rewards/rejected": -0.8326429128646851, "step": 2570 }, { "epoch": 0.52, "learning_rate": 2.7960099207662535e-06, "logits/chosen": -1.9549373388290405, "logits/rejected": -1.1832587718963623, "logps/chosen": -187.26541137695312, "logps/rejected": -165.99447631835938, "loss": 0.6723, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.455518901348114, "rewards/margins": 0.40256109833717346, "rewards/rejected": -0.8580800890922546, "step": 2580 }, { "epoch": 0.52, "learning_rate": 2.778672330515814e-06, "logits/chosen": -1.7912496328353882, "logits/rejected": -1.2911481857299805, "logps/chosen": -201.77212524414062, "logps/rejected": -184.16615295410156, "loss": 0.6732, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.38602378964424133, "rewards/margins": 0.3023552894592285, "rewards/rejected": -0.688379168510437, "step": 2590 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-06, "logits/chosen": -1.8845837116241455, "logits/rejected": -1.3728806972503662, "logps/chosen": -168.54507446289062, "logps/rejected": -157.21896362304688, "loss": 0.6778, "rewards/accuracies": 0.75, "rewards/chosen": -0.5500813722610474, "rewards/margins": 0.30299633741378784, "rewards/rejected": -0.8530777096748352, "step": 2600 }, { "epoch": 0.52, "eval_logits/chosen": -1.8687188625335693, "eval_logits/rejected": -1.7120007276535034, "eval_logps/chosen": -289.01275634765625, "eval_logps/rejected": -275.6222229003906, "eval_loss": 0.7036088705062866, "eval_rewards/accuracies": 0.46257483959198, "eval_rewards/chosen": -0.26101478934288025, "eval_rewards/margins": 0.029447516426444054, "eval_rewards/rejected": -0.29046228528022766, "eval_runtime": 1205.2629, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.277, "step": 2600 }, { "epoch": 0.52, "learning_rate": 2.743957249397874e-06, "logits/chosen": -1.950708031654358, "logits/rejected": -1.3470975160598755, "logps/chosen": -163.66860961914062, "logps/rejected": -156.57351684570312, "loss": 0.6767, "rewards/accuracies": 0.875, "rewards/chosen": -0.47454318404197693, "rewards/margins": 0.3361544907093048, "rewards/rejected": -0.8106976747512817, "step": 2610 }, { "epoch": 0.52, "learning_rate": 2.726581450494451e-06, "logits/chosen": -1.6164953708648682, "logits/rejected": -1.288303256034851, "logps/chosen": -136.69203186035156, "logps/rejected": -141.69578552246094, "loss": 0.676, "rewards/accuracies": 0.75, "rewards/chosen": -0.3741917610168457, "rewards/margins": 0.24335148930549622, "rewards/rejected": -0.6175432205200195, "step": 2620 }, { "epoch": 0.53, "learning_rate": 2.70919460833079e-06, "logits/chosen": -1.793229341506958, "logits/rejected": -1.3292603492736816, "logps/chosen": -148.20074462890625, "logps/rejected": -141.1336212158203, "loss": 0.6771, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.45213595032691956, "rewards/margins": 0.2539849877357483, "rewards/rejected": -0.7061209082603455, "step": 2630 }, { "epoch": 0.53, "learning_rate": 2.6917975703170466e-06, "logits/chosen": -1.5819017887115479, "logits/rejected": -1.2468364238739014, "logps/chosen": -179.3242950439453, "logps/rejected": -158.84085083007812, "loss": 0.6761, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3509295880794525, "rewards/margins": 0.245340496301651, "rewards/rejected": -0.5962700843811035, "step": 2640 }, { "epoch": 0.53, "learning_rate": 2.6743911843603134e-06, "logits/chosen": -1.7965497970581055, "logits/rejected": -1.2266132831573486, "logps/chosen": -168.13784790039062, "logps/rejected": -150.69332885742188, "loss": 0.6716, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.36452779173851013, "rewards/margins": 0.30592161417007446, "rewards/rejected": -0.670449435710907, "step": 2650 }, { "epoch": 0.53, "learning_rate": 2.6569762988232838e-06, "logits/chosen": -1.7848432064056396, "logits/rejected": -1.3635203838348389, "logps/chosen": -187.39088439941406, "logps/rejected": -142.91839599609375, "loss": 0.676, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4022220969200134, "rewards/margins": 0.33027398586273193, "rewards/rejected": -0.7324960827827454, "step": 2660 }, { "epoch": 0.53, "learning_rate": 2.63955376248291e-06, "logits/chosen": -1.9735376834869385, "logits/rejected": -1.548107385635376, "logps/chosen": -167.87197875976562, "logps/rejected": -151.86856079101562, "loss": 0.672, "rewards/accuracies": 0.75, "rewards/chosen": -0.5017527341842651, "rewards/margins": 0.24763119220733643, "rewards/rejected": -0.7493839263916016, "step": 2670 }, { "epoch": 0.54, "learning_rate": 2.6221244244890336e-06, "logits/chosen": -1.7312660217285156, "logits/rejected": -1.3255332708358765, "logps/chosen": -167.429931640625, "logps/rejected": -167.97882080078125, "loss": 0.6749, "rewards/accuracies": 0.75, "rewards/chosen": -0.5501227974891663, "rewards/margins": 0.3202934265136719, "rewards/rejected": -0.8704161643981934, "step": 2680 }, { "epoch": 0.54, "learning_rate": 2.604689134322999e-06, "logits/chosen": -1.5674875974655151, "logits/rejected": -1.2445539236068726, "logps/chosen": -181.32308959960938, "logps/rejected": -164.71713256835938, "loss": 0.6741, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5878476500511169, "rewards/margins": 0.26971811056137085, "rewards/rejected": -0.8575657606124878, "step": 2690 }, { "epoch": 0.54, "learning_rate": 2.587248741756253e-06, "logits/chosen": -1.7759087085723877, "logits/rejected": -1.347715139389038, "logps/chosen": -187.6118621826172, "logps/rejected": -172.80215454101562, "loss": 0.6727, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4626511037349701, "rewards/margins": 0.28930526971817017, "rewards/rejected": -0.7519564032554626, "step": 2700 }, { "epoch": 0.54, "learning_rate": 2.569804096808923e-06, "logits/chosen": -1.7389520406723022, "logits/rejected": -1.3212487697601318, "logps/chosen": -160.83932495117188, "logps/rejected": -169.26182556152344, "loss": 0.6746, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.7231206893920898, "rewards/margins": 0.31118813157081604, "rewards/rejected": -1.0343087911605835, "step": 2710 }, { "epoch": 0.54, "learning_rate": 2.5523560497083927e-06, "logits/chosen": -1.6341676712036133, "logits/rejected": -1.28550386428833, "logps/chosen": -166.45352172851562, "logps/rejected": -159.68301391601562, "loss": 0.6737, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6307767629623413, "rewards/margins": 0.23971566557884216, "rewards/rejected": -0.8704924583435059, "step": 2720 }, { "epoch": 0.55, "learning_rate": 2.5349054508478636e-06, "logits/chosen": -1.905362844467163, "logits/rejected": -1.3361937999725342, "logps/chosen": -210.5450439453125, "logps/rejected": -188.63999938964844, "loss": 0.6735, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.560156524181366, "rewards/margins": 0.3129926323890686, "rewards/rejected": -0.8731492161750793, "step": 2730 }, { "epoch": 0.55, "learning_rate": 2.517453150744904e-06, "logits/chosen": -1.8077281713485718, "logits/rejected": -1.2750660181045532, "logps/chosen": -200.51712036132812, "logps/rejected": -189.79714965820312, "loss": 0.6757, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3451409935951233, "rewards/margins": 0.31594201922416687, "rewards/rejected": -0.6610830426216125, "step": 2740 }, { "epoch": 0.55, "learning_rate": 2.5e-06, "logits/chosen": -1.744180679321289, "logits/rejected": -1.2995460033416748, "logps/chosen": -216.9247283935547, "logps/rejected": -206.2625274658203, "loss": 0.6736, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4681733548641205, "rewards/margins": 0.28743892908096313, "rewards/rejected": -0.7556122541427612, "step": 2750 }, { "epoch": 0.55, "learning_rate": 2.482546849255096e-06, "logits/chosen": -1.816948652267456, "logits/rejected": -1.4967985153198242, "logps/chosen": -206.8921356201172, "logps/rejected": -214.9856719970703, "loss": 0.6772, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4849894642829895, "rewards/margins": 0.2557857036590576, "rewards/rejected": -0.7407751083374023, "step": 2760 }, { "epoch": 0.55, "learning_rate": 2.4650945491521372e-06, "logits/chosen": -1.8832544088363647, "logits/rejected": -1.4929633140563965, "logps/chosen": -200.93264770507812, "logps/rejected": -172.87783813476562, "loss": 0.6737, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5108343362808228, "rewards/margins": 0.26843157410621643, "rewards/rejected": -0.7792659401893616, "step": 2770 }, { "epoch": 0.56, "learning_rate": 2.447643950291608e-06, "logits/chosen": -1.9391086101531982, "logits/rejected": -1.5371830463409424, "logps/chosen": -211.7366180419922, "logps/rejected": -189.55410766601562, "loss": 0.6747, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6736190915107727, "rewards/margins": 0.2480430155992508, "rewards/rejected": -0.9216620326042175, "step": 2780 }, { "epoch": 0.56, "learning_rate": 2.4301959031910785e-06, "logits/chosen": -1.728326439857483, "logits/rejected": -1.2791118621826172, "logps/chosen": -223.2736358642578, "logps/rejected": -200.06375122070312, "loss": 0.6679, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6176142692565918, "rewards/margins": 0.29919275641441345, "rewards/rejected": -0.9168070554733276, "step": 2790 }, { "epoch": 0.56, "learning_rate": 2.4127512582437486e-06, "logits/chosen": -1.9325716495513916, "logits/rejected": -1.3611788749694824, "logps/chosen": -181.1573028564453, "logps/rejected": -177.87191772460938, "loss": 0.6687, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6200854182243347, "rewards/margins": 0.3409944176673889, "rewards/rejected": -0.9610798954963684, "step": 2800 }, { "epoch": 0.56, "eval_logits/chosen": -1.8484017848968506, "eval_logits/rejected": -1.6930148601531982, "eval_logps/chosen": -303.61712646484375, "eval_logps/rejected": -290.80804443359375, "eval_loss": 0.7112637162208557, "eval_rewards/accuracies": 0.46257483959198, "eval_rewards/chosen": -0.4070579707622528, "eval_rewards/margins": 0.035262659192085266, "eval_rewards/rejected": -0.44232064485549927, "eval_runtime": 1205.3364, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.277, "step": 2800 }, { "epoch": 0.56, "learning_rate": 2.3953108656770018e-06, "logits/chosen": -1.7573649883270264, "logits/rejected": -1.3248592615127563, "logps/chosen": -192.79794311523438, "logps/rejected": -180.8382568359375, "loss": 0.6729, "rewards/accuracies": 0.75, "rewards/chosen": -0.49549323320388794, "rewards/margins": 0.333453893661499, "rewards/rejected": -0.8289471864700317, "step": 2810 }, { "epoch": 0.56, "learning_rate": 2.377875575510967e-06, "logits/chosen": -1.9037433862686157, "logits/rejected": -1.4028642177581787, "logps/chosen": -210.6756591796875, "logps/rejected": -196.8881378173828, "loss": 0.6744, "rewards/accuracies": 0.875, "rewards/chosen": -0.5146250128746033, "rewards/margins": 0.30020710825920105, "rewards/rejected": -0.8148322105407715, "step": 2820 }, { "epoch": 0.57, "learning_rate": 2.3604462375170905e-06, "logits/chosen": -1.7907432317733765, "logits/rejected": -1.4217617511749268, "logps/chosen": -194.8076629638672, "logps/rejected": -168.1132049560547, "loss": 0.6793, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5442146062850952, "rewards/margins": 0.21668629348278046, "rewards/rejected": -0.7609008550643921, "step": 2830 }, { "epoch": 0.57, "learning_rate": 2.3430237011767166e-06, "logits/chosen": -1.834393858909607, "logits/rejected": -1.4094077348709106, "logps/chosen": -185.83743286132812, "logps/rejected": -183.2838592529297, "loss": 0.6761, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5791108012199402, "rewards/margins": 0.21476773917675018, "rewards/rejected": -0.7938784956932068, "step": 2840 }, { "epoch": 0.57, "learning_rate": 2.325608815639687e-06, "logits/chosen": -1.5957016944885254, "logits/rejected": -1.1119681596755981, "logps/chosen": -183.26089477539062, "logps/rejected": -160.47842407226562, "loss": 0.6754, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5756304860115051, "rewards/margins": 0.25155001878738403, "rewards/rejected": -0.8271805644035339, "step": 2850 }, { "epoch": 0.57, "learning_rate": 2.3082024296829538e-06, "logits/chosen": -1.7604936361312866, "logits/rejected": -1.3576148748397827, "logps/chosen": -172.75656127929688, "logps/rejected": -185.87112426757812, "loss": 0.6737, "rewards/accuracies": 0.75, "rewards/chosen": -0.6021561026573181, "rewards/margins": 0.26316025853157043, "rewards/rejected": -0.8653162717819214, "step": 2860 }, { "epoch": 0.57, "learning_rate": 2.290805391669212e-06, "logits/chosen": -1.9428532123565674, "logits/rejected": -1.2008286714553833, "logps/chosen": -181.3525848388672, "logps/rejected": -198.18325805664062, "loss": 0.673, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.43997830152511597, "rewards/margins": 0.36790579557418823, "rewards/rejected": -0.8078840970993042, "step": 2870 }, { "epoch": 0.58, "learning_rate": 2.2734185495055503e-06, "logits/chosen": -1.7510589361190796, "logits/rejected": -1.2939937114715576, "logps/chosen": -184.83253479003906, "logps/rejected": -175.17599487304688, "loss": 0.6743, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6584424376487732, "rewards/margins": 0.27582138776779175, "rewards/rejected": -0.9342638254165649, "step": 2880 }, { "epoch": 0.58, "learning_rate": 2.256042750602127e-06, "logits/chosen": -1.7783676385879517, "logits/rejected": -1.3183950185775757, "logps/chosen": -221.0303497314453, "logps/rejected": -190.1031036376953, "loss": 0.6731, "rewards/accuracies": 0.625, "rewards/chosen": -0.47211718559265137, "rewards/margins": 0.2633390426635742, "rewards/rejected": -0.7354562878608704, "step": 2890 }, { "epoch": 0.58, "learning_rate": 2.238678841830867e-06, "logits/chosen": -1.7901554107666016, "logits/rejected": -1.3465102910995483, "logps/chosen": -189.94558715820312, "logps/rejected": -169.72003173828125, "loss": 0.6776, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5226966738700867, "rewards/margins": 0.2620371878147125, "rewards/rejected": -0.7847338914871216, "step": 2900 }, { "epoch": 0.58, "learning_rate": 2.2213276694841866e-06, "logits/chosen": -1.7172454595565796, "logits/rejected": -1.1281907558441162, "logps/chosen": -200.78469848632812, "logps/rejected": -178.8584747314453, "loss": 0.6733, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5375521779060364, "rewards/margins": 0.3278997540473938, "rewards/rejected": -0.8654518127441406, "step": 2910 }, { "epoch": 0.58, "learning_rate": 2.2039900792337477e-06, "logits/chosen": -1.8611243963241577, "logits/rejected": -1.339874267578125, "logps/chosen": -199.24288940429688, "logps/rejected": -183.18930053710938, "loss": 0.6702, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5097121596336365, "rewards/margins": 0.28898146748542786, "rewards/rejected": -0.7986935973167419, "step": 2920 }, { "epoch": 0.59, "learning_rate": 2.186666916089239e-06, "logits/chosen": -1.8386751413345337, "logits/rejected": -1.304248571395874, "logps/chosen": -179.70242309570312, "logps/rejected": -193.95883178710938, "loss": 0.673, "rewards/accuracies": 0.875, "rewards/chosen": -0.6228549480438232, "rewards/margins": 0.3268582820892334, "rewards/rejected": -0.9497131109237671, "step": 2930 }, { "epoch": 0.59, "learning_rate": 2.1693590243571937e-06, "logits/chosen": -1.7048537731170654, "logits/rejected": -1.2206976413726807, "logps/chosen": -185.75521850585938, "logps/rejected": -180.73643493652344, "loss": 0.6703, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5552883148193359, "rewards/margins": 0.3206081986427307, "rewards/rejected": -0.8758966326713562, "step": 2940 }, { "epoch": 0.59, "learning_rate": 2.1520672475998374e-06, "logits/chosen": -1.7592540979385376, "logits/rejected": -1.1391639709472656, "logps/chosen": -223.3463592529297, "logps/rejected": -196.56520080566406, "loss": 0.6669, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5193053483963013, "rewards/margins": 0.3402412235736847, "rewards/rejected": -0.8595464825630188, "step": 2950 }, { "epoch": 0.59, "learning_rate": 2.134792428593971e-06, "logits/chosen": -1.746522307395935, "logits/rejected": -1.4490628242492676, "logps/chosen": -159.15187072753906, "logps/rejected": -172.71751403808594, "loss": 0.6757, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4844391345977783, "rewards/margins": 0.2979712188243866, "rewards/rejected": -0.7824103236198425, "step": 2960 }, { "epoch": 0.59, "learning_rate": 2.117535409289905e-06, "logits/chosen": -1.7779953479766846, "logits/rejected": -1.127291202545166, "logps/chosen": -193.23690795898438, "logps/rejected": -180.17236328125, "loss": 0.6749, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.41943177580833435, "rewards/margins": 0.3307582437992096, "rewards/rejected": -0.7501900792121887, "step": 2970 }, { "epoch": 0.6, "learning_rate": 2.1002970307704134e-06, "logits/chosen": -1.8621399402618408, "logits/rejected": -1.5056955814361572, "logps/chosen": -167.5177764892578, "logps/rejected": -160.3635711669922, "loss": 0.6766, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5300180315971375, "rewards/margins": 0.24094454944133759, "rewards/rejected": -0.770962655544281, "step": 2980 }, { "epoch": 0.6, "learning_rate": 2.0830781332097446e-06, "logits/chosen": -1.7064130306243896, "logits/rejected": -1.346752405166626, "logps/chosen": -182.6517333984375, "logps/rejected": -159.5797119140625, "loss": 0.6735, "rewards/accuracies": 0.75, "rewards/chosen": -0.41608262062072754, "rewards/margins": 0.25435641407966614, "rewards/rejected": -0.6704390048980713, "step": 2990 }, { "epoch": 0.6, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -1.779611587524414, "logits/rejected": -1.3635574579238892, "logps/chosen": -195.0620574951172, "logps/rejected": -181.1638641357422, "loss": 0.6741, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4341199994087219, "rewards/margins": 0.2896597981452942, "rewards/rejected": -0.7237797379493713, "step": 3000 }, { "epoch": 0.6, "eval_logits/chosen": -1.8221943378448486, "eval_logits/rejected": -1.6691908836364746, "eval_logps/chosen": -295.5166931152344, "eval_logps/rejected": -282.7206115722656, "eval_loss": 0.7067164182662964, "eval_rewards/accuracies": 0.46706587076187134, "eval_rewards/chosen": -0.32605409622192383, "eval_rewards/margins": 0.03539185971021652, "eval_rewards/rejected": -0.36144593358039856, "eval_runtime": 1204.9596, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 3000 }, { "epoch": 0.6, "learning_rate": 2.0487021368736002e-06, "logits/chosen": -1.7297006845474243, "logits/rejected": -1.081540822982788, "logps/chosen": -218.8377227783203, "logps/rejected": -208.20272827148438, "loss": 0.6719, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4662281572818756, "rewards/margins": 0.33968496322631836, "rewards/rejected": -0.8059131503105164, "step": 3010 }, { "epoch": 0.6, "learning_rate": 2.031546713535688e-06, "logits/chosen": -1.6373369693756104, "logits/rejected": -0.9530367851257324, "logps/chosen": -213.96142578125, "logps/rejected": -171.2977752685547, "loss": 0.6684, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4863305985927582, "rewards/margins": 0.40614575147628784, "rewards/rejected": -0.8924763798713684, "step": 3020 }, { "epoch": 0.61, "learning_rate": 2.0144141219500707e-06, "logits/chosen": -1.7659136056900024, "logits/rejected": -1.2979044914245605, "logps/chosen": -187.57737731933594, "logps/rejected": -177.23468017578125, "loss": 0.6715, "rewards/accuracies": 0.875, "rewards/chosen": -0.5280492901802063, "rewards/margins": 0.37660685181617737, "rewards/rejected": -0.9046560525894165, "step": 3030 }, { "epoch": 0.61, "learning_rate": 1.997305197135089e-06, "logits/chosen": -1.8129348754882812, "logits/rejected": -1.2432445287704468, "logps/chosen": -171.16738891601562, "logps/rejected": -172.42550659179688, "loss": 0.6713, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6230432987213135, "rewards/margins": 0.3671768307685852, "rewards/rejected": -0.9902200698852539, "step": 3040 }, { "epoch": 0.61, "learning_rate": 1.9802207729556023e-06, "logits/chosen": -1.868790626525879, "logits/rejected": -1.5055510997772217, "logps/chosen": -180.40176391601562, "logps/rejected": -177.11814880371094, "loss": 0.6697, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5533271431922913, "rewards/margins": 0.20522382855415344, "rewards/rejected": -0.7585509419441223, "step": 3050 }, { "epoch": 0.61, "learning_rate": 1.963161682082342e-06, "logits/chosen": -1.7876341342926025, "logits/rejected": -1.1789666414260864, "logps/chosen": -202.9730987548828, "logps/rejected": -177.06686401367188, "loss": 0.6733, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5442095994949341, "rewards/margins": 0.3467788100242615, "rewards/rejected": -0.8909884691238403, "step": 3060 }, { "epoch": 0.61, "learning_rate": 1.946128755951332e-06, "logits/chosen": -1.9207346439361572, "logits/rejected": -1.3213632106781006, "logps/chosen": -200.37428283691406, "logps/rejected": -168.12350463867188, "loss": 0.6748, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4856260418891907, "rewards/margins": 0.3189104497432709, "rewards/rejected": -0.8045364618301392, "step": 3070 }, { "epoch": 0.62, "learning_rate": 1.9291228247233607e-06, "logits/chosen": -1.8156201839447021, "logits/rejected": -1.2673556804656982, "logps/chosen": -189.31118774414062, "logps/rejected": -196.18264770507812, "loss": 0.6704, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.519213080406189, "rewards/margins": 0.26477181911468506, "rewards/rejected": -0.7839849591255188, "step": 3080 }, { "epoch": 0.62, "learning_rate": 1.912144717243525e-06, "logits/chosen": -1.8886935710906982, "logits/rejected": -1.5111931562423706, "logps/chosen": -189.7649383544922, "logps/rejected": -183.67892456054688, "loss": 0.6738, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6388393640518188, "rewards/margins": 0.2276858538389206, "rewards/rejected": -0.8665252923965454, "step": 3090 }, { "epoch": 0.62, "learning_rate": 1.895195261000831e-06, "logits/chosen": -1.7642914056777954, "logits/rejected": -1.3265117406845093, "logps/chosen": -176.56607055664062, "logps/rejected": -159.41806030273438, "loss": 0.6772, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.619392454624176, "rewards/margins": 0.22480256855487823, "rewards/rejected": -0.8441950082778931, "step": 3100 }, { "epoch": 0.62, "learning_rate": 1.8782752820878636e-06, "logits/chosen": -1.7658132314682007, "logits/rejected": -1.4638707637786865, "logps/chosen": -208.37612915039062, "logps/rejected": -183.71859741210938, "loss": 0.6737, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5368543863296509, "rewards/margins": 0.29873350262641907, "rewards/rejected": -0.8355878591537476, "step": 3110 }, { "epoch": 0.62, "learning_rate": 1.8613856051605242e-06, "logits/chosen": -2.0071663856506348, "logits/rejected": -1.62530517578125, "logps/chosen": -185.59152221679688, "logps/rejected": -199.63734436035156, "loss": 0.674, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5928925275802612, "rewards/margins": 0.2392810583114624, "rewards/rejected": -0.8321736454963684, "step": 3120 }, { "epoch": 0.63, "learning_rate": 1.8445270533978387e-06, "logits/chosen": -1.8105121850967407, "logits/rejected": -1.2008041143417358, "logps/chosen": -190.645751953125, "logps/rejected": -184.36691284179688, "loss": 0.6759, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4957979619503021, "rewards/margins": 0.34236884117126465, "rewards/rejected": -0.8381667137145996, "step": 3130 }, { "epoch": 0.63, "learning_rate": 1.827700448461836e-06, "logits/chosen": -1.8654365539550781, "logits/rejected": -1.1974941492080688, "logps/chosen": -195.1807403564453, "logps/rejected": -185.81179809570312, "loss": 0.6703, "rewards/accuracies": 0.875, "rewards/chosen": -0.47486066818237305, "rewards/margins": 0.393667995929718, "rewards/rejected": -0.8685287237167358, "step": 3140 }, { "epoch": 0.63, "learning_rate": 1.8109066104575023e-06, "logits/chosen": -1.7698990106582642, "logits/rejected": -1.2602875232696533, "logps/chosen": -163.6126708984375, "logps/rejected": -160.32923889160156, "loss": 0.6736, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5138403177261353, "rewards/margins": 0.3311834931373596, "rewards/rejected": -0.8450237512588501, "step": 3150 }, { "epoch": 0.63, "learning_rate": 1.7941463578928088e-06, "logits/chosen": -1.6188510656356812, "logits/rejected": -1.2506155967712402, "logps/chosen": -195.2880096435547, "logps/rejected": -192.77696228027344, "loss": 0.6728, "rewards/accuracies": 0.75, "rewards/chosen": -0.5180437564849854, "rewards/margins": 0.23342260718345642, "rewards/rejected": -0.7514663934707642, "step": 3160 }, { "epoch": 0.63, "learning_rate": 1.7774205076388207e-06, "logits/chosen": -1.963537573814392, "logits/rejected": -1.179218053817749, "logps/chosen": -191.8006591796875, "logps/rejected": -181.9679718017578, "loss": 0.6736, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.46346649527549744, "rewards/margins": 0.32313650846481323, "rewards/rejected": -0.7866030335426331, "step": 3170 }, { "epoch": 0.64, "learning_rate": 1.7607298748898844e-06, "logits/chosen": -1.8354198932647705, "logits/rejected": -1.2489491701126099, "logps/chosen": -182.9680938720703, "logps/rejected": -171.52200317382812, "loss": 0.6725, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.47444209456443787, "rewards/margins": 0.28939831256866455, "rewards/rejected": -0.76384037733078, "step": 3180 }, { "epoch": 0.64, "learning_rate": 1.744075273123889e-06, "logits/chosen": -1.8500797748565674, "logits/rejected": -1.2870159149169922, "logps/chosen": -196.744873046875, "logps/rejected": -171.9564666748047, "loss": 0.6674, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.5544387698173523, "rewards/margins": 0.3399571478366852, "rewards/rejected": -0.8943958282470703, "step": 3190 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.771675705909729, "logits/rejected": -1.2070860862731934, "logps/chosen": -187.3867645263672, "logps/rejected": -200.8397674560547, "loss": 0.674, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.565233588218689, "rewards/margins": 0.3425661623477936, "rewards/rejected": -0.9077998399734497, "step": 3200 }, { "epoch": 0.64, "eval_logits/chosen": -1.8384701013565063, "eval_logits/rejected": -1.684001088142395, "eval_logps/chosen": -294.6257629394531, "eval_logps/rejected": -282.13128662109375, "eval_loss": 0.708480715751648, "eval_rewards/accuracies": 0.4715568721294403, "eval_rewards/chosen": -0.3171444833278656, "eval_rewards/margins": 0.03840852901339531, "eval_rewards/rejected": -0.35555300116539, "eval_runtime": 1205.6718, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.277, "step": 3200 }, { "epoch": 0.64, "learning_rate": 1.7108774076322443e-06, "logits/chosen": -1.6476867198944092, "logits/rejected": -1.1505199670791626, "logps/chosen": -173.94692993164062, "logps/rejected": -167.03904724121094, "loss": 0.6737, "rewards/accuracies": 0.75, "rewards/chosen": -0.4680989682674408, "rewards/margins": 0.28364431858062744, "rewards/rejected": -0.7517432570457458, "step": 3210 }, { "epoch": 0.64, "learning_rate": 1.6943357619237227e-06, "logits/chosen": -1.9314390420913696, "logits/rejected": -1.2555644512176514, "logps/chosen": -197.59005737304688, "logps/rejected": -170.77593994140625, "loss": 0.6742, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5004652142524719, "rewards/margins": 0.34631776809692383, "rewards/rejected": -0.8467830419540405, "step": 3220 }, { "epoch": 0.65, "learning_rate": 1.677833383153542e-06, "logits/chosen": -1.6416819095611572, "logits/rejected": -1.2541364431381226, "logps/chosen": -191.52027893066406, "logps/rejected": -180.745361328125, "loss": 0.6752, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5467370748519897, "rewards/margins": 0.24134309589862823, "rewards/rejected": -0.7880801558494568, "step": 3230 }, { "epoch": 0.65, "learning_rate": 1.661371075624363e-06, "logits/chosen": -1.7817283868789673, "logits/rejected": -1.0519486665725708, "logps/chosen": -176.46359252929688, "logps/rejected": -164.21681213378906, "loss": 0.6716, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.47450804710388184, "rewards/margins": 0.3613935112953186, "rewards/rejected": -0.8359016180038452, "step": 3240 }, { "epoch": 0.65, "learning_rate": 1.6449496416858285e-06, "logits/chosen": -1.666428804397583, "logits/rejected": -1.1580183506011963, "logps/chosen": -206.393798828125, "logps/rejected": -160.6327362060547, "loss": 0.6755, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6286054253578186, "rewards/margins": 0.3506014943122864, "rewards/rejected": -0.9792068600654602, "step": 3250 }, { "epoch": 0.65, "learning_rate": 1.6285698816954626e-06, "logits/chosen": -1.80611252784729, "logits/rejected": -1.244471788406372, "logps/chosen": -174.0679168701172, "logps/rejected": -165.92367553710938, "loss": 0.674, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5801917314529419, "rewards/margins": 0.30626851320266724, "rewards/rejected": -0.8864603042602539, "step": 3260 }, { "epoch": 0.65, "learning_rate": 1.612232593979658e-06, "logits/chosen": -1.8103708028793335, "logits/rejected": -1.3331295251846313, "logps/chosen": -172.4650115966797, "logps/rejected": -169.60140991210938, "loss": 0.6735, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.569888174533844, "rewards/margins": 0.29327720403671265, "rewards/rejected": -0.8631652593612671, "step": 3270 }, { "epoch": 0.66, "learning_rate": 1.5959385747947697e-06, "logits/chosen": -1.8266083002090454, "logits/rejected": -1.2756679058074951, "logps/chosen": -194.59707641601562, "logps/rejected": -175.5626678466797, "loss": 0.6711, "rewards/accuracies": 0.75, "rewards/chosen": -0.5538734197616577, "rewards/margins": 0.2958170771598816, "rewards/rejected": -0.8496904373168945, "step": 3280 }, { "epoch": 0.66, "learning_rate": 1.5796886182883053e-06, "logits/chosen": -1.8553192615509033, "logits/rejected": -1.0842763185501099, "logps/chosen": -190.76585388183594, "logps/rejected": -186.9684600830078, "loss": 0.6767, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5342960357666016, "rewards/margins": 0.38266506791114807, "rewards/rejected": -0.9169610738754272, "step": 3290 }, { "epoch": 0.66, "learning_rate": 1.56348351646022e-06, "logits/chosen": -1.6904369592666626, "logits/rejected": -1.102135181427002, "logps/chosen": -196.9351348876953, "logps/rejected": -178.5782012939453, "loss": 0.672, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.47919923067092896, "rewards/margins": 0.3497912287712097, "rewards/rejected": -0.8289904594421387, "step": 3300 }, { "epoch": 0.66, "learning_rate": 1.547324059124315e-06, "logits/chosen": -1.7735326290130615, "logits/rejected": -1.2302892208099365, "logps/chosen": -211.19985961914062, "logps/rejected": -170.3240203857422, "loss": 0.6722, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4695677161216736, "rewards/margins": 0.34387335181236267, "rewards/rejected": -0.8134411573410034, "step": 3310 }, { "epoch": 0.66, "learning_rate": 1.5312110338697427e-06, "logits/chosen": -1.9204257726669312, "logits/rejected": -1.5753657817840576, "logps/chosen": -211.1373748779297, "logps/rejected": -194.1078643798828, "loss": 0.6735, "rewards/accuracies": 0.75, "rewards/chosen": -0.37302443385124207, "rewards/margins": 0.24376694858074188, "rewards/rejected": -0.6167914271354675, "step": 3320 }, { "epoch": 0.67, "learning_rate": 1.5151452260226224e-06, "logits/chosen": -1.743941068649292, "logits/rejected": -1.2013376951217651, "logps/chosen": -174.98983764648438, "logps/rejected": -161.19802856445312, "loss": 0.6737, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4591328501701355, "rewards/margins": 0.31252074241638184, "rewards/rejected": -0.7716535329818726, "step": 3330 }, { "epoch": 0.67, "learning_rate": 1.4991274186077632e-06, "logits/chosen": -1.7003679275512695, "logits/rejected": -1.3055880069732666, "logps/chosen": -160.9624786376953, "logps/rejected": -161.6939239501953, "loss": 0.6719, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4876251220703125, "rewards/margins": 0.35760578513145447, "rewards/rejected": -0.8452308773994446, "step": 3340 }, { "epoch": 0.67, "learning_rate": 1.4831583923105e-06, "logits/chosen": -1.6662811040878296, "logits/rejected": -1.213519811630249, "logps/chosen": -166.5484619140625, "logps/rejected": -161.89395141601562, "loss": 0.6725, "rewards/accuracies": 0.75, "rewards/chosen": -0.36509793996810913, "rewards/margins": 0.3358835279941559, "rewards/rejected": -0.7009814977645874, "step": 3350 }, { "epoch": 0.67, "learning_rate": 1.467238925438646e-06, "logits/chosen": -1.8824735879898071, "logits/rejected": -1.2390296459197998, "logps/chosen": -203.55311584472656, "logps/rejected": -174.3763427734375, "loss": 0.6753, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.43257713317871094, "rewards/margins": 0.32384663820266724, "rewards/rejected": -0.756423830986023, "step": 3360 }, { "epoch": 0.67, "learning_rate": 1.4513697938845571e-06, "logits/chosen": -1.6414800882339478, "logits/rejected": -1.1327059268951416, "logps/chosen": -178.80929565429688, "logps/rejected": -154.91824340820312, "loss": 0.6782, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5716148018836975, "rewards/margins": 0.2548719346523285, "rewards/rejected": -0.8264867663383484, "step": 3370 }, { "epoch": 0.68, "learning_rate": 1.4355517710873184e-06, "logits/chosen": -1.63149094581604, "logits/rejected": -1.0423767566680908, "logps/chosen": -191.518798828125, "logps/rejected": -160.09573364257812, "loss": 0.6771, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4427117705345154, "rewards/margins": 0.27881717681884766, "rewards/rejected": -0.7215288877487183, "step": 3380 }, { "epoch": 0.68, "learning_rate": 1.419785627995044e-06, "logits/chosen": -1.80740225315094, "logits/rejected": -1.427065134048462, "logps/chosen": -178.4989013671875, "logps/rejected": -173.86570739746094, "loss": 0.67, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5909635424613953, "rewards/margins": 0.28243017196655273, "rewards/rejected": -0.873393714427948, "step": 3390 }, { "epoch": 0.68, "learning_rate": 1.4040721330273063e-06, "logits/chosen": -1.920401930809021, "logits/rejected": -1.462092638015747, "logps/chosen": -214.32955932617188, "logps/rejected": -190.00753784179688, "loss": 0.6712, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5589178800582886, "rewards/margins": 0.306210458278656, "rewards/rejected": -0.8651283383369446, "step": 3400 }, { "epoch": 0.68, "eval_logits/chosen": -1.81253981590271, "eval_logits/rejected": -1.660036325454712, "eval_logps/chosen": -298.35675048828125, "eval_logps/rejected": -285.3079528808594, "eval_loss": 0.7083092331886292, "eval_rewards/accuracies": 0.46257483959198, "eval_rewards/chosen": -0.35445448756217957, "eval_rewards/margins": 0.03286496177315712, "eval_rewards/rejected": -0.3873194754123688, "eval_runtime": 1205.4313, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.277, "step": 3400 }, { "epoch": 0.68, "learning_rate": 1.388412052037682e-06, "logits/chosen": -1.7785974740982056, "logits/rejected": -1.4203349351882935, "logps/chosen": -192.811767578125, "logps/rejected": -171.76019287109375, "loss": 0.6723, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4202335774898529, "rewards/margins": 0.26975777745246887, "rewards/rejected": -0.6899913549423218, "step": 3410 }, { "epoch": 0.68, "learning_rate": 1.3728061482764238e-06, "logits/chosen": -1.6581220626831055, "logits/rejected": -1.1550885438919067, "logps/chosen": -193.2764434814453, "logps/rejected": -179.24307250976562, "loss": 0.6709, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5190607905387878, "rewards/margins": 0.2897750735282898, "rewards/rejected": -0.8088358640670776, "step": 3420 }, { "epoch": 0.69, "learning_rate": 1.3572551823532654e-06, "logits/chosen": -1.7457304000854492, "logits/rejected": -1.23434579372406, "logps/chosen": -226.9068603515625, "logps/rejected": -191.46719360351562, "loss": 0.6715, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5374705195426941, "rewards/margins": 0.40161556005477905, "rewards/rejected": -0.9390860795974731, "step": 3430 }, { "epoch": 0.69, "learning_rate": 1.3417599122003464e-06, "logits/chosen": -1.8964468240737915, "logits/rejected": -1.4676374197006226, "logps/chosen": -185.95840454101562, "logps/rejected": -180.59201049804688, "loss": 0.6745, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.643310546875, "rewards/margins": 0.23845314979553223, "rewards/rejected": -0.8817636370658875, "step": 3440 }, { "epoch": 0.69, "learning_rate": 1.3263210930352737e-06, "logits/chosen": -1.7557315826416016, "logits/rejected": -1.257134199142456, "logps/chosen": -181.4674530029297, "logps/rejected": -171.67306518554688, "loss": 0.6747, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5257083773612976, "rewards/margins": 0.34599000215530396, "rewards/rejected": -0.8716983795166016, "step": 3450 }, { "epoch": 0.69, "learning_rate": 1.3109394773243117e-06, "logits/chosen": -1.5881292819976807, "logits/rejected": -1.2004177570343018, "logps/chosen": -159.8760223388672, "logps/rejected": -157.93826293945312, "loss": 0.6739, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6627914309501648, "rewards/margins": 0.3108111023902893, "rewards/rejected": -0.9736024737358093, "step": 3460 }, { "epoch": 0.69, "learning_rate": 1.2956158147457116e-06, "logits/chosen": -1.7026065587997437, "logits/rejected": -1.3709561824798584, "logps/chosen": -169.76974487304688, "logps/rejected": -194.0834503173828, "loss": 0.6751, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5853132009506226, "rewards/margins": 0.21199026703834534, "rewards/rejected": -0.7973034977912903, "step": 3470 }, { "epoch": 0.7, "learning_rate": 1.280350852153168e-06, "logits/chosen": -1.8687498569488525, "logits/rejected": -1.358776330947876, "logps/chosen": -206.26736450195312, "logps/rejected": -176.5262908935547, "loss": 0.6722, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4876318573951721, "rewards/margins": 0.27240973711013794, "rewards/rejected": -0.7600415945053101, "step": 3480 }, { "epoch": 0.7, "learning_rate": 1.2651453335394232e-06, "logits/chosen": -1.5146903991699219, "logits/rejected": -1.217163324356079, "logps/chosen": -195.8373565673828, "logps/rejected": -178.4134521484375, "loss": 0.6727, "rewards/accuracies": 0.75, "rewards/chosen": -0.5755364298820496, "rewards/margins": 0.26309871673583984, "rewards/rejected": -0.8386351466178894, "step": 3490 }, { "epoch": 0.7, "learning_rate": 1.2500000000000007e-06, "logits/chosen": -1.6554126739501953, "logits/rejected": -1.3423373699188232, "logps/chosen": -199.19668579101562, "logps/rejected": -185.81301879882812, "loss": 0.6724, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6931096911430359, "rewards/margins": 0.2232220619916916, "rewards/rejected": -0.9163317680358887, "step": 3500 }, { "epoch": 0.7, "learning_rate": 1.234915589697091e-06, "logits/chosen": -1.7042205333709717, "logits/rejected": -1.418906807899475, "logps/chosen": -194.75537109375, "logps/rejected": -196.01670837402344, "loss": 0.6708, "rewards/accuracies": 0.75, "rewards/chosen": -0.6618725657463074, "rewards/margins": 0.2305436134338379, "rewards/rejected": -0.8924161791801453, "step": 3510 }, { "epoch": 0.7, "learning_rate": 1.2198928378235717e-06, "logits/chosen": -1.6770074367523193, "logits/rejected": -1.135080099105835, "logps/chosen": -229.1519775390625, "logps/rejected": -194.12881469726562, "loss": 0.6731, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7212926745414734, "rewards/margins": 0.30304259061813354, "rewards/rejected": -1.024335265159607, "step": 3520 }, { "epoch": 0.71, "learning_rate": 1.204932476567175e-06, "logits/chosen": -1.8147754669189453, "logits/rejected": -1.4164695739746094, "logps/chosen": -216.3401641845703, "logps/rejected": -204.9897003173828, "loss": 0.6716, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6598548889160156, "rewards/margins": 0.3428403437137604, "rewards/rejected": -1.0026952028274536, "step": 3530 }, { "epoch": 0.71, "learning_rate": 1.1900352350748026e-06, "logits/chosen": -1.6051445007324219, "logits/rejected": -1.298174262046814, "logps/chosen": -170.02926635742188, "logps/rejected": -164.7904510498047, "loss": 0.6757, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7009800672531128, "rewards/margins": 0.26382166147232056, "rewards/rejected": -0.9648016691207886, "step": 3540 }, { "epoch": 0.71, "learning_rate": 1.1752018394169882e-06, "logits/chosen": -1.6015777587890625, "logits/rejected": -1.064917802810669, "logps/chosen": -198.02069091796875, "logps/rejected": -194.80880737304688, "loss": 0.6717, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.5173773765563965, "rewards/margins": 0.3523986339569092, "rewards/rejected": -0.8697760701179504, "step": 3550 }, { "epoch": 0.71, "learning_rate": 1.160433012552508e-06, "logits/chosen": -1.809643030166626, "logits/rejected": -1.4172449111938477, "logps/chosen": -185.43850708007812, "logps/rejected": -160.5530242919922, "loss": 0.6756, "rewards/accuracies": 0.75, "rewards/chosen": -0.5354773998260498, "rewards/margins": 0.26742905378341675, "rewards/rejected": -0.8029063940048218, "step": 3560 }, { "epoch": 0.71, "learning_rate": 1.1457294742931508e-06, "logits/chosen": -2.0639119148254395, "logits/rejected": -1.5276662111282349, "logps/chosen": -216.6826934814453, "logps/rejected": -200.30308532714844, "loss": 0.6699, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.44229012727737427, "rewards/margins": 0.338875949382782, "rewards/rejected": -0.781166136264801, "step": 3570 }, { "epoch": 0.72, "learning_rate": 1.1310919412686248e-06, "logits/chosen": -1.7913734912872314, "logits/rejected": -1.5030537843704224, "logps/chosen": -175.69180297851562, "logps/rejected": -145.59341430664062, "loss": 0.6712, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5129600167274475, "rewards/margins": 0.19555626809597015, "rewards/rejected": -0.7085163593292236, "step": 3580 }, { "epoch": 0.72, "learning_rate": 1.11652112689164e-06, "logits/chosen": -1.6455204486846924, "logits/rejected": -1.2797602415084839, "logps/chosen": -180.64732360839844, "logps/rejected": -190.74850463867188, "loss": 0.6756, "rewards/accuracies": 0.875, "rewards/chosen": -0.5947072505950928, "rewards/margins": 0.34432297945022583, "rewards/rejected": -0.9390303492546082, "step": 3590 }, { "epoch": 0.72, "learning_rate": 1.1020177413231334e-06, "logits/chosen": -1.9448858499526978, "logits/rejected": -1.4615305662155151, "logps/chosen": -185.05404663085938, "logps/rejected": -189.876708984375, "loss": 0.6738, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5048798322677612, "rewards/margins": 0.2601305842399597, "rewards/rejected": -0.7650104761123657, "step": 3600 }, { "epoch": 0.72, "eval_logits/chosen": -1.7869839668273926, "eval_logits/rejected": -1.6367506980895996, "eval_logps/chosen": -303.0744323730469, "eval_logps/rejected": -291.3218688964844, "eval_loss": 0.7078058123588562, "eval_rewards/accuracies": 0.480538934469223, "eval_rewards/chosen": -0.401631236076355, "eval_rewards/margins": 0.04582706466317177, "eval_rewards/rejected": -0.44745832681655884, "eval_runtime": 1205.2219, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.277, "step": 3600 }, { "epoch": 0.72, "learning_rate": 1.0875824914376555e-06, "logits/chosen": -1.7037065029144287, "logits/rejected": -1.3454073667526245, "logps/chosen": -202.14193725585938, "logps/rejected": -187.86013793945312, "loss": 0.668, "rewards/accuracies": 0.75, "rewards/chosen": -0.6322699189186096, "rewards/margins": 0.25415295362472534, "rewards/rejected": -0.8864229321479797, "step": 3610 }, { "epoch": 0.72, "learning_rate": 1.073216080788921e-06, "logits/chosen": -1.5309890508651733, "logits/rejected": -1.0583572387695312, "logps/chosen": -207.77163696289062, "logps/rejected": -182.51760864257812, "loss": 0.6689, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.556273877620697, "rewards/margins": 0.3236701190471649, "rewards/rejected": -0.8799440264701843, "step": 3620 }, { "epoch": 0.73, "learning_rate": 1.0589192095755172e-06, "logits/chosen": -1.6590471267700195, "logits/rejected": -1.1675376892089844, "logps/chosen": -202.4744415283203, "logps/rejected": -186.37062072753906, "loss": 0.6754, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.5815357565879822, "rewards/margins": 0.3270363211631775, "rewards/rejected": -0.9085720777511597, "step": 3630 }, { "epoch": 0.73, "learning_rate": 1.0446925746067768e-06, "logits/chosen": -1.5526964664459229, "logits/rejected": -1.0625824928283691, "logps/chosen": -231.3070526123047, "logps/rejected": -187.42453002929688, "loss": 0.6712, "rewards/accuracies": 0.875, "rewards/chosen": -0.5511170029640198, "rewards/margins": 0.3349933922290802, "rewards/rejected": -0.8861104249954224, "step": 3640 }, { "epoch": 0.73, "learning_rate": 1.0305368692688175e-06, "logits/chosen": -1.8912636041641235, "logits/rejected": -1.094398856163025, "logps/chosen": -231.1025390625, "logps/rejected": -189.090576171875, "loss": 0.6733, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4953552782535553, "rewards/margins": 0.394867479801178, "rewards/rejected": -0.8902226686477661, "step": 3650 }, { "epoch": 0.73, "learning_rate": 1.0164527834907468e-06, "logits/chosen": -1.9566981792449951, "logits/rejected": -1.4186307191848755, "logps/chosen": -206.7559356689453, "logps/rejected": -203.34420776367188, "loss": 0.672, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5214945673942566, "rewards/margins": 0.30337831377983093, "rewards/rejected": -0.8248728513717651, "step": 3660 }, { "epoch": 0.73, "learning_rate": 1.0024410037110358e-06, "logits/chosen": -2.047588348388672, "logits/rejected": -1.6324981451034546, "logps/chosen": -208.0760040283203, "logps/rejected": -200.68235778808594, "loss": 0.6748, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5852575302124023, "rewards/margins": 0.2222602367401123, "rewards/rejected": -0.8075177073478699, "step": 3670 }, { "epoch": 0.74, "learning_rate": 9.88502212844063e-07, "logits/chosen": -1.7221946716308594, "logits/rejected": -1.4880034923553467, "logps/chosen": -177.04226684570312, "logps/rejected": -170.99478149414062, "loss": 0.6728, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5642322301864624, "rewards/margins": 0.2176445722579956, "rewards/rejected": -0.7818768620491028, "step": 3680 }, { "epoch": 0.74, "learning_rate": 9.746370902468311e-07, "logits/chosen": -1.5963990688323975, "logits/rejected": -1.2781301736831665, "logps/chosen": -163.08837890625, "logps/rejected": -159.89588928222656, "loss": 0.6731, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5696216225624084, "rewards/margins": 0.2693547010421753, "rewards/rejected": -0.838976263999939, "step": 3690 }, { "epoch": 0.74, "learning_rate": 9.608463116858544e-07, "logits/chosen": -1.7581331729888916, "logits/rejected": -1.188450813293457, "logps/chosen": -200.6759490966797, "logps/rejected": -200.36572265625, "loss": 0.6701, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5372466444969177, "rewards/margins": 0.3207955062389374, "rewards/rejected": -0.8580421209335327, "step": 3700 }, { "epoch": 0.74, "learning_rate": 9.471305493042243e-07, "logits/chosen": -1.7761567831039429, "logits/rejected": -1.2985825538635254, "logps/chosen": -185.90008544921875, "logps/rejected": -177.1260223388672, "loss": 0.6747, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6461032629013062, "rewards/margins": 0.29325252771377563, "rewards/rejected": -0.939355731010437, "step": 3710 }, { "epoch": 0.74, "learning_rate": 9.334904715888496e-07, "logits/chosen": -1.8852574825286865, "logits/rejected": -1.2985174655914307, "logps/chosen": -170.6287384033203, "logps/rejected": -167.50796508789062, "loss": 0.6704, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.575082540512085, "rewards/margins": 0.3643600046634674, "rewards/rejected": -0.9394424557685852, "step": 3720 }, { "epoch": 0.75, "learning_rate": 9.199267433378728e-07, "logits/chosen": -1.8409827947616577, "logits/rejected": -1.2738107442855835, "logps/chosen": -167.5386505126953, "logps/rejected": -168.35340881347656, "loss": 0.6706, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6160976886749268, "rewards/margins": 0.3155977129936218, "rewards/rejected": -0.9316954612731934, "step": 3730 }, { "epoch": 0.75, "learning_rate": 9.064400256282757e-07, "logits/chosen": -1.7190576791763306, "logits/rejected": -1.2332966327667236, "logps/chosen": -197.98031616210938, "logps/rejected": -176.07833862304688, "loss": 0.6707, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6226130723953247, "rewards/margins": 0.2778559923171997, "rewards/rejected": -0.9004691243171692, "step": 3740 }, { "epoch": 0.75, "learning_rate": 8.930309757836517e-07, "logits/chosen": -1.7965596914291382, "logits/rejected": -1.2462399005889893, "logps/chosen": -192.54928588867188, "logps/rejected": -190.23403930664062, "loss": 0.6702, "rewards/accuracies": 0.75, "rewards/chosen": -0.5591039657592773, "rewards/margins": 0.3023371398448944, "rewards/rejected": -0.8614411354064941, "step": 3750 }, { "epoch": 0.75, "learning_rate": 8.797002473421729e-07, "logits/chosen": -1.745701789855957, "logits/rejected": -1.3079488277435303, "logps/chosen": -208.14425659179688, "logps/rejected": -199.03729248046875, "loss": 0.6716, "rewards/accuracies": 0.75, "rewards/chosen": -0.6713908910751343, "rewards/margins": 0.23530849814414978, "rewards/rejected": -0.9066994786262512, "step": 3760 }, { "epoch": 0.75, "learning_rate": 8.664484900247363e-07, "logits/chosen": -1.939549207687378, "logits/rejected": -1.4720910787582397, "logps/chosen": -198.39797973632812, "logps/rejected": -189.9690399169922, "loss": 0.6756, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6088159084320068, "rewards/margins": 0.25305768847465515, "rewards/rejected": -0.8618736267089844, "step": 3770 }, { "epoch": 0.76, "learning_rate": 8.532763497032987e-07, "logits/chosen": -1.3646882772445679, "logits/rejected": -1.0704524517059326, "logps/chosen": -198.41883850097656, "logps/rejected": -197.7199249267578, "loss": 0.6689, "rewards/accuracies": 0.875, "rewards/chosen": -0.6586154699325562, "rewards/margins": 0.3101723790168762, "rewards/rejected": -0.9687877893447876, "step": 3780 }, { "epoch": 0.76, "learning_rate": 8.40184468369396e-07, "logits/chosen": -1.8213491439819336, "logits/rejected": -1.415886402130127, "logps/chosen": -179.46566772460938, "logps/rejected": -165.98655700683594, "loss": 0.6729, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5158207416534424, "rewards/margins": 0.26648831367492676, "rewards/rejected": -0.7823091149330139, "step": 3790 }, { "epoch": 0.76, "learning_rate": 8.271734841028553e-07, "logits/chosen": -1.58071768283844, "logits/rejected": -1.1844398975372314, "logps/chosen": -196.37979125976562, "logps/rejected": -188.96719360351562, "loss": 0.6748, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5554729104042053, "rewards/margins": 0.28293678164482117, "rewards/rejected": -0.8384097218513489, "step": 3800 }, { "epoch": 0.76, "eval_logits/chosen": -1.78749680519104, "eval_logits/rejected": -1.637012004852295, "eval_logps/chosen": -298.4959716796875, "eval_logps/rejected": -286.9418029785156, "eval_loss": 0.7085325717926025, "eval_rewards/accuracies": 0.47455090284347534, "eval_rewards/chosen": -0.35584694147109985, "eval_rewards/margins": 0.04781103506684303, "eval_rewards/rejected": -0.4036579132080078, "eval_runtime": 1205.0928, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 3800 }, { "epoch": 0.76, "learning_rate": 8.142440310406923e-07, "logits/chosen": -1.8937944173812866, "logits/rejected": -1.3807734251022339, "logps/chosen": -192.65541076660156, "logps/rejected": -180.99037170410156, "loss": 0.6693, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5554690361022949, "rewards/margins": 0.3488186299800873, "rewards/rejected": -0.9042876362800598, "step": 3810 }, { "epoch": 0.76, "learning_rate": 8.013967393462094e-07, "logits/chosen": -1.7869288921356201, "logits/rejected": -1.2285221815109253, "logps/chosen": -206.0532684326172, "logps/rejected": -180.30270385742188, "loss": 0.6753, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5787543058395386, "rewards/margins": 0.31541961431503296, "rewards/rejected": -0.8941739797592163, "step": 3820 }, { "epoch": 0.77, "learning_rate": 7.886322351782782e-07, "logits/chosen": -1.9376035928726196, "logits/rejected": -1.3230043649673462, "logps/chosen": -186.37889099121094, "logps/rejected": -177.75619506835938, "loss": 0.6728, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5859439373016357, "rewards/margins": 0.28381019830703735, "rewards/rejected": -0.8697541356086731, "step": 3830 }, { "epoch": 0.77, "learning_rate": 7.759511406608255e-07, "logits/chosen": -1.8294010162353516, "logits/rejected": -1.2814737558364868, "logps/chosen": -179.62429809570312, "logps/rejected": -178.21121215820312, "loss": 0.6715, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.500334620475769, "rewards/margins": 0.3481293320655823, "rewards/rejected": -0.8484638929367065, "step": 3840 }, { "epoch": 0.77, "learning_rate": 7.633540738525066e-07, "logits/chosen": -1.8094638586044312, "logits/rejected": -1.365720510482788, "logps/chosen": -186.24063110351562, "logps/rejected": -187.5486297607422, "loss": 0.6681, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5382263660430908, "rewards/margins": 0.27963364124298096, "rewards/rejected": -0.8178600072860718, "step": 3850 }, { "epoch": 0.77, "learning_rate": 7.508416487165862e-07, "logits/chosen": -1.827890396118164, "logits/rejected": -1.385149359703064, "logps/chosen": -192.24356079101562, "logps/rejected": -179.92501831054688, "loss": 0.6715, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5058096051216125, "rewards/margins": 0.299837201833725, "rewards/rejected": -0.8056467771530151, "step": 3860 }, { "epoch": 0.77, "learning_rate": 7.384144750910133e-07, "logits/chosen": -1.7652359008789062, "logits/rejected": -1.1517822742462158, "logps/chosen": -213.3308563232422, "logps/rejected": -193.3721160888672, "loss": 0.6749, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5714274644851685, "rewards/margins": 0.3523482382297516, "rewards/rejected": -0.9237756729125977, "step": 3870 }, { "epoch": 0.78, "learning_rate": 7.260731586586983e-07, "logits/chosen": -1.69609797000885, "logits/rejected": -1.3839670419692993, "logps/chosen": -193.49876403808594, "logps/rejected": -188.48577880859375, "loss": 0.6751, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6491511464118958, "rewards/margins": 0.24958404898643494, "rewards/rejected": -0.8987351655960083, "step": 3880 }, { "epoch": 0.78, "learning_rate": 7.138183009179922e-07, "logits/chosen": -1.502120018005371, "logits/rejected": -0.9810401797294617, "logps/chosen": -219.70486450195312, "logps/rejected": -200.8079071044922, "loss": 0.6689, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6440494060516357, "rewards/margins": 0.3348037600517273, "rewards/rejected": -0.9788532257080078, "step": 3890 }, { "epoch": 0.78, "learning_rate": 7.016504991533727e-07, "logits/chosen": -2.0070013999938965, "logits/rejected": -1.4300661087036133, "logps/chosen": -164.04586791992188, "logps/rejected": -163.64431762695312, "loss": 0.6725, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5484809279441833, "rewards/margins": 0.3103629946708679, "rewards/rejected": -0.858843982219696, "step": 3900 }, { "epoch": 0.78, "learning_rate": 6.895703464063319e-07, "logits/chosen": -1.8312900066375732, "logits/rejected": -1.4075819253921509, "logps/chosen": -201.4026336669922, "logps/rejected": -178.82730102539062, "loss": 0.6728, "rewards/accuracies": 0.75, "rewards/chosen": -0.5756638646125793, "rewards/margins": 0.2787787914276123, "rewards/rejected": -0.8544427156448364, "step": 3910 }, { "epoch": 0.78, "learning_rate": 6.775784314464717e-07, "logits/chosen": -1.8178908824920654, "logits/rejected": -1.3248305320739746, "logps/chosen": -173.04840087890625, "logps/rejected": -171.5256805419922, "loss": 0.6732, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6063294410705566, "rewards/margins": 0.24865618348121643, "rewards/rejected": -0.8549855947494507, "step": 3920 }, { "epoch": 0.79, "learning_rate": 6.656753387428089e-07, "logits/chosen": -1.885436773300171, "logits/rejected": -1.2840216159820557, "logps/chosen": -194.60150146484375, "logps/rejected": -197.9912567138672, "loss": 0.6716, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5607427358627319, "rewards/margins": 0.3318184018135071, "rewards/rejected": -0.892561137676239, "step": 3930 }, { "epoch": 0.79, "learning_rate": 6.538616484352902e-07, "logits/chosen": -1.750697135925293, "logits/rejected": -1.2652366161346436, "logps/chosen": -172.13278198242188, "logps/rejected": -161.23594665527344, "loss": 0.67, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.49689894914627075, "rewards/margins": 0.329582154750824, "rewards/rejected": -0.8264809846878052, "step": 3940 }, { "epoch": 0.79, "learning_rate": 6.421379363065142e-07, "logits/chosen": -1.8470205068588257, "logits/rejected": -1.5022964477539062, "logps/chosen": -195.92471313476562, "logps/rejected": -169.7156524658203, "loss": 0.6725, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5458836555480957, "rewards/margins": 0.24590294063091278, "rewards/rejected": -0.7917866110801697, "step": 3950 }, { "epoch": 0.79, "learning_rate": 6.305047737536707e-07, "logits/chosen": -1.8193261623382568, "logits/rejected": -1.2791240215301514, "logps/chosen": -214.3555450439453, "logps/rejected": -180.1163330078125, "loss": 0.669, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.5529198050498962, "rewards/margins": 0.33959296345710754, "rewards/rejected": -0.8925127983093262, "step": 3960 }, { "epoch": 0.79, "learning_rate": 6.189627277606894e-07, "logits/chosen": -1.4984296560287476, "logits/rejected": -1.2796761989593506, "logps/chosen": -173.42063903808594, "logps/rejected": -151.62713623046875, "loss": 0.6745, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6056338548660278, "rewards/margins": 0.23139724135398865, "rewards/rejected": -0.8370311856269836, "step": 3970 }, { "epoch": 0.8, "learning_rate": 6.075123608706093e-07, "logits/chosen": -1.5881633758544922, "logits/rejected": -1.0521494150161743, "logps/chosen": -185.86135864257812, "logps/rejected": -191.3014373779297, "loss": 0.6729, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6991164088249207, "rewards/margins": 0.289723664522171, "rewards/rejected": -0.9888402223587036, "step": 3980 }, { "epoch": 0.8, "learning_rate": 5.961542311581586e-07, "logits/chosen": -1.7202609777450562, "logits/rejected": -1.4178999662399292, "logps/chosen": -176.3819580078125, "logps/rejected": -168.06198120117188, "loss": 0.6733, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6899925470352173, "rewards/margins": 0.24278751015663147, "rewards/rejected": -0.9327800869941711, "step": 3990 }, { "epoch": 0.8, "learning_rate": 5.848888922025553e-07, "logits/chosen": -1.8253370523452759, "logits/rejected": -1.4263923168182373, "logps/chosen": -182.21066284179688, "logps/rejected": -178.00180053710938, "loss": 0.6746, "rewards/accuracies": 0.75, "rewards/chosen": -0.65577232837677, "rewards/margins": 0.26734721660614014, "rewards/rejected": -0.9231195449829102, "step": 4000 }, { "epoch": 0.8, "eval_logits/chosen": -1.7976564168930054, "eval_logits/rejected": -1.6464568376541138, "eval_logps/chosen": -298.402587890625, "eval_logps/rejected": -286.00457763671875, "eval_loss": 0.7096964120864868, "eval_rewards/accuracies": 0.4640718698501587, "eval_rewards/chosen": -0.3549129366874695, "eval_rewards/margins": 0.03937295079231262, "eval_rewards/rejected": -0.3942858874797821, "eval_runtime": 1205.0896, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 4000 }, { "epoch": 0.8, "learning_rate": 5.737168930605272e-07, "logits/chosen": -1.9285900592803955, "logits/rejected": -1.3563498258590698, "logps/chosen": -196.507080078125, "logps/rejected": -183.5740509033203, "loss": 0.6722, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.623957633972168, "rewards/margins": 0.2666872441768646, "rewards/rejected": -0.890644907951355, "step": 4010 }, { "epoch": 0.8, "learning_rate": 5.626387782395512e-07, "logits/chosen": -1.6308634281158447, "logits/rejected": -1.008528470993042, "logps/chosen": -179.19570922851562, "logps/rejected": -175.90206909179688, "loss": 0.6725, "rewards/accuracies": 0.875, "rewards/chosen": -0.6207667589187622, "rewards/margins": 0.36119014024734497, "rewards/rejected": -0.9819570779800415, "step": 4020 }, { "epoch": 0.81, "learning_rate": 5.516550876713142e-07, "logits/chosen": -1.8946183919906616, "logits/rejected": -1.218611240386963, "logps/chosen": -176.93856811523438, "logps/rejected": -176.0408477783203, "loss": 0.6708, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5685423612594604, "rewards/margins": 0.38361966609954834, "rewards/rejected": -0.952161967754364, "step": 4030 }, { "epoch": 0.81, "learning_rate": 5.407663566854008e-07, "logits/chosen": -1.6070003509521484, "logits/rejected": -1.1410613059997559, "logps/chosen": -185.14105224609375, "logps/rejected": -190.12478637695312, "loss": 0.6729, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5705904364585876, "rewards/margins": 0.247324138879776, "rewards/rejected": -0.8179146647453308, "step": 4040 }, { "epoch": 0.81, "learning_rate": 5.299731159831953e-07, "logits/chosen": -1.9737787246704102, "logits/rejected": -1.2990692853927612, "logps/chosen": -185.4322509765625, "logps/rejected": -166.9883575439453, "loss": 0.6707, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.45687493681907654, "rewards/margins": 0.33818405866622925, "rewards/rejected": -0.7950589656829834, "step": 4050 }, { "epoch": 0.81, "learning_rate": 5.192758916120236e-07, "logits/chosen": -1.7507244348526, "logits/rejected": -1.2934061288833618, "logps/chosen": -186.51319885253906, "logps/rejected": -186.97789001464844, "loss": 0.6749, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5511008501052856, "rewards/margins": 0.3101251423358917, "rewards/rejected": -0.861225962638855, "step": 4060 }, { "epoch": 0.81, "learning_rate": 5.086752049395094e-07, "logits/chosen": -1.623626947402954, "logits/rejected": -1.0202308893203735, "logps/chosen": -208.09317016601562, "logps/rejected": -187.53530883789062, "loss": 0.6711, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5106527209281921, "rewards/margins": 0.3777123987674713, "rewards/rejected": -0.8883651494979858, "step": 4070 }, { "epoch": 0.82, "learning_rate": 4.981715726281666e-07, "logits/chosen": -1.9409902095794678, "logits/rejected": -1.5242273807525635, "logps/chosen": -159.16062927246094, "logps/rejected": -165.08450317382812, "loss": 0.6753, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4970666468143463, "rewards/margins": 0.2737160921096802, "rewards/rejected": -0.7707827687263489, "step": 4080 }, { "epoch": 0.82, "learning_rate": 4.87765506610215e-07, "logits/chosen": -1.9073957204818726, "logits/rejected": -1.2996147871017456, "logps/chosen": -183.15225219726562, "logps/rejected": -186.7390594482422, "loss": 0.6713, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5374733805656433, "rewards/margins": 0.2871370315551758, "rewards/rejected": -0.8246104121208191, "step": 4090 }, { "epoch": 0.82, "learning_rate": 4.774575140626317e-07, "logits/chosen": -1.5984774827957153, "logits/rejected": -1.0741130113601685, "logps/chosen": -170.23326110839844, "logps/rejected": -167.40017700195312, "loss": 0.67, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5342054963111877, "rewards/margins": 0.2852395176887512, "rewards/rejected": -0.8194448351860046, "step": 4100 }, { "epoch": 0.82, "learning_rate": 4.672480973824312e-07, "logits/chosen": -1.7772527933120728, "logits/rejected": -1.3427586555480957, "logps/chosen": -177.45236206054688, "logps/rejected": -171.87107849121094, "loss": 0.6729, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.409841388463974, "rewards/margins": 0.2522704601287842, "rewards/rejected": -0.6621118783950806, "step": 4110 }, { "epoch": 0.82, "learning_rate": 4.5713775416217884e-07, "logits/chosen": -1.5848819017410278, "logits/rejected": -1.1666120290756226, "logps/chosen": -175.8500518798828, "logps/rejected": -178.95310974121094, "loss": 0.6711, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5583244562149048, "rewards/margins": 0.24911966919898987, "rewards/rejected": -0.8074442148208618, "step": 4120 }, { "epoch": 0.83, "learning_rate": 4.4712697716573994e-07, "logits/chosen": -1.6556593179702759, "logits/rejected": -1.1981847286224365, "logps/chosen": -202.71975708007812, "logps/rejected": -184.44332885742188, "loss": 0.6723, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.613344669342041, "rewards/margins": 0.2692212462425232, "rewards/rejected": -0.882565975189209, "step": 4130 }, { "epoch": 0.83, "learning_rate": 4.372162543042624e-07, "logits/chosen": -1.5903215408325195, "logits/rejected": -1.1515449285507202, "logps/chosen": -199.5992431640625, "logps/rejected": -173.73475646972656, "loss": 0.6808, "rewards/accuracies": 0.75, "rewards/chosen": -0.4988236427307129, "rewards/margins": 0.26213759183883667, "rewards/rejected": -0.7609611749649048, "step": 4140 }, { "epoch": 0.83, "learning_rate": 4.27406068612396e-07, "logits/chosen": -1.846687912940979, "logits/rejected": -1.224601149559021, "logps/chosen": -205.4477081298828, "logps/rejected": -203.91110229492188, "loss": 0.6658, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.4593885838985443, "rewards/margins": 0.4049278199672699, "rewards/rejected": -0.864316463470459, "step": 4150 }, { "epoch": 0.83, "learning_rate": 4.1769689822475147e-07, "logits/chosen": -2.0081117153167725, "logits/rejected": -1.4043834209442139, "logps/chosen": -191.9678192138672, "logps/rejected": -185.89596557617188, "loss": 0.6735, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5642850995063782, "rewards/margins": 0.29467257857322693, "rewards/rejected": -0.8589577674865723, "step": 4160 }, { "epoch": 0.83, "learning_rate": 4.0808921635259595e-07, "logits/chosen": -1.8327152729034424, "logits/rejected": -1.2812751531600952, "logps/chosen": -207.1615753173828, "logps/rejected": -171.39254760742188, "loss": 0.6734, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6103007197380066, "rewards/margins": 0.27335861325263977, "rewards/rejected": -0.8836593627929688, "step": 4170 }, { "epoch": 0.84, "learning_rate": 3.9858349126078945e-07, "logits/chosen": -1.7188329696655273, "logits/rejected": -1.1388109922409058, "logps/chosen": -180.25900268554688, "logps/rejected": -156.396484375, "loss": 0.6707, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.44871649146080017, "rewards/margins": 0.350297212600708, "rewards/rejected": -0.7990137338638306, "step": 4180 }, { "epoch": 0.84, "learning_rate": 3.891801862449629e-07, "logits/chosen": -1.8366130590438843, "logits/rejected": -1.2264559268951416, "logps/chosen": -184.10031127929688, "logps/rejected": -172.8234100341797, "loss": 0.6717, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5677688121795654, "rewards/margins": 0.290102481842041, "rewards/rejected": -0.8578712344169617, "step": 4190 }, { "epoch": 0.84, "learning_rate": 3.798797596089351e-07, "logits/chosen": -1.7443145513534546, "logits/rejected": -1.2919827699661255, "logps/chosen": -188.4905242919922, "logps/rejected": -167.1050567626953, "loss": 0.6772, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5437037944793701, "rewards/margins": 0.2575288414955139, "rewards/rejected": -0.801232635974884, "step": 4200 }, { "epoch": 0.84, "eval_logits/chosen": -1.8160959482192993, "eval_logits/rejected": -1.6639758348464966, "eval_logps/chosen": -295.7154846191406, "eval_logps/rejected": -283.0742492675781, "eval_loss": 0.7087658643722534, "eval_rewards/accuracies": 0.46107783913612366, "eval_rewards/chosen": -0.328041672706604, "eval_rewards/margins": 0.03694087266921997, "eval_rewards/rejected": -0.364982545375824, "eval_runtime": 1205.1541, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 4200 }, { "epoch": 0.84, "learning_rate": 3.7068266464238085e-07, "logits/chosen": -1.576219081878662, "logits/rejected": -1.1790491342544556, "logps/chosen": -206.9523468017578, "logps/rejected": -197.0227508544922, "loss": 0.6728, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5859234929084778, "rewards/margins": 0.31404975056648254, "rewards/rejected": -0.8999732732772827, "step": 4210 }, { "epoch": 0.84, "learning_rate": 3.615893495987335e-07, "logits/chosen": -1.8123877048492432, "logits/rejected": -1.4773552417755127, "logps/chosen": -184.19911193847656, "logps/rejected": -190.65353393554688, "loss": 0.6727, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6021177768707275, "rewards/margins": 0.2756933867931366, "rewards/rejected": -0.8778111338615417, "step": 4220 }, { "epoch": 0.85, "learning_rate": 3.5260025767333894e-07, "logits/chosen": -1.9461915493011475, "logits/rejected": -1.5374952554702759, "logps/chosen": -195.1204071044922, "logps/rejected": -205.6290283203125, "loss": 0.6726, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4397260546684265, "rewards/margins": 0.2890568673610687, "rewards/rejected": -0.7287830114364624, "step": 4230 }, { "epoch": 0.85, "learning_rate": 3.4371582698185636e-07, "logits/chosen": -1.539830207824707, "logits/rejected": -0.9317470788955688, "logps/chosen": -212.15060424804688, "logps/rejected": -168.56954956054688, "loss": 0.6707, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6420568227767944, "rewards/margins": 0.31589242815971375, "rewards/rejected": -0.957949161529541, "step": 4240 }, { "epoch": 0.85, "learning_rate": 3.3493649053890325e-07, "logits/chosen": -1.9007657766342163, "logits/rejected": -1.4113733768463135, "logps/chosen": -191.63870239257812, "logps/rejected": -173.3266143798828, "loss": 0.6741, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.49786463379859924, "rewards/margins": 0.23661330342292786, "rewards/rejected": -0.7344778776168823, "step": 4250 }, { "epoch": 0.85, "learning_rate": 3.262626762369525e-07, "logits/chosen": -1.7256009578704834, "logits/rejected": -1.1958563327789307, "logps/chosen": -196.4643096923828, "logps/rejected": -174.62197875976562, "loss": 0.6745, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4610671103000641, "rewards/margins": 0.3397568166255951, "rewards/rejected": -0.8008238077163696, "step": 4260 }, { "epoch": 0.85, "learning_rate": 3.176948068254762e-07, "logits/chosen": -1.6623731851577759, "logits/rejected": -1.1622531414031982, "logps/chosen": -185.16506958007812, "logps/rejected": -182.66482543945312, "loss": 0.6715, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5583752393722534, "rewards/margins": 0.31809231638908386, "rewards/rejected": -0.8764675855636597, "step": 4270 }, { "epoch": 0.86, "learning_rate": 3.092332998903416e-07, "logits/chosen": -1.721273422241211, "logits/rejected": -1.1369974613189697, "logps/chosen": -205.56039428710938, "logps/rejected": -180.2034912109375, "loss": 0.6743, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5317269563674927, "rewards/margins": 0.27362215518951416, "rewards/rejected": -0.8053489923477173, "step": 4280 }, { "epoch": 0.86, "learning_rate": 3.0087856783345916e-07, "logits/chosen": -1.9104810953140259, "logits/rejected": -1.3369901180267334, "logps/chosen": -202.73068237304688, "logps/rejected": -179.4395751953125, "loss": 0.6725, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5802592635154724, "rewards/margins": 0.34563058614730835, "rewards/rejected": -0.925889790058136, "step": 4290 }, { "epoch": 0.86, "learning_rate": 2.9263101785268253e-07, "logits/chosen": -1.8083654642105103, "logits/rejected": -1.333041787147522, "logps/chosen": -173.97024536132812, "logps/rejected": -149.26536560058594, "loss": 0.6722, "rewards/accuracies": 0.75, "rewards/chosen": -0.48276376724243164, "rewards/margins": 0.33063310384750366, "rewards/rejected": -0.8133969306945801, "step": 4300 }, { "epoch": 0.86, "learning_rate": 2.844910519219632e-07, "logits/chosen": -1.747718095779419, "logits/rejected": -1.289015293121338, "logps/chosen": -210.01431274414062, "logps/rejected": -194.6765594482422, "loss": 0.6735, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5048564672470093, "rewards/margins": 0.2843233346939087, "rewards/rejected": -0.789179801940918, "step": 4310 }, { "epoch": 0.86, "learning_rate": 2.764590667717562e-07, "logits/chosen": -1.5733174085617065, "logits/rejected": -1.1934467554092407, "logps/chosen": -188.82675170898438, "logps/rejected": -207.0797119140625, "loss": 0.6736, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5059980154037476, "rewards/margins": 0.27098190784454346, "rewards/rejected": -0.7769799828529358, "step": 4320 }, { "epoch": 0.87, "learning_rate": 2.6853545386968607e-07, "logits/chosen": -1.7283899784088135, "logits/rejected": -1.334378719329834, "logps/chosen": -182.7091827392578, "logps/rejected": -159.2908172607422, "loss": 0.6713, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4200921058654785, "rewards/margins": 0.33389562368392944, "rewards/rejected": -0.7539876699447632, "step": 4330 }, { "epoch": 0.87, "learning_rate": 2.6072059940146775e-07, "logits/chosen": -1.6910960674285889, "logits/rejected": -1.2752957344055176, "logps/chosen": -179.72256469726562, "logps/rejected": -169.8737030029297, "loss": 0.6727, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.47842615842819214, "rewards/margins": 0.2933879792690277, "rewards/rejected": -0.7718141674995422, "step": 4340 }, { "epoch": 0.87, "learning_rate": 2.53014884252083e-07, "logits/chosen": -1.7505544424057007, "logits/rejected": -1.4791407585144043, "logps/chosen": -167.2095184326172, "logps/rejected": -187.50631713867188, "loss": 0.6737, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6435413360595703, "rewards/margins": 0.22436794638633728, "rewards/rejected": -0.86790931224823, "step": 4350 }, { "epoch": 0.87, "learning_rate": 2.454186839872158e-07, "logits/chosen": -1.7023128271102905, "logits/rejected": -1.2403013706207275, "logps/chosen": -179.22369384765625, "logps/rejected": -170.2850799560547, "loss": 0.6766, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5110377073287964, "rewards/margins": 0.3025279939174652, "rewards/rejected": -0.813565731048584, "step": 4360 }, { "epoch": 0.87, "learning_rate": 2.3793236883495164e-07, "logits/chosen": -1.7260563373565674, "logits/rejected": -1.2105354070663452, "logps/chosen": -185.25547790527344, "logps/rejected": -175.2529754638672, "loss": 0.6737, "rewards/accuracies": 0.875, "rewards/chosen": -0.5668208003044128, "rewards/margins": 0.37687572836875916, "rewards/rejected": -0.9436964988708496, "step": 4370 }, { "epoch": 0.88, "learning_rate": 2.3055630366772857e-07, "logits/chosen": -1.60723078250885, "logits/rejected": -1.069088101387024, "logps/chosen": -174.3738555908203, "logps/rejected": -163.77371215820312, "loss": 0.6712, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.6688398122787476, "rewards/margins": 0.36071985960006714, "rewards/rejected": -1.02955961227417, "step": 4380 }, { "epoch": 0.88, "learning_rate": 2.2329084798455747e-07, "logits/chosen": -1.8668386936187744, "logits/rejected": -1.5328328609466553, "logps/chosen": -208.60018920898438, "logps/rejected": -194.89724731445312, "loss": 0.6742, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6224187612533569, "rewards/margins": 0.23320558667182922, "rewards/rejected": -0.8556243777275085, "step": 4390 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.571468472480774, "logits/rejected": -1.0496834516525269, "logps/chosen": -182.20823669433594, "logps/rejected": -172.04624938964844, "loss": 0.6718, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.564174473285675, "rewards/margins": 0.323714941740036, "rewards/rejected": -0.8878893852233887, "step": 4400 }, { "epoch": 0.88, "eval_logits/chosen": -1.8062490224838257, "eval_logits/rejected": -1.6549919843673706, "eval_logps/chosen": -295.5823669433594, "eval_logps/rejected": -282.7409973144531, "eval_loss": 0.7082103490829468, "eval_rewards/accuracies": 0.4565868377685547, "eval_rewards/chosen": -0.32671090960502625, "eval_rewards/margins": 0.03493915870785713, "eval_rewards/rejected": -0.3616500794887543, "eval_runtime": 1205.3175, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.277, "step": 4400 }, { "epoch": 0.88, "learning_rate": 2.0909317609440093e-07, "logits/chosen": -1.6453841924667358, "logits/rejected": -1.2830721139907837, "logps/chosen": -174.14157104492188, "logps/rejected": -184.1593017578125, "loss": 0.6759, "rewards/accuracies": 0.75, "rewards/chosen": -0.40785837173461914, "rewards/margins": 0.32242149114608765, "rewards/rejected": -0.730279803276062, "step": 4410 }, { "epoch": 0.88, "learning_rate": 2.0216165186191406e-07, "logits/chosen": -1.7091939449310303, "logits/rejected": -1.273643136024475, "logps/chosen": -180.9165802001953, "logps/rejected": -171.9414825439453, "loss": 0.6734, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4932243824005127, "rewards/margins": 0.24664032459259033, "rewards/rejected": -0.739864706993103, "step": 4420 }, { "epoch": 0.89, "learning_rate": 1.95342121028749e-07, "logits/chosen": -1.7675275802612305, "logits/rejected": -1.1961297988891602, "logps/chosen": -192.86929321289062, "logps/rejected": -173.74502563476562, "loss": 0.6727, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.4724040925502777, "rewards/margins": 0.3260127902030945, "rewards/rejected": -0.7984168529510498, "step": 4430 }, { "epoch": 0.89, "learning_rate": 1.8863491596921745e-07, "logits/chosen": -1.956656813621521, "logits/rejected": -1.537095308303833, "logps/chosen": -173.4440460205078, "logps/rejected": -180.49203491210938, "loss": 0.6778, "rewards/accuracies": 0.75, "rewards/chosen": -0.4398283362388611, "rewards/margins": 0.2562447488307953, "rewards/rejected": -0.6960731744766235, "step": 4440 }, { "epoch": 0.89, "learning_rate": 1.8204036358303173e-07, "logits/chosen": -1.8156732320785522, "logits/rejected": -1.4116982221603394, "logps/chosen": -173.31948852539062, "logps/rejected": -163.6458740234375, "loss": 0.6707, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5265036821365356, "rewards/margins": 0.2501595616340637, "rewards/rejected": -0.7766631841659546, "step": 4450 }, { "epoch": 0.89, "learning_rate": 1.7555878527937164e-07, "logits/chosen": -1.9093611240386963, "logits/rejected": -1.3668051958084106, "logps/chosen": -174.124755859375, "logps/rejected": -165.9483642578125, "loss": 0.6762, "rewards/accuracies": 0.75, "rewards/chosen": -0.5742958188056946, "rewards/margins": 0.29118460416793823, "rewards/rejected": -0.8654803037643433, "step": 4460 }, { "epoch": 0.89, "learning_rate": 1.6919049696121957e-07, "logits/chosen": -1.9973357915878296, "logits/rejected": -1.5348784923553467, "logps/chosen": -166.29335021972656, "logps/rejected": -165.93771362304688, "loss": 0.6717, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5555218458175659, "rewards/margins": 0.2896382510662079, "rewards/rejected": -0.8451600074768066, "step": 4470 }, { "epoch": 0.9, "learning_rate": 1.629358090099639e-07, "logits/chosen": -1.7543766498565674, "logits/rejected": -1.1764631271362305, "logps/chosen": -215.9904022216797, "logps/rejected": -183.74981689453125, "loss": 0.6726, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5935664772987366, "rewards/margins": 0.3655681014060974, "rewards/rejected": -0.9591344594955444, "step": 4480 }, { "epoch": 0.9, "learning_rate": 1.567950262702714e-07, "logits/chosen": -1.9481292963027954, "logits/rejected": -1.3625082969665527, "logps/chosen": -181.55307006835938, "logps/rejected": -169.8749237060547, "loss": 0.6745, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4361632466316223, "rewards/margins": 0.3200603723526001, "rewards/rejected": -0.7562234997749329, "step": 4490 }, { "epoch": 0.9, "learning_rate": 1.507684480352292e-07, "logits/chosen": -2.0509183406829834, "logits/rejected": -1.6212412118911743, "logps/chosen": -168.0310516357422, "logps/rejected": -183.0922393798828, "loss": 0.6728, "rewards/accuracies": 0.75, "rewards/chosen": -0.47675904631614685, "rewards/margins": 0.22058792412281036, "rewards/rejected": -0.6973469853401184, "step": 4500 }, { "epoch": 0.9, "learning_rate": 1.4485636803175828e-07, "logits/chosen": -1.9481201171875, "logits/rejected": -1.3651478290557861, "logps/chosen": -184.26443481445312, "logps/rejected": -189.3113250732422, "loss": 0.671, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.42765411734580994, "rewards/margins": 0.29031842947006226, "rewards/rejected": -0.7179726362228394, "step": 4510 }, { "epoch": 0.9, "learning_rate": 1.3905907440629752e-07, "logits/chosen": -1.8317391872406006, "logits/rejected": -1.408451795578003, "logps/chosen": -199.95281982421875, "logps/rejected": -171.4874267578125, "loss": 0.6738, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5488930940628052, "rewards/margins": 0.2850055694580078, "rewards/rejected": -0.833898663520813, "step": 4520 }, { "epoch": 0.91, "learning_rate": 1.3337684971075932e-07, "logits/chosen": -1.899253487586975, "logits/rejected": -1.2733862400054932, "logps/chosen": -155.16029357910156, "logps/rejected": -159.66885375976562, "loss": 0.6723, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4488608241081238, "rewards/margins": 0.2676648497581482, "rewards/rejected": -0.716525673866272, "step": 4530 }, { "epoch": 0.91, "learning_rate": 1.278099708887587e-07, "logits/chosen": -1.7261450290679932, "logits/rejected": -1.332425832748413, "logps/chosen": -190.36685180664062, "logps/rejected": -168.84475708007812, "loss": 0.6687, "rewards/accuracies": 0.875, "rewards/chosen": -0.6135702133178711, "rewards/margins": 0.2488062083721161, "rewards/rejected": -0.8623763918876648, "step": 4540 }, { "epoch": 0.91, "learning_rate": 1.223587092621162e-07, "logits/chosen": -1.6139081716537476, "logits/rejected": -1.13650381565094, "logps/chosen": -204.7310028076172, "logps/rejected": -171.27403259277344, "loss": 0.6746, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.556434154510498, "rewards/margins": 0.27506059408187866, "rewards/rejected": -0.8314948081970215, "step": 4550 }, { "epoch": 0.91, "learning_rate": 1.1702333051763271e-07, "logits/chosen": -1.695517897605896, "logits/rejected": -1.1595228910446167, "logps/chosen": -193.17726135253906, "logps/rejected": -163.34637451171875, "loss": 0.6727, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5821114778518677, "rewards/margins": 0.29333314299583435, "rewards/rejected": -0.8754447102546692, "step": 4560 }, { "epoch": 0.91, "learning_rate": 1.1180409469414094e-07, "logits/chosen": -1.6763149499893188, "logits/rejected": -1.4093079566955566, "logps/chosen": -208.39877319335938, "logps/rejected": -182.16384887695312, "loss": 0.6722, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8314476013183594, "rewards/margins": 0.22801156342029572, "rewards/rejected": -1.0594592094421387, "step": 4570 }, { "epoch": 0.92, "learning_rate": 1.067012561698319e-07, "logits/chosen": -2.0835366249084473, "logits/rejected": -1.3751837015151978, "logps/chosen": -221.61215209960938, "logps/rejected": -180.8863525390625, "loss": 0.6739, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3898591101169586, "rewards/margins": 0.4190775454044342, "rewards/rejected": -0.8089367151260376, "step": 4580 }, { "epoch": 0.92, "learning_rate": 1.0171506364985622e-07, "logits/chosen": -1.842432975769043, "logits/rejected": -1.3106482028961182, "logps/chosen": -171.06387329101562, "logps/rejected": -180.35220336914062, "loss": 0.6747, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6240432858467102, "rewards/margins": 0.2646729350090027, "rewards/rejected": -0.8887161016464233, "step": 4590 }, { "epoch": 0.92, "learning_rate": 9.684576015420277e-08, "logits/chosen": -1.6674339771270752, "logits/rejected": -1.254866600036621, "logps/chosen": -189.98513793945312, "logps/rejected": -185.12417602539062, "loss": 0.6737, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6010580658912659, "rewards/margins": 0.28078368306159973, "rewards/rejected": -0.8818416595458984, "step": 4600 }, { "epoch": 0.92, "eval_logits/chosen": -1.8008819818496704, "eval_logits/rejected": -1.6498563289642334, "eval_logps/chosen": -297.0698547363281, "eval_logps/rejected": -284.5475158691406, "eval_loss": 0.708510160446167, "eval_rewards/accuracies": 0.465568870306015, "eval_rewards/chosen": -0.34158575534820557, "eval_rewards/margins": 0.03812955692410469, "eval_rewards/rejected": -0.37971529364585876, "eval_runtime": 1204.7488, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 4600 }, { "epoch": 0.92, "learning_rate": 9.209358300585474e-08, "logits/chosen": -1.8109592199325562, "logits/rejected": -1.2240961790084839, "logps/chosen": -187.54299926757812, "logps/rejected": -165.949951171875, "loss": 0.6684, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5721608400344849, "rewards/margins": 0.3126206696033478, "rewards/rejected": -0.8847814798355103, "step": 4610 }, { "epoch": 0.92, "learning_rate": 8.745876381922147e-08, "logits/chosen": -1.8813289403915405, "logits/rejected": -1.3099462985992432, "logps/chosen": -161.86569213867188, "logps/rejected": -145.15042114257812, "loss": 0.6705, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5139293074607849, "rewards/margins": 0.2341819554567337, "rewards/rejected": -0.7481111884117126, "step": 4620 }, { "epoch": 0.93, "learning_rate": 8.294152848885156e-08, "logits/chosen": -1.7342445850372314, "logits/rejected": -1.3131603002548218, "logps/chosen": -175.2368927001953, "logps/rejected": -174.12747192382812, "loss": 0.6762, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5319586992263794, "rewards/margins": 0.22955286502838135, "rewards/rejected": -0.7615114450454712, "step": 4630 }, { "epoch": 0.93, "learning_rate": 7.854209717842231e-08, "logits/chosen": -1.6120736598968506, "logits/rejected": -1.190489649772644, "logps/chosen": -185.02987670898438, "logps/rejected": -189.67172241210938, "loss": 0.6705, "rewards/accuracies": 0.75, "rewards/chosen": -0.5775774717330933, "rewards/margins": 0.29847708344459534, "rewards/rejected": -0.876054584980011, "step": 4640 }, { "epoch": 0.93, "learning_rate": 7.426068431000883e-08, "logits/chosen": -1.7405284643173218, "logits/rejected": -1.3551548719406128, "logps/chosen": -176.1534881591797, "logps/rejected": -179.88807678222656, "loss": 0.6716, "rewards/accuracies": 0.75, "rewards/chosen": -0.5440041422843933, "rewards/margins": 0.23807832598686218, "rewards/rejected": -0.7820824980735779, "step": 4650 }, { "epoch": 0.93, "learning_rate": 7.009749855363457e-08, "logits/chosen": -1.755078911781311, "logits/rejected": -1.4670668840408325, "logps/chosen": -181.12307739257812, "logps/rejected": -176.3338623046875, "loss": 0.6732, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5007195472717285, "rewards/margins": 0.2364724576473236, "rewards/rejected": -0.7371920347213745, "step": 4660 }, { "epoch": 0.93, "learning_rate": 6.605274281709929e-08, "logits/chosen": -1.7022626399993896, "logits/rejected": -1.1175634860992432, "logps/chosen": -175.1276092529297, "logps/rejected": -176.94827270507812, "loss": 0.6755, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6607189774513245, "rewards/margins": 0.3381043076515198, "rewards/rejected": -0.998823344707489, "step": 4670 }, { "epoch": 0.94, "learning_rate": 6.212661423609184e-08, "logits/chosen": -1.849618673324585, "logits/rejected": -1.5684844255447388, "logps/chosen": -175.68008422851562, "logps/rejected": -159.04473876953125, "loss": 0.6742, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6384822130203247, "rewards/margins": 0.2531701326370239, "rewards/rejected": -0.8916522860527039, "step": 4680 }, { "epoch": 0.94, "learning_rate": 5.83193041645802e-08, "logits/chosen": -1.8055998086929321, "logits/rejected": -1.3125183582305908, "logps/chosen": -183.54286193847656, "logps/rejected": -167.0206298828125, "loss": 0.6689, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5319105386734009, "rewards/margins": 0.30655089020729065, "rewards/rejected": -0.8384615182876587, "step": 4690 }, { "epoch": 0.94, "learning_rate": 5.463099816548578e-08, "logits/chosen": -1.6356470584869385, "logits/rejected": -1.20809006690979, "logps/chosen": -187.17367553710938, "logps/rejected": -178.281005859375, "loss": 0.6728, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6296504735946655, "rewards/margins": 0.2865646481513977, "rewards/rejected": -0.9162149429321289, "step": 4700 }, { "epoch": 0.94, "learning_rate": 5.106187600163987e-08, "logits/chosen": -1.8005040884017944, "logits/rejected": -1.401289701461792, "logps/chosen": -156.47804260253906, "logps/rejected": -159.2554168701172, "loss": 0.6761, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.47016221284866333, "rewards/margins": 0.26268109679222107, "rewards/rejected": -0.732843279838562, "step": 4710 }, { "epoch": 0.94, "learning_rate": 4.761211162702117e-08, "logits/chosen": -2.0628628730773926, "logits/rejected": -1.565312385559082, "logps/chosen": -182.67739868164062, "logps/rejected": -166.28005981445312, "loss": 0.6765, "rewards/accuracies": 0.75, "rewards/chosen": -0.3503957688808441, "rewards/margins": 0.3000656068325043, "rewards/rejected": -0.6504613757133484, "step": 4720 }, { "epoch": 0.95, "learning_rate": 4.428187317827848e-08, "logits/chosen": -1.7401096820831299, "logits/rejected": -1.433260202407837, "logps/chosen": -165.39480590820312, "logps/rejected": -177.46865844726562, "loss": 0.6719, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5266124606132507, "rewards/margins": 0.24915286898612976, "rewards/rejected": -0.7757654190063477, "step": 4730 }, { "epoch": 0.95, "learning_rate": 4.1071322966535487e-08, "logits/chosen": -1.7316057682037354, "logits/rejected": -1.3491495847702026, "logps/chosen": -219.42092895507812, "logps/rejected": -217.3855743408203, "loss": 0.6714, "rewards/accuracies": 0.75, "rewards/chosen": -0.5418974757194519, "rewards/margins": 0.24152985215187073, "rewards/rejected": -0.783427357673645, "step": 4740 }, { "epoch": 0.95, "learning_rate": 3.798061746947995e-08, "logits/chosen": -1.8085205554962158, "logits/rejected": -1.3396368026733398, "logps/chosen": -197.12074279785156, "logps/rejected": -173.08389282226562, "loss": 0.6698, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5481420159339905, "rewards/margins": 0.26563820242881775, "rewards/rejected": -0.8137801885604858, "step": 4750 }, { "epoch": 0.95, "learning_rate": 3.5009907323737826e-08, "logits/chosen": -1.9135634899139404, "logits/rejected": -1.2425081729888916, "logps/chosen": -216.3907012939453, "logps/rejected": -223.52078247070312, "loss": 0.6762, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5003129243850708, "rewards/margins": 0.3249271810054779, "rewards/rejected": -0.8252401351928711, "step": 4760 }, { "epoch": 0.95, "learning_rate": 3.2159337317530234e-08, "logits/chosen": -1.7303504943847656, "logits/rejected": -1.3973544836044312, "logps/chosen": -207.5354766845703, "logps/rejected": -177.95912170410156, "loss": 0.6759, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5838102102279663, "rewards/margins": 0.22909550368785858, "rewards/rejected": -0.8129056692123413, "step": 4770 }, { "epoch": 0.96, "learning_rate": 2.9429046383618042e-08, "logits/chosen": -1.7817106246948242, "logits/rejected": -1.1402701139450073, "logps/chosen": -207.01284790039062, "logps/rejected": -180.07159423828125, "loss": 0.6743, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5036884546279907, "rewards/margins": 0.3352360129356384, "rewards/rejected": -0.8389245271682739, "step": 4780 }, { "epoch": 0.96, "learning_rate": 2.681916759252917e-08, "logits/chosen": -1.6812810897827148, "logits/rejected": -1.2598252296447754, "logps/chosen": -188.9176788330078, "logps/rejected": -171.4733123779297, "loss": 0.6766, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6232370138168335, "rewards/margins": 0.30141279101371765, "rewards/rejected": -0.924649715423584, "step": 4790 }, { "epoch": 0.96, "learning_rate": 2.4329828146074096e-08, "logits/chosen": -1.6698875427246094, "logits/rejected": -1.1878182888031006, "logps/chosen": -187.8237762451172, "logps/rejected": -180.69232177734375, "loss": 0.6742, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6534579396247864, "rewards/margins": 0.26255983114242554, "rewards/rejected": -0.9160177111625671, "step": 4800 }, { "epoch": 0.96, "eval_logits/chosen": -1.8017534017562866, "eval_logits/rejected": -1.650782585144043, "eval_logps/chosen": -296.7779846191406, "eval_logps/rejected": -284.22174072265625, "eval_loss": 0.7084687352180481, "eval_rewards/accuracies": 0.4715568721294403, "eval_rewards/chosen": -0.3386666178703308, "eval_rewards/margins": 0.03779071569442749, "eval_rewards/rejected": -0.3764573335647583, "eval_runtime": 1205.0147, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 4800 }, { "epoch": 0.96, "learning_rate": 2.1961149371145795e-08, "logits/chosen": -1.9120067358016968, "logits/rejected": -1.4456514120101929, "logps/chosen": -203.36590576171875, "logps/rejected": -183.9966278076172, "loss": 0.6712, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5464068651199341, "rewards/margins": 0.3400748074054718, "rewards/rejected": -0.8864815831184387, "step": 4810 }, { "epoch": 0.96, "learning_rate": 1.9713246713805588e-08, "logits/chosen": -1.8910915851593018, "logits/rejected": -1.4132921695709229, "logps/chosen": -183.93777465820312, "logps/rejected": -160.75390625, "loss": 0.6717, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5025268793106079, "rewards/margins": 0.2992040514945984, "rewards/rejected": -0.8017309308052063, "step": 4820 }, { "epoch": 0.97, "learning_rate": 1.7586229733657646e-08, "logits/chosen": -1.8121448755264282, "logits/rejected": -1.3479695320129395, "logps/chosen": -176.1195068359375, "logps/rejected": -166.59974670410156, "loss": 0.6745, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5948446989059448, "rewards/margins": 0.30557698011398315, "rewards/rejected": -0.9004216194152832, "step": 4830 }, { "epoch": 0.97, "learning_rate": 1.5580202098509078e-08, "logits/chosen": -1.863114356994629, "logits/rejected": -1.2220425605773926, "logps/chosen": -198.22967529296875, "logps/rejected": -185.35479736328125, "loss": 0.6756, "rewards/accuracies": 0.75, "rewards/chosen": -0.5680817365646362, "rewards/margins": 0.3286473751068115, "rewards/rejected": -0.8967291116714478, "step": 4840 }, { "epoch": 0.97, "learning_rate": 1.3695261579316776e-08, "logits/chosen": -1.558516263961792, "logits/rejected": -1.0727132558822632, "logps/chosen": -192.1417236328125, "logps/rejected": -194.68316650390625, "loss": 0.6679, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.47971630096435547, "rewards/margins": 0.30231860280036926, "rewards/rejected": -0.7820348739624023, "step": 4850 }, { "epoch": 0.97, "learning_rate": 1.193150004542204e-08, "logits/chosen": -1.8970377445220947, "logits/rejected": -1.4011037349700928, "logps/chosen": -183.6541290283203, "logps/rejected": -173.0128936767578, "loss": 0.6721, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5196717977523804, "rewards/margins": 0.30451592803001404, "rewards/rejected": -0.8241878747940063, "step": 4860 }, { "epoch": 0.97, "learning_rate": 1.0289003460074165e-08, "logits/chosen": -1.7511240243911743, "logits/rejected": -1.3778473138809204, "logps/chosen": -178.9451141357422, "logps/rejected": -185.10305786132812, "loss": 0.6761, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6295135021209717, "rewards/margins": 0.2775951027870178, "rewards/rejected": -0.9071086049079895, "step": 4870 }, { "epoch": 0.98, "learning_rate": 8.767851876239075e-09, "logits/chosen": -1.813463568687439, "logits/rejected": -1.2299635410308838, "logps/chosen": -194.5047149658203, "logps/rejected": -172.47569274902344, "loss": 0.6717, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5858784914016724, "rewards/margins": 0.32422253489494324, "rewards/rejected": -0.910101056098938, "step": 4880 }, { "epoch": 0.98, "learning_rate": 7.368119432699383e-09, "logits/chosen": -1.7416889667510986, "logits/rejected": -1.337124228477478, "logps/chosen": -208.8426971435547, "logps/rejected": -191.39517211914062, "loss": 0.6766, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5619935989379883, "rewards/margins": 0.2784048914909363, "rewards/rejected": -0.8403984308242798, "step": 4890 }, { "epoch": 0.98, "learning_rate": 6.089874350439507e-09, "logits/chosen": -1.5168983936309814, "logits/rejected": -1.0561869144439697, "logps/chosen": -177.4029541015625, "logps/rejected": -166.50682067871094, "loss": 0.6761, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5840083360671997, "rewards/margins": 0.2635114789009094, "rewards/rejected": -0.8475197553634644, "step": 4900 }, { "epoch": 0.98, "learning_rate": 4.933178929321103e-09, "logits/chosen": -1.8010742664337158, "logits/rejected": -1.2989466190338135, "logps/chosen": -189.77340698242188, "logps/rejected": -176.5185546875, "loss": 0.6738, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4859987795352936, "rewards/margins": 0.36308038234710693, "rewards/rejected": -0.8490791320800781, "step": 4910 }, { "epoch": 0.98, "learning_rate": 3.8980895450474455e-09, "logits/chosen": -1.5500829219818115, "logits/rejected": -1.0472524166107178, "logps/chosen": -185.28883361816406, "logps/rejected": -172.25633239746094, "loss": 0.6751, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.576134204864502, "rewards/margins": 0.3021397292613983, "rewards/rejected": -0.8782738447189331, "step": 4920 }, { "epoch": 0.99, "learning_rate": 2.984656646415063e-09, "logits/chosen": -2.015113353729248, "logits/rejected": -1.4177416563034058, "logps/chosen": -173.11093139648438, "logps/rejected": -163.98568725585938, "loss": 0.6741, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5611327290534973, "rewards/margins": 0.31033846735954285, "rewards/rejected": -0.8714712262153625, "step": 4930 }, { "epoch": 0.99, "learning_rate": 2.192924752854042e-09, "logits/chosen": -1.8629176616668701, "logits/rejected": -1.5424811840057373, "logps/chosen": -157.3787078857422, "logps/rejected": -147.36471557617188, "loss": 0.6746, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.537105143070221, "rewards/margins": 0.2502687871456146, "rewards/rejected": -0.787373960018158, "step": 4940 }, { "epoch": 0.99, "learning_rate": 1.5229324522605949e-09, "logits/chosen": -1.8998916149139404, "logits/rejected": -1.3827567100524902, "logps/chosen": -187.2311553955078, "logps/rejected": -169.1602325439453, "loss": 0.6714, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5780856013298035, "rewards/margins": 0.2743874788284302, "rewards/rejected": -0.8524730801582336, "step": 4950 }, { "epoch": 0.99, "learning_rate": 9.747123991141193e-10, "logits/chosen": -1.674591064453125, "logits/rejected": -1.1757704019546509, "logps/chosen": -227.2926788330078, "logps/rejected": -182.78952026367188, "loss": 0.6681, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5630710124969482, "rewards/margins": 0.2593734860420227, "rewards/rejected": -0.8224444389343262, "step": 4960 }, { "epoch": 0.99, "learning_rate": 5.48291312886251e-10, "logits/chosen": -2.0134599208831787, "logits/rejected": -1.5195059776306152, "logps/chosen": -183.36537170410156, "logps/rejected": -184.00698852539062, "loss": 0.6724, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.45333051681518555, "rewards/margins": 0.3304307460784912, "rewards/rejected": -0.7837613224983215, "step": 4970 }, { "epoch": 1.0, "learning_rate": 2.43689976739403e-10, "logits/chosen": -1.7022699117660522, "logits/rejected": -1.3273664712905884, "logps/chosen": -159.46405029296875, "logps/rejected": -158.38516235351562, "loss": 0.6729, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5718626379966736, "rewards/margins": 0.209172323346138, "rewards/rejected": -0.7810350656509399, "step": 4980 }, { "epoch": 1.0, "learning_rate": 6.092323651313293e-11, "logits/chosen": -1.8445113897323608, "logits/rejected": -1.3246484994888306, "logps/chosen": -182.38064575195312, "logps/rejected": -175.2942657470703, "loss": 0.6728, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5497342348098755, "rewards/margins": 0.27360063791275024, "rewards/rejected": -0.8233348727226257, "step": 4990 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -1.752648949623108, "logits/rejected": -1.229426622390747, "logps/chosen": -172.3497314453125, "logps/rejected": -170.46157836914062, "loss": 0.6708, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5184978246688843, "rewards/margins": 0.3327183127403259, "rewards/rejected": -0.8512161374092102, "step": 5000 }, { "epoch": 1.0, "eval_logits/chosen": -1.803659439086914, "eval_logits/rejected": -1.6524428129196167, "eval_logps/chosen": -296.7821350097656, "eval_logps/rejected": -284.1953430175781, "eval_loss": 0.7084454298019409, "eval_rewards/accuracies": 0.4640718698501587, "eval_rewards/chosen": -0.33870866894721985, "eval_rewards/margins": 0.03748469054698944, "eval_rewards/rejected": -0.3761933445930481, "eval_runtime": 1205.6171, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.277, "step": 5000 }, { "epoch": 1.0, "step": 5000, "total_flos": 0.0, "train_loss": 0.6743102419853211, "train_runtime": 77754.1777, "train_samples_per_second": 0.772, "train_steps_per_second": 0.064 } ], "logging_steps": 10, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }