{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 15453, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.234152652005175e-10, "logits/chosen": -3.2907767295837402, "logits/rejected": -3.217514991760254, "logps/chosen": -159.67581176757812, "logps/rejected": -734.8052368164062, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.2341526520051748e-09, "logits/chosen": -3.2340452671051025, "logits/rejected": -3.225147008895874, "logps/chosen": -250.78839111328125, "logps/rejected": -257.5876770019531, "loss": 0.693, "rewards/accuracies": 0.3333333432674408, "rewards/chosen": -0.0012301643146201968, "rewards/margins": -0.001831890782341361, "rewards/rejected": 0.000601727282628417, "step": 10 }, { "epoch": 0.0, "learning_rate": 6.4683053040103496e-09, "logits/chosen": -3.000033140182495, "logits/rejected": -3.021458148956299, "logps/chosen": -180.18971252441406, "logps/rejected": -220.72903442382812, "loss": 0.6903, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.007337198592722416, "rewards/margins": 0.005708443932235241, "rewards/rejected": 0.0016287544276565313, "step": 20 }, { "epoch": 0.01, "learning_rate": 9.702457956015523e-09, "logits/chosen": -3.0931828022003174, "logits/rejected": -3.086947202682495, "logps/chosen": -230.19833374023438, "logps/rejected": -267.9596252441406, "loss": 0.6973, "rewards/accuracies": 0.5, "rewards/chosen": 0.0053744749166071415, "rewards/margins": -0.0014988690381869674, "rewards/rejected": 0.0068733422085642815, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.2936610608020699e-08, "logits/chosen": -3.153712034225464, "logits/rejected": -3.165261745452881, "logps/chosen": -220.7694854736328, "logps/rejected": -196.19557189941406, "loss": 0.6918, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.002314257901161909, "rewards/margins": -0.00820563267916441, "rewards/rejected": 0.005891374312341213, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.6170763260025874e-08, "logits/chosen": -3.139439344406128, "logits/rejected": -3.1224019527435303, "logps/chosen": -210.93881225585938, "logps/rejected": -199.3134765625, "loss": 0.6901, "rewards/accuracies": 0.5, "rewards/chosen": 0.004287729039788246, "rewards/margins": -0.0009997839806601405, "rewards/rejected": 0.005287514068186283, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.9404915912031046e-08, "logits/chosen": -2.9774956703186035, "logits/rejected": -3.070859909057617, "logps/chosen": -174.82815551757812, "logps/rejected": -222.3321990966797, "loss": 0.6911, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0032317829318344593, "rewards/margins": 0.0030282307416200638, "rewards/rejected": 0.0002035518700722605, "step": 60 }, { "epoch": 0.01, "learning_rate": 2.2639068564036222e-08, "logits/chosen": -3.0662150382995605, "logits/rejected": -3.1663196086883545, "logps/chosen": -166.78848266601562, "logps/rejected": -136.50570678710938, "loss": 0.6892, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.005116489715874195, "rewards/margins": 0.0016806584317237139, "rewards/rejected": -0.006797147449105978, "step": 70 }, { "epoch": 0.02, "learning_rate": 2.5873221216041398e-08, "logits/chosen": -3.105792999267578, "logits/rejected": -3.087473154067993, "logps/chosen": -250.6028289794922, "logps/rejected": -221.30960083007812, "loss": 0.6877, "rewards/accuracies": 0.5, "rewards/chosen": -0.0002625319757498801, "rewards/margins": -0.008208994753658772, "rewards/rejected": 0.00794646143913269, "step": 80 }, { "epoch": 0.02, "learning_rate": 2.910737386804657e-08, "logits/chosen": -3.0313832759857178, "logits/rejected": -3.011046886444092, "logps/chosen": -209.96633911132812, "logps/rejected": -251.78164672851562, "loss": 0.6922, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.007732807192951441, "rewards/margins": 0.009743442758917809, "rewards/rejected": -0.017476249486207962, "step": 90 }, { "epoch": 0.02, "learning_rate": 3.234152652005175e-08, "logits/chosen": -3.208557605743408, "logits/rejected": -3.138239860534668, "logps/chosen": -216.93978881835938, "logps/rejected": -125.2887954711914, "loss": 0.6896, "rewards/accuracies": 0.5, "rewards/chosen": -0.014565570279955864, "rewards/margins": -0.0040927669033408165, "rewards/rejected": -0.010472802445292473, "step": 100 }, { "epoch": 0.02, "eval_logits/chosen": -3.167656660079956, "eval_logits/rejected": -3.177053928375244, "eval_logps/chosen": -194.15455627441406, "eval_logps/rejected": -171.9473419189453, "eval_loss": 0.6889778971672058, "eval_rewards/accuracies": 0.5475000143051147, "eval_rewards/chosen": 0.008353026583790779, "eval_rewards/margins": 0.011491414159536362, "eval_rewards/rejected": -0.0031383878085762262, "eval_runtime": 139.3488, "eval_samples_per_second": 22.648, "eval_steps_per_second": 0.359, "step": 100 }, { "epoch": 0.02, "learning_rate": 3.557567917205692e-08, "logits/chosen": -3.1216812133789062, "logits/rejected": -3.1043143272399902, "logps/chosen": -209.23745727539062, "logps/rejected": -168.80552673339844, "loss": 0.6885, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.009929950349032879, "rewards/margins": 0.009177559986710548, "rewards/rejected": 0.0007523916428908706, "step": 110 }, { "epoch": 0.02, "learning_rate": 3.880983182406209e-08, "logits/chosen": -3.1498494148254395, "logits/rejected": -3.1368408203125, "logps/chosen": -223.6138916015625, "logps/rejected": -166.53366088867188, "loss": 0.6838, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.023775454610586166, "rewards/margins": 0.03654768317937851, "rewards/rejected": -0.012772229500114918, "step": 120 }, { "epoch": 0.03, "learning_rate": 4.204398447606727e-08, "logits/chosen": -3.053675413131714, "logits/rejected": -3.118798017501831, "logps/chosen": -163.9273681640625, "logps/rejected": -126.46878814697266, "loss": 0.6842, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0008082082495093346, "rewards/margins": 0.03312928229570389, "rewards/rejected": -0.03232107311487198, "step": 130 }, { "epoch": 0.03, "learning_rate": 4.5278137128072445e-08, "logits/chosen": -3.198451519012451, "logits/rejected": -3.1912155151367188, "logps/chosen": -217.3875732421875, "logps/rejected": -197.75624084472656, "loss": 0.6766, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.018968766555190086, "rewards/margins": 0.06026030331850052, "rewards/rejected": -0.041291531175374985, "step": 140 }, { "epoch": 0.03, "learning_rate": 4.851228978007762e-08, "logits/chosen": -2.8457720279693604, "logits/rejected": -2.8702220916748047, "logps/chosen": -192.63583374023438, "logps/rejected": -160.9226531982422, "loss": 0.6765, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03224840387701988, "rewards/margins": 0.04415305703878403, "rewards/rejected": -0.011904651299118996, "step": 150 }, { "epoch": 0.03, "learning_rate": 5.1746442432082797e-08, "logits/chosen": -3.1837120056152344, "logits/rejected": -3.167569637298584, "logps/chosen": -187.67776489257812, "logps/rejected": -217.16650390625, "loss": 0.6781, "rewards/accuracies": 0.5, "rewards/chosen": 0.045844174921512604, "rewards/margins": 0.0020677223801612854, "rewards/rejected": 0.04377645626664162, "step": 160 }, { "epoch": 0.03, "learning_rate": 5.4980595084087966e-08, "logits/chosen": -3.1629438400268555, "logits/rejected": -3.1647531986236572, "logps/chosen": -239.3665771484375, "logps/rejected": -206.7849884033203, "loss": 0.6701, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.07763100415468216, "rewards/margins": 0.07111285626888275, "rewards/rejected": 0.006518153008073568, "step": 170 }, { "epoch": 0.03, "learning_rate": 5.821474773609314e-08, "logits/chosen": -2.9683380126953125, "logits/rejected": -3.0374159812927246, "logps/chosen": -143.70809936523438, "logps/rejected": -158.6737823486328, "loss": 0.6728, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03261490538716316, "rewards/margins": 0.06522291898727417, "rewards/rejected": -0.03260800987482071, "step": 180 }, { "epoch": 0.04, "learning_rate": 6.144890038809831e-08, "logits/chosen": -3.1599783897399902, "logits/rejected": -3.188602924346924, "logps/chosen": -266.9081726074219, "logps/rejected": -293.92669677734375, "loss": 0.668, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.1383255124092102, "rewards/margins": 0.06030426546931267, "rewards/rejected": 0.07802124321460724, "step": 190 }, { "epoch": 0.04, "learning_rate": 6.46830530401035e-08, "logits/chosen": -2.9277074337005615, "logits/rejected": -2.9044947624206543, "logps/chosen": -159.93588256835938, "logps/rejected": -129.9291534423828, "loss": 0.6665, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.03977008908987045, "rewards/margins": 0.04497218504548073, "rewards/rejected": -0.005202095955610275, "step": 200 }, { "epoch": 0.04, "eval_logits/chosen": -3.1655266284942627, "eval_logits/rejected": -3.1750502586364746, "eval_logps/chosen": -193.48023986816406, "eval_logps/rejected": -171.9678192138672, "eval_loss": 0.6631521582603455, "eval_rewards/accuracies": 0.6449999809265137, "eval_rewards/chosen": 0.07578270137310028, "eval_rewards/margins": 0.08096777647733688, "eval_rewards/rejected": -0.0051850746385753155, "eval_runtime": 138.4167, "eval_samples_per_second": 22.801, "eval_steps_per_second": 0.361, "step": 200 }, { "epoch": 0.04, "learning_rate": 6.791720569210866e-08, "logits/chosen": -3.181269645690918, "logits/rejected": -3.1761131286621094, "logps/chosen": -280.2482604980469, "logps/rejected": -180.41729736328125, "loss": 0.6558, "rewards/accuracies": 0.5, "rewards/chosen": 0.10838142782449722, "rewards/margins": 0.07865364849567413, "rewards/rejected": 0.029727783054113388, "step": 210 }, { "epoch": 0.04, "learning_rate": 7.115135834411385e-08, "logits/chosen": -3.204672336578369, "logits/rejected": -3.2558085918426514, "logps/chosen": -200.743408203125, "logps/rejected": -233.82485961914062, "loss": 0.6478, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.12597759068012238, "rewards/margins": 0.132663294672966, "rewards/rejected": -0.006685702595859766, "step": 220 }, { "epoch": 0.04, "learning_rate": 7.438551099611902e-08, "logits/chosen": -3.0432682037353516, "logits/rejected": -3.0207479000091553, "logps/chosen": -202.6326904296875, "logps/rejected": -212.5495147705078, "loss": 0.6383, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.09857180714607239, "rewards/margins": 0.14784620702266693, "rewards/rejected": -0.049274396151304245, "step": 230 }, { "epoch": 0.05, "learning_rate": 7.761966364812419e-08, "logits/chosen": -3.234316349029541, "logits/rejected": -3.1052303314208984, "logps/chosen": -259.6878356933594, "logps/rejected": -223.2771453857422, "loss": 0.6323, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.16739621758460999, "rewards/margins": 0.13369934260845184, "rewards/rejected": 0.03369685262441635, "step": 240 }, { "epoch": 0.05, "learning_rate": 8.085381630012935e-08, "logits/chosen": -3.020998477935791, "logits/rejected": -2.997157096862793, "logps/chosen": -296.86737060546875, "logps/rejected": -222.3542022705078, "loss": 0.6641, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.12462540715932846, "rewards/margins": 0.07443695515394211, "rewards/rejected": 0.05018845200538635, "step": 250 }, { "epoch": 0.05, "learning_rate": 8.408796895213454e-08, "logits/chosen": -3.225865602493286, "logits/rejected": -3.2103190422058105, "logps/chosen": -281.2466735839844, "logps/rejected": -253.050537109375, "loss": 0.6305, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.17998719215393066, "rewards/margins": 0.23333589732646942, "rewards/rejected": -0.05334869772195816, "step": 260 }, { "epoch": 0.05, "learning_rate": 8.73221216041397e-08, "logits/chosen": -3.1258041858673096, "logits/rejected": -3.1957387924194336, "logps/chosen": -274.235107421875, "logps/rejected": -250.7463836669922, "loss": 0.6346, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.11405845731496811, "rewards/margins": 0.09639565646648407, "rewards/rejected": 0.017662782222032547, "step": 270 }, { "epoch": 0.05, "learning_rate": 9.055627425614489e-08, "logits/chosen": -2.947895050048828, "logits/rejected": -3.0267863273620605, "logps/chosen": -190.09231567382812, "logps/rejected": -167.23037719726562, "loss": 0.6349, "rewards/accuracies": 0.5, "rewards/chosen": 0.03284968063235283, "rewards/margins": 0.18831506371498108, "rewards/rejected": -0.15546536445617676, "step": 280 }, { "epoch": 0.06, "learning_rate": 9.379042690815006e-08, "logits/chosen": -3.157709836959839, "logits/rejected": -3.1099419593811035, "logps/chosen": -237.16323852539062, "logps/rejected": -219.634765625, "loss": 0.6211, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.12987887859344482, "rewards/margins": 0.12882006168365479, "rewards/rejected": 0.0010588064324110746, "step": 290 }, { "epoch": 0.06, "learning_rate": 9.702457956015524e-08, "logits/chosen": -3.1737518310546875, "logits/rejected": -3.1884818077087402, "logps/chosen": -173.21974182128906, "logps/rejected": -213.5424346923828, "loss": 0.6381, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.1705276370048523, "rewards/margins": 0.17234186828136444, "rewards/rejected": -0.0018142417538911104, "step": 300 }, { "epoch": 0.06, "eval_logits/chosen": -3.1614997386932373, "eval_logits/rejected": -3.171653985977173, "eval_logps/chosen": -192.6167755126953, "eval_logps/rejected": -172.62965393066406, "eval_loss": 0.6209953427314758, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": 0.16212961077690125, "eval_rewards/margins": 0.23349855840206146, "eval_rewards/rejected": -0.07136894017457962, "eval_runtime": 139.5791, "eval_samples_per_second": 22.611, "eval_steps_per_second": 0.358, "step": 300 }, { "epoch": 0.06, "learning_rate": 1.0025873221216041e-07, "logits/chosen": -3.153163194656372, "logits/rejected": -3.08953857421875, "logps/chosen": -255.1444091796875, "logps/rejected": -282.613037109375, "loss": 0.5999, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.19228589534759521, "rewards/margins": 0.24539165198802948, "rewards/rejected": -0.053105778992176056, "step": 310 }, { "epoch": 0.06, "learning_rate": 1.0349288486416559e-07, "logits/chosen": -3.199058771133423, "logits/rejected": -3.18471622467041, "logps/chosen": -179.1886444091797, "logps/rejected": -257.89703369140625, "loss": 0.6142, "rewards/accuracies": 0.5, "rewards/chosen": 0.21753160655498505, "rewards/margins": 0.11381890624761581, "rewards/rejected": 0.10371267795562744, "step": 320 }, { "epoch": 0.06, "learning_rate": 1.0672703751617076e-07, "logits/chosen": -3.153376817703247, "logits/rejected": -3.18345308303833, "logps/chosen": -207.62661743164062, "logps/rejected": -196.59873962402344, "loss": 0.5991, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.28966066241264343, "rewards/margins": 0.43374133110046387, "rewards/rejected": -0.14408066868782043, "step": 330 }, { "epoch": 0.07, "learning_rate": 1.0996119016817593e-07, "logits/chosen": -3.0663774013519287, "logits/rejected": -3.0867068767547607, "logps/chosen": -176.10557556152344, "logps/rejected": -202.84716796875, "loss": 0.6079, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.241808220744133, "rewards/margins": 0.007571871392428875, "rewards/rejected": 0.23423632979393005, "step": 340 }, { "epoch": 0.07, "learning_rate": 1.131953428201811e-07, "logits/chosen": -3.060717821121216, "logits/rejected": -3.0316948890686035, "logps/chosen": -163.25799560546875, "logps/rejected": -189.8742218017578, "loss": 0.6059, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.31728318333625793, "rewards/margins": 0.29380735754966736, "rewards/rejected": 0.02347579039633274, "step": 350 }, { "epoch": 0.07, "learning_rate": 1.1642949547218628e-07, "logits/chosen": -3.1615848541259766, "logits/rejected": -3.150390386581421, "logps/chosen": -255.5399932861328, "logps/rejected": -171.8251190185547, "loss": 0.5856, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.21890723705291748, "rewards/margins": 0.14798519015312195, "rewards/rejected": 0.07092205435037613, "step": 360 }, { "epoch": 0.07, "learning_rate": 1.1966364812419147e-07, "logits/chosen": -2.9652140140533447, "logits/rejected": -3.0450243949890137, "logps/chosen": -245.3134765625, "logps/rejected": -197.13406372070312, "loss": 0.6142, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.28407177329063416, "rewards/margins": 0.3457433581352234, "rewards/rejected": -0.061671603471040726, "step": 370 }, { "epoch": 0.07, "learning_rate": 1.2289780077619662e-07, "logits/chosen": -2.9582393169403076, "logits/rejected": -2.9958596229553223, "logps/chosen": -214.2012939453125, "logps/rejected": -178.59019470214844, "loss": 0.6159, "rewards/accuracies": 0.5, "rewards/chosen": 0.14877629280090332, "rewards/margins": 0.22745585441589355, "rewards/rejected": -0.07867956161499023, "step": 380 }, { "epoch": 0.08, "learning_rate": 1.261319534282018e-07, "logits/chosen": -3.0242486000061035, "logits/rejected": -3.0409722328186035, "logps/chosen": -326.0379638671875, "logps/rejected": -293.76422119140625, "loss": 0.5815, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.2768373489379883, "rewards/margins": 0.2867100238800049, "rewards/rejected": -0.009872669354081154, "step": 390 }, { "epoch": 0.08, "learning_rate": 1.29366106080207e-07, "logits/chosen": -3.0544285774230957, "logits/rejected": -3.0994880199432373, "logps/chosen": -191.39834594726562, "logps/rejected": -174.6272430419922, "loss": 0.5753, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.24598166346549988, "rewards/margins": 0.2981668710708618, "rewards/rejected": -0.052185166627168655, "step": 400 }, { "epoch": 0.08, "eval_logits/chosen": -3.144436836242676, "eval_logits/rejected": -3.1545228958129883, "eval_logps/chosen": -193.4642791748047, "eval_logps/rejected": -175.2998504638672, "eval_loss": 0.5865161418914795, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": 0.0773763582110405, "eval_rewards/margins": 0.4157639741897583, "eval_rewards/rejected": -0.3383876383304596, "eval_runtime": 139.377, "eval_samples_per_second": 22.644, "eval_steps_per_second": 0.359, "step": 400 }, { "epoch": 0.08, "learning_rate": 1.3260025873221214e-07, "logits/chosen": -3.0932059288024902, "logits/rejected": -3.1315836906433105, "logps/chosen": -232.5525360107422, "logps/rejected": -199.24893188476562, "loss": 0.6045, "rewards/accuracies": 0.75, "rewards/chosen": 0.21984687447547913, "rewards/margins": 0.43035203218460083, "rewards/rejected": -0.2105051577091217, "step": 410 }, { "epoch": 0.08, "learning_rate": 1.3583441138421733e-07, "logits/chosen": -2.9779956340789795, "logits/rejected": -2.9564549922943115, "logps/chosen": -174.45761108398438, "logps/rejected": -140.55642700195312, "loss": 0.5481, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.006199514959007502, "rewards/margins": 0.4953877329826355, "rewards/rejected": -0.5015872716903687, "step": 420 }, { "epoch": 0.08, "learning_rate": 1.390685640362225e-07, "logits/chosen": -2.9969263076782227, "logits/rejected": -3.0024189949035645, "logps/chosen": -169.60955810546875, "logps/rejected": -213.49417114257812, "loss": 0.5877, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.11862262338399887, "rewards/margins": 0.32721397280693054, "rewards/rejected": -0.20859134197235107, "step": 430 }, { "epoch": 0.09, "learning_rate": 1.423027166882277e-07, "logits/chosen": -3.1354074478149414, "logits/rejected": -3.133338212966919, "logps/chosen": -316.2953796386719, "logps/rejected": -312.092041015625, "loss": 0.5342, "rewards/accuracies": 0.75, "rewards/chosen": 0.5428065657615662, "rewards/margins": 0.8526216745376587, "rewards/rejected": -0.309814989566803, "step": 440 }, { "epoch": 0.09, "learning_rate": 1.4553686934023285e-07, "logits/chosen": -3.1289236545562744, "logits/rejected": -3.0934605598449707, "logps/chosen": -204.59451293945312, "logps/rejected": -230.4581298828125, "loss": 0.5932, "rewards/accuracies": 0.5, "rewards/chosen": -0.044186461716890335, "rewards/margins": 0.3902572989463806, "rewards/rejected": -0.43444371223449707, "step": 450 }, { "epoch": 0.09, "learning_rate": 1.4877102199223803e-07, "logits/chosen": -2.936959743499756, "logits/rejected": -2.8640313148498535, "logps/chosen": -121.1145248413086, "logps/rejected": -152.80404663085938, "loss": 0.5942, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.16668501496315002, "rewards/margins": 0.2552078664302826, "rewards/rejected": -0.4218928813934326, "step": 460 }, { "epoch": 0.09, "learning_rate": 1.520051746442432e-07, "logits/chosen": -2.9685120582580566, "logits/rejected": -2.995673418045044, "logps/chosen": -170.37899780273438, "logps/rejected": -216.64419555664062, "loss": 0.566, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.07195017486810684, "rewards/margins": 0.38806548714637756, "rewards/rejected": -0.3161153197288513, "step": 470 }, { "epoch": 0.09, "learning_rate": 1.5523932729624837e-07, "logits/chosen": -3.1304144859313965, "logits/rejected": -3.1346306800842285, "logps/chosen": -200.91786193847656, "logps/rejected": -193.68020629882812, "loss": 0.5686, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.19373224675655365, "rewards/margins": 0.5450137257575989, "rewards/rejected": -0.3512814939022064, "step": 480 }, { "epoch": 0.1, "learning_rate": 1.5847347994825355e-07, "logits/chosen": -3.076622486114502, "logits/rejected": -3.0705759525299072, "logps/chosen": -238.1417694091797, "logps/rejected": -184.45993041992188, "loss": 0.5602, "rewards/accuracies": 0.75, "rewards/chosen": 0.07782919704914093, "rewards/margins": 0.4706133306026459, "rewards/rejected": -0.3927842080593109, "step": 490 }, { "epoch": 0.1, "learning_rate": 1.617076326002587e-07, "logits/chosen": -3.022279739379883, "logits/rejected": -2.996781826019287, "logps/chosen": -333.6932373046875, "logps/rejected": -252.64376831054688, "loss": 0.5092, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.2657441198825836, "rewards/margins": 1.2168611288070679, "rewards/rejected": -0.9511170387268066, "step": 500 }, { "epoch": 0.1, "eval_logits/chosen": -3.1387369632720947, "eval_logits/rejected": -3.1475753784179688, "eval_logps/chosen": -194.74359130859375, "eval_logps/rejected": -178.61302185058594, "eval_loss": 0.5518030524253845, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -0.05055353045463562, "eval_rewards/margins": 0.6191545724868774, "eval_rewards/rejected": -0.6697080135345459, "eval_runtime": 138.877, "eval_samples_per_second": 22.725, "eval_steps_per_second": 0.36, "step": 500 }, { "epoch": 0.1, "learning_rate": 1.649417852522639e-07, "logits/chosen": -3.1498000621795654, "logits/rejected": -3.124570846557617, "logps/chosen": -157.44468688964844, "logps/rejected": -174.67886352539062, "loss": 0.584, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.46559691429138184, "rewards/margins": 0.264739990234375, "rewards/rejected": -0.7303369641304016, "step": 510 }, { "epoch": 0.1, "learning_rate": 1.6817593790426907e-07, "logits/chosen": -3.18113374710083, "logits/rejected": -3.159060001373291, "logps/chosen": -217.8606719970703, "logps/rejected": -211.8629150390625, "loss": 0.5402, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25132066011428833, "rewards/margins": 0.5133770704269409, "rewards/rejected": -0.7646977305412292, "step": 520 }, { "epoch": 0.1, "learning_rate": 1.7141009055627426e-07, "logits/chosen": -3.1162872314453125, "logits/rejected": -3.1024136543273926, "logps/chosen": -163.42092895507812, "logps/rejected": -174.1112823486328, "loss": 0.5486, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.26286181807518005, "rewards/margins": 0.5959106683731079, "rewards/rejected": -0.8587724566459656, "step": 530 }, { "epoch": 0.1, "learning_rate": 1.746442432082794e-07, "logits/chosen": -3.080259084701538, "logits/rejected": -3.0933754444122314, "logps/chosen": -200.79342651367188, "logps/rejected": -217.738037109375, "loss": 0.5488, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05293453857302666, "rewards/margins": 0.29576388001441956, "rewards/rejected": -0.3486984074115753, "step": 540 }, { "epoch": 0.11, "learning_rate": 1.778783958602846e-07, "logits/chosen": -3.1378228664398193, "logits/rejected": -3.135917901992798, "logps/chosen": -205.9120330810547, "logps/rejected": -194.05776977539062, "loss": 0.5737, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09442628920078278, "rewards/margins": 0.6792758703231812, "rewards/rejected": -0.7737022042274475, "step": 550 }, { "epoch": 0.11, "learning_rate": 1.8111254851228978e-07, "logits/chosen": -2.992809772491455, "logits/rejected": -3.0085959434509277, "logps/chosen": -181.65798950195312, "logps/rejected": -188.4614715576172, "loss": 0.52, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.11401055008172989, "rewards/margins": 0.7531822919845581, "rewards/rejected": -0.6391717195510864, "step": 560 }, { "epoch": 0.11, "learning_rate": 1.8434670116429496e-07, "logits/chosen": -3.00810170173645, "logits/rejected": -3.033761978149414, "logps/chosen": -230.50656127929688, "logps/rejected": -231.4688262939453, "loss": 0.5615, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.27678605914115906, "rewards/margins": 0.7314552068710327, "rewards/rejected": -0.45466917753219604, "step": 570 }, { "epoch": 0.11, "learning_rate": 1.8758085381630012e-07, "logits/chosen": -3.113915205001831, "logits/rejected": -3.156710147857666, "logps/chosen": -342.560302734375, "logps/rejected": -283.44403076171875, "loss": 0.5778, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4650874137878418, "rewards/margins": 0.941075325012207, "rewards/rejected": -0.47598785161972046, "step": 580 }, { "epoch": 0.11, "learning_rate": 1.9081500646830527e-07, "logits/chosen": -3.1269617080688477, "logits/rejected": -3.1666359901428223, "logps/chosen": -234.28176879882812, "logps/rejected": -181.70407104492188, "loss": 0.6159, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.21833765506744385, "rewards/margins": 0.8614624738693237, "rewards/rejected": -0.6431248784065247, "step": 590 }, { "epoch": 0.12, "learning_rate": 1.9404915912031048e-07, "logits/chosen": -3.076352596282959, "logits/rejected": -2.9966773986816406, "logps/chosen": -139.66424560546875, "logps/rejected": -213.1605987548828, "loss": 0.5374, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4512442648410797, "rewards/margins": 0.5663000345230103, "rewards/rejected": -1.0175443887710571, "step": 600 }, { "epoch": 0.12, "eval_logits/chosen": -3.1174840927124023, "eval_logits/rejected": -3.1267504692077637, "eval_logps/chosen": -197.49191284179688, "eval_logps/rejected": -183.15155029296875, "eval_loss": 0.5301532745361328, "eval_rewards/accuracies": 0.7149999737739563, "eval_rewards/chosen": -0.32538560032844543, "eval_rewards/margins": 0.7981722354888916, "eval_rewards/rejected": -1.1235578060150146, "eval_runtime": 148.0938, "eval_samples_per_second": 21.311, "eval_steps_per_second": 0.338, "step": 600 }, { "epoch": 0.12, "learning_rate": 1.9728331177231564e-07, "logits/chosen": -3.1216952800750732, "logits/rejected": -3.145416498184204, "logps/chosen": -248.3773956298828, "logps/rejected": -221.88284301757812, "loss": 0.5019, "rewards/accuracies": 0.75, "rewards/chosen": -0.03421555832028389, "rewards/margins": 1.1549384593963623, "rewards/rejected": -1.1891541481018066, "step": 610 }, { "epoch": 0.12, "learning_rate": 2.0051746442432082e-07, "logits/chosen": -3.0570244789123535, "logits/rejected": -3.067242383956909, "logps/chosen": -274.6044616699219, "logps/rejected": -183.0326385498047, "loss": 0.5109, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2071150541305542, "rewards/margins": 0.6557462811470032, "rewards/rejected": -0.8628614544868469, "step": 620 }, { "epoch": 0.12, "learning_rate": 2.0375161707632598e-07, "logits/chosen": -3.0616676807403564, "logits/rejected": -3.0857839584350586, "logps/chosen": -228.970947265625, "logps/rejected": -189.21237182617188, "loss": 0.5213, "rewards/accuracies": 0.5, "rewards/chosen": -0.5329365730285645, "rewards/margins": 0.4307224154472351, "rewards/rejected": -0.9636589288711548, "step": 630 }, { "epoch": 0.12, "learning_rate": 2.0698576972833119e-07, "logits/chosen": -2.9985756874084473, "logits/rejected": -2.9745564460754395, "logps/chosen": -220.9756622314453, "logps/rejected": -221.86508178710938, "loss": 0.56, "rewards/accuracies": 0.75, "rewards/chosen": -0.365543931722641, "rewards/margins": 0.93255615234375, "rewards/rejected": -1.298100233078003, "step": 640 }, { "epoch": 0.13, "learning_rate": 2.1021992238033634e-07, "logits/chosen": -2.8438754081726074, "logits/rejected": -2.9238531589508057, "logps/chosen": -228.2821807861328, "logps/rejected": -217.6581573486328, "loss": 0.5303, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.10670392215251923, "rewards/margins": 0.9380793571472168, "rewards/rejected": -0.8313754200935364, "step": 650 }, { "epoch": 0.13, "learning_rate": 2.1345407503234153e-07, "logits/chosen": -3.0386316776275635, "logits/rejected": -3.099316120147705, "logps/chosen": -157.04953002929688, "logps/rejected": -198.0177001953125, "loss": 0.5428, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.14069390296936035, "rewards/margins": 1.6727489233016968, "rewards/rejected": -1.532055139541626, "step": 660 }, { "epoch": 0.13, "learning_rate": 2.1668822768434668e-07, "logits/chosen": -3.1025519371032715, "logits/rejected": -3.0931642055511475, "logps/chosen": -182.56222534179688, "logps/rejected": -317.12481689453125, "loss": 0.5359, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.14728078246116638, "rewards/margins": 0.41670504212379456, "rewards/rejected": -0.2694242000579834, "step": 670 }, { "epoch": 0.13, "learning_rate": 2.1992238033635186e-07, "logits/chosen": -3.0557260513305664, "logits/rejected": -3.0878586769104004, "logps/chosen": -179.29005432128906, "logps/rejected": -232.91567993164062, "loss": 0.5284, "rewards/accuracies": 0.5, "rewards/chosen": -0.07645130902528763, "rewards/margins": 0.4627855718135834, "rewards/rejected": -0.5392369031906128, "step": 680 }, { "epoch": 0.13, "learning_rate": 2.2315653298835705e-07, "logits/chosen": -2.7653260231018066, "logits/rejected": -2.883758068084717, "logps/chosen": -247.56021118164062, "logps/rejected": -213.8865509033203, "loss": 0.5972, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.6671286821365356, "rewards/margins": 1.2729079723358154, "rewards/rejected": -0.6057791709899902, "step": 690 }, { "epoch": 0.14, "learning_rate": 2.263906856403622e-07, "logits/chosen": -2.958416223526001, "logits/rejected": -2.972041606903076, "logps/chosen": -167.88119506835938, "logps/rejected": -189.9696807861328, "loss": 0.4719, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6142417192459106, "rewards/margins": 1.2729040384292603, "rewards/rejected": -1.8871456384658813, "step": 700 }, { "epoch": 0.14, "eval_logits/chosen": -3.1009063720703125, "eval_logits/rejected": -3.1106061935424805, "eval_logps/chosen": -199.7174835205078, "eval_logps/rejected": -187.49134826660156, "eval_loss": 0.5122122168540955, "eval_rewards/accuracies": 0.7225000262260437, "eval_rewards/chosen": -0.5479406118392944, "eval_rewards/margins": 1.009599208831787, "eval_rewards/rejected": -1.5575398206710815, "eval_runtime": 138.7439, "eval_samples_per_second": 22.747, "eval_steps_per_second": 0.36, "step": 700 }, { "epoch": 0.14, "learning_rate": 2.2962483829236739e-07, "logits/chosen": -2.9802756309509277, "logits/rejected": -3.100799560546875, "logps/chosen": -172.6888427734375, "logps/rejected": -204.21240234375, "loss": 0.5192, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5314847230911255, "rewards/margins": 1.2006410360336304, "rewards/rejected": -1.7321258783340454, "step": 710 }, { "epoch": 0.14, "learning_rate": 2.3285899094437257e-07, "logits/chosen": -3.094202756881714, "logits/rejected": -3.1324687004089355, "logps/chosen": -238.36331176757812, "logps/rejected": -250.6588592529297, "loss": 0.5113, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.7062119245529175, "rewards/margins": 0.8887389898300171, "rewards/rejected": -1.5949509143829346, "step": 720 }, { "epoch": 0.14, "learning_rate": 2.3609314359637775e-07, "logits/chosen": -3.112473964691162, "logits/rejected": -3.1348583698272705, "logps/chosen": -223.6353302001953, "logps/rejected": -282.94036865234375, "loss": 0.509, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3504326045513153, "rewards/margins": 1.08292555809021, "rewards/rejected": -1.4333581924438477, "step": 730 }, { "epoch": 0.14, "learning_rate": 2.3932729624838293e-07, "logits/chosen": -2.929335594177246, "logits/rejected": -3.038344621658325, "logps/chosen": -256.2840576171875, "logps/rejected": -270.9310607910156, "loss": 0.4907, "rewards/accuracies": 0.75, "rewards/chosen": -0.7100075483322144, "rewards/margins": 1.0054277181625366, "rewards/rejected": -1.7154353857040405, "step": 740 }, { "epoch": 0.15, "learning_rate": 2.425614489003881e-07, "logits/chosen": -3.071770668029785, "logits/rejected": -3.0335705280303955, "logps/chosen": -184.1170196533203, "logps/rejected": -190.11817932128906, "loss": 0.5403, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.0880534648895264, "rewards/margins": 0.9439334869384766, "rewards/rejected": -2.031986951828003, "step": 750 }, { "epoch": 0.15, "learning_rate": 2.4579560155239325e-07, "logits/chosen": -3.0905098915100098, "logits/rejected": -3.1044254302978516, "logps/chosen": -206.65673828125, "logps/rejected": -212.1765594482422, "loss": 0.5203, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6541646718978882, "rewards/margins": 1.224506139755249, "rewards/rejected": -1.8786706924438477, "step": 760 }, { "epoch": 0.15, "learning_rate": 2.4902975420439843e-07, "logits/chosen": -3.105860710144043, "logits/rejected": -3.193061351776123, "logps/chosen": -248.20712280273438, "logps/rejected": -259.50555419921875, "loss": 0.4783, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09263571351766586, "rewards/margins": 1.649441123008728, "rewards/rejected": -1.742077112197876, "step": 770 }, { "epoch": 0.15, "learning_rate": 2.522639068564036e-07, "logits/chosen": -3.066476583480835, "logits/rejected": -3.076321840286255, "logps/chosen": -231.3736572265625, "logps/rejected": -201.08914184570312, "loss": 0.4991, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.7740650773048401, "rewards/margins": 0.49084991216659546, "rewards/rejected": -1.2649149894714355, "step": 780 }, { "epoch": 0.15, "learning_rate": 2.554980595084088e-07, "logits/chosen": -2.982896327972412, "logits/rejected": -2.9362235069274902, "logps/chosen": -188.5688934326172, "logps/rejected": -235.40676879882812, "loss": 0.5174, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.06574110686779022, "rewards/margins": 1.3378245830535889, "rewards/rejected": -1.2720834016799927, "step": 790 }, { "epoch": 0.16, "learning_rate": 2.58732212160414e-07, "logits/chosen": -2.9199814796447754, "logits/rejected": -2.8367676734924316, "logps/chosen": -203.72073364257812, "logps/rejected": -298.4263610839844, "loss": 0.5036, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9280701875686646, "rewards/margins": 0.9835942983627319, "rewards/rejected": -1.911664605140686, "step": 800 }, { "epoch": 0.16, "eval_logits/chosen": -3.106929063796997, "eval_logits/rejected": -3.1154816150665283, "eval_logps/chosen": -197.77255249023438, "eval_logps/rejected": -187.2395477294922, "eval_loss": 0.5092905163764954, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -0.3534494638442993, "eval_rewards/margins": 1.1789119243621826, "eval_rewards/rejected": -1.5323612689971924, "eval_runtime": 140.2502, "eval_samples_per_second": 22.503, "eval_steps_per_second": 0.357, "step": 800 }, { "epoch": 0.16, "learning_rate": 2.619663648124191e-07, "logits/chosen": -2.895864963531494, "logits/rejected": -2.9698562622070312, "logps/chosen": -253.4523162841797, "logps/rejected": -179.3986358642578, "loss": 0.6193, "rewards/accuracies": 0.5, "rewards/chosen": -0.8969345092773438, "rewards/margins": 0.17108377814292908, "rewards/rejected": -1.0680183172225952, "step": 810 }, { "epoch": 0.16, "learning_rate": 2.652005174644243e-07, "logits/chosen": -3.0189733505249023, "logits/rejected": -3.091141700744629, "logps/chosen": -145.2667694091797, "logps/rejected": -208.92019653320312, "loss": 0.6153, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9238840341567993, "rewards/margins": 0.9646676182746887, "rewards/rejected": -1.8885517120361328, "step": 820 }, { "epoch": 0.16, "learning_rate": 2.684346701164295e-07, "logits/chosen": -3.007552146911621, "logits/rejected": -3.0918264389038086, "logps/chosen": -149.43923950195312, "logps/rejected": -190.80862426757812, "loss": 0.5495, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3854085206985474, "rewards/margins": 0.9688920974731445, "rewards/rejected": -2.3543004989624023, "step": 830 }, { "epoch": 0.16, "learning_rate": 2.7166882276843465e-07, "logits/chosen": -2.995171308517456, "logits/rejected": -3.0511744022369385, "logps/chosen": -307.744140625, "logps/rejected": -257.0872497558594, "loss": 0.4775, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.41892343759536743, "rewards/margins": 1.1574362516403198, "rewards/rejected": -1.5763596296310425, "step": 840 }, { "epoch": 0.17, "learning_rate": 2.7490297542043984e-07, "logits/chosen": -3.008758068084717, "logits/rejected": -3.0798912048339844, "logps/chosen": -206.8883819580078, "logps/rejected": -165.884765625, "loss": 0.5413, "rewards/accuracies": 0.75, "rewards/chosen": -0.737189769744873, "rewards/margins": 1.2928497791290283, "rewards/rejected": -2.0300393104553223, "step": 850 }, { "epoch": 0.17, "learning_rate": 2.78137128072445e-07, "logits/chosen": -3.051088809967041, "logits/rejected": -3.110631227493286, "logps/chosen": -113.84765625, "logps/rejected": -138.62364196777344, "loss": 0.5105, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.2797080278396606, "rewards/margins": 1.187454104423523, "rewards/rejected": -2.4671621322631836, "step": 860 }, { "epoch": 0.17, "learning_rate": 2.8137128072445015e-07, "logits/chosen": -3.1219208240509033, "logits/rejected": -3.159938335418701, "logps/chosen": -246.7979278564453, "logps/rejected": -236.80239868164062, "loss": 0.4727, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4427107274532318, "rewards/margins": 1.177958607673645, "rewards/rejected": -1.6206691265106201, "step": 870 }, { "epoch": 0.17, "learning_rate": 2.846054333764554e-07, "logits/chosen": -3.003744125366211, "logits/rejected": -3.043114423751831, "logps/chosen": -206.8845977783203, "logps/rejected": -206.0985107421875, "loss": 0.5488, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19859448075294495, "rewards/margins": 0.7293460965156555, "rewards/rejected": -0.9279405474662781, "step": 880 }, { "epoch": 0.17, "learning_rate": 2.878395860284605e-07, "logits/chosen": -2.8752822875976562, "logits/rejected": -2.9653193950653076, "logps/chosen": -259.47442626953125, "logps/rejected": -168.39279174804688, "loss": 0.5839, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8495208621025085, "rewards/margins": 1.1083202362060547, "rewards/rejected": -1.957841157913208, "step": 890 }, { "epoch": 0.17, "learning_rate": 2.910737386804657e-07, "logits/chosen": -2.9832606315612793, "logits/rejected": -2.9729461669921875, "logps/chosen": -190.07797241210938, "logps/rejected": -201.74734497070312, "loss": 0.456, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.213139533996582, "rewards/margins": 1.0091941356658936, "rewards/rejected": -2.2223336696624756, "step": 900 }, { "epoch": 0.17, "eval_logits/chosen": -3.0858521461486816, "eval_logits/rejected": -3.094005823135376, "eval_logps/chosen": -205.53961181640625, "eval_logps/rejected": -196.5068817138672, "eval_loss": 0.5017659068107605, "eval_rewards/accuracies": 0.7250000238418579, "eval_rewards/chosen": -1.1301543712615967, "eval_rewards/margins": 1.328935980796814, "eval_rewards/rejected": -2.459090232849121, "eval_runtime": 138.2537, "eval_samples_per_second": 22.828, "eval_steps_per_second": 0.362, "step": 900 }, { "epoch": 0.18, "learning_rate": 2.943078913324709e-07, "logits/chosen": -2.930833578109741, "logits/rejected": -2.976238489151001, "logps/chosen": -155.70570373535156, "logps/rejected": -178.11851501464844, "loss": 0.5571, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.5093227624893188, "rewards/margins": 1.6325773000717163, "rewards/rejected": -3.141899824142456, "step": 910 }, { "epoch": 0.18, "learning_rate": 2.9754204398447606e-07, "logits/chosen": -3.0615293979644775, "logits/rejected": -2.998253107070923, "logps/chosen": -251.2194366455078, "logps/rejected": -176.75877380371094, "loss": 0.4612, "rewards/accuracies": 0.75, "rewards/chosen": -0.29622069001197815, "rewards/margins": 1.41635262966156, "rewards/rejected": -1.7125732898712158, "step": 920 }, { "epoch": 0.18, "learning_rate": 3.0077619663648125e-07, "logits/chosen": -3.065725088119507, "logits/rejected": -3.0299696922302246, "logps/chosen": -259.5385437011719, "logps/rejected": -201.85426330566406, "loss": 0.4777, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.1391627788543701, "rewards/margins": 1.818529486656189, "rewards/rejected": -2.9576923847198486, "step": 930 }, { "epoch": 0.18, "learning_rate": 3.040103492884864e-07, "logits/chosen": -3.085815191268921, "logits/rejected": -3.064685583114624, "logps/chosen": -288.67962646484375, "logps/rejected": -278.7095642089844, "loss": 0.4442, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.728425145149231, "rewards/margins": 0.8075621724128723, "rewards/rejected": -1.535987138748169, "step": 940 }, { "epoch": 0.18, "learning_rate": 3.0724450194049156e-07, "logits/chosen": -3.160470962524414, "logits/rejected": -3.116166114807129, "logps/chosen": -230.3232879638672, "logps/rejected": -316.96710205078125, "loss": 0.5392, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8914705514907837, "rewards/margins": 0.8523221015930176, "rewards/rejected": -1.7437927722930908, "step": 950 }, { "epoch": 0.19, "learning_rate": 3.1047865459249674e-07, "logits/chosen": -2.935952663421631, "logits/rejected": -3.020907402038574, "logps/chosen": -243.97781372070312, "logps/rejected": -287.1600646972656, "loss": 0.4915, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.079117774963379, "rewards/margins": 0.7942657470703125, "rewards/rejected": -1.8733835220336914, "step": 960 }, { "epoch": 0.19, "learning_rate": 3.137128072445019e-07, "logits/chosen": -3.046394109725952, "logits/rejected": -3.1229748725891113, "logps/chosen": -280.108154296875, "logps/rejected": -250.4733123779297, "loss": 0.4657, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0350096225738525, "rewards/margins": 1.2288711071014404, "rewards/rejected": -2.263880729675293, "step": 970 }, { "epoch": 0.19, "learning_rate": 3.169469598965071e-07, "logits/chosen": -3.0382816791534424, "logits/rejected": -2.9998793601989746, "logps/chosen": -237.20614624023438, "logps/rejected": -326.0390930175781, "loss": 0.5528, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1391876935958862, "rewards/margins": 1.0271110534667969, "rewards/rejected": -2.1662986278533936, "step": 980 }, { "epoch": 0.19, "learning_rate": 3.201811125485123e-07, "logits/chosen": -3.0846056938171387, "logits/rejected": -3.1346778869628906, "logps/chosen": -307.43695068359375, "logps/rejected": -284.30010986328125, "loss": 0.5158, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24250833690166473, "rewards/margins": 1.4640954732894897, "rewards/rejected": -1.70660400390625, "step": 990 }, { "epoch": 0.19, "learning_rate": 3.234152652005174e-07, "logits/chosen": -2.766576051712036, "logits/rejected": -2.9066169261932373, "logps/chosen": -189.30099487304688, "logps/rejected": -197.08670043945312, "loss": 0.574, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1831011772155762, "rewards/margins": 1.3984429836273193, "rewards/rejected": -2.5815436840057373, "step": 1000 }, { "epoch": 0.19, "eval_logits/chosen": -3.072593927383423, "eval_logits/rejected": -3.078813076019287, "eval_logps/chosen": -206.5470733642578, "eval_logps/rejected": -198.46461486816406, "eval_loss": 0.5005720853805542, "eval_rewards/accuracies": 0.7200000286102295, "eval_rewards/chosen": -1.230900764465332, "eval_rewards/margins": 1.4239643812179565, "eval_rewards/rejected": -2.654865026473999, "eval_runtime": 154.2497, "eval_samples_per_second": 20.46, "eval_steps_per_second": 0.324, "step": 1000 }, { "epoch": 0.2, "learning_rate": 3.2664941785252265e-07, "logits/chosen": -3.0678982734680176, "logits/rejected": -3.144016742706299, "logps/chosen": -210.421142578125, "logps/rejected": -202.7620391845703, "loss": 0.4927, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.0432560443878174, "rewards/margins": 0.8585975766181946, "rewards/rejected": -1.901853322982788, "step": 1010 }, { "epoch": 0.2, "learning_rate": 3.298835705045278e-07, "logits/chosen": -2.9162240028381348, "logits/rejected": -2.935126781463623, "logps/chosen": -222.281982421875, "logps/rejected": -233.9991455078125, "loss": 0.4584, "rewards/accuracies": 0.75, "rewards/chosen": -0.5654414296150208, "rewards/margins": 1.6391537189483643, "rewards/rejected": -2.2045950889587402, "step": 1020 }, { "epoch": 0.2, "learning_rate": 3.3311772315653297e-07, "logits/chosen": -3.133784770965576, "logits/rejected": -3.15869402885437, "logps/chosen": -320.0884094238281, "logps/rejected": -216.3438720703125, "loss": 0.5601, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.407951295375824, "rewards/margins": 1.659472107887268, "rewards/rejected": -2.0674235820770264, "step": 1030 }, { "epoch": 0.2, "learning_rate": 3.3635187580853815e-07, "logits/chosen": -3.0063021183013916, "logits/rejected": -2.9411933422088623, "logps/chosen": -222.69235229492188, "logps/rejected": -260.7970275878906, "loss": 0.4548, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.258476197719574, "rewards/margins": 1.0244916677474976, "rewards/rejected": -1.2829679250717163, "step": 1040 }, { "epoch": 0.2, "learning_rate": 3.395860284605433e-07, "logits/chosen": -3.1085867881774902, "logits/rejected": -3.031299114227295, "logps/chosen": -192.4764404296875, "logps/rejected": -182.42041015625, "loss": 0.5538, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.026580810546875, "rewards/margins": 0.8679073452949524, "rewards/rejected": -1.894487977027893, "step": 1050 }, { "epoch": 0.21, "learning_rate": 3.428201811125485e-07, "logits/chosen": -3.115882635116577, "logits/rejected": -3.1893692016601562, "logps/chosen": -317.40435791015625, "logps/rejected": -279.87200927734375, "loss": 0.5181, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6270907521247864, "rewards/margins": 0.9389120936393738, "rewards/rejected": -1.5660028457641602, "step": 1060 }, { "epoch": 0.21, "learning_rate": 3.460543337645537e-07, "logits/chosen": -3.1071622371673584, "logits/rejected": -3.1591129302978516, "logps/chosen": -238.4996337890625, "logps/rejected": -222.28890991210938, "loss": 0.4633, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7945259809494019, "rewards/margins": 2.2058539390563965, "rewards/rejected": -3.000380039215088, "step": 1070 }, { "epoch": 0.21, "learning_rate": 3.492884864165588e-07, "logits/chosen": -2.865248918533325, "logits/rejected": -2.824042320251465, "logps/chosen": -296.9127197265625, "logps/rejected": -319.47479248046875, "loss": 0.6099, "rewards/accuracies": 0.75, "rewards/chosen": -0.8548253178596497, "rewards/margins": 1.1957032680511475, "rewards/rejected": -2.0505287647247314, "step": 1080 }, { "epoch": 0.21, "learning_rate": 3.52522639068564e-07, "logits/chosen": -3.009572744369507, "logits/rejected": -3.0641026496887207, "logps/chosen": -259.3932189941406, "logps/rejected": -245.3793487548828, "loss": 0.4895, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.5809619426727295, "rewards/margins": 2.1275665760040283, "rewards/rejected": -3.708528518676758, "step": 1090 }, { "epoch": 0.21, "learning_rate": 3.557567917205692e-07, "logits/chosen": -3.025712728500366, "logits/rejected": -2.99841570854187, "logps/chosen": -237.5346221923828, "logps/rejected": -261.982421875, "loss": 0.5162, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.5719404220581055, "rewards/margins": 1.089235544204712, "rewards/rejected": -2.6611759662628174, "step": 1100 }, { "epoch": 0.21, "eval_logits/chosen": -3.022839307785034, "eval_logits/rejected": -3.031045436859131, "eval_logps/chosen": -213.15330505371094, "eval_logps/rejected": -205.95526123046875, "eval_loss": 0.5014147758483887, "eval_rewards/accuracies": 0.7124999761581421, "eval_rewards/chosen": -1.8915215730667114, "eval_rewards/margins": 1.512406349182129, "eval_rewards/rejected": -3.403928279876709, "eval_runtime": 147.5344, "eval_samples_per_second": 21.392, "eval_steps_per_second": 0.339, "step": 1100 }, { "epoch": 0.22, "learning_rate": 3.589909443725744e-07, "logits/chosen": -2.9536046981811523, "logits/rejected": -2.9645209312438965, "logps/chosen": -183.07235717773438, "logps/rejected": -205.9368896484375, "loss": 0.6317, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.184206485748291, "rewards/margins": 2.3128418922424316, "rewards/rejected": -4.497048377990723, "step": 1110 }, { "epoch": 0.22, "learning_rate": 3.6222509702457956e-07, "logits/chosen": -3.090672492980957, "logits/rejected": -3.0842747688293457, "logps/chosen": -314.849609375, "logps/rejected": -263.3332824707031, "loss": 0.4117, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4791661500930786, "rewards/margins": 1.4494885206222534, "rewards/rejected": -2.928654193878174, "step": 1120 }, { "epoch": 0.22, "learning_rate": 3.654592496765847e-07, "logits/chosen": -2.967991352081299, "logits/rejected": -2.962629795074463, "logps/chosen": -202.8747100830078, "logps/rejected": -180.35372924804688, "loss": 0.4885, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.424861192703247, "rewards/margins": 2.009540557861328, "rewards/rejected": -3.434401750564575, "step": 1130 }, { "epoch": 0.22, "learning_rate": 3.686934023285899e-07, "logits/chosen": -2.9189953804016113, "logits/rejected": -2.9806225299835205, "logps/chosen": -251.71377563476562, "logps/rejected": -224.7860565185547, "loss": 0.5033, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2403509616851807, "rewards/margins": 2.269951343536377, "rewards/rejected": -3.5103023052215576, "step": 1140 }, { "epoch": 0.22, "learning_rate": 3.719275549805951e-07, "logits/chosen": -2.754948377609253, "logits/rejected": -2.7144274711608887, "logps/chosen": -208.32113647460938, "logps/rejected": -235.2569580078125, "loss": 0.7725, "rewards/accuracies": 0.75, "rewards/chosen": -1.8944647312164307, "rewards/margins": 1.3893457651138306, "rewards/rejected": -3.2838103771209717, "step": 1150 }, { "epoch": 0.23, "learning_rate": 3.7516170763260023e-07, "logits/chosen": -3.0110602378845215, "logits/rejected": -3.023695945739746, "logps/chosen": -219.28292846679688, "logps/rejected": -301.84722900390625, "loss": 0.5097, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.392371416091919, "rewards/margins": 1.1131460666656494, "rewards/rejected": -3.5055174827575684, "step": 1160 }, { "epoch": 0.23, "learning_rate": 3.783958602846054e-07, "logits/chosen": -2.7806992530822754, "logits/rejected": -2.7770588397979736, "logps/chosen": -143.1715850830078, "logps/rejected": -159.3408203125, "loss": 0.4813, "rewards/accuracies": 0.75, "rewards/chosen": -2.19997239112854, "rewards/margins": 2.889619827270508, "rewards/rejected": -5.089591979980469, "step": 1170 }, { "epoch": 0.23, "learning_rate": 3.8163001293661055e-07, "logits/chosen": -3.1141510009765625, "logits/rejected": -3.122486114501953, "logps/chosen": -220.572998046875, "logps/rejected": -235.4295654296875, "loss": 0.4881, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.143183469772339, "rewards/margins": 1.379930853843689, "rewards/rejected": -4.5231146812438965, "step": 1180 }, { "epoch": 0.23, "learning_rate": 3.848641655886158e-07, "logits/chosen": -2.950125217437744, "logits/rejected": -2.998990535736084, "logps/chosen": -259.8975830078125, "logps/rejected": -214.5314483642578, "loss": 0.64, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8857457637786865, "rewards/margins": 2.058138132095337, "rewards/rejected": -3.9438838958740234, "step": 1190 }, { "epoch": 0.23, "learning_rate": 3.8809831824062096e-07, "logits/chosen": -3.035386323928833, "logits/rejected": -3.0723254680633545, "logps/chosen": -212.2264404296875, "logps/rejected": -229.48904418945312, "loss": 0.5772, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.52046537399292, "rewards/margins": 1.2834186553955078, "rewards/rejected": -3.8038837909698486, "step": 1200 }, { "epoch": 0.23, "eval_logits/chosen": -3.0409305095672607, "eval_logits/rejected": -3.0463762283325195, "eval_logps/chosen": -222.2004852294922, "eval_logps/rejected": -216.12571716308594, "eval_loss": 0.49300292134284973, "eval_rewards/accuracies": 0.7149999737739563, "eval_rewards/chosen": -2.796241521835327, "eval_rewards/margins": 1.6247340440750122, "eval_rewards/rejected": -4.420976161956787, "eval_runtime": 139.6999, "eval_samples_per_second": 22.591, "eval_steps_per_second": 0.358, "step": 1200 }, { "epoch": 0.23, "learning_rate": 3.913324708926261e-07, "logits/chosen": -3.11126446723938, "logits/rejected": -3.046506881713867, "logps/chosen": -295.7327575683594, "logps/rejected": -236.0751190185547, "loss": 0.5102, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2411413192749023, "rewards/margins": 1.0197899341583252, "rewards/rejected": -3.2609314918518066, "step": 1210 }, { "epoch": 0.24, "learning_rate": 3.945666235446313e-07, "logits/chosen": -2.947561740875244, "logits/rejected": -3.013680934906006, "logps/chosen": -287.5589294433594, "logps/rejected": -237.54067993164062, "loss": 0.4763, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.672600507736206, "rewards/margins": 2.017294406890869, "rewards/rejected": -3.689894914627075, "step": 1220 }, { "epoch": 0.24, "learning_rate": 3.978007761966365e-07, "logits/chosen": -3.070868730545044, "logits/rejected": -3.0533483028411865, "logps/chosen": -291.6087951660156, "logps/rejected": -220.195068359375, "loss": 0.4539, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2438287734985352, "rewards/margins": 1.924587607383728, "rewards/rejected": -3.1684165000915527, "step": 1230 }, { "epoch": 0.24, "learning_rate": 4.0103492884864164e-07, "logits/chosen": -3.076491117477417, "logits/rejected": -3.0687084197998047, "logps/chosen": -254.6431121826172, "logps/rejected": -230.42984008789062, "loss": 0.5613, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1087321043014526, "rewards/margins": 2.1011736392974854, "rewards/rejected": -3.2099056243896484, "step": 1240 }, { "epoch": 0.24, "learning_rate": 4.042690815006468e-07, "logits/chosen": -2.8690333366394043, "logits/rejected": -2.970384120941162, "logps/chosen": -320.2853088378906, "logps/rejected": -204.7520294189453, "loss": 0.6193, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2358639240264893, "rewards/margins": 0.32780444622039795, "rewards/rejected": -2.5636682510375977, "step": 1250 }, { "epoch": 0.24, "learning_rate": 4.0750323415265195e-07, "logits/chosen": -2.976606845855713, "logits/rejected": -2.9068028926849365, "logps/chosen": -249.2131805419922, "logps/rejected": -293.21380615234375, "loss": 0.493, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.3098329305648804, "rewards/margins": 0.5287295579910278, "rewards/rejected": -1.8385623693466187, "step": 1260 }, { "epoch": 0.25, "learning_rate": 4.1073738680465714e-07, "logits/chosen": -3.063732624053955, "logits/rejected": -3.0288805961608887, "logps/chosen": -262.306640625, "logps/rejected": -191.3130340576172, "loss": 0.5405, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.9771369695663452, "rewards/margins": 0.6444782018661499, "rewards/rejected": -2.621614933013916, "step": 1270 }, { "epoch": 0.25, "learning_rate": 4.1397153945666237e-07, "logits/chosen": -3.0463337898254395, "logits/rejected": -3.0803537368774414, "logps/chosen": -298.9761047363281, "logps/rejected": -262.38958740234375, "loss": 0.5537, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.137474775314331, "rewards/margins": 1.1659232378005981, "rewards/rejected": -3.3033981323242188, "step": 1280 }, { "epoch": 0.25, "learning_rate": 4.172056921086675e-07, "logits/chosen": -2.61924409866333, "logits/rejected": -2.7202091217041016, "logps/chosen": -310.4773864746094, "logps/rejected": -193.84881591796875, "loss": 0.5249, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.0326943397521973, "rewards/margins": 0.865719199180603, "rewards/rejected": -2.8984134197235107, "step": 1290 }, { "epoch": 0.25, "learning_rate": 4.204398447606727e-07, "logits/chosen": -3.0022222995758057, "logits/rejected": -2.9504170417785645, "logps/chosen": -208.32400512695312, "logps/rejected": -234.04812622070312, "loss": 0.5046, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1894853115081787, "rewards/margins": 1.4778894186019897, "rewards/rejected": -3.6673743724823, "step": 1300 }, { "epoch": 0.25, "eval_logits/chosen": -2.949617385864258, "eval_logits/rejected": -2.9546737670898438, "eval_logps/chosen": -214.5135498046875, "eval_logps/rejected": -210.51483154296875, "eval_loss": 0.49645331501960754, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -2.0275466442108154, "eval_rewards/margins": 1.8323402404785156, "eval_rewards/rejected": -3.85988712310791, "eval_runtime": 140.3326, "eval_samples_per_second": 22.489, "eval_steps_per_second": 0.356, "step": 1300 }, { "epoch": 0.25, "learning_rate": 4.236739974126778e-07, "logits/chosen": -2.9534194469451904, "logits/rejected": -3.0005733966827393, "logps/chosen": -203.74639892578125, "logps/rejected": -176.9345703125, "loss": 0.5916, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.8876439929008484, "rewards/margins": 2.431450366973877, "rewards/rejected": -3.31909441947937, "step": 1310 }, { "epoch": 0.26, "learning_rate": 4.2690815006468305e-07, "logits/chosen": -3.058112621307373, "logits/rejected": -3.0043575763702393, "logps/chosen": -282.26806640625, "logps/rejected": -223.65750122070312, "loss": 0.4585, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.8393690586090088, "rewards/margins": 1.286932110786438, "rewards/rejected": -3.1263010501861572, "step": 1320 }, { "epoch": 0.26, "learning_rate": 4.3014230271668823e-07, "logits/chosen": -2.8832192420959473, "logits/rejected": -2.9397988319396973, "logps/chosen": -245.3288116455078, "logps/rejected": -270.9195556640625, "loss": 0.4679, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5440532565116882, "rewards/margins": 2.73748517036438, "rewards/rejected": -3.281538486480713, "step": 1330 }, { "epoch": 0.26, "learning_rate": 4.3337645536869336e-07, "logits/chosen": -3.019270658493042, "logits/rejected": -2.962163209915161, "logps/chosen": -228.564208984375, "logps/rejected": -220.4651336669922, "loss": 0.3911, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.6882941722869873, "rewards/margins": 0.9210535287857056, "rewards/rejected": -2.609347343444824, "step": 1340 }, { "epoch": 0.26, "learning_rate": 4.3661060802069855e-07, "logits/chosen": -2.8609261512756348, "logits/rejected": -2.9113025665283203, "logps/chosen": -264.30047607421875, "logps/rejected": -216.177978515625, "loss": 0.4809, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8004733324050903, "rewards/margins": 2.5520710945129395, "rewards/rejected": -3.3525443077087402, "step": 1350 }, { "epoch": 0.26, "learning_rate": 4.3984476067270373e-07, "logits/chosen": -2.969022035598755, "logits/rejected": -3.0133910179138184, "logps/chosen": -230.45748901367188, "logps/rejected": -254.520751953125, "loss": 0.618, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5217835903167725, "rewards/margins": 1.3354570865631104, "rewards/rejected": -2.857240676879883, "step": 1360 }, { "epoch": 0.27, "learning_rate": 4.430789133247089e-07, "logits/chosen": -3.0336718559265137, "logits/rejected": -3.0175960063934326, "logps/chosen": -258.44073486328125, "logps/rejected": -279.9928894042969, "loss": 0.5308, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4808266758918762, "rewards/margins": 0.8624935150146484, "rewards/rejected": -1.3433201313018799, "step": 1370 }, { "epoch": 0.27, "learning_rate": 4.463130659767141e-07, "logits/chosen": -2.9941906929016113, "logits/rejected": -2.9364662170410156, "logps/chosen": -265.09515380859375, "logps/rejected": -274.08929443359375, "loss": 0.481, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.6047403216362, "rewards/margins": 1.2752878665924072, "rewards/rejected": -1.8800283670425415, "step": 1380 }, { "epoch": 0.27, "learning_rate": 4.495472186287192e-07, "logits/chosen": -2.9048352241516113, "logits/rejected": -2.934239387512207, "logps/chosen": -265.1805114746094, "logps/rejected": -246.13937377929688, "loss": 0.6225, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.3658626079559326, "rewards/margins": 0.4025394320487976, "rewards/rejected": -1.768401861190796, "step": 1390 }, { "epoch": 0.27, "learning_rate": 4.527813712807244e-07, "logits/chosen": -2.8936638832092285, "logits/rejected": -2.8805880546569824, "logps/chosen": -163.13661193847656, "logps/rejected": -157.44528198242188, "loss": 0.4987, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.523904800415039, "rewards/margins": 1.9959348440170288, "rewards/rejected": -5.519840240478516, "step": 1400 }, { "epoch": 0.27, "eval_logits/chosen": -2.938807487487793, "eval_logits/rejected": -2.9467782974243164, "eval_logps/chosen": -216.0721893310547, "eval_logps/rejected": -213.0635528564453, "eval_loss": 0.4858356714248657, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -2.1834118366241455, "eval_rewards/margins": 1.9313461780548096, "eval_rewards/rejected": -4.114758014678955, "eval_runtime": 139.712, "eval_samples_per_second": 22.589, "eval_steps_per_second": 0.358, "step": 1400 }, { "epoch": 0.27, "learning_rate": 4.5601552393272964e-07, "logits/chosen": -2.8405141830444336, "logits/rejected": -2.8250839710235596, "logps/chosen": -249.561767578125, "logps/rejected": -227.09109497070312, "loss": 0.5119, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.8244565725326538, "rewards/margins": 1.4919013977050781, "rewards/rejected": -3.3163580894470215, "step": 1410 }, { "epoch": 0.28, "learning_rate": 4.5924967658473477e-07, "logits/chosen": -2.8767993450164795, "logits/rejected": -2.8210933208465576, "logps/chosen": -200.073486328125, "logps/rejected": -180.85694885253906, "loss": 0.506, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6324551105499268, "rewards/margins": 0.8869991302490234, "rewards/rejected": -2.51945424079895, "step": 1420 }, { "epoch": 0.28, "learning_rate": 4.6248382923673995e-07, "logits/chosen": -2.8442671298980713, "logits/rejected": -2.865152359008789, "logps/chosen": -187.21571350097656, "logps/rejected": -225.83041381835938, "loss": 0.6236, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.5626729726791382, "rewards/margins": 0.7404478192329407, "rewards/rejected": -2.3031206130981445, "step": 1430 }, { "epoch": 0.28, "learning_rate": 4.6571798188874514e-07, "logits/chosen": -2.970651388168335, "logits/rejected": -2.9583518505096436, "logps/chosen": -154.30422973632812, "logps/rejected": -188.7027587890625, "loss": 0.5871, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.115335702896118, "rewards/margins": 1.2574704885482788, "rewards/rejected": -3.3728058338165283, "step": 1440 }, { "epoch": 0.28, "learning_rate": 4.6895213454075027e-07, "logits/chosen": -2.991255521774292, "logits/rejected": -3.045728921890259, "logps/chosen": -324.1858215332031, "logps/rejected": -251.70266723632812, "loss": 0.3887, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4335097372531891, "rewards/margins": 1.7088518142700195, "rewards/rejected": -2.1423614025115967, "step": 1450 }, { "epoch": 0.28, "learning_rate": 4.721862871927555e-07, "logits/chosen": -2.7233452796936035, "logits/rejected": -2.8554649353027344, "logps/chosen": -361.3069152832031, "logps/rejected": -300.8002014160156, "loss": 0.5711, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6226775646209717, "rewards/margins": 2.504540205001831, "rewards/rejected": -4.127217769622803, "step": 1460 }, { "epoch": 0.29, "learning_rate": 4.7542043984476063e-07, "logits/chosen": -2.905463695526123, "logits/rejected": -2.9778549671173096, "logps/chosen": -302.7702941894531, "logps/rejected": -264.3681945800781, "loss": 0.6624, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.3320813179016113, "rewards/margins": 0.3840333819389343, "rewards/rejected": -3.7161145210266113, "step": 1470 }, { "epoch": 0.29, "learning_rate": 4.786545924967659e-07, "logits/chosen": -2.9825057983398438, "logits/rejected": -2.9804251194000244, "logps/chosen": -223.176025390625, "logps/rejected": -210.6410369873047, "loss": 0.4368, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.547914981842041, "rewards/margins": 1.49990975856781, "rewards/rejected": -4.047824859619141, "step": 1480 }, { "epoch": 0.29, "learning_rate": 4.81888745148771e-07, "logits/chosen": -2.8775105476379395, "logits/rejected": -2.9274189472198486, "logps/chosen": -187.3961944580078, "logps/rejected": -218.3221893310547, "loss": 0.4395, "rewards/accuracies": 0.75, "rewards/chosen": -1.8952897787094116, "rewards/margins": 1.8903576135635376, "rewards/rejected": -3.78564715385437, "step": 1490 }, { "epoch": 0.29, "learning_rate": 4.851228978007762e-07, "logits/chosen": -2.7787222862243652, "logits/rejected": -2.817753314971924, "logps/chosen": -257.91339111328125, "logps/rejected": -225.732666015625, "loss": 0.4808, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.018573522567749, "rewards/margins": 2.3152785301208496, "rewards/rejected": -3.3338520526885986, "step": 1500 }, { "epoch": 0.29, "eval_logits/chosen": -2.825873374938965, "eval_logits/rejected": -2.8318793773651123, "eval_logps/chosen": -216.43861389160156, "eval_logps/rejected": -213.95115661621094, "eval_loss": 0.4956172704696655, "eval_rewards/accuracies": 0.7225000262260437, "eval_rewards/chosen": -2.2200560569763184, "eval_rewards/margins": 1.983464002609253, "eval_rewards/rejected": -4.20352029800415, "eval_runtime": 139.7185, "eval_samples_per_second": 22.588, "eval_steps_per_second": 0.358, "step": 1500 }, { "epoch": 0.29, "learning_rate": 4.883570504527814e-07, "logits/chosen": -2.8890321254730225, "logits/rejected": -2.9338107109069824, "logps/chosen": -236.35006713867188, "logps/rejected": -220.2532196044922, "loss": 0.6088, "rewards/accuracies": 0.75, "rewards/chosen": -2.2814784049987793, "rewards/margins": 2.0463881492614746, "rewards/rejected": -4.327866554260254, "step": 1510 }, { "epoch": 0.3, "learning_rate": 4.915912031047865e-07, "logits/chosen": -2.895902156829834, "logits/rejected": -2.918703556060791, "logps/chosen": -220.9754638671875, "logps/rejected": -253.6642608642578, "loss": 0.4491, "rewards/accuracies": 0.75, "rewards/chosen": -1.6117613315582275, "rewards/margins": 2.7955708503723145, "rewards/rejected": -4.407332420349121, "step": 1520 }, { "epoch": 0.3, "learning_rate": 4.948253557567917e-07, "logits/chosen": -2.901484727859497, "logits/rejected": -2.8929615020751953, "logps/chosen": -219.70401000976562, "logps/rejected": -210.36886596679688, "loss": 0.6195, "rewards/accuracies": 0.75, "rewards/chosen": -1.6066604852676392, "rewards/margins": 1.9129937887191772, "rewards/rejected": -3.5196540355682373, "step": 1530 }, { "epoch": 0.3, "learning_rate": 4.980595084087969e-07, "logits/chosen": -2.829221248626709, "logits/rejected": -2.8153421878814697, "logps/chosen": -197.49081420898438, "logps/rejected": -162.01693725585938, "loss": 0.5914, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.097716808319092, "rewards/margins": 1.5696308612823486, "rewards/rejected": -3.6673481464385986, "step": 1540 }, { "epoch": 0.3, "learning_rate": 4.998561875314589e-07, "logits/chosen": -2.9436748027801514, "logits/rejected": -3.01617169380188, "logps/chosen": -219.0644989013672, "logps/rejected": -252.2754669189453, "loss": 0.4484, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1532790660858154, "rewards/margins": 3.3547072410583496, "rewards/rejected": -5.507986545562744, "step": 1550 }, { "epoch": 0.3, "learning_rate": 4.994966563601064e-07, "logits/chosen": -2.804248809814453, "logits/rejected": -2.8312249183654785, "logps/chosen": -217.8917999267578, "logps/rejected": -174.00515747070312, "loss": 0.4917, "rewards/accuracies": 0.75, "rewards/chosen": -2.792263984680176, "rewards/margins": 2.13944935798645, "rewards/rejected": -4.931713104248047, "step": 1560 }, { "epoch": 0.3, "learning_rate": 4.991371251887539e-07, "logits/chosen": -2.9192967414855957, "logits/rejected": -2.874433755874634, "logps/chosen": -138.56375122070312, "logps/rejected": -183.21981811523438, "loss": 0.5351, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.108397960662842, "rewards/margins": 1.698441505432129, "rewards/rejected": -4.806839466094971, "step": 1570 }, { "epoch": 0.31, "learning_rate": 4.987775940174013e-07, "logits/chosen": -2.9292962551116943, "logits/rejected": -2.916050434112549, "logps/chosen": -225.0854949951172, "logps/rejected": -209.73532104492188, "loss": 0.6209, "rewards/accuracies": 0.75, "rewards/chosen": -2.45169734954834, "rewards/margins": 1.9957078695297241, "rewards/rejected": -4.4474053382873535, "step": 1580 }, { "epoch": 0.31, "learning_rate": 4.984180628460487e-07, "logits/chosen": -2.7843105792999268, "logits/rejected": -2.7825684547424316, "logps/chosen": -339.08001708984375, "logps/rejected": -276.44769287109375, "loss": 0.7123, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.71315860748291, "rewards/margins": 1.8002784252166748, "rewards/rejected": -4.513437271118164, "step": 1590 }, { "epoch": 0.31, "learning_rate": 4.980585316746962e-07, "logits/chosen": -2.683852195739746, "logits/rejected": -2.6745524406433105, "logps/chosen": -230.86965942382812, "logps/rejected": -236.5127410888672, "loss": 0.5445, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4864879846572876, "rewards/margins": 1.105987548828125, "rewards/rejected": -2.592475414276123, "step": 1600 }, { "epoch": 0.31, "eval_logits/chosen": -2.8415186405181885, "eval_logits/rejected": -2.842726230621338, "eval_logps/chosen": -221.4375762939453, "eval_logps/rejected": -218.34251403808594, "eval_loss": 0.49167120456695557, "eval_rewards/accuracies": 0.7149999737739563, "eval_rewards/chosen": -2.7199511528015137, "eval_rewards/margins": 1.9227066040039062, "eval_rewards/rejected": -4.64265775680542, "eval_runtime": 139.2638, "eval_samples_per_second": 22.662, "eval_steps_per_second": 0.359, "step": 1600 }, { "epoch": 0.31, "learning_rate": 4.976990005033436e-07, "logits/chosen": -2.9191858768463135, "logits/rejected": -2.845217227935791, "logps/chosen": -305.9738464355469, "logps/rejected": -309.0461730957031, "loss": 0.55, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8031907081604004, "rewards/margins": 2.6041455268859863, "rewards/rejected": -5.407336235046387, "step": 1610 }, { "epoch": 0.31, "learning_rate": 4.973394693319911e-07, "logits/chosen": -3.0033366680145264, "logits/rejected": -2.983060359954834, "logps/chosen": -212.1135711669922, "logps/rejected": -198.12948608398438, "loss": 0.6844, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.3254258632659912, "rewards/margins": 1.4193804264068604, "rewards/rejected": -2.7448062896728516, "step": 1620 }, { "epoch": 0.32, "learning_rate": 4.969799381606385e-07, "logits/chosen": -2.794769287109375, "logits/rejected": -2.6991593837738037, "logps/chosen": -214.34506225585938, "logps/rejected": -204.58248901367188, "loss": 0.4522, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0607855319976807, "rewards/margins": 0.582096517086029, "rewards/rejected": -2.6428821086883545, "step": 1630 }, { "epoch": 0.32, "learning_rate": 4.966204069892859e-07, "logits/chosen": -2.9268391132354736, "logits/rejected": -2.9547667503356934, "logps/chosen": -297.62213134765625, "logps/rejected": -330.30694580078125, "loss": 0.5049, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.506831645965576, "rewards/margins": 3.2207348346710205, "rewards/rejected": -5.727566719055176, "step": 1640 }, { "epoch": 0.32, "learning_rate": 4.962608758179334e-07, "logits/chosen": -2.937668800354004, "logits/rejected": -2.9284844398498535, "logps/chosen": -274.85357666015625, "logps/rejected": -247.8715057373047, "loss": 0.5662, "rewards/accuracies": 0.75, "rewards/chosen": -0.9010640978813171, "rewards/margins": 1.7168909311294556, "rewards/rejected": -2.617955446243286, "step": 1650 }, { "epoch": 0.32, "learning_rate": 4.959013446465808e-07, "logits/chosen": -2.8963003158569336, "logits/rejected": -2.8931543827056885, "logps/chosen": -225.38064575195312, "logps/rejected": -260.68023681640625, "loss": 0.776, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7325546741485596, "rewards/margins": 1.7724930047988892, "rewards/rejected": -3.505047559738159, "step": 1660 }, { "epoch": 0.32, "learning_rate": 4.955418134752283e-07, "logits/chosen": -2.7276527881622314, "logits/rejected": -2.7366204261779785, "logps/chosen": -215.1775360107422, "logps/rejected": -203.43844604492188, "loss": 0.5383, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.350088357925415, "rewards/margins": 2.6437973976135254, "rewards/rejected": -3.9938862323760986, "step": 1670 }, { "epoch": 0.33, "learning_rate": 4.951822823038758e-07, "logits/chosen": -2.9176158905029297, "logits/rejected": -2.9260013103485107, "logps/chosen": -251.2834930419922, "logps/rejected": -241.283203125, "loss": 0.5999, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1413800716400146, "rewards/margins": 0.9033109545707703, "rewards/rejected": -3.044691324234009, "step": 1680 }, { "epoch": 0.33, "learning_rate": 4.948227511325231e-07, "logits/chosen": -2.9102745056152344, "logits/rejected": -2.992983341217041, "logps/chosen": -263.1497802734375, "logps/rejected": -233.3609619140625, "loss": 0.7618, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.4968585968017578, "rewards/margins": 1.3661136627197266, "rewards/rejected": -2.8629722595214844, "step": 1690 }, { "epoch": 0.33, "learning_rate": 4.944632199611706e-07, "logits/chosen": -2.932598829269409, "logits/rejected": -2.862515687942505, "logps/chosen": -226.80776977539062, "logps/rejected": -212.4121856689453, "loss": 0.5903, "rewards/accuracies": 0.75, "rewards/chosen": -0.6572970151901245, "rewards/margins": 1.0310018062591553, "rewards/rejected": -1.6882988214492798, "step": 1700 }, { "epoch": 0.33, "eval_logits/chosen": -2.920361042022705, "eval_logits/rejected": -2.9215283393859863, "eval_logps/chosen": -218.91456604003906, "eval_logps/rejected": -217.71157836914062, "eval_loss": 0.5077618956565857, "eval_rewards/accuracies": 0.6850000023841858, "eval_rewards/chosen": -2.4676513671875, "eval_rewards/margins": 2.111912965774536, "eval_rewards/rejected": -4.579564571380615, "eval_runtime": 140.9157, "eval_samples_per_second": 22.396, "eval_steps_per_second": 0.355, "step": 1700 }, { "epoch": 0.33, "learning_rate": 4.941036887898181e-07, "logits/chosen": -2.853813648223877, "logits/rejected": -2.8657195568084717, "logps/chosen": -181.84080505371094, "logps/rejected": -254.567626953125, "loss": 0.5093, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.878755807876587, "rewards/margins": 3.014904499053955, "rewards/rejected": -4.893660545349121, "step": 1710 }, { "epoch": 0.33, "learning_rate": 4.937441576184655e-07, "logits/chosen": -3.083876132965088, "logits/rejected": -3.0712332725524902, "logps/chosen": -339.6617126464844, "logps/rejected": -338.98876953125, "loss": 0.6498, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8318314552307129, "rewards/margins": 1.4427533149719238, "rewards/rejected": -2.274585008621216, "step": 1720 }, { "epoch": 0.34, "learning_rate": 4.933846264471129e-07, "logits/chosen": -2.831230640411377, "logits/rejected": -2.7701363563537598, "logps/chosen": -202.59078979492188, "logps/rejected": -283.06396484375, "loss": 0.5234, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.7622671127319336, "rewards/margins": 0.09787784516811371, "rewards/rejected": -1.8601449728012085, "step": 1730 }, { "epoch": 0.34, "learning_rate": 4.930250952757603e-07, "logits/chosen": -2.901546001434326, "logits/rejected": -2.9206769466400146, "logps/chosen": -212.34158325195312, "logps/rejected": -215.5955352783203, "loss": 0.5993, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1232070922851562, "rewards/margins": 2.2180099487304688, "rewards/rejected": -4.341217517852783, "step": 1740 }, { "epoch": 0.34, "learning_rate": 4.926655641044078e-07, "logits/chosen": -2.845414400100708, "logits/rejected": -2.9428486824035645, "logps/chosen": -191.5043487548828, "logps/rejected": -227.24710083007812, "loss": 0.6429, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3682129383087158, "rewards/margins": 1.3480457067489624, "rewards/rejected": -2.7162585258483887, "step": 1750 }, { "epoch": 0.34, "learning_rate": 4.923060329330553e-07, "logits/chosen": -2.5704612731933594, "logits/rejected": -2.6995291709899902, "logps/chosen": -338.72064208984375, "logps/rejected": -299.72930908203125, "loss": 0.496, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.260344982147217, "rewards/margins": 1.2866657972335815, "rewards/rejected": -3.547010898590088, "step": 1760 }, { "epoch": 0.34, "learning_rate": 4.919465017617028e-07, "logits/chosen": -2.8508810997009277, "logits/rejected": -2.7702724933624268, "logps/chosen": -280.30621337890625, "logps/rejected": -322.1275329589844, "loss": 0.4883, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7750566005706787, "rewards/margins": 1.663395643234253, "rewards/rejected": -3.4384522438049316, "step": 1770 }, { "epoch": 0.35, "learning_rate": 4.915869705903501e-07, "logits/chosen": -2.8308699131011963, "logits/rejected": -2.8825507164001465, "logps/chosen": -170.27957153320312, "logps/rejected": -243.4982147216797, "loss": 0.5161, "rewards/accuracies": 0.75, "rewards/chosen": -3.5050110816955566, "rewards/margins": 3.1635780334472656, "rewards/rejected": -6.668588161468506, "step": 1780 }, { "epoch": 0.35, "learning_rate": 4.912274394189976e-07, "logits/chosen": -2.798985719680786, "logits/rejected": -2.7850592136383057, "logps/chosen": -193.65354919433594, "logps/rejected": -254.942626953125, "loss": 0.4444, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9649609327316284, "rewards/margins": 1.072882056236267, "rewards/rejected": -2.0378429889678955, "step": 1790 }, { "epoch": 0.35, "learning_rate": 4.908679082476451e-07, "logits/chosen": -2.748511552810669, "logits/rejected": -2.6923654079437256, "logps/chosen": -224.71987915039062, "logps/rejected": -220.9110565185547, "loss": 0.4285, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0711779594421387, "rewards/margins": 2.0031611919403076, "rewards/rejected": -4.074339389801025, "step": 1800 }, { "epoch": 0.35, "eval_logits/chosen": -2.7308030128479004, "eval_logits/rejected": -2.7299492359161377, "eval_logps/chosen": -222.18240356445312, "eval_logps/rejected": -223.67172241210938, "eval_loss": 0.4976809620857239, "eval_rewards/accuracies": 0.6825000047683716, "eval_rewards/chosen": -2.794431209564209, "eval_rewards/margins": 2.381145715713501, "eval_rewards/rejected": -5.175577163696289, "eval_runtime": 140.2956, "eval_samples_per_second": 22.495, "eval_steps_per_second": 0.356, "step": 1800 }, { "epoch": 0.35, "learning_rate": 4.905083770762925e-07, "logits/chosen": -2.883899450302124, "logits/rejected": -2.8011999130249023, "logps/chosen": -220.16506958007812, "logps/rejected": -258.29681396484375, "loss": 0.5381, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.469589948654175, "rewards/margins": 1.1768501996994019, "rewards/rejected": -3.646440029144287, "step": 1810 }, { "epoch": 0.35, "learning_rate": 4.9014884590494e-07, "logits/chosen": -2.8269143104553223, "logits/rejected": -2.83699107170105, "logps/chosen": -187.8958282470703, "logps/rejected": -156.596923828125, "loss": 0.5313, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9329996109008789, "rewards/margins": 3.001521110534668, "rewards/rejected": -3.934520721435547, "step": 1820 }, { "epoch": 0.36, "learning_rate": 4.897893147335873e-07, "logits/chosen": -2.6217234134674072, "logits/rejected": -2.7124900817871094, "logps/chosen": -193.1573486328125, "logps/rejected": -256.3013916015625, "loss": 0.4575, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.2268829345703125, "rewards/margins": 3.0330677032470703, "rewards/rejected": -7.259950160980225, "step": 1830 }, { "epoch": 0.36, "learning_rate": 4.894297835622348e-07, "logits/chosen": -2.751868724822998, "logits/rejected": -2.791961908340454, "logps/chosen": -191.41848754882812, "logps/rejected": -178.33953857421875, "loss": 0.6224, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.943002223968506, "rewards/margins": 2.345608949661255, "rewards/rejected": -5.288610935211182, "step": 1840 }, { "epoch": 0.36, "learning_rate": 4.890702523908823e-07, "logits/chosen": -2.8733506202697754, "logits/rejected": -2.8486733436584473, "logps/chosen": -300.8080139160156, "logps/rejected": -216.562744140625, "loss": 0.38, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5406880378723145, "rewards/margins": 3.0873942375183105, "rewards/rejected": -5.628082275390625, "step": 1850 }, { "epoch": 0.36, "learning_rate": 4.887107212195298e-07, "logits/chosen": -2.660714864730835, "logits/rejected": -2.7268805503845215, "logps/chosen": -192.49978637695312, "logps/rejected": -242.5849151611328, "loss": 0.5311, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8447067737579346, "rewards/margins": 1.0581676959991455, "rewards/rejected": -2.90287446975708, "step": 1860 }, { "epoch": 0.36, "learning_rate": 4.883511900481771e-07, "logits/chosen": -2.889845132827759, "logits/rejected": -2.840237855911255, "logps/chosen": -275.62872314453125, "logps/rejected": -264.7300109863281, "loss": 0.413, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.390221357345581, "rewards/margins": 1.194690465927124, "rewards/rejected": -2.584911823272705, "step": 1870 }, { "epoch": 0.36, "learning_rate": 4.879916588768246e-07, "logits/chosen": -2.8796274662017822, "logits/rejected": -2.825623035430908, "logps/chosen": -430.91375732421875, "logps/rejected": -309.5884094238281, "loss": 0.5651, "rewards/accuracies": 0.75, "rewards/chosen": -2.307588577270508, "rewards/margins": 1.254784345626831, "rewards/rejected": -3.5623726844787598, "step": 1880 }, { "epoch": 0.37, "learning_rate": 4.87632127705472e-07, "logits/chosen": -2.8152003288269043, "logits/rejected": -2.853797435760498, "logps/chosen": -211.9172821044922, "logps/rejected": -221.3629913330078, "loss": 0.4878, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -4.062591552734375, "rewards/margins": 1.4559752941131592, "rewards/rejected": -5.518566131591797, "step": 1890 }, { "epoch": 0.37, "learning_rate": 4.872725965341195e-07, "logits/chosen": -2.8143889904022217, "logits/rejected": -2.8226265907287598, "logps/chosen": -171.64285278320312, "logps/rejected": -147.26547241210938, "loss": 0.5443, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.339956760406494, "rewards/margins": 2.346651554107666, "rewards/rejected": -5.68660831451416, "step": 1900 }, { "epoch": 0.37, "eval_logits/chosen": -2.8397412300109863, "eval_logits/rejected": -2.8391599655151367, "eval_logps/chosen": -225.46945190429688, "eval_logps/rejected": -227.2286376953125, "eval_loss": 0.48742884397506714, "eval_rewards/accuracies": 0.6949999928474426, "eval_rewards/chosen": -3.1231377124786377, "eval_rewards/margins": 2.408130168914795, "eval_rewards/rejected": -5.5312676429748535, "eval_runtime": 153.399, "eval_samples_per_second": 20.574, "eval_steps_per_second": 0.326, "step": 1900 }, { "epoch": 0.37, "learning_rate": 4.86913065362767e-07, "logits/chosen": -2.885929822921753, "logits/rejected": -2.8270092010498047, "logps/chosen": -309.38775634765625, "logps/rejected": -254.2549591064453, "loss": 0.6235, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.1622366905212402, "rewards/margins": 0.6124019622802734, "rewards/rejected": -2.7746386528015137, "step": 1910 }, { "epoch": 0.37, "learning_rate": 4.865535341914143e-07, "logits/chosen": -2.769245147705078, "logits/rejected": -2.821042537689209, "logps/chosen": -438.802001953125, "logps/rejected": -267.00140380859375, "loss": 0.4921, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.7255628108978271, "rewards/margins": 3.751121997833252, "rewards/rejected": -5.4766845703125, "step": 1920 }, { "epoch": 0.37, "learning_rate": 4.861940030200618e-07, "logits/chosen": -2.9151031970977783, "logits/rejected": -2.804572105407715, "logps/chosen": -251.7982635498047, "logps/rejected": -223.19631958007812, "loss": 0.4968, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3003602027893066, "rewards/margins": 2.039172410964966, "rewards/rejected": -3.3395328521728516, "step": 1930 }, { "epoch": 0.38, "learning_rate": 4.858344718487092e-07, "logits/chosen": -2.8644089698791504, "logits/rejected": -2.88712739944458, "logps/chosen": -262.6106262207031, "logps/rejected": -238.1800994873047, "loss": 0.53, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4858415126800537, "rewards/margins": 2.418762445449829, "rewards/rejected": -3.90460467338562, "step": 1940 }, { "epoch": 0.38, "learning_rate": 4.854749406773567e-07, "logits/chosen": -2.8518614768981934, "logits/rejected": -2.8204755783081055, "logps/chosen": -239.5098419189453, "logps/rejected": -214.1254119873047, "loss": 0.4628, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1469717025756836, "rewards/margins": 1.0126450061798096, "rewards/rejected": -2.1596169471740723, "step": 1950 }, { "epoch": 0.38, "learning_rate": 4.851154095060042e-07, "logits/chosen": -2.7540643215179443, "logits/rejected": -2.7091991901397705, "logps/chosen": -176.52734375, "logps/rejected": -208.6387176513672, "loss": 0.4863, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2932955026626587, "rewards/margins": 1.7724775075912476, "rewards/rejected": -3.0657732486724854, "step": 1960 }, { "epoch": 0.38, "learning_rate": 4.847558783346516e-07, "logits/chosen": -2.6126275062561035, "logits/rejected": -2.620729446411133, "logps/chosen": -273.9920654296875, "logps/rejected": -332.6848449707031, "loss": 0.5044, "rewards/accuracies": 0.75, "rewards/chosen": -2.4300267696380615, "rewards/margins": 0.959136962890625, "rewards/rejected": -3.3891634941101074, "step": 1970 }, { "epoch": 0.38, "learning_rate": 4.84396347163299e-07, "logits/chosen": -2.8108973503112793, "logits/rejected": -2.713869333267212, "logps/chosen": -255.5907440185547, "logps/rejected": -264.1590881347656, "loss": 0.55, "rewards/accuracies": 0.75, "rewards/chosen": -1.9435021877288818, "rewards/margins": 1.8012924194335938, "rewards/rejected": -3.7447943687438965, "step": 1980 }, { "epoch": 0.39, "learning_rate": 4.840368159919465e-07, "logits/chosen": -2.767923355102539, "logits/rejected": -2.7916712760925293, "logps/chosen": -229.8169403076172, "logps/rejected": -198.69619750976562, "loss": 0.4926, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.690882921218872, "rewards/margins": 1.0796245336532593, "rewards/rejected": -3.7705070972442627, "step": 1990 }, { "epoch": 0.39, "learning_rate": 4.83677284820594e-07, "logits/chosen": -2.8908398151397705, "logits/rejected": -2.837955951690674, "logps/chosen": -252.1120147705078, "logps/rejected": -246.74606323242188, "loss": 0.4776, "rewards/accuracies": 0.75, "rewards/chosen": -2.1896395683288574, "rewards/margins": 1.070448398590088, "rewards/rejected": -3.2600879669189453, "step": 2000 }, { "epoch": 0.39, "eval_logits/chosen": -2.667562484741211, "eval_logits/rejected": -2.6599512100219727, "eval_logps/chosen": -228.1953125, "eval_logps/rejected": -230.640625, "eval_loss": 0.4850805401802063, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -3.395721435546875, "eval_rewards/margins": 2.476745843887329, "eval_rewards/rejected": -5.872467517852783, "eval_runtime": 140.6915, "eval_samples_per_second": 22.432, "eval_steps_per_second": 0.355, "step": 2000 }, { "epoch": 0.39, "learning_rate": 4.833177536492413e-07, "logits/chosen": -2.7589898109436035, "logits/rejected": -2.8150837421417236, "logps/chosen": -219.28005981445312, "logps/rejected": -301.12664794921875, "loss": 0.5235, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.9610111713409424, "rewards/margins": 2.9139275550842285, "rewards/rejected": -5.87493896484375, "step": 2010 }, { "epoch": 0.39, "learning_rate": 4.829582224778888e-07, "logits/chosen": -2.8580589294433594, "logits/rejected": -2.7701237201690674, "logps/chosen": -185.91903686523438, "logps/rejected": -203.38905334472656, "loss": 0.459, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -5.219147682189941, "rewards/margins": 1.6592937707901, "rewards/rejected": -6.878440856933594, "step": 2020 }, { "epoch": 0.39, "learning_rate": 4.825986913065362e-07, "logits/chosen": -2.6564595699310303, "logits/rejected": -2.6738805770874023, "logps/chosen": -240.1299591064453, "logps/rejected": -339.5009765625, "loss": 0.4661, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.4794435501098633, "rewards/margins": 2.6146645545959473, "rewards/rejected": -5.094107627868652, "step": 2030 }, { "epoch": 0.4, "learning_rate": 4.822391601351837e-07, "logits/chosen": -2.725947141647339, "logits/rejected": -2.588447093963623, "logps/chosen": -211.72341918945312, "logps/rejected": -216.319580078125, "loss": 0.4058, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.6720824241638184, "rewards/margins": 2.822852611541748, "rewards/rejected": -5.494935035705566, "step": 2040 }, { "epoch": 0.4, "learning_rate": 4.818796289638312e-07, "logits/chosen": -2.711277961730957, "logits/rejected": -2.6957826614379883, "logps/chosen": -162.187744140625, "logps/rejected": -182.9951171875, "loss": 0.5772, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -5.238260746002197, "rewards/margins": 1.753045678138733, "rewards/rejected": -6.991306304931641, "step": 2050 }, { "epoch": 0.4, "learning_rate": 4.815200977924786e-07, "logits/chosen": -2.830294609069824, "logits/rejected": -2.7810616493225098, "logps/chosen": -225.2228546142578, "logps/rejected": -205.3040313720703, "loss": 0.5436, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.932889461517334, "rewards/margins": 1.7703787088394165, "rewards/rejected": -5.703269004821777, "step": 2060 }, { "epoch": 0.4, "learning_rate": 4.81160566621126e-07, "logits/chosen": -2.7850966453552246, "logits/rejected": -2.7620739936828613, "logps/chosen": -315.70684814453125, "logps/rejected": -245.2522430419922, "loss": 0.5516, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -4.3822126388549805, "rewards/margins": 0.6999850273132324, "rewards/rejected": -5.082197666168213, "step": 2070 }, { "epoch": 0.4, "learning_rate": 4.808010354497735e-07, "logits/chosen": -2.870511770248413, "logits/rejected": -2.805070161819458, "logps/chosen": -252.13681030273438, "logps/rejected": -197.99697875976562, "loss": 0.5594, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.5869674682617188, "rewards/margins": 2.221545457839966, "rewards/rejected": -5.808512210845947, "step": 2080 }, { "epoch": 0.41, "learning_rate": 4.804415042784209e-07, "logits/chosen": -2.6756701469421387, "logits/rejected": -2.7388083934783936, "logps/chosen": -336.222900390625, "logps/rejected": -217.15963745117188, "loss": 0.4252, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.3528850078582764, "rewards/margins": 1.6979725360870361, "rewards/rejected": -4.050858497619629, "step": 2090 }, { "epoch": 0.41, "learning_rate": 4.800819731070684e-07, "logits/chosen": -2.8770880699157715, "logits/rejected": -2.9065544605255127, "logps/chosen": -259.0509033203125, "logps/rejected": -251.63516235351562, "loss": 0.5387, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.9471192359924316, "rewards/margins": 2.0885226726531982, "rewards/rejected": -5.035641670227051, "step": 2100 }, { "epoch": 0.41, "eval_logits/chosen": -2.8139474391937256, "eval_logits/rejected": -2.803814172744751, "eval_logps/chosen": -232.3311004638672, "eval_logps/rejected": -233.79930114746094, "eval_loss": 0.5210939645767212, "eval_rewards/accuracies": 0.7200000286102295, "eval_rewards/chosen": -3.8093035221099854, "eval_rewards/margins": 2.3790290355682373, "eval_rewards/rejected": -6.1883320808410645, "eval_runtime": 140.4932, "eval_samples_per_second": 22.464, "eval_steps_per_second": 0.356, "step": 2100 }, { "epoch": 0.41, "learning_rate": 4.797224419357158e-07, "logits/chosen": -2.919070243835449, "logits/rejected": -2.864745616912842, "logps/chosen": -266.123291015625, "logps/rejected": -245.8166046142578, "loss": 0.5069, "rewards/accuracies": 0.75, "rewards/chosen": -2.7676305770874023, "rewards/margins": 2.387216567993164, "rewards/rejected": -5.154847145080566, "step": 2110 }, { "epoch": 0.41, "learning_rate": 4.793629107643632e-07, "logits/chosen": -2.8027493953704834, "logits/rejected": -2.8309144973754883, "logps/chosen": -130.6552734375, "logps/rejected": -140.10147094726562, "loss": 0.4968, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.0654122829437256, "rewards/margins": 1.6262986660003662, "rewards/rejected": -4.69171142578125, "step": 2120 }, { "epoch": 0.41, "learning_rate": 4.790033795930107e-07, "logits/chosen": -2.765474319458008, "logits/rejected": -2.8888039588928223, "logps/chosen": -235.1878204345703, "logps/rejected": -321.06414794921875, "loss": 0.4361, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.94917893409729, "rewards/margins": 3.3031139373779297, "rewards/rejected": -6.252293109893799, "step": 2130 }, { "epoch": 0.42, "learning_rate": 4.786438484216581e-07, "logits/chosen": -2.7906861305236816, "logits/rejected": -2.766310930252075, "logps/chosen": -255.572509765625, "logps/rejected": -258.65008544921875, "loss": 0.7067, "rewards/accuracies": 0.75, "rewards/chosen": -2.1167337894439697, "rewards/margins": 1.4146376848220825, "rewards/rejected": -3.5313713550567627, "step": 2140 }, { "epoch": 0.42, "learning_rate": 4.782843172503055e-07, "logits/chosen": -2.9073071479797363, "logits/rejected": -2.876718521118164, "logps/chosen": -236.02719116210938, "logps/rejected": -187.4551544189453, "loss": 0.6016, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -3.2433650493621826, "rewards/margins": 0.7124207615852356, "rewards/rejected": -3.9557862281799316, "step": 2150 }, { "epoch": 0.42, "learning_rate": 4.77924786078953e-07, "logits/chosen": -2.8217930793762207, "logits/rejected": -2.8676421642303467, "logps/chosen": -148.67970275878906, "logps/rejected": -188.03147888183594, "loss": 0.5687, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.420441150665283, "rewards/margins": 2.5906240940093994, "rewards/rejected": -5.0110650062561035, "step": 2160 }, { "epoch": 0.42, "learning_rate": 4.775652549076005e-07, "logits/chosen": -2.799152374267578, "logits/rejected": -2.774202585220337, "logps/chosen": -283.7826232910156, "logps/rejected": -225.2607879638672, "loss": 0.6151, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1398122310638428, "rewards/margins": 0.8396533131599426, "rewards/rejected": -2.9794657230377197, "step": 2170 }, { "epoch": 0.42, "learning_rate": 4.772057237362479e-07, "logits/chosen": -2.9737019538879395, "logits/rejected": -3.0729072093963623, "logps/chosen": -221.1050262451172, "logps/rejected": -283.1679992675781, "loss": 0.5477, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.210036039352417, "rewards/margins": 2.726353883743286, "rewards/rejected": -5.936389923095703, "step": 2180 }, { "epoch": 0.43, "learning_rate": 4.768461925648954e-07, "logits/chosen": -2.972775936126709, "logits/rejected": -2.9946017265319824, "logps/chosen": -260.2768859863281, "logps/rejected": -259.5898742675781, "loss": 0.4473, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1959757804870605, "rewards/margins": 2.791408061981201, "rewards/rejected": -4.987383842468262, "step": 2190 }, { "epoch": 0.43, "learning_rate": 4.7648666139354285e-07, "logits/chosen": -2.7998204231262207, "logits/rejected": -2.903319835662842, "logps/chosen": -275.22467041015625, "logps/rejected": -235.7041473388672, "loss": 0.5673, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.17318058013916, "rewards/margins": 2.1841416358947754, "rewards/rejected": -4.357321739196777, "step": 2200 }, { "epoch": 0.43, "eval_logits/chosen": -2.813796281814575, "eval_logits/rejected": -2.803727149963379, "eval_logps/chosen": -230.1213836669922, "eval_logps/rejected": -231.0912322998047, "eval_loss": 0.502297580242157, "eval_rewards/accuracies": 0.7149999737739563, "eval_rewards/chosen": -3.5883309841156006, "eval_rewards/margins": 2.3291962146759033, "eval_rewards/rejected": -5.91752815246582, "eval_runtime": 140.7946, "eval_samples_per_second": 22.416, "eval_steps_per_second": 0.355, "step": 2200 }, { "epoch": 0.43, "learning_rate": 4.761271302221903e-07, "logits/chosen": -2.8988993167877197, "logits/rejected": -2.9329185485839844, "logps/chosen": -250.9599609375, "logps/rejected": -239.0100860595703, "loss": 0.4661, "rewards/accuracies": 0.75, "rewards/chosen": -2.50346040725708, "rewards/margins": 1.4525728225708008, "rewards/rejected": -3.9560329914093018, "step": 2210 }, { "epoch": 0.43, "learning_rate": 4.757675990508377e-07, "logits/chosen": -2.6291983127593994, "logits/rejected": -2.742318630218506, "logps/chosen": -326.2305603027344, "logps/rejected": -226.09603881835938, "loss": 0.676, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -4.527320861816406, "rewards/margins": 0.19092464447021484, "rewards/rejected": -4.718245506286621, "step": 2220 }, { "epoch": 0.43, "learning_rate": 4.7540806787948513e-07, "logits/chosen": -3.0049118995666504, "logits/rejected": -2.921246290206909, "logps/chosen": -259.56256103515625, "logps/rejected": -237.5496826171875, "loss": 0.6143, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -4.86800479888916, "rewards/margins": 1.2542797327041626, "rewards/rejected": -6.12228536605835, "step": 2230 }, { "epoch": 0.43, "learning_rate": 4.7504853670813256e-07, "logits/chosen": -2.847571611404419, "logits/rejected": -2.8926985263824463, "logps/chosen": -237.22921752929688, "logps/rejected": -195.4996337890625, "loss": 0.6923, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -5.171439170837402, "rewards/margins": 1.057605504989624, "rewards/rejected": -6.2290449142456055, "step": 2240 }, { "epoch": 0.44, "learning_rate": 4.7468900553678004e-07, "logits/chosen": -2.865610122680664, "logits/rejected": -2.872493267059326, "logps/chosen": -233.55728149414062, "logps/rejected": -200.44448852539062, "loss": 0.5375, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.8468353748321533, "rewards/margins": 1.921197533607483, "rewards/rejected": -4.768033027648926, "step": 2250 }, { "epoch": 0.44, "learning_rate": 4.7432947436542747e-07, "logits/chosen": -2.8956828117370605, "logits/rejected": -2.912025213241577, "logps/chosen": -274.8690185546875, "logps/rejected": -275.8160095214844, "loss": 0.4763, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.6738688945770264, "rewards/margins": 3.5564217567443848, "rewards/rejected": -6.230290412902832, "step": 2260 }, { "epoch": 0.44, "learning_rate": 4.739699431940749e-07, "logits/chosen": -2.9155755043029785, "logits/rejected": -2.9084272384643555, "logps/chosen": -305.519287109375, "logps/rejected": -258.97882080078125, "loss": 0.5134, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.920853853225708, "rewards/margins": 3.540989637374878, "rewards/rejected": -5.461844444274902, "step": 2270 }, { "epoch": 0.44, "learning_rate": 4.736104120227223e-07, "logits/chosen": -2.7148208618164062, "logits/rejected": -2.7656939029693604, "logps/chosen": -334.14935302734375, "logps/rejected": -321.5052490234375, "loss": 0.6024, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.147124767303467, "rewards/margins": 0.8551236987113953, "rewards/rejected": -5.0022478103637695, "step": 2280 }, { "epoch": 0.44, "learning_rate": 4.7325088085136975e-07, "logits/chosen": -2.9230995178222656, "logits/rejected": -2.8051400184631348, "logps/chosen": -346.5646667480469, "logps/rejected": -439.0077209472656, "loss": 0.587, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.265329360961914, "rewards/margins": 2.3481392860412598, "rewards/rejected": -6.613468170166016, "step": 2290 }, { "epoch": 0.45, "learning_rate": 4.728913496800173e-07, "logits/chosen": -2.9406020641326904, "logits/rejected": -2.9966301918029785, "logps/chosen": -299.4635009765625, "logps/rejected": -303.520263671875, "loss": 0.5005, "rewards/accuracies": 0.5, "rewards/chosen": -3.865657091140747, "rewards/margins": 1.8061805963516235, "rewards/rejected": -5.671838283538818, "step": 2300 }, { "epoch": 0.45, "eval_logits/chosen": -2.8331551551818848, "eval_logits/rejected": -2.829441785812378, "eval_logps/chosen": -235.6737060546875, "eval_logps/rejected": -235.0966339111328, "eval_loss": 0.4871974587440491, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -4.143564224243164, "eval_rewards/margins": 2.1745026111602783, "eval_rewards/rejected": -6.31806755065918, "eval_runtime": 138.6189, "eval_samples_per_second": 22.767, "eval_steps_per_second": 0.361, "step": 2300 }, { "epoch": 0.45, "learning_rate": 4.725318185086647e-07, "logits/chosen": -2.6729812622070312, "logits/rejected": -2.745810031890869, "logps/chosen": -246.0074005126953, "logps/rejected": -261.0729675292969, "loss": 0.4538, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.4698410034179688, "rewards/margins": 3.8048255443573, "rewards/rejected": -7.274666786193848, "step": 2310 }, { "epoch": 0.45, "learning_rate": 4.7217228733731214e-07, "logits/chosen": -2.6849420070648193, "logits/rejected": -2.6646246910095215, "logps/chosen": -220.2881622314453, "logps/rejected": -269.70782470703125, "loss": 0.6294, "rewards/accuracies": 0.75, "rewards/chosen": -2.955125331878662, "rewards/margins": 2.6565656661987305, "rewards/rejected": -5.611690998077393, "step": 2320 }, { "epoch": 0.45, "learning_rate": 4.7181275616595957e-07, "logits/chosen": -2.7159416675567627, "logits/rejected": -2.761934280395508, "logps/chosen": -257.1372985839844, "logps/rejected": -216.0843963623047, "loss": 0.4954, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.3044304847717285, "rewards/margins": 1.3167331218719482, "rewards/rejected": -3.6211636066436768, "step": 2330 }, { "epoch": 0.45, "learning_rate": 4.71453224994607e-07, "logits/chosen": -2.934607744216919, "logits/rejected": -2.943761110305786, "logps/chosen": -281.1361999511719, "logps/rejected": -308.1634521484375, "loss": 0.6032, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.7284035682678223, "rewards/margins": 1.6770026683807373, "rewards/rejected": -5.405405521392822, "step": 2340 }, { "epoch": 0.46, "learning_rate": 4.710936938232545e-07, "logits/chosen": -2.8049893379211426, "logits/rejected": -2.830340623855591, "logps/chosen": -286.1492614746094, "logps/rejected": -263.43572998046875, "loss": 0.4673, "rewards/accuracies": 0.75, "rewards/chosen": -2.1562047004699707, "rewards/margins": 2.307713747024536, "rewards/rejected": -4.4639177322387695, "step": 2350 }, { "epoch": 0.46, "learning_rate": 4.707341626519019e-07, "logits/chosen": -2.7437050342559814, "logits/rejected": -2.8308017253875732, "logps/chosen": -230.4921875, "logps/rejected": -253.0805206298828, "loss": 0.5056, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.857966184616089, "rewards/margins": 2.8505640029907227, "rewards/rejected": -6.708531379699707, "step": 2360 }, { "epoch": 0.46, "learning_rate": 4.7037463148054933e-07, "logits/chosen": -2.866947889328003, "logits/rejected": -2.7586913108825684, "logps/chosen": -289.22930908203125, "logps/rejected": -241.8726806640625, "loss": 0.5151, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4683752059936523, "rewards/margins": 1.7269830703735352, "rewards/rejected": -3.1953585147857666, "step": 2370 }, { "epoch": 0.46, "learning_rate": 4.7001510030919676e-07, "logits/chosen": -2.7631888389587402, "logits/rejected": -2.7680182456970215, "logps/chosen": -125.8613052368164, "logps/rejected": -168.28732299804688, "loss": 0.4811, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -3.2075114250183105, "rewards/margins": 1.0803308486938477, "rewards/rejected": -4.287842273712158, "step": 2380 }, { "epoch": 0.46, "learning_rate": 4.696555691378442e-07, "logits/chosen": -2.788008689880371, "logits/rejected": -2.773190975189209, "logps/chosen": -205.5693359375, "logps/rejected": -164.92813110351562, "loss": 0.5413, "rewards/accuracies": 0.75, "rewards/chosen": -3.432602643966675, "rewards/margins": 2.795102596282959, "rewards/rejected": -6.227705955505371, "step": 2390 }, { "epoch": 0.47, "learning_rate": 4.692960379664917e-07, "logits/chosen": -2.9483280181884766, "logits/rejected": -2.9471282958984375, "logps/chosen": -247.19863891601562, "logps/rejected": -231.531005859375, "loss": 0.6603, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3705947399139404, "rewards/margins": 2.1876771450042725, "rewards/rejected": -4.558271408081055, "step": 2400 }, { "epoch": 0.47, "eval_logits/chosen": -2.865147113800049, "eval_logits/rejected": -2.862717390060425, "eval_logps/chosen": -227.82696533203125, "eval_logps/rejected": -227.18824768066406, "eval_loss": 0.5267188549041748, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -3.3588902950286865, "eval_rewards/margins": 2.1683359146118164, "eval_rewards/rejected": -5.527226448059082, "eval_runtime": 140.1009, "eval_samples_per_second": 22.527, "eval_steps_per_second": 0.357, "step": 2400 }, { "epoch": 0.47, "learning_rate": 4.6893650679513915e-07, "logits/chosen": -2.711394786834717, "logits/rejected": -2.835629463195801, "logps/chosen": -205.76205444335938, "logps/rejected": -204.39146423339844, "loss": 0.8134, "rewards/accuracies": 0.75, "rewards/chosen": -3.1112122535705566, "rewards/margins": 2.999711751937866, "rewards/rejected": -6.11092472076416, "step": 2410 }, { "epoch": 0.47, "learning_rate": 4.685769756237866e-07, "logits/chosen": -2.7515971660614014, "logits/rejected": -2.804112672805786, "logps/chosen": -225.8956756591797, "logps/rejected": -270.7162170410156, "loss": 0.6108, "rewards/accuracies": 0.75, "rewards/chosen": -2.4729607105255127, "rewards/margins": 1.2283196449279785, "rewards/rejected": -3.7012805938720703, "step": 2420 }, { "epoch": 0.47, "learning_rate": 4.68217444452434e-07, "logits/chosen": -2.9887964725494385, "logits/rejected": -2.903700590133667, "logps/chosen": -344.06658935546875, "logps/rejected": -273.3992004394531, "loss": 0.4948, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.804466962814331, "rewards/margins": 0.6572948694229126, "rewards/rejected": -4.461761951446533, "step": 2430 }, { "epoch": 0.47, "learning_rate": 4.6785791328108143e-07, "logits/chosen": -2.9037537574768066, "logits/rejected": -2.8705358505249023, "logps/chosen": -237.170654296875, "logps/rejected": -201.48524475097656, "loss": 0.4903, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4012081623077393, "rewards/margins": 2.492006778717041, "rewards/rejected": -3.893214702606201, "step": 2440 }, { "epoch": 0.48, "learning_rate": 4.674983821097289e-07, "logits/chosen": -2.896740674972534, "logits/rejected": -2.9393861293792725, "logps/chosen": -279.9737243652344, "logps/rejected": -331.5505676269531, "loss": 0.562, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9780767560005188, "rewards/margins": 2.16288423538208, "rewards/rejected": -3.140961170196533, "step": 2450 }, { "epoch": 0.48, "learning_rate": 4.6713885093837634e-07, "logits/chosen": -2.800800085067749, "logits/rejected": -2.810918092727661, "logps/chosen": -186.5795440673828, "logps/rejected": -224.8197784423828, "loss": 0.4998, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.2283473014831543, "rewards/margins": 2.912879467010498, "rewards/rejected": -5.141226291656494, "step": 2460 }, { "epoch": 0.48, "learning_rate": 4.6677931976702377e-07, "logits/chosen": -2.8430376052856445, "logits/rejected": -2.8135600090026855, "logps/chosen": -279.0056457519531, "logps/rejected": -211.0347137451172, "loss": 0.5782, "rewards/accuracies": 0.75, "rewards/chosen": -1.6672947406768799, "rewards/margins": 1.0928544998168945, "rewards/rejected": -2.7601494789123535, "step": 2470 }, { "epoch": 0.48, "learning_rate": 4.664197885956712e-07, "logits/chosen": -2.9663290977478027, "logits/rejected": -3.0165181159973145, "logps/chosen": -282.0238342285156, "logps/rejected": -326.161376953125, "loss": 0.4105, "rewards/accuracies": 0.75, "rewards/chosen": -2.1331288814544678, "rewards/margins": 1.9243627786636353, "rewards/rejected": -4.057491779327393, "step": 2480 }, { "epoch": 0.48, "learning_rate": 4.660602574243186e-07, "logits/chosen": -2.8429200649261475, "logits/rejected": -2.7810652256011963, "logps/chosen": -322.28619384765625, "logps/rejected": -222.6088409423828, "loss": 0.5032, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.170711040496826, "rewards/margins": 1.5483611822128296, "rewards/rejected": -4.719071865081787, "step": 2490 }, { "epoch": 0.49, "learning_rate": 4.6570072625296616e-07, "logits/chosen": -2.7781026363372803, "logits/rejected": -2.8628244400024414, "logps/chosen": -261.4495849609375, "logps/rejected": -212.68807983398438, "loss": 0.5727, "rewards/accuracies": 0.75, "rewards/chosen": -2.713273763656616, "rewards/margins": 3.496837615966797, "rewards/rejected": -6.210111618041992, "step": 2500 }, { "epoch": 0.49, "eval_logits/chosen": -2.8481392860412598, "eval_logits/rejected": -2.8475637435913086, "eval_logps/chosen": -227.86355590820312, "eval_logps/rejected": -228.5321807861328, "eval_loss": 0.49507883191108704, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -3.362549066543579, "eval_rewards/margins": 2.299072265625, "eval_rewards/rejected": -5.66162109375, "eval_runtime": 140.7809, "eval_samples_per_second": 22.418, "eval_steps_per_second": 0.355, "step": 2500 }, { "epoch": 0.49, "learning_rate": 4.653411950816136e-07, "logits/chosen": -2.882237672805786, "logits/rejected": -2.8170576095581055, "logps/chosen": -225.1431121826172, "logps/rejected": -244.21932983398438, "loss": 0.4277, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.2390036582946777, "rewards/margins": 2.326955795288086, "rewards/rejected": -5.565959930419922, "step": 2510 }, { "epoch": 0.49, "learning_rate": 4.64981663910261e-07, "logits/chosen": -2.8670458793640137, "logits/rejected": -2.7911815643310547, "logps/chosen": -197.6697235107422, "logps/rejected": -185.67788696289062, "loss": 0.5231, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.919440984725952, "rewards/margins": 0.77399080991745, "rewards/rejected": -3.6934313774108887, "step": 2520 }, { "epoch": 0.49, "learning_rate": 4.6462213273890844e-07, "logits/chosen": -2.9022250175476074, "logits/rejected": -2.893629550933838, "logps/chosen": -273.0534362792969, "logps/rejected": -279.77777099609375, "loss": 0.4312, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.160531520843506, "rewards/margins": 2.096802234649658, "rewards/rejected": -4.257333755493164, "step": 2530 }, { "epoch": 0.49, "learning_rate": 4.6426260156755587e-07, "logits/chosen": -2.8681180477142334, "logits/rejected": -2.824857711791992, "logps/chosen": -233.68972778320312, "logps/rejected": -296.8993225097656, "loss": 0.5388, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.006774425506592, "rewards/margins": 1.4732333421707153, "rewards/rejected": -3.4800078868865967, "step": 2540 }, { "epoch": 0.5, "learning_rate": 4.6390307039620335e-07, "logits/chosen": -2.6878414154052734, "logits/rejected": -2.652742862701416, "logps/chosen": -116.12736511230469, "logps/rejected": -166.33688354492188, "loss": 0.6971, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.250061273574829, "rewards/margins": 2.4174466133117676, "rewards/rejected": -4.667508125305176, "step": 2550 }, { "epoch": 0.5, "learning_rate": 4.635435392248508e-07, "logits/chosen": -2.791367530822754, "logits/rejected": -2.7953476905822754, "logps/chosen": -249.4175567626953, "logps/rejected": -228.1199188232422, "loss": 0.5551, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.463749647140503, "rewards/margins": 1.6222326755523682, "rewards/rejected": -4.085981845855713, "step": 2560 }, { "epoch": 0.5, "learning_rate": 4.631840080534982e-07, "logits/chosen": -2.8546769618988037, "logits/rejected": -2.9030673503875732, "logps/chosen": -214.65234375, "logps/rejected": -260.1628112792969, "loss": 0.4563, "rewards/accuracies": 0.75, "rewards/chosen": -1.3015438318252563, "rewards/margins": 1.6741079092025757, "rewards/rejected": -2.975651741027832, "step": 2570 }, { "epoch": 0.5, "learning_rate": 4.6282447688214563e-07, "logits/chosen": -2.805986166000366, "logits/rejected": -2.772294759750366, "logps/chosen": -174.05673217773438, "logps/rejected": -267.9089050292969, "loss": 0.5648, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.704420804977417, "rewards/margins": 2.961665391921997, "rewards/rejected": -6.666086673736572, "step": 2580 }, { "epoch": 0.5, "learning_rate": 4.6246494571079306e-07, "logits/chosen": -2.9575438499450684, "logits/rejected": -2.956247091293335, "logps/chosen": -341.7652893066406, "logps/rejected": -297.8833923339844, "loss": 0.5548, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.6619125604629517, "rewards/margins": 0.7100633978843689, "rewards/rejected": -2.371976137161255, "step": 2590 }, { "epoch": 0.5, "learning_rate": 4.621054145394406e-07, "logits/chosen": -2.711975336074829, "logits/rejected": -2.7088356018066406, "logps/chosen": -171.69789123535156, "logps/rejected": -199.38491821289062, "loss": 0.5962, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -5.308825969696045, "rewards/margins": 1.6770817041397095, "rewards/rejected": -6.985907554626465, "step": 2600 }, { "epoch": 0.5, "eval_logits/chosen": -2.795304536819458, "eval_logits/rejected": -2.794396162033081, "eval_logps/chosen": -225.79542541503906, "eval_logps/rejected": -227.5615234375, "eval_loss": 0.4849202036857605, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -3.155735969543457, "eval_rewards/margins": 2.408820152282715, "eval_rewards/rejected": -5.564556121826172, "eval_runtime": 149.6298, "eval_samples_per_second": 21.092, "eval_steps_per_second": 0.334, "step": 2600 }, { "epoch": 0.51, "learning_rate": 4.61745883368088e-07, "logits/chosen": -2.8558313846588135, "logits/rejected": -2.840514659881592, "logps/chosen": -286.47772216796875, "logps/rejected": -210.8869171142578, "loss": 0.6161, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.449849843978882, "rewards/margins": 1.0376472473144531, "rewards/rejected": -3.487496852874756, "step": 2610 }, { "epoch": 0.51, "learning_rate": 4.6138635219673545e-07, "logits/chosen": -2.8377602100372314, "logits/rejected": -2.850285291671753, "logps/chosen": -229.0254364013672, "logps/rejected": -214.21279907226562, "loss": 0.5154, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -5.181450843811035, "rewards/margins": 1.4099786281585693, "rewards/rejected": -6.591429710388184, "step": 2620 }, { "epoch": 0.51, "learning_rate": 4.610268210253829e-07, "logits/chosen": -2.809192419052124, "logits/rejected": -2.9091243743896484, "logps/chosen": -254.6504669189453, "logps/rejected": -327.51019287109375, "loss": 0.4769, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7079193592071533, "rewards/margins": 1.4167802333831787, "rewards/rejected": -3.124699354171753, "step": 2630 }, { "epoch": 0.51, "learning_rate": 4.606672898540303e-07, "logits/chosen": -2.7210512161254883, "logits/rejected": -2.763578414916992, "logps/chosen": -179.32467651367188, "logps/rejected": -191.49557495117188, "loss": 0.5819, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.314612865447998, "rewards/margins": 3.099726438522339, "rewards/rejected": -5.4143385887146, "step": 2640 }, { "epoch": 0.51, "learning_rate": 4.603077586826778e-07, "logits/chosen": -2.784083843231201, "logits/rejected": -2.7739856243133545, "logps/chosen": -180.47994995117188, "logps/rejected": -230.2683563232422, "loss": 0.469, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.519798755645752, "rewards/margins": 0.8459455370903015, "rewards/rejected": -3.3657443523406982, "step": 2650 }, { "epoch": 0.52, "learning_rate": 4.599482275113252e-07, "logits/chosen": -2.6934545040130615, "logits/rejected": -2.6930344104766846, "logps/chosen": -173.08389282226562, "logps/rejected": -185.22975158691406, "loss": 0.4295, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2992825508117676, "rewards/margins": 1.4708733558654785, "rewards/rejected": -3.770156145095825, "step": 2660 }, { "epoch": 0.52, "learning_rate": 4.5958869633997264e-07, "logits/chosen": -2.882859706878662, "logits/rejected": -2.8013839721679688, "logps/chosen": -236.16506958007812, "logps/rejected": -265.9134826660156, "loss": 0.5289, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.8265107870101929, "rewards/margins": 1.7285740375518799, "rewards/rejected": -3.555084705352783, "step": 2670 }, { "epoch": 0.52, "learning_rate": 4.5922916516862007e-07, "logits/chosen": -2.7338805198669434, "logits/rejected": -2.8017101287841797, "logps/chosen": -167.84872436523438, "logps/rejected": -158.44155883789062, "loss": 0.4123, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.3285470008850098, "rewards/margins": 3.0958738327026367, "rewards/rejected": -5.424420356750488, "step": 2680 }, { "epoch": 0.52, "learning_rate": 4.588696339972675e-07, "logits/chosen": -2.811453342437744, "logits/rejected": -2.855027437210083, "logps/chosen": -264.64385986328125, "logps/rejected": -217.11593627929688, "loss": 0.5148, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.4200408458709717, "rewards/margins": 3.363882064819336, "rewards/rejected": -6.783924102783203, "step": 2690 }, { "epoch": 0.52, "learning_rate": 4.5851010282591503e-07, "logits/chosen": -2.838355302810669, "logits/rejected": -2.8200697898864746, "logps/chosen": -304.71844482421875, "logps/rejected": -251.6042022705078, "loss": 0.5934, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -4.910211563110352, "rewards/margins": 2.4452691078186035, "rewards/rejected": -7.355480194091797, "step": 2700 }, { "epoch": 0.52, "eval_logits/chosen": -2.7893569469451904, "eval_logits/rejected": -2.7884714603424072, "eval_logps/chosen": -232.76983642578125, "eval_logps/rejected": -239.4730224609375, "eval_loss": 0.48597389459609985, "eval_rewards/accuracies": 0.7124999761581421, "eval_rewards/chosen": -3.8531787395477295, "eval_rewards/margins": 2.9025280475616455, "eval_rewards/rejected": -6.755706310272217, "eval_runtime": 140.5224, "eval_samples_per_second": 22.459, "eval_steps_per_second": 0.356, "step": 2700 }, { "epoch": 0.53, "learning_rate": 4.5815057165456246e-07, "logits/chosen": -2.8093137741088867, "logits/rejected": -2.7831215858459473, "logps/chosen": -266.3493347167969, "logps/rejected": -321.4234313964844, "loss": 0.4197, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.4172821044921875, "rewards/margins": 3.3637890815734863, "rewards/rejected": -6.781070709228516, "step": 2710 }, { "epoch": 0.53, "learning_rate": 4.577910404832099e-07, "logits/chosen": -2.69020414352417, "logits/rejected": -2.688347816467285, "logps/chosen": -196.66305541992188, "logps/rejected": -200.4778594970703, "loss": 0.5553, "rewards/accuracies": 0.75, "rewards/chosen": -4.270603656768799, "rewards/margins": 2.629127025604248, "rewards/rejected": -6.899730682373047, "step": 2720 }, { "epoch": 0.53, "learning_rate": 4.574315093118573e-07, "logits/chosen": -2.7629377841949463, "logits/rejected": -2.724412679672241, "logps/chosen": -370.864501953125, "logps/rejected": -272.51239013671875, "loss": 0.5046, "rewards/accuracies": 0.75, "rewards/chosen": -3.3427891731262207, "rewards/margins": 2.89741587638855, "rewards/rejected": -6.240204811096191, "step": 2730 }, { "epoch": 0.53, "learning_rate": 4.5707197814050474e-07, "logits/chosen": -2.664853572845459, "logits/rejected": -2.6672279834747314, "logps/chosen": -222.2160186767578, "logps/rejected": -236.78158569335938, "loss": 0.7191, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.9496960639953613, "rewards/margins": 2.4258008003234863, "rewards/rejected": -5.375496864318848, "step": 2740 }, { "epoch": 0.53, "learning_rate": 4.567124469691522e-07, "logits/chosen": -2.5228238105773926, "logits/rejected": -2.6249096393585205, "logps/chosen": -229.5448760986328, "logps/rejected": -212.86734008789062, "loss": 0.5635, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.750124454498291, "rewards/margins": 3.2047972679138184, "rewards/rejected": -5.954921722412109, "step": 2750 }, { "epoch": 0.54, "learning_rate": 4.5635291579779965e-07, "logits/chosen": -2.764838695526123, "logits/rejected": -2.788780927658081, "logps/chosen": -279.43170166015625, "logps/rejected": -226.8254852294922, "loss": 0.4389, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.6918768882751465, "rewards/margins": 4.540162563323975, "rewards/rejected": -7.232039451599121, "step": 2760 }, { "epoch": 0.54, "learning_rate": 4.559933846264471e-07, "logits/chosen": -2.6748318672180176, "logits/rejected": -2.7573904991149902, "logps/chosen": -294.40020751953125, "logps/rejected": -274.6376647949219, "loss": 0.4928, "rewards/accuracies": 0.75, "rewards/chosen": -3.328364133834839, "rewards/margins": 1.6496822834014893, "rewards/rejected": -4.97804594039917, "step": 2770 }, { "epoch": 0.54, "learning_rate": 4.556338534550945e-07, "logits/chosen": -2.860028028488159, "logits/rejected": -2.9034276008605957, "logps/chosen": -256.19110107421875, "logps/rejected": -258.70513916015625, "loss": 0.8263, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -4.013097286224365, "rewards/margins": 3.11262845993042, "rewards/rejected": -7.125726222991943, "step": 2780 }, { "epoch": 0.54, "learning_rate": 4.5527432228374194e-07, "logits/chosen": -2.962338924407959, "logits/rejected": -2.9946417808532715, "logps/chosen": -381.32305908203125, "logps/rejected": -331.4267272949219, "loss": 0.498, "rewards/accuracies": 0.5, "rewards/chosen": -4.3522491455078125, "rewards/margins": 0.34079620242118835, "rewards/rejected": -4.693045616149902, "step": 2790 }, { "epoch": 0.54, "learning_rate": 4.5491479111238947e-07, "logits/chosen": -2.9137508869171143, "logits/rejected": -2.922184705734253, "logps/chosen": -221.0836181640625, "logps/rejected": -227.008544921875, "loss": 0.5091, "rewards/accuracies": 0.75, "rewards/chosen": -4.47516393661499, "rewards/margins": 2.3096227645874023, "rewards/rejected": -6.784787178039551, "step": 2800 }, { "epoch": 0.54, "eval_logits/chosen": -2.789396286010742, "eval_logits/rejected": -2.7867987155914307, "eval_logps/chosen": -241.6216278076172, "eval_logps/rejected": -248.03701782226562, "eval_loss": 0.48178786039352417, "eval_rewards/accuracies": 0.7225000262260437, "eval_rewards/chosen": -4.7383551597595215, "eval_rewards/margins": 2.873750925064087, "eval_rewards/rejected": -7.6121063232421875, "eval_runtime": 156.9523, "eval_samples_per_second": 20.108, "eval_steps_per_second": 0.319, "step": 2800 }, { "epoch": 0.55, "learning_rate": 4.545552599410369e-07, "logits/chosen": -2.815260171890259, "logits/rejected": -2.8004562854766846, "logps/chosen": -265.31866455078125, "logps/rejected": -286.4771423339844, "loss": 0.4594, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.1919310092926025, "rewards/margins": 1.9255412817001343, "rewards/rejected": -5.1174726486206055, "step": 2810 }, { "epoch": 0.55, "learning_rate": 4.541957287696843e-07, "logits/chosen": -2.8050880432128906, "logits/rejected": -2.830878734588623, "logps/chosen": -279.0443420410156, "logps/rejected": -254.31881713867188, "loss": 0.5846, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.6159064769744873, "rewards/margins": 3.61749267578125, "rewards/rejected": -7.233399391174316, "step": 2820 }, { "epoch": 0.55, "learning_rate": 4.5383619759833175e-07, "logits/chosen": -2.8187646865844727, "logits/rejected": -2.8197734355926514, "logps/chosen": -270.74810791015625, "logps/rejected": -321.5408020019531, "loss": 0.4978, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.90576434135437, "rewards/margins": 4.879975318908691, "rewards/rejected": -7.785739898681641, "step": 2830 }, { "epoch": 0.55, "learning_rate": 4.534766664269792e-07, "logits/chosen": -2.8680903911590576, "logits/rejected": -2.8194046020507812, "logps/chosen": -213.48660278320312, "logps/rejected": -207.0009765625, "loss": 0.4644, "rewards/accuracies": 0.75, "rewards/chosen": -2.814753293991089, "rewards/margins": 2.791482448577881, "rewards/rejected": -5.606235504150391, "step": 2840 }, { "epoch": 0.55, "learning_rate": 4.531171352556266e-07, "logits/chosen": -2.751530408859253, "logits/rejected": -2.72860050201416, "logps/chosen": -231.2147979736328, "logps/rejected": -203.83409118652344, "loss": 0.4614, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.988515853881836, "rewards/margins": 4.011109352111816, "rewards/rejected": -7.999625205993652, "step": 2850 }, { "epoch": 0.56, "learning_rate": 4.527576040842741e-07, "logits/chosen": -2.666177988052368, "logits/rejected": -2.6831955909729004, "logps/chosen": -237.4653778076172, "logps/rejected": -269.43695068359375, "loss": 0.4596, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.882408618927002, "rewards/margins": 2.584329605102539, "rewards/rejected": -5.466738224029541, "step": 2860 }, { "epoch": 0.56, "learning_rate": 4.523980729129215e-07, "logits/chosen": -2.932311534881592, "logits/rejected": -2.8739829063415527, "logps/chosen": -198.46334838867188, "logps/rejected": -215.8825225830078, "loss": 0.5902, "rewards/accuracies": 0.5, "rewards/chosen": -2.875717878341675, "rewards/margins": 1.6331195831298828, "rewards/rejected": -4.508837699890137, "step": 2870 }, { "epoch": 0.56, "learning_rate": 4.5203854174156895e-07, "logits/chosen": -2.7299952507019043, "logits/rejected": -2.773221254348755, "logps/chosen": -160.0872039794922, "logps/rejected": -212.7479705810547, "loss": 0.4727, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -5.80568790435791, "rewards/margins": 4.347495079040527, "rewards/rejected": -10.153182983398438, "step": 2880 }, { "epoch": 0.56, "learning_rate": 4.5167901057021643e-07, "logits/chosen": -2.733940601348877, "logits/rejected": -2.7792134284973145, "logps/chosen": -213.64645385742188, "logps/rejected": -215.48654174804688, "loss": 0.4715, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.3354275226593018, "rewards/margins": 2.1211249828338623, "rewards/rejected": -5.456552028656006, "step": 2890 }, { "epoch": 0.56, "learning_rate": 4.5131947939886385e-07, "logits/chosen": -2.6658873558044434, "logits/rejected": -2.7218375205993652, "logps/chosen": -200.94448852539062, "logps/rejected": -262.49053955078125, "loss": 0.4864, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.2886252403259277, "rewards/margins": 2.6156206130981445, "rewards/rejected": -5.904245376586914, "step": 2900 }, { "epoch": 0.56, "eval_logits/chosen": -2.7678444385528564, "eval_logits/rejected": -2.7678475379943848, "eval_logps/chosen": -235.4826202392578, "eval_logps/rejected": -241.34597778320312, "eval_loss": 0.4803493916988373, "eval_rewards/accuracies": 0.7174999713897705, "eval_rewards/chosen": -4.124456882476807, "eval_rewards/margins": 2.818544626235962, "eval_rewards/rejected": -6.943002223968506, "eval_runtime": 141.1188, "eval_samples_per_second": 22.364, "eval_steps_per_second": 0.354, "step": 2900 }, { "epoch": 0.56, "learning_rate": 4.5095994822751134e-07, "logits/chosen": -2.833460569381714, "logits/rejected": -2.802799701690674, "logps/chosen": -214.0464324951172, "logps/rejected": -240.76315307617188, "loss": 0.4484, "rewards/accuracies": 0.75, "rewards/chosen": -2.0033795833587646, "rewards/margins": 1.78948175907135, "rewards/rejected": -3.7928614616394043, "step": 2910 }, { "epoch": 0.57, "learning_rate": 4.5060041705615876e-07, "logits/chosen": -2.7049596309661865, "logits/rejected": -2.7685043811798096, "logps/chosen": -196.03549194335938, "logps/rejected": -220.3227081298828, "loss": 0.5213, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.256979942321777, "rewards/margins": 3.0013604164123535, "rewards/rejected": -7.258340358734131, "step": 2920 }, { "epoch": 0.57, "learning_rate": 4.502408858848062e-07, "logits/chosen": -2.6681554317474365, "logits/rejected": -2.6551687717437744, "logps/chosen": -214.7572784423828, "logps/rejected": -269.0203552246094, "loss": 0.5363, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.369581699371338, "rewards/margins": 2.0056557655334473, "rewards/rejected": -5.375237464904785, "step": 2930 }, { "epoch": 0.57, "learning_rate": 4.498813547134536e-07, "logits/chosen": -2.7057762145996094, "logits/rejected": -2.6808526515960693, "logps/chosen": -169.85397338867188, "logps/rejected": -280.1517639160156, "loss": 0.6084, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6998391151428223, "rewards/margins": 2.238490343093872, "rewards/rejected": -4.938329219818115, "step": 2940 }, { "epoch": 0.57, "learning_rate": 4.4952182354210105e-07, "logits/chosen": -2.5502073764801025, "logits/rejected": -2.642277956008911, "logps/chosen": -221.54415893554688, "logps/rejected": -201.84996032714844, "loss": 0.4849, "rewards/accuracies": 0.75, "rewards/chosen": -2.1933398246765137, "rewards/margins": 3.202204465866089, "rewards/rejected": -5.395545482635498, "step": 2950 }, { "epoch": 0.57, "learning_rate": 4.4916229237074853e-07, "logits/chosen": -2.723092555999756, "logits/rejected": -2.7366278171539307, "logps/chosen": -283.14678955078125, "logps/rejected": -196.46414184570312, "loss": 0.4182, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.1323070526123047, "rewards/margins": 1.384583592414856, "rewards/rejected": -4.516890525817871, "step": 2960 }, { "epoch": 0.58, "learning_rate": 4.4880276119939596e-07, "logits/chosen": -2.5838587284088135, "logits/rejected": -2.7283897399902344, "logps/chosen": -209.7035369873047, "logps/rejected": -230.57687377929688, "loss": 0.5242, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.3927102088928223, "rewards/margins": 5.4158172607421875, "rewards/rejected": -8.808526992797852, "step": 2970 }, { "epoch": 0.58, "learning_rate": 4.484432300280434e-07, "logits/chosen": -2.910597562789917, "logits/rejected": -2.785182237625122, "logps/chosen": -210.0435028076172, "logps/rejected": -222.11618041992188, "loss": 0.5054, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.7645599842071533, "rewards/margins": 1.3658928871154785, "rewards/rejected": -5.130453586578369, "step": 2980 }, { "epoch": 0.58, "learning_rate": 4.4808369885669086e-07, "logits/chosen": -2.771735668182373, "logits/rejected": -2.8467822074890137, "logps/chosen": -349.49066162109375, "logps/rejected": -267.9779052734375, "loss": 0.6024, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.8369460105895996, "rewards/margins": 2.648116111755371, "rewards/rejected": -5.485062599182129, "step": 2990 }, { "epoch": 0.58, "learning_rate": 4.477241676853383e-07, "logits/chosen": -2.7642741203308105, "logits/rejected": -2.7960777282714844, "logps/chosen": -254.3138427734375, "logps/rejected": -278.13262939453125, "loss": 0.4882, "rewards/accuracies": 0.75, "rewards/chosen": -3.0275261402130127, "rewards/margins": 1.6648391485214233, "rewards/rejected": -4.6923651695251465, "step": 3000 }, { "epoch": 0.58, "eval_logits/chosen": -2.789898157119751, "eval_logits/rejected": -2.7911148071289062, "eval_logps/chosen": -229.87535095214844, "eval_logps/rejected": -232.6624755859375, "eval_loss": 0.4967539310455322, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -3.5637285709381104, "eval_rewards/margins": 2.5109221935272217, "eval_rewards/rejected": -6.074651718139648, "eval_runtime": 140.5322, "eval_samples_per_second": 22.457, "eval_steps_per_second": 0.356, "step": 3000 }, { "epoch": 0.58, "learning_rate": 4.4736463651398577e-07, "logits/chosen": -2.8905491828918457, "logits/rejected": -2.8863720893859863, "logps/chosen": -239.4340362548828, "logps/rejected": -264.43817138671875, "loss": 0.5881, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.0071334838867188, "rewards/margins": 2.091095447540283, "rewards/rejected": -5.09822940826416, "step": 3010 }, { "epoch": 0.59, "learning_rate": 4.470051053426332e-07, "logits/chosen": -2.899305820465088, "logits/rejected": -2.8747293949127197, "logps/chosen": -255.5482635498047, "logps/rejected": -254.17111206054688, "loss": 0.5337, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.560997724533081, "rewards/margins": 4.4746809005737305, "rewards/rejected": -8.03567886352539, "step": 3020 }, { "epoch": 0.59, "learning_rate": 4.4664557417128063e-07, "logits/chosen": -2.721799612045288, "logits/rejected": -2.6572134494781494, "logps/chosen": -186.32833862304688, "logps/rejected": -239.40750122070312, "loss": 0.5977, "rewards/accuracies": 0.75, "rewards/chosen": -3.5370707511901855, "rewards/margins": 1.5322608947753906, "rewards/rejected": -5.069332122802734, "step": 3030 }, { "epoch": 0.59, "learning_rate": 4.4628604299992806e-07, "logits/chosen": -2.6710963249206543, "logits/rejected": -2.6934525966644287, "logps/chosen": -203.04034423828125, "logps/rejected": -218.6429443359375, "loss": 0.5089, "rewards/accuracies": 0.75, "rewards/chosen": -2.743546485900879, "rewards/margins": 2.842298984527588, "rewards/rejected": -5.585845947265625, "step": 3040 }, { "epoch": 0.59, "learning_rate": 4.459265118285755e-07, "logits/chosen": -2.7489089965820312, "logits/rejected": -2.7831459045410156, "logps/chosen": -177.79653930664062, "logps/rejected": -220.6035614013672, "loss": 0.4905, "rewards/accuracies": 0.75, "rewards/chosen": -3.629119873046875, "rewards/margins": 4.655067443847656, "rewards/rejected": -8.284186363220215, "step": 3050 }, { "epoch": 0.59, "learning_rate": 4.4556698065722296e-07, "logits/chosen": -2.800358533859253, "logits/rejected": -2.8105666637420654, "logps/chosen": -211.05191040039062, "logps/rejected": -238.6355438232422, "loss": 0.5352, "rewards/accuracies": 0.75, "rewards/chosen": -3.4860329627990723, "rewards/margins": 3.4360384941101074, "rewards/rejected": -6.922071933746338, "step": 3060 }, { "epoch": 0.6, "learning_rate": 4.452074494858704e-07, "logits/chosen": -2.7737698554992676, "logits/rejected": -2.850177526473999, "logps/chosen": -188.44302368164062, "logps/rejected": -232.519775390625, "loss": 0.4941, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.984412670135498, "rewards/margins": 3.328932285308838, "rewards/rejected": -7.313345432281494, "step": 3070 }, { "epoch": 0.6, "learning_rate": 4.448479183145178e-07, "logits/chosen": -2.72746205329895, "logits/rejected": -2.7201554775238037, "logps/chosen": -206.14730834960938, "logps/rejected": -231.61087036132812, "loss": 0.4867, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -4.876461982727051, "rewards/margins": 1.740944266319275, "rewards/rejected": -6.617405891418457, "step": 3080 }, { "epoch": 0.6, "learning_rate": 4.444883871431653e-07, "logits/chosen": -2.839428424835205, "logits/rejected": -2.82271146774292, "logps/chosen": -203.8380889892578, "logps/rejected": -207.5266571044922, "loss": 0.5487, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.013213634490967, "rewards/margins": 1.5793131589889526, "rewards/rejected": -4.592526912689209, "step": 3090 }, { "epoch": 0.6, "learning_rate": 4.4412885597181273e-07, "logits/chosen": -2.8839659690856934, "logits/rejected": -2.841780424118042, "logps/chosen": -278.1788024902344, "logps/rejected": -258.2974548339844, "loss": 0.4958, "rewards/accuracies": 0.75, "rewards/chosen": -3.1666135787963867, "rewards/margins": 1.1531345844268799, "rewards/rejected": -4.319748401641846, "step": 3100 }, { "epoch": 0.6, "eval_logits/chosen": -2.8040521144866943, "eval_logits/rejected": -2.8052425384521484, "eval_logps/chosen": -234.44879150390625, "eval_logps/rejected": -239.80535888671875, "eval_loss": 0.48301056027412415, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -4.021074295043945, "eval_rewards/margins": 2.7678658962249756, "eval_rewards/rejected": -6.788939952850342, "eval_runtime": 155.9274, "eval_samples_per_second": 20.24, "eval_steps_per_second": 0.321, "step": 3100 }, { "epoch": 0.6, "learning_rate": 4.437693248004602e-07, "logits/chosen": -2.811739444732666, "logits/rejected": -2.844913959503174, "logps/chosen": -237.08291625976562, "logps/rejected": -264.9893493652344, "loss": 0.461, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9083025455474854, "rewards/margins": 2.6540277004241943, "rewards/rejected": -4.56233024597168, "step": 3110 }, { "epoch": 0.61, "learning_rate": 4.4340979362910764e-07, "logits/chosen": -2.6549086570739746, "logits/rejected": -2.6251420974731445, "logps/chosen": -238.0406494140625, "logps/rejected": -226.7935333251953, "loss": 0.6591, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8329887390136719, "rewards/margins": 1.1235915422439575, "rewards/rejected": -2.956580400466919, "step": 3120 }, { "epoch": 0.61, "learning_rate": 4.4305026245775506e-07, "logits/chosen": -2.838721990585327, "logits/rejected": -2.798975944519043, "logps/chosen": -204.61141967773438, "logps/rejected": -245.9084930419922, "loss": 0.5192, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -4.60957670211792, "rewards/margins": 1.4932568073272705, "rewards/rejected": -6.1028337478637695, "step": 3130 }, { "epoch": 0.61, "learning_rate": 4.426907312864025e-07, "logits/chosen": -2.8392152786254883, "logits/rejected": -2.7306416034698486, "logps/chosen": -324.04840087890625, "logps/rejected": -252.14224243164062, "loss": 0.6379, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.778172969818115, "rewards/margins": 0.22122666239738464, "rewards/rejected": -4.999399662017822, "step": 3140 }, { "epoch": 0.61, "learning_rate": 4.423312001150499e-07, "logits/chosen": -2.8068060874938965, "logits/rejected": -2.824324369430542, "logps/chosen": -191.25625610351562, "logps/rejected": -219.1925811767578, "loss": 0.604, "rewards/accuracies": 0.75, "rewards/chosen": -4.532156944274902, "rewards/margins": 3.1442408561706543, "rewards/rejected": -7.676396369934082, "step": 3150 }, { "epoch": 0.61, "learning_rate": 4.419716689436974e-07, "logits/chosen": -2.7903690338134766, "logits/rejected": -2.797542095184326, "logps/chosen": -269.3061218261719, "logps/rejected": -242.23208618164062, "loss": 0.5406, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.1046996116638184, "rewards/margins": 1.600785255432129, "rewards/rejected": -4.7054853439331055, "step": 3160 }, { "epoch": 0.62, "learning_rate": 4.4161213777234483e-07, "logits/chosen": -2.6806042194366455, "logits/rejected": -2.651494026184082, "logps/chosen": -293.6893615722656, "logps/rejected": -300.1238098144531, "loss": 0.5415, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.8252601623535156, "rewards/margins": 2.5960474014282227, "rewards/rejected": -6.4213080406188965, "step": 3170 }, { "epoch": 0.62, "learning_rate": 4.4125260660099226e-07, "logits/chosen": -2.7799265384674072, "logits/rejected": -2.844611883163452, "logps/chosen": -235.3821563720703, "logps/rejected": -241.40951538085938, "loss": 0.6426, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.239741563796997, "rewards/margins": 2.4194602966308594, "rewards/rejected": -5.659201622009277, "step": 3180 }, { "epoch": 0.62, "learning_rate": 4.4089307542963974e-07, "logits/chosen": -2.8540241718292236, "logits/rejected": -2.844773292541504, "logps/chosen": -214.5902099609375, "logps/rejected": -210.88577270507812, "loss": 0.4608, "rewards/accuracies": 0.75, "rewards/chosen": -1.6627594232559204, "rewards/margins": 4.641172885894775, "rewards/rejected": -6.303932189941406, "step": 3190 }, { "epoch": 0.62, "learning_rate": 4.4053354425828717e-07, "logits/chosen": -2.9225311279296875, "logits/rejected": -2.949389934539795, "logps/chosen": -277.113525390625, "logps/rejected": -199.2532958984375, "loss": 0.6056, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.612243175506592, "rewards/margins": 2.2372536659240723, "rewards/rejected": -4.849497318267822, "step": 3200 }, { "epoch": 0.62, "eval_logits/chosen": -2.841383695602417, "eval_logits/rejected": -2.843325138092041, "eval_logps/chosen": -227.9439239501953, "eval_logps/rejected": -232.52822875976562, "eval_loss": 0.4876376986503601, "eval_rewards/accuracies": 0.7124999761581421, "eval_rewards/chosen": -3.3705859184265137, "eval_rewards/margins": 2.690638780593872, "eval_rewards/rejected": -6.061224460601807, "eval_runtime": 138.7046, "eval_samples_per_second": 22.753, "eval_steps_per_second": 0.36, "step": 3200 }, { "epoch": 0.62, "learning_rate": 4.4017401308693465e-07, "logits/chosen": -2.752577781677246, "logits/rejected": -2.7847416400909424, "logps/chosen": -283.6220397949219, "logps/rejected": -236.8374481201172, "loss": 0.6678, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.9232075214385986, "rewards/margins": 1.3011012077331543, "rewards/rejected": -4.224308490753174, "step": 3210 }, { "epoch": 0.63, "learning_rate": 4.398144819155821e-07, "logits/chosen": -3.000760793685913, "logits/rejected": -2.914386034011841, "logps/chosen": -139.03904724121094, "logps/rejected": -159.8165283203125, "loss": 0.5968, "rewards/accuracies": 0.75, "rewards/chosen": -3.078878402709961, "rewards/margins": 2.0962741374969482, "rewards/rejected": -5.17515230178833, "step": 3220 }, { "epoch": 0.63, "learning_rate": 4.394549507442295e-07, "logits/chosen": -2.4832091331481934, "logits/rejected": -2.565969944000244, "logps/chosen": -233.667236328125, "logps/rejected": -211.1376953125, "loss": 0.5122, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -4.009371280670166, "rewards/margins": 1.7042487859725952, "rewards/rejected": -5.713620185852051, "step": 3230 }, { "epoch": 0.63, "learning_rate": 4.3909541957287693e-07, "logits/chosen": -2.793034076690674, "logits/rejected": -2.841033697128296, "logps/chosen": -248.4766082763672, "logps/rejected": -204.9600830078125, "loss": 0.5375, "rewards/accuracies": 0.75, "rewards/chosen": -1.73274827003479, "rewards/margins": 1.598253846168518, "rewards/rejected": -3.3310019969940186, "step": 3240 }, { "epoch": 0.63, "learning_rate": 4.3873588840152436e-07, "logits/chosen": -2.860628843307495, "logits/rejected": -2.8502299785614014, "logps/chosen": -277.3612365722656, "logps/rejected": -240.89620971679688, "loss": 0.5313, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.135129451751709, "rewards/margins": 1.0863913297653198, "rewards/rejected": -3.2215206623077393, "step": 3250 }, { "epoch": 0.63, "learning_rate": 4.3837635723017184e-07, "logits/chosen": -2.6584460735321045, "logits/rejected": -2.7102010250091553, "logps/chosen": -210.11239624023438, "logps/rejected": -256.6304626464844, "loss": 0.51, "rewards/accuracies": 0.75, "rewards/chosen": -2.1810548305511475, "rewards/margins": 3.3691158294677734, "rewards/rejected": -5.5501708984375, "step": 3260 }, { "epoch": 0.63, "learning_rate": 4.3801682605881927e-07, "logits/chosen": -2.8054721355438232, "logits/rejected": -2.8320729732513428, "logps/chosen": -287.7835693359375, "logps/rejected": -356.615234375, "loss": 0.4803, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2600693702697754, "rewards/margins": 2.852206230163574, "rewards/rejected": -5.11227560043335, "step": 3270 }, { "epoch": 0.64, "learning_rate": 4.3765729488746675e-07, "logits/chosen": -2.8953349590301514, "logits/rejected": -2.884124279022217, "logps/chosen": -260.77508544921875, "logps/rejected": -234.0574188232422, "loss": 0.4614, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.122786045074463, "rewards/margins": 1.525109052658081, "rewards/rejected": -3.647895097732544, "step": 3280 }, { "epoch": 0.64, "learning_rate": 4.372977637161142e-07, "logits/chosen": -2.9647879600524902, "logits/rejected": -2.901134967803955, "logps/chosen": -288.5679626464844, "logps/rejected": -263.0459899902344, "loss": 0.4579, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.3843274116516113, "rewards/margins": 2.8646128177642822, "rewards/rejected": -6.248939514160156, "step": 3290 }, { "epoch": 0.64, "learning_rate": 4.369382325447616e-07, "logits/chosen": -2.634999990463257, "logits/rejected": -2.5048158168792725, "logps/chosen": -292.83612060546875, "logps/rejected": -297.54010009765625, "loss": 0.6339, "rewards/accuracies": 0.75, "rewards/chosen": -4.710498809814453, "rewards/margins": 3.350341796875, "rewards/rejected": -8.06084156036377, "step": 3300 }, { "epoch": 0.64, "eval_logits/chosen": -2.8006222248077393, "eval_logits/rejected": -2.7996485233306885, "eval_logps/chosen": -229.91427612304688, "eval_logps/rejected": -236.0455322265625, "eval_loss": 0.5043264031410217, "eval_rewards/accuracies": 0.7149999737739563, "eval_rewards/chosen": -3.5676214694976807, "eval_rewards/margins": 2.845334529876709, "eval_rewards/rejected": -6.412956714630127, "eval_runtime": 140.5374, "eval_samples_per_second": 22.457, "eval_steps_per_second": 0.356, "step": 3300 }, { "epoch": 0.64, "learning_rate": 4.365787013734091e-07, "logits/chosen": -2.8656392097473145, "logits/rejected": -2.8673369884490967, "logps/chosen": -260.9368896484375, "logps/rejected": -278.66778564453125, "loss": 0.6329, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8062299489974976, "rewards/margins": 2.270146608352661, "rewards/rejected": -4.076376438140869, "step": 3310 }, { "epoch": 0.64, "learning_rate": 4.362191702020565e-07, "logits/chosen": -2.8520846366882324, "logits/rejected": -2.815701961517334, "logps/chosen": -333.2210998535156, "logps/rejected": -343.0906677246094, "loss": 0.4729, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.563772439956665, "rewards/margins": 3.101661205291748, "rewards/rejected": -5.665433406829834, "step": 3320 }, { "epoch": 0.65, "learning_rate": 4.3585963903070394e-07, "logits/chosen": -2.9428510665893555, "logits/rejected": -2.939283847808838, "logps/chosen": -258.36517333984375, "logps/rejected": -252.941162109375, "loss": 0.5681, "rewards/accuracies": 0.75, "rewards/chosen": -3.794334888458252, "rewards/margins": 3.352727174758911, "rewards/rejected": -7.147061347961426, "step": 3330 }, { "epoch": 0.65, "learning_rate": 4.3550010785935137e-07, "logits/chosen": -2.8750064373016357, "logits/rejected": -2.809535503387451, "logps/chosen": -236.16696166992188, "logps/rejected": -272.6056213378906, "loss": 0.576, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -3.8540070056915283, "rewards/margins": 1.4348869323730469, "rewards/rejected": -5.288894176483154, "step": 3340 }, { "epoch": 0.65, "learning_rate": 4.351405766879988e-07, "logits/chosen": -2.998915910720825, "logits/rejected": -2.934418201446533, "logps/chosen": -273.8209533691406, "logps/rejected": -249.93777465820312, "loss": 0.4891, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.418905735015869, "rewards/margins": 0.8809320330619812, "rewards/rejected": -4.299838066101074, "step": 3350 }, { "epoch": 0.65, "learning_rate": 4.347810455166463e-07, "logits/chosen": -2.7779600620269775, "logits/rejected": -2.866751194000244, "logps/chosen": -153.2954864501953, "logps/rejected": -205.49044799804688, "loss": 0.6631, "rewards/accuracies": 0.75, "rewards/chosen": -2.80474591255188, "rewards/margins": 4.289844512939453, "rewards/rejected": -7.0945892333984375, "step": 3360 }, { "epoch": 0.65, "learning_rate": 4.344215143452937e-07, "logits/chosen": -2.795165538787842, "logits/rejected": -2.8436975479125977, "logps/chosen": -262.58203125, "logps/rejected": -248.729248046875, "loss": 0.5235, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3129239082336426, "rewards/margins": 1.4233911037445068, "rewards/rejected": -3.7363152503967285, "step": 3370 }, { "epoch": 0.66, "learning_rate": 4.340619831739412e-07, "logits/chosen": -2.8676393032073975, "logits/rejected": -2.7913284301757812, "logps/chosen": -217.6074676513672, "logps/rejected": -229.5713653564453, "loss": 0.7444, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.564275026321411, "rewards/margins": 1.6173843145370483, "rewards/rejected": -5.18165922164917, "step": 3380 }, { "epoch": 0.66, "learning_rate": 4.337024520025886e-07, "logits/chosen": -2.7991833686828613, "logits/rejected": -2.822204351425171, "logps/chosen": -246.2049560546875, "logps/rejected": -234.36013793945312, "loss": 0.5077, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.762002468109131, "rewards/margins": 3.062960147857666, "rewards/rejected": -6.824962615966797, "step": 3390 }, { "epoch": 0.66, "learning_rate": 4.3334292083123604e-07, "logits/chosen": -2.604262590408325, "logits/rejected": -2.6684818267822266, "logps/chosen": -227.061767578125, "logps/rejected": -227.0947723388672, "loss": 0.5974, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.399252414703369, "rewards/margins": 0.9407544136047363, "rewards/rejected": -4.340006351470947, "step": 3400 }, { "epoch": 0.66, "eval_logits/chosen": -2.640723466873169, "eval_logits/rejected": -2.638162136077881, "eval_logps/chosen": -237.52603149414062, "eval_logps/rejected": -240.6396484375, "eval_loss": 0.5700723528862, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -4.328795909881592, "eval_rewards/margins": 2.543574571609497, "eval_rewards/rejected": -6.872370719909668, "eval_runtime": 139.5192, "eval_samples_per_second": 22.621, "eval_steps_per_second": 0.358, "step": 3400 }, { "epoch": 0.66, "learning_rate": 4.329833896598835e-07, "logits/chosen": -2.6771297454833984, "logits/rejected": -2.7599921226501465, "logps/chosen": -299.83087158203125, "logps/rejected": -277.57061767578125, "loss": 0.8204, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.88619065284729, "rewards/margins": 1.7270328998565674, "rewards/rejected": -4.613223075866699, "step": 3410 }, { "epoch": 0.66, "learning_rate": 4.3262385848853095e-07, "logits/chosen": -2.709689140319824, "logits/rejected": -2.6539626121520996, "logps/chosen": -270.373046875, "logps/rejected": -239.0745086669922, "loss": 0.6056, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.7626500129699707, "rewards/margins": 2.2940726280212402, "rewards/rejected": -5.056723117828369, "step": 3420 }, { "epoch": 0.67, "learning_rate": 4.322643273171784e-07, "logits/chosen": -2.7730724811553955, "logits/rejected": -2.7567896842956543, "logps/chosen": -243.9121551513672, "logps/rejected": -292.22088623046875, "loss": 0.5059, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -4.396458625793457, "rewards/margins": 1.893843412399292, "rewards/rejected": -6.290301322937012, "step": 3430 }, { "epoch": 0.67, "learning_rate": 4.319047961458258e-07, "logits/chosen": -2.739290952682495, "logits/rejected": -2.7570600509643555, "logps/chosen": -301.94866943359375, "logps/rejected": -290.7872314453125, "loss": 0.5992, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.739505290985107, "rewards/margins": 2.964844226837158, "rewards/rejected": -8.704350471496582, "step": 3440 }, { "epoch": 0.67, "learning_rate": 4.3154526497447323e-07, "logits/chosen": -2.8693032264709473, "logits/rejected": -2.6837515830993652, "logps/chosen": -338.7242736816406, "logps/rejected": -213.7662811279297, "loss": 0.6146, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.858262300491333, "rewards/margins": 1.727285623550415, "rewards/rejected": -5.58554744720459, "step": 3450 }, { "epoch": 0.67, "learning_rate": 4.311857338031207e-07, "logits/chosen": -2.767155647277832, "logits/rejected": -2.7514164447784424, "logps/chosen": -261.1998291015625, "logps/rejected": -338.3866882324219, "loss": 0.4737, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -4.036892890930176, "rewards/margins": 1.918280839920044, "rewards/rejected": -5.955173492431641, "step": 3460 }, { "epoch": 0.67, "learning_rate": 4.3082620263176814e-07, "logits/chosen": -2.621947765350342, "logits/rejected": -2.5978214740753174, "logps/chosen": -309.0357971191406, "logps/rejected": -294.66632080078125, "loss": 0.5747, "rewards/accuracies": 0.75, "rewards/chosen": -3.7080745697021484, "rewards/margins": 0.551271378993988, "rewards/rejected": -4.259346008300781, "step": 3470 }, { "epoch": 0.68, "learning_rate": 4.304666714604156e-07, "logits/chosen": -2.422496795654297, "logits/rejected": -2.31830096244812, "logps/chosen": -285.1286315917969, "logps/rejected": -286.30584716796875, "loss": 0.4846, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.4158012866973877, "rewards/margins": 5.011656761169434, "rewards/rejected": -8.427458763122559, "step": 3480 }, { "epoch": 0.68, "learning_rate": 4.3010714028906305e-07, "logits/chosen": -2.6212034225463867, "logits/rejected": -2.631873846054077, "logps/chosen": -240.05062866210938, "logps/rejected": -281.8507080078125, "loss": 0.5102, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.7735342979431152, "rewards/margins": 3.5039703845977783, "rewards/rejected": -7.277504920959473, "step": 3490 }, { "epoch": 0.68, "learning_rate": 4.297476091177105e-07, "logits/chosen": -2.7259914875030518, "logits/rejected": -2.664835214614868, "logps/chosen": -263.580078125, "logps/rejected": -235.29190063476562, "loss": 0.4836, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.454415321350098, "rewards/margins": 3.3928513526916504, "rewards/rejected": -7.84726619720459, "step": 3500 }, { "epoch": 0.68, "eval_logits/chosen": -2.5692861080169678, "eval_logits/rejected": -2.563093662261963, "eval_logps/chosen": -249.60523986816406, "eval_logps/rejected": -257.0225830078125, "eval_loss": 0.5171152949333191, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -5.536716938018799, "eval_rewards/margins": 2.9739480018615723, "eval_rewards/rejected": -8.510666847229004, "eval_runtime": 141.1265, "eval_samples_per_second": 22.363, "eval_steps_per_second": 0.354, "step": 3500 }, { "epoch": 0.68, "learning_rate": 4.2938807794635796e-07, "logits/chosen": -2.737623929977417, "logits/rejected": -2.6793720722198486, "logps/chosen": -238.3832244873047, "logps/rejected": -244.93896484375, "loss": 0.7299, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.398845672607422, "rewards/margins": 2.3521039485931396, "rewards/rejected": -5.750948905944824, "step": 3510 }, { "epoch": 0.68, "learning_rate": 4.290285467750054e-07, "logits/chosen": -2.508549690246582, "logits/rejected": -2.5834248065948486, "logps/chosen": -190.38037109375, "logps/rejected": -196.329345703125, "loss": 0.471, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.651684761047363, "rewards/margins": 1.127396821975708, "rewards/rejected": -6.77908182144165, "step": 3520 }, { "epoch": 0.69, "learning_rate": 4.286690156036528e-07, "logits/chosen": -2.5850329399108887, "logits/rejected": -2.5376124382019043, "logps/chosen": -266.3369140625, "logps/rejected": -243.0791473388672, "loss": 0.5134, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -4.392764091491699, "rewards/margins": 2.744372844696045, "rewards/rejected": -7.137136936187744, "step": 3530 }, { "epoch": 0.69, "learning_rate": 4.2830948443230024e-07, "logits/chosen": -2.67210054397583, "logits/rejected": -2.6390540599823, "logps/chosen": -220.0902862548828, "logps/rejected": -242.9448699951172, "loss": 0.4862, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -5.911955833435059, "rewards/margins": 1.756532073020935, "rewards/rejected": -7.668488502502441, "step": 3540 }, { "epoch": 0.69, "learning_rate": 4.2794995326094767e-07, "logits/chosen": -2.703794002532959, "logits/rejected": -2.648158311843872, "logps/chosen": -278.8760986328125, "logps/rejected": -257.67010498046875, "loss": 0.5159, "rewards/accuracies": 0.5, "rewards/chosen": -5.091516971588135, "rewards/margins": 1.6587636470794678, "rewards/rejected": -6.750279903411865, "step": 3550 }, { "epoch": 0.69, "learning_rate": 4.2759042208959515e-07, "logits/chosen": -2.6188552379608154, "logits/rejected": -2.64038348197937, "logps/chosen": -290.19842529296875, "logps/rejected": -269.04217529296875, "loss": 0.5042, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.3531312942504883, "rewards/margins": 0.8913325071334839, "rewards/rejected": -3.2444636821746826, "step": 3560 }, { "epoch": 0.69, "learning_rate": 4.272308909182426e-07, "logits/chosen": -2.6543381214141846, "logits/rejected": -2.639191150665283, "logps/chosen": -240.03231811523438, "logps/rejected": -231.0758819580078, "loss": 0.462, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.280223846435547, "rewards/margins": 2.169583559036255, "rewards/rejected": -5.449807643890381, "step": 3570 }, { "epoch": 0.7, "learning_rate": 4.2687135974689006e-07, "logits/chosen": -2.6419341564178467, "logits/rejected": -2.5609917640686035, "logps/chosen": -237.6129150390625, "logps/rejected": -345.57720947265625, "loss": 0.596, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.28879714012146, "rewards/margins": 1.1028800010681152, "rewards/rejected": -3.391677141189575, "step": 3580 }, { "epoch": 0.7, "learning_rate": 4.265118285755375e-07, "logits/chosen": -2.7299187183380127, "logits/rejected": -2.736818552017212, "logps/chosen": -177.31857299804688, "logps/rejected": -240.1614532470703, "loss": 0.4835, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.7482101917266846, "rewards/margins": 1.666076421737671, "rewards/rejected": -5.4142866134643555, "step": 3590 }, { "epoch": 0.7, "learning_rate": 4.261522974041849e-07, "logits/chosen": -2.653881549835205, "logits/rejected": -2.653212308883667, "logps/chosen": -320.09625244140625, "logps/rejected": -292.1993408203125, "loss": 0.6342, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.1678853034973145, "rewards/margins": 2.4087586402893066, "rewards/rejected": -5.576643943786621, "step": 3600 }, { "epoch": 0.7, "eval_logits/chosen": -2.596027135848999, "eval_logits/rejected": -2.5903661251068115, "eval_logps/chosen": -241.9811553955078, "eval_logps/rejected": -249.30532836914062, "eval_loss": 0.5059713125228882, "eval_rewards/accuracies": 0.7124999761581421, "eval_rewards/chosen": -4.774308681488037, "eval_rewards/margins": 2.9646286964416504, "eval_rewards/rejected": -7.738936901092529, "eval_runtime": 139.8491, "eval_samples_per_second": 22.567, "eval_steps_per_second": 0.358, "step": 3600 }, { "epoch": 0.7, "learning_rate": 4.257927662328324e-07, "logits/chosen": -2.674074649810791, "logits/rejected": -2.708157777786255, "logps/chosen": -262.8844299316406, "logps/rejected": -241.1731414794922, "loss": 0.4843, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.978820562362671, "rewards/margins": 1.0564634799957275, "rewards/rejected": -3.0352840423583984, "step": 3610 }, { "epoch": 0.7, "learning_rate": 4.254332350614798e-07, "logits/chosen": -2.7725424766540527, "logits/rejected": -2.7608630657196045, "logps/chosen": -287.75006103515625, "logps/rejected": -240.6949005126953, "loss": 0.53, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.6862740516662598, "rewards/margins": 1.710336685180664, "rewards/rejected": -5.396610736846924, "step": 3620 }, { "epoch": 0.7, "learning_rate": 4.2507370389012725e-07, "logits/chosen": -2.5927350521087646, "logits/rejected": -2.6337664127349854, "logps/chosen": -232.65597534179688, "logps/rejected": -211.85104370117188, "loss": 0.5745, "rewards/accuracies": 0.5, "rewards/chosen": -3.9736328125, "rewards/margins": 1.2653578519821167, "rewards/rejected": -5.238990306854248, "step": 3630 }, { "epoch": 0.71, "learning_rate": 4.247141727187747e-07, "logits/chosen": -2.7251429557800293, "logits/rejected": -2.6122653484344482, "logps/chosen": -270.0220947265625, "logps/rejected": -203.34213256835938, "loss": 0.5236, "rewards/accuracies": 0.75, "rewards/chosen": -5.7760396003723145, "rewards/margins": 1.1251728534698486, "rewards/rejected": -6.901212215423584, "step": 3640 }, { "epoch": 0.71, "learning_rate": 4.243546415474221e-07, "logits/chosen": -2.7624127864837646, "logits/rejected": -2.6737847328186035, "logps/chosen": -296.1536865234375, "logps/rejected": -256.7353515625, "loss": 0.566, "rewards/accuracies": 0.75, "rewards/chosen": -3.056868076324463, "rewards/margins": 1.9823532104492188, "rewards/rejected": -5.039221286773682, "step": 3650 }, { "epoch": 0.71, "learning_rate": 4.239951103760696e-07, "logits/chosen": -2.7661476135253906, "logits/rejected": -2.7064614295959473, "logps/chosen": -230.06900024414062, "logps/rejected": -245.69528198242188, "loss": 0.6072, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -4.373167037963867, "rewards/margins": 3.0974011421203613, "rewards/rejected": -7.4705681800842285, "step": 3660 }, { "epoch": 0.71, "learning_rate": 4.2363557920471707e-07, "logits/chosen": -2.7427279949188232, "logits/rejected": -2.8481740951538086, "logps/chosen": -217.91909790039062, "logps/rejected": -234.4322052001953, "loss": 0.5565, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.3290581703186035, "rewards/margins": 2.53106951713562, "rewards/rejected": -5.860127925872803, "step": 3670 }, { "epoch": 0.71, "learning_rate": 4.232760480333645e-07, "logits/chosen": -2.7182562351226807, "logits/rejected": -2.6017279624938965, "logps/chosen": -206.9663543701172, "logps/rejected": -209.35269165039062, "loss": 0.4904, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4840569496154785, "rewards/margins": 1.010908842086792, "rewards/rejected": -3.4949657917022705, "step": 3680 }, { "epoch": 0.72, "learning_rate": 4.229165168620119e-07, "logits/chosen": -2.6685986518859863, "logits/rejected": -2.644609212875366, "logps/chosen": -251.0902862548828, "logps/rejected": -220.2162322998047, "loss": 0.6079, "rewards/accuracies": 0.75, "rewards/chosen": -2.8104891777038574, "rewards/margins": 3.4027793407440186, "rewards/rejected": -6.213269233703613, "step": 3690 }, { "epoch": 0.72, "learning_rate": 4.2255698569065935e-07, "logits/chosen": -2.7443270683288574, "logits/rejected": -2.808504104614258, "logps/chosen": -247.8759765625, "logps/rejected": -227.86941528320312, "loss": 0.5143, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -4.108855247497559, "rewards/margins": 2.29363751411438, "rewards/rejected": -6.402493476867676, "step": 3700 }, { "epoch": 0.72, "eval_logits/chosen": -2.6517856121063232, "eval_logits/rejected": -2.6496589183807373, "eval_logps/chosen": -226.3973388671875, "eval_logps/rejected": -230.3889617919922, "eval_loss": 0.483525812625885, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -3.215925931930542, "eval_rewards/margins": 2.631373882293701, "eval_rewards/rejected": -5.847299575805664, "eval_runtime": 141.5761, "eval_samples_per_second": 22.292, "eval_steps_per_second": 0.353, "step": 3700 }, { "epoch": 0.72, "learning_rate": 4.2219745451930683e-07, "logits/chosen": -2.857578992843628, "logits/rejected": -2.903554677963257, "logps/chosen": -228.97799682617188, "logps/rejected": -262.11004638671875, "loss": 0.5322, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -4.296816349029541, "rewards/margins": 2.0335192680358887, "rewards/rejected": -6.3303351402282715, "step": 3710 }, { "epoch": 0.72, "learning_rate": 4.2183792334795426e-07, "logits/chosen": -2.7694320678710938, "logits/rejected": -2.8727636337280273, "logps/chosen": -233.75820922851562, "logps/rejected": -247.53536987304688, "loss": 0.4928, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6068358421325684, "rewards/margins": 1.658368706703186, "rewards/rejected": -4.265204906463623, "step": 3720 }, { "epoch": 0.72, "learning_rate": 4.214783921766017e-07, "logits/chosen": -2.8039932250976562, "logits/rejected": -2.767218828201294, "logps/chosen": -278.52325439453125, "logps/rejected": -220.7368621826172, "loss": 0.4337, "rewards/accuracies": 0.75, "rewards/chosen": -2.668403148651123, "rewards/margins": 2.4974465370178223, "rewards/rejected": -5.165849685668945, "step": 3730 }, { "epoch": 0.73, "learning_rate": 4.211188610052491e-07, "logits/chosen": -2.956364393234253, "logits/rejected": -2.916250705718994, "logps/chosen": -281.52325439453125, "logps/rejected": -325.3456115722656, "loss": 0.5699, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.436584234237671, "rewards/margins": 1.5769962072372437, "rewards/rejected": -4.013580322265625, "step": 3740 }, { "epoch": 0.73, "learning_rate": 4.2075932983389654e-07, "logits/chosen": -2.8661975860595703, "logits/rejected": -2.788851261138916, "logps/chosen": -298.71319580078125, "logps/rejected": -246.48599243164062, "loss": 0.4317, "rewards/accuracies": 0.75, "rewards/chosen": -2.9997756481170654, "rewards/margins": 2.1456754207611084, "rewards/rejected": -5.145451545715332, "step": 3750 }, { "epoch": 0.73, "learning_rate": 4.20399798662544e-07, "logits/chosen": -2.8595833778381348, "logits/rejected": -2.888211727142334, "logps/chosen": -279.5419921875, "logps/rejected": -271.6037902832031, "loss": 0.5257, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.302333354949951, "rewards/margins": 1.0954910516738892, "rewards/rejected": -4.397824287414551, "step": 3760 }, { "epoch": 0.73, "learning_rate": 4.200402674911915e-07, "logits/chosen": -2.9476447105407715, "logits/rejected": -2.9642484188079834, "logps/chosen": -287.8567810058594, "logps/rejected": -283.38482666015625, "loss": 0.5795, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0977120399475098, "rewards/margins": 1.7883121967315674, "rewards/rejected": -3.886023998260498, "step": 3770 }, { "epoch": 0.73, "learning_rate": 4.1968073631983893e-07, "logits/chosen": -2.6992695331573486, "logits/rejected": -2.7363626956939697, "logps/chosen": -258.24932861328125, "logps/rejected": -219.3705596923828, "loss": 0.5153, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.7454285621643066, "rewards/margins": 1.2108144760131836, "rewards/rejected": -4.956243515014648, "step": 3780 }, { "epoch": 0.74, "learning_rate": 4.1932120514848636e-07, "logits/chosen": -2.751964807510376, "logits/rejected": -2.729923725128174, "logps/chosen": -159.1203155517578, "logps/rejected": -277.16534423828125, "loss": 0.5861, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.315541744232178, "rewards/margins": 4.209917068481445, "rewards/rejected": -8.525461196899414, "step": 3790 }, { "epoch": 0.74, "learning_rate": 4.189616739771338e-07, "logits/chosen": -2.803990125656128, "logits/rejected": -2.8793039321899414, "logps/chosen": -198.7175750732422, "logps/rejected": -244.63943481445312, "loss": 0.5471, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.9009945392608643, "rewards/margins": 2.1119015216827393, "rewards/rejected": -5.012895584106445, "step": 3800 }, { "epoch": 0.74, "eval_logits/chosen": -2.7517590522766113, "eval_logits/rejected": -2.7507576942443848, "eval_logps/chosen": -236.92933654785156, "eval_logps/rejected": -242.65428161621094, "eval_loss": 0.5059856176376343, "eval_rewards/accuracies": 0.6924999952316284, "eval_rewards/chosen": -4.2691264152526855, "eval_rewards/margins": 2.80470609664917, "eval_rewards/rejected": -7.0738325119018555, "eval_runtime": 140.944, "eval_samples_per_second": 22.392, "eval_steps_per_second": 0.355, "step": 3800 }, { "epoch": 0.74, "learning_rate": 4.1860214280578127e-07, "logits/chosen": -2.780768871307373, "logits/rejected": -2.80261492729187, "logps/chosen": -264.9028625488281, "logps/rejected": -270.338623046875, "loss": 0.4883, "rewards/accuracies": 0.75, "rewards/chosen": -3.935929536819458, "rewards/margins": 4.281942844390869, "rewards/rejected": -8.217870712280273, "step": 3810 }, { "epoch": 0.74, "learning_rate": 4.182426116344287e-07, "logits/chosen": -2.8779873847961426, "logits/rejected": -2.8326969146728516, "logps/chosen": -210.17507934570312, "logps/rejected": -217.43331909179688, "loss": 0.5727, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -5.283583641052246, "rewards/margins": 0.8070454597473145, "rewards/rejected": -6.0906291007995605, "step": 3820 }, { "epoch": 0.74, "learning_rate": 4.178830804630761e-07, "logits/chosen": -2.61614727973938, "logits/rejected": -2.53159499168396, "logps/chosen": -311.3333740234375, "logps/rejected": -306.41455078125, "loss": 0.4351, "rewards/accuracies": 0.75, "rewards/chosen": -2.734816789627075, "rewards/margins": 3.9072043895721436, "rewards/rejected": -6.642021179199219, "step": 3830 }, { "epoch": 0.75, "learning_rate": 4.1752354929172355e-07, "logits/chosen": -2.7780563831329346, "logits/rejected": -2.859438419342041, "logps/chosen": -243.5393524169922, "logps/rejected": -316.92047119140625, "loss": 0.5028, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.958086013793945, "rewards/margins": 3.1509499549865723, "rewards/rejected": -8.109037399291992, "step": 3840 }, { "epoch": 0.75, "learning_rate": 4.17164018120371e-07, "logits/chosen": -2.66560697555542, "logits/rejected": -2.6807377338409424, "logps/chosen": -221.40267944335938, "logps/rejected": -293.7818298339844, "loss": 0.6466, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.929117202758789, "rewards/margins": 3.0511293411254883, "rewards/rejected": -6.980246067047119, "step": 3850 }, { "epoch": 0.75, "learning_rate": 4.1680448694901846e-07, "logits/chosen": -2.6415555477142334, "logits/rejected": -2.7434072494506836, "logps/chosen": -256.9652099609375, "logps/rejected": -278.50341796875, "loss": 0.5411, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.1472325325012207, "rewards/margins": 3.2073540687561035, "rewards/rejected": -5.354586601257324, "step": 3860 }, { "epoch": 0.75, "learning_rate": 4.1644495577766594e-07, "logits/chosen": -2.9617741107940674, "logits/rejected": -2.9699528217315674, "logps/chosen": -283.10894775390625, "logps/rejected": -255.62863159179688, "loss": 0.5307, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.438523769378662, "rewards/margins": 0.3908306956291199, "rewards/rejected": -3.829354763031006, "step": 3870 }, { "epoch": 0.75, "learning_rate": 4.1608542460631337e-07, "logits/chosen": -2.7873966693878174, "logits/rejected": -2.8923511505126953, "logps/chosen": -238.02096557617188, "logps/rejected": -347.6580505371094, "loss": 0.524, "rewards/accuracies": 0.75, "rewards/chosen": -1.9851253032684326, "rewards/margins": 1.3712899684906006, "rewards/rejected": -3.356415271759033, "step": 3880 }, { "epoch": 0.76, "learning_rate": 4.157258934349608e-07, "logits/chosen": -2.694671630859375, "logits/rejected": -2.7630417346954346, "logps/chosen": -121.61210632324219, "logps/rejected": -173.63577270507812, "loss": 0.5658, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.5720744132995605, "rewards/margins": 3.7524161338806152, "rewards/rejected": -7.324490547180176, "step": 3890 }, { "epoch": 0.76, "learning_rate": 4.153663622636082e-07, "logits/chosen": -2.8237593173980713, "logits/rejected": -2.826791286468506, "logps/chosen": -256.9156799316406, "logps/rejected": -270.22845458984375, "loss": 0.4817, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.2757785320281982, "rewards/margins": 4.210750579833984, "rewards/rejected": -7.486529350280762, "step": 3900 }, { "epoch": 0.76, "eval_logits/chosen": -2.6443426609039307, "eval_logits/rejected": -2.6394639015197754, "eval_logps/chosen": -238.49986267089844, "eval_logps/rejected": -244.88394165039062, "eval_loss": 0.529410183429718, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -4.426179885864258, "eval_rewards/margins": 2.8706185817718506, "eval_rewards/rejected": -7.2967987060546875, "eval_runtime": 140.6835, "eval_samples_per_second": 22.433, "eval_steps_per_second": 0.355, "step": 3900 }, { "epoch": 0.76, "learning_rate": 4.150068310922557e-07, "logits/chosen": -2.673715114593506, "logits/rejected": -2.738027334213257, "logps/chosen": -203.73446655273438, "logps/rejected": -292.6114501953125, "loss": 0.8045, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -4.32696533203125, "rewards/margins": 3.306959629058838, "rewards/rejected": -7.633924961090088, "step": 3910 }, { "epoch": 0.76, "learning_rate": 4.1464729992090313e-07, "logits/chosen": -2.661303997039795, "logits/rejected": -2.6372158527374268, "logps/chosen": -257.3746337890625, "logps/rejected": -233.3134002685547, "loss": 0.5652, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.130716323852539, "rewards/margins": 1.7002136707305908, "rewards/rejected": -4.830929756164551, "step": 3920 }, { "epoch": 0.76, "learning_rate": 4.1428776874955056e-07, "logits/chosen": -2.7555060386657715, "logits/rejected": -2.7300703525543213, "logps/chosen": -184.47970581054688, "logps/rejected": -221.07754516601562, "loss": 0.6103, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.9298088550567627, "rewards/margins": 2.627885341644287, "rewards/rejected": -5.557694435119629, "step": 3930 }, { "epoch": 0.76, "learning_rate": 4.13928237578198e-07, "logits/chosen": -2.7176475524902344, "logits/rejected": -2.719181537628174, "logps/chosen": -233.90646362304688, "logps/rejected": -222.2284698486328, "loss": 0.4804, "rewards/accuracies": 0.75, "rewards/chosen": -3.0175254344940186, "rewards/margins": 2.646049737930298, "rewards/rejected": -5.663575172424316, "step": 3940 }, { "epoch": 0.77, "learning_rate": 4.135687064068454e-07, "logits/chosen": -2.4261183738708496, "logits/rejected": -2.5859925746917725, "logps/chosen": -261.8021240234375, "logps/rejected": -308.89837646484375, "loss": 0.591, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.808897614479065, "rewards/margins": 2.689765691757202, "rewards/rejected": -4.498663425445557, "step": 3950 }, { "epoch": 0.77, "learning_rate": 4.132091752354929e-07, "logits/chosen": -2.450559139251709, "logits/rejected": -2.55533504486084, "logps/chosen": -239.26602172851562, "logps/rejected": -274.4757995605469, "loss": 0.5925, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.073310375213623, "rewards/margins": 3.1609668731689453, "rewards/rejected": -6.234277248382568, "step": 3960 }, { "epoch": 0.77, "learning_rate": 4.128496440641404e-07, "logits/chosen": -2.6075682640075684, "logits/rejected": -2.7368927001953125, "logps/chosen": -317.2792053222656, "logps/rejected": -312.6597900390625, "loss": 0.3885, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5682709217071533, "rewards/margins": 2.961430072784424, "rewards/rejected": -5.529700756072998, "step": 3970 }, { "epoch": 0.77, "learning_rate": 4.124901128927878e-07, "logits/chosen": -2.7510499954223633, "logits/rejected": -2.742461681365967, "logps/chosen": -364.917724609375, "logps/rejected": -312.2942199707031, "loss": 0.4847, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.139216661453247, "rewards/margins": 3.680332899093628, "rewards/rejected": -6.819549560546875, "step": 3980 }, { "epoch": 0.77, "learning_rate": 4.1213058172143523e-07, "logits/chosen": -2.6703734397888184, "logits/rejected": -2.690394639968872, "logps/chosen": -292.0287170410156, "logps/rejected": -308.9948425292969, "loss": 0.6375, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -5.002602577209473, "rewards/margins": 2.602440118789673, "rewards/rejected": -7.605042457580566, "step": 3990 }, { "epoch": 0.78, "learning_rate": 4.1177105055008266e-07, "logits/chosen": -2.6643238067626953, "logits/rejected": -2.732318878173828, "logps/chosen": -183.9958038330078, "logps/rejected": -279.9193115234375, "loss": 0.4616, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.369785308837891, "rewards/margins": 3.856417417526245, "rewards/rejected": -8.226203918457031, "step": 4000 }, { "epoch": 0.78, "eval_logits/chosen": -2.611358642578125, "eval_logits/rejected": -2.605623245239258, "eval_logps/chosen": -239.37242126464844, "eval_logps/rejected": -248.78367614746094, "eval_loss": 0.501867949962616, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -4.513437747955322, "eval_rewards/margins": 3.1733336448669434, "eval_rewards/rejected": -7.686771869659424, "eval_runtime": 140.6175, "eval_samples_per_second": 22.444, "eval_steps_per_second": 0.356, "step": 4000 }, { "epoch": 0.78, "learning_rate": 4.1141151937873014e-07, "logits/chosen": -2.7722015380859375, "logits/rejected": -2.5845284461975098, "logps/chosen": -261.0437927246094, "logps/rejected": -201.52496337890625, "loss": 0.4222, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.034439563751221, "rewards/margins": 1.531874418258667, "rewards/rejected": -5.566313743591309, "step": 4010 }, { "epoch": 0.78, "learning_rate": 4.1105198820737757e-07, "logits/chosen": -2.679579257965088, "logits/rejected": -2.6912286281585693, "logps/chosen": -177.82229614257812, "logps/rejected": -212.9039764404297, "loss": 0.4357, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.626319408416748, "rewards/margins": 2.4286320209503174, "rewards/rejected": -6.0549516677856445, "step": 4020 }, { "epoch": 0.78, "learning_rate": 4.10692457036025e-07, "logits/chosen": -2.7863266468048096, "logits/rejected": -2.7110161781311035, "logps/chosen": -206.7767333984375, "logps/rejected": -229.57070922851562, "loss": 0.4667, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.5775718688964844, "rewards/margins": 3.1784205436706543, "rewards/rejected": -6.755992889404297, "step": 4030 }, { "epoch": 0.78, "learning_rate": 4.1033292586467243e-07, "logits/chosen": -2.575326919555664, "logits/rejected": -2.6323258876800537, "logps/chosen": -232.1804656982422, "logps/rejected": -306.1264953613281, "loss": 0.6106, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.040480613708496, "rewards/margins": 3.736806869506836, "rewards/rejected": -8.777287483215332, "step": 4040 }, { "epoch": 0.79, "learning_rate": 4.0997339469331985e-07, "logits/chosen": -2.7365784645080566, "logits/rejected": -2.7570254802703857, "logps/chosen": -198.88853454589844, "logps/rejected": -304.9981994628906, "loss": 0.4671, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.8167319297790527, "rewards/margins": 2.4269371032714844, "rewards/rejected": -6.2436699867248535, "step": 4050 }, { "epoch": 0.79, "learning_rate": 4.096138635219674e-07, "logits/chosen": -2.8507704734802246, "logits/rejected": -2.8511385917663574, "logps/chosen": -299.0123596191406, "logps/rejected": -364.85321044921875, "loss": 0.4188, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -4.197487831115723, "rewards/margins": 2.4825453758239746, "rewards/rejected": -6.680032253265381, "step": 4060 }, { "epoch": 0.79, "learning_rate": 4.092543323506148e-07, "logits/chosen": -2.709254503250122, "logits/rejected": -2.6763699054718018, "logps/chosen": -238.7041473388672, "logps/rejected": -234.9014892578125, "loss": 0.4831, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -4.505122184753418, "rewards/margins": 1.6862595081329346, "rewards/rejected": -6.191380977630615, "step": 4070 }, { "epoch": 0.79, "learning_rate": 4.0889480117926224e-07, "logits/chosen": -2.572201728820801, "logits/rejected": -2.569436550140381, "logps/chosen": -255.99551391601562, "logps/rejected": -219.61172485351562, "loss": 0.6034, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -4.145529747009277, "rewards/margins": 2.2147066593170166, "rewards/rejected": -6.360236167907715, "step": 4080 }, { "epoch": 0.79, "learning_rate": 4.0853527000790967e-07, "logits/chosen": -2.7863736152648926, "logits/rejected": -2.7736449241638184, "logps/chosen": -249.95947265625, "logps/rejected": -232.7467803955078, "loss": 0.4329, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.905479907989502, "rewards/margins": 1.3330347537994385, "rewards/rejected": -4.238514423370361, "step": 4090 }, { "epoch": 0.8, "learning_rate": 4.081757388365571e-07, "logits/chosen": -2.7727138996124268, "logits/rejected": -2.7523348331451416, "logps/chosen": -290.93927001953125, "logps/rejected": -312.92694091796875, "loss": 0.5042, "rewards/accuracies": 0.75, "rewards/chosen": -3.644190549850464, "rewards/margins": 3.644242763519287, "rewards/rejected": -7.2884345054626465, "step": 4100 }, { "epoch": 0.8, "eval_logits/chosen": -2.5811851024627686, "eval_logits/rejected": -2.568887710571289, "eval_logps/chosen": -236.53573608398438, "eval_logps/rejected": -244.0291748046875, "eval_loss": 0.5084269642829895, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -4.229771137237549, "eval_rewards/margins": 2.981550931930542, "eval_rewards/rejected": -7.211321830749512, "eval_runtime": 141.2083, "eval_samples_per_second": 22.35, "eval_steps_per_second": 0.354, "step": 4100 }, { "epoch": 0.8, "learning_rate": 4.078162076652046e-07, "logits/chosen": -2.714445114135742, "logits/rejected": -2.7384421825408936, "logps/chosen": -236.87255859375, "logps/rejected": -246.306640625, "loss": 0.5583, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.930661678314209, "rewards/margins": 2.5478663444519043, "rewards/rejected": -5.478527545928955, "step": 4110 }, { "epoch": 0.8, "learning_rate": 4.07456676493852e-07, "logits/chosen": -2.539716958999634, "logits/rejected": -2.547043561935425, "logps/chosen": -247.4441680908203, "logps/rejected": -227.5391082763672, "loss": 0.5371, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.151979446411133, "rewards/margins": 2.5288708209991455, "rewards/rejected": -6.680850028991699, "step": 4120 }, { "epoch": 0.8, "learning_rate": 4.0709714532249944e-07, "logits/chosen": -2.676037311553955, "logits/rejected": -2.602576494216919, "logps/chosen": -270.4694519042969, "logps/rejected": -240.75308227539062, "loss": 0.4446, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -5.0017523765563965, "rewards/margins": 2.5888845920562744, "rewards/rejected": -7.590636253356934, "step": 4130 }, { "epoch": 0.8, "learning_rate": 4.0673761415114686e-07, "logits/chosen": -2.719240665435791, "logits/rejected": -2.7139101028442383, "logps/chosen": -270.01556396484375, "logps/rejected": -280.0762939453125, "loss": 0.4548, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.841872215270996, "rewards/margins": 3.2279715538024902, "rewards/rejected": -8.069845199584961, "step": 4140 }, { "epoch": 0.81, "learning_rate": 4.063780829797943e-07, "logits/chosen": -2.706204891204834, "logits/rejected": -2.7004897594451904, "logps/chosen": -243.80465698242188, "logps/rejected": -284.74468994140625, "loss": 0.6013, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.9999115467071533, "rewards/margins": 2.851935863494873, "rewards/rejected": -6.851847171783447, "step": 4150 }, { "epoch": 0.81, "learning_rate": 4.060185518084418e-07, "logits/chosen": -2.638167142868042, "logits/rejected": -2.601055383682251, "logps/chosen": -239.7976531982422, "logps/rejected": -308.3616638183594, "loss": 0.5205, "rewards/accuracies": 0.75, "rewards/chosen": -4.221074104309082, "rewards/margins": 4.369997978210449, "rewards/rejected": -8.591072082519531, "step": 4160 }, { "epoch": 0.81, "learning_rate": 4.0565902063708925e-07, "logits/chosen": -2.630215644836426, "logits/rejected": -2.6398167610168457, "logps/chosen": -235.03231811523438, "logps/rejected": -266.66522216796875, "loss": 0.5691, "rewards/accuracies": 0.75, "rewards/chosen": -2.963937282562256, "rewards/margins": 2.846355438232422, "rewards/rejected": -5.8102922439575195, "step": 4170 }, { "epoch": 0.81, "learning_rate": 4.052994894657367e-07, "logits/chosen": -2.666567087173462, "logits/rejected": -2.657777786254883, "logps/chosen": -228.3162841796875, "logps/rejected": -261.142822265625, "loss": 0.4964, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.731809139251709, "rewards/margins": 3.2146503925323486, "rewards/rejected": -6.946459770202637, "step": 4180 }, { "epoch": 0.81, "learning_rate": 4.049399582943841e-07, "logits/chosen": -2.7319045066833496, "logits/rejected": -2.7684199810028076, "logps/chosen": -270.67095947265625, "logps/rejected": -268.091064453125, "loss": 0.4525, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.5088818073272705, "rewards/margins": 2.3630523681640625, "rewards/rejected": -5.871934413909912, "step": 4190 }, { "epoch": 0.82, "learning_rate": 4.0458042712303154e-07, "logits/chosen": -2.6466877460479736, "logits/rejected": -2.584412097930908, "logps/chosen": -259.6782531738281, "logps/rejected": -246.9607391357422, "loss": 0.5486, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.682548999786377, "rewards/margins": 3.6152548789978027, "rewards/rejected": -7.2978034019470215, "step": 4200 }, { "epoch": 0.82, "eval_logits/chosen": -2.612272262573242, "eval_logits/rejected": -2.6021535396575928, "eval_logps/chosen": -244.8979034423828, "eval_logps/rejected": -253.74057006835938, "eval_loss": 0.50364089012146, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -5.065984725952148, "eval_rewards/margins": 3.1164772510528564, "eval_rewards/rejected": -8.182461738586426, "eval_runtime": 140.589, "eval_samples_per_second": 22.448, "eval_steps_per_second": 0.356, "step": 4200 }, { "epoch": 0.82, "learning_rate": 4.04220895951679e-07, "logits/chosen": -2.439812183380127, "logits/rejected": -2.43867826461792, "logps/chosen": -285.74713134765625, "logps/rejected": -331.9280700683594, "loss": 0.6775, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.17250919342041, "rewards/margins": 2.6503026485443115, "rewards/rejected": -7.822811126708984, "step": 4210 }, { "epoch": 0.82, "learning_rate": 4.0386136478032645e-07, "logits/chosen": -2.5404868125915527, "logits/rejected": -2.577773094177246, "logps/chosen": -373.8901672363281, "logps/rejected": -390.03424072265625, "loss": 0.5626, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.9468588829040527, "rewards/margins": 3.0632483959198, "rewards/rejected": -6.01010799407959, "step": 4220 }, { "epoch": 0.82, "learning_rate": 4.0350183360897387e-07, "logits/chosen": -2.7753472328186035, "logits/rejected": -2.746166706085205, "logps/chosen": -290.0711975097656, "logps/rejected": -211.5422821044922, "loss": 0.5276, "rewards/accuracies": 0.75, "rewards/chosen": -4.921571254730225, "rewards/margins": 3.443016767501831, "rewards/rejected": -8.364588737487793, "step": 4230 }, { "epoch": 0.82, "learning_rate": 4.031423024376213e-07, "logits/chosen": -2.8585479259490967, "logits/rejected": -2.8426735401153564, "logps/chosen": -298.3712463378906, "logps/rejected": -245.10330200195312, "loss": 0.4384, "rewards/accuracies": 0.75, "rewards/chosen": -6.26866340637207, "rewards/margins": 2.2625441551208496, "rewards/rejected": -8.531207084655762, "step": 4240 }, { "epoch": 0.83, "learning_rate": 4.0278277126626873e-07, "logits/chosen": -2.639944314956665, "logits/rejected": -2.6482975482940674, "logps/chosen": -360.97119140625, "logps/rejected": -359.6124267578125, "loss": 0.5181, "rewards/accuracies": 0.5, "rewards/chosen": -5.2597737312316895, "rewards/margins": 3.0593974590301514, "rewards/rejected": -8.319170951843262, "step": 4250 }, { "epoch": 0.83, "learning_rate": 4.0242324009491626e-07, "logits/chosen": -2.6124892234802246, "logits/rejected": -2.673614025115967, "logps/chosen": -211.0795135498047, "logps/rejected": -296.2552490234375, "loss": 0.4993, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -5.773487091064453, "rewards/margins": 0.6954655647277832, "rewards/rejected": -6.4689531326293945, "step": 4260 }, { "epoch": 0.83, "learning_rate": 4.020637089235637e-07, "logits/chosen": -2.7454047203063965, "logits/rejected": -2.6642699241638184, "logps/chosen": -254.54605102539062, "logps/rejected": -295.25604248046875, "loss": 0.4963, "rewards/accuracies": 0.75, "rewards/chosen": -7.68234920501709, "rewards/margins": 2.8548038005828857, "rewards/rejected": -10.537153244018555, "step": 4270 }, { "epoch": 0.83, "learning_rate": 4.017041777522111e-07, "logits/chosen": -2.8814287185668945, "logits/rejected": -2.809417486190796, "logps/chosen": -261.25286865234375, "logps/rejected": -224.7142333984375, "loss": 0.4591, "rewards/accuracies": 0.75, "rewards/chosen": -5.4783172607421875, "rewards/margins": 2.9234070777893066, "rewards/rejected": -8.401723861694336, "step": 4280 }, { "epoch": 0.83, "learning_rate": 4.0134464658085855e-07, "logits/chosen": -2.738354206085205, "logits/rejected": -2.752901077270508, "logps/chosen": -296.1322021484375, "logps/rejected": -322.6684265136719, "loss": 0.4916, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.142353057861328, "rewards/margins": 2.718418836593628, "rewards/rejected": -6.860772132873535, "step": 4290 }, { "epoch": 0.83, "learning_rate": 4.00985115409506e-07, "logits/chosen": -2.760732889175415, "logits/rejected": -2.6976306438446045, "logps/chosen": -237.74111938476562, "logps/rejected": -277.74078369140625, "loss": 0.4509, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -5.939502716064453, "rewards/margins": 2.4747462272644043, "rewards/rejected": -8.414249420166016, "step": 4300 }, { "epoch": 0.83, "eval_logits/chosen": -2.6864702701568604, "eval_logits/rejected": -2.674971580505371, "eval_logps/chosen": -247.89425659179688, "eval_logps/rejected": -258.3559875488281, "eval_loss": 0.49765992164611816, "eval_rewards/accuracies": 0.7200000286102295, "eval_rewards/chosen": -5.365617752075195, "eval_rewards/margins": 3.278383493423462, "eval_rewards/rejected": -8.644001007080078, "eval_runtime": 140.4778, "eval_samples_per_second": 22.466, "eval_steps_per_second": 0.356, "step": 4300 }, { "epoch": 0.84, "learning_rate": 4.006255842381534e-07, "logits/chosen": -2.765807867050171, "logits/rejected": -2.749279499053955, "logps/chosen": -310.3785095214844, "logps/rejected": -269.0523681640625, "loss": 0.547, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.8490264415740967, "rewards/margins": 1.1632335186004639, "rewards/rejected": -5.012260437011719, "step": 4310 }, { "epoch": 0.84, "learning_rate": 4.002660530668009e-07, "logits/chosen": -2.8433854579925537, "logits/rejected": -2.819653034210205, "logps/chosen": -234.0947265625, "logps/rejected": -288.65911865234375, "loss": 0.4926, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -5.022232532501221, "rewards/margins": 1.3328921794891357, "rewards/rejected": -6.355125427246094, "step": 4320 }, { "epoch": 0.84, "learning_rate": 3.999065218954483e-07, "logits/chosen": -2.9038548469543457, "logits/rejected": -2.806032419204712, "logps/chosen": -260.6301574707031, "logps/rejected": -245.8707275390625, "loss": 0.5484, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -5.149960517883301, "rewards/margins": 1.059818983078003, "rewards/rejected": -6.209778785705566, "step": 4330 }, { "epoch": 0.84, "learning_rate": 3.9954699072409574e-07, "logits/chosen": -2.7402634620666504, "logits/rejected": -2.740111827850342, "logps/chosen": -231.1941375732422, "logps/rejected": -323.57861328125, "loss": 0.5001, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.067840099334717, "rewards/margins": 3.637691020965576, "rewards/rejected": -5.705531120300293, "step": 4340 }, { "epoch": 0.84, "learning_rate": 3.9918745955274317e-07, "logits/chosen": -2.5543370246887207, "logits/rejected": -2.6464576721191406, "logps/chosen": -221.44461059570312, "logps/rejected": -240.70834350585938, "loss": 0.5037, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -5.656668663024902, "rewards/margins": 2.4383351802825928, "rewards/rejected": -8.095004081726074, "step": 4350 }, { "epoch": 0.85, "learning_rate": 3.9882792838139065e-07, "logits/chosen": -2.8474435806274414, "logits/rejected": -2.7763171195983887, "logps/chosen": -260.658935546875, "logps/rejected": -247.59326171875, "loss": 0.5254, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.6842408180236816, "rewards/margins": 2.3221702575683594, "rewards/rejected": -6.006411552429199, "step": 4360 }, { "epoch": 0.85, "learning_rate": 3.9846839721003813e-07, "logits/chosen": -2.4345052242279053, "logits/rejected": -2.4207301139831543, "logps/chosen": -309.62835693359375, "logps/rejected": -412.50634765625, "loss": 0.608, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -5.484894752502441, "rewards/margins": 2.8796939849853516, "rewards/rejected": -8.364588737487793, "step": 4370 }, { "epoch": 0.85, "learning_rate": 3.9810886603868555e-07, "logits/chosen": -2.634835958480835, "logits/rejected": -2.5531816482543945, "logps/chosen": -181.68931579589844, "logps/rejected": -310.19842529296875, "loss": 0.4233, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -7.333464622497559, "rewards/margins": 3.435229778289795, "rewards/rejected": -10.768695831298828, "step": 4380 }, { "epoch": 0.85, "learning_rate": 3.97749334867333e-07, "logits/chosen": -2.484170436859131, "logits/rejected": -2.5105714797973633, "logps/chosen": -210.3848419189453, "logps/rejected": -203.63528442382812, "loss": 0.6081, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.691134452819824, "rewards/margins": 3.9965405464172363, "rewards/rejected": -10.687674522399902, "step": 4390 }, { "epoch": 0.85, "learning_rate": 3.973898036959804e-07, "logits/chosen": -2.55855131149292, "logits/rejected": -2.5305016040802, "logps/chosen": -221.122314453125, "logps/rejected": -277.065673828125, "loss": 0.4964, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -4.6367926597595215, "rewards/margins": 3.261251449584961, "rewards/rejected": -7.898043632507324, "step": 4400 }, { "epoch": 0.85, "eval_logits/chosen": -2.6917290687561035, "eval_logits/rejected": -2.6843764781951904, "eval_logps/chosen": -235.93966674804688, "eval_logps/rejected": -246.0229949951172, "eval_loss": 0.5051913857460022, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -4.170158863067627, "eval_rewards/margins": 3.2405447959899902, "eval_rewards/rejected": -7.410703182220459, "eval_runtime": 140.8315, "eval_samples_per_second": 22.41, "eval_steps_per_second": 0.355, "step": 4400 }, { "epoch": 0.86, "learning_rate": 3.9703027252462784e-07, "logits/chosen": -2.79225754737854, "logits/rejected": -2.832019090652466, "logps/chosen": -209.0641632080078, "logps/rejected": -259.63214111328125, "loss": 0.4418, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.651867389678955, "rewards/margins": 2.5974977016448975, "rewards/rejected": -5.249365329742432, "step": 4410 }, { "epoch": 0.86, "learning_rate": 3.966707413532753e-07, "logits/chosen": -2.7867469787597656, "logits/rejected": -2.7578327655792236, "logps/chosen": -310.88336181640625, "logps/rejected": -309.07525634765625, "loss": 0.4097, "rewards/accuracies": 0.75, "rewards/chosen": -1.954159140586853, "rewards/margins": 3.5042128562927246, "rewards/rejected": -5.458372592926025, "step": 4420 }, { "epoch": 0.86, "learning_rate": 3.9631121018192275e-07, "logits/chosen": -2.650923728942871, "logits/rejected": -2.6128299236297607, "logps/chosen": -158.23658752441406, "logps/rejected": -221.2763671875, "loss": 0.6934, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.7482573986053467, "rewards/margins": 5.173362731933594, "rewards/rejected": -7.921620845794678, "step": 4430 }, { "epoch": 0.86, "learning_rate": 3.959516790105702e-07, "logits/chosen": -2.666069507598877, "logits/rejected": -2.6895437240600586, "logps/chosen": -241.5887451171875, "logps/rejected": -231.3760986328125, "loss": 0.5564, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.32515287399292, "rewards/margins": 1.9329948425292969, "rewards/rejected": -5.258147239685059, "step": 4440 }, { "epoch": 0.86, "learning_rate": 3.955921478392176e-07, "logits/chosen": -2.2363100051879883, "logits/rejected": -2.2927143573760986, "logps/chosen": -368.7262878417969, "logps/rejected": -312.715576171875, "loss": 0.5752, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -5.293475151062012, "rewards/margins": 2.2721190452575684, "rewards/rejected": -7.5655927658081055, "step": 4450 }, { "epoch": 0.87, "learning_rate": 3.952326166678651e-07, "logits/chosen": -2.5693564414978027, "logits/rejected": -2.518404722213745, "logps/chosen": -209.572509765625, "logps/rejected": -303.83441162109375, "loss": 0.443, "rewards/accuracies": 0.75, "rewards/chosen": -4.315739631652832, "rewards/margins": 4.045080184936523, "rewards/rejected": -8.360819816589355, "step": 4460 }, { "epoch": 0.87, "learning_rate": 3.9487308549651256e-07, "logits/chosen": -2.597867250442505, "logits/rejected": -2.702716827392578, "logps/chosen": -201.34945678710938, "logps/rejected": -250.2091827392578, "loss": 0.5413, "rewards/accuracies": 0.75, "rewards/chosen": -5.714076042175293, "rewards/margins": 5.3684258460998535, "rewards/rejected": -11.082501411437988, "step": 4470 }, { "epoch": 0.87, "learning_rate": 3.9451355432516e-07, "logits/chosen": -2.6887593269348145, "logits/rejected": -2.723721981048584, "logps/chosen": -206.8312225341797, "logps/rejected": -259.43182373046875, "loss": 0.4648, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.482999324798584, "rewards/margins": 4.759879112243652, "rewards/rejected": -9.242877960205078, "step": 4480 }, { "epoch": 0.87, "learning_rate": 3.941540231538074e-07, "logits/chosen": -2.500793933868408, "logits/rejected": -2.525344133377075, "logps/chosen": -163.62831115722656, "logps/rejected": -188.51388549804688, "loss": 0.5248, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -4.077842712402344, "rewards/margins": 2.760449171066284, "rewards/rejected": -6.838292121887207, "step": 4490 }, { "epoch": 0.87, "learning_rate": 3.9379449198245485e-07, "logits/chosen": -2.723928928375244, "logits/rejected": -2.648629665374756, "logps/chosen": -233.9381866455078, "logps/rejected": -261.3736572265625, "loss": 0.5711, "rewards/accuracies": 0.5, "rewards/chosen": -4.1998491287231445, "rewards/margins": 0.7085366249084473, "rewards/rejected": -4.90838623046875, "step": 4500 }, { "epoch": 0.87, "eval_logits/chosen": -2.5880494117736816, "eval_logits/rejected": -2.577402353286743, "eval_logps/chosen": -242.3307647705078, "eval_logps/rejected": -256.31182861328125, "eval_loss": 0.4862401783466339, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -4.809268951416016, "eval_rewards/margins": 3.6303162574768066, "eval_rewards/rejected": -8.439584732055664, "eval_runtime": 139.5327, "eval_samples_per_second": 22.618, "eval_steps_per_second": 0.358, "step": 4500 }, { "epoch": 0.88, "learning_rate": 3.934349608111023e-07, "logits/chosen": -2.7325246334075928, "logits/rejected": -2.665409564971924, "logps/chosen": -215.60775756835938, "logps/rejected": -234.34130859375, "loss": 0.6417, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.686572074890137, "rewards/margins": 1.9754047393798828, "rewards/rejected": -6.661977291107178, "step": 4510 }, { "epoch": 0.88, "learning_rate": 3.9307542963974976e-07, "logits/chosen": -2.7185959815979004, "logits/rejected": -2.7288401126861572, "logps/chosen": -230.36654663085938, "logps/rejected": -230.0455322265625, "loss": 0.488, "rewards/accuracies": 0.75, "rewards/chosen": -1.6887428760528564, "rewards/margins": 1.8191553354263306, "rewards/rejected": -3.5078983306884766, "step": 4520 }, { "epoch": 0.88, "learning_rate": 3.927158984683972e-07, "logits/chosen": -2.570035219192505, "logits/rejected": -2.6241276264190674, "logps/chosen": -196.22262573242188, "logps/rejected": -198.39151000976562, "loss": 0.5186, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.495260715484619, "rewards/margins": 4.2800469398498535, "rewards/rejected": -7.77530574798584, "step": 4530 }, { "epoch": 0.88, "learning_rate": 3.923563672970446e-07, "logits/chosen": -2.627640962600708, "logits/rejected": -2.6806159019470215, "logps/chosen": -315.4527587890625, "logps/rejected": -282.22412109375, "loss": 0.5285, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.5280659198760986, "rewards/margins": 1.445022702217102, "rewards/rejected": -4.973088264465332, "step": 4540 }, { "epoch": 0.88, "learning_rate": 3.9199683612569204e-07, "logits/chosen": -2.863534688949585, "logits/rejected": -2.8257272243499756, "logps/chosen": -309.8386535644531, "logps/rejected": -236.9312286376953, "loss": 0.5975, "rewards/accuracies": 0.75, "rewards/chosen": -2.1541249752044678, "rewards/margins": 1.7545160055160522, "rewards/rejected": -3.9086406230926514, "step": 4550 }, { "epoch": 0.89, "learning_rate": 3.916373049543395e-07, "logits/chosen": -2.670228958129883, "logits/rejected": -2.6037065982818604, "logps/chosen": -251.37203979492188, "logps/rejected": -233.73489379882812, "loss": 0.4691, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.9182348251342773, "rewards/margins": 3.068732261657715, "rewards/rejected": -5.98696756362915, "step": 4560 }, { "epoch": 0.89, "learning_rate": 3.91277773782987e-07, "logits/chosen": -2.6178832054138184, "logits/rejected": -2.6455113887786865, "logps/chosen": -217.88308715820312, "logps/rejected": -218.3448486328125, "loss": 0.5759, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.60750675201416, "rewards/margins": 2.5334486961364746, "rewards/rejected": -7.140954494476318, "step": 4570 }, { "epoch": 0.89, "learning_rate": 3.9091824261163443e-07, "logits/chosen": -2.7345991134643555, "logits/rejected": -2.7396881580352783, "logps/chosen": -248.0063934326172, "logps/rejected": -280.45037841796875, "loss": 0.3442, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.391216278076172, "rewards/margins": 2.4888811111450195, "rewards/rejected": -5.880097389221191, "step": 4580 }, { "epoch": 0.89, "learning_rate": 3.9055871144028186e-07, "logits/chosen": -2.704272508621216, "logits/rejected": -2.6848812103271484, "logps/chosen": -225.310791015625, "logps/rejected": -242.7181396484375, "loss": 0.5143, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.301420211791992, "rewards/margins": 3.005526065826416, "rewards/rejected": -6.306946277618408, "step": 4590 }, { "epoch": 0.89, "learning_rate": 3.901991802689293e-07, "logits/chosen": -2.7596051692962646, "logits/rejected": -2.7071454524993896, "logps/chosen": -262.65264892578125, "logps/rejected": -317.663818359375, "loss": 0.5481, "rewards/accuracies": 0.75, "rewards/chosen": -2.147545576095581, "rewards/margins": 2.1675028800964355, "rewards/rejected": -4.315048694610596, "step": 4600 }, { "epoch": 0.89, "eval_logits/chosen": -2.633481502532959, "eval_logits/rejected": -2.62675142288208, "eval_logps/chosen": -228.23257446289062, "eval_logps/rejected": -236.8095703125, "eval_loss": 0.49345776438713074, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -3.399451971054077, "eval_rewards/margins": 3.0899109840393066, "eval_rewards/rejected": -6.489363193511963, "eval_runtime": 144.96, "eval_samples_per_second": 21.772, "eval_steps_per_second": 0.345, "step": 4600 }, { "epoch": 0.89, "learning_rate": 3.898396490975767e-07, "logits/chosen": -2.6459014415740967, "logits/rejected": -2.6414332389831543, "logps/chosen": -184.276123046875, "logps/rejected": -161.65939331054688, "loss": 0.5431, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.9952762126922607, "rewards/margins": 1.3783482313156128, "rewards/rejected": -4.373624324798584, "step": 4610 }, { "epoch": 0.9, "learning_rate": 3.894801179262242e-07, "logits/chosen": -2.7866525650024414, "logits/rejected": -2.7403743267059326, "logps/chosen": -282.7216491699219, "logps/rejected": -275.5063781738281, "loss": 0.6045, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6962759494781494, "rewards/margins": 1.5308458805084229, "rewards/rejected": -3.2271218299865723, "step": 4620 }, { "epoch": 0.9, "learning_rate": 3.891205867548716e-07, "logits/chosen": -2.6336541175842285, "logits/rejected": -2.582383155822754, "logps/chosen": -200.53636169433594, "logps/rejected": -205.555908203125, "loss": 0.5376, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.658719778060913, "rewards/margins": 3.1007637977600098, "rewards/rejected": -4.759483337402344, "step": 4630 }, { "epoch": 0.9, "learning_rate": 3.8876105558351905e-07, "logits/chosen": -2.7869327068328857, "logits/rejected": -2.701145887374878, "logps/chosen": -231.5999298095703, "logps/rejected": -231.6761474609375, "loss": 0.3675, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.3242385387420654, "rewards/margins": 3.2975971698760986, "rewards/rejected": -6.621836185455322, "step": 4640 }, { "epoch": 0.9, "learning_rate": 3.8840152441216653e-07, "logits/chosen": -2.629610538482666, "logits/rejected": -2.6264729499816895, "logps/chosen": -254.6874237060547, "logps/rejected": -226.0127716064453, "loss": 0.5204, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -4.649791240692139, "rewards/margins": 1.5615746974945068, "rewards/rejected": -6.211366176605225, "step": 4650 }, { "epoch": 0.9, "learning_rate": 3.8804199324081396e-07, "logits/chosen": -2.779430866241455, "logits/rejected": -2.7456820011138916, "logps/chosen": -252.6644287109375, "logps/rejected": -230.5699005126953, "loss": 0.4624, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6407313346862793, "rewards/margins": 1.3123255968093872, "rewards/rejected": -3.953057050704956, "step": 4660 }, { "epoch": 0.91, "learning_rate": 3.8768246206946144e-07, "logits/chosen": -2.743870496749878, "logits/rejected": -2.747746706008911, "logps/chosen": -241.813720703125, "logps/rejected": -283.9878845214844, "loss": 0.5385, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.751024007797241, "rewards/margins": 3.5924434661865234, "rewards/rejected": -6.343467712402344, "step": 4670 }, { "epoch": 0.91, "learning_rate": 3.8732293089810887e-07, "logits/chosen": -2.7051734924316406, "logits/rejected": -2.7314159870147705, "logps/chosen": -260.3672790527344, "logps/rejected": -315.00445556640625, "loss": 0.4973, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.504934310913086, "rewards/margins": 2.647252321243286, "rewards/rejected": -5.152186393737793, "step": 4680 }, { "epoch": 0.91, "learning_rate": 3.869633997267563e-07, "logits/chosen": -2.6802845001220703, "logits/rejected": -2.632026195526123, "logps/chosen": -186.29025268554688, "logps/rejected": -173.17556762695312, "loss": 0.5464, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.9858808517456055, "rewards/margins": 2.548701047897339, "rewards/rejected": -7.534582614898682, "step": 4690 }, { "epoch": 0.91, "learning_rate": 3.866038685554037e-07, "logits/chosen": -2.7980446815490723, "logits/rejected": -2.851337432861328, "logps/chosen": -254.64938354492188, "logps/rejected": -238.0263671875, "loss": 0.4468, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.0403826236724854, "rewards/margins": 2.884485960006714, "rewards/rejected": -5.924868583679199, "step": 4700 }, { "epoch": 0.91, "eval_logits/chosen": -2.7352101802825928, "eval_logits/rejected": -2.7280097007751465, "eval_logps/chosen": -231.85617065429688, "eval_logps/rejected": -240.11097717285156, "eval_loss": 0.4904622733592987, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -3.761807918548584, "eval_rewards/margins": 3.0576953887939453, "eval_rewards/rejected": -6.819503307342529, "eval_runtime": 139.9939, "eval_samples_per_second": 22.544, "eval_steps_per_second": 0.357, "step": 4700 }, { "epoch": 0.91, "learning_rate": 3.8624433738405115e-07, "logits/chosen": -2.713387966156006, "logits/rejected": -2.6253886222839355, "logps/chosen": -260.95648193359375, "logps/rejected": -266.45062255859375, "loss": 0.495, "rewards/accuracies": 0.75, "rewards/chosen": -2.654179096221924, "rewards/margins": 2.9133238792419434, "rewards/rejected": -5.567502975463867, "step": 4710 }, { "epoch": 0.92, "learning_rate": 3.8588480621269863e-07, "logits/chosen": -2.713736057281494, "logits/rejected": -2.695307970046997, "logps/chosen": -284.30584716796875, "logps/rejected": -359.34014892578125, "loss": 0.5401, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.404874324798584, "rewards/margins": 2.228503465652466, "rewards/rejected": -4.633377552032471, "step": 4720 }, { "epoch": 0.92, "learning_rate": 3.8552527504134606e-07, "logits/chosen": -2.799454927444458, "logits/rejected": -2.8190231323242188, "logps/chosen": -210.095458984375, "logps/rejected": -219.76797485351562, "loss": 0.5772, "rewards/accuracies": 0.5, "rewards/chosen": -4.435279846191406, "rewards/margins": 1.6794917583465576, "rewards/rejected": -6.114771842956543, "step": 4730 }, { "epoch": 0.92, "learning_rate": 3.851657438699935e-07, "logits/chosen": -2.6745429039001465, "logits/rejected": -2.686805248260498, "logps/chosen": -253.4486541748047, "logps/rejected": -346.912841796875, "loss": 0.5649, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -5.1384596824646, "rewards/margins": 5.357385158538818, "rewards/rejected": -10.495844841003418, "step": 4740 }, { "epoch": 0.92, "learning_rate": 3.8480621269864097e-07, "logits/chosen": -2.7019081115722656, "logits/rejected": -2.7100443840026855, "logps/chosen": -223.4260711669922, "logps/rejected": -174.53768920898438, "loss": 0.5382, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.346592426300049, "rewards/margins": 2.5449345111846924, "rewards/rejected": -6.891526699066162, "step": 4750 }, { "epoch": 0.92, "learning_rate": 3.844466815272884e-07, "logits/chosen": -2.7890267372131348, "logits/rejected": -2.806248188018799, "logps/chosen": -254.773681640625, "logps/rejected": -342.19476318359375, "loss": 0.4703, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.618661880493164, "rewards/margins": 2.3927292823791504, "rewards/rejected": -5.0113911628723145, "step": 4760 }, { "epoch": 0.93, "learning_rate": 3.840871503559359e-07, "logits/chosen": -2.8749566078186035, "logits/rejected": -2.8356144428253174, "logps/chosen": -296.539306640625, "logps/rejected": -263.4466857910156, "loss": 0.5366, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6513047218322754, "rewards/margins": 0.9896014332771301, "rewards/rejected": -3.6409058570861816, "step": 4770 }, { "epoch": 0.93, "learning_rate": 3.837276191845833e-07, "logits/chosen": -2.923330783843994, "logits/rejected": -2.879647731781006, "logps/chosen": -230.6038818359375, "logps/rejected": -209.9868621826172, "loss": 0.5326, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.6262035369873047, "rewards/margins": 1.0854170322418213, "rewards/rejected": -4.711619853973389, "step": 4780 }, { "epoch": 0.93, "learning_rate": 3.8336808801323073e-07, "logits/chosen": -2.9384472370147705, "logits/rejected": -2.8726000785827637, "logps/chosen": -249.5596160888672, "logps/rejected": -310.5499267578125, "loss": 0.5193, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.7462658882141113, "rewards/margins": 3.243990421295166, "rewards/rejected": -6.990255832672119, "step": 4790 }, { "epoch": 0.93, "learning_rate": 3.8300855684187816e-07, "logits/chosen": -2.7498159408569336, "logits/rejected": -2.7551639080047607, "logps/chosen": -280.60565185546875, "logps/rejected": -251.5581512451172, "loss": 0.5001, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7587432861328125, "rewards/margins": 3.414700746536255, "rewards/rejected": -5.173443794250488, "step": 4800 }, { "epoch": 0.93, "eval_logits/chosen": -2.7782437801361084, "eval_logits/rejected": -2.7686476707458496, "eval_logps/chosen": -239.80943298339844, "eval_logps/rejected": -255.1630096435547, "eval_loss": 0.48671188950538635, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -4.557135581970215, "eval_rewards/margins": 3.7675697803497314, "eval_rewards/rejected": -8.324706077575684, "eval_runtime": 146.5225, "eval_samples_per_second": 21.539, "eval_steps_per_second": 0.341, "step": 4800 }, { "epoch": 0.93, "learning_rate": 3.826490256705256e-07, "logits/chosen": -2.7762343883514404, "logits/rejected": -2.8220748901367188, "logps/chosen": -250.0959930419922, "logps/rejected": -290.9184875488281, "loss": 0.4251, "rewards/accuracies": 0.75, "rewards/chosen": -4.777438163757324, "rewards/margins": 3.731992721557617, "rewards/rejected": -8.509431838989258, "step": 4810 }, { "epoch": 0.94, "learning_rate": 3.8228949449917307e-07, "logits/chosen": -2.8603081703186035, "logits/rejected": -2.8561787605285645, "logps/chosen": -290.33416748046875, "logps/rejected": -265.3094177246094, "loss": 0.4681, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -5.198728084564209, "rewards/margins": 3.6126930713653564, "rewards/rejected": -8.811421394348145, "step": 4820 }, { "epoch": 0.94, "learning_rate": 3.819299633278205e-07, "logits/chosen": -2.7717111110687256, "logits/rejected": -2.7392990589141846, "logps/chosen": -314.5079040527344, "logps/rejected": -277.3616943359375, "loss": 0.5301, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.3043341636657715, "rewards/margins": 4.1167521476745605, "rewards/rejected": -7.42108678817749, "step": 4830 }, { "epoch": 0.94, "learning_rate": 3.815704321564679e-07, "logits/chosen": -2.6337149143218994, "logits/rejected": -2.6978964805603027, "logps/chosen": -224.68130493164062, "logps/rejected": -257.2038269042969, "loss": 0.5576, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.9031434059143066, "rewards/margins": 4.151448726654053, "rewards/rejected": -7.054592132568359, "step": 4840 }, { "epoch": 0.94, "learning_rate": 3.812109009851154e-07, "logits/chosen": -2.9030470848083496, "logits/rejected": -2.921684741973877, "logps/chosen": -443.34747314453125, "logps/rejected": -304.0016784667969, "loss": 0.4318, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.6353559494018555, "rewards/margins": 3.056591510772705, "rewards/rejected": -7.691947937011719, "step": 4850 }, { "epoch": 0.94, "learning_rate": 3.8085136981376283e-07, "logits/chosen": -2.907698392868042, "logits/rejected": -2.957947254180908, "logps/chosen": -245.33419799804688, "logps/rejected": -293.4639587402344, "loss": 0.4891, "rewards/accuracies": 0.75, "rewards/chosen": -4.125678062438965, "rewards/margins": 4.333249568939209, "rewards/rejected": -8.458927154541016, "step": 4860 }, { "epoch": 0.95, "learning_rate": 3.804918386424103e-07, "logits/chosen": -2.820817470550537, "logits/rejected": -2.7914021015167236, "logps/chosen": -200.84523010253906, "logps/rejected": -241.79531860351562, "loss": 0.4316, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -6.529050350189209, "rewards/margins": 1.5810530185699463, "rewards/rejected": -8.110102653503418, "step": 4870 }, { "epoch": 0.95, "learning_rate": 3.8013230747105774e-07, "logits/chosen": -2.6274003982543945, "logits/rejected": -2.726317882537842, "logps/chosen": -245.4355010986328, "logps/rejected": -259.23809814453125, "loss": 0.5073, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.172598838806152, "rewards/margins": 3.381195545196533, "rewards/rejected": -7.553793907165527, "step": 4880 }, { "epoch": 0.95, "learning_rate": 3.7977277629970517e-07, "logits/chosen": -2.805297374725342, "logits/rejected": -2.8723697662353516, "logps/chosen": -237.6421661376953, "logps/rejected": -267.11004638671875, "loss": 0.4277, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.3887939453125, "rewards/margins": 4.594298839569092, "rewards/rejected": -7.983092308044434, "step": 4890 }, { "epoch": 0.95, "learning_rate": 3.794132451283526e-07, "logits/chosen": -2.6064090728759766, "logits/rejected": -2.6015784740448, "logps/chosen": -194.22018432617188, "logps/rejected": -298.46380615234375, "loss": 0.4342, "rewards/accuracies": 0.75, "rewards/chosen": -5.538807392120361, "rewards/margins": 3.978961229324341, "rewards/rejected": -9.517767906188965, "step": 4900 }, { "epoch": 0.95, "eval_logits/chosen": -2.7979583740234375, "eval_logits/rejected": -2.7917184829711914, "eval_logps/chosen": -240.02420043945312, "eval_logps/rejected": -251.78773498535156, "eval_loss": 0.49478423595428467, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -4.578611373901367, "eval_rewards/margins": 3.408565044403076, "eval_rewards/rejected": -7.987176418304443, "eval_runtime": 154.9453, "eval_samples_per_second": 20.368, "eval_steps_per_second": 0.323, "step": 4900 }, { "epoch": 0.95, "learning_rate": 3.79053713957e-07, "logits/chosen": -2.7656638622283936, "logits/rejected": -2.751854658126831, "logps/chosen": -158.113037109375, "logps/rejected": -208.19003295898438, "loss": 0.4107, "rewards/accuracies": 0.75, "rewards/chosen": -4.385377407073975, "rewards/margins": 3.694204330444336, "rewards/rejected": -8.079580307006836, "step": 4910 }, { "epoch": 0.96, "learning_rate": 3.786941827856475e-07, "logits/chosen": -2.855672597885132, "logits/rejected": -2.856933832168579, "logps/chosen": -179.34011840820312, "logps/rejected": -202.17086791992188, "loss": 0.5642, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -4.467204570770264, "rewards/margins": 3.3610007762908936, "rewards/rejected": -7.828205108642578, "step": 4920 }, { "epoch": 0.96, "learning_rate": 3.7833465161429493e-07, "logits/chosen": -2.8508670330047607, "logits/rejected": -2.9047322273254395, "logps/chosen": -292.36590576171875, "logps/rejected": -293.3150329589844, "loss": 0.5436, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8637614250183105, "rewards/margins": 2.5137455463409424, "rewards/rejected": -5.377506732940674, "step": 4930 }, { "epoch": 0.96, "learning_rate": 3.7797512044294236e-07, "logits/chosen": -2.896697998046875, "logits/rejected": -2.8970844745635986, "logps/chosen": -282.89447021484375, "logps/rejected": -278.5943298339844, "loss": 0.5348, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.333247184753418, "rewards/margins": 1.6520404815673828, "rewards/rejected": -5.985286712646484, "step": 4940 }, { "epoch": 0.96, "learning_rate": 3.7761558927158984e-07, "logits/chosen": -2.8573505878448486, "logits/rejected": -2.811361074447632, "logps/chosen": -277.64776611328125, "logps/rejected": -275.1065368652344, "loss": 0.4906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -4.285165309906006, "rewards/margins": 3.0737969875335693, "rewards/rejected": -7.358962059020996, "step": 4950 }, { "epoch": 0.96, "learning_rate": 3.7725605810023727e-07, "logits/chosen": -2.856657028198242, "logits/rejected": -2.924285411834717, "logps/chosen": -289.0182800292969, "logps/rejected": -280.6921691894531, "loss": 0.5948, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.451930284500122, "rewards/margins": 2.1042654514312744, "rewards/rejected": -5.5561957359313965, "step": 4960 }, { "epoch": 0.96, "learning_rate": 3.7689652692888475e-07, "logits/chosen": -2.9017937183380127, "logits/rejected": -2.838625431060791, "logps/chosen": -263.3382568359375, "logps/rejected": -249.8639678955078, "loss": 0.4445, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -7.6466474533081055, "rewards/margins": 2.6505770683288574, "rewards/rejected": -10.297224044799805, "step": 4970 }, { "epoch": 0.97, "learning_rate": 3.765369957575322e-07, "logits/chosen": -2.7935118675231934, "logits/rejected": -2.7818892002105713, "logps/chosen": -200.6241912841797, "logps/rejected": -195.07611083984375, "loss": 0.5492, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.2856605052948, "rewards/margins": 2.976768732070923, "rewards/rejected": -6.262429237365723, "step": 4980 }, { "epoch": 0.97, "learning_rate": 3.761774645861796e-07, "logits/chosen": -2.8862531185150146, "logits/rejected": -2.846205234527588, "logps/chosen": -241.02072143554688, "logps/rejected": -326.8516540527344, "loss": 0.3793, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.1322147846221924, "rewards/margins": 4.976840019226074, "rewards/rejected": -8.109055519104004, "step": 4990 }, { "epoch": 0.97, "learning_rate": 3.7581793341482703e-07, "logits/chosen": -2.556763172149658, "logits/rejected": -2.5429344177246094, "logps/chosen": -323.6518859863281, "logps/rejected": -297.3819274902344, "loss": 0.5148, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.514235734939575, "rewards/margins": 0.252679705619812, "rewards/rejected": -3.7669150829315186, "step": 5000 }, { "epoch": 0.97, "eval_logits/chosen": -2.8058435916900635, "eval_logits/rejected": -2.8001227378845215, "eval_logps/chosen": -245.33412170410156, "eval_logps/rejected": -256.4447937011719, "eval_loss": 0.4876927137374878, "eval_rewards/accuracies": 0.6924999952316284, "eval_rewards/chosen": -5.109607219696045, "eval_rewards/margins": 3.3432767391204834, "eval_rewards/rejected": -8.452884674072266, "eval_runtime": 139.968, "eval_samples_per_second": 22.548, "eval_steps_per_second": 0.357, "step": 5000 }, { "epoch": 0.97, "learning_rate": 3.7545840224347446e-07, "logits/chosen": -2.8039920330047607, "logits/rejected": -2.8128957748413086, "logps/chosen": -287.8877258300781, "logps/rejected": -318.81439208984375, "loss": 0.4314, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -6.142752647399902, "rewards/margins": 3.390345335006714, "rewards/rejected": -9.533100128173828, "step": 5010 }, { "epoch": 0.97, "learning_rate": 3.7509887107212194e-07, "logits/chosen": -2.816234588623047, "logits/rejected": -2.8018856048583984, "logps/chosen": -319.5215148925781, "logps/rejected": -235.34506225585938, "loss": 0.5346, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -4.539577484130859, "rewards/margins": 0.9457670450210571, "rewards/rejected": -5.485343933105469, "step": 5020 }, { "epoch": 0.98, "learning_rate": 3.7473933990076937e-07, "logits/chosen": -2.927044153213501, "logits/rejected": -2.916829824447632, "logps/chosen": -262.3309020996094, "logps/rejected": -327.40826416015625, "loss": 0.5341, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -4.339417457580566, "rewards/margins": 0.5237800478935242, "rewards/rejected": -4.863197326660156, "step": 5030 }, { "epoch": 0.98, "learning_rate": 3.7437980872941685e-07, "logits/chosen": -2.864976406097412, "logits/rejected": -2.893390655517578, "logps/chosen": -260.3367004394531, "logps/rejected": -250.24819946289062, "loss": 0.5778, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -5.055976390838623, "rewards/margins": 1.819750428199768, "rewards/rejected": -6.875726222991943, "step": 5040 }, { "epoch": 0.98, "learning_rate": 3.740202775580643e-07, "logits/chosen": -2.8373379707336426, "logits/rejected": -2.9002954959869385, "logps/chosen": -280.1398010253906, "logps/rejected": -261.08917236328125, "loss": 0.4816, "rewards/accuracies": 0.75, "rewards/chosen": -3.2306816577911377, "rewards/margins": 2.560249090194702, "rewards/rejected": -5.79093074798584, "step": 5050 }, { "epoch": 0.98, "learning_rate": 3.736607463867117e-07, "logits/chosen": -2.8642070293426514, "logits/rejected": -2.850830078125, "logps/chosen": -265.0941162109375, "logps/rejected": -274.12091064453125, "loss": 0.5481, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -6.264739036560059, "rewards/margins": 1.2267714738845825, "rewards/rejected": -7.49151086807251, "step": 5060 }, { "epoch": 0.98, "learning_rate": 3.733012152153592e-07, "logits/chosen": -2.8552756309509277, "logits/rejected": -2.7445178031921387, "logps/chosen": -282.53375244140625, "logps/rejected": -237.4908447265625, "loss": 0.5685, "rewards/accuracies": 0.75, "rewards/chosen": -3.214513063430786, "rewards/margins": 1.601584792137146, "rewards/rejected": -4.816098213195801, "step": 5070 }, { "epoch": 0.99, "learning_rate": 3.729416840440066e-07, "logits/chosen": -2.7386107444763184, "logits/rejected": -2.662231206893921, "logps/chosen": -281.3005065917969, "logps/rejected": -332.75091552734375, "loss": 0.5924, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.301259994506836, "rewards/margins": 1.7015777826309204, "rewards/rejected": -6.002837181091309, "step": 5080 }, { "epoch": 0.99, "learning_rate": 3.7258215287265404e-07, "logits/chosen": -2.769857883453369, "logits/rejected": -2.7492454051971436, "logps/chosen": -202.14151000976562, "logps/rejected": -204.52354431152344, "loss": 0.567, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.8968968391418457, "rewards/margins": 1.3117891550064087, "rewards/rejected": -5.208685874938965, "step": 5090 }, { "epoch": 0.99, "learning_rate": 3.7222262170130147e-07, "logits/chosen": -2.6590256690979004, "logits/rejected": -2.704235553741455, "logps/chosen": -207.9397735595703, "logps/rejected": -251.90444946289062, "loss": 0.456, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.3292739391326904, "rewards/margins": 2.6150341033935547, "rewards/rejected": -5.944308280944824, "step": 5100 }, { "epoch": 0.99, "eval_logits/chosen": -2.699265718460083, "eval_logits/rejected": -2.695173740386963, "eval_logps/chosen": -237.08937072753906, "eval_logps/rejected": -246.49073791503906, "eval_loss": 0.4937092065811157, "eval_rewards/accuracies": 0.6949999928474426, "eval_rewards/chosen": -4.28513240814209, "eval_rewards/margins": 3.1723451614379883, "eval_rewards/rejected": -7.457478046417236, "eval_runtime": 141.0846, "eval_samples_per_second": 22.37, "eval_steps_per_second": 0.354, "step": 5100 }, { "epoch": 0.99, "learning_rate": 3.718630905299489e-07, "logits/chosen": -2.7389750480651855, "logits/rejected": -2.814399242401123, "logps/chosen": -208.7837371826172, "logps/rejected": -269.72052001953125, "loss": 0.4895, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -5.171353340148926, "rewards/margins": 3.0589101314544678, "rewards/rejected": -8.230263710021973, "step": 5110 }, { "epoch": 0.99, "learning_rate": 3.715035593585964e-07, "logits/chosen": -2.74831485748291, "logits/rejected": -2.7159152030944824, "logps/chosen": -200.48020935058594, "logps/rejected": -218.4559783935547, "loss": 0.4423, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.618405818939209, "rewards/margins": 1.5190508365631104, "rewards/rejected": -4.137456893920898, "step": 5120 }, { "epoch": 1.0, "learning_rate": 3.711440281872438e-07, "logits/chosen": -2.725182056427002, "logits/rejected": -2.7014055252075195, "logps/chosen": -237.24484252929688, "logps/rejected": -207.36181640625, "loss": 0.5052, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4843099117279053, "rewards/margins": 3.431879758834839, "rewards/rejected": -5.916189670562744, "step": 5130 }, { "epoch": 1.0, "learning_rate": 3.707844970158913e-07, "logits/chosen": -2.784498929977417, "logits/rejected": -2.7601518630981445, "logps/chosen": -226.90219116210938, "logps/rejected": -237.20742797851562, "loss": 0.457, "rewards/accuracies": 0.75, "rewards/chosen": -4.148807525634766, "rewards/margins": 4.220892906188965, "rewards/rejected": -8.369699478149414, "step": 5140 }, { "epoch": 1.0, "learning_rate": 3.704249658445387e-07, "logits/chosen": -2.5374457836151123, "logits/rejected": -2.507659435272217, "logps/chosen": -161.14242553710938, "logps/rejected": -155.3887939453125, "loss": 0.5048, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -4.364739418029785, "rewards/margins": 0.5551323890686035, "rewards/rejected": -4.9198713302612305, "step": 5150 }, { "epoch": 1.0, "learning_rate": 3.7006543467318614e-07, "logits/chosen": -2.706430196762085, "logits/rejected": -2.731123924255371, "logps/chosen": -188.61871337890625, "logps/rejected": -239.47265625, "loss": 0.1459, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8703367710113525, "rewards/margins": 6.202843189239502, "rewards/rejected": -8.073180198669434, "step": 5160 }, { "epoch": 1.0, "learning_rate": 3.697059035018336e-07, "logits/chosen": -2.6322195529937744, "logits/rejected": -2.6461081504821777, "logps/chosen": -226.6240692138672, "logps/rejected": -246.842041015625, "loss": 0.1756, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.154054164886475, "rewards/margins": 4.039252758026123, "rewards/rejected": -8.193307876586914, "step": 5170 }, { "epoch": 1.01, "learning_rate": 3.6934637233048105e-07, "logits/chosen": -2.731140613555908, "logits/rejected": -2.6016902923583984, "logps/chosen": -212.1609344482422, "logps/rejected": -240.11758422851562, "loss": 0.1153, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.578425884246826, "rewards/margins": 5.413627624511719, "rewards/rejected": -7.992053031921387, "step": 5180 }, { "epoch": 1.01, "learning_rate": 3.689868411591285e-07, "logits/chosen": -2.5545010566711426, "logits/rejected": -2.479830265045166, "logps/chosen": -180.43634033203125, "logps/rejected": -308.0595703125, "loss": 0.0967, "rewards/accuracies": 1.0, "rewards/chosen": -0.7720845937728882, "rewards/margins": 8.198678970336914, "rewards/rejected": -8.970763206481934, "step": 5190 }, { "epoch": 1.01, "learning_rate": 3.686273099877759e-07, "logits/chosen": -2.825456142425537, "logits/rejected": -2.849297285079956, "logps/chosen": -224.1262969970703, "logps/rejected": -252.75198364257812, "loss": 0.1524, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.472179889678955, "rewards/margins": 4.5035929679870605, "rewards/rejected": -6.975772857666016, "step": 5200 }, { "epoch": 1.01, "eval_logits/chosen": -2.6616714000701904, "eval_logits/rejected": -2.654366970062256, "eval_logps/chosen": -239.63279724121094, "eval_logps/rejected": -255.0330047607422, "eval_loss": 0.4891924560070038, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -4.539472579956055, "eval_rewards/margins": 3.772231340408325, "eval_rewards/rejected": -8.311702728271484, "eval_runtime": 139.642, "eval_samples_per_second": 22.601, "eval_steps_per_second": 0.358, "step": 5200 }, { "epoch": 1.01, "learning_rate": 3.6826777881642334e-07, "logits/chosen": -2.652801990509033, "logits/rejected": -2.6216378211975098, "logps/chosen": -254.4090576171875, "logps/rejected": -257.03851318359375, "loss": 0.14, "rewards/accuracies": 1.0, "rewards/chosen": -0.48289814591407776, "rewards/margins": 7.643038749694824, "rewards/rejected": -8.125936508178711, "step": 5210 }, { "epoch": 1.01, "learning_rate": 3.679082476450708e-07, "logits/chosen": -2.626802444458008, "logits/rejected": -2.6811165809631348, "logps/chosen": -239.24526977539062, "logps/rejected": -310.8146057128906, "loss": 0.1104, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.9944814443588257, "rewards/margins": 7.890805244445801, "rewards/rejected": -9.885286331176758, "step": 5220 }, { "epoch": 1.02, "learning_rate": 3.6754871647371824e-07, "logits/chosen": -2.8209762573242188, "logits/rejected": -2.722492218017578, "logps/chosen": -300.58831787109375, "logps/rejected": -349.81500244140625, "loss": 0.1045, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.164588451385498, "rewards/margins": 11.860956192016602, "rewards/rejected": -10.696367263793945, "step": 5230 }, { "epoch": 1.02, "learning_rate": 3.671891853023657e-07, "logits/chosen": -2.7256627082824707, "logits/rejected": -2.8017637729644775, "logps/chosen": -235.8293914794922, "logps/rejected": -365.16839599609375, "loss": 0.1211, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6058365106582642, "rewards/margins": 9.512593269348145, "rewards/rejected": -11.118429183959961, "step": 5240 }, { "epoch": 1.02, "learning_rate": 3.6682965413101315e-07, "logits/chosen": -2.6234757900238037, "logits/rejected": -2.639665126800537, "logps/chosen": -295.0428466796875, "logps/rejected": -347.30670166015625, "loss": 0.1882, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.5189539194107056, "rewards/margins": 8.475537300109863, "rewards/rejected": -7.956583499908447, "step": 5250 }, { "epoch": 1.02, "learning_rate": 3.664701229596606e-07, "logits/chosen": -2.682283878326416, "logits/rejected": -2.735970973968506, "logps/chosen": -250.5039520263672, "logps/rejected": -295.7886047363281, "loss": 0.1059, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.94063401222229, "rewards/margins": 11.262201309204102, "rewards/rejected": -13.202835083007812, "step": 5260 }, { "epoch": 1.02, "learning_rate": 3.6611059178830806e-07, "logits/chosen": -2.609736680984497, "logits/rejected": -2.630481243133545, "logps/chosen": -219.67385864257812, "logps/rejected": -265.46978759765625, "loss": 0.1221, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.2120184898376465, "rewards/margins": 7.351651668548584, "rewards/rejected": -10.563669204711914, "step": 5270 }, { "epoch": 1.03, "learning_rate": 3.657510606169555e-07, "logits/chosen": -2.79672908782959, "logits/rejected": -2.80228853225708, "logps/chosen": -423.10894775390625, "logps/rejected": -475.14849853515625, "loss": 0.1257, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.531555414199829, "rewards/margins": 8.50876522064209, "rewards/rejected": -11.040319442749023, "step": 5280 }, { "epoch": 1.03, "learning_rate": 3.653915294456029e-07, "logits/chosen": -2.7790000438690186, "logits/rejected": -2.7992444038391113, "logps/chosen": -302.49298095703125, "logps/rejected": -327.6779479980469, "loss": 0.1831, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6189241409301758, "rewards/margins": 6.2512640953063965, "rewards/rejected": -7.870188236236572, "step": 5290 }, { "epoch": 1.03, "learning_rate": 3.6503199827425034e-07, "logits/chosen": -2.6647748947143555, "logits/rejected": -2.7105484008789062, "logps/chosen": -203.89161682128906, "logps/rejected": -380.47930908203125, "loss": 0.1647, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8709390759468079, "rewards/margins": 9.922861099243164, "rewards/rejected": -10.793800354003906, "step": 5300 }, { "epoch": 1.03, "eval_logits/chosen": -2.624562978744507, "eval_logits/rejected": -2.6140475273132324, "eval_logps/chosen": -246.80006408691406, "eval_logps/rejected": -267.1991271972656, "eval_loss": 0.5094810128211975, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -5.256199359893799, "eval_rewards/margins": 4.272119522094727, "eval_rewards/rejected": -9.528318405151367, "eval_runtime": 140.6563, "eval_samples_per_second": 22.438, "eval_steps_per_second": 0.355, "step": 5300 }, { "epoch": 1.03, "learning_rate": 3.6467246710289777e-07, "logits/chosen": -2.6462931632995605, "logits/rejected": -2.597566604614258, "logps/chosen": -309.98675537109375, "logps/rejected": -370.7598571777344, "loss": 0.0859, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.09952565282583237, "rewards/margins": 10.9370698928833, "rewards/rejected": -11.036596298217773, "step": 5310 }, { "epoch": 1.03, "learning_rate": 3.6431293593154525e-07, "logits/chosen": -2.8438780307769775, "logits/rejected": -2.727034568786621, "logps/chosen": -281.74365234375, "logps/rejected": -366.4187316894531, "loss": 0.1493, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.0003354430082254112, "rewards/margins": 9.595807075500488, "rewards/rejected": -9.595470428466797, "step": 5320 }, { "epoch": 1.03, "learning_rate": 3.639534047601927e-07, "logits/chosen": -2.6891815662384033, "logits/rejected": -2.644469738006592, "logps/chosen": -254.2372283935547, "logps/rejected": -270.01397705078125, "loss": 0.1254, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1999330520629883, "rewards/margins": 7.129768371582031, "rewards/rejected": -9.32970142364502, "step": 5330 }, { "epoch": 1.04, "learning_rate": 3.6359387358884016e-07, "logits/chosen": -2.681487560272217, "logits/rejected": -2.723895311355591, "logps/chosen": -242.2496795654297, "logps/rejected": -302.38616943359375, "loss": 0.0976, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.672675609588623, "rewards/margins": 8.488563537597656, "rewards/rejected": -11.161238670349121, "step": 5340 }, { "epoch": 1.04, "learning_rate": 3.632343424174876e-07, "logits/chosen": -2.6785712242126465, "logits/rejected": -2.6607227325439453, "logps/chosen": -208.8882598876953, "logps/rejected": -338.19293212890625, "loss": 0.1597, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.5223472118377686, "rewards/margins": 10.683622360229492, "rewards/rejected": -14.205968856811523, "step": 5350 }, { "epoch": 1.04, "learning_rate": 3.62874811246135e-07, "logits/chosen": -2.6800436973571777, "logits/rejected": -2.621644973754883, "logps/chosen": -261.87939453125, "logps/rejected": -285.1792297363281, "loss": 0.1575, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.27321195602417, "rewards/margins": 9.290997505187988, "rewards/rejected": -11.564209938049316, "step": 5360 }, { "epoch": 1.04, "learning_rate": 3.625152800747825e-07, "logits/chosen": -2.797255754470825, "logits/rejected": -2.804145097732544, "logps/chosen": -273.0203857421875, "logps/rejected": -368.51812744140625, "loss": 0.1136, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.1146254539489746, "rewards/margins": 8.267000198364258, "rewards/rejected": -11.381625175476074, "step": 5370 }, { "epoch": 1.04, "learning_rate": 3.621557489034299e-07, "logits/chosen": -2.7852702140808105, "logits/rejected": -2.77970027923584, "logps/chosen": -245.98532104492188, "logps/rejected": -254.83218383789062, "loss": 0.1252, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.4917871057987213, "rewards/margins": 7.187398433685303, "rewards/rejected": -7.679185390472412, "step": 5380 }, { "epoch": 1.05, "learning_rate": 3.6179621773207735e-07, "logits/chosen": -2.769925355911255, "logits/rejected": -2.785240650177002, "logps/chosen": -269.5412292480469, "logps/rejected": -322.44061279296875, "loss": 0.0918, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.9030981063842773, "rewards/margins": 6.19405460357666, "rewards/rejected": -9.097153663635254, "step": 5390 }, { "epoch": 1.05, "learning_rate": 3.614366865607248e-07, "logits/chosen": -2.684915542602539, "logits/rejected": -2.6567978858947754, "logps/chosen": -277.91278076171875, "logps/rejected": -273.9879150390625, "loss": 0.1757, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.428370863199234, "rewards/margins": 6.694431304931641, "rewards/rejected": -7.122802734375, "step": 5400 }, { "epoch": 1.05, "eval_logits/chosen": -2.5418596267700195, "eval_logits/rejected": -2.5278024673461914, "eval_logps/chosen": -239.91001892089844, "eval_logps/rejected": -258.1794738769531, "eval_loss": 0.54659104347229, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -4.567195415496826, "eval_rewards/margins": 4.059156894683838, "eval_rewards/rejected": -8.626352310180664, "eval_runtime": 140.799, "eval_samples_per_second": 22.415, "eval_steps_per_second": 0.355, "step": 5400 }, { "epoch": 1.05, "learning_rate": 3.610771553893722e-07, "logits/chosen": -2.7176060676574707, "logits/rejected": -2.6798465251922607, "logps/chosen": -231.7126007080078, "logps/rejected": -279.63458251953125, "loss": 0.2079, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6557159423828125, "rewards/margins": 8.253232955932617, "rewards/rejected": -9.908949851989746, "step": 5410 }, { "epoch": 1.05, "learning_rate": 3.607176242180197e-07, "logits/chosen": -2.7627878189086914, "logits/rejected": -2.677417755126953, "logps/chosen": -243.6680908203125, "logps/rejected": -277.702880859375, "loss": 0.102, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.0694854259490967, "rewards/margins": 6.533364772796631, "rewards/rejected": -8.602849960327148, "step": 5420 }, { "epoch": 1.05, "learning_rate": 3.603580930466671e-07, "logits/chosen": -2.671948194503784, "logits/rejected": -2.625758647918701, "logps/chosen": -236.7624969482422, "logps/rejected": -297.90087890625, "loss": 0.1159, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3368608951568604, "rewards/margins": 7.368115425109863, "rewards/rejected": -9.704974174499512, "step": 5430 }, { "epoch": 1.06, "learning_rate": 3.599985618753146e-07, "logits/chosen": -2.578261613845825, "logits/rejected": -2.6791250705718994, "logps/chosen": -328.80523681640625, "logps/rejected": -417.37908935546875, "loss": 0.1223, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.867095708847046, "rewards/margins": 7.185909271240234, "rewards/rejected": -9.053004264831543, "step": 5440 }, { "epoch": 1.06, "learning_rate": 3.59639030703962e-07, "logits/chosen": -2.7734498977661133, "logits/rejected": -2.783108711242676, "logps/chosen": -228.15652465820312, "logps/rejected": -352.22479248046875, "loss": 0.1249, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.5476136207580566, "rewards/margins": 11.574209213256836, "rewards/rejected": -13.121824264526367, "step": 5450 }, { "epoch": 1.06, "learning_rate": 3.5927949953260945e-07, "logits/chosen": -2.6990790367126465, "logits/rejected": -2.6678977012634277, "logps/chosen": -218.618408203125, "logps/rejected": -311.88580322265625, "loss": 0.0929, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.4440550804138184, "rewards/margins": 9.342641830444336, "rewards/rejected": -11.786697387695312, "step": 5460 }, { "epoch": 1.06, "learning_rate": 3.5891996836125694e-07, "logits/chosen": -2.507127046585083, "logits/rejected": -2.499525785446167, "logps/chosen": -216.7612762451172, "logps/rejected": -246.66152954101562, "loss": 0.1814, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -4.382702827453613, "rewards/margins": 4.94468355178833, "rewards/rejected": -9.327385902404785, "step": 5470 }, { "epoch": 1.06, "learning_rate": 3.5856043718990436e-07, "logits/chosen": -2.820582389831543, "logits/rejected": -2.7196497917175293, "logps/chosen": -229.8854522705078, "logps/rejected": -280.36907958984375, "loss": 0.0991, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.7096996307373047, "rewards/margins": 9.916669845581055, "rewards/rejected": -11.62636947631836, "step": 5480 }, { "epoch": 1.07, "learning_rate": 3.582009060185518e-07, "logits/chosen": -2.6646568775177, "logits/rejected": -2.610426902770996, "logps/chosen": -206.9543914794922, "logps/rejected": -324.7890319824219, "loss": 0.1196, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.6991324424743652, "rewards/margins": 7.090356349945068, "rewards/rejected": -9.789487838745117, "step": 5490 }, { "epoch": 1.07, "learning_rate": 3.578413748471992e-07, "logits/chosen": -2.678091526031494, "logits/rejected": -2.647273540496826, "logps/chosen": -161.38650512695312, "logps/rejected": -272.1451721191406, "loss": 0.1386, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.1487624645233154, "rewards/margins": 8.97457218170166, "rewards/rejected": -11.123334884643555, "step": 5500 }, { "epoch": 1.07, "eval_logits/chosen": -2.6286401748657227, "eval_logits/rejected": -2.6168596744537354, "eval_logps/chosen": -250.84083557128906, "eval_logps/rejected": -274.1166687011719, "eval_loss": 0.5161064267158508, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -5.660276412963867, "eval_rewards/margins": 4.559793949127197, "eval_rewards/rejected": -10.220069885253906, "eval_runtime": 159.5045, "eval_samples_per_second": 19.786, "eval_steps_per_second": 0.313, "step": 5500 }, { "epoch": 1.07, "learning_rate": 3.5748184367584665e-07, "logits/chosen": -2.623798370361328, "logits/rejected": -2.6287975311279297, "logps/chosen": -245.2073974609375, "logps/rejected": -422.5575256347656, "loss": 0.1087, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.859710693359375, "rewards/margins": 13.329292297363281, "rewards/rejected": -17.189002990722656, "step": 5510 }, { "epoch": 1.07, "learning_rate": 3.5712231250449413e-07, "logits/chosen": -2.6746225357055664, "logits/rejected": -2.600458860397339, "logps/chosen": -234.039794921875, "logps/rejected": -247.18466186523438, "loss": 0.183, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6866891384124756, "rewards/margins": 8.559911727905273, "rewards/rejected": -11.246599197387695, "step": 5520 }, { "epoch": 1.07, "learning_rate": 3.567627813331416e-07, "logits/chosen": -2.7399849891662598, "logits/rejected": -2.861042022705078, "logps/chosen": -294.953857421875, "logps/rejected": -363.92376708984375, "loss": 0.1597, "rewards/accuracies": 1.0, "rewards/chosen": -1.7596518993377686, "rewards/margins": 12.088563919067383, "rewards/rejected": -13.848217964172363, "step": 5530 }, { "epoch": 1.08, "learning_rate": 3.5640325016178904e-07, "logits/chosen": -2.8429012298583984, "logits/rejected": -2.8388876914978027, "logps/chosen": -271.3568115234375, "logps/rejected": -305.71978759765625, "loss": 0.151, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.8082034587860107, "rewards/margins": 5.346377849578857, "rewards/rejected": -7.154581546783447, "step": 5540 }, { "epoch": 1.08, "learning_rate": 3.5604371899043646e-07, "logits/chosen": -2.6036064624786377, "logits/rejected": -2.6070005893707275, "logps/chosen": -214.0321044921875, "logps/rejected": -242.52236938476562, "loss": 0.1333, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.6123268604278564, "rewards/margins": 5.286976337432861, "rewards/rejected": -7.8993024826049805, "step": 5550 }, { "epoch": 1.08, "learning_rate": 3.556841878190839e-07, "logits/chosen": -2.5977420806884766, "logits/rejected": -2.471993923187256, "logps/chosen": -239.48806762695312, "logps/rejected": -417.43603515625, "loss": 0.1051, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.8278324604034424, "rewards/margins": 12.061902046203613, "rewards/rejected": -14.889734268188477, "step": 5560 }, { "epoch": 1.08, "learning_rate": 3.5532465664773137e-07, "logits/chosen": -2.7525136470794678, "logits/rejected": -2.697791814804077, "logps/chosen": -288.59124755859375, "logps/rejected": -317.3720703125, "loss": 0.1616, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.814112901687622, "rewards/margins": 6.11906623840332, "rewards/rejected": -9.93317985534668, "step": 5570 }, { "epoch": 1.08, "learning_rate": 3.549651254763788e-07, "logits/chosen": -2.740294933319092, "logits/rejected": -2.7485148906707764, "logps/chosen": -217.1917266845703, "logps/rejected": -359.91680908203125, "loss": 0.161, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.977596282958984, "rewards/margins": 10.685625076293945, "rewards/rejected": -15.66322135925293, "step": 5580 }, { "epoch": 1.09, "learning_rate": 3.5460559430502623e-07, "logits/chosen": -2.4666714668273926, "logits/rejected": -2.530104160308838, "logps/chosen": -289.1488342285156, "logps/rejected": -310.63861083984375, "loss": 0.1859, "rewards/accuracies": 0.75, "rewards/chosen": -7.899319648742676, "rewards/margins": 8.151875495910645, "rewards/rejected": -16.05119514465332, "step": 5590 }, { "epoch": 1.09, "learning_rate": 3.5424606313367366e-07, "logits/chosen": -2.7731902599334717, "logits/rejected": -2.767548084259033, "logps/chosen": -284.44293212890625, "logps/rejected": -321.66888427734375, "loss": 0.0945, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.389395236968994, "rewards/margins": 8.4964017868042, "rewards/rejected": -13.885797500610352, "step": 5600 }, { "epoch": 1.09, "eval_logits/chosen": -2.684382677078247, "eval_logits/rejected": -2.673522710800171, "eval_logps/chosen": -258.7541809082031, "eval_logps/rejected": -280.08514404296875, "eval_loss": 0.5456523895263672, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -6.45161247253418, "eval_rewards/margins": 4.365309238433838, "eval_rewards/rejected": -10.81692123413086, "eval_runtime": 154.9489, "eval_samples_per_second": 20.368, "eval_steps_per_second": 0.323, "step": 5600 }, { "epoch": 1.09, "learning_rate": 3.538865319623211e-07, "logits/chosen": -2.825899839401245, "logits/rejected": -2.8352699279785156, "logps/chosen": -218.1634979248047, "logps/rejected": -232.28466796875, "loss": 0.1343, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.0848653316497803, "rewards/margins": 5.6649088859558105, "rewards/rejected": -8.749773979187012, "step": 5610 }, { "epoch": 1.09, "learning_rate": 3.5352700079096856e-07, "logits/chosen": -2.830157995223999, "logits/rejected": -2.7892985343933105, "logps/chosen": -236.2996368408203, "logps/rejected": -301.45672607421875, "loss": 0.1166, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.197899341583252, "rewards/margins": 6.826905727386475, "rewards/rejected": -11.024805068969727, "step": 5620 }, { "epoch": 1.09, "learning_rate": 3.5316746961961604e-07, "logits/chosen": -2.744631290435791, "logits/rejected": -2.6552014350891113, "logps/chosen": -238.23037719726562, "logps/rejected": -312.98663330078125, "loss": 0.1357, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.341963291168213, "rewards/margins": 9.205650329589844, "rewards/rejected": -14.547612190246582, "step": 5630 }, { "epoch": 1.09, "learning_rate": 3.5280793844826347e-07, "logits/chosen": -2.7977890968322754, "logits/rejected": -2.785160541534424, "logps/chosen": -314.02178955078125, "logps/rejected": -372.805908203125, "loss": 0.1376, "rewards/accuracies": 1.0, "rewards/chosen": -1.443086862564087, "rewards/margins": 10.402233123779297, "rewards/rejected": -11.845319747924805, "step": 5640 }, { "epoch": 1.1, "learning_rate": 3.524484072769109e-07, "logits/chosen": -2.7562460899353027, "logits/rejected": -2.6300792694091797, "logps/chosen": -293.38128662109375, "logps/rejected": -294.54876708984375, "loss": 0.1768, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.166192054748535, "rewards/margins": 4.648423194885254, "rewards/rejected": -9.814615249633789, "step": 5650 }, { "epoch": 1.1, "learning_rate": 3.5208887610555833e-07, "logits/chosen": -2.7785027027130127, "logits/rejected": -2.7121129035949707, "logps/chosen": -203.39974975585938, "logps/rejected": -292.22882080078125, "loss": 0.1521, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.449343681335449, "rewards/margins": 8.1329927444458, "rewards/rejected": -12.58233642578125, "step": 5660 }, { "epoch": 1.1, "learning_rate": 3.517293449342058e-07, "logits/chosen": -2.847832202911377, "logits/rejected": -2.827631950378418, "logps/chosen": -286.88092041015625, "logps/rejected": -437.13336181640625, "loss": 0.1473, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8340246081352234, "rewards/margins": 12.683631896972656, "rewards/rejected": -13.517656326293945, "step": 5670 }, { "epoch": 1.1, "learning_rate": 3.5136981376285324e-07, "logits/chosen": -2.5934131145477295, "logits/rejected": -2.6727213859558105, "logps/chosen": -212.5731964111328, "logps/rejected": -298.6236267089844, "loss": 0.1356, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.224434852600098, "rewards/margins": 9.610193252563477, "rewards/rejected": -13.834628105163574, "step": 5680 }, { "epoch": 1.1, "learning_rate": 3.5101028259150066e-07, "logits/chosen": -2.627270221710205, "logits/rejected": -2.6261956691741943, "logps/chosen": -235.8941650390625, "logps/rejected": -323.42633056640625, "loss": 0.1991, "rewards/accuracies": 0.75, "rewards/chosen": -3.8237156867980957, "rewards/margins": 7.967219352722168, "rewards/rejected": -11.790935516357422, "step": 5690 }, { "epoch": 1.11, "learning_rate": 3.506507514201481e-07, "logits/chosen": -2.7374954223632812, "logits/rejected": -2.672309160232544, "logps/chosen": -186.88565063476562, "logps/rejected": -241.9154815673828, "loss": 0.1396, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6796095371246338, "rewards/margins": 5.4361772537231445, "rewards/rejected": -7.115787506103516, "step": 5700 }, { "epoch": 1.11, "eval_logits/chosen": -2.6859819889068604, "eval_logits/rejected": -2.6761467456817627, "eval_logps/chosen": -252.7005615234375, "eval_logps/rejected": -265.2137756347656, "eval_loss": 0.5312688946723938, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -5.846251010894775, "eval_rewards/margins": 3.483530282974243, "eval_rewards/rejected": -9.329782485961914, "eval_runtime": 154.0816, "eval_samples_per_second": 20.483, "eval_steps_per_second": 0.325, "step": 5700 }, { "epoch": 1.11, "learning_rate": 3.502912202487955e-07, "logits/chosen": -2.7474465370178223, "logits/rejected": -2.621155023574829, "logps/chosen": -196.67938232421875, "logps/rejected": -284.1141662597656, "loss": 0.1468, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.0635247230529785, "rewards/margins": 7.5743865966796875, "rewards/rejected": -10.637911796569824, "step": 5710 }, { "epoch": 1.11, "learning_rate": 3.4993168907744295e-07, "logits/chosen": -2.782020092010498, "logits/rejected": -2.882028102874756, "logps/chosen": -191.17454528808594, "logps/rejected": -237.24221801757812, "loss": 0.1619, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.9687166213989258, "rewards/margins": 5.077371120452881, "rewards/rejected": -7.046088218688965, "step": 5720 }, { "epoch": 1.11, "learning_rate": 3.495721579060905e-07, "logits/chosen": -2.7573437690734863, "logits/rejected": -2.7375659942626953, "logps/chosen": -250.4689483642578, "logps/rejected": -251.02511596679688, "loss": 0.1015, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.799938201904297, "rewards/margins": 8.151664733886719, "rewards/rejected": -11.951601028442383, "step": 5730 }, { "epoch": 1.11, "learning_rate": 3.492126267347379e-07, "logits/chosen": -2.698831558227539, "logits/rejected": -2.7048041820526123, "logps/chosen": -170.64048767089844, "logps/rejected": -225.00619506835938, "loss": 0.1125, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.595546007156372, "rewards/margins": 5.730384349822998, "rewards/rejected": -8.32593059539795, "step": 5740 }, { "epoch": 1.12, "learning_rate": 3.4885309556338534e-07, "logits/chosen": -2.66579008102417, "logits/rejected": -2.749051570892334, "logps/chosen": -169.31460571289062, "logps/rejected": -291.7335510253906, "loss": 0.1228, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.8864710330963135, "rewards/margins": 10.655224800109863, "rewards/rejected": -13.541696548461914, "step": 5750 }, { "epoch": 1.12, "learning_rate": 3.4849356439203277e-07, "logits/chosen": -2.6012725830078125, "logits/rejected": -2.5726571083068848, "logps/chosen": -219.74740600585938, "logps/rejected": -252.9861602783203, "loss": 0.1032, "rewards/accuracies": 1.0, "rewards/chosen": -3.485055923461914, "rewards/margins": 7.809021949768066, "rewards/rejected": -11.29407787322998, "step": 5760 }, { "epoch": 1.12, "learning_rate": 3.481340332206802e-07, "logits/chosen": -2.696786880493164, "logits/rejected": -2.65468168258667, "logps/chosen": -244.3468475341797, "logps/rejected": -299.8504333496094, "loss": 0.1175, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.8251352310180664, "rewards/margins": 7.654874324798584, "rewards/rejected": -9.480011940002441, "step": 5770 }, { "epoch": 1.12, "learning_rate": 3.477745020493277e-07, "logits/chosen": -2.800767421722412, "logits/rejected": -2.798677444458008, "logps/chosen": -206.37026977539062, "logps/rejected": -287.0042724609375, "loss": 0.1902, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.6202633380889893, "rewards/margins": 6.435457706451416, "rewards/rejected": -8.055720329284668, "step": 5780 }, { "epoch": 1.12, "learning_rate": 3.474149708779751e-07, "logits/chosen": -2.7777695655822754, "logits/rejected": -2.772519588470459, "logps/chosen": -231.9873809814453, "logps/rejected": -320.47271728515625, "loss": 0.0867, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.735586404800415, "rewards/margins": 8.665799140930176, "rewards/rejected": -11.401385307312012, "step": 5790 }, { "epoch": 1.13, "learning_rate": 3.4705543970662253e-07, "logits/chosen": -2.915607213973999, "logits/rejected": -2.9438180923461914, "logps/chosen": -270.58135986328125, "logps/rejected": -388.30206298828125, "loss": 0.0672, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8011308908462524, "rewards/margins": 10.29234790802002, "rewards/rejected": -11.093478202819824, "step": 5800 }, { "epoch": 1.13, "eval_logits/chosen": -2.7117955684661865, "eval_logits/rejected": -2.701897382736206, "eval_logps/chosen": -242.89669799804688, "eval_logps/rejected": -253.0458984375, "eval_loss": 0.5428944230079651, "eval_rewards/accuracies": 0.6825000047683716, "eval_rewards/chosen": -4.865864276885986, "eval_rewards/margins": 3.2471323013305664, "eval_rewards/rejected": -8.112995147705078, "eval_runtime": 154.459, "eval_samples_per_second": 20.433, "eval_steps_per_second": 0.324, "step": 5800 }, { "epoch": 1.13, "learning_rate": 3.4669590853526996e-07, "logits/chosen": -2.9234933853149414, "logits/rejected": -2.8937723636627197, "logps/chosen": -286.6083068847656, "logps/rejected": -358.56915283203125, "loss": 0.1309, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.522188425064087, "rewards/margins": 8.710290908813477, "rewards/rejected": -10.232478141784668, "step": 5810 }, { "epoch": 1.13, "learning_rate": 3.463363773639174e-07, "logits/chosen": -2.7857749462127686, "logits/rejected": -2.742250919342041, "logps/chosen": -222.6065673828125, "logps/rejected": -343.77618408203125, "loss": 0.1403, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.9214558601379395, "rewards/margins": 7.006571292877197, "rewards/rejected": -9.92802619934082, "step": 5820 }, { "epoch": 1.13, "learning_rate": 3.459768461925649e-07, "logits/chosen": -2.7955751419067383, "logits/rejected": -2.8130767345428467, "logps/chosen": -191.033203125, "logps/rejected": -269.64898681640625, "loss": 0.1452, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.88899302482605, "rewards/margins": 6.952218055725098, "rewards/rejected": -10.841211318969727, "step": 5830 }, { "epoch": 1.13, "learning_rate": 3.4561731502121235e-07, "logits/chosen": -2.664130210876465, "logits/rejected": -2.6832947731018066, "logps/chosen": -249.15432739257812, "logps/rejected": -322.24700927734375, "loss": 0.137, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.849069118499756, "rewards/margins": 8.027547836303711, "rewards/rejected": -12.876615524291992, "step": 5840 }, { "epoch": 1.14, "learning_rate": 3.452577838498598e-07, "logits/chosen": -2.802269458770752, "logits/rejected": -2.750352144241333, "logps/chosen": -254.30221557617188, "logps/rejected": -295.14373779296875, "loss": 0.1311, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.9655396938323975, "rewards/margins": 7.998236179351807, "rewards/rejected": -10.963777542114258, "step": 5850 }, { "epoch": 1.14, "learning_rate": 3.448982526785072e-07, "logits/chosen": -2.6600778102874756, "logits/rejected": -2.669320583343506, "logps/chosen": -323.6750793457031, "logps/rejected": -399.33575439453125, "loss": 0.1515, "rewards/accuracies": 1.0, "rewards/chosen": -1.116106629371643, "rewards/margins": 10.888641357421875, "rewards/rejected": -12.004746437072754, "step": 5860 }, { "epoch": 1.14, "learning_rate": 3.4453872150715463e-07, "logits/chosen": -2.7690088748931885, "logits/rejected": -2.690573215484619, "logps/chosen": -266.77801513671875, "logps/rejected": -295.81988525390625, "loss": 0.1066, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -9.111680030822754, "rewards/margins": 6.277945041656494, "rewards/rejected": -15.389625549316406, "step": 5870 }, { "epoch": 1.14, "learning_rate": 3.441791903358021e-07, "logits/chosen": -2.710983991622925, "logits/rejected": -2.7271265983581543, "logps/chosen": -142.81861877441406, "logps/rejected": -266.3145751953125, "loss": 0.0928, "rewards/accuracies": 1.0, "rewards/chosen": -2.9935505390167236, "rewards/margins": 7.031019687652588, "rewards/rejected": -10.024569511413574, "step": 5880 }, { "epoch": 1.14, "learning_rate": 3.4381965916444954e-07, "logits/chosen": -2.831089496612549, "logits/rejected": -2.7688608169555664, "logps/chosen": -243.38113403320312, "logps/rejected": -328.47723388671875, "loss": 0.149, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.787606716156006, "rewards/margins": 6.9904022216796875, "rewards/rejected": -9.778009414672852, "step": 5890 }, { "epoch": 1.15, "learning_rate": 3.4346012799309697e-07, "logits/chosen": -2.618217945098877, "logits/rejected": -2.5108141899108887, "logps/chosen": -156.2168426513672, "logps/rejected": -288.3763427734375, "loss": 0.1091, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.7162649631500244, "rewards/margins": 9.442224502563477, "rewards/rejected": -11.158490180969238, "step": 5900 }, { "epoch": 1.15, "eval_logits/chosen": -2.6338837146759033, "eval_logits/rejected": -2.6196024417877197, "eval_logps/chosen": -258.26812744140625, "eval_logps/rejected": -276.4388427734375, "eval_loss": 0.5826197862625122, "eval_rewards/accuracies": 0.6949999928474426, "eval_rewards/chosen": -6.403004169464111, "eval_rewards/margins": 4.049283027648926, "eval_rewards/rejected": -10.452287673950195, "eval_runtime": 140.158, "eval_samples_per_second": 22.517, "eval_steps_per_second": 0.357, "step": 5900 }, { "epoch": 1.15, "learning_rate": 3.431005968217444e-07, "logits/chosen": -2.7534470558166504, "logits/rejected": -2.795793294906616, "logps/chosen": -180.9386444091797, "logps/rejected": -271.31573486328125, "loss": 0.102, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.5734479427337646, "rewards/margins": 8.237764358520508, "rewards/rejected": -10.811212539672852, "step": 5910 }, { "epoch": 1.15, "learning_rate": 3.427410656503918e-07, "logits/chosen": -2.782384157180786, "logits/rejected": -2.6864218711853027, "logps/chosen": -208.7670135498047, "logps/rejected": -257.57965087890625, "loss": 0.1953, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2141883373260498, "rewards/margins": 7.653440952301025, "rewards/rejected": -8.86762809753418, "step": 5920 }, { "epoch": 1.15, "learning_rate": 3.4238153447903936e-07, "logits/chosen": -2.686586618423462, "logits/rejected": -2.7302565574645996, "logps/chosen": -162.2934112548828, "logps/rejected": -286.556884765625, "loss": 0.1371, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.947363376617432, "rewards/margins": 9.68288803100586, "rewards/rejected": -14.6302490234375, "step": 5930 }, { "epoch": 1.15, "learning_rate": 3.420220033076868e-07, "logits/chosen": -2.8979978561401367, "logits/rejected": -2.8668251037597656, "logps/chosen": -358.2300720214844, "logps/rejected": -470.30316162109375, "loss": 0.1297, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8053625226020813, "rewards/margins": 10.31999397277832, "rewards/rejected": -11.125356674194336, "step": 5940 }, { "epoch": 1.16, "learning_rate": 3.416624721363342e-07, "logits/chosen": -2.5310025215148926, "logits/rejected": -2.5251810550689697, "logps/chosen": -212.5767822265625, "logps/rejected": -287.3901672363281, "loss": 0.1589, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.835158586502075, "rewards/margins": 7.662600517272949, "rewards/rejected": -11.497759819030762, "step": 5950 }, { "epoch": 1.16, "learning_rate": 3.4130294096498164e-07, "logits/chosen": -2.6718804836273193, "logits/rejected": -2.584641695022583, "logps/chosen": -257.1517639160156, "logps/rejected": -317.1195983886719, "loss": 0.1741, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.7986114025115967, "rewards/margins": 7.601963996887207, "rewards/rejected": -10.400575637817383, "step": 5960 }, { "epoch": 1.16, "learning_rate": 3.4094340979362907e-07, "logits/chosen": -2.7640786170959473, "logits/rejected": -2.762349843978882, "logps/chosen": -351.98883056640625, "logps/rejected": -444.14581298828125, "loss": 0.1052, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.045039176940918, "rewards/margins": 11.323704719543457, "rewards/rejected": -13.368743896484375, "step": 5970 }, { "epoch": 1.16, "learning_rate": 3.4058387862227655e-07, "logits/chosen": -2.4686408042907715, "logits/rejected": -2.4912800788879395, "logps/chosen": -251.7490234375, "logps/rejected": -287.3038330078125, "loss": 0.1281, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.2519376277923584, "rewards/margins": 7.740379333496094, "rewards/rejected": -9.992318153381348, "step": 5980 }, { "epoch": 1.16, "learning_rate": 3.40224347450924e-07, "logits/chosen": -2.5413877964019775, "logits/rejected": -2.5480809211730957, "logps/chosen": -283.52972412109375, "logps/rejected": -291.5387878417969, "loss": 0.126, "rewards/accuracies": 1.0, "rewards/chosen": -3.6137404441833496, "rewards/margins": 10.562993049621582, "rewards/rejected": -14.176733016967773, "step": 5990 }, { "epoch": 1.16, "learning_rate": 3.398648162795714e-07, "logits/chosen": -2.7131056785583496, "logits/rejected": -2.7502658367156982, "logps/chosen": -225.12423706054688, "logps/rejected": -282.3991394042969, "loss": 0.1643, "rewards/accuracies": 1.0, "rewards/chosen": -2.4087367057800293, "rewards/margins": 6.267477035522461, "rewards/rejected": -8.676214218139648, "step": 6000 }, { "epoch": 1.16, "eval_logits/chosen": -2.5909934043884277, "eval_logits/rejected": -2.5798749923706055, "eval_logps/chosen": -262.0378112792969, "eval_logps/rejected": -283.4436950683594, "eval_loss": 0.5502873063087463, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -6.779973983764648, "eval_rewards/margins": 4.372798442840576, "eval_rewards/rejected": -11.15277099609375, "eval_runtime": 141.5414, "eval_samples_per_second": 22.297, "eval_steps_per_second": 0.353, "step": 6000 }, { "epoch": 1.17, "learning_rate": 3.3950528510821883e-07, "logits/chosen": -2.6086387634277344, "logits/rejected": -2.444122791290283, "logps/chosen": -221.9136962890625, "logps/rejected": -288.4536437988281, "loss": 0.0703, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.792720794677734, "rewards/margins": 8.3062162399292, "rewards/rejected": -15.098939895629883, "step": 6010 }, { "epoch": 1.17, "learning_rate": 3.3914575393686626e-07, "logits/chosen": -2.4521384239196777, "logits/rejected": -2.5658693313598633, "logps/chosen": -188.38824462890625, "logps/rejected": -299.87847900390625, "loss": 0.1233, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.8679680824279785, "rewards/margins": 6.399819374084473, "rewards/rejected": -9.267788887023926, "step": 6020 }, { "epoch": 1.17, "learning_rate": 3.387862227655138e-07, "logits/chosen": -2.721592903137207, "logits/rejected": -2.8075852394104004, "logps/chosen": -207.4969940185547, "logps/rejected": -340.68035888671875, "loss": 0.1924, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.3890742063522339, "rewards/margins": 13.937342643737793, "rewards/rejected": -15.326417922973633, "step": 6030 }, { "epoch": 1.17, "learning_rate": 3.384266915941612e-07, "logits/chosen": -2.860846996307373, "logits/rejected": -2.8077187538146973, "logps/chosen": -197.4476776123047, "logps/rejected": -238.8955841064453, "loss": 0.182, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0824860334396362, "rewards/margins": 6.919004917144775, "rewards/rejected": -8.001490592956543, "step": 6040 }, { "epoch": 1.17, "learning_rate": 3.3806716042280865e-07, "logits/chosen": -2.6932692527770996, "logits/rejected": -2.7947869300842285, "logps/chosen": -201.7335968017578, "logps/rejected": -232.18515014648438, "loss": 0.1437, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2536561489105225, "rewards/margins": 6.48312520980835, "rewards/rejected": -7.736780643463135, "step": 6050 }, { "epoch": 1.18, "learning_rate": 3.377076292514561e-07, "logits/chosen": -2.4198594093322754, "logits/rejected": -2.6124606132507324, "logps/chosen": -222.1281280517578, "logps/rejected": -239.62393188476562, "loss": 0.1581, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.317704677581787, "rewards/margins": 7.412570953369141, "rewards/rejected": -10.730276107788086, "step": 6060 }, { "epoch": 1.18, "learning_rate": 3.373480980801035e-07, "logits/chosen": -2.9850125312805176, "logits/rejected": -3.0179200172424316, "logps/chosen": -232.9512939453125, "logps/rejected": -307.1102294921875, "loss": 0.1774, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.4660871028900146, "rewards/margins": 7.160256862640381, "rewards/rejected": -9.626344680786133, "step": 6070 }, { "epoch": 1.18, "learning_rate": 3.36988566908751e-07, "logits/chosen": -2.798388957977295, "logits/rejected": -2.8144421577453613, "logps/chosen": -159.11264038085938, "logps/rejected": -215.4800567626953, "loss": 0.1828, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.0232648849487305, "rewards/margins": 5.241990089416504, "rewards/rejected": -10.265254020690918, "step": 6080 }, { "epoch": 1.18, "learning_rate": 3.366290357373984e-07, "logits/chosen": -2.8459601402282715, "logits/rejected": -2.795527935028076, "logps/chosen": -232.2338104248047, "logps/rejected": -303.9999084472656, "loss": 0.1249, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.959782600402832, "rewards/margins": 7.116204738616943, "rewards/rejected": -13.075986862182617, "step": 6090 }, { "epoch": 1.18, "learning_rate": 3.3626950456604584e-07, "logits/chosen": -2.9556031227111816, "logits/rejected": -2.8668324947357178, "logps/chosen": -358.3072204589844, "logps/rejected": -479.006103515625, "loss": 0.1091, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.777050733566284, "rewards/margins": 11.381028175354004, "rewards/rejected": -14.158079147338867, "step": 6100 }, { "epoch": 1.18, "eval_logits/chosen": -2.7025187015533447, "eval_logits/rejected": -2.690358877182007, "eval_logps/chosen": -257.2952880859375, "eval_logps/rejected": -274.37188720703125, "eval_loss": 0.5208981037139893, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -6.305721282958984, "eval_rewards/margins": 3.939871311187744, "eval_rewards/rejected": -10.245593070983887, "eval_runtime": 140.409, "eval_samples_per_second": 22.477, "eval_steps_per_second": 0.356, "step": 6100 }, { "epoch": 1.19, "learning_rate": 3.3590997339469327e-07, "logits/chosen": -2.724202871322632, "logits/rejected": -2.7696611881256104, "logps/chosen": -278.71099853515625, "logps/rejected": -414.0077209472656, "loss": 0.1199, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10096763074398041, "rewards/margins": 12.203642845153809, "rewards/rejected": -12.304609298706055, "step": 6110 }, { "epoch": 1.19, "learning_rate": 3.3555044222334075e-07, "logits/chosen": -2.831987142562866, "logits/rejected": -2.799257755279541, "logps/chosen": -340.9770812988281, "logps/rejected": -303.50927734375, "loss": 0.1199, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.7819552421569824, "rewards/margins": 8.611495018005371, "rewards/rejected": -11.393450736999512, "step": 6120 }, { "epoch": 1.19, "learning_rate": 3.3519091105198823e-07, "logits/chosen": -2.8285770416259766, "logits/rejected": -2.9155449867248535, "logps/chosen": -222.5007781982422, "logps/rejected": -323.23028564453125, "loss": 0.1012, "rewards/accuracies": 1.0, "rewards/chosen": -2.6057395935058594, "rewards/margins": 8.996025085449219, "rewards/rejected": -11.601765632629395, "step": 6130 }, { "epoch": 1.19, "learning_rate": 3.3483137988063566e-07, "logits/chosen": -2.8326056003570557, "logits/rejected": -2.7920801639556885, "logps/chosen": -160.5250701904297, "logps/rejected": -212.5648956298828, "loss": 0.1224, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.710567831993103, "rewards/margins": 6.801335334777832, "rewards/rejected": -8.511902809143066, "step": 6140 }, { "epoch": 1.19, "learning_rate": 3.344718487092831e-07, "logits/chosen": -2.9053919315338135, "logits/rejected": -2.9459848403930664, "logps/chosen": -260.80120849609375, "logps/rejected": -353.389892578125, "loss": 0.1474, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.7903811931610107, "rewards/margins": 8.168710708618164, "rewards/rejected": -9.959092140197754, "step": 6150 }, { "epoch": 1.2, "learning_rate": 3.341123175379305e-07, "logits/chosen": -2.778244972229004, "logits/rejected": -2.8497090339660645, "logps/chosen": -294.40057373046875, "logps/rejected": -343.45721435546875, "loss": 0.1231, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.6665167808532715, "rewards/margins": 8.31949234008789, "rewards/rejected": -11.986010551452637, "step": 6160 }, { "epoch": 1.2, "learning_rate": 3.3375278636657794e-07, "logits/chosen": -2.754607915878296, "logits/rejected": -2.6639506816864014, "logps/chosen": -241.40841674804688, "logps/rejected": -327.3476867675781, "loss": 0.3996, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.393301486968994, "rewards/margins": 10.620524406433105, "rewards/rejected": -15.013826370239258, "step": 6170 }, { "epoch": 1.2, "learning_rate": 3.333932551952254e-07, "logits/chosen": -2.794943332672119, "logits/rejected": -2.726440906524658, "logps/chosen": -239.61221313476562, "logps/rejected": -288.391357421875, "loss": 0.1154, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0061144828796387, "rewards/margins": 7.411135196685791, "rewards/rejected": -8.417248725891113, "step": 6180 }, { "epoch": 1.2, "learning_rate": 3.3303372402387285e-07, "logits/chosen": -2.672645092010498, "logits/rejected": -2.7147326469421387, "logps/chosen": -184.77056884765625, "logps/rejected": -273.83319091796875, "loss": 0.1324, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.274429798126221, "rewards/margins": 6.249919414520264, "rewards/rejected": -10.524349212646484, "step": 6190 }, { "epoch": 1.2, "learning_rate": 3.326741928525203e-07, "logits/chosen": -2.8108317852020264, "logits/rejected": -2.8210465908050537, "logps/chosen": -237.4455108642578, "logps/rejected": -306.970947265625, "loss": 0.1128, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9643885493278503, "rewards/margins": 7.76919412612915, "rewards/rejected": -8.73358154296875, "step": 6200 }, { "epoch": 1.2, "eval_logits/chosen": -2.628884792327881, "eval_logits/rejected": -2.6117124557495117, "eval_logps/chosen": -260.33367919921875, "eval_logps/rejected": -282.7896728515625, "eval_loss": 0.5365757346153259, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -6.609562873840332, "eval_rewards/margins": 4.477807521820068, "eval_rewards/rejected": -11.087370872497559, "eval_runtime": 153.0609, "eval_samples_per_second": 20.619, "eval_steps_per_second": 0.327, "step": 6200 }, { "epoch": 1.21, "learning_rate": 3.323146616811677e-07, "logits/chosen": -2.976378917694092, "logits/rejected": -2.9297187328338623, "logps/chosen": -285.6937561035156, "logps/rejected": -311.7433776855469, "loss": 0.1185, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.0504415035247803, "rewards/margins": 8.310696601867676, "rewards/rejected": -11.361138343811035, "step": 6210 }, { "epoch": 1.21, "learning_rate": 3.319551305098152e-07, "logits/chosen": -2.9990527629852295, "logits/rejected": -2.865429401397705, "logps/chosen": -217.62661743164062, "logps/rejected": -297.47747802734375, "loss": 0.1025, "rewards/accuracies": 1.0, "rewards/chosen": -3.0945897102355957, "rewards/margins": 7.703749179840088, "rewards/rejected": -10.798337936401367, "step": 6220 }, { "epoch": 1.21, "learning_rate": 3.3159559933846267e-07, "logits/chosen": -2.7989859580993652, "logits/rejected": -2.833512306213379, "logps/chosen": -265.3959045410156, "logps/rejected": -336.2202453613281, "loss": 0.1391, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.1913533210754395, "rewards/margins": 7.045161247253418, "rewards/rejected": -13.236515998840332, "step": 6230 }, { "epoch": 1.21, "learning_rate": 3.312360681671101e-07, "logits/chosen": -2.6157264709472656, "logits/rejected": -2.702986240386963, "logps/chosen": -287.74078369140625, "logps/rejected": -360.38238525390625, "loss": 0.1243, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.966263771057129, "rewards/margins": 7.23165225982666, "rewards/rejected": -13.197916030883789, "step": 6240 }, { "epoch": 1.21, "learning_rate": 3.308765369957575e-07, "logits/chosen": -2.8906943798065186, "logits/rejected": -2.8710110187530518, "logps/chosen": -215.97463989257812, "logps/rejected": -289.1537780761719, "loss": 0.1138, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.455352783203125, "rewards/margins": 8.1903715133667, "rewards/rejected": -10.645724296569824, "step": 6250 }, { "epoch": 1.22, "learning_rate": 3.3051700582440495e-07, "logits/chosen": -2.873359203338623, "logits/rejected": -2.771235942840576, "logps/chosen": -279.60052490234375, "logps/rejected": -353.4242248535156, "loss": 0.0998, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.851736545562744, "rewards/margins": 7.721796989440918, "rewards/rejected": -10.57353401184082, "step": 6260 }, { "epoch": 1.22, "learning_rate": 3.301574746530524e-07, "logits/chosen": -2.8997817039489746, "logits/rejected": -2.9244163036346436, "logps/chosen": -290.8459167480469, "logps/rejected": -302.6660461425781, "loss": 0.1221, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.964266300201416, "rewards/margins": 8.241120338439941, "rewards/rejected": -13.2053861618042, "step": 6270 }, { "epoch": 1.22, "learning_rate": 3.2979794348169986e-07, "logits/chosen": -2.8869094848632812, "logits/rejected": -2.841811180114746, "logps/chosen": -318.13165283203125, "logps/rejected": -286.990966796875, "loss": 0.1752, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -6.606424808502197, "rewards/margins": 8.159226417541504, "rewards/rejected": -14.765650749206543, "step": 6280 }, { "epoch": 1.22, "learning_rate": 3.294384123103473e-07, "logits/chosen": -2.8572590351104736, "logits/rejected": -2.9467549324035645, "logps/chosen": -174.5487060546875, "logps/rejected": -381.8702697753906, "loss": 0.1166, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.804342031478882, "rewards/margins": 12.296274185180664, "rewards/rejected": -15.100613594055176, "step": 6290 }, { "epoch": 1.22, "learning_rate": 3.290788811389947e-07, "logits/chosen": -2.7387213706970215, "logits/rejected": -2.757601261138916, "logps/chosen": -283.9013671875, "logps/rejected": -395.25177001953125, "loss": 0.2009, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.1901421546936035, "rewards/margins": 6.319852352142334, "rewards/rejected": -10.509993553161621, "step": 6300 }, { "epoch": 1.22, "eval_logits/chosen": -2.7317395210266113, "eval_logits/rejected": -2.7132482528686523, "eval_logps/chosen": -273.7660217285156, "eval_logps/rejected": -298.4337463378906, "eval_loss": 0.5346037745475769, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -7.952797889709473, "eval_rewards/margins": 4.6989850997924805, "eval_rewards/rejected": -12.651782035827637, "eval_runtime": 153.294, "eval_samples_per_second": 20.588, "eval_steps_per_second": 0.326, "step": 6300 }, { "epoch": 1.23, "learning_rate": 3.2871934996764214e-07, "logits/chosen": -2.9453299045562744, "logits/rejected": -2.8158774375915527, "logps/chosen": -271.2457580566406, "logps/rejected": -371.6581115722656, "loss": 0.1343, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.781925201416016, "rewards/margins": 7.037691593170166, "rewards/rejected": -11.819616317749023, "step": 6310 }, { "epoch": 1.23, "learning_rate": 3.283598187962896e-07, "logits/chosen": -2.7703518867492676, "logits/rejected": -2.811502695083618, "logps/chosen": -256.62591552734375, "logps/rejected": -290.80560302734375, "loss": 0.0975, "rewards/accuracies": 1.0, "rewards/chosen": -0.04703056812286377, "rewards/margins": 8.074069023132324, "rewards/rejected": -8.121099472045898, "step": 6320 }, { "epoch": 1.23, "learning_rate": 3.280002876249371e-07, "logits/chosen": -2.912069320678711, "logits/rejected": -2.840228319168091, "logps/chosen": -263.3592834472656, "logps/rejected": -323.1920166015625, "loss": 0.1397, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.208311557769775, "rewards/margins": 7.382091522216797, "rewards/rejected": -12.590402603149414, "step": 6330 }, { "epoch": 1.23, "learning_rate": 3.2764075645358453e-07, "logits/chosen": -2.7262308597564697, "logits/rejected": -2.7747886180877686, "logps/chosen": -282.7395935058594, "logps/rejected": -277.0335693359375, "loss": 0.1317, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.310909748077393, "rewards/margins": 8.081521987915039, "rewards/rejected": -12.392431259155273, "step": 6340 }, { "epoch": 1.23, "learning_rate": 3.2728122528223196e-07, "logits/chosen": -2.680694103240967, "logits/rejected": -2.773026704788208, "logps/chosen": -326.7906494140625, "logps/rejected": -489.86309814453125, "loss": 0.1268, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.660059928894043, "rewards/margins": 12.254980087280273, "rewards/rejected": -17.9150390625, "step": 6350 }, { "epoch": 1.23, "learning_rate": 3.269216941108794e-07, "logits/chosen": -2.919232130050659, "logits/rejected": -2.8801798820495605, "logps/chosen": -232.4424285888672, "logps/rejected": -277.77850341796875, "loss": 0.1022, "rewards/accuracies": 0.75, "rewards/chosen": -8.817774772644043, "rewards/margins": 4.973958492279053, "rewards/rejected": -13.791735649108887, "step": 6360 }, { "epoch": 1.24, "learning_rate": 3.265621629395268e-07, "logits/chosen": -2.8190665245056152, "logits/rejected": -2.726118564605713, "logps/chosen": -242.07022094726562, "logps/rejected": -326.2730407714844, "loss": 0.142, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -7.421662330627441, "rewards/margins": 7.453909873962402, "rewards/rejected": -14.875570297241211, "step": 6370 }, { "epoch": 1.24, "learning_rate": 3.262026317681743e-07, "logits/chosen": -2.7926979064941406, "logits/rejected": -2.78657865524292, "logps/chosen": -240.29647827148438, "logps/rejected": -347.209716796875, "loss": 0.135, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.99425745010376, "rewards/margins": 10.459344863891602, "rewards/rejected": -15.45360279083252, "step": 6380 }, { "epoch": 1.24, "learning_rate": 3.258431005968217e-07, "logits/chosen": -2.7790284156799316, "logits/rejected": -2.9192051887512207, "logps/chosen": -236.951171875, "logps/rejected": -307.77569580078125, "loss": 0.1336, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.672762393951416, "rewards/margins": 8.64220905303955, "rewards/rejected": -16.314971923828125, "step": 6390 }, { "epoch": 1.24, "learning_rate": 3.2548356942546915e-07, "logits/chosen": -2.873333215713501, "logits/rejected": -2.9237146377563477, "logps/chosen": -221.26004028320312, "logps/rejected": -385.15484619140625, "loss": 0.1862, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -5.8607025146484375, "rewards/margins": 7.9548821449279785, "rewards/rejected": -13.815584182739258, "step": 6400 }, { "epoch": 1.24, "eval_logits/chosen": -2.6900055408477783, "eval_logits/rejected": -2.674002170562744, "eval_logps/chosen": -279.8787841796875, "eval_logps/rejected": -304.4410400390625, "eval_loss": 0.5409572720527649, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -8.56407356262207, "eval_rewards/margins": 4.688436031341553, "eval_rewards/rejected": -13.252508163452148, "eval_runtime": 151.2682, "eval_samples_per_second": 20.864, "eval_steps_per_second": 0.331, "step": 6400 }, { "epoch": 1.24, "learning_rate": 3.251240382541166e-07, "logits/chosen": -2.801133632659912, "logits/rejected": -2.825709342956543, "logps/chosen": -246.2862548828125, "logps/rejected": -319.25164794921875, "loss": 0.1005, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.7105536460876465, "rewards/margins": 8.40849781036377, "rewards/rejected": -16.119050979614258, "step": 6410 }, { "epoch": 1.25, "learning_rate": 3.2476450708276406e-07, "logits/chosen": -2.862619400024414, "logits/rejected": -2.7658121585845947, "logps/chosen": -333.66162109375, "logps/rejected": -423.8462829589844, "loss": 0.1263, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.267934322357178, "rewards/margins": 12.18339729309082, "rewards/rejected": -17.451330184936523, "step": 6420 }, { "epoch": 1.25, "learning_rate": 3.2440497591141154e-07, "logits/chosen": -2.6920571327209473, "logits/rejected": -2.769468307495117, "logps/chosen": -226.3386688232422, "logps/rejected": -324.83135986328125, "loss": 0.1015, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.818525314331055, "rewards/margins": 8.952909469604492, "rewards/rejected": -17.771434783935547, "step": 6430 }, { "epoch": 1.25, "learning_rate": 3.2404544474005897e-07, "logits/chosen": -2.8283748626708984, "logits/rejected": -2.7240800857543945, "logps/chosen": -202.03224182128906, "logps/rejected": -329.03814697265625, "loss": 0.0861, "rewards/accuracies": 1.0, "rewards/chosen": -4.734574317932129, "rewards/margins": 12.262895584106445, "rewards/rejected": -16.997468948364258, "step": 6440 }, { "epoch": 1.25, "learning_rate": 3.236859135687064e-07, "logits/chosen": -2.907532215118408, "logits/rejected": -2.916665554046631, "logps/chosen": -274.86962890625, "logps/rejected": -438.1475524902344, "loss": 0.1614, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.988499402999878, "rewards/margins": 10.746040344238281, "rewards/rejected": -14.734539985656738, "step": 6450 }, { "epoch": 1.25, "learning_rate": 3.233263823973538e-07, "logits/chosen": -2.909522294998169, "logits/rejected": -2.8612873554229736, "logps/chosen": -207.0958251953125, "logps/rejected": -260.4202575683594, "loss": 0.1597, "rewards/accuracies": 0.75, "rewards/chosen": -7.873379707336426, "rewards/margins": 6.500428676605225, "rewards/rejected": -14.373807907104492, "step": 6460 }, { "epoch": 1.26, "learning_rate": 3.2296685122600125e-07, "logits/chosen": -2.760603427886963, "logits/rejected": -2.8077752590179443, "logps/chosen": -241.1212158203125, "logps/rejected": -393.91729736328125, "loss": 0.1835, "rewards/accuracies": 1.0, "rewards/chosen": -2.09355092048645, "rewards/margins": 10.064352035522461, "rewards/rejected": -12.157902717590332, "step": 6470 }, { "epoch": 1.26, "learning_rate": 3.2260732005464873e-07, "logits/chosen": -2.8883023262023926, "logits/rejected": -2.870262622833252, "logps/chosen": -241.0962371826172, "logps/rejected": -357.7778015136719, "loss": 0.0931, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.14364352822303772, "rewards/margins": 9.347768783569336, "rewards/rejected": -9.491412162780762, "step": 6480 }, { "epoch": 1.26, "learning_rate": 3.2224778888329616e-07, "logits/chosen": -2.3451931476593018, "logits/rejected": -2.444458484649658, "logps/chosen": -228.77822875976562, "logps/rejected": -273.86859130859375, "loss": 0.2252, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.470632553100586, "rewards/margins": 8.284172058105469, "rewards/rejected": -11.754804611206055, "step": 6490 }, { "epoch": 1.26, "learning_rate": 3.218882577119436e-07, "logits/chosen": -2.957517147064209, "logits/rejected": -2.899106502532959, "logps/chosen": -221.6002655029297, "logps/rejected": -266.9825134277344, "loss": 0.137, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1074684858322144, "rewards/margins": 7.099783420562744, "rewards/rejected": -8.207250595092773, "step": 6500 }, { "epoch": 1.26, "eval_logits/chosen": -2.7445404529571533, "eval_logits/rejected": -2.728905439376831, "eval_logps/chosen": -246.21923828125, "eval_logps/rejected": -263.984130859375, "eval_loss": 0.6052098274230957, "eval_rewards/accuracies": 0.6850000023841858, "eval_rewards/chosen": -5.198116302490234, "eval_rewards/margins": 4.008700370788574, "eval_rewards/rejected": -9.206816673278809, "eval_runtime": 154.6629, "eval_samples_per_second": 20.406, "eval_steps_per_second": 0.323, "step": 6500 }, { "epoch": 1.26, "learning_rate": 3.2152872654059107e-07, "logits/chosen": -2.788585662841797, "logits/rejected": -2.778353214263916, "logps/chosen": -207.0595703125, "logps/rejected": -314.166259765625, "loss": 0.4933, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.369551420211792, "rewards/margins": 6.188743591308594, "rewards/rejected": -9.558295249938965, "step": 6510 }, { "epoch": 1.27, "learning_rate": 3.211691953692385e-07, "logits/chosen": -2.822997570037842, "logits/rejected": -2.736123561859131, "logps/chosen": -171.63253784179688, "logps/rejected": -229.5925750732422, "loss": 0.1272, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.5562565326690674, "rewards/margins": 5.356114387512207, "rewards/rejected": -7.9123711585998535, "step": 6520 }, { "epoch": 1.27, "learning_rate": 3.20809664197886e-07, "logits/chosen": -2.656463861465454, "logits/rejected": -2.6478307247161865, "logps/chosen": -233.30224609375, "logps/rejected": -297.5663146972656, "loss": 0.1435, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.6849629878997803, "rewards/margins": 9.462038040161133, "rewards/rejected": -12.147003173828125, "step": 6530 }, { "epoch": 1.27, "learning_rate": 3.204501330265334e-07, "logits/chosen": -2.8856730461120605, "logits/rejected": -2.8542838096618652, "logps/chosen": -289.2203674316406, "logps/rejected": -255.3348846435547, "loss": 0.1301, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.026634216308594, "rewards/margins": 5.3799614906311035, "rewards/rejected": -9.406596183776855, "step": 6540 }, { "epoch": 1.27, "learning_rate": 3.2009060185518083e-07, "logits/chosen": -2.7601351737976074, "logits/rejected": -2.740595817565918, "logps/chosen": -225.09317016601562, "logps/rejected": -336.46881103515625, "loss": 0.1418, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.083618402481079, "rewards/margins": 6.715296268463135, "rewards/rejected": -8.798913955688477, "step": 6550 }, { "epoch": 1.27, "learning_rate": 3.1973107068382826e-07, "logits/chosen": -2.764744997024536, "logits/rejected": -2.72230863571167, "logps/chosen": -238.1551971435547, "logps/rejected": -305.114990234375, "loss": 0.249, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.148994445800781, "rewards/margins": 5.854582786560059, "rewards/rejected": -10.00357723236084, "step": 6560 }, { "epoch": 1.28, "learning_rate": 3.193715395124757e-07, "logits/chosen": -2.6915459632873535, "logits/rejected": -2.642277240753174, "logps/chosen": -354.4752502441406, "logps/rejected": -352.70574951171875, "loss": 0.1459, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.953001499176025, "rewards/margins": 6.105034351348877, "rewards/rejected": -11.058036804199219, "step": 6570 }, { "epoch": 1.28, "learning_rate": 3.1901200834112317e-07, "logits/chosen": -2.693657636642456, "logits/rejected": -2.691105842590332, "logps/chosen": -147.98080444335938, "logps/rejected": -253.9555206298828, "loss": 0.203, "rewards/accuracies": 1.0, "rewards/chosen": -1.7674566507339478, "rewards/margins": 6.51474666595459, "rewards/rejected": -8.282205581665039, "step": 6580 }, { "epoch": 1.28, "learning_rate": 3.186524771697706e-07, "logits/chosen": -2.834761381149292, "logits/rejected": -2.7642300128936768, "logps/chosen": -329.1448974609375, "logps/rejected": -369.2736511230469, "loss": 0.1137, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.5362348556518555, "rewards/margins": 9.899274826049805, "rewards/rejected": -15.435510635375977, "step": 6590 }, { "epoch": 1.28, "learning_rate": 3.18292945998418e-07, "logits/chosen": -2.750600814819336, "logits/rejected": -2.6373486518859863, "logps/chosen": -234.7852020263672, "logps/rejected": -313.743408203125, "loss": 0.2336, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.088565826416016, "rewards/margins": 8.900773048400879, "rewards/rejected": -12.989338874816895, "step": 6600 }, { "epoch": 1.28, "eval_logits/chosen": -2.6337947845458984, "eval_logits/rejected": -2.618746042251587, "eval_logps/chosen": -256.7079162597656, "eval_logps/rejected": -277.7032775878906, "eval_loss": 0.5167534947395325, "eval_rewards/accuracies": 0.6949999928474426, "eval_rewards/chosen": -6.246983051300049, "eval_rewards/margins": 4.331745624542236, "eval_rewards/rejected": -10.578729629516602, "eval_runtime": 153.9631, "eval_samples_per_second": 20.498, "eval_steps_per_second": 0.325, "step": 6600 }, { "epoch": 1.28, "learning_rate": 3.179334148270655e-07, "logits/chosen": -2.6361284255981445, "logits/rejected": -2.5865797996520996, "logps/chosen": -252.5102996826172, "logps/rejected": -332.4734191894531, "loss": 0.1181, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.4548401832580566, "rewards/margins": 9.476263046264648, "rewards/rejected": -11.931102752685547, "step": 6610 }, { "epoch": 1.29, "learning_rate": 3.1757388365571294e-07, "logits/chosen": -2.4050495624542236, "logits/rejected": -2.5422911643981934, "logps/chosen": -159.18374633789062, "logps/rejected": -296.66363525390625, "loss": 0.1392, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -5.344342231750488, "rewards/margins": 6.840249538421631, "rewards/rejected": -12.184591293334961, "step": 6620 }, { "epoch": 1.29, "learning_rate": 3.172143524843604e-07, "logits/chosen": -2.795285701751709, "logits/rejected": -2.7838597297668457, "logps/chosen": -211.2665557861328, "logps/rejected": -280.879638671875, "loss": 0.1005, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -6.351258754730225, "rewards/margins": 5.330613136291504, "rewards/rejected": -11.681873321533203, "step": 6630 }, { "epoch": 1.29, "learning_rate": 3.1685482131300784e-07, "logits/chosen": -2.852144956588745, "logits/rejected": -2.7763545513153076, "logps/chosen": -213.666259765625, "logps/rejected": -256.70977783203125, "loss": 0.1375, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.642565727233887, "rewards/margins": 6.521868705749512, "rewards/rejected": -11.164435386657715, "step": 6640 }, { "epoch": 1.29, "learning_rate": 3.1649529014165527e-07, "logits/chosen": -2.8511900901794434, "logits/rejected": -2.7841453552246094, "logps/chosen": -233.0352325439453, "logps/rejected": -331.7823486328125, "loss": 0.1123, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6883093118667603, "rewards/margins": 9.369901657104492, "rewards/rejected": -11.058210372924805, "step": 6650 }, { "epoch": 1.29, "learning_rate": 3.161357589703027e-07, "logits/chosen": -2.709275007247925, "logits/rejected": -2.7078492641448975, "logps/chosen": -295.0395812988281, "logps/rejected": -360.08660888671875, "loss": 0.1436, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.444880962371826, "rewards/margins": 8.707773208618164, "rewards/rejected": -11.152654647827148, "step": 6660 }, { "epoch": 1.29, "learning_rate": 3.1577622779895013e-07, "logits/chosen": -2.689150333404541, "logits/rejected": -2.7489943504333496, "logps/chosen": -132.4023895263672, "logps/rejected": -272.9388122558594, "loss": 0.1077, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.5702412128448486, "rewards/margins": 9.820098876953125, "rewards/rejected": -13.390339851379395, "step": 6670 }, { "epoch": 1.3, "learning_rate": 3.154166966275976e-07, "logits/chosen": -2.8572585582733154, "logits/rejected": -2.8273632526397705, "logps/chosen": -232.9200439453125, "logps/rejected": -354.3174743652344, "loss": 0.122, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.9304556846618652, "rewards/margins": 8.08757209777832, "rewards/rejected": -11.018026351928711, "step": 6680 }, { "epoch": 1.3, "learning_rate": 3.1505716545624504e-07, "logits/chosen": -2.671022415161133, "logits/rejected": -2.7371838092803955, "logps/chosen": -218.63693237304688, "logps/rejected": -334.6982727050781, "loss": 0.1513, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.0609169006347656, "rewards/margins": 7.168398380279541, "rewards/rejected": -10.229315757751465, "step": 6690 }, { "epoch": 1.3, "learning_rate": 3.1469763428489246e-07, "logits/chosen": -2.7181735038757324, "logits/rejected": -2.721872329711914, "logps/chosen": -299.78741455078125, "logps/rejected": -276.9378967285156, "loss": 0.1341, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.6885970830917358, "rewards/margins": 7.510015964508057, "rewards/rejected": -9.198613166809082, "step": 6700 }, { "epoch": 1.3, "eval_logits/chosen": -2.7110984325408936, "eval_logits/rejected": -2.7003793716430664, "eval_logps/chosen": -255.26902770996094, "eval_logps/rejected": -278.4936828613281, "eval_loss": 0.5187221765518188, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -6.103095531463623, "eval_rewards/margins": 4.554676532745361, "eval_rewards/rejected": -10.657772064208984, "eval_runtime": 140.0492, "eval_samples_per_second": 22.535, "eval_steps_per_second": 0.357, "step": 6700 }, { "epoch": 1.3, "learning_rate": 3.1433810311353994e-07, "logits/chosen": -2.7828149795532227, "logits/rejected": -2.755964756011963, "logps/chosen": -223.8802032470703, "logps/rejected": -281.24676513671875, "loss": 0.078, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.5590885877609253, "rewards/margins": 12.588213920593262, "rewards/rejected": -12.029123306274414, "step": 6710 }, { "epoch": 1.3, "learning_rate": 3.1397857194218737e-07, "logits/chosen": -2.7480688095092773, "logits/rejected": -2.656557559967041, "logps/chosen": -174.9902801513672, "logps/rejected": -338.25262451171875, "loss": 0.1178, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.34710431098938, "rewards/margins": 9.372222900390625, "rewards/rejected": -11.719327926635742, "step": 6720 }, { "epoch": 1.31, "learning_rate": 3.1361904077083485e-07, "logits/chosen": -2.763228178024292, "logits/rejected": -2.7415266036987305, "logps/chosen": -296.6454162597656, "logps/rejected": -340.6575927734375, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": -1.6722679138183594, "rewards/margins": 8.62702465057373, "rewards/rejected": -10.299293518066406, "step": 6730 }, { "epoch": 1.31, "learning_rate": 3.132595095994823e-07, "logits/chosen": -2.5516011714935303, "logits/rejected": -2.680459499359131, "logps/chosen": -262.86090087890625, "logps/rejected": -273.52545166015625, "loss": 0.1223, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.9065345525741577, "rewards/margins": 5.898622989654541, "rewards/rejected": -7.805157661437988, "step": 6740 }, { "epoch": 1.31, "learning_rate": 3.128999784281297e-07, "logits/chosen": -2.7771332263946533, "logits/rejected": -2.7409467697143555, "logps/chosen": -193.655517578125, "logps/rejected": -327.9877624511719, "loss": 0.1128, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.9151012897491455, "rewards/margins": 7.28466796875, "rewards/rejected": -10.199769973754883, "step": 6750 }, { "epoch": 1.31, "learning_rate": 3.1254044725677714e-07, "logits/chosen": -2.7811279296875, "logits/rejected": -2.8143246173858643, "logps/chosen": -252.03726196289062, "logps/rejected": -291.01361083984375, "loss": 0.0802, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.3618974685668945, "rewards/margins": 7.68868350982666, "rewards/rejected": -10.050580978393555, "step": 6760 }, { "epoch": 1.31, "learning_rate": 3.1218091608542456e-07, "logits/chosen": -2.7577600479125977, "logits/rejected": -2.7990260124206543, "logps/chosen": -322.322265625, "logps/rejected": -342.3220520019531, "loss": 0.1192, "rewards/accuracies": 1.0, "rewards/chosen": -2.24971342086792, "rewards/margins": 9.047224044799805, "rewards/rejected": -11.2969388961792, "step": 6770 }, { "epoch": 1.32, "learning_rate": 3.1182138491407204e-07, "logits/chosen": -2.6567625999450684, "logits/rejected": -2.65261173248291, "logps/chosen": -270.6991271972656, "logps/rejected": -307.8583068847656, "loss": 0.1087, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.9129661321640015, "rewards/margins": 11.324995040893555, "rewards/rejected": -13.237958908081055, "step": 6780 }, { "epoch": 1.32, "learning_rate": 3.1146185374271947e-07, "logits/chosen": -2.7333474159240723, "logits/rejected": -2.825624465942383, "logps/chosen": -301.9677734375, "logps/rejected": -447.29510498046875, "loss": 0.1645, "rewards/accuracies": 1.0, "rewards/chosen": -0.511021614074707, "rewards/margins": 14.370841979980469, "rewards/rejected": -14.881861686706543, "step": 6790 }, { "epoch": 1.32, "learning_rate": 3.111023225713669e-07, "logits/chosen": -2.7338008880615234, "logits/rejected": -2.703036308288574, "logps/chosen": -205.5673370361328, "logps/rejected": -247.8675537109375, "loss": 0.0945, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.1913504600524902, "rewards/margins": 6.280646800994873, "rewards/rejected": -9.471997261047363, "step": 6800 }, { "epoch": 1.32, "eval_logits/chosen": -2.5996198654174805, "eval_logits/rejected": -2.5874640941619873, "eval_logps/chosen": -262.0834655761719, "eval_logps/rejected": -285.2012023925781, "eval_loss": 0.5339722633361816, "eval_rewards/accuracies": 0.7174999713897705, "eval_rewards/chosen": -6.784541606903076, "eval_rewards/margins": 4.543982982635498, "eval_rewards/rejected": -11.32852554321289, "eval_runtime": 140.1899, "eval_samples_per_second": 22.512, "eval_steps_per_second": 0.357, "step": 6800 }, { "epoch": 1.32, "learning_rate": 3.107427914000144e-07, "logits/chosen": -2.6865274906158447, "logits/rejected": -2.637275218963623, "logps/chosen": -201.00270080566406, "logps/rejected": -310.13604736328125, "loss": 0.1135, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.912355899810791, "rewards/margins": 10.1017484664917, "rewards/rejected": -14.014103889465332, "step": 6810 }, { "epoch": 1.32, "learning_rate": 3.103832602286618e-07, "logits/chosen": -2.5592846870422363, "logits/rejected": -2.534046173095703, "logps/chosen": -326.1671142578125, "logps/rejected": -284.32733154296875, "loss": 0.1286, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.1637768745422363, "rewards/margins": 8.271170616149902, "rewards/rejected": -11.43494701385498, "step": 6820 }, { "epoch": 1.33, "learning_rate": 3.100237290573093e-07, "logits/chosen": -2.6639950275421143, "logits/rejected": -2.6628472805023193, "logps/chosen": -233.7548828125, "logps/rejected": -330.5860900878906, "loss": 0.1161, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3829774856567383, "rewards/margins": 9.43483829498291, "rewards/rejected": -11.817815780639648, "step": 6830 }, { "epoch": 1.33, "learning_rate": 3.096641978859567e-07, "logits/chosen": -2.7246289253234863, "logits/rejected": -2.724916458129883, "logps/chosen": -226.059814453125, "logps/rejected": -303.9591064453125, "loss": 0.0975, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.16279935836792, "rewards/margins": 9.367891311645508, "rewards/rejected": -12.530691146850586, "step": 6840 }, { "epoch": 1.33, "learning_rate": 3.0930466671460415e-07, "logits/chosen": -2.529158353805542, "logits/rejected": -2.5818915367126465, "logps/chosen": -226.40988159179688, "logps/rejected": -280.0759582519531, "loss": 0.1649, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.696617603302002, "rewards/margins": 9.469199180603027, "rewards/rejected": -14.165815353393555, "step": 6850 }, { "epoch": 1.33, "learning_rate": 3.0894513554325157e-07, "logits/chosen": -2.5131049156188965, "logits/rejected": -2.50006103515625, "logps/chosen": -283.16619873046875, "logps/rejected": -292.90997314453125, "loss": 0.2001, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4522463381290436, "rewards/margins": 8.661517143249512, "rewards/rejected": -9.113763809204102, "step": 6860 }, { "epoch": 1.33, "learning_rate": 3.08585604371899e-07, "logits/chosen": -2.4603896141052246, "logits/rejected": -2.475236415863037, "logps/chosen": -200.2270050048828, "logps/rejected": -319.63165283203125, "loss": 0.1584, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.5711714029312134, "rewards/margins": 9.169591903686523, "rewards/rejected": -10.740763664245605, "step": 6870 }, { "epoch": 1.34, "learning_rate": 3.082260732005465e-07, "logits/chosen": -2.5062026977539062, "logits/rejected": -2.481663465499878, "logps/chosen": -270.3084411621094, "logps/rejected": -277.72552490234375, "loss": 0.1108, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.8045730590820312, "rewards/margins": 6.856584072113037, "rewards/rejected": -10.661157608032227, "step": 6880 }, { "epoch": 1.34, "learning_rate": 3.078665420291939e-07, "logits/chosen": -2.692714214324951, "logits/rejected": -2.6422486305236816, "logps/chosen": -224.814697265625, "logps/rejected": -320.9744873046875, "loss": 0.2283, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.366394519805908, "rewards/margins": 6.598013877868652, "rewards/rejected": -9.964409828186035, "step": 6890 }, { "epoch": 1.34, "learning_rate": 3.075070108578414e-07, "logits/chosen": -2.54801869392395, "logits/rejected": -2.611740827560425, "logps/chosen": -201.72970581054688, "logps/rejected": -318.2071838378906, "loss": 0.1569, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.590427398681641, "rewards/margins": 7.745883941650391, "rewards/rejected": -13.336311340332031, "step": 6900 }, { "epoch": 1.34, "eval_logits/chosen": -2.509418249130249, "eval_logits/rejected": -2.4990177154541016, "eval_logps/chosen": -265.41961669921875, "eval_logps/rejected": -287.7729797363281, "eval_loss": 0.5556238889694214, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -7.118157863616943, "eval_rewards/margins": 4.467545986175537, "eval_rewards/rejected": -11.585704803466797, "eval_runtime": 141.0041, "eval_samples_per_second": 22.382, "eval_steps_per_second": 0.355, "step": 6900 }, { "epoch": 1.34, "learning_rate": 3.071474796864888e-07, "logits/chosen": -2.705265760421753, "logits/rejected": -2.708603620529175, "logps/chosen": -364.18621826171875, "logps/rejected": -366.87908935546875, "loss": 0.1044, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.7980449199676514, "rewards/margins": 7.81343936920166, "rewards/rejected": -11.611483573913574, "step": 6910 }, { "epoch": 1.34, "learning_rate": 3.0678794851513625e-07, "logits/chosen": -2.68180513381958, "logits/rejected": -2.6304049491882324, "logps/chosen": -282.3155822753906, "logps/rejected": -310.7538146972656, "loss": 0.1837, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -7.944119453430176, "rewards/margins": 5.915678024291992, "rewards/rejected": -13.859797477722168, "step": 6920 }, { "epoch": 1.35, "learning_rate": 3.0642841734378373e-07, "logits/chosen": -2.583775758743286, "logits/rejected": -2.6555397510528564, "logps/chosen": -290.5704345703125, "logps/rejected": -368.8967590332031, "loss": 0.1213, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.195664405822754, "rewards/margins": 11.082324981689453, "rewards/rejected": -16.27798843383789, "step": 6930 }, { "epoch": 1.35, "learning_rate": 3.0606888617243115e-07, "logits/chosen": -2.667675733566284, "logits/rejected": -2.628983974456787, "logps/chosen": -198.3682403564453, "logps/rejected": -331.04241943359375, "loss": 0.2169, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.4205238819122314, "rewards/margins": 10.031224250793457, "rewards/rejected": -13.451748847961426, "step": 6940 }, { "epoch": 1.35, "learning_rate": 3.057093550010786e-07, "logits/chosen": -2.6205906867980957, "logits/rejected": -2.509148120880127, "logps/chosen": -294.91363525390625, "logps/rejected": -373.4155578613281, "loss": 0.1427, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.280693054199219, "rewards/margins": 6.832987308502197, "rewards/rejected": -11.113679885864258, "step": 6950 }, { "epoch": 1.35, "learning_rate": 3.05349823829726e-07, "logits/chosen": -2.627901077270508, "logits/rejected": -2.7255802154541016, "logps/chosen": -243.75668334960938, "logps/rejected": -333.1106872558594, "loss": 0.1414, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9257257580757141, "rewards/margins": 12.062132835388184, "rewards/rejected": -12.987858772277832, "step": 6960 }, { "epoch": 1.35, "learning_rate": 3.0499029265837344e-07, "logits/chosen": -2.5732569694519043, "logits/rejected": -2.576958417892456, "logps/chosen": -217.84695434570312, "logps/rejected": -285.22607421875, "loss": 0.1911, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.424662113189697, "rewards/margins": 5.363595485687256, "rewards/rejected": -9.78825855255127, "step": 6970 }, { "epoch": 1.36, "learning_rate": 3.046307614870209e-07, "logits/chosen": -2.7168760299682617, "logits/rejected": -2.7221908569335938, "logps/chosen": -204.42691040039062, "logps/rejected": -288.11749267578125, "loss": 0.1641, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.712026119232178, "rewards/margins": 8.34032917022705, "rewards/rejected": -13.052355766296387, "step": 6980 }, { "epoch": 1.36, "learning_rate": 3.0427123031566835e-07, "logits/chosen": -2.665600299835205, "logits/rejected": -2.6365535259246826, "logps/chosen": -235.10409545898438, "logps/rejected": -309.0330505371094, "loss": 0.1178, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.550424575805664, "rewards/margins": 8.813916206359863, "rewards/rejected": -11.364341735839844, "step": 6990 }, { "epoch": 1.36, "learning_rate": 3.0391169914431583e-07, "logits/chosen": -2.651092767715454, "logits/rejected": -2.572640895843506, "logps/chosen": -178.3667755126953, "logps/rejected": -302.5531311035156, "loss": 0.1122, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.638754367828369, "rewards/margins": 9.221355438232422, "rewards/rejected": -11.860109329223633, "step": 7000 }, { "epoch": 1.36, "eval_logits/chosen": -2.581740140914917, "eval_logits/rejected": -2.568486213684082, "eval_logps/chosen": -261.2301330566406, "eval_logps/rejected": -286.8914794921875, "eval_loss": 0.5235300660133362, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -6.699207782745361, "eval_rewards/margins": 4.798343658447266, "eval_rewards/rejected": -11.497550964355469, "eval_runtime": 141.1976, "eval_samples_per_second": 22.352, "eval_steps_per_second": 0.354, "step": 7000 }, { "epoch": 1.36, "learning_rate": 3.0355216797296326e-07, "logits/chosen": -2.5472564697265625, "logits/rejected": -2.543069839477539, "logps/chosen": -183.33639526367188, "logps/rejected": -266.994140625, "loss": 0.1101, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.847899436950684, "rewards/margins": 6.370082855224609, "rewards/rejected": -11.217982292175293, "step": 7010 }, { "epoch": 1.36, "learning_rate": 3.031926368016107e-07, "logits/chosen": -2.5794944763183594, "logits/rejected": -2.567682981491089, "logps/chosen": -251.6931610107422, "logps/rejected": -317.95477294921875, "loss": 0.1137, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.406409740447998, "rewards/margins": 7.383990287780762, "rewards/rejected": -12.790399551391602, "step": 7020 }, { "epoch": 1.36, "learning_rate": 3.0283310563025816e-07, "logits/chosen": -2.6800646781921387, "logits/rejected": -2.548715591430664, "logps/chosen": -267.2879333496094, "logps/rejected": -375.4579162597656, "loss": 0.1377, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.9697425365447998, "rewards/margins": 11.063411712646484, "rewards/rejected": -13.03315258026123, "step": 7030 }, { "epoch": 1.37, "learning_rate": 3.024735744589056e-07, "logits/chosen": -2.625786542892456, "logits/rejected": -2.5683741569519043, "logps/chosen": -331.9104919433594, "logps/rejected": -320.0903625488281, "loss": 0.1019, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.854667901992798, "rewards/margins": 8.84476375579834, "rewards/rejected": -11.699432373046875, "step": 7040 }, { "epoch": 1.37, "learning_rate": 3.02114043287553e-07, "logits/chosen": -2.708660364151001, "logits/rejected": -2.8068530559539795, "logps/chosen": -181.35525512695312, "logps/rejected": -280.81390380859375, "loss": 0.1748, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.215568542480469, "rewards/margins": 9.792646408081055, "rewards/rejected": -15.008213996887207, "step": 7050 }, { "epoch": 1.37, "learning_rate": 3.0175451211620045e-07, "logits/chosen": -2.633408784866333, "logits/rejected": -2.612058162689209, "logps/chosen": -200.0987091064453, "logps/rejected": -283.28515625, "loss": 0.1443, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.08492374420166, "rewards/margins": 8.34225082397461, "rewards/rejected": -13.427175521850586, "step": 7060 }, { "epoch": 1.37, "learning_rate": 3.013949809448479e-07, "logits/chosen": -2.665736675262451, "logits/rejected": -2.673962116241455, "logps/chosen": -204.17137145996094, "logps/rejected": -252.1431427001953, "loss": 0.1596, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.014439105987549, "rewards/margins": 8.255058288574219, "rewards/rejected": -13.269497871398926, "step": 7070 }, { "epoch": 1.37, "learning_rate": 3.0103544977349536e-07, "logits/chosen": -2.763153076171875, "logits/rejected": -2.7834317684173584, "logps/chosen": -260.197265625, "logps/rejected": -312.7044677734375, "loss": 0.1843, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.0015342235565186, "rewards/margins": 7.654010772705078, "rewards/rejected": -9.655545234680176, "step": 7080 }, { "epoch": 1.38, "learning_rate": 3.006759186021428e-07, "logits/chosen": -2.7696967124938965, "logits/rejected": -2.7554268836975098, "logps/chosen": -234.03488159179688, "logps/rejected": -288.6385803222656, "loss": 0.1456, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.310217380523682, "rewards/margins": 5.257205009460449, "rewards/rejected": -10.567422866821289, "step": 7090 }, { "epoch": 1.38, "learning_rate": 3.0031638743079026e-07, "logits/chosen": -2.649148464202881, "logits/rejected": -2.725419521331787, "logps/chosen": -270.505615234375, "logps/rejected": -361.8536682128906, "loss": 0.126, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.899606704711914, "rewards/margins": 11.054985046386719, "rewards/rejected": -13.954591751098633, "step": 7100 }, { "epoch": 1.38, "eval_logits/chosen": -2.5971784591674805, "eval_logits/rejected": -2.5856800079345703, "eval_logps/chosen": -270.76007080078125, "eval_logps/rejected": -297.9208679199219, "eval_loss": 0.5673274993896484, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -7.652198314666748, "eval_rewards/margins": 4.948291301727295, "eval_rewards/rejected": -12.600488662719727, "eval_runtime": 140.5425, "eval_samples_per_second": 22.456, "eval_steps_per_second": 0.356, "step": 7100 }, { "epoch": 1.38, "learning_rate": 2.999568562594377e-07, "logits/chosen": -2.6537704467773438, "logits/rejected": -2.701878786087036, "logps/chosen": -251.71798706054688, "logps/rejected": -315.9606018066406, "loss": 0.1307, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.501950263977051, "rewards/margins": 8.076028823852539, "rewards/rejected": -12.577978134155273, "step": 7110 }, { "epoch": 1.38, "learning_rate": 2.995973250880851e-07, "logits/chosen": -2.7427194118499756, "logits/rejected": -2.7168116569519043, "logps/chosen": -269.60906982421875, "logps/rejected": -329.2308654785156, "loss": 0.1276, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.565219879150391, "rewards/margins": 12.049905776977539, "rewards/rejected": -16.61512565612793, "step": 7120 }, { "epoch": 1.38, "learning_rate": 2.992377939167326e-07, "logits/chosen": -2.8124959468841553, "logits/rejected": -2.732012987136841, "logps/chosen": -268.4694519042969, "logps/rejected": -373.57611083984375, "loss": 0.1229, "rewards/accuracies": 1.0, "rewards/chosen": -5.161271095275879, "rewards/margins": 10.258711814880371, "rewards/rejected": -15.41998291015625, "step": 7130 }, { "epoch": 1.39, "learning_rate": 2.9887826274538003e-07, "logits/chosen": -2.771953821182251, "logits/rejected": -2.658160448074341, "logps/chosen": -215.19906616210938, "logps/rejected": -219.28829956054688, "loss": 0.2375, "rewards/accuracies": 1.0, "rewards/chosen": -2.545722007751465, "rewards/margins": 7.849542140960693, "rewards/rejected": -10.395264625549316, "step": 7140 }, { "epoch": 1.39, "learning_rate": 2.9851873157402746e-07, "logits/chosen": -2.7215499877929688, "logits/rejected": -2.8429300785064697, "logps/chosen": -207.01937866210938, "logps/rejected": -366.0595703125, "loss": 0.1678, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.246459484100342, "rewards/margins": 7.551443576812744, "rewards/rejected": -11.797903060913086, "step": 7150 }, { "epoch": 1.39, "learning_rate": 2.981592004026749e-07, "logits/chosen": -2.7126569747924805, "logits/rejected": -2.68514084815979, "logps/chosen": -179.6477813720703, "logps/rejected": -238.47119140625, "loss": 0.1196, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.4221978187561035, "rewards/margins": 7.249853610992432, "rewards/rejected": -14.672050476074219, "step": 7160 }, { "epoch": 1.39, "learning_rate": 2.977996692313223e-07, "logits/chosen": -2.563110113143921, "logits/rejected": -2.6195552349090576, "logps/chosen": -218.6953125, "logps/rejected": -283.00616455078125, "loss": 0.143, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.0791234970092773, "rewards/margins": 10.290868759155273, "rewards/rejected": -13.369993209838867, "step": 7170 }, { "epoch": 1.39, "learning_rate": 2.9744013805996974e-07, "logits/chosen": -2.6006455421447754, "logits/rejected": -2.6729652881622314, "logps/chosen": -254.58407592773438, "logps/rejected": -345.2496337890625, "loss": 0.0951, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.587131500244141, "rewards/margins": 10.4163236618042, "rewards/rejected": -15.003454208374023, "step": 7180 }, { "epoch": 1.4, "learning_rate": 2.970806068886172e-07, "logits/chosen": -2.7501912117004395, "logits/rejected": -2.6658809185028076, "logps/chosen": -328.89031982421875, "logps/rejected": -379.03094482421875, "loss": 0.1222, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.939349889755249, "rewards/margins": 10.54344654083252, "rewards/rejected": -13.482797622680664, "step": 7190 }, { "epoch": 1.4, "learning_rate": 2.967210757172647e-07, "logits/chosen": -2.450362205505371, "logits/rejected": -2.339787483215332, "logps/chosen": -223.5878448486328, "logps/rejected": -388.7928161621094, "loss": 0.0913, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.88235342502594, "rewards/margins": 15.814065933227539, "rewards/rejected": -17.69641876220703, "step": 7200 }, { "epoch": 1.4, "eval_logits/chosen": -2.529186487197876, "eval_logits/rejected": -2.5161945819854736, "eval_logps/chosen": -275.12677001953125, "eval_logps/rejected": -306.8511047363281, "eval_loss": 0.5452268123626709, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -8.088868141174316, "eval_rewards/margins": 5.404646396636963, "eval_rewards/rejected": -13.493513107299805, "eval_runtime": 141.3306, "eval_samples_per_second": 22.331, "eval_steps_per_second": 0.354, "step": 7200 }, { "epoch": 1.4, "learning_rate": 2.9636154454591213e-07, "logits/chosen": -2.620148181915283, "logits/rejected": -2.566028594970703, "logps/chosen": -175.0458221435547, "logps/rejected": -224.8864288330078, "loss": 0.1564, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.470210552215576, "rewards/margins": 6.311079978942871, "rewards/rejected": -11.781290054321289, "step": 7210 }, { "epoch": 1.4, "learning_rate": 2.9600201337455956e-07, "logits/chosen": -2.7207438945770264, "logits/rejected": -2.718306303024292, "logps/chosen": -244.40902709960938, "logps/rejected": -366.9908142089844, "loss": 0.1573, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.883308410644531, "rewards/margins": 12.144978523254395, "rewards/rejected": -17.02828598022461, "step": 7220 }, { "epoch": 1.4, "learning_rate": 2.95642482203207e-07, "logits/chosen": -2.7251694202423096, "logits/rejected": -2.7464382648468018, "logps/chosen": -266.37652587890625, "logps/rejected": -318.9719543457031, "loss": 0.0837, "rewards/accuracies": 1.0, "rewards/chosen": -0.4313480854034424, "rewards/margins": 9.35649585723877, "rewards/rejected": -9.787843704223633, "step": 7230 }, { "epoch": 1.41, "learning_rate": 2.9528295103185447e-07, "logits/chosen": -2.6239540576934814, "logits/rejected": -2.568068742752075, "logps/chosen": -244.37234497070312, "logps/rejected": -347.47613525390625, "loss": 0.2529, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.2521071434020996, "rewards/margins": 8.381429672241211, "rewards/rejected": -11.633537292480469, "step": 7240 }, { "epoch": 1.41, "learning_rate": 2.949234198605019e-07, "logits/chosen": -2.8597843647003174, "logits/rejected": -2.7766873836517334, "logps/chosen": -218.0390625, "logps/rejected": -291.564453125, "loss": 0.1086, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.036818504333496, "rewards/margins": 7.55121374130249, "rewards/rejected": -12.588032722473145, "step": 7250 }, { "epoch": 1.41, "learning_rate": 2.945638886891493e-07, "logits/chosen": -2.6912407875061035, "logits/rejected": -2.67124605178833, "logps/chosen": -220.19381713867188, "logps/rejected": -280.46551513671875, "loss": 0.1727, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.568626403808594, "rewards/margins": 8.104110717773438, "rewards/rejected": -12.672737121582031, "step": 7260 }, { "epoch": 1.41, "learning_rate": 2.9420435751779675e-07, "logits/chosen": -2.817361831665039, "logits/rejected": -2.7352194786071777, "logps/chosen": -266.4664611816406, "logps/rejected": -310.5345458984375, "loss": 0.1487, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.841556549072266, "rewards/margins": 8.549221992492676, "rewards/rejected": -13.390779495239258, "step": 7270 }, { "epoch": 1.41, "learning_rate": 2.938448263464442e-07, "logits/chosen": -2.6565136909484863, "logits/rejected": -2.727008581161499, "logps/chosen": -248.929443359375, "logps/rejected": -312.4493103027344, "loss": 0.2064, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.9427595138549805, "rewards/margins": 7.866678714752197, "rewards/rejected": -12.809438705444336, "step": 7280 }, { "epoch": 1.42, "learning_rate": 2.934852951750917e-07, "logits/chosen": -2.6423702239990234, "logits/rejected": -2.687544584274292, "logps/chosen": -250.701904296875, "logps/rejected": -401.62786865234375, "loss": 0.0906, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.986860752105713, "rewards/margins": 12.676633834838867, "rewards/rejected": -15.663493156433105, "step": 7290 }, { "epoch": 1.42, "learning_rate": 2.9312576400373914e-07, "logits/chosen": -2.683901786804199, "logits/rejected": -2.5897936820983887, "logps/chosen": -281.7994689941406, "logps/rejected": -361.5560302734375, "loss": 0.1582, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.576489448547363, "rewards/margins": 7.160782814025879, "rewards/rejected": -15.737272262573242, "step": 7300 }, { "epoch": 1.42, "eval_logits/chosen": -2.6350250244140625, "eval_logits/rejected": -2.625715970993042, "eval_logps/chosen": -275.5716552734375, "eval_logps/rejected": -300.4671936035156, "eval_loss": 0.5486189126968384, "eval_rewards/accuracies": 0.6800000071525574, "eval_rewards/chosen": -8.1333589553833, "eval_rewards/margins": 4.721765995025635, "eval_rewards/rejected": -12.855124473571777, "eval_runtime": 140.814, "eval_samples_per_second": 22.413, "eval_steps_per_second": 0.355, "step": 7300 }, { "epoch": 1.42, "learning_rate": 2.9276623283238657e-07, "logits/chosen": -2.690355062484741, "logits/rejected": -2.8258864879608154, "logps/chosen": -329.5840759277344, "logps/rejected": -354.81182861328125, "loss": 0.1519, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.988795280456543, "rewards/margins": 7.291092872619629, "rewards/rejected": -13.279887199401855, "step": 7310 }, { "epoch": 1.42, "learning_rate": 2.92406701661034e-07, "logits/chosen": -2.669297695159912, "logits/rejected": -2.6820120811462402, "logps/chosen": -176.64915466308594, "logps/rejected": -262.0193786621094, "loss": 0.1338, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.852190971374512, "rewards/margins": 6.8422746658325195, "rewards/rejected": -11.694464683532715, "step": 7320 }, { "epoch": 1.42, "learning_rate": 2.920471704896814e-07, "logits/chosen": -2.710866689682007, "logits/rejected": -2.6943893432617188, "logps/chosen": -185.72616577148438, "logps/rejected": -240.77197265625, "loss": 0.1008, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.496728897094727, "rewards/margins": 8.53285026550293, "rewards/rejected": -13.029577255249023, "step": 7330 }, { "epoch": 1.42, "learning_rate": 2.916876393183289e-07, "logits/chosen": -2.611412763595581, "logits/rejected": -2.60302472114563, "logps/chosen": -246.72314453125, "logps/rejected": -309.4311828613281, "loss": 0.1393, "rewards/accuracies": 1.0, "rewards/chosen": -1.08695387840271, "rewards/margins": 8.461469650268555, "rewards/rejected": -9.548425674438477, "step": 7340 }, { "epoch": 1.43, "learning_rate": 2.9132810814697633e-07, "logits/chosen": -2.5691325664520264, "logits/rejected": -2.631394147872925, "logps/chosen": -240.9157257080078, "logps/rejected": -262.32012939453125, "loss": 0.1522, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.1954829692840576, "rewards/margins": 7.731309413909912, "rewards/rejected": -9.92679214477539, "step": 7350 }, { "epoch": 1.43, "learning_rate": 2.9096857697562376e-07, "logits/chosen": -2.765463352203369, "logits/rejected": -2.708284616470337, "logps/chosen": -228.95162963867188, "logps/rejected": -380.7508544921875, "loss": 0.1379, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.617364883422852, "rewards/margins": 10.537843704223633, "rewards/rejected": -16.155208587646484, "step": 7360 }, { "epoch": 1.43, "learning_rate": 2.906090458042712e-07, "logits/chosen": -2.7099769115448, "logits/rejected": -2.7386481761932373, "logps/chosen": -242.02255249023438, "logps/rejected": -284.34649658203125, "loss": 0.1483, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.5198564529418945, "rewards/margins": 6.369032859802246, "rewards/rejected": -11.88888931274414, "step": 7370 }, { "epoch": 1.43, "learning_rate": 2.902495146329186e-07, "logits/chosen": -2.676896572113037, "logits/rejected": -2.720167875289917, "logps/chosen": -289.1672668457031, "logps/rejected": -313.4903564453125, "loss": 0.1071, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.613086700439453, "rewards/margins": 9.373674392700195, "rewards/rejected": -13.986761093139648, "step": 7380 }, { "epoch": 1.43, "learning_rate": 2.8988998346156615e-07, "logits/chosen": -2.6812024116516113, "logits/rejected": -2.674198865890503, "logps/chosen": -228.72802734375, "logps/rejected": -373.49493408203125, "loss": 0.1222, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.594104766845703, "rewards/margins": 7.436854362487793, "rewards/rejected": -13.030960083007812, "step": 7390 }, { "epoch": 1.44, "learning_rate": 2.895304522902136e-07, "logits/chosen": -2.6344497203826904, "logits/rejected": -2.599367380142212, "logps/chosen": -307.51458740234375, "logps/rejected": -377.29486083984375, "loss": 0.1205, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.841690540313721, "rewards/margins": 10.016744613647461, "rewards/rejected": -14.858434677124023, "step": 7400 }, { "epoch": 1.44, "eval_logits/chosen": -2.5095021724700928, "eval_logits/rejected": -2.4955389499664307, "eval_logps/chosen": -270.70867919921875, "eval_logps/rejected": -297.9638977050781, "eval_loss": 0.5640743970870972, "eval_rewards/accuracies": 0.6924999952316284, "eval_rewards/chosen": -7.647061347961426, "eval_rewards/margins": 4.95773458480835, "eval_rewards/rejected": -12.604796409606934, "eval_runtime": 140.8209, "eval_samples_per_second": 22.411, "eval_steps_per_second": 0.355, "step": 7400 }, { "epoch": 1.44, "learning_rate": 2.89170921118861e-07, "logits/chosen": -2.646491765975952, "logits/rejected": -2.695291519165039, "logps/chosen": -271.927490234375, "logps/rejected": -369.4830627441406, "loss": 0.1511, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.7775983810424805, "rewards/margins": 13.019407272338867, "rewards/rejected": -17.797006607055664, "step": 7410 }, { "epoch": 1.44, "learning_rate": 2.8881138994750843e-07, "logits/chosen": -2.6583352088928223, "logits/rejected": -2.6142797470092773, "logps/chosen": -267.32098388671875, "logps/rejected": -321.2505187988281, "loss": 0.1269, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.502383232116699, "rewards/margins": 9.251312255859375, "rewards/rejected": -13.753695487976074, "step": 7420 }, { "epoch": 1.44, "learning_rate": 2.8845185877615586e-07, "logits/chosen": -2.571791172027588, "logits/rejected": -2.591951370239258, "logps/chosen": -245.6603240966797, "logps/rejected": -329.0169372558594, "loss": 0.1461, "rewards/accuracies": 1.0, "rewards/chosen": -3.9595108032226562, "rewards/margins": 11.194948196411133, "rewards/rejected": -15.154459953308105, "step": 7430 }, { "epoch": 1.44, "learning_rate": 2.8809232760480334e-07, "logits/chosen": -2.655647039413452, "logits/rejected": -2.649632215499878, "logps/chosen": -283.72235107421875, "logps/rejected": -334.8876037597656, "loss": 0.1205, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.935078144073486, "rewards/margins": 8.224316596984863, "rewards/rejected": -14.159395217895508, "step": 7440 }, { "epoch": 1.45, "learning_rate": 2.8773279643345077e-07, "logits/chosen": -2.301175117492676, "logits/rejected": -2.3566908836364746, "logps/chosen": -226.89065551757812, "logps/rejected": -293.74261474609375, "loss": 0.1226, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.175057411193848, "rewards/margins": 8.48443603515625, "rewards/rejected": -13.659494400024414, "step": 7450 }, { "epoch": 1.45, "learning_rate": 2.873732652620982e-07, "logits/chosen": -2.650192975997925, "logits/rejected": -2.5119731426239014, "logps/chosen": -252.47445678710938, "logps/rejected": -339.8155517578125, "loss": 0.1635, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.845350742340088, "rewards/margins": 9.868247985839844, "rewards/rejected": -16.71360206604004, "step": 7460 }, { "epoch": 1.45, "learning_rate": 2.870137340907456e-07, "logits/chosen": -2.5948119163513184, "logits/rejected": -2.5240237712860107, "logps/chosen": -235.8869171142578, "logps/rejected": -338.0869445800781, "loss": 0.1414, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -9.814234733581543, "rewards/margins": 7.834403038024902, "rewards/rejected": -17.648639678955078, "step": 7470 }, { "epoch": 1.45, "learning_rate": 2.8665420291939305e-07, "logits/chosen": -2.5564475059509277, "logits/rejected": -2.521803617477417, "logps/chosen": -214.2293243408203, "logps/rejected": -279.4332275390625, "loss": 0.2919, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.876753330230713, "rewards/margins": 7.654786109924316, "rewards/rejected": -15.531538009643555, "step": 7480 }, { "epoch": 1.45, "learning_rate": 2.862946717480406e-07, "logits/chosen": -2.6138367652893066, "logits/rejected": -2.494513750076294, "logps/chosen": -288.03094482421875, "logps/rejected": -279.65606689453125, "loss": 0.1171, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.514867782592773, "rewards/margins": 5.313235282897949, "rewards/rejected": -9.828103065490723, "step": 7490 }, { "epoch": 1.46, "learning_rate": 2.85935140576688e-07, "logits/chosen": -2.5605311393737793, "logits/rejected": -2.5970845222473145, "logps/chosen": -251.8067169189453, "logps/rejected": -333.286376953125, "loss": 0.1483, "rewards/accuracies": 1.0, "rewards/chosen": -1.8011680841445923, "rewards/margins": 10.87164306640625, "rewards/rejected": -12.672809600830078, "step": 7500 }, { "epoch": 1.46, "eval_logits/chosen": -2.462235927581787, "eval_logits/rejected": -2.4456515312194824, "eval_logps/chosen": -262.43505859375, "eval_logps/rejected": -291.4525451660156, "eval_loss": 0.535338282585144, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -6.819699764251709, "eval_rewards/margins": 5.133960723876953, "eval_rewards/rejected": -11.95366096496582, "eval_runtime": 140.6131, "eval_samples_per_second": 22.445, "eval_steps_per_second": 0.356, "step": 7500 }, { "epoch": 1.46, "learning_rate": 2.8557560940533544e-07, "logits/chosen": -2.6730306148529053, "logits/rejected": -2.639164447784424, "logps/chosen": -315.77532958984375, "logps/rejected": -356.12261962890625, "loss": 0.1234, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.206526279449463, "rewards/margins": 10.395485877990723, "rewards/rejected": -15.602012634277344, "step": 7510 }, { "epoch": 1.46, "learning_rate": 2.8521607823398287e-07, "logits/chosen": -2.6434378623962402, "logits/rejected": -2.595853805541992, "logps/chosen": -245.8604736328125, "logps/rejected": -305.4363708496094, "loss": 0.1407, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.256904602050781, "rewards/margins": 7.4478936195373535, "rewards/rejected": -12.704797744750977, "step": 7520 }, { "epoch": 1.46, "learning_rate": 2.848565470626303e-07, "logits/chosen": -2.3837356567382812, "logits/rejected": -2.3848493099212646, "logps/chosen": -251.31204223632812, "logps/rejected": -386.92474365234375, "loss": 0.1449, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.964468479156494, "rewards/margins": 11.46526050567627, "rewards/rejected": -14.429728507995605, "step": 7530 }, { "epoch": 1.46, "learning_rate": 2.844970158912778e-07, "logits/chosen": -2.4391016960144043, "logits/rejected": -2.4242305755615234, "logps/chosen": -184.51016235351562, "logps/rejected": -272.45770263671875, "loss": 0.1833, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.969717502593994, "rewards/margins": 7.345050811767578, "rewards/rejected": -11.314767837524414, "step": 7540 }, { "epoch": 1.47, "learning_rate": 2.841374847199252e-07, "logits/chosen": -2.539172410964966, "logits/rejected": -2.4916083812713623, "logps/chosen": -238.360107421875, "logps/rejected": -321.63275146484375, "loss": 0.1164, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.93917179107666, "rewards/margins": 8.108965873718262, "rewards/rejected": -14.048138618469238, "step": 7550 }, { "epoch": 1.47, "learning_rate": 2.8377795354857263e-07, "logits/chosen": -2.5334014892578125, "logits/rejected": -2.5618481636047363, "logps/chosen": -217.45339965820312, "logps/rejected": -319.08648681640625, "loss": 0.1339, "rewards/accuracies": 0.75, "rewards/chosen": -5.835770130157471, "rewards/margins": 8.362272262573242, "rewards/rejected": -14.198040962219238, "step": 7560 }, { "epoch": 1.47, "learning_rate": 2.8341842237722006e-07, "logits/chosen": -2.2875418663024902, "logits/rejected": -2.2655181884765625, "logps/chosen": -260.1848449707031, "logps/rejected": -293.3078918457031, "loss": 0.1478, "rewards/accuracies": 0.75, "rewards/chosen": -5.657872200012207, "rewards/margins": 6.696013450622559, "rewards/rejected": -12.353886604309082, "step": 7570 }, { "epoch": 1.47, "learning_rate": 2.830588912058675e-07, "logits/chosen": -2.603400707244873, "logits/rejected": -2.6000728607177734, "logps/chosen": -292.7062072753906, "logps/rejected": -389.6017761230469, "loss": 0.0906, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.4953200817108154, "rewards/margins": 12.588823318481445, "rewards/rejected": -15.084144592285156, "step": 7580 }, { "epoch": 1.47, "learning_rate": 2.82699360034515e-07, "logits/chosen": -2.6736364364624023, "logits/rejected": -2.634866952896118, "logps/chosen": -314.75213623046875, "logps/rejected": -339.9332580566406, "loss": 0.0948, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.965799570083618, "rewards/margins": 8.406060218811035, "rewards/rejected": -11.371858596801758, "step": 7590 }, { "epoch": 1.48, "learning_rate": 2.8233982886316245e-07, "logits/chosen": -2.6928837299346924, "logits/rejected": -2.6834189891815186, "logps/chosen": -249.1254119873047, "logps/rejected": -344.33282470703125, "loss": 0.1431, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.108962297439575, "rewards/margins": 8.896110534667969, "rewards/rejected": -12.005071640014648, "step": 7600 }, { "epoch": 1.48, "eval_logits/chosen": -2.4902963638305664, "eval_logits/rejected": -2.4739882946014404, "eval_logps/chosen": -266.635498046875, "eval_logps/rejected": -295.59075927734375, "eval_loss": 0.5330983996391296, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -7.239742279052734, "eval_rewards/margins": 5.127737522125244, "eval_rewards/rejected": -12.36747932434082, "eval_runtime": 158.6779, "eval_samples_per_second": 19.889, "eval_steps_per_second": 0.315, "step": 7600 }, { "epoch": 1.48, "learning_rate": 2.819802976918099e-07, "logits/chosen": -2.430002450942993, "logits/rejected": -2.4719510078430176, "logps/chosen": -211.90463256835938, "logps/rejected": -397.2557067871094, "loss": 0.1348, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -8.344503402709961, "rewards/margins": 11.521432876586914, "rewards/rejected": -19.865936279296875, "step": 7610 }, { "epoch": 1.48, "learning_rate": 2.816207665204573e-07, "logits/chosen": -2.663891315460205, "logits/rejected": -2.665191650390625, "logps/chosen": -232.7655029296875, "logps/rejected": -342.55133056640625, "loss": 0.1071, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.3341264724731445, "rewards/margins": 8.600602149963379, "rewards/rejected": -12.934728622436523, "step": 7620 }, { "epoch": 1.48, "learning_rate": 2.8126123534910473e-07, "logits/chosen": -2.6779227256774902, "logits/rejected": -2.7119317054748535, "logps/chosen": -260.8107604980469, "logps/rejected": -391.2445373535156, "loss": 0.1438, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.915870428085327, "rewards/margins": 9.063987731933594, "rewards/rejected": -11.979857444763184, "step": 7630 }, { "epoch": 1.48, "learning_rate": 2.809017041777522e-07, "logits/chosen": -2.4341931343078613, "logits/rejected": -2.387016773223877, "logps/chosen": -203.7797393798828, "logps/rejected": -288.8929443359375, "loss": 0.0977, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.0777482986450195, "rewards/margins": 8.294588088989258, "rewards/rejected": -12.372335433959961, "step": 7640 }, { "epoch": 1.49, "learning_rate": 2.8054217300639964e-07, "logits/chosen": -2.632253408432007, "logits/rejected": -2.4459805488586426, "logps/chosen": -194.9683380126953, "logps/rejected": -177.0109100341797, "loss": 0.0911, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.925438404083252, "rewards/margins": 5.247866153717041, "rewards/rejected": -8.173304557800293, "step": 7650 }, { "epoch": 1.49, "learning_rate": 2.8018264183504707e-07, "logits/chosen": -2.7020745277404785, "logits/rejected": -2.551351547241211, "logps/chosen": -270.93890380859375, "logps/rejected": -298.1747741699219, "loss": 0.0941, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.2506473064422607, "rewards/margins": 9.88005256652832, "rewards/rejected": -12.13070011138916, "step": 7660 }, { "epoch": 1.49, "learning_rate": 2.798231106636945e-07, "logits/chosen": -2.626739978790283, "logits/rejected": -2.601685047149658, "logps/chosen": -298.44775390625, "logps/rejected": -300.03656005859375, "loss": 0.175, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.278904438018799, "rewards/margins": 9.179478645324707, "rewards/rejected": -11.45838451385498, "step": 7670 }, { "epoch": 1.49, "learning_rate": 2.794635794923419e-07, "logits/chosen": -2.74287486076355, "logits/rejected": -2.7336511611938477, "logps/chosen": -273.65911865234375, "logps/rejected": -331.85638427734375, "loss": 0.1228, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.677824020385742, "rewards/margins": 7.765066623687744, "rewards/rejected": -12.442890167236328, "step": 7680 }, { "epoch": 1.49, "learning_rate": 2.7910404832098946e-07, "logits/chosen": -2.404188871383667, "logits/rejected": -2.394561767578125, "logps/chosen": -238.877685546875, "logps/rejected": -365.3401184082031, "loss": 0.173, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.243638515472412, "rewards/margins": 9.225210189819336, "rewards/rejected": -14.468849182128906, "step": 7690 }, { "epoch": 1.49, "learning_rate": 2.787445171496369e-07, "logits/chosen": -2.6277623176574707, "logits/rejected": -2.616374969482422, "logps/chosen": -238.05709838867188, "logps/rejected": -358.3810729980469, "loss": 0.1604, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -6.534859657287598, "rewards/margins": 10.644804954528809, "rewards/rejected": -17.179664611816406, "step": 7700 }, { "epoch": 1.49, "eval_logits/chosen": -2.5511996746063232, "eval_logits/rejected": -2.5380501747131348, "eval_logps/chosen": -264.64892578125, "eval_logps/rejected": -292.4844665527344, "eval_loss": 0.5209183692932129, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -7.0410847663879395, "eval_rewards/margins": 5.015763759613037, "eval_rewards/rejected": -12.056848526000977, "eval_runtime": 141.3306, "eval_samples_per_second": 22.331, "eval_steps_per_second": 0.354, "step": 7700 }, { "epoch": 1.5, "learning_rate": 2.783849859782843e-07, "logits/chosen": -2.5951569080352783, "logits/rejected": -2.6269772052764893, "logps/chosen": -203.84194946289062, "logps/rejected": -286.40325927734375, "loss": 0.126, "rewards/accuracies": 0.75, "rewards/chosen": -9.021936416625977, "rewards/margins": 8.942475318908691, "rewards/rejected": -17.964412689208984, "step": 7710 }, { "epoch": 1.5, "learning_rate": 2.7802545480693174e-07, "logits/chosen": -2.656064987182617, "logits/rejected": -2.6841704845428467, "logps/chosen": -289.58514404296875, "logps/rejected": -373.8680419921875, "loss": 0.1295, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.8527544140815735, "rewards/margins": 12.222578048706055, "rewards/rejected": -11.369823455810547, "step": 7720 }, { "epoch": 1.5, "learning_rate": 2.7766592363557917e-07, "logits/chosen": -2.679734468460083, "logits/rejected": -2.6492819786071777, "logps/chosen": -276.2290954589844, "logps/rejected": -336.97503662109375, "loss": 0.1019, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.463384628295898, "rewards/margins": 9.683026313781738, "rewards/rejected": -18.14640998840332, "step": 7730 }, { "epoch": 1.5, "learning_rate": 2.7730639246422665e-07, "logits/chosen": -2.754636526107788, "logits/rejected": -2.7663750648498535, "logps/chosen": -310.114501953125, "logps/rejected": -347.15887451171875, "loss": 0.2118, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.472342610359192, "rewards/margins": 9.652831077575684, "rewards/rejected": -11.12517261505127, "step": 7740 }, { "epoch": 1.5, "learning_rate": 2.769468612928741e-07, "logits/chosen": -2.741122245788574, "logits/rejected": -2.635406970977783, "logps/chosen": -265.55364990234375, "logps/rejected": -269.2779235839844, "loss": 0.1222, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.486469268798828, "rewards/margins": 7.999451637268066, "rewards/rejected": -14.485920906066895, "step": 7750 }, { "epoch": 1.51, "learning_rate": 2.765873301215215e-07, "logits/chosen": -2.7480459213256836, "logits/rejected": -2.663301706314087, "logps/chosen": -339.1629638671875, "logps/rejected": -291.66278076171875, "loss": 0.1391, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.851433753967285, "rewards/margins": 6.955389499664307, "rewards/rejected": -13.8068208694458, "step": 7760 }, { "epoch": 1.51, "learning_rate": 2.7622779895016893e-07, "logits/chosen": -2.654982805252075, "logits/rejected": -2.707353115081787, "logps/chosen": -193.4783935546875, "logps/rejected": -280.4853820800781, "loss": 0.1822, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.415597915649414, "rewards/margins": 8.169103622436523, "rewards/rejected": -10.584702491760254, "step": 7770 }, { "epoch": 1.51, "learning_rate": 2.7586826777881636e-07, "logits/chosen": -2.7164063453674316, "logits/rejected": -2.644347906112671, "logps/chosen": -262.46246337890625, "logps/rejected": -223.9054412841797, "loss": 0.1171, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.2464256286621094, "rewards/margins": 6.026411533355713, "rewards/rejected": -8.272836685180664, "step": 7780 }, { "epoch": 1.51, "learning_rate": 2.755087366074639e-07, "logits/chosen": -2.819343090057373, "logits/rejected": -2.8072986602783203, "logps/chosen": -224.7086181640625, "logps/rejected": -403.16693115234375, "loss": 0.1208, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.5812429189682007, "rewards/margins": 16.40489959716797, "rewards/rejected": -16.986141204833984, "step": 7790 }, { "epoch": 1.51, "learning_rate": 2.751492054361113e-07, "logits/chosen": -2.7332189083099365, "logits/rejected": -2.720179319381714, "logps/chosen": -201.06503295898438, "logps/rejected": -240.810791015625, "loss": 0.1578, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.0317578315734863, "rewards/margins": 7.611621856689453, "rewards/rejected": -10.643381118774414, "step": 7800 }, { "epoch": 1.51, "eval_logits/chosen": -2.571337938308716, "eval_logits/rejected": -2.5551180839538574, "eval_logps/chosen": -263.78594970703125, "eval_logps/rejected": -290.19305419921875, "eval_loss": 0.5121142864227295, "eval_rewards/accuracies": 0.6949999928474426, "eval_rewards/chosen": -6.9547882080078125, "eval_rewards/margins": 4.872920513153076, "eval_rewards/rejected": -11.827710151672363, "eval_runtime": 140.9926, "eval_samples_per_second": 22.384, "eval_steps_per_second": 0.355, "step": 7800 }, { "epoch": 1.52, "learning_rate": 2.7478967426475875e-07, "logits/chosen": -2.654966115951538, "logits/rejected": -2.6363489627838135, "logps/chosen": -257.8814697265625, "logps/rejected": -331.0078125, "loss": 0.105, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.393416881561279, "rewards/margins": 8.878604888916016, "rewards/rejected": -15.272021293640137, "step": 7810 }, { "epoch": 1.52, "learning_rate": 2.744301430934062e-07, "logits/chosen": -2.589625120162964, "logits/rejected": -2.6775403022766113, "logps/chosen": -200.9984893798828, "logps/rejected": -332.18609619140625, "loss": 0.1582, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -6.500709533691406, "rewards/margins": 7.889809608459473, "rewards/rejected": -14.390518188476562, "step": 7820 }, { "epoch": 1.52, "learning_rate": 2.740706119220536e-07, "logits/chosen": -2.665151357650757, "logits/rejected": -2.6970107555389404, "logps/chosen": -218.695068359375, "logps/rejected": -386.66424560546875, "loss": 0.173, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.225282669067383, "rewards/margins": 12.516640663146973, "rewards/rejected": -15.741923332214355, "step": 7830 }, { "epoch": 1.52, "learning_rate": 2.737110807507011e-07, "logits/chosen": -2.839881658554077, "logits/rejected": -2.7552831172943115, "logps/chosen": -276.20489501953125, "logps/rejected": -325.8049011230469, "loss": 0.1292, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.9895131587982178, "rewards/margins": 8.880070686340332, "rewards/rejected": -11.869585037231445, "step": 7840 }, { "epoch": 1.52, "learning_rate": 2.733515495793485e-07, "logits/chosen": -2.5036187171936035, "logits/rejected": -2.5023043155670166, "logps/chosen": -213.8859100341797, "logps/rejected": -355.48638916015625, "loss": 0.1894, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.731394290924072, "rewards/margins": 11.786921501159668, "rewards/rejected": -17.5183162689209, "step": 7850 }, { "epoch": 1.53, "learning_rate": 2.7299201840799594e-07, "logits/chosen": -2.7792916297912598, "logits/rejected": -2.7273244857788086, "logps/chosen": -249.0773162841797, "logps/rejected": -347.36083984375, "loss": 0.0867, "rewards/accuracies": 1.0, "rewards/chosen": -0.6855994462966919, "rewards/margins": 9.428860664367676, "rewards/rejected": -10.114459037780762, "step": 7860 }, { "epoch": 1.53, "learning_rate": 2.7263248723664337e-07, "logits/chosen": -2.7454593181610107, "logits/rejected": -2.8052077293395996, "logps/chosen": -206.2342071533203, "logps/rejected": -292.8019104003906, "loss": 0.1154, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.633772373199463, "rewards/margins": 8.582466125488281, "rewards/rejected": -13.216238021850586, "step": 7870 }, { "epoch": 1.53, "learning_rate": 2.7227295606529085e-07, "logits/chosen": -2.675715208053589, "logits/rejected": -2.57414174079895, "logps/chosen": -209.45803833007812, "logps/rejected": -211.00537109375, "loss": 0.3294, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.574831008911133, "rewards/margins": 5.802220344543457, "rewards/rejected": -10.37705135345459, "step": 7880 }, { "epoch": 1.53, "learning_rate": 2.7191342489393833e-07, "logits/chosen": -2.737308979034424, "logits/rejected": -2.671957492828369, "logps/chosen": -282.55810546875, "logps/rejected": -256.31121826171875, "loss": 0.1393, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.1064114570617676, "rewards/margins": 8.2166748046875, "rewards/rejected": -11.323084831237793, "step": 7890 }, { "epoch": 1.53, "learning_rate": 2.7155389372258576e-07, "logits/chosen": -2.7994513511657715, "logits/rejected": -2.774040699005127, "logps/chosen": -218.21682739257812, "logps/rejected": -323.3345642089844, "loss": 0.1548, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -6.622430324554443, "rewards/margins": 9.895807266235352, "rewards/rejected": -16.518238067626953, "step": 7900 }, { "epoch": 1.53, "eval_logits/chosen": -2.567811965942383, "eval_logits/rejected": -2.546403408050537, "eval_logps/chosen": -265.3227844238281, "eval_logps/rejected": -289.8968811035156, "eval_loss": 0.5030146837234497, "eval_rewards/accuracies": 0.6899999976158142, "eval_rewards/chosen": -7.1084675788879395, "eval_rewards/margins": 4.689626693725586, "eval_rewards/rejected": -11.79809284210205, "eval_runtime": 145.8975, "eval_samples_per_second": 21.632, "eval_steps_per_second": 0.343, "step": 7900 }, { "epoch": 1.54, "learning_rate": 2.711943625512332e-07, "logits/chosen": -2.6100189685821533, "logits/rejected": -2.482381582260132, "logps/chosen": -267.57415771484375, "logps/rejected": -330.57696533203125, "loss": 0.1203, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.0282962322235107, "rewards/margins": 9.497358322143555, "rewards/rejected": -12.525655746459961, "step": 7910 }, { "epoch": 1.54, "learning_rate": 2.708348313798806e-07, "logits/chosen": -2.601951837539673, "logits/rejected": -2.559113025665283, "logps/chosen": -210.42172241210938, "logps/rejected": -413.45367431640625, "loss": 0.076, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.5408718585968018, "rewards/margins": 8.901400566101074, "rewards/rejected": -12.44227409362793, "step": 7920 }, { "epoch": 1.54, "learning_rate": 2.7047530020852804e-07, "logits/chosen": -2.795612335205078, "logits/rejected": -2.6182150840759277, "logps/chosen": -252.82345581054688, "logps/rejected": -254.4090576171875, "loss": 0.0967, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6061296463012695, "rewards/margins": 8.885503768920898, "rewards/rejected": -10.491633415222168, "step": 7930 }, { "epoch": 1.54, "learning_rate": 2.701157690371755e-07, "logits/chosen": -2.82684326171875, "logits/rejected": -2.7789080142974854, "logps/chosen": -250.04013061523438, "logps/rejected": -273.026611328125, "loss": 0.1309, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.7443382740020752, "rewards/margins": 7.579935550689697, "rewards/rejected": -9.324274063110352, "step": 7940 }, { "epoch": 1.54, "learning_rate": 2.6975623786582295e-07, "logits/chosen": -2.643972396850586, "logits/rejected": -2.5632810592651367, "logps/chosen": -314.1190185546875, "logps/rejected": -333.59759521484375, "loss": 0.088, "rewards/accuracies": 1.0, "rewards/chosen": -4.784397125244141, "rewards/margins": 8.821067810058594, "rewards/rejected": -13.60546588897705, "step": 7950 }, { "epoch": 1.55, "learning_rate": 2.693967066944704e-07, "logits/chosen": -2.6901941299438477, "logits/rejected": -2.7379889488220215, "logps/chosen": -240.01431274414062, "logps/rejected": -331.6454772949219, "loss": 0.0889, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.543796539306641, "rewards/margins": 4.876248836517334, "rewards/rejected": -9.420045852661133, "step": 7960 }, { "epoch": 1.55, "learning_rate": 2.690371755231178e-07, "logits/chosen": -2.680809497833252, "logits/rejected": -2.5582900047302246, "logps/chosen": -350.76336669921875, "logps/rejected": -402.9486083984375, "loss": 0.3298, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.949441432952881, "rewards/margins": 7.70681095123291, "rewards/rejected": -10.656251907348633, "step": 7970 }, { "epoch": 1.55, "learning_rate": 2.686776443517653e-07, "logits/chosen": -2.6802871227264404, "logits/rejected": -2.533712387084961, "logps/chosen": -192.17262268066406, "logps/rejected": -205.2220916748047, "loss": 0.1524, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.809290885925293, "rewards/margins": 6.305342674255371, "rewards/rejected": -9.114633560180664, "step": 7980 }, { "epoch": 1.55, "learning_rate": 2.6831811318041277e-07, "logits/chosen": -2.82965350151062, "logits/rejected": -2.797982692718506, "logps/chosen": -317.3335876464844, "logps/rejected": -405.9150695800781, "loss": 0.112, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.4481029510498047, "rewards/margins": 8.178030967712402, "rewards/rejected": -11.626134872436523, "step": 7990 }, { "epoch": 1.55, "learning_rate": 2.679585820090602e-07, "logits/chosen": -2.75278377532959, "logits/rejected": -2.604698896408081, "logps/chosen": -298.3667907714844, "logps/rejected": -364.8375549316406, "loss": 0.114, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.107235908508301, "rewards/margins": 9.927998542785645, "rewards/rejected": -15.035234451293945, "step": 8000 }, { "epoch": 1.55, "eval_logits/chosen": -2.5889623165130615, "eval_logits/rejected": -2.5693280696868896, "eval_logps/chosen": -266.796142578125, "eval_logps/rejected": -293.58087158203125, "eval_loss": 0.522428035736084, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -7.255805492401123, "eval_rewards/margins": 4.9106855392456055, "eval_rewards/rejected": -12.166491508483887, "eval_runtime": 148.4036, "eval_samples_per_second": 21.266, "eval_steps_per_second": 0.337, "step": 8000 }, { "epoch": 1.56, "learning_rate": 2.675990508377076e-07, "logits/chosen": -2.720656394958496, "logits/rejected": -2.5840041637420654, "logps/chosen": -236.41552734375, "logps/rejected": -271.75250244140625, "loss": 0.1299, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.823799133300781, "rewards/margins": 7.9150800704956055, "rewards/rejected": -14.738879203796387, "step": 8010 }, { "epoch": 1.56, "learning_rate": 2.6723951966635505e-07, "logits/chosen": -2.767911434173584, "logits/rejected": -2.777944803237915, "logps/chosen": -225.2845916748047, "logps/rejected": -319.44366455078125, "loss": 0.1138, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.285029649734497, "rewards/margins": 7.553208351135254, "rewards/rejected": -10.838237762451172, "step": 8020 }, { "epoch": 1.56, "learning_rate": 2.668799884950025e-07, "logits/chosen": -2.7186245918273926, "logits/rejected": -2.7040085792541504, "logps/chosen": -219.1761016845703, "logps/rejected": -356.8329772949219, "loss": 0.1186, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.899411201477051, "rewards/margins": 8.89518928527832, "rewards/rejected": -14.794601440429688, "step": 8030 }, { "epoch": 1.56, "learning_rate": 2.6652045732364996e-07, "logits/chosen": -2.751316547393799, "logits/rejected": -2.8012309074401855, "logps/chosen": -217.4230194091797, "logps/rejected": -340.3138122558594, "loss": 0.1767, "rewards/accuracies": 1.0, "rewards/chosen": -0.2986190915107727, "rewards/margins": 9.464986801147461, "rewards/rejected": -9.763606071472168, "step": 8040 }, { "epoch": 1.56, "learning_rate": 2.661609261522974e-07, "logits/chosen": -2.6710333824157715, "logits/rejected": -2.5617194175720215, "logps/chosen": -324.3326110839844, "logps/rejected": -319.7010192871094, "loss": 0.4444, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.9465482234954834, "rewards/margins": 7.789758205413818, "rewards/rejected": -10.736307144165039, "step": 8050 }, { "epoch": 1.56, "learning_rate": 2.658013949809448e-07, "logits/chosen": -2.7540974617004395, "logits/rejected": -2.7263998985290527, "logps/chosen": -237.75772094726562, "logps/rejected": -243.87646484375, "loss": 0.2051, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.662503480911255, "rewards/margins": 5.3351731300354, "rewards/rejected": -8.997676849365234, "step": 8060 }, { "epoch": 1.57, "learning_rate": 2.6544186380959225e-07, "logits/chosen": -2.76902437210083, "logits/rejected": -2.8533883094787598, "logps/chosen": -188.13333129882812, "logps/rejected": -302.8412780761719, "loss": 0.1427, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.508274793624878, "rewards/margins": 7.940915584564209, "rewards/rejected": -11.449190139770508, "step": 8070 }, { "epoch": 1.57, "learning_rate": 2.6508233263823973e-07, "logits/chosen": -2.795078754425049, "logits/rejected": -2.6513450145721436, "logps/chosen": -288.3661193847656, "logps/rejected": -371.65814208984375, "loss": 0.1688, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.276179552078247, "rewards/margins": 9.625847816467285, "rewards/rejected": -11.902026176452637, "step": 8080 }, { "epoch": 1.57, "learning_rate": 2.647228014668872e-07, "logits/chosen": -2.5098202228546143, "logits/rejected": -2.582827091217041, "logps/chosen": -249.5102996826172, "logps/rejected": -304.91265869140625, "loss": 0.1628, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.1279616355896, "rewards/margins": 9.646819114685059, "rewards/rejected": -13.7747802734375, "step": 8090 }, { "epoch": 1.57, "learning_rate": 2.6436327029553464e-07, "logits/chosen": -2.7641568183898926, "logits/rejected": -2.911437511444092, "logps/chosen": -225.6170654296875, "logps/rejected": -346.4241638183594, "loss": 0.112, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.264681816101074, "rewards/margins": 10.263120651245117, "rewards/rejected": -12.527801513671875, "step": 8100 }, { "epoch": 1.57, "eval_logits/chosen": -2.593346357345581, "eval_logits/rejected": -2.5735323429107666, "eval_logps/chosen": -254.83860778808594, "eval_logps/rejected": -277.5395202636719, "eval_loss": 0.5374084115028381, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -6.060052871704102, "eval_rewards/margins": 4.502304553985596, "eval_rewards/rejected": -10.562356948852539, "eval_runtime": 140.2574, "eval_samples_per_second": 22.501, "eval_steps_per_second": 0.356, "step": 8100 }, { "epoch": 1.57, "learning_rate": 2.6400373912418206e-07, "logits/chosen": -2.844630718231201, "logits/rejected": -2.7642016410827637, "logps/chosen": -283.64593505859375, "logps/rejected": -229.9371795654297, "loss": 0.1063, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.2396063804626465, "rewards/margins": 6.239510536193848, "rewards/rejected": -8.479116439819336, "step": 8110 }, { "epoch": 1.58, "learning_rate": 2.636442079528295e-07, "logits/chosen": -2.7353687286376953, "logits/rejected": -2.7575581073760986, "logps/chosen": -215.2127685546875, "logps/rejected": -360.39569091796875, "loss": 0.1807, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.294805526733398, "rewards/margins": 11.46848201751709, "rewards/rejected": -15.763287544250488, "step": 8120 }, { "epoch": 1.58, "learning_rate": 2.632846767814769e-07, "logits/chosen": -2.775391101837158, "logits/rejected": -2.7092642784118652, "logps/chosen": -336.4795227050781, "logps/rejected": -380.7021179199219, "loss": 0.2285, "rewards/accuracies": 1.0, "rewards/chosen": -3.323225736618042, "rewards/margins": 7.921922206878662, "rewards/rejected": -11.245149612426758, "step": 8130 }, { "epoch": 1.58, "learning_rate": 2.629251456101244e-07, "logits/chosen": -2.8665738105773926, "logits/rejected": -2.80812668800354, "logps/chosen": -241.3499298095703, "logps/rejected": -332.4806213378906, "loss": 0.1102, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.1491594314575195, "rewards/margins": 9.4281644821167, "rewards/rejected": -12.577322959899902, "step": 8140 }, { "epoch": 1.58, "learning_rate": 2.6256561443877183e-07, "logits/chosen": -2.7424569129943848, "logits/rejected": -2.8395683765411377, "logps/chosen": -209.18978881835938, "logps/rejected": -324.82208251953125, "loss": 0.0997, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.445475101470947, "rewards/margins": 10.062944412231445, "rewards/rejected": -14.508418083190918, "step": 8150 }, { "epoch": 1.58, "learning_rate": 2.6220608326741926e-07, "logits/chosen": -2.7943625450134277, "logits/rejected": -2.7624077796936035, "logps/chosen": -268.23248291015625, "logps/rejected": -392.8574523925781, "loss": 0.1461, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.481450080871582, "rewards/margins": 9.155346870422363, "rewards/rejected": -15.636796951293945, "step": 8160 }, { "epoch": 1.59, "learning_rate": 2.618465520960667e-07, "logits/chosen": -2.827127456665039, "logits/rejected": -2.750373363494873, "logps/chosen": -223.2224578857422, "logps/rejected": -334.0792541503906, "loss": 0.1133, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.39715576171875, "rewards/margins": 7.0858330726623535, "rewards/rejected": -10.482988357543945, "step": 8170 }, { "epoch": 1.59, "learning_rate": 2.6148702092471416e-07, "logits/chosen": -2.728161334991455, "logits/rejected": -2.774329662322998, "logps/chosen": -158.14730834960938, "logps/rejected": -279.3450927734375, "loss": 0.1076, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.5900421142578125, "rewards/margins": 8.114730834960938, "rewards/rejected": -13.70477294921875, "step": 8180 }, { "epoch": 1.59, "learning_rate": 2.6112748975336164e-07, "logits/chosen": -2.5704948902130127, "logits/rejected": -2.5647311210632324, "logps/chosen": -217.3551025390625, "logps/rejected": -352.66357421875, "loss": 0.1391, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.492300033569336, "rewards/margins": 9.363241195678711, "rewards/rejected": -14.855539321899414, "step": 8190 }, { "epoch": 1.59, "learning_rate": 2.6076795858200907e-07, "logits/chosen": -2.7108607292175293, "logits/rejected": -2.745941638946533, "logps/chosen": -234.5678253173828, "logps/rejected": -322.69244384765625, "loss": 0.1436, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.202203273773193, "rewards/margins": 10.338296890258789, "rewards/rejected": -14.540501594543457, "step": 8200 }, { "epoch": 1.59, "eval_logits/chosen": -2.593104839324951, "eval_logits/rejected": -2.5737414360046387, "eval_logps/chosen": -264.7280578613281, "eval_logps/rejected": -291.8731384277344, "eval_loss": 0.5275627970695496, "eval_rewards/accuracies": 0.7174999713897705, "eval_rewards/chosen": -7.049000263214111, "eval_rewards/margins": 4.946714401245117, "eval_rewards/rejected": -11.995715141296387, "eval_runtime": 152.9375, "eval_samples_per_second": 20.636, "eval_steps_per_second": 0.327, "step": 8200 }, { "epoch": 1.59, "learning_rate": 2.604084274106565e-07, "logits/chosen": -2.6715493202209473, "logits/rejected": -2.7347023487091064, "logps/chosen": -257.12225341796875, "logps/rejected": -414.55670166015625, "loss": 0.1169, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3295891284942627, "rewards/margins": 11.935256004333496, "rewards/rejected": -13.264846801757812, "step": 8210 }, { "epoch": 1.6, "learning_rate": 2.6004889623930393e-07, "logits/chosen": -2.9021527767181396, "logits/rejected": -2.8916983604431152, "logps/chosen": -337.2508239746094, "logps/rejected": -341.51507568359375, "loss": 0.1411, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.747110366821289, "rewards/margins": 6.220602989196777, "rewards/rejected": -8.967713356018066, "step": 8220 }, { "epoch": 1.6, "learning_rate": 2.5968936506795136e-07, "logits/chosen": -2.638962507247925, "logits/rejected": -2.660064220428467, "logps/chosen": -251.115966796875, "logps/rejected": -266.664306640625, "loss": 0.1566, "rewards/accuracies": 0.75, "rewards/chosen": -7.338654518127441, "rewards/margins": 6.121777534484863, "rewards/rejected": -13.460432052612305, "step": 8230 }, { "epoch": 1.6, "learning_rate": 2.5932983389659884e-07, "logits/chosen": -2.718865394592285, "logits/rejected": -2.6547913551330566, "logps/chosen": -332.82476806640625, "logps/rejected": -387.24530029296875, "loss": 0.0852, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.32920503616333, "rewards/margins": 11.49052619934082, "rewards/rejected": -12.819730758666992, "step": 8240 }, { "epoch": 1.6, "learning_rate": 2.5897030272524626e-07, "logits/chosen": -2.678675413131714, "logits/rejected": -2.5542550086975098, "logps/chosen": -291.0780944824219, "logps/rejected": -351.25396728515625, "loss": 0.154, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.5418224334716797, "rewards/margins": 8.347354888916016, "rewards/rejected": -11.889177322387695, "step": 8250 }, { "epoch": 1.6, "learning_rate": 2.586107715538937e-07, "logits/chosen": -2.674690008163452, "logits/rejected": -2.646212577819824, "logps/chosen": -220.5702362060547, "logps/rejected": -262.0445861816406, "loss": 0.1372, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.464944362640381, "rewards/margins": 7.086409091949463, "rewards/rejected": -10.551352500915527, "step": 8260 }, { "epoch": 1.61, "learning_rate": 2.582512403825411e-07, "logits/chosen": -2.6054418087005615, "logits/rejected": -2.6012120246887207, "logps/chosen": -216.8031768798828, "logps/rejected": -303.86370849609375, "loss": 0.1155, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.4664530754089355, "rewards/margins": 7.803128719329834, "rewards/rejected": -12.269582748413086, "step": 8270 }, { "epoch": 1.61, "learning_rate": 2.578917092111886e-07, "logits/chosen": -2.7718937397003174, "logits/rejected": -2.740553617477417, "logps/chosen": -302.05499267578125, "logps/rejected": -382.68084716796875, "loss": 0.1213, "rewards/accuracies": 0.75, "rewards/chosen": -3.131575107574463, "rewards/margins": 7.782123565673828, "rewards/rejected": -10.913698196411133, "step": 8280 }, { "epoch": 1.61, "learning_rate": 2.575321780398361e-07, "logits/chosen": -2.762749433517456, "logits/rejected": -2.6545376777648926, "logps/chosen": -209.6224365234375, "logps/rejected": -259.521240234375, "loss": 0.1442, "rewards/accuracies": 0.75, "rewards/chosen": -6.175038814544678, "rewards/margins": 6.265427112579346, "rewards/rejected": -12.440465927124023, "step": 8290 }, { "epoch": 1.61, "learning_rate": 2.571726468684835e-07, "logits/chosen": -2.8032710552215576, "logits/rejected": -2.654263973236084, "logps/chosen": -298.76776123046875, "logps/rejected": -423.67340087890625, "loss": 0.1369, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.403448581695557, "rewards/margins": 9.833967208862305, "rewards/rejected": -14.237414360046387, "step": 8300 }, { "epoch": 1.61, "eval_logits/chosen": -2.596484661102295, "eval_logits/rejected": -2.576378583908081, "eval_logps/chosen": -261.24853515625, "eval_logps/rejected": -285.3045959472656, "eval_loss": 0.5190584659576416, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -6.701047420501709, "eval_rewards/margins": 4.637817859649658, "eval_rewards/rejected": -11.338866233825684, "eval_runtime": 140.2488, "eval_samples_per_second": 22.503, "eval_steps_per_second": 0.357, "step": 8300 }, { "epoch": 1.61, "learning_rate": 2.5681311569713094e-07, "logits/chosen": -2.4661483764648438, "logits/rejected": -2.4401745796203613, "logps/chosen": -273.05242919921875, "logps/rejected": -268.7724609375, "loss": 0.1018, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.7230803966522217, "rewards/margins": 8.528326988220215, "rewards/rejected": -12.251407623291016, "step": 8310 }, { "epoch": 1.62, "learning_rate": 2.5645358452577837e-07, "logits/chosen": -2.7269883155822754, "logits/rejected": -2.721303701400757, "logps/chosen": -209.5245361328125, "logps/rejected": -365.2374267578125, "loss": 0.0911, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.265542984008789, "rewards/margins": 8.736278533935547, "rewards/rejected": -12.001821517944336, "step": 8320 }, { "epoch": 1.62, "learning_rate": 2.560940533544258e-07, "logits/chosen": -2.780273199081421, "logits/rejected": -2.7520592212677, "logps/chosen": -231.3140411376953, "logps/rejected": -347.9571533203125, "loss": 0.1173, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.4555554389953613, "rewards/margins": 6.264697074890137, "rewards/rejected": -9.720252990722656, "step": 8330 }, { "epoch": 1.62, "learning_rate": 2.557345221830733e-07, "logits/chosen": -2.7330617904663086, "logits/rejected": -2.5406532287597656, "logps/chosen": -267.46856689453125, "logps/rejected": -313.81842041015625, "loss": 0.1582, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.8921918869018555, "rewards/margins": 7.403140068054199, "rewards/rejected": -13.295331954956055, "step": 8340 }, { "epoch": 1.62, "learning_rate": 2.553749910117207e-07, "logits/chosen": -2.6633763313293457, "logits/rejected": -2.5974395275115967, "logps/chosen": -225.1736297607422, "logps/rejected": -315.27972412109375, "loss": 0.1217, "rewards/accuracies": 1.0, "rewards/chosen": -3.175999402999878, "rewards/margins": 9.365495681762695, "rewards/rejected": -12.541496276855469, "step": 8350 }, { "epoch": 1.62, "learning_rate": 2.5501545984036813e-07, "logits/chosen": -2.697948694229126, "logits/rejected": -2.769273281097412, "logps/chosen": -217.4983673095703, "logps/rejected": -340.5929260253906, "loss": 0.1365, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.359081745147705, "rewards/margins": 9.063468933105469, "rewards/rejected": -11.422552108764648, "step": 8360 }, { "epoch": 1.62, "learning_rate": 2.546559286690156e-07, "logits/chosen": -2.5433297157287598, "logits/rejected": -2.5445127487182617, "logps/chosen": -286.13427734375, "logps/rejected": -404.03656005859375, "loss": 0.4144, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -7.150385856628418, "rewards/margins": 7.654179573059082, "rewards/rejected": -14.8045654296875, "step": 8370 }, { "epoch": 1.63, "learning_rate": 2.5429639749766304e-07, "logits/chosen": -2.6273345947265625, "logits/rejected": -2.657918930053711, "logps/chosen": -183.72579956054688, "logps/rejected": -360.6436462402344, "loss": 0.0865, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.842744827270508, "rewards/margins": 7.5178422927856445, "rewards/rejected": -12.360587120056152, "step": 8380 }, { "epoch": 1.63, "learning_rate": 2.539368663263105e-07, "logits/chosen": -2.592080593109131, "logits/rejected": -2.5851070880889893, "logps/chosen": -281.6587219238281, "logps/rejected": -329.4895324707031, "loss": 0.1224, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.482011318206787, "rewards/margins": 11.097574234008789, "rewards/rejected": -15.579585075378418, "step": 8390 }, { "epoch": 1.63, "learning_rate": 2.5357733515495795e-07, "logits/chosen": -2.502882957458496, "logits/rejected": -2.366497755050659, "logps/chosen": -233.24508666992188, "logps/rejected": -336.41009521484375, "loss": 0.1545, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.115644931793213, "rewards/margins": 8.525575637817383, "rewards/rejected": -13.64122200012207, "step": 8400 }, { "epoch": 1.63, "eval_logits/chosen": -2.4826722145080566, "eval_logits/rejected": -2.4635934829711914, "eval_logps/chosen": -269.8938903808594, "eval_logps/rejected": -299.3194580078125, "eval_loss": 0.5305549502372742, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -7.565580368041992, "eval_rewards/margins": 5.174770355224609, "eval_rewards/rejected": -12.740350723266602, "eval_runtime": 139.887, "eval_samples_per_second": 22.561, "eval_steps_per_second": 0.357, "step": 8400 }, { "epoch": 1.63, "learning_rate": 2.532178039836054e-07, "logits/chosen": -2.603243589401245, "logits/rejected": -2.623236894607544, "logps/chosen": -236.5806427001953, "logps/rejected": -352.7832336425781, "loss": 0.1467, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.015347480773926, "rewards/margins": 10.90376091003418, "rewards/rejected": -15.919107437133789, "step": 8410 }, { "epoch": 1.63, "learning_rate": 2.528582728122528e-07, "logits/chosen": -2.7527453899383545, "logits/rejected": -2.450197696685791, "logps/chosen": -339.81585693359375, "logps/rejected": -272.74737548828125, "loss": 0.1032, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.699526309967041, "rewards/margins": 7.571126461029053, "rewards/rejected": -12.270652770996094, "step": 8420 }, { "epoch": 1.64, "learning_rate": 2.5249874164090023e-07, "logits/chosen": -2.668938636779785, "logits/rejected": -2.758716583251953, "logps/chosen": -250.9716796875, "logps/rejected": -392.6880187988281, "loss": 0.1451, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.294054627418518, "rewards/margins": 12.130877494812012, "rewards/rejected": -13.424932479858398, "step": 8430 }, { "epoch": 1.64, "learning_rate": 2.521392104695477e-07, "logits/chosen": -2.587261199951172, "logits/rejected": -2.597693681716919, "logps/chosen": -298.6204833984375, "logps/rejected": -461.45892333984375, "loss": 0.1138, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.775132179260254, "rewards/margins": 11.153104782104492, "rewards/rejected": -16.928237915039062, "step": 8440 }, { "epoch": 1.64, "learning_rate": 2.5177967929819514e-07, "logits/chosen": -2.4586944580078125, "logits/rejected": -2.5723679065704346, "logps/chosen": -217.3318328857422, "logps/rejected": -385.0566101074219, "loss": 0.1325, "rewards/accuracies": 1.0, "rewards/chosen": -1.816227912902832, "rewards/margins": 9.050592422485352, "rewards/rejected": -10.866819381713867, "step": 8450 }, { "epoch": 1.64, "learning_rate": 2.5142014812684257e-07, "logits/chosen": -2.317434549331665, "logits/rejected": -2.284421443939209, "logps/chosen": -255.93148803710938, "logps/rejected": -367.16607666015625, "loss": 0.1027, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.184701919555664, "rewards/margins": 9.185425758361816, "rewards/rejected": -13.37012767791748, "step": 8460 }, { "epoch": 1.64, "learning_rate": 2.5106061695549005e-07, "logits/chosen": -2.5617194175720215, "logits/rejected": -2.551328659057617, "logps/chosen": -260.8974609375, "logps/rejected": -385.60650634765625, "loss": 0.1505, "rewards/accuracies": 1.0, "rewards/chosen": -4.0798234939575195, "rewards/margins": 7.473949432373047, "rewards/rejected": -11.553773880004883, "step": 8470 }, { "epoch": 1.65, "learning_rate": 2.507010857841375e-07, "logits/chosen": -2.66874361038208, "logits/rejected": -2.5429797172546387, "logps/chosen": -307.7286376953125, "logps/rejected": -308.6953430175781, "loss": 0.1053, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.031424045562744, "rewards/margins": 8.793882369995117, "rewards/rejected": -14.825304985046387, "step": 8480 }, { "epoch": 1.65, "learning_rate": 2.5034155461278496e-07, "logits/chosen": -2.5712246894836426, "logits/rejected": -2.59190034866333, "logps/chosen": -274.2309265136719, "logps/rejected": -383.289306640625, "loss": 0.1168, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.81260871887207, "rewards/margins": 5.698575019836426, "rewards/rejected": -11.511183738708496, "step": 8490 }, { "epoch": 1.65, "learning_rate": 2.4998202344143233e-07, "logits/chosen": -2.5047717094421387, "logits/rejected": -2.4825732707977295, "logps/chosen": -236.83578491210938, "logps/rejected": -373.082763671875, "loss": 0.1052, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.311113357543945, "rewards/margins": 11.240933418273926, "rewards/rejected": -19.552045822143555, "step": 8500 }, { "epoch": 1.65, "eval_logits/chosen": -2.35133957862854, "eval_logits/rejected": -2.327308416366577, "eval_logps/chosen": -285.02752685546875, "eval_logps/rejected": -317.7987365722656, "eval_loss": 0.524840235710144, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -9.07894515991211, "eval_rewards/margins": 5.509334564208984, "eval_rewards/rejected": -14.588278770446777, "eval_runtime": 140.7123, "eval_samples_per_second": 22.429, "eval_steps_per_second": 0.355, "step": 8500 }, { "epoch": 1.65, "learning_rate": 2.496224922700798e-07, "logits/chosen": -2.419813871383667, "logits/rejected": -2.417640209197998, "logps/chosen": -299.6729736328125, "logps/rejected": -333.9921569824219, "loss": 0.1381, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.1953866481781006, "rewards/margins": 9.779828071594238, "rewards/rejected": -12.975214004516602, "step": 8510 }, { "epoch": 1.65, "learning_rate": 2.4926296109872724e-07, "logits/chosen": -2.5586345195770264, "logits/rejected": -2.5299503803253174, "logps/chosen": -187.5476837158203, "logps/rejected": -240.4314422607422, "loss": 0.0784, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.080376148223877, "rewards/margins": 5.857470989227295, "rewards/rejected": -8.937847137451172, "step": 8520 }, { "epoch": 1.66, "learning_rate": 2.489034299273747e-07, "logits/chosen": -2.5710880756378174, "logits/rejected": -2.5085082054138184, "logps/chosen": -285.5210266113281, "logps/rejected": -348.65240478515625, "loss": 0.1039, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.544827938079834, "rewards/margins": 9.803266525268555, "rewards/rejected": -14.348093032836914, "step": 8530 }, { "epoch": 1.66, "learning_rate": 2.4854389875602215e-07, "logits/chosen": -2.580644130706787, "logits/rejected": -2.543076753616333, "logps/chosen": -281.333984375, "logps/rejected": -367.89495849609375, "loss": 0.1258, "rewards/accuracies": 1.0, "rewards/chosen": -2.154938220977783, "rewards/margins": 11.225164413452148, "rewards/rejected": -13.380102157592773, "step": 8540 }, { "epoch": 1.66, "learning_rate": 2.481843675846696e-07, "logits/chosen": -2.522185802459717, "logits/rejected": -2.502575635910034, "logps/chosen": -231.32388305664062, "logps/rejected": -331.98504638671875, "loss": 0.1144, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.6895172595977783, "rewards/margins": 10.420839309692383, "rewards/rejected": -14.110356330871582, "step": 8550 }, { "epoch": 1.66, "learning_rate": 2.47824836413317e-07, "logits/chosen": -2.552109956741333, "logits/rejected": -2.436415433883667, "logps/chosen": -293.69000244140625, "logps/rejected": -320.4071350097656, "loss": 0.1453, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.968686580657959, "rewards/margins": 10.455901145935059, "rewards/rejected": -18.42458724975586, "step": 8560 }, { "epoch": 1.66, "learning_rate": 2.474653052419645e-07, "logits/chosen": -2.4954328536987305, "logits/rejected": -2.5189099311828613, "logps/chosen": -264.4991149902344, "logps/rejected": -334.30804443359375, "loss": 0.1144, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.1249985694885254, "rewards/margins": 8.98199462890625, "rewards/rejected": -11.106993675231934, "step": 8570 }, { "epoch": 1.67, "learning_rate": 2.471057740706119e-07, "logits/chosen": -2.5667037963867188, "logits/rejected": -2.458103895187378, "logps/chosen": -246.67794799804688, "logps/rejected": -301.4194030761719, "loss": 0.1354, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.499109268188477, "rewards/margins": 6.898039817810059, "rewards/rejected": -11.397150039672852, "step": 8580 }, { "epoch": 1.67, "learning_rate": 2.4674624289925934e-07, "logits/chosen": -2.5735199451446533, "logits/rejected": -2.528298854827881, "logps/chosen": -211.7207794189453, "logps/rejected": -347.9034423828125, "loss": 0.1651, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.928305625915527, "rewards/margins": 9.669815063476562, "rewards/rejected": -15.598121643066406, "step": 8590 }, { "epoch": 1.67, "learning_rate": 2.4638671172790677e-07, "logits/chosen": -2.5571844577789307, "logits/rejected": -2.4557769298553467, "logps/chosen": -248.6903839111328, "logps/rejected": -430.58935546875, "loss": 0.1193, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.2300491333007812, "rewards/margins": 7.920162200927734, "rewards/rejected": -11.150211334228516, "step": 8600 }, { "epoch": 1.67, "eval_logits/chosen": -2.3431856632232666, "eval_logits/rejected": -2.319796085357666, "eval_logps/chosen": -277.3157958984375, "eval_logps/rejected": -308.3280944824219, "eval_loss": 0.5251381397247314, "eval_rewards/accuracies": 0.6924999952316284, "eval_rewards/chosen": -8.30777359008789, "eval_rewards/margins": 5.333440780639648, "eval_rewards/rejected": -13.641214370727539, "eval_runtime": 139.8038, "eval_samples_per_second": 22.574, "eval_steps_per_second": 0.358, "step": 8600 }, { "epoch": 1.67, "learning_rate": 2.4602718055655425e-07, "logits/chosen": -2.423828363418579, "logits/rejected": -2.4125685691833496, "logps/chosen": -289.61383056640625, "logps/rejected": -350.7088928222656, "loss": 0.0768, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.05233097076416, "rewards/margins": 8.507286071777344, "rewards/rejected": -15.559616088867188, "step": 8610 }, { "epoch": 1.67, "learning_rate": 2.456676493852017e-07, "logits/chosen": -2.4309775829315186, "logits/rejected": -2.33675217628479, "logps/chosen": -245.5044708251953, "logps/rejected": -277.96368408203125, "loss": 0.1245, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.733599662780762, "rewards/margins": 9.675500869750977, "rewards/rejected": -15.409098625183105, "step": 8620 }, { "epoch": 1.68, "learning_rate": 2.4530811821384916e-07, "logits/chosen": -2.4593870639801025, "logits/rejected": -2.5098235607147217, "logps/chosen": -233.9044189453125, "logps/rejected": -321.5685119628906, "loss": 0.1215, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -6.319704532623291, "rewards/margins": 9.51965045928955, "rewards/rejected": -15.83935546875, "step": 8630 }, { "epoch": 1.68, "learning_rate": 2.449485870424966e-07, "logits/chosen": -2.391671895980835, "logits/rejected": -2.434969663619995, "logps/chosen": -234.9580841064453, "logps/rejected": -363.7305603027344, "loss": 0.1435, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.2724900245666504, "rewards/margins": 10.682934761047363, "rewards/rejected": -13.955424308776855, "step": 8640 }, { "epoch": 1.68, "learning_rate": 2.44589055871144e-07, "logits/chosen": -2.547769784927368, "logits/rejected": -2.5287559032440186, "logps/chosen": -245.45480346679688, "logps/rejected": -263.2743835449219, "loss": 0.1296, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.414554595947266, "rewards/margins": 7.894643306732178, "rewards/rejected": -14.309199333190918, "step": 8650 }, { "epoch": 1.68, "learning_rate": 2.4422952469979144e-07, "logits/chosen": -2.5728485584259033, "logits/rejected": -2.563326835632324, "logps/chosen": -254.01171875, "logps/rejected": -323.68011474609375, "loss": 0.0999, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.464980602264404, "rewards/margins": 9.061437606811523, "rewards/rejected": -14.526418685913086, "step": 8660 }, { "epoch": 1.68, "learning_rate": 2.438699935284389e-07, "logits/chosen": -2.5206618309020996, "logits/rejected": -2.633418560028076, "logps/chosen": -234.8514404296875, "logps/rejected": -381.8434143066406, "loss": 0.1121, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.4790866374969482, "rewards/margins": 8.549965858459473, "rewards/rejected": -12.029050827026367, "step": 8670 }, { "epoch": 1.69, "learning_rate": 2.4351046235708635e-07, "logits/chosen": -2.5908291339874268, "logits/rejected": -2.5766384601593018, "logps/chosen": -264.9714050292969, "logps/rejected": -282.068359375, "loss": 0.1595, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.4073896408081055, "rewards/margins": 6.865915775299072, "rewards/rejected": -13.27330493927002, "step": 8680 }, { "epoch": 1.69, "learning_rate": 2.431509311857338e-07, "logits/chosen": -2.554011344909668, "logits/rejected": -2.550485849380493, "logps/chosen": -200.98663330078125, "logps/rejected": -279.0447692871094, "loss": 0.0848, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.101694583892822, "rewards/margins": 6.140630722045898, "rewards/rejected": -12.242324829101562, "step": 8690 }, { "epoch": 1.69, "learning_rate": 2.427914000143812e-07, "logits/chosen": -2.650585889816284, "logits/rejected": -2.61324143409729, "logps/chosen": -297.4610595703125, "logps/rejected": -302.3990783691406, "loss": 0.143, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.797205924987793, "rewards/margins": 10.048017501831055, "rewards/rejected": -14.845222473144531, "step": 8700 }, { "epoch": 1.69, "eval_logits/chosen": -2.4667084217071533, "eval_logits/rejected": -2.452279567718506, "eval_logps/chosen": -264.91510009765625, "eval_logps/rejected": -290.2835693359375, "eval_loss": 0.5170483589172363, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -7.06770133972168, "eval_rewards/margins": 4.76905632019043, "eval_rewards/rejected": -11.83675765991211, "eval_runtime": 140.4727, "eval_samples_per_second": 22.467, "eval_steps_per_second": 0.356, "step": 8700 }, { "epoch": 1.69, "learning_rate": 2.424318688430287e-07, "logits/chosen": -2.446254253387451, "logits/rejected": -2.467956781387329, "logps/chosen": -239.3897247314453, "logps/rejected": -371.78204345703125, "loss": 0.1045, "rewards/accuracies": 1.0, "rewards/chosen": -2.7158796787261963, "rewards/margins": 11.193151473999023, "rewards/rejected": -13.909029960632324, "step": 8710 }, { "epoch": 1.69, "learning_rate": 2.420723376716761e-07, "logits/chosen": -2.509413242340088, "logits/rejected": -2.5538456439971924, "logps/chosen": -225.9101104736328, "logps/rejected": -290.42205810546875, "loss": 0.1323, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.5373482704162598, "rewards/margins": 9.28911018371582, "rewards/rejected": -12.826458930969238, "step": 8720 }, { "epoch": 1.69, "learning_rate": 2.417128065003236e-07, "logits/chosen": -2.6275618076324463, "logits/rejected": -2.5535731315612793, "logps/chosen": -319.5244140625, "logps/rejected": -357.25787353515625, "loss": 0.107, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.244515895843506, "rewards/margins": 8.635760307312012, "rewards/rejected": -15.880276679992676, "step": 8730 }, { "epoch": 1.7, "learning_rate": 2.41353275328971e-07, "logits/chosen": -2.63443660736084, "logits/rejected": -2.674879550933838, "logps/chosen": -318.2145080566406, "logps/rejected": -370.57366943359375, "loss": 0.0849, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.588968753814697, "rewards/margins": 8.581594467163086, "rewards/rejected": -13.170560836791992, "step": 8740 }, { "epoch": 1.7, "learning_rate": 2.4099374415761845e-07, "logits/chosen": -2.6127374172210693, "logits/rejected": -2.562065601348877, "logps/chosen": -290.62213134765625, "logps/rejected": -313.4164733886719, "loss": 0.1102, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.564499855041504, "rewards/margins": 7.381847381591797, "rewards/rejected": -12.9463472366333, "step": 8750 }, { "epoch": 1.7, "learning_rate": 2.4063421298626593e-07, "logits/chosen": -2.541891574859619, "logits/rejected": -2.513563632965088, "logps/chosen": -224.95968627929688, "logps/rejected": -294.7865295410156, "loss": 0.1926, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.619781494140625, "rewards/margins": 7.007911682128906, "rewards/rejected": -12.627693176269531, "step": 8760 }, { "epoch": 1.7, "learning_rate": 2.4027468181491336e-07, "logits/chosen": -2.646176338195801, "logits/rejected": -2.579622268676758, "logps/chosen": -337.343017578125, "logps/rejected": -337.2195129394531, "loss": 0.1192, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.850675106048584, "rewards/margins": 8.994073867797852, "rewards/rejected": -13.844747543334961, "step": 8770 }, { "epoch": 1.7, "learning_rate": 2.399151506435608e-07, "logits/chosen": -2.61106276512146, "logits/rejected": -2.5591437816619873, "logps/chosen": -253.7194366455078, "logps/rejected": -304.6730041503906, "loss": 0.2013, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.998692989349365, "rewards/margins": 7.946354866027832, "rewards/rejected": -14.945048332214355, "step": 8780 }, { "epoch": 1.71, "learning_rate": 2.395556194722082e-07, "logits/chosen": -2.667248487472534, "logits/rejected": -2.6080033779144287, "logps/chosen": -222.0634002685547, "logps/rejected": -280.4680480957031, "loss": 0.1192, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.22488784790039, "rewards/margins": 6.8585405349731445, "rewards/rejected": -15.083427429199219, "step": 8790 }, { "epoch": 1.71, "learning_rate": 2.3919608830085564e-07, "logits/chosen": -2.721423625946045, "logits/rejected": -2.6756691932678223, "logps/chosen": -273.72686767578125, "logps/rejected": -351.1053466796875, "loss": 0.0811, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.528761386871338, "rewards/margins": 7.336087703704834, "rewards/rejected": -13.864850044250488, "step": 8800 }, { "epoch": 1.71, "eval_logits/chosen": -2.504269599914551, "eval_logits/rejected": -2.4859654903411865, "eval_logps/chosen": -292.2650451660156, "eval_logps/rejected": -321.0940246582031, "eval_loss": 0.5283924341201782, "eval_rewards/accuracies": 0.6924999952316284, "eval_rewards/chosen": -9.802698135375977, "eval_rewards/margins": 5.115107536315918, "eval_rewards/rejected": -14.917806625366211, "eval_runtime": 140.2752, "eval_samples_per_second": 22.499, "eval_steps_per_second": 0.356, "step": 8800 }, { "epoch": 1.71, "learning_rate": 2.388365571295031e-07, "logits/chosen": -2.6268365383148193, "logits/rejected": -2.6185123920440674, "logps/chosen": -288.2203369140625, "logps/rejected": -363.9134216308594, "loss": 0.2844, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.239719867706299, "rewards/margins": 12.082732200622559, "rewards/rejected": -15.3224515914917, "step": 8810 }, { "epoch": 1.71, "learning_rate": 2.3847702595815055e-07, "logits/chosen": -2.707850694656372, "logits/rejected": -2.728778600692749, "logps/chosen": -280.7693176269531, "logps/rejected": -307.51300048828125, "loss": 0.1226, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.3982996940612793, "rewards/margins": 6.893430233001709, "rewards/rejected": -10.291729927062988, "step": 8820 }, { "epoch": 1.71, "learning_rate": 2.38117494786798e-07, "logits/chosen": -2.6748437881469727, "logits/rejected": -2.6657848358154297, "logps/chosen": -264.40545654296875, "logps/rejected": -331.6806640625, "loss": 0.0864, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.236111640930176, "rewards/margins": 10.806436538696289, "rewards/rejected": -17.04254722595215, "step": 8830 }, { "epoch": 1.72, "learning_rate": 2.3775796361544546e-07, "logits/chosen": -2.6756479740142822, "logits/rejected": -2.5985846519470215, "logps/chosen": -303.21099853515625, "logps/rejected": -344.70684814453125, "loss": 0.1581, "rewards/accuracies": 0.75, "rewards/chosen": -8.999774932861328, "rewards/margins": 6.793562412261963, "rewards/rejected": -15.79333782196045, "step": 8840 }, { "epoch": 1.72, "learning_rate": 2.373984324440929e-07, "logits/chosen": -2.760925769805908, "logits/rejected": -2.647653102874756, "logps/chosen": -256.6393127441406, "logps/rejected": -352.3446044921875, "loss": 0.112, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.275694847106934, "rewards/margins": 12.48613166809082, "rewards/rejected": -17.761825561523438, "step": 8850 }, { "epoch": 1.72, "learning_rate": 2.3703890127274034e-07, "logits/chosen": -2.7134649753570557, "logits/rejected": -2.698920965194702, "logps/chosen": -300.4090270996094, "logps/rejected": -319.36651611328125, "loss": 0.1112, "rewards/accuracies": 1.0, "rewards/chosen": -3.171463966369629, "rewards/margins": 9.919939994812012, "rewards/rejected": -13.091404914855957, "step": 8860 }, { "epoch": 1.72, "learning_rate": 2.3667937010138777e-07, "logits/chosen": -2.5016653537750244, "logits/rejected": -2.4401469230651855, "logps/chosen": -236.34375, "logps/rejected": -323.60693359375, "loss": 0.133, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.820054054260254, "rewards/margins": 8.825299263000488, "rewards/rejected": -14.645350456237793, "step": 8870 }, { "epoch": 1.72, "learning_rate": 2.3631983893003522e-07, "logits/chosen": -2.625457525253296, "logits/rejected": -2.563734531402588, "logps/chosen": -250.7583770751953, "logps/rejected": -246.5982208251953, "loss": 0.1607, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.669973850250244, "rewards/margins": 4.5254669189453125, "rewards/rejected": -10.195440292358398, "step": 8880 }, { "epoch": 1.73, "learning_rate": 2.3596030775868268e-07, "logits/chosen": -2.6635663509368896, "logits/rejected": -2.6659483909606934, "logps/chosen": -194.7909393310547, "logps/rejected": -316.8825988769531, "loss": 0.1038, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.9440155029296875, "rewards/margins": 9.397473335266113, "rewards/rejected": -14.341486930847168, "step": 8890 }, { "epoch": 1.73, "learning_rate": 2.356007765873301e-07, "logits/chosen": -2.380208730697632, "logits/rejected": -2.390423059463501, "logps/chosen": -193.83676147460938, "logps/rejected": -259.2057800292969, "loss": 0.1453, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -6.619728088378906, "rewards/margins": 6.362164497375488, "rewards/rejected": -12.981892585754395, "step": 8900 }, { "epoch": 1.73, "eval_logits/chosen": -2.482853651046753, "eval_logits/rejected": -2.468625068664551, "eval_logps/chosen": -285.21710205078125, "eval_logps/rejected": -311.3193054199219, "eval_loss": 0.5207270979881287, "eval_rewards/accuracies": 0.6899999976158142, "eval_rewards/chosen": -9.097904205322266, "eval_rewards/margins": 4.842432022094727, "eval_rewards/rejected": -13.940337181091309, "eval_runtime": 153.9923, "eval_samples_per_second": 20.495, "eval_steps_per_second": 0.325, "step": 8900 }, { "epoch": 1.73, "learning_rate": 2.3524124541597756e-07, "logits/chosen": -2.5679116249084473, "logits/rejected": -2.516472101211548, "logps/chosen": -352.97833251953125, "logps/rejected": -322.7197265625, "loss": 0.144, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.5343475341796875, "rewards/margins": 6.903378486633301, "rewards/rejected": -9.437726020812988, "step": 8910 }, { "epoch": 1.73, "learning_rate": 2.34881714244625e-07, "logits/chosen": -2.4197185039520264, "logits/rejected": -2.461596965789795, "logps/chosen": -305.5779113769531, "logps/rejected": -395.81695556640625, "loss": 0.1581, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.8303463459014893, "rewards/margins": 12.583988189697266, "rewards/rejected": -16.414335250854492, "step": 8920 }, { "epoch": 1.73, "learning_rate": 2.3452218307327242e-07, "logits/chosen": -2.723632335662842, "logits/rejected": -2.62469220161438, "logps/chosen": -285.1393127441406, "logps/rejected": -288.421630859375, "loss": 0.1197, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.8004610538482666, "rewards/margins": 7.442433834075928, "rewards/rejected": -10.242895126342773, "step": 8930 }, { "epoch": 1.74, "learning_rate": 2.341626519019199e-07, "logits/chosen": -2.506471633911133, "logits/rejected": -2.493252992630005, "logps/chosen": -277.4583435058594, "logps/rejected": -288.23663330078125, "loss": 0.1401, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.454425811767578, "rewards/margins": 7.775014400482178, "rewards/rejected": -13.229438781738281, "step": 8940 }, { "epoch": 1.74, "learning_rate": 2.3380312073056732e-07, "logits/chosen": -2.4613587856292725, "logits/rejected": -2.4588119983673096, "logps/chosen": -264.9566650390625, "logps/rejected": -396.5501403808594, "loss": 0.2337, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -8.629644393920898, "rewards/margins": 7.023741722106934, "rewards/rejected": -15.653387069702148, "step": 8950 }, { "epoch": 1.74, "learning_rate": 2.3344358955921478e-07, "logits/chosen": -2.557526111602783, "logits/rejected": -2.6252875328063965, "logps/chosen": -323.6629943847656, "logps/rejected": -381.2836608886719, "loss": 0.1001, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.549731731414795, "rewards/margins": 8.749812126159668, "rewards/rejected": -12.299544334411621, "step": 8960 }, { "epoch": 1.74, "learning_rate": 2.330840583878622e-07, "logits/chosen": -2.613024950027466, "logits/rejected": -2.554842233657837, "logps/chosen": -225.9105224609375, "logps/rejected": -357.39404296875, "loss": 0.0843, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.603348731994629, "rewards/margins": 10.769676208496094, "rewards/rejected": -18.373023986816406, "step": 8970 }, { "epoch": 1.74, "learning_rate": 2.3272452721650963e-07, "logits/chosen": -2.527435064315796, "logits/rejected": -2.551858425140381, "logps/chosen": -284.74737548828125, "logps/rejected": -255.0273895263672, "loss": 0.1495, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.3346686363220215, "rewards/margins": 6.159907341003418, "rewards/rejected": -10.494577407836914, "step": 8980 }, { "epoch": 1.75, "learning_rate": 2.3236499604515712e-07, "logits/chosen": -2.518888235092163, "logits/rejected": -2.5484490394592285, "logps/chosen": -253.7728271484375, "logps/rejected": -293.1611633300781, "loss": 0.1164, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -7.769311428070068, "rewards/margins": 6.031195640563965, "rewards/rejected": -13.800506591796875, "step": 8990 }, { "epoch": 1.75, "learning_rate": 2.3200546487380454e-07, "logits/chosen": -2.5659279823303223, "logits/rejected": -2.503180980682373, "logps/chosen": -260.3642883300781, "logps/rejected": -317.8871154785156, "loss": 0.1157, "rewards/accuracies": 1.0, "rewards/chosen": -3.4114089012145996, "rewards/margins": 8.762961387634277, "rewards/rejected": -12.174372673034668, "step": 9000 }, { "epoch": 1.75, "eval_logits/chosen": -2.459453582763672, "eval_logits/rejected": -2.444923162460327, "eval_logps/chosen": -277.1577453613281, "eval_logps/rejected": -306.0013122558594, "eval_loss": 0.5219169855117798, "eval_rewards/accuracies": 0.6949999928474426, "eval_rewards/chosen": -8.291966438293457, "eval_rewards/margins": 5.116571426391602, "eval_rewards/rejected": -13.408538818359375, "eval_runtime": 140.7558, "eval_samples_per_second": 22.422, "eval_steps_per_second": 0.355, "step": 9000 }, { "epoch": 1.75, "learning_rate": 2.31645933702452e-07, "logits/chosen": -2.5288453102111816, "logits/rejected": -2.3973660469055176, "logps/chosen": -316.841796875, "logps/rejected": -333.89495849609375, "loss": 0.3495, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.4621787071228027, "rewards/margins": 8.068937301635742, "rewards/rejected": -11.531115531921387, "step": 9010 }, { "epoch": 1.75, "learning_rate": 2.3128640253109942e-07, "logits/chosen": -2.419325351715088, "logits/rejected": -2.4747190475463867, "logps/chosen": -333.5755310058594, "logps/rejected": -325.43975830078125, "loss": 0.1293, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.0763964653015137, "rewards/margins": 8.46824836730957, "rewards/rejected": -10.54464340209961, "step": 9020 }, { "epoch": 1.75, "learning_rate": 2.3092687135974688e-07, "logits/chosen": -2.6578516960144043, "logits/rejected": -2.6606476306915283, "logps/chosen": -289.87298583984375, "logps/rejected": -370.78326416015625, "loss": 0.1542, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.908090114593506, "rewards/margins": 7.784598350524902, "rewards/rejected": -10.692689895629883, "step": 9030 }, { "epoch": 1.75, "learning_rate": 2.3056734018839433e-07, "logits/chosen": -2.6555745601654053, "logits/rejected": -2.5985846519470215, "logps/chosen": -253.746337890625, "logps/rejected": -312.53558349609375, "loss": 0.1139, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.292423963546753, "rewards/margins": 8.211528778076172, "rewards/rejected": -11.503952980041504, "step": 9040 }, { "epoch": 1.76, "learning_rate": 2.3020780901704176e-07, "logits/chosen": -2.5273990631103516, "logits/rejected": -2.5261635780334473, "logps/chosen": -223.68917846679688, "logps/rejected": -300.7638854980469, "loss": 0.1014, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.830761671066284, "rewards/margins": 7.406530857086182, "rewards/rejected": -10.23729133605957, "step": 9050 }, { "epoch": 1.76, "learning_rate": 2.2984827784568922e-07, "logits/chosen": -2.6696174144744873, "logits/rejected": -2.667482852935791, "logps/chosen": -297.14703369140625, "logps/rejected": -329.94586181640625, "loss": 0.1323, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.836264133453369, "rewards/margins": 7.174398899078369, "rewards/rejected": -11.010663032531738, "step": 9060 }, { "epoch": 1.76, "learning_rate": 2.2948874667433664e-07, "logits/chosen": -2.443032741546631, "logits/rejected": -2.4770193099975586, "logps/chosen": -253.74658203125, "logps/rejected": -328.1571044921875, "loss": 0.0914, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.8514180183410645, "rewards/margins": 10.583091735839844, "rewards/rejected": -14.43450927734375, "step": 9070 }, { "epoch": 1.76, "learning_rate": 2.291292155029841e-07, "logits/chosen": -2.50797438621521, "logits/rejected": -2.493114471435547, "logps/chosen": -247.9369659423828, "logps/rejected": -365.7491149902344, "loss": 0.1325, "rewards/accuracies": 0.75, "rewards/chosen": -10.651138305664062, "rewards/margins": 11.686362266540527, "rewards/rejected": -22.33749771118164, "step": 9080 }, { "epoch": 1.76, "learning_rate": 2.2876968433163155e-07, "logits/chosen": -2.4330861568450928, "logits/rejected": -2.5481677055358887, "logps/chosen": -366.48602294921875, "logps/rejected": -407.94866943359375, "loss": 0.1276, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.245977878570557, "rewards/margins": 11.331197738647461, "rewards/rejected": -15.577176094055176, "step": 9090 }, { "epoch": 1.77, "learning_rate": 2.2841015316027898e-07, "logits/chosen": -2.631187915802002, "logits/rejected": -2.624697208404541, "logps/chosen": -227.9707794189453, "logps/rejected": -370.03076171875, "loss": 0.127, "rewards/accuracies": 1.0, "rewards/chosen": -3.4985663890838623, "rewards/margins": 10.457371711730957, "rewards/rejected": -13.955938339233398, "step": 9100 }, { "epoch": 1.77, "eval_logits/chosen": -2.4831197261810303, "eval_logits/rejected": -2.4681098461151123, "eval_logps/chosen": -264.125244140625, "eval_logps/rejected": -287.5068054199219, "eval_loss": 0.5275867581367493, "eval_rewards/accuracies": 0.6825000047683716, "eval_rewards/chosen": -6.988717079162598, "eval_rewards/margins": 4.57036828994751, "eval_rewards/rejected": -11.55908489227295, "eval_runtime": 141.2338, "eval_samples_per_second": 22.346, "eval_steps_per_second": 0.354, "step": 9100 }, { "epoch": 1.77, "learning_rate": 2.2805062198892643e-07, "logits/chosen": -2.7433934211730957, "logits/rejected": -2.7377352714538574, "logps/chosen": -342.8365783691406, "logps/rejected": -389.03778076171875, "loss": 0.1139, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.9598255157470703, "rewards/margins": 7.6363725662231445, "rewards/rejected": -9.596197128295898, "step": 9110 }, { "epoch": 1.77, "learning_rate": 2.2769109081757386e-07, "logits/chosen": -2.4912314414978027, "logits/rejected": -2.461418628692627, "logps/chosen": -225.4840850830078, "logps/rejected": -245.8949737548828, "loss": 0.1185, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.032782554626465, "rewards/margins": 7.881567478179932, "rewards/rejected": -11.914348602294922, "step": 9120 }, { "epoch": 1.77, "learning_rate": 2.2733155964622132e-07, "logits/chosen": -2.5779995918273926, "logits/rejected": -2.5387330055236816, "logps/chosen": -267.99346923828125, "logps/rejected": -311.9544677734375, "loss": 0.1166, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.670339345932007, "rewards/margins": 7.5002593994140625, "rewards/rejected": -10.170599937438965, "step": 9130 }, { "epoch": 1.77, "learning_rate": 2.2697202847486877e-07, "logits/chosen": -2.5345051288604736, "logits/rejected": -2.4705216884613037, "logps/chosen": -236.589111328125, "logps/rejected": -355.2351989746094, "loss": 0.2438, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7309606075286865, "rewards/margins": 8.962685585021973, "rewards/rejected": -10.693646430969238, "step": 9140 }, { "epoch": 1.78, "learning_rate": 2.266124973035162e-07, "logits/chosen": -2.480380058288574, "logits/rejected": -2.5399022102355957, "logps/chosen": -207.9787139892578, "logps/rejected": -307.7817687988281, "loss": 0.1411, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.452308654785156, "rewards/margins": 9.30238151550293, "rewards/rejected": -15.754690170288086, "step": 9150 }, { "epoch": 1.78, "learning_rate": 2.2625296613216365e-07, "logits/chosen": -2.696650981903076, "logits/rejected": -2.574568510055542, "logps/chosen": -278.69677734375, "logps/rejected": -227.86477661132812, "loss": 0.4146, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.0961384773254395, "rewards/margins": 6.557276725769043, "rewards/rejected": -8.653414726257324, "step": 9160 }, { "epoch": 1.78, "learning_rate": 2.2589343496081108e-07, "logits/chosen": -2.5836429595947266, "logits/rejected": -2.53010892868042, "logps/chosen": -231.33139038085938, "logps/rejected": -253.4978790283203, "loss": 0.1333, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.0301594734191895, "rewards/margins": 5.411094665527344, "rewards/rejected": -9.441255569458008, "step": 9170 }, { "epoch": 1.78, "learning_rate": 2.2553390378945853e-07, "logits/chosen": -2.489360809326172, "logits/rejected": -2.5844199657440186, "logps/chosen": -304.05560302734375, "logps/rejected": -350.4225769042969, "loss": 0.0862, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2749789953231812, "rewards/margins": 8.532282829284668, "rewards/rejected": -9.807263374328613, "step": 9180 }, { "epoch": 1.78, "learning_rate": 2.25174372618106e-07, "logits/chosen": -2.764516592025757, "logits/rejected": -2.6930012702941895, "logps/chosen": -301.46575927734375, "logps/rejected": -315.3195495605469, "loss": 0.1086, "rewards/accuracies": 0.75, "rewards/chosen": -5.600867748260498, "rewards/margins": 6.2614426612854, "rewards/rejected": -11.862309455871582, "step": 9190 }, { "epoch": 1.79, "learning_rate": 2.2481484144675342e-07, "logits/chosen": -2.718442916870117, "logits/rejected": -2.6158175468444824, "logps/chosen": -254.6276092529297, "logps/rejected": -318.2427062988281, "loss": 0.0787, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.4615302085876465, "rewards/margins": 7.8550310134887695, "rewards/rejected": -11.316560745239258, "step": 9200 }, { "epoch": 1.79, "eval_logits/chosen": -2.4761788845062256, "eval_logits/rejected": -2.4595890045166016, "eval_logps/chosen": -261.3131103515625, "eval_logps/rejected": -284.68475341796875, "eval_loss": 0.536875307559967, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -6.707505702972412, "eval_rewards/margins": 4.569375514984131, "eval_rewards/rejected": -11.276881217956543, "eval_runtime": 140.9062, "eval_samples_per_second": 22.398, "eval_steps_per_second": 0.355, "step": 9200 }, { "epoch": 1.79, "learning_rate": 2.2445531027540087e-07, "logits/chosen": -2.5662033557891846, "logits/rejected": -2.4942879676818848, "logps/chosen": -229.77230834960938, "logps/rejected": -343.25115966796875, "loss": 0.1329, "rewards/accuracies": 1.0, "rewards/chosen": -2.7950007915496826, "rewards/margins": 10.938156127929688, "rewards/rejected": -13.73315715789795, "step": 9210 }, { "epoch": 1.79, "learning_rate": 2.240957791040483e-07, "logits/chosen": -2.6636500358581543, "logits/rejected": -2.5954031944274902, "logps/chosen": -263.62969970703125, "logps/rejected": -307.91949462890625, "loss": 0.1208, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.333632707595825, "rewards/margins": 10.105422019958496, "rewards/rejected": -12.439054489135742, "step": 9220 }, { "epoch": 1.79, "learning_rate": 2.2373624793269575e-07, "logits/chosen": -2.6744582653045654, "logits/rejected": -2.6738028526306152, "logps/chosen": -230.3915252685547, "logps/rejected": -297.47552490234375, "loss": 0.1278, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.508617401123047, "rewards/margins": 9.609359741210938, "rewards/rejected": -13.117976188659668, "step": 9230 }, { "epoch": 1.79, "learning_rate": 2.233767167613432e-07, "logits/chosen": -2.5466504096984863, "logits/rejected": -2.588721990585327, "logps/chosen": -353.01593017578125, "logps/rejected": -485.99517822265625, "loss": 0.1068, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.1640651226043701, "rewards/margins": 10.018106460571289, "rewards/rejected": -11.182169914245605, "step": 9240 }, { "epoch": 1.8, "learning_rate": 2.2301718558999064e-07, "logits/chosen": -2.6759397983551025, "logits/rejected": -2.6947224140167236, "logps/chosen": -233.6916961669922, "logps/rejected": -310.08245849609375, "loss": 0.2103, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.124452114105225, "rewards/margins": 6.477786064147949, "rewards/rejected": -11.602239608764648, "step": 9250 }, { "epoch": 1.8, "learning_rate": 2.226576544186381e-07, "logits/chosen": -2.5456278324127197, "logits/rejected": -2.5684640407562256, "logps/chosen": -268.0722351074219, "logps/rejected": -321.04791259765625, "loss": 0.2316, "rewards/accuracies": 0.75, "rewards/chosen": -7.562939643859863, "rewards/margins": 5.708805561065674, "rewards/rejected": -13.271745681762695, "step": 9260 }, { "epoch": 1.8, "learning_rate": 2.2229812324728552e-07, "logits/chosen": -2.2930455207824707, "logits/rejected": -2.3209643363952637, "logps/chosen": -314.3650207519531, "logps/rejected": -317.35711669921875, "loss": 0.1218, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.544978141784668, "rewards/margins": 7.295835971832275, "rewards/rejected": -11.840815544128418, "step": 9270 }, { "epoch": 1.8, "learning_rate": 2.2193859207593297e-07, "logits/chosen": -2.383695125579834, "logits/rejected": -2.3838589191436768, "logps/chosen": -195.61062622070312, "logps/rejected": -287.2439270019531, "loss": 0.1056, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.162716388702393, "rewards/margins": 8.17154312133789, "rewards/rejected": -12.334260940551758, "step": 9280 }, { "epoch": 1.8, "learning_rate": 2.2157906090458043e-07, "logits/chosen": -2.4942522048950195, "logits/rejected": -2.4862399101257324, "logps/chosen": -200.2251739501953, "logps/rejected": -267.40716552734375, "loss": 0.0855, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.627869606018066, "rewards/margins": 9.27051067352295, "rewards/rejected": -13.8983793258667, "step": 9290 }, { "epoch": 1.81, "learning_rate": 2.2121952973322785e-07, "logits/chosen": -2.615295886993408, "logits/rejected": -2.617884874343872, "logps/chosen": -365.4449157714844, "logps/rejected": -340.91302490234375, "loss": 0.1575, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.824907302856445, "rewards/margins": 9.447416305541992, "rewards/rejected": -14.272321701049805, "step": 9300 }, { "epoch": 1.81, "eval_logits/chosen": -2.454586982727051, "eval_logits/rejected": -2.4321353435516357, "eval_logps/chosen": -279.1459655761719, "eval_logps/rejected": -309.0434265136719, "eval_loss": 0.5330539345741272, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -8.490789413452148, "eval_rewards/margins": 5.221959114074707, "eval_rewards/rejected": -13.712749481201172, "eval_runtime": 141.0777, "eval_samples_per_second": 22.371, "eval_steps_per_second": 0.354, "step": 9300 }, { "epoch": 1.81, "learning_rate": 2.208599985618753e-07, "logits/chosen": -2.5942773818969727, "logits/rejected": -2.5127804279327393, "logps/chosen": -290.8653869628906, "logps/rejected": -421.36181640625, "loss": 0.1396, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.269815921783447, "rewards/margins": 9.438082695007324, "rewards/rejected": -13.70789909362793, "step": 9310 }, { "epoch": 1.81, "learning_rate": 2.2050046739052274e-07, "logits/chosen": -2.6629788875579834, "logits/rejected": -2.5936431884765625, "logps/chosen": -304.45794677734375, "logps/rejected": -352.243408203125, "loss": 0.1006, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.734714031219482, "rewards/margins": 6.613863945007324, "rewards/rejected": -12.348577499389648, "step": 9320 }, { "epoch": 1.81, "learning_rate": 2.201409362191702e-07, "logits/chosen": -2.6832408905029297, "logits/rejected": -2.598360538482666, "logps/chosen": -201.95150756835938, "logps/rejected": -251.3197784423828, "loss": 0.1827, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.0557456016540527, "rewards/margins": 7.982450008392334, "rewards/rejected": -10.03819465637207, "step": 9330 }, { "epoch": 1.81, "learning_rate": 2.1978140504781764e-07, "logits/chosen": -2.6649320125579834, "logits/rejected": -2.561836004257202, "logps/chosen": -268.05938720703125, "logps/rejected": -355.09478759765625, "loss": 0.1038, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.3565566539764404, "rewards/margins": 10.691267013549805, "rewards/rejected": -14.047823905944824, "step": 9340 }, { "epoch": 1.82, "learning_rate": 2.1942187387646507e-07, "logits/chosen": -2.7730791568756104, "logits/rejected": -2.7454609870910645, "logps/chosen": -305.49542236328125, "logps/rejected": -362.4857482910156, "loss": 0.1374, "rewards/accuracies": 0.75, "rewards/chosen": -7.476459503173828, "rewards/margins": 6.851313591003418, "rewards/rejected": -14.327774047851562, "step": 9350 }, { "epoch": 1.82, "learning_rate": 2.1906234270511253e-07, "logits/chosen": -2.673074960708618, "logits/rejected": -2.6720142364501953, "logps/chosen": -225.03109741210938, "logps/rejected": -293.6507873535156, "loss": 0.1846, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.60236120223999, "rewards/margins": 8.857532501220703, "rewards/rejected": -13.459895133972168, "step": 9360 }, { "epoch": 1.82, "learning_rate": 2.1870281153375995e-07, "logits/chosen": -2.6925947666168213, "logits/rejected": -2.74192476272583, "logps/chosen": -205.503662109375, "logps/rejected": -337.00311279296875, "loss": 0.1988, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.798457145690918, "rewards/margins": 9.184468269348145, "rewards/rejected": -14.982925415039062, "step": 9370 }, { "epoch": 1.82, "learning_rate": 2.183432803624074e-07, "logits/chosen": -2.6186959743499756, "logits/rejected": -2.630443811416626, "logps/chosen": -176.66262817382812, "logps/rejected": -272.9822082519531, "loss": 0.136, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.631664752960205, "rewards/margins": 7.2577362060546875, "rewards/rejected": -11.889402389526367, "step": 9380 }, { "epoch": 1.82, "learning_rate": 2.1798374919105486e-07, "logits/chosen": -2.7789523601531982, "logits/rejected": -2.6312663555145264, "logps/chosen": -265.19976806640625, "logps/rejected": -324.28240966796875, "loss": 0.1166, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.802310943603516, "rewards/margins": 10.411230087280273, "rewards/rejected": -16.213542938232422, "step": 9390 }, { "epoch": 1.82, "learning_rate": 2.176242180197023e-07, "logits/chosen": -2.690073013305664, "logits/rejected": -2.593142032623291, "logps/chosen": -289.4481506347656, "logps/rejected": -367.05633544921875, "loss": 0.1627, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.602388381958008, "rewards/margins": 9.026971817016602, "rewards/rejected": -11.62936019897461, "step": 9400 }, { "epoch": 1.82, "eval_logits/chosen": -2.5830817222595215, "eval_logits/rejected": -2.5688726902008057, "eval_logps/chosen": -262.60369873046875, "eval_logps/rejected": -280.9705505371094, "eval_loss": 0.5199735760688782, "eval_rewards/accuracies": 0.7124999761581421, "eval_rewards/chosen": -6.836562633514404, "eval_rewards/margins": 4.068894863128662, "eval_rewards/rejected": -10.90545654296875, "eval_runtime": 141.1429, "eval_samples_per_second": 22.36, "eval_steps_per_second": 0.354, "step": 9400 }, { "epoch": 1.83, "learning_rate": 2.1726468684834975e-07, "logits/chosen": -2.6631975173950195, "logits/rejected": -2.687513828277588, "logps/chosen": -282.0251770019531, "logps/rejected": -295.54705810546875, "loss": 0.1117, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.567460536956787, "rewards/margins": 6.210967063903809, "rewards/rejected": -10.778428077697754, "step": 9410 }, { "epoch": 1.83, "learning_rate": 2.169051556769972e-07, "logits/chosen": -2.784729480743408, "logits/rejected": -2.7750000953674316, "logps/chosen": -250.9359588623047, "logps/rejected": -264.49383544921875, "loss": 0.1826, "rewards/accuracies": 0.75, "rewards/chosen": -3.9784743785858154, "rewards/margins": 4.458198070526123, "rewards/rejected": -8.43667221069336, "step": 9420 }, { "epoch": 1.83, "learning_rate": 2.1654562450564463e-07, "logits/chosen": -2.7793118953704834, "logits/rejected": -2.763577699661255, "logps/chosen": -273.861328125, "logps/rejected": -327.62371826171875, "loss": 0.1557, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.666604995727539, "rewards/margins": 9.303274154663086, "rewards/rejected": -14.969879150390625, "step": 9430 }, { "epoch": 1.83, "learning_rate": 2.1618609333429208e-07, "logits/chosen": -2.69822096824646, "logits/rejected": -2.668501615524292, "logps/chosen": -205.9537353515625, "logps/rejected": -339.89825439453125, "loss": 0.1057, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.004425287246704, "rewards/margins": 8.063764572143555, "rewards/rejected": -11.06818962097168, "step": 9440 }, { "epoch": 1.83, "learning_rate": 2.158265621629395e-07, "logits/chosen": -2.7364261150360107, "logits/rejected": -2.682142734527588, "logps/chosen": -221.7036895751953, "logps/rejected": -276.8183288574219, "loss": 0.1132, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9641478657722473, "rewards/margins": 8.006423950195312, "rewards/rejected": -8.970571517944336, "step": 9450 }, { "epoch": 1.84, "learning_rate": 2.1546703099158696e-07, "logits/chosen": -2.700155735015869, "logits/rejected": -2.825791120529175, "logps/chosen": -209.17172241210938, "logps/rejected": -375.6164855957031, "loss": 0.1104, "rewards/accuracies": 1.0, "rewards/chosen": -4.204814910888672, "rewards/margins": 10.476545333862305, "rewards/rejected": -14.681361198425293, "step": 9460 }, { "epoch": 1.84, "learning_rate": 2.1510749982023442e-07, "logits/chosen": -2.7599854469299316, "logits/rejected": -2.822727680206299, "logps/chosen": -255.29598999023438, "logps/rejected": -259.87677001953125, "loss": 0.1404, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -7.822667121887207, "rewards/margins": 4.506711006164551, "rewards/rejected": -12.329377174377441, "step": 9470 }, { "epoch": 1.84, "learning_rate": 2.1474796864888185e-07, "logits/chosen": -2.6040995121002197, "logits/rejected": -2.551703453063965, "logps/chosen": -250.2332763671875, "logps/rejected": -292.47357177734375, "loss": 0.1522, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.464228868484497, "rewards/margins": 8.481760025024414, "rewards/rejected": -10.945989608764648, "step": 9480 }, { "epoch": 1.84, "learning_rate": 2.143884374775293e-07, "logits/chosen": -2.8416645526885986, "logits/rejected": -2.829864025115967, "logps/chosen": -235.0521240234375, "logps/rejected": -323.4149475097656, "loss": 0.1556, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0735909938812256, "rewards/margins": 7.334481239318848, "rewards/rejected": -9.408071517944336, "step": 9490 }, { "epoch": 1.84, "learning_rate": 2.1402890630617673e-07, "logits/chosen": -2.5075650215148926, "logits/rejected": -2.4215004444122314, "logps/chosen": -199.2357177734375, "logps/rejected": -283.7659606933594, "loss": 0.1334, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.8971378803253174, "rewards/margins": 7.249490261077881, "rewards/rejected": -11.146627426147461, "step": 9500 }, { "epoch": 1.84, "eval_logits/chosen": -2.616497755050659, "eval_logits/rejected": -2.602832555770874, "eval_logps/chosen": -269.4985046386719, "eval_logps/rejected": -290.15087890625, "eval_loss": 0.514388382434845, "eval_rewards/accuracies": 0.7149999737739563, "eval_rewards/chosen": -7.526042461395264, "eval_rewards/margins": 4.297450542449951, "eval_rewards/rejected": -11.823493957519531, "eval_runtime": 139.5543, "eval_samples_per_second": 22.615, "eval_steps_per_second": 0.358, "step": 9500 }, { "epoch": 1.85, "learning_rate": 2.1366937513482418e-07, "logits/chosen": -2.7919929027557373, "logits/rejected": -2.637633800506592, "logps/chosen": -305.61639404296875, "logps/rejected": -302.77288818359375, "loss": 0.0932, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.2936458587646484, "rewards/margins": 10.195398330688477, "rewards/rejected": -13.489044189453125, "step": 9510 }, { "epoch": 1.85, "learning_rate": 2.1330984396347164e-07, "logits/chosen": -2.773404598236084, "logits/rejected": -2.8854422569274902, "logps/chosen": -225.18533325195312, "logps/rejected": -267.5545959472656, "loss": 0.1145, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.022705554962158, "rewards/margins": 7.393181800842285, "rewards/rejected": -10.415887832641602, "step": 9520 }, { "epoch": 1.85, "learning_rate": 2.1295031279211906e-07, "logits/chosen": -2.7786660194396973, "logits/rejected": -2.717477798461914, "logps/chosen": -301.073974609375, "logps/rejected": -346.15179443359375, "loss": 0.0983, "rewards/accuracies": 1.0, "rewards/chosen": -3.636516571044922, "rewards/margins": 9.894160270690918, "rewards/rejected": -13.530677795410156, "step": 9530 }, { "epoch": 1.85, "learning_rate": 2.1259078162076652e-07, "logits/chosen": -2.829789400100708, "logits/rejected": -2.7225544452667236, "logps/chosen": -307.1126708984375, "logps/rejected": -363.5691833496094, "loss": 0.2078, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.9343395233154297, "rewards/margins": 7.367140293121338, "rewards/rejected": -11.30147933959961, "step": 9540 }, { "epoch": 1.85, "learning_rate": 2.1223125044941395e-07, "logits/chosen": -2.7844507694244385, "logits/rejected": -2.661411762237549, "logps/chosen": -244.58200073242188, "logps/rejected": -230.42041015625, "loss": 0.1608, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.329751491546631, "rewards/margins": 6.7721452713012695, "rewards/rejected": -12.101898193359375, "step": 9550 }, { "epoch": 1.86, "learning_rate": 2.118717192780614e-07, "logits/chosen": -2.7518794536590576, "logits/rejected": -2.81085467338562, "logps/chosen": -258.4638366699219, "logps/rejected": -396.66937255859375, "loss": 0.1225, "rewards/accuracies": 1.0, "rewards/chosen": -4.041602611541748, "rewards/margins": 10.739147186279297, "rewards/rejected": -14.780749320983887, "step": 9560 }, { "epoch": 1.86, "learning_rate": 2.1151218810670886e-07, "logits/chosen": -2.723557472229004, "logits/rejected": -2.7834010124206543, "logps/chosen": -274.72003173828125, "logps/rejected": -354.28778076171875, "loss": 0.121, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.4575653076171875, "rewards/margins": 8.722177505493164, "rewards/rejected": -12.179742813110352, "step": 9570 }, { "epoch": 1.86, "learning_rate": 2.1115265693535628e-07, "logits/chosen": -2.670217514038086, "logits/rejected": -2.5735363960266113, "logps/chosen": -358.3692321777344, "logps/rejected": -330.4654235839844, "loss": 0.1248, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.361757278442383, "rewards/margins": 8.804426193237305, "rewards/rejected": -12.16618537902832, "step": 9580 }, { "epoch": 1.86, "learning_rate": 2.1079312576400374e-07, "logits/chosen": -2.7584645748138428, "logits/rejected": -2.708832263946533, "logps/chosen": -234.52932739257812, "logps/rejected": -283.8543395996094, "loss": 0.085, "rewards/accuracies": 1.0, "rewards/chosen": -3.4932403564453125, "rewards/margins": 5.7663373947143555, "rewards/rejected": -9.259577751159668, "step": 9590 }, { "epoch": 1.86, "learning_rate": 2.1043359459265117e-07, "logits/chosen": -2.7580108642578125, "logits/rejected": -2.601715564727783, "logps/chosen": -247.44192504882812, "logps/rejected": -321.3501892089844, "loss": 0.1662, "rewards/accuracies": 0.75, "rewards/chosen": -6.507093906402588, "rewards/margins": 6.213970184326172, "rewards/rejected": -12.721063613891602, "step": 9600 }, { "epoch": 1.86, "eval_logits/chosen": -2.5207648277282715, "eval_logits/rejected": -2.5049188137054443, "eval_logps/chosen": -266.2056884765625, "eval_logps/rejected": -289.34429931640625, "eval_loss": 0.5175375938415527, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -7.196760177612305, "eval_rewards/margins": 4.546074390411377, "eval_rewards/rejected": -11.742834091186523, "eval_runtime": 145.8561, "eval_samples_per_second": 21.638, "eval_steps_per_second": 0.343, "step": 9600 }, { "epoch": 1.87, "learning_rate": 2.1007406342129862e-07, "logits/chosen": -2.7143349647521973, "logits/rejected": -2.6954402923583984, "logps/chosen": -256.6372985839844, "logps/rejected": -364.6605529785156, "loss": 0.1722, "rewards/accuracies": 0.75, "rewards/chosen": -4.440862655639648, "rewards/margins": 10.010429382324219, "rewards/rejected": -14.45129108428955, "step": 9610 }, { "epoch": 1.87, "learning_rate": 2.0971453224994607e-07, "logits/chosen": -2.80859375, "logits/rejected": -2.6646108627319336, "logps/chosen": -251.14077758789062, "logps/rejected": -280.90655517578125, "loss": 0.1657, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.744521141052246, "rewards/margins": 8.630102157592773, "rewards/rejected": -13.37462329864502, "step": 9620 }, { "epoch": 1.87, "learning_rate": 2.093550010785935e-07, "logits/chosen": -2.5912880897521973, "logits/rejected": -2.615015983581543, "logps/chosen": -284.47113037109375, "logps/rejected": -373.15863037109375, "loss": 0.1266, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.701432943344116, "rewards/margins": 9.166840553283691, "rewards/rejected": -11.86827278137207, "step": 9630 }, { "epoch": 1.87, "learning_rate": 2.0899546990724096e-07, "logits/chosen": -2.627544641494751, "logits/rejected": -2.6564548015594482, "logps/chosen": -192.7626495361328, "logps/rejected": -253.3898162841797, "loss": 0.1787, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.189593315124512, "rewards/margins": 5.3219685554504395, "rewards/rejected": -9.51156234741211, "step": 9640 }, { "epoch": 1.87, "learning_rate": 2.0863593873588838e-07, "logits/chosen": -2.688269853591919, "logits/rejected": -2.636615037918091, "logps/chosen": -240.20712280273438, "logps/rejected": -249.9957733154297, "loss": 0.111, "rewards/accuracies": 0.75, "rewards/chosen": -5.549140453338623, "rewards/margins": 5.735888481140137, "rewards/rejected": -11.285029411315918, "step": 9650 }, { "epoch": 1.88, "learning_rate": 2.082764075645358e-07, "logits/chosen": -2.5320065021514893, "logits/rejected": -2.51509690284729, "logps/chosen": -236.28836059570312, "logps/rejected": -365.29620361328125, "loss": 0.0996, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.9913103580474854, "rewards/margins": 10.07435417175293, "rewards/rejected": -12.065665245056152, "step": 9660 }, { "epoch": 1.88, "learning_rate": 2.079168763931833e-07, "logits/chosen": -2.7490692138671875, "logits/rejected": -2.6080615520477295, "logps/chosen": -337.1051330566406, "logps/rejected": -296.54193115234375, "loss": 0.1606, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.419654369354248, "rewards/margins": 7.501638889312744, "rewards/rejected": -9.921293258666992, "step": 9670 }, { "epoch": 1.88, "learning_rate": 2.0755734522183072e-07, "logits/chosen": -2.6276113986968994, "logits/rejected": -2.5117077827453613, "logps/chosen": -306.5704650878906, "logps/rejected": -345.6265869140625, "loss": 0.1183, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.0178134441375732, "rewards/margins": 8.583242416381836, "rewards/rejected": -11.601057052612305, "step": 9680 }, { "epoch": 1.88, "learning_rate": 2.0719781405047817e-07, "logits/chosen": -2.627833843231201, "logits/rejected": -2.522871732711792, "logps/chosen": -219.50454711914062, "logps/rejected": -250.96414184570312, "loss": 0.0716, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.722193717956543, "rewards/margins": 7.623091220855713, "rewards/rejected": -12.345284461975098, "step": 9690 }, { "epoch": 1.88, "learning_rate": 2.068382828791256e-07, "logits/chosen": -2.5679221153259277, "logits/rejected": -2.5713205337524414, "logps/chosen": -263.98077392578125, "logps/rejected": -374.9137268066406, "loss": 0.1138, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.59222149848938, "rewards/margins": 9.256675720214844, "rewards/rejected": -12.848896980285645, "step": 9700 }, { "epoch": 1.88, "eval_logits/chosen": -2.4926064014434814, "eval_logits/rejected": -2.478003740310669, "eval_logps/chosen": -269.9750061035156, "eval_logps/rejected": -294.9535827636719, "eval_loss": 0.5252137780189514, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -7.573695659637451, "eval_rewards/margins": 4.730066299438477, "eval_rewards/rejected": -12.303762435913086, "eval_runtime": 141.196, "eval_samples_per_second": 22.352, "eval_steps_per_second": 0.354, "step": 9700 }, { "epoch": 1.89, "learning_rate": 2.0647875170777303e-07, "logits/chosen": -2.561051368713379, "logits/rejected": -2.642465829849243, "logps/chosen": -235.7049560546875, "logps/rejected": -321.3508605957031, "loss": 0.1579, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.9679055213928223, "rewards/margins": 7.88568639755249, "rewards/rejected": -11.853592872619629, "step": 9710 }, { "epoch": 1.89, "learning_rate": 2.061192205364205e-07, "logits/chosen": -2.6106936931610107, "logits/rejected": -2.5502424240112305, "logps/chosen": -272.18902587890625, "logps/rejected": -273.1693115234375, "loss": 0.0999, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.936622142791748, "rewards/margins": 5.830315589904785, "rewards/rejected": -9.766938209533691, "step": 9720 }, { "epoch": 1.89, "learning_rate": 2.0575968936506794e-07, "logits/chosen": -2.5287885665893555, "logits/rejected": -2.488464832305908, "logps/chosen": -226.1278839111328, "logps/rejected": -201.50521850585938, "loss": 0.1004, "rewards/accuracies": 0.75, "rewards/chosen": -6.362948417663574, "rewards/margins": 3.775364637374878, "rewards/rejected": -10.138312339782715, "step": 9730 }, { "epoch": 1.89, "learning_rate": 2.054001581937154e-07, "logits/chosen": -2.667818546295166, "logits/rejected": -2.535402536392212, "logps/chosen": -252.14712524414062, "logps/rejected": -361.03997802734375, "loss": 0.1493, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.449309825897217, "rewards/margins": 11.48906135559082, "rewards/rejected": -13.938371658325195, "step": 9740 }, { "epoch": 1.89, "learning_rate": 2.0504062702236282e-07, "logits/chosen": -2.575338363647461, "logits/rejected": -2.5556836128234863, "logps/chosen": -261.7845458984375, "logps/rejected": -363.62738037109375, "loss": 0.1067, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.278941631317139, "rewards/margins": 9.476760864257812, "rewards/rejected": -13.755702018737793, "step": 9750 }, { "epoch": 1.89, "learning_rate": 2.0468109585101025e-07, "logits/chosen": -2.6099021434783936, "logits/rejected": -2.6040093898773193, "logps/chosen": -181.74221801757812, "logps/rejected": -355.05535888671875, "loss": 0.1183, "rewards/accuracies": 1.0, "rewards/chosen": -3.5663819313049316, "rewards/margins": 13.304658889770508, "rewards/rejected": -16.87103843688965, "step": 9760 }, { "epoch": 1.9, "learning_rate": 2.0432156467965773e-07, "logits/chosen": -2.4464845657348633, "logits/rejected": -2.457374095916748, "logps/chosen": -210.5225830078125, "logps/rejected": -306.7838439941406, "loss": 0.1358, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.1162004470825195, "rewards/margins": 10.571629524230957, "rewards/rejected": -15.687829971313477, "step": 9770 }, { "epoch": 1.9, "learning_rate": 2.0396203350830516e-07, "logits/chosen": -2.5081634521484375, "logits/rejected": -2.4628183841705322, "logps/chosen": -326.0071105957031, "logps/rejected": -349.7266845703125, "loss": 0.131, "rewards/accuracies": 1.0, "rewards/chosen": -4.235438346862793, "rewards/margins": 7.810906887054443, "rewards/rejected": -12.046346664428711, "step": 9780 }, { "epoch": 1.9, "learning_rate": 2.036025023369526e-07, "logits/chosen": -2.8011951446533203, "logits/rejected": -2.6630489826202393, "logps/chosen": -264.27154541015625, "logps/rejected": -272.40679931640625, "loss": 0.1293, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.549420118331909, "rewards/margins": 6.654759407043457, "rewards/rejected": -10.204178810119629, "step": 9790 }, { "epoch": 1.9, "learning_rate": 2.0324297116560004e-07, "logits/chosen": -2.625323534011841, "logits/rejected": -2.506072998046875, "logps/chosen": -310.63018798828125, "logps/rejected": -398.94744873046875, "loss": 0.2393, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.8594512939453125, "rewards/margins": 13.28419017791748, "rewards/rejected": -15.143640518188477, "step": 9800 }, { "epoch": 1.9, "eval_logits/chosen": -2.5731213092803955, "eval_logits/rejected": -2.5587193965911865, "eval_logps/chosen": -269.15802001953125, "eval_logps/rejected": -292.7436218261719, "eval_loss": 0.5220938324928284, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -7.491992950439453, "eval_rewards/margins": 4.590774059295654, "eval_rewards/rejected": -12.08276653289795, "eval_runtime": 140.9621, "eval_samples_per_second": 22.389, "eval_steps_per_second": 0.355, "step": 9800 }, { "epoch": 1.9, "learning_rate": 2.0288343999424747e-07, "logits/chosen": -2.7835259437561035, "logits/rejected": -2.7077555656433105, "logps/chosen": -338.28070068359375, "logps/rejected": -358.1758117675781, "loss": 0.1416, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.9490952491760254, "rewards/margins": 6.300492763519287, "rewards/rejected": -9.249588012695312, "step": 9810 }, { "epoch": 1.91, "learning_rate": 2.0252390882289495e-07, "logits/chosen": -2.6914010047912598, "logits/rejected": -2.6057381629943848, "logps/chosen": -298.5309143066406, "logps/rejected": -325.1488037109375, "loss": 0.1239, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.406166076660156, "rewards/margins": 9.358588218688965, "rewards/rejected": -13.764753341674805, "step": 9820 }, { "epoch": 1.91, "learning_rate": 2.0216437765154238e-07, "logits/chosen": -2.612276792526245, "logits/rejected": -2.43575119972229, "logps/chosen": -290.4168701171875, "logps/rejected": -339.95703125, "loss": 0.1555, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.402797698974609, "rewards/margins": 6.995247840881348, "rewards/rejected": -11.398045539855957, "step": 9830 }, { "epoch": 1.91, "learning_rate": 2.0180484648018983e-07, "logits/chosen": -2.614217758178711, "logits/rejected": -2.6707980632781982, "logps/chosen": -286.5018005371094, "logps/rejected": -294.0039978027344, "loss": 0.0688, "rewards/accuracies": 0.75, "rewards/chosen": -7.310466766357422, "rewards/margins": 5.712352275848389, "rewards/rejected": -13.022821426391602, "step": 9840 }, { "epoch": 1.91, "learning_rate": 2.0144531530883726e-07, "logits/chosen": -2.549112319946289, "logits/rejected": -2.592756986618042, "logps/chosen": -260.30780029296875, "logps/rejected": -391.18450927734375, "loss": 0.1242, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.136299133300781, "rewards/margins": 14.734067916870117, "rewards/rejected": -18.8703670501709, "step": 9850 }, { "epoch": 1.91, "learning_rate": 2.0108578413748469e-07, "logits/chosen": -2.6412360668182373, "logits/rejected": -2.532777786254883, "logps/chosen": -223.5194091796875, "logps/rejected": -286.7159118652344, "loss": 0.1378, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.2553603649139404, "rewards/margins": 7.387794494628906, "rewards/rejected": -9.643155097961426, "step": 9860 }, { "epoch": 1.92, "learning_rate": 2.0072625296613217e-07, "logits/chosen": -2.7499048709869385, "logits/rejected": -2.619619607925415, "logps/chosen": -291.58477783203125, "logps/rejected": -303.89544677734375, "loss": 0.138, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.7958405017852783, "rewards/margins": 8.317672729492188, "rewards/rejected": -10.113512992858887, "step": 9870 }, { "epoch": 1.92, "learning_rate": 2.003667217947796e-07, "logits/chosen": -2.6006357669830322, "logits/rejected": -2.5927653312683105, "logps/chosen": -297.0106506347656, "logps/rejected": -304.1988525390625, "loss": 0.1104, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.417545318603516, "rewards/margins": 6.6790571212768555, "rewards/rejected": -12.096602439880371, "step": 9880 }, { "epoch": 1.92, "learning_rate": 2.0000719062342705e-07, "logits/chosen": -2.7935850620269775, "logits/rejected": -2.7453925609588623, "logps/chosen": -265.3235168457031, "logps/rejected": -388.5957946777344, "loss": 0.1092, "rewards/accuracies": 1.0, "rewards/chosen": -2.464507579803467, "rewards/margins": 11.08370304107666, "rewards/rejected": -13.548210144042969, "step": 9890 }, { "epoch": 1.92, "learning_rate": 1.9964765945207448e-07, "logits/chosen": -2.7507729530334473, "logits/rejected": -2.661170482635498, "logps/chosen": -284.39337158203125, "logps/rejected": -402.4375915527344, "loss": 0.1172, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.298614025115967, "rewards/margins": 11.880010604858398, "rewards/rejected": -18.178625106811523, "step": 9900 }, { "epoch": 1.92, "eval_logits/chosen": -2.6177239418029785, "eval_logits/rejected": -2.6024889945983887, "eval_logps/chosen": -271.6432800292969, "eval_logps/rejected": -297.585205078125, "eval_loss": 0.530979335308075, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -7.740522384643555, "eval_rewards/margins": 4.82640266418457, "eval_rewards/rejected": -12.566925048828125, "eval_runtime": 140.1556, "eval_samples_per_second": 22.518, "eval_steps_per_second": 0.357, "step": 9900 }, { "epoch": 1.92, "learning_rate": 1.9928812828072193e-07, "logits/chosen": -2.881540060043335, "logits/rejected": -2.794807195663452, "logps/chosen": -409.90179443359375, "logps/rejected": -364.76580810546875, "loss": 0.1559, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.1918411254882812, "rewards/margins": 8.817240715026855, "rewards/rejected": -12.009081840515137, "step": 9910 }, { "epoch": 1.93, "learning_rate": 1.9892859710936939e-07, "logits/chosen": -2.580712080001831, "logits/rejected": -2.5410375595092773, "logps/chosen": -272.1631774902344, "logps/rejected": -401.469482421875, "loss": 0.1505, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.087647914886475, "rewards/margins": 12.025777816772461, "rewards/rejected": -16.11342430114746, "step": 9920 }, { "epoch": 1.93, "learning_rate": 1.985690659380168e-07, "logits/chosen": -2.774130344390869, "logits/rejected": -2.8216567039489746, "logps/chosen": -284.11737060546875, "logps/rejected": -380.0205993652344, "loss": 0.1207, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.726789951324463, "rewards/margins": 10.794561386108398, "rewards/rejected": -17.521350860595703, "step": 9930 }, { "epoch": 1.93, "learning_rate": 1.9820953476666427e-07, "logits/chosen": -2.786043405532837, "logits/rejected": -2.8093819618225098, "logps/chosen": -257.4915466308594, "logps/rejected": -313.9781188964844, "loss": 0.1823, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.9452264308929443, "rewards/margins": 9.208467483520508, "rewards/rejected": -12.153692245483398, "step": 9940 }, { "epoch": 1.93, "learning_rate": 1.978500035953117e-07, "logits/chosen": -2.6762661933898926, "logits/rejected": -2.7098498344421387, "logps/chosen": -208.21359252929688, "logps/rejected": -345.08111572265625, "loss": 0.1405, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -7.444887638092041, "rewards/margins": 9.521523475646973, "rewards/rejected": -16.966411590576172, "step": 9950 }, { "epoch": 1.93, "learning_rate": 1.9749047242395915e-07, "logits/chosen": -2.692342758178711, "logits/rejected": -2.689415693283081, "logps/chosen": -208.0183563232422, "logps/rejected": -357.1488037109375, "loss": 0.1157, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.413248538970947, "rewards/margins": 11.793384552001953, "rewards/rejected": -17.206634521484375, "step": 9960 }, { "epoch": 1.94, "learning_rate": 1.971309412526066e-07, "logits/chosen": -2.810288906097412, "logits/rejected": -2.7897398471832275, "logps/chosen": -277.0812072753906, "logps/rejected": -281.8963623046875, "loss": 0.1659, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.449187278747559, "rewards/margins": 7.528085231781006, "rewards/rejected": -11.97727108001709, "step": 9970 }, { "epoch": 1.94, "learning_rate": 1.9677141008125403e-07, "logits/chosen": -2.614064931869507, "logits/rejected": -2.6760191917419434, "logps/chosen": -278.1968688964844, "logps/rejected": -380.0922546386719, "loss": 0.1253, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.580084800720215, "rewards/margins": 9.989953994750977, "rewards/rejected": -14.570037841796875, "step": 9980 }, { "epoch": 1.94, "learning_rate": 1.9641187890990149e-07, "logits/chosen": -2.818615436553955, "logits/rejected": -2.7288951873779297, "logps/chosen": -203.84352111816406, "logps/rejected": -282.2690124511719, "loss": 0.145, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.7939720153808594, "rewards/margins": 8.794477462768555, "rewards/rejected": -12.588449478149414, "step": 9990 }, { "epoch": 1.94, "learning_rate": 1.9605234773854891e-07, "logits/chosen": -2.753340482711792, "logits/rejected": -2.7678110599517822, "logps/chosen": -188.6497802734375, "logps/rejected": -354.73345947265625, "loss": 0.0687, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.572816371917725, "rewards/margins": 10.632683753967285, "rewards/rejected": -17.20549774169922, "step": 10000 }, { "epoch": 1.94, "eval_logits/chosen": -2.624098539352417, "eval_logits/rejected": -2.611208438873291, "eval_logps/chosen": -268.8094482421875, "eval_logps/rejected": -292.87554931640625, "eval_loss": 0.5245481133460999, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -7.457136154174805, "eval_rewards/margins": 4.638821601867676, "eval_rewards/rejected": -12.095958709716797, "eval_runtime": 155.7237, "eval_samples_per_second": 20.267, "eval_steps_per_second": 0.321, "step": 10000 }, { "epoch": 1.94, "learning_rate": 1.9569281656719637e-07, "logits/chosen": -2.6966569423675537, "logits/rejected": -2.6875321865081787, "logps/chosen": -222.75955200195312, "logps/rejected": -289.21026611328125, "loss": 0.151, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.303269386291504, "rewards/margins": 8.195225715637207, "rewards/rejected": -12.498494148254395, "step": 10010 }, { "epoch": 1.95, "learning_rate": 1.9533328539584382e-07, "logits/chosen": -2.7097420692443848, "logits/rejected": -2.784785032272339, "logps/chosen": -181.155517578125, "logps/rejected": -239.69155883789062, "loss": 0.1093, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.9080886840820312, "rewards/margins": 7.344522953033447, "rewards/rejected": -11.25261116027832, "step": 10020 }, { "epoch": 1.95, "learning_rate": 1.9497375422449125e-07, "logits/chosen": -2.7030367851257324, "logits/rejected": -2.5604372024536133, "logps/chosen": -224.58895874023438, "logps/rejected": -244.3118133544922, "loss": 0.1166, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -5.914610862731934, "rewards/margins": 4.591408729553223, "rewards/rejected": -10.50601863861084, "step": 10030 }, { "epoch": 1.95, "learning_rate": 1.946142230531387e-07, "logits/chosen": -2.695406436920166, "logits/rejected": -2.7164549827575684, "logps/chosen": -256.2817077636719, "logps/rejected": -243.5300750732422, "loss": 0.1062, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.227022647857666, "rewards/margins": 5.592148780822754, "rewards/rejected": -9.819170951843262, "step": 10040 }, { "epoch": 1.95, "learning_rate": 1.9425469188178613e-07, "logits/chosen": -2.517131805419922, "logits/rejected": -2.587303400039673, "logps/chosen": -218.4390869140625, "logps/rejected": -313.0886535644531, "loss": 0.1024, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.9561208486557007, "rewards/margins": 11.146007537841797, "rewards/rejected": -13.102127075195312, "step": 10050 }, { "epoch": 1.95, "learning_rate": 1.9389516071043359e-07, "logits/chosen": -2.6962180137634277, "logits/rejected": -2.784198522567749, "logps/chosen": -253.6819305419922, "logps/rejected": -318.7190246582031, "loss": 0.1264, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.7917160987854004, "rewards/margins": 6.036627769470215, "rewards/rejected": -9.828343391418457, "step": 10060 }, { "epoch": 1.95, "learning_rate": 1.9353562953908104e-07, "logits/chosen": -2.6028361320495605, "logits/rejected": -2.7085378170013428, "logps/chosen": -231.95217895507812, "logps/rejected": -287.3670349121094, "loss": 0.12, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.8892903327941895, "rewards/margins": 6.986242771148682, "rewards/rejected": -10.875532150268555, "step": 10070 }, { "epoch": 1.96, "learning_rate": 1.9317609836772847e-07, "logits/chosen": -2.868354082107544, "logits/rejected": -2.792537212371826, "logps/chosen": -304.06536865234375, "logps/rejected": -323.65972900390625, "loss": 0.113, "rewards/accuracies": 1.0, "rewards/chosen": -0.8082060813903809, "rewards/margins": 9.34862995147705, "rewards/rejected": -10.156837463378906, "step": 10080 }, { "epoch": 1.96, "learning_rate": 1.9281656719637592e-07, "logits/chosen": -2.729243516921997, "logits/rejected": -2.7317256927490234, "logps/chosen": -255.93667602539062, "logps/rejected": -410.2506408691406, "loss": 0.1016, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3462402820587158, "rewards/margins": 9.17154598236084, "rewards/rejected": -10.517786026000977, "step": 10090 }, { "epoch": 1.96, "learning_rate": 1.9245703602502335e-07, "logits/chosen": -2.758331537246704, "logits/rejected": -2.755667209625244, "logps/chosen": -265.77618408203125, "logps/rejected": -362.4887390136719, "loss": 0.1132, "rewards/accuracies": 1.0, "rewards/chosen": -3.3049278259277344, "rewards/margins": 9.093594551086426, "rewards/rejected": -12.39852237701416, "step": 10100 }, { "epoch": 1.96, "eval_logits/chosen": -2.6079976558685303, "eval_logits/rejected": -2.595292568206787, "eval_logps/chosen": -261.605712890625, "eval_logps/rejected": -288.4120788574219, "eval_loss": 0.5272236466407776, "eval_rewards/accuracies": 0.7124999761581421, "eval_rewards/chosen": -6.736766338348389, "eval_rewards/margins": 4.912847995758057, "eval_rewards/rejected": -11.649614334106445, "eval_runtime": 155.9617, "eval_samples_per_second": 20.236, "eval_steps_per_second": 0.321, "step": 10100 }, { "epoch": 1.96, "learning_rate": 1.920975048536708e-07, "logits/chosen": -2.5196948051452637, "logits/rejected": -2.5852513313293457, "logps/chosen": -229.99343872070312, "logps/rejected": -265.3790283203125, "loss": 0.1412, "rewards/accuracies": 0.75, "rewards/chosen": -6.587240695953369, "rewards/margins": 7.187707424163818, "rewards/rejected": -13.774948120117188, "step": 10110 }, { "epoch": 1.96, "learning_rate": 1.9173797368231826e-07, "logits/chosen": -2.6614365577697754, "logits/rejected": -2.7115848064422607, "logps/chosen": -197.1228790283203, "logps/rejected": -306.74212646484375, "loss": 0.1299, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.5633678436279297, "rewards/margins": 8.410367965698242, "rewards/rejected": -11.973734855651855, "step": 10120 }, { "epoch": 1.97, "learning_rate": 1.913784425109657e-07, "logits/chosen": -2.8587021827697754, "logits/rejected": -2.8292853832244873, "logps/chosen": -286.17694091796875, "logps/rejected": -324.4984436035156, "loss": 0.1221, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.473060131072998, "rewards/margins": 10.909645080566406, "rewards/rejected": -14.382707595825195, "step": 10130 }, { "epoch": 1.97, "learning_rate": 1.9101891133961314e-07, "logits/chosen": -2.716796636581421, "logits/rejected": -2.649395704269409, "logps/chosen": -178.61146545410156, "logps/rejected": -303.76446533203125, "loss": 0.1284, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.529031276702881, "rewards/margins": 7.286018371582031, "rewards/rejected": -13.81505012512207, "step": 10140 }, { "epoch": 1.97, "learning_rate": 1.9065938016826057e-07, "logits/chosen": -2.6412036418914795, "logits/rejected": -2.6497039794921875, "logps/chosen": -185.38690185546875, "logps/rejected": -320.8858642578125, "loss": 0.1548, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.786194801330566, "rewards/margins": 8.789294242858887, "rewards/rejected": -14.57548713684082, "step": 10150 }, { "epoch": 1.97, "learning_rate": 1.9029984899690802e-07, "logits/chosen": -2.769472360610962, "logits/rejected": -2.743699789047241, "logps/chosen": -273.3786315917969, "logps/rejected": -366.3103332519531, "loss": 0.1805, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.7011394500732422, "rewards/margins": 9.303980827331543, "rewards/rejected": -11.005121231079102, "step": 10160 }, { "epoch": 1.97, "learning_rate": 1.8994031782555548e-07, "logits/chosen": -2.7782976627349854, "logits/rejected": -2.6834959983825684, "logps/chosen": -300.3525390625, "logps/rejected": -316.2628173828125, "loss": 0.1285, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.587084770202637, "rewards/margins": 8.565958976745605, "rewards/rejected": -13.153043746948242, "step": 10170 }, { "epoch": 1.98, "learning_rate": 1.895807866542029e-07, "logits/chosen": -2.8544564247131348, "logits/rejected": -2.818455219268799, "logps/chosen": -287.4271545410156, "logps/rejected": -367.23004150390625, "loss": 0.0872, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.183004856109619, "rewards/margins": 7.94314432144165, "rewards/rejected": -11.12614917755127, "step": 10180 }, { "epoch": 1.98, "learning_rate": 1.8922125548285036e-07, "logits/chosen": -2.8023242950439453, "logits/rejected": -2.7717652320861816, "logps/chosen": -236.1044158935547, "logps/rejected": -303.81414794921875, "loss": 0.0971, "rewards/accuracies": 1.0, "rewards/chosen": -1.0204492807388306, "rewards/margins": 10.077367782592773, "rewards/rejected": -11.097817420959473, "step": 10190 }, { "epoch": 1.98, "learning_rate": 1.888617243114978e-07, "logits/chosen": -2.7105748653411865, "logits/rejected": -2.742722749710083, "logps/chosen": -236.2729034423828, "logps/rejected": -309.09527587890625, "loss": 0.1348, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.21351432800293, "rewards/margins": 7.772862434387207, "rewards/rejected": -11.98637580871582, "step": 10200 }, { "epoch": 1.98, "eval_logits/chosen": -2.6400763988494873, "eval_logits/rejected": -2.6271843910217285, "eval_logps/chosen": -271.88494873046875, "eval_logps/rejected": -299.5146179199219, "eval_loss": 0.521010160446167, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -7.764688014984131, "eval_rewards/margins": 4.995177745819092, "eval_rewards/rejected": -12.759865760803223, "eval_runtime": 151.7927, "eval_samples_per_second": 20.792, "eval_steps_per_second": 0.329, "step": 10200 }, { "epoch": 1.98, "learning_rate": 1.8850219314014524e-07, "logits/chosen": -2.7185475826263428, "logits/rejected": -2.6648640632629395, "logps/chosen": -320.8039245605469, "logps/rejected": -432.5547790527344, "loss": 0.1744, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.132582664489746, "rewards/margins": 10.938291549682617, "rewards/rejected": -15.070874214172363, "step": 10210 }, { "epoch": 1.98, "learning_rate": 1.881426619687927e-07, "logits/chosen": -2.6703097820281982, "logits/rejected": -2.595444917678833, "logps/chosen": -218.0316162109375, "logps/rejected": -336.1557312011719, "loss": 0.1209, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.0630340576171875, "rewards/margins": 7.4914870262146, "rewards/rejected": -10.554521560668945, "step": 10220 }, { "epoch": 1.99, "learning_rate": 1.8778313079744012e-07, "logits/chosen": -2.706373691558838, "logits/rejected": -2.6765666007995605, "logps/chosen": -244.51779174804688, "logps/rejected": -333.5496826171875, "loss": 0.1057, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.8427653312683105, "rewards/margins": 12.442415237426758, "rewards/rejected": -16.285181045532227, "step": 10230 }, { "epoch": 1.99, "learning_rate": 1.8742359962608758e-07, "logits/chosen": -2.571765422821045, "logits/rejected": -2.728299617767334, "logps/chosen": -318.3007507324219, "logps/rejected": -394.8019104003906, "loss": 0.1453, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.5207507610321045, "rewards/margins": 7.949289798736572, "rewards/rejected": -10.470041275024414, "step": 10240 }, { "epoch": 1.99, "learning_rate": 1.87064068454735e-07, "logits/chosen": -2.7412946224212646, "logits/rejected": -2.670288562774658, "logps/chosen": -317.1426696777344, "logps/rejected": -364.4071350097656, "loss": 0.1457, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.740424394607544, "rewards/margins": 10.290372848510742, "rewards/rejected": -14.030797004699707, "step": 10250 }, { "epoch": 1.99, "learning_rate": 1.8670453728338246e-07, "logits/chosen": -2.6800644397735596, "logits/rejected": -2.735670804977417, "logps/chosen": -246.10568237304688, "logps/rejected": -356.8966369628906, "loss": 0.0944, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.824759006500244, "rewards/margins": 11.115058898925781, "rewards/rejected": -14.939817428588867, "step": 10260 }, { "epoch": 1.99, "learning_rate": 1.8634500611202991e-07, "logits/chosen": -2.7291271686553955, "logits/rejected": -2.670860767364502, "logps/chosen": -263.6656188964844, "logps/rejected": -385.16375732421875, "loss": 0.1313, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.1319451332092285, "rewards/margins": 8.994240760803223, "rewards/rejected": -15.126185417175293, "step": 10270 }, { "epoch": 2.0, "learning_rate": 1.8598547494067734e-07, "logits/chosen": -2.9449660778045654, "logits/rejected": -2.628852605819702, "logps/chosen": -303.976806640625, "logps/rejected": -327.79473876953125, "loss": 0.1037, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.624514102935791, "rewards/margins": 8.703771591186523, "rewards/rejected": -12.328285217285156, "step": 10280 }, { "epoch": 2.0, "learning_rate": 1.856259437693248e-07, "logits/chosen": -2.7790870666503906, "logits/rejected": -2.678490161895752, "logps/chosen": -214.7358856201172, "logps/rejected": -333.129150390625, "loss": 0.1185, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.161875009536743, "rewards/margins": 11.135395050048828, "rewards/rejected": -13.297269821166992, "step": 10290 }, { "epoch": 2.0, "learning_rate": 1.8526641259797222e-07, "logits/chosen": -2.696047067642212, "logits/rejected": -2.70910382270813, "logps/chosen": -295.90472412109375, "logps/rejected": -389.22943115234375, "loss": 0.1342, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.419425010681152, "rewards/margins": 11.907146453857422, "rewards/rejected": -18.32657241821289, "step": 10300 }, { "epoch": 2.0, "eval_logits/chosen": -2.6298301219940186, "eval_logits/rejected": -2.617715835571289, "eval_logps/chosen": -268.9454650878906, "eval_logps/rejected": -296.80413818359375, "eval_loss": 0.5257573127746582, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -7.470740795135498, "eval_rewards/margins": 5.01807975769043, "eval_rewards/rejected": -12.488821029663086, "eval_runtime": 140.8767, "eval_samples_per_second": 22.403, "eval_steps_per_second": 0.355, "step": 10300 }, { "epoch": 2.0, "learning_rate": 1.8490688142661968e-07, "logits/chosen": -2.7037014961242676, "logits/rejected": -2.7269883155822754, "logps/chosen": -182.2980194091797, "logps/rejected": -340.1834716796875, "loss": 0.0958, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.161952018737793, "rewards/margins": 12.578010559082031, "rewards/rejected": -16.73996353149414, "step": 10310 }, { "epoch": 2.0, "learning_rate": 1.8454735025526713e-07, "logits/chosen": -2.660433292388916, "logits/rejected": -2.654448986053467, "logps/chosen": -244.2759552001953, "logps/rejected": -284.2115478515625, "loss": 0.1099, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.508305549621582, "rewards/margins": 8.608351707458496, "rewards/rejected": -15.116656303405762, "step": 10320 }, { "epoch": 2.01, "learning_rate": 1.8418781908391456e-07, "logits/chosen": -2.7890124320983887, "logits/rejected": -2.805551052093506, "logps/chosen": -232.19677734375, "logps/rejected": -354.23663330078125, "loss": 0.0728, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.5977463722229, "rewards/margins": 11.178841590881348, "rewards/rejected": -17.776588439941406, "step": 10330 }, { "epoch": 2.01, "learning_rate": 1.8382828791256202e-07, "logits/chosen": -2.815319776535034, "logits/rejected": -2.7543816566467285, "logps/chosen": -298.68267822265625, "logps/rejected": -327.13983154296875, "loss": 0.0633, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.8585197925567627, "rewards/margins": 10.603599548339844, "rewards/rejected": -12.46212100982666, "step": 10340 }, { "epoch": 2.01, "learning_rate": 1.8346875674120947e-07, "logits/chosen": -2.7649192810058594, "logits/rejected": -2.7012877464294434, "logps/chosen": -275.35687255859375, "logps/rejected": -317.98406982421875, "loss": 0.0976, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.20779275894165, "rewards/margins": 8.998144149780273, "rewards/rejected": -13.205937385559082, "step": 10350 }, { "epoch": 2.01, "learning_rate": 1.831092255698569e-07, "logits/chosen": -2.7852184772491455, "logits/rejected": -2.641512870788574, "logps/chosen": -283.23480224609375, "logps/rejected": -338.6064453125, "loss": 0.0527, "rewards/accuracies": 1.0, "rewards/chosen": -0.593654215335846, "rewards/margins": 13.130376815795898, "rewards/rejected": -13.724031448364258, "step": 10360 }, { "epoch": 2.01, "learning_rate": 1.8274969439850435e-07, "logits/chosen": -2.596500873565674, "logits/rejected": -2.595038652420044, "logps/chosen": -166.22879028320312, "logps/rejected": -293.2983703613281, "loss": 0.0621, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.355771064758301, "rewards/margins": 10.234164237976074, "rewards/rejected": -14.589935302734375, "step": 10370 }, { "epoch": 2.02, "learning_rate": 1.8239016322715178e-07, "logits/chosen": -2.841305732727051, "logits/rejected": -2.784738063812256, "logps/chosen": -266.9625244140625, "logps/rejected": -399.4165954589844, "loss": 0.0684, "rewards/accuracies": 1.0, "rewards/chosen": -2.224963426589966, "rewards/margins": 13.512248039245605, "rewards/rejected": -15.737211227416992, "step": 10380 }, { "epoch": 2.02, "learning_rate": 1.820306320557992e-07, "logits/chosen": -2.6798746585845947, "logits/rejected": -2.6772398948669434, "logps/chosen": -328.0395812988281, "logps/rejected": -380.20672607421875, "loss": 0.0887, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.209399700164795, "rewards/margins": 9.099278450012207, "rewards/rejected": -13.308677673339844, "step": 10390 }, { "epoch": 2.02, "learning_rate": 1.816711008844467e-07, "logits/chosen": -2.7173755168914795, "logits/rejected": -2.7604899406433105, "logps/chosen": -224.87344360351562, "logps/rejected": -269.5205993652344, "loss": 0.0845, "rewards/accuracies": 1.0, "rewards/chosen": -2.270158529281616, "rewards/margins": 8.58400821685791, "rewards/rejected": -10.854167938232422, "step": 10400 }, { "epoch": 2.02, "eval_logits/chosen": -2.610612392425537, "eval_logits/rejected": -2.5950675010681152, "eval_logps/chosen": -276.90740966796875, "eval_logps/rejected": -309.80352783203125, "eval_loss": 0.5396497845649719, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -8.26693344116211, "eval_rewards/margins": 5.521821975708008, "eval_rewards/rejected": -13.7887544631958, "eval_runtime": 140.6042, "eval_samples_per_second": 22.446, "eval_steps_per_second": 0.356, "step": 10400 }, { "epoch": 2.02, "learning_rate": 1.8131156971309412e-07, "logits/chosen": -2.760927200317383, "logits/rejected": -2.7007737159729004, "logps/chosen": -231.59725952148438, "logps/rejected": -325.73968505859375, "loss": 0.0697, "rewards/accuracies": 1.0, "rewards/chosen": -2.4801700115203857, "rewards/margins": 15.018013000488281, "rewards/rejected": -17.49818229675293, "step": 10410 }, { "epoch": 2.02, "learning_rate": 1.8095203854174157e-07, "logits/chosen": -2.6685779094696045, "logits/rejected": -2.6751503944396973, "logps/chosen": -152.3220672607422, "logps/rejected": -256.0708312988281, "loss": 0.0759, "rewards/accuracies": 1.0, "rewards/chosen": -2.9181880950927734, "rewards/margins": 9.342168807983398, "rewards/rejected": -12.260355949401855, "step": 10420 }, { "epoch": 2.02, "learning_rate": 1.80592507370389e-07, "logits/chosen": -2.7395193576812744, "logits/rejected": -2.6276068687438965, "logps/chosen": -304.21624755859375, "logps/rejected": -361.04632568359375, "loss": 0.0897, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -7.1980133056640625, "rewards/margins": 9.919008255004883, "rewards/rejected": -17.117023468017578, "step": 10430 }, { "epoch": 2.03, "learning_rate": 1.8023297619903643e-07, "logits/chosen": -2.6000497341156006, "logits/rejected": -2.5789811611175537, "logps/chosen": -256.3652648925781, "logps/rejected": -319.67852783203125, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -4.087789058685303, "rewards/margins": 9.831320762634277, "rewards/rejected": -13.919108390808105, "step": 10440 }, { "epoch": 2.03, "learning_rate": 1.798734450276839e-07, "logits/chosen": -2.401099443435669, "logits/rejected": -2.354226589202881, "logps/chosen": -300.84490966796875, "logps/rejected": -397.1323547363281, "loss": 0.0753, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.470708847045898, "rewards/margins": 11.59446907043457, "rewards/rejected": -17.065176010131836, "step": 10450 }, { "epoch": 2.03, "learning_rate": 1.7951391385633133e-07, "logits/chosen": -2.6439056396484375, "logits/rejected": -2.7550628185272217, "logps/chosen": -313.40277099609375, "logps/rejected": -378.4320373535156, "loss": 0.0771, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.915426731109619, "rewards/margins": 7.226849555969238, "rewards/rejected": -15.142277717590332, "step": 10460 }, { "epoch": 2.03, "learning_rate": 1.791543826849788e-07, "logits/chosen": -2.7960293292999268, "logits/rejected": -2.7683236598968506, "logps/chosen": -244.3646697998047, "logps/rejected": -401.85089111328125, "loss": 0.079, "rewards/accuracies": 1.0, "rewards/chosen": -1.9456754922866821, "rewards/margins": 12.354697227478027, "rewards/rejected": -14.300374031066895, "step": 10470 }, { "epoch": 2.03, "learning_rate": 1.7879485151362622e-07, "logits/chosen": -2.6908793449401855, "logits/rejected": -2.7233099937438965, "logps/chosen": -271.5380554199219, "logps/rejected": -321.399658203125, "loss": 0.0692, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.02646541595459, "rewards/margins": 10.237837791442871, "rewards/rejected": -15.264302253723145, "step": 10480 }, { "epoch": 2.04, "learning_rate": 1.7843532034227364e-07, "logits/chosen": -2.6730997562408447, "logits/rejected": -2.7802577018737793, "logps/chosen": -202.44168090820312, "logps/rejected": -337.1480407714844, "loss": 0.0712, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.281571388244629, "rewards/margins": 11.65519905090332, "rewards/rejected": -15.93677043914795, "step": 10490 }, { "epoch": 2.04, "learning_rate": 1.7807578917092113e-07, "logits/chosen": -2.6552672386169434, "logits/rejected": -2.575317859649658, "logps/chosen": -291.5713195800781, "logps/rejected": -450.67803955078125, "loss": 0.0723, "rewards/accuracies": 1.0, "rewards/chosen": -0.9118088483810425, "rewards/margins": 12.37339973449707, "rewards/rejected": -13.285209655761719, "step": 10500 }, { "epoch": 2.04, "eval_logits/chosen": -2.5996899604797363, "eval_logits/rejected": -2.582946538925171, "eval_logps/chosen": -279.7846374511719, "eval_logps/rejected": -316.4410400390625, "eval_loss": 0.5641571283340454, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -8.554656982421875, "eval_rewards/margins": 5.897851943969727, "eval_rewards/rejected": -14.452508926391602, "eval_runtime": 139.233, "eval_samples_per_second": 22.667, "eval_steps_per_second": 0.359, "step": 10500 }, { "epoch": 2.04, "learning_rate": 1.7771625799956855e-07, "logits/chosen": -2.730332374572754, "logits/rejected": -2.6540560722351074, "logps/chosen": -322.80621337890625, "logps/rejected": -362.97430419921875, "loss": 0.0572, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.85236930847168, "rewards/margins": 10.666932106018066, "rewards/rejected": -16.51930046081543, "step": 10510 }, { "epoch": 2.04, "learning_rate": 1.77356726828216e-07, "logits/chosen": -2.607438564300537, "logits/rejected": -2.577183246612549, "logps/chosen": -232.3873748779297, "logps/rejected": -308.77490234375, "loss": 0.0899, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -9.431411743164062, "rewards/margins": 7.742515563964844, "rewards/rejected": -17.173925399780273, "step": 10520 }, { "epoch": 2.04, "learning_rate": 1.7699719565686344e-07, "logits/chosen": -2.565056562423706, "logits/rejected": -2.5193850994110107, "logps/chosen": -219.9849090576172, "logps/rejected": -334.90277099609375, "loss": 0.0808, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.017915725708008, "rewards/margins": 8.877421379089355, "rewards/rejected": -16.89533805847168, "step": 10530 }, { "epoch": 2.05, "learning_rate": 1.7663766448551086e-07, "logits/chosen": -2.7871925830841064, "logits/rejected": -2.740809202194214, "logps/chosen": -365.58013916015625, "logps/rejected": -334.5350036621094, "loss": 0.0714, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.8105227947235107, "rewards/margins": 11.399116516113281, "rewards/rejected": -15.209640502929688, "step": 10540 }, { "epoch": 2.05, "learning_rate": 1.7627813331415834e-07, "logits/chosen": -2.6606345176696777, "logits/rejected": -2.625703811645508, "logps/chosen": -215.5009307861328, "logps/rejected": -243.1717529296875, "loss": 0.1273, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -7.260490417480469, "rewards/margins": 7.008008003234863, "rewards/rejected": -14.2684965133667, "step": 10550 }, { "epoch": 2.05, "learning_rate": 1.7591860214280577e-07, "logits/chosen": -2.8269846439361572, "logits/rejected": -2.650883674621582, "logps/chosen": -243.93905639648438, "logps/rejected": -306.7244567871094, "loss": 0.0729, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3997321128845215, "rewards/margins": 8.941568374633789, "rewards/rejected": -10.341299057006836, "step": 10560 }, { "epoch": 2.05, "learning_rate": 1.7555907097145323e-07, "logits/chosen": -2.5474016666412354, "logits/rejected": -2.551736354827881, "logps/chosen": -319.91668701171875, "logps/rejected": -381.8224792480469, "loss": 0.0715, "rewards/accuracies": 1.0, "rewards/chosen": -1.944074273109436, "rewards/margins": 15.506091117858887, "rewards/rejected": -17.450164794921875, "step": 10570 }, { "epoch": 2.05, "learning_rate": 1.7519953980010065e-07, "logits/chosen": -2.663861036300659, "logits/rejected": -2.769958972930908, "logps/chosen": -251.05911254882812, "logps/rejected": -313.08709716796875, "loss": 0.0678, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.601755142211914, "rewards/margins": 9.459638595581055, "rewards/rejected": -14.061393737792969, "step": 10580 }, { "epoch": 2.06, "learning_rate": 1.7484000862874808e-07, "logits/chosen": -2.6212453842163086, "logits/rejected": -2.555856227874756, "logps/chosen": -306.3880310058594, "logps/rejected": -354.94879150390625, "loss": 0.0792, "rewards/accuracies": 0.75, "rewards/chosen": -8.706746101379395, "rewards/margins": 11.236501693725586, "rewards/rejected": -19.943248748779297, "step": 10590 }, { "epoch": 2.06, "learning_rate": 1.7448047745739556e-07, "logits/chosen": -2.7406089305877686, "logits/rejected": -2.688994884490967, "logps/chosen": -298.58282470703125, "logps/rejected": -376.921875, "loss": 0.0411, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.63175892829895, "rewards/margins": 11.818182945251465, "rewards/rejected": -14.449941635131836, "step": 10600 }, { "epoch": 2.06, "eval_logits/chosen": -2.558769464492798, "eval_logits/rejected": -2.538564920425415, "eval_logps/chosen": -297.4822692871094, "eval_logps/rejected": -336.77093505859375, "eval_loss": 0.5768638253211975, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -10.324420928955078, "eval_rewards/margins": 6.16107177734375, "eval_rewards/rejected": -16.485496520996094, "eval_runtime": 140.844, "eval_samples_per_second": 22.408, "eval_steps_per_second": 0.355, "step": 10600 }, { "epoch": 2.06, "learning_rate": 1.74120946286043e-07, "logits/chosen": -2.896561861038208, "logits/rejected": -2.8578813076019287, "logps/chosen": -297.384521484375, "logps/rejected": -331.8183898925781, "loss": 0.0645, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.844348907470703, "rewards/margins": 9.64069938659668, "rewards/rejected": -13.485048294067383, "step": 10610 }, { "epoch": 2.06, "learning_rate": 1.7376141511469044e-07, "logits/chosen": -2.603555679321289, "logits/rejected": -2.540269613265991, "logps/chosen": -181.19918823242188, "logps/rejected": -327.0639953613281, "loss": 0.0679, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.039401054382324, "rewards/margins": 12.538655281066895, "rewards/rejected": -16.578054428100586, "step": 10620 }, { "epoch": 2.06, "learning_rate": 1.7340188394333787e-07, "logits/chosen": -2.6098814010620117, "logits/rejected": -2.6087749004364014, "logps/chosen": -221.781494140625, "logps/rejected": -306.01324462890625, "loss": 0.047, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.989348411560059, "rewards/margins": 9.410599708557129, "rewards/rejected": -16.399948120117188, "step": 10630 }, { "epoch": 2.07, "learning_rate": 1.730423527719853e-07, "logits/chosen": -2.7155346870422363, "logits/rejected": -2.4269816875457764, "logps/chosen": -239.442626953125, "logps/rejected": -327.3274230957031, "loss": 0.0819, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.456811904907227, "rewards/margins": 12.340845108032227, "rewards/rejected": -17.797657012939453, "step": 10640 }, { "epoch": 2.07, "learning_rate": 1.7268282160063278e-07, "logits/chosen": -2.54668927192688, "logits/rejected": -2.483928680419922, "logps/chosen": -241.74331665039062, "logps/rejected": -301.92828369140625, "loss": 0.0707, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.61628532409668, "rewards/margins": 13.098543167114258, "rewards/rejected": -17.714828491210938, "step": 10650 }, { "epoch": 2.07, "learning_rate": 1.723232904292802e-07, "logits/chosen": -2.475114107131958, "logits/rejected": -2.5202317237854004, "logps/chosen": -297.5644226074219, "logps/rejected": -468.6898498535156, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": -4.696624279022217, "rewards/margins": 12.959909439086914, "rewards/rejected": -17.656536102294922, "step": 10660 }, { "epoch": 2.07, "learning_rate": 1.7196375925792766e-07, "logits/chosen": -2.719630718231201, "logits/rejected": -2.7059099674224854, "logps/chosen": -252.2794647216797, "logps/rejected": -373.1482849121094, "loss": 0.0544, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.261512756347656, "rewards/margins": 11.596822738647461, "rewards/rejected": -16.858333587646484, "step": 10670 }, { "epoch": 2.07, "learning_rate": 1.716042280865751e-07, "logits/chosen": -2.4799282550811768, "logits/rejected": -2.416187047958374, "logps/chosen": -255.59725952148438, "logps/rejected": -332.4320068359375, "loss": 0.0899, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.2090559005737305, "rewards/margins": 11.93217945098877, "rewards/rejected": -19.141237258911133, "step": 10680 }, { "epoch": 2.08, "learning_rate": 1.7124469691522252e-07, "logits/chosen": -2.6054298877716064, "logits/rejected": -2.5862808227539062, "logps/chosen": -216.68701171875, "logps/rejected": -321.9322814941406, "loss": 0.0674, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.772042274475098, "rewards/margins": 9.14348316192627, "rewards/rejected": -15.915525436401367, "step": 10690 }, { "epoch": 2.08, "learning_rate": 1.7088516574387e-07, "logits/chosen": -2.624765396118164, "logits/rejected": -2.569063186645508, "logps/chosen": -245.7399139404297, "logps/rejected": -294.5632629394531, "loss": 0.0459, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.5335798263549805, "rewards/margins": 10.03137493133545, "rewards/rejected": -17.56495475769043, "step": 10700 }, { "epoch": 2.08, "eval_logits/chosen": -2.544043779373169, "eval_logits/rejected": -2.523214340209961, "eval_logps/chosen": -295.04119873046875, "eval_logps/rejected": -336.9667053222656, "eval_loss": 0.5941163897514343, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -10.080312728881836, "eval_rewards/margins": 6.424767017364502, "eval_rewards/rejected": -16.505081176757812, "eval_runtime": 141.339, "eval_samples_per_second": 22.329, "eval_steps_per_second": 0.354, "step": 10700 }, { "epoch": 2.08, "learning_rate": 1.7052563457251743e-07, "logits/chosen": -2.653287887573242, "logits/rejected": -2.660998821258545, "logps/chosen": -278.9805603027344, "logps/rejected": -360.039794921875, "loss": 0.0912, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.7693047523498535, "rewards/margins": 12.69771671295166, "rewards/rejected": -20.467021942138672, "step": 10710 }, { "epoch": 2.08, "learning_rate": 1.7016610340116488e-07, "logits/chosen": -2.692852735519409, "logits/rejected": -2.5415070056915283, "logps/chosen": -252.43838500976562, "logps/rejected": -375.9709777832031, "loss": 0.0884, "rewards/accuracies": 1.0, "rewards/chosen": -3.5314717292785645, "rewards/margins": 12.215380668640137, "rewards/rejected": -15.746851921081543, "step": 10720 }, { "epoch": 2.08, "learning_rate": 1.698065722298123e-07, "logits/chosen": -2.6087474822998047, "logits/rejected": -2.59074068069458, "logps/chosen": -276.2684631347656, "logps/rejected": -426.1739196777344, "loss": 0.0764, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.9938807487487793, "rewards/margins": 12.584334373474121, "rewards/rejected": -16.57821273803711, "step": 10730 }, { "epoch": 2.09, "learning_rate": 1.6944704105845974e-07, "logits/chosen": -2.7049241065979004, "logits/rejected": -2.605888843536377, "logps/chosen": -297.2758483886719, "logps/rejected": -457.2428283691406, "loss": 0.0371, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.898677349090576, "rewards/margins": 14.518112182617188, "rewards/rejected": -17.416790008544922, "step": 10740 }, { "epoch": 2.09, "learning_rate": 1.6908750988710722e-07, "logits/chosen": -2.5800156593322754, "logits/rejected": -2.4820423126220703, "logps/chosen": -329.87335205078125, "logps/rejected": -347.7347106933594, "loss": 0.056, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -8.187057495117188, "rewards/margins": 8.66640853881836, "rewards/rejected": -16.853466033935547, "step": 10750 }, { "epoch": 2.09, "learning_rate": 1.6872797871575465e-07, "logits/chosen": -2.5678439140319824, "logits/rejected": -2.672456741333008, "logps/chosen": -306.06353759765625, "logps/rejected": -465.56439208984375, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": -4.193748474121094, "rewards/margins": 14.256443977355957, "rewards/rejected": -18.450191497802734, "step": 10760 }, { "epoch": 2.09, "learning_rate": 1.683684475444021e-07, "logits/chosen": -2.463825225830078, "logits/rejected": -2.4699854850769043, "logps/chosen": -351.6150817871094, "logps/rejected": -374.30078125, "loss": 0.1096, "rewards/accuracies": 1.0, "rewards/chosen": -2.511889934539795, "rewards/margins": 11.097128868103027, "rewards/rejected": -13.60901927947998, "step": 10770 }, { "epoch": 2.09, "learning_rate": 1.6800891637304953e-07, "logits/chosen": -2.4313137531280518, "logits/rejected": -2.417144298553467, "logps/chosen": -268.6635437011719, "logps/rejected": -355.5901794433594, "loss": 0.0738, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.694070816040039, "rewards/margins": 10.495762825012207, "rewards/rejected": -16.18983268737793, "step": 10780 }, { "epoch": 2.09, "learning_rate": 1.6764938520169696e-07, "logits/chosen": -2.7539901733398438, "logits/rejected": -2.6599209308624268, "logps/chosen": -214.2755889892578, "logps/rejected": -298.9658508300781, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": -3.8701863288879395, "rewards/margins": 12.36173152923584, "rewards/rejected": -16.231918334960938, "step": 10790 }, { "epoch": 2.1, "learning_rate": 1.6728985403034444e-07, "logits/chosen": -2.532379150390625, "logits/rejected": -2.454911470413208, "logps/chosen": -246.7219696044922, "logps/rejected": -326.46514892578125, "loss": 0.0586, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.285130262374878, "rewards/margins": 11.530749320983887, "rewards/rejected": -14.815881729125977, "step": 10800 }, { "epoch": 2.1, "eval_logits/chosen": -2.5395236015319824, "eval_logits/rejected": -2.516683578491211, "eval_logps/chosen": -296.6443176269531, "eval_logps/rejected": -339.0528564453125, "eval_loss": 0.5881070494651794, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -10.240626335144043, "eval_rewards/margins": 6.4730634689331055, "eval_rewards/rejected": -16.71368980407715, "eval_runtime": 140.6279, "eval_samples_per_second": 22.442, "eval_steps_per_second": 0.356, "step": 10800 }, { "epoch": 2.1, "learning_rate": 1.6693032285899186e-07, "logits/chosen": -2.4681193828582764, "logits/rejected": -2.490391969680786, "logps/chosen": -182.4458465576172, "logps/rejected": -254.2109375, "loss": 0.0707, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.3664960861206055, "rewards/margins": 7.3670454025268555, "rewards/rejected": -11.733543395996094, "step": 10810 }, { "epoch": 2.1, "learning_rate": 1.6657079168763932e-07, "logits/chosen": -2.5093283653259277, "logits/rejected": -2.5589137077331543, "logps/chosen": -321.19989013671875, "logps/rejected": -306.82843017578125, "loss": 0.095, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.453250885009766, "rewards/margins": 8.917437553405762, "rewards/rejected": -13.370686531066895, "step": 10820 }, { "epoch": 2.1, "learning_rate": 1.6621126051628675e-07, "logits/chosen": -2.6584315299987793, "logits/rejected": -2.6464643478393555, "logps/chosen": -274.248779296875, "logps/rejected": -381.333984375, "loss": 0.0556, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.8905997276306152, "rewards/margins": 12.009408950805664, "rewards/rejected": -14.900009155273438, "step": 10830 }, { "epoch": 2.1, "learning_rate": 1.658517293449342e-07, "logits/chosen": -2.649141788482666, "logits/rejected": -2.5465428829193115, "logps/chosen": -394.0592041015625, "logps/rejected": -392.80303955078125, "loss": 0.0488, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.281259536743164, "rewards/margins": 13.165651321411133, "rewards/rejected": -21.446908950805664, "step": 10840 }, { "epoch": 2.11, "learning_rate": 1.6549219817358166e-07, "logits/chosen": -2.739020824432373, "logits/rejected": -2.6592862606048584, "logps/chosen": -332.251953125, "logps/rejected": -469.31219482421875, "loss": 0.098, "rewards/accuracies": 1.0, "rewards/chosen": -6.956437587738037, "rewards/margins": 12.968274116516113, "rewards/rejected": -19.924711227416992, "step": 10850 }, { "epoch": 2.11, "learning_rate": 1.6513266700222908e-07, "logits/chosen": -2.4472410678863525, "logits/rejected": -2.449275255203247, "logps/chosen": -301.61260986328125, "logps/rejected": -461.7283630371094, "loss": 0.086, "rewards/accuracies": 1.0, "rewards/chosen": -6.039148330688477, "rewards/margins": 14.28356647491455, "rewards/rejected": -20.32271385192871, "step": 10860 }, { "epoch": 2.11, "learning_rate": 1.6477313583087654e-07, "logits/chosen": -2.6414430141448975, "logits/rejected": -2.567702054977417, "logps/chosen": -313.7510070800781, "logps/rejected": -385.27813720703125, "loss": 0.0872, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.331001281738281, "rewards/margins": 8.52501106262207, "rewards/rejected": -13.856013298034668, "step": 10870 }, { "epoch": 2.11, "learning_rate": 1.6441360465952397e-07, "logits/chosen": -2.6752846240997314, "logits/rejected": -2.635814666748047, "logps/chosen": -284.8486633300781, "logps/rejected": -546.738037109375, "loss": 0.071, "rewards/accuracies": 1.0, "rewards/chosen": -1.9140230417251587, "rewards/margins": 19.600696563720703, "rewards/rejected": -21.514720916748047, "step": 10880 }, { "epoch": 2.11, "learning_rate": 1.6405407348817142e-07, "logits/chosen": -2.592179536819458, "logits/rejected": -2.7527787685394287, "logps/chosen": -332.42327880859375, "logps/rejected": -542.1893310546875, "loss": 0.0717, "rewards/accuracies": 1.0, "rewards/chosen": -6.809802055358887, "rewards/margins": 17.367122650146484, "rewards/rejected": -24.176923751831055, "step": 10890 }, { "epoch": 2.12, "learning_rate": 1.6369454231681887e-07, "logits/chosen": -2.4406590461730957, "logits/rejected": -2.5358266830444336, "logps/chosen": -213.36709594726562, "logps/rejected": -317.38775634765625, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": -2.6232361793518066, "rewards/margins": 13.348731994628906, "rewards/rejected": -15.971966743469238, "step": 10900 }, { "epoch": 2.12, "eval_logits/chosen": -2.5247130393981934, "eval_logits/rejected": -2.4991767406463623, "eval_logps/chosen": -313.14306640625, "eval_logps/rejected": -359.2173156738281, "eval_loss": 0.614883542060852, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -11.890498161315918, "eval_rewards/margins": 6.839634895324707, "eval_rewards/rejected": -18.730134963989258, "eval_runtime": 140.4936, "eval_samples_per_second": 22.464, "eval_steps_per_second": 0.356, "step": 10900 }, { "epoch": 2.12, "learning_rate": 1.633350111454663e-07, "logits/chosen": -2.639094829559326, "logits/rejected": -2.6810290813446045, "logps/chosen": -278.84307861328125, "logps/rejected": -461.92364501953125, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": -8.86960220336914, "rewards/margins": 13.029779434204102, "rewards/rejected": -21.899381637573242, "step": 10910 }, { "epoch": 2.12, "learning_rate": 1.6297547997411376e-07, "logits/chosen": -2.2820096015930176, "logits/rejected": -2.2630152702331543, "logps/chosen": -302.6440734863281, "logps/rejected": -329.61541748046875, "loss": 0.0937, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.195849418640137, "rewards/margins": 10.124932289123535, "rewards/rejected": -16.320781707763672, "step": 10920 }, { "epoch": 2.12, "learning_rate": 1.6261594880276118e-07, "logits/chosen": -2.647876501083374, "logits/rejected": -2.4846560955047607, "logps/chosen": -274.51971435546875, "logps/rejected": -401.75225830078125, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": -10.075141906738281, "rewards/margins": 15.784754753112793, "rewards/rejected": -25.85989761352539, "step": 10930 }, { "epoch": 2.12, "learning_rate": 1.6225641763140864e-07, "logits/chosen": -2.8466548919677734, "logits/rejected": -2.6956911087036133, "logps/chosen": -332.37286376953125, "logps/rejected": -427.3744201660156, "loss": 0.0622, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.177321910858154, "rewards/margins": 15.13109016418457, "rewards/rejected": -21.308412551879883, "step": 10940 }, { "epoch": 2.13, "learning_rate": 1.618968864600561e-07, "logits/chosen": -2.668459415435791, "logits/rejected": -2.5558042526245117, "logps/chosen": -268.5563049316406, "logps/rejected": -342.63177490234375, "loss": 0.0955, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -9.891250610351562, "rewards/margins": 10.969114303588867, "rewards/rejected": -20.86036491394043, "step": 10950 }, { "epoch": 2.13, "learning_rate": 1.6153735528870352e-07, "logits/chosen": -2.714474678039551, "logits/rejected": -2.6775665283203125, "logps/chosen": -343.44305419921875, "logps/rejected": -320.18634033203125, "loss": 0.0629, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.7504987716674805, "rewards/margins": 10.8442964553833, "rewards/rejected": -16.59479522705078, "step": 10960 }, { "epoch": 2.13, "learning_rate": 1.6117782411735097e-07, "logits/chosen": -2.4388394355773926, "logits/rejected": -2.5422651767730713, "logps/chosen": -258.8954162597656, "logps/rejected": -359.7748718261719, "loss": 0.0807, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -9.019707679748535, "rewards/margins": 7.9827775955200195, "rewards/rejected": -17.002483367919922, "step": 10970 }, { "epoch": 2.13, "learning_rate": 1.608182929459984e-07, "logits/chosen": -2.6410956382751465, "logits/rejected": -2.683004856109619, "logps/chosen": -475.34954833984375, "logps/rejected": -489.85003662109375, "loss": 0.0584, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.680830478668213, "rewards/margins": 15.518026351928711, "rewards/rejected": -18.198856353759766, "step": 10980 }, { "epoch": 2.13, "learning_rate": 1.6045876177464586e-07, "logits/chosen": -2.6654322147369385, "logits/rejected": -2.5960605144500732, "logps/chosen": -278.5632019042969, "logps/rejected": -429.90130615234375, "loss": 0.0675, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.198668956756592, "rewards/margins": 12.306130409240723, "rewards/rejected": -18.50480079650879, "step": 10990 }, { "epoch": 2.14, "learning_rate": 1.600992306032933e-07, "logits/chosen": -2.6287841796875, "logits/rejected": -2.690498113632202, "logps/chosen": -323.66571044921875, "logps/rejected": -383.86419677734375, "loss": 0.0518, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.227775573730469, "rewards/margins": 13.197230339050293, "rewards/rejected": -19.425006866455078, "step": 11000 }, { "epoch": 2.14, "eval_logits/chosen": -2.5589678287506104, "eval_logits/rejected": -2.5353012084960938, "eval_logps/chosen": -313.0390930175781, "eval_logps/rejected": -360.3355712890625, "eval_loss": 0.6385772824287415, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -11.880102157592773, "eval_rewards/margins": 6.9618611335754395, "eval_rewards/rejected": -18.841962814331055, "eval_runtime": 156.4122, "eval_samples_per_second": 20.177, "eval_steps_per_second": 0.32, "step": 11000 }, { "epoch": 2.14, "learning_rate": 1.5973969943194074e-07, "logits/chosen": -2.667440891265869, "logits/rejected": -2.384535312652588, "logps/chosen": -244.1143341064453, "logps/rejected": -304.1304626464844, "loss": 0.0741, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.142891883850098, "rewards/margins": 10.961563110351562, "rewards/rejected": -16.104454040527344, "step": 11010 }, { "epoch": 2.14, "learning_rate": 1.593801682605882e-07, "logits/chosen": -2.6622557640075684, "logits/rejected": -2.608793258666992, "logps/chosen": -297.91717529296875, "logps/rejected": -365.6161804199219, "loss": 0.0821, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.57074499130249, "rewards/margins": 13.855226516723633, "rewards/rejected": -20.42597007751465, "step": 11020 }, { "epoch": 2.14, "learning_rate": 1.5902063708923562e-07, "logits/chosen": -2.718367338180542, "logits/rejected": -2.597449779510498, "logps/chosen": -279.3199768066406, "logps/rejected": -388.0484313964844, "loss": 0.0788, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -10.699678421020508, "rewards/margins": 11.566597938537598, "rewards/rejected": -22.266277313232422, "step": 11030 }, { "epoch": 2.14, "learning_rate": 1.5866110591788308e-07, "logits/chosen": -2.8459181785583496, "logits/rejected": -2.811908006668091, "logps/chosen": -329.0658874511719, "logps/rejected": -457.80810546875, "loss": 0.0624, "rewards/accuracies": 1.0, "rewards/chosen": -2.6768605709075928, "rewards/margins": 14.439372062683105, "rewards/rejected": -17.11623191833496, "step": 11040 }, { "epoch": 2.15, "learning_rate": 1.5830157474653053e-07, "logits/chosen": -2.5981099605560303, "logits/rejected": -2.575413703918457, "logps/chosen": -371.9594421386719, "logps/rejected": -435.58349609375, "loss": 0.0739, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -9.415132522583008, "rewards/margins": 11.580273628234863, "rewards/rejected": -20.99540901184082, "step": 11050 }, { "epoch": 2.15, "learning_rate": 1.5794204357517796e-07, "logits/chosen": -2.4940452575683594, "logits/rejected": -2.682844638824463, "logps/chosen": -217.81845092773438, "logps/rejected": -368.53680419921875, "loss": 0.0621, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.1467790603637695, "rewards/margins": 12.292379379272461, "rewards/rejected": -19.439159393310547, "step": 11060 }, { "epoch": 2.15, "learning_rate": 1.575825124038254e-07, "logits/chosen": -2.670027256011963, "logits/rejected": -2.7122721672058105, "logps/chosen": -258.63818359375, "logps/rejected": -381.0811462402344, "loss": 0.0697, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.69087028503418, "rewards/margins": 11.159737586975098, "rewards/rejected": -15.850606918334961, "step": 11070 }, { "epoch": 2.15, "learning_rate": 1.5722298123247284e-07, "logits/chosen": -2.6194968223571777, "logits/rejected": -2.646165370941162, "logps/chosen": -317.9933776855469, "logps/rejected": -354.77020263671875, "loss": 0.0882, "rewards/accuracies": 0.75, "rewards/chosen": -8.147225379943848, "rewards/margins": 8.7410306930542, "rewards/rejected": -16.88825798034668, "step": 11080 }, { "epoch": 2.15, "learning_rate": 1.568634500611203e-07, "logits/chosen": -2.602839946746826, "logits/rejected": -2.6435656547546387, "logps/chosen": -314.6427001953125, "logps/rejected": -511.84307861328125, "loss": 0.0947, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.209485054016113, "rewards/margins": 16.623762130737305, "rewards/rejected": -22.83324432373047, "step": 11090 }, { "epoch": 2.15, "learning_rate": 1.5650391888976775e-07, "logits/chosen": -2.8791985511779785, "logits/rejected": -2.71051025390625, "logps/chosen": -263.18023681640625, "logps/rejected": -334.80242919921875, "loss": 0.0668, "rewards/accuracies": 1.0, "rewards/chosen": -2.914602041244507, "rewards/margins": 10.570642471313477, "rewards/rejected": -13.485244750976562, "step": 11100 }, { "epoch": 2.15, "eval_logits/chosen": -2.534025192260742, "eval_logits/rejected": -2.5089762210845947, "eval_logps/chosen": -311.0261535644531, "eval_logps/rejected": -360.55535888671875, "eval_loss": 0.6274450421333313, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -11.678807258605957, "eval_rewards/margins": 7.185128688812256, "eval_rewards/rejected": -18.863937377929688, "eval_runtime": 154.558, "eval_samples_per_second": 20.42, "eval_steps_per_second": 0.324, "step": 11100 }, { "epoch": 2.16, "learning_rate": 1.5614438771841518e-07, "logits/chosen": -2.553730010986328, "logits/rejected": -2.4233312606811523, "logps/chosen": -297.3955383300781, "logps/rejected": -408.5569152832031, "loss": 0.0643, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -9.867355346679688, "rewards/margins": 10.925642013549805, "rewards/rejected": -20.792999267578125, "step": 11110 }, { "epoch": 2.16, "learning_rate": 1.557848565470626e-07, "logits/chosen": -2.7405340671539307, "logits/rejected": -2.722381830215454, "logps/chosen": -213.37008666992188, "logps/rejected": -380.01898193359375, "loss": 0.0895, "rewards/accuracies": 1.0, "rewards/chosen": -4.469528675079346, "rewards/margins": 13.943641662597656, "rewards/rejected": -18.413171768188477, "step": 11120 }, { "epoch": 2.16, "learning_rate": 1.5542532537571006e-07, "logits/chosen": -2.604018449783325, "logits/rejected": -2.575108051300049, "logps/chosen": -248.8677978515625, "logps/rejected": -353.578369140625, "loss": 0.1341, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.432516098022461, "rewards/margins": 8.367327690124512, "rewards/rejected": -16.79984474182129, "step": 11130 }, { "epoch": 2.16, "learning_rate": 1.550657942043575e-07, "logits/chosen": -2.7750678062438965, "logits/rejected": -2.6267189979553223, "logps/chosen": -290.4456787109375, "logps/rejected": -415.94647216796875, "loss": 0.0588, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.543251991271973, "rewards/margins": 12.352669715881348, "rewards/rejected": -19.895919799804688, "step": 11140 }, { "epoch": 2.16, "learning_rate": 1.5470626303300497e-07, "logits/chosen": -2.464034080505371, "logits/rejected": -2.460228681564331, "logps/chosen": -296.686279296875, "logps/rejected": -447.08184814453125, "loss": 0.0575, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -9.144915580749512, "rewards/margins": 15.27160358428955, "rewards/rejected": -24.416519165039062, "step": 11150 }, { "epoch": 2.17, "learning_rate": 1.543467318616524e-07, "logits/chosen": -2.453579902648926, "logits/rejected": -2.556788682937622, "logps/chosen": -257.7510986328125, "logps/rejected": -426.12823486328125, "loss": 0.1015, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -9.701089859008789, "rewards/margins": 14.420515060424805, "rewards/rejected": -24.121604919433594, "step": 11160 }, { "epoch": 2.17, "learning_rate": 1.5398720069029982e-07, "logits/chosen": -2.607083797454834, "logits/rejected": -2.6189160346984863, "logps/chosen": -361.08050537109375, "logps/rejected": -591.3878173828125, "loss": 0.063, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -9.01377010345459, "rewards/margins": 13.404559135437012, "rewards/rejected": -22.418331146240234, "step": 11170 }, { "epoch": 2.17, "learning_rate": 1.5362766951894728e-07, "logits/chosen": -2.4953904151916504, "logits/rejected": -2.5536980628967285, "logps/chosen": -264.725830078125, "logps/rejected": -472.8086853027344, "loss": 0.092, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -15.30578899383545, "rewards/margins": 11.997278213500977, "rewards/rejected": -27.303064346313477, "step": 11180 }, { "epoch": 2.17, "learning_rate": 1.5326813834759473e-07, "logits/chosen": -2.753215789794922, "logits/rejected": -2.685314893722534, "logps/chosen": -293.8022155761719, "logps/rejected": -406.1915588378906, "loss": 0.0626, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0069998502731323, "rewards/margins": 14.971635818481445, "rewards/rejected": -15.978635787963867, "step": 11190 }, { "epoch": 2.17, "learning_rate": 1.5290860717624219e-07, "logits/chosen": -2.5073513984680176, "logits/rejected": -2.4092020988464355, "logps/chosen": -179.8335723876953, "logps/rejected": -279.8216247558594, "loss": 0.1038, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.35915756225586, "rewards/margins": 10.612787246704102, "rewards/rejected": -18.97194480895996, "step": 11200 }, { "epoch": 2.17, "eval_logits/chosen": -2.5274198055267334, "eval_logits/rejected": -2.5016088485717773, "eval_logps/chosen": -311.4629211425781, "eval_logps/rejected": -362.78240966796875, "eval_loss": 0.6328377723693848, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -11.722484588623047, "eval_rewards/margins": 7.364159107208252, "eval_rewards/rejected": -19.086645126342773, "eval_runtime": 139.807, "eval_samples_per_second": 22.574, "eval_steps_per_second": 0.358, "step": 11200 }, { "epoch": 2.18, "learning_rate": 1.525490760048896e-07, "logits/chosen": -2.626120090484619, "logits/rejected": -2.6258702278137207, "logps/chosen": -225.55581665039062, "logps/rejected": -447.088134765625, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": -3.508953094482422, "rewards/margins": 16.111501693725586, "rewards/rejected": -19.620454788208008, "step": 11210 }, { "epoch": 2.18, "learning_rate": 1.5218954483353704e-07, "logits/chosen": -2.7070865631103516, "logits/rejected": -2.6965084075927734, "logps/chosen": -276.8108825683594, "logps/rejected": -474.0812072753906, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": -1.2777347564697266, "rewards/margins": 17.953739166259766, "rewards/rejected": -19.231473922729492, "step": 11220 }, { "epoch": 2.18, "learning_rate": 1.5183001366218452e-07, "logits/chosen": -2.6148335933685303, "logits/rejected": -2.7021570205688477, "logps/chosen": -232.45120239257812, "logps/rejected": -314.4829406738281, "loss": 0.0619, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.771601676940918, "rewards/margins": 10.580767631530762, "rewards/rejected": -13.35236930847168, "step": 11230 }, { "epoch": 2.18, "learning_rate": 1.5147048249083195e-07, "logits/chosen": -2.48647403717041, "logits/rejected": -2.547441005706787, "logps/chosen": -325.8612365722656, "logps/rejected": -394.05877685546875, "loss": 0.0582, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -13.092509269714355, "rewards/margins": 10.890533447265625, "rewards/rejected": -23.983041763305664, "step": 11240 }, { "epoch": 2.18, "learning_rate": 1.511109513194794e-07, "logits/chosen": -2.523958206176758, "logits/rejected": -2.5374932289123535, "logps/chosen": -317.35626220703125, "logps/rejected": -418.19427490234375, "loss": 0.0474, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -7.8556671142578125, "rewards/margins": 11.438713073730469, "rewards/rejected": -19.29437828063965, "step": 11250 }, { "epoch": 2.19, "learning_rate": 1.5075142014812683e-07, "logits/chosen": -2.656561851501465, "logits/rejected": -2.5346570014953613, "logps/chosen": -212.903076171875, "logps/rejected": -320.86029052734375, "loss": 0.073, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.8994927406311035, "rewards/margins": 12.7178373336792, "rewards/rejected": -17.617328643798828, "step": 11260 }, { "epoch": 2.19, "learning_rate": 1.5039188897677426e-07, "logits/chosen": -2.640470027923584, "logits/rejected": -2.475165843963623, "logps/chosen": -318.3912658691406, "logps/rejected": -448.56268310546875, "loss": 0.0763, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.485833168029785, "rewards/margins": 11.998087882995605, "rewards/rejected": -18.48392105102539, "step": 11270 }, { "epoch": 2.19, "learning_rate": 1.5003235780542174e-07, "logits/chosen": -2.808537006378174, "logits/rejected": -2.6735212802886963, "logps/chosen": -324.00677490234375, "logps/rejected": -353.6200256347656, "loss": 0.0868, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.3140597343444824, "rewards/margins": 11.747946739196777, "rewards/rejected": -15.06200885772705, "step": 11280 }, { "epoch": 2.19, "learning_rate": 1.4967282663406917e-07, "logits/chosen": -2.7259631156921387, "logits/rejected": -2.7184345722198486, "logps/chosen": -307.0538024902344, "logps/rejected": -416.4231872558594, "loss": 0.0713, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.9838473796844482, "rewards/margins": 12.451475143432617, "rewards/rejected": -16.435321807861328, "step": 11290 }, { "epoch": 2.19, "learning_rate": 1.4931329546271662e-07, "logits/chosen": -2.4986824989318848, "logits/rejected": -2.492968797683716, "logps/chosen": -327.5531921386719, "logps/rejected": -383.15606689453125, "loss": 0.0684, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.82669734954834, "rewards/margins": 13.673391342163086, "rewards/rejected": -20.50008773803711, "step": 11300 }, { "epoch": 2.19, "eval_logits/chosen": -2.5490097999572754, "eval_logits/rejected": -2.528714418411255, "eval_logps/chosen": -305.3045959472656, "eval_logps/rejected": -352.1844482421875, "eval_loss": 0.6159024238586426, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -11.106654167175293, "eval_rewards/margins": 6.920196533203125, "eval_rewards/rejected": -18.0268497467041, "eval_runtime": 140.5121, "eval_samples_per_second": 22.461, "eval_steps_per_second": 0.356, "step": 11300 }, { "epoch": 2.2, "learning_rate": 1.4895376429136405e-07, "logits/chosen": -2.5363929271698, "logits/rejected": -2.599428415298462, "logps/chosen": -284.7268981933594, "logps/rejected": -372.39935302734375, "loss": 0.0858, "rewards/accuracies": 0.75, "rewards/chosen": -11.038068771362305, "rewards/margins": 9.73109245300293, "rewards/rejected": -20.769161224365234, "step": 11310 }, { "epoch": 2.2, "learning_rate": 1.4859423312001148e-07, "logits/chosen": -2.5442938804626465, "logits/rejected": -2.484290599822998, "logps/chosen": -228.50784301757812, "logps/rejected": -322.25811767578125, "loss": 0.0613, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.360391616821289, "rewards/margins": 13.330389022827148, "rewards/rejected": -16.69078254699707, "step": 11320 }, { "epoch": 2.2, "learning_rate": 1.4823470194865896e-07, "logits/chosen": -2.5713067054748535, "logits/rejected": -2.541801691055298, "logps/chosen": -290.6044921875, "logps/rejected": -360.5171813964844, "loss": 0.0716, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.318646430969238, "rewards/margins": 10.339444160461426, "rewards/rejected": -17.658090591430664, "step": 11330 }, { "epoch": 2.2, "learning_rate": 1.4787517077730639e-07, "logits/chosen": -2.5785257816314697, "logits/rejected": -2.57765531539917, "logps/chosen": -262.76776123046875, "logps/rejected": -473.09429931640625, "loss": 0.1116, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -9.607316970825195, "rewards/margins": 17.369888305664062, "rewards/rejected": -26.977203369140625, "step": 11340 }, { "epoch": 2.2, "learning_rate": 1.4751563960595384e-07, "logits/chosen": -2.5037636756896973, "logits/rejected": -2.5612921714782715, "logps/chosen": -239.2677764892578, "logps/rejected": -393.05511474609375, "loss": 0.081, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -9.62736988067627, "rewards/margins": 11.542470932006836, "rewards/rejected": -21.169841766357422, "step": 11350 }, { "epoch": 2.21, "learning_rate": 1.4715610843460127e-07, "logits/chosen": -2.627143383026123, "logits/rejected": -2.6791281700134277, "logps/chosen": -336.3082275390625, "logps/rejected": -490.94036865234375, "loss": 0.0631, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.407607078552246, "rewards/margins": 11.873138427734375, "rewards/rejected": -19.280746459960938, "step": 11360 }, { "epoch": 2.21, "learning_rate": 1.467965772632487e-07, "logits/chosen": -2.705996036529541, "logits/rejected": -2.693732738494873, "logps/chosen": -287.5931091308594, "logps/rejected": -437.357177734375, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": -6.194832801818848, "rewards/margins": 11.717456817626953, "rewards/rejected": -17.912288665771484, "step": 11370 }, { "epoch": 2.21, "learning_rate": 1.4643704609189618e-07, "logits/chosen": -2.4713706970214844, "logits/rejected": -2.4754528999328613, "logps/chosen": -204.80233764648438, "logps/rejected": -354.646240234375, "loss": 0.0896, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -9.231915473937988, "rewards/margins": 10.809212684631348, "rewards/rejected": -20.041126251220703, "step": 11380 }, { "epoch": 2.21, "learning_rate": 1.460775149205436e-07, "logits/chosen": -2.6862220764160156, "logits/rejected": -2.713573932647705, "logps/chosen": -274.80499267578125, "logps/rejected": -307.76190185546875, "loss": 0.0738, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -8.292464256286621, "rewards/margins": 8.338617324829102, "rewards/rejected": -16.631084442138672, "step": 11390 }, { "epoch": 2.21, "learning_rate": 1.4571798374919106e-07, "logits/chosen": -2.600187301635742, "logits/rejected": -2.5780961513519287, "logps/chosen": -236.43203735351562, "logps/rejected": -311.9661560058594, "loss": 0.1067, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.995016574859619, "rewards/margins": 9.66476058959961, "rewards/rejected": -17.65977668762207, "step": 11400 }, { "epoch": 2.21, "eval_logits/chosen": -2.597402572631836, "eval_logits/rejected": -2.5786943435668945, "eval_logps/chosen": -296.12762451171875, "eval_logps/rejected": -338.4790344238281, "eval_loss": 0.6008053421974182, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -10.188957214355469, "eval_rewards/margins": 6.467350006103516, "eval_rewards/rejected": -16.65630531311035, "eval_runtime": 153.6646, "eval_samples_per_second": 20.538, "eval_steps_per_second": 0.325, "step": 11400 }, { "epoch": 2.22, "learning_rate": 1.453584525778385e-07, "logits/chosen": -2.750739812850952, "logits/rejected": -2.723726511001587, "logps/chosen": -234.849853515625, "logps/rejected": -380.14990234375, "loss": 0.0701, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.196296691894531, "rewards/margins": 10.99039363861084, "rewards/rejected": -19.186689376831055, "step": 11410 }, { "epoch": 2.22, "learning_rate": 1.4499892140648591e-07, "logits/chosen": -2.583066701889038, "logits/rejected": -2.6566250324249268, "logps/chosen": -310.9710388183594, "logps/rejected": -354.7814025878906, "loss": 0.0621, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.802247524261475, "rewards/margins": 13.766270637512207, "rewards/rejected": -20.568519592285156, "step": 11420 }, { "epoch": 2.22, "learning_rate": 1.446393902351334e-07, "logits/chosen": -2.6055569648742676, "logits/rejected": -2.6457715034484863, "logps/chosen": -286.761962890625, "logps/rejected": -376.4789123535156, "loss": 0.0615, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -7.760054111480713, "rewards/margins": 11.66706371307373, "rewards/rejected": -19.4271183013916, "step": 11430 }, { "epoch": 2.22, "learning_rate": 1.4427985906378082e-07, "logits/chosen": -2.3500683307647705, "logits/rejected": -2.5180647373199463, "logps/chosen": -298.7884216308594, "logps/rejected": -370.7210388183594, "loss": 0.1296, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.273965835571289, "rewards/margins": 13.301396369934082, "rewards/rejected": -18.575363159179688, "step": 11440 }, { "epoch": 2.22, "learning_rate": 1.4392032789242828e-07, "logits/chosen": -2.5943427085876465, "logits/rejected": -2.5318379402160645, "logps/chosen": -165.84658813476562, "logps/rejected": -344.3070373535156, "loss": 0.0928, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.968050003051758, "rewards/margins": 11.761285781860352, "rewards/rejected": -17.72933578491211, "step": 11450 }, { "epoch": 2.22, "learning_rate": 1.435607967210757e-07, "logits/chosen": -2.745082378387451, "logits/rejected": -2.668109178543091, "logps/chosen": -312.0365905761719, "logps/rejected": -352.36077880859375, "loss": 0.1067, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.082812309265137, "rewards/margins": 9.763988494873047, "rewards/rejected": -17.8468017578125, "step": 11460 }, { "epoch": 2.23, "learning_rate": 1.4320126554972313e-07, "logits/chosen": -2.496685028076172, "logits/rejected": -2.6075363159179688, "logps/chosen": -258.70440673828125, "logps/rejected": -411.5464782714844, "loss": 0.0474, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.412840843200684, "rewards/margins": 13.252779960632324, "rewards/rejected": -19.665620803833008, "step": 11470 }, { "epoch": 2.23, "learning_rate": 1.4284173437837061e-07, "logits/chosen": -2.6697323322296143, "logits/rejected": -2.569648504257202, "logps/chosen": -273.3839111328125, "logps/rejected": -304.33648681640625, "loss": 0.0733, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.097462177276611, "rewards/margins": 8.689809799194336, "rewards/rejected": -13.787272453308105, "step": 11480 }, { "epoch": 2.23, "learning_rate": 1.4248220320701804e-07, "logits/chosen": -2.6180739402770996, "logits/rejected": -2.5678303241729736, "logps/chosen": -257.52386474609375, "logps/rejected": -337.18829345703125, "loss": 0.0823, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.232558250427246, "rewards/margins": 12.40059757232666, "rewards/rejected": -16.633155822753906, "step": 11490 }, { "epoch": 2.23, "learning_rate": 1.421226720356655e-07, "logits/chosen": -2.77288818359375, "logits/rejected": -2.778804063796997, "logps/chosen": -344.33233642578125, "logps/rejected": -400.1312561035156, "loss": 0.076, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.488837480545044, "rewards/margins": 13.25555419921875, "rewards/rejected": -16.74439239501953, "step": 11500 }, { "epoch": 2.23, "eval_logits/chosen": -2.581376314163208, "eval_logits/rejected": -2.564934015274048, "eval_logps/chosen": -286.0017395019531, "eval_logps/rejected": -327.9375305175781, "eval_loss": 0.6069397926330566, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -9.176368713378906, "eval_rewards/margins": 6.425786972045898, "eval_rewards/rejected": -15.602155685424805, "eval_runtime": 141.032, "eval_samples_per_second": 22.378, "eval_steps_per_second": 0.355, "step": 11500 }, { "epoch": 2.23, "learning_rate": 1.4176314086431292e-07, "logits/chosen": -2.5592751502990723, "logits/rejected": -2.426851749420166, "logps/chosen": -259.74176025390625, "logps/rejected": -386.64373779296875, "loss": 0.076, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.105155944824219, "rewards/margins": 12.326289176940918, "rewards/rejected": -16.431446075439453, "step": 11510 }, { "epoch": 2.24, "learning_rate": 1.4140360969296035e-07, "logits/chosen": -2.7349648475646973, "logits/rejected": -2.6921682357788086, "logps/chosen": -247.73861694335938, "logps/rejected": -351.2485046386719, "loss": 0.0761, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.348642349243164, "rewards/margins": 14.42656421661377, "rewards/rejected": -17.775205612182617, "step": 11520 }, { "epoch": 2.24, "learning_rate": 1.4104407852160783e-07, "logits/chosen": -2.6718761920928955, "logits/rejected": -2.6378886699676514, "logps/chosen": -203.54443359375, "logps/rejected": -397.050048828125, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": -4.050256252288818, "rewards/margins": 15.7075834274292, "rewards/rejected": -19.75783920288086, "step": 11530 }, { "epoch": 2.24, "learning_rate": 1.4068454735025526e-07, "logits/chosen": -2.718578338623047, "logits/rejected": -2.7763664722442627, "logps/chosen": -348.1041259765625, "logps/rejected": -305.02215576171875, "loss": 0.0627, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.355883598327637, "rewards/margins": 7.554298400878906, "rewards/rejected": -11.910181999206543, "step": 11540 }, { "epoch": 2.24, "learning_rate": 1.4032501617890271e-07, "logits/chosen": -2.6585307121276855, "logits/rejected": -2.7362751960754395, "logps/chosen": -245.41000366210938, "logps/rejected": -302.5990295410156, "loss": 0.0722, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.522536516189575, "rewards/margins": 9.066805839538574, "rewards/rejected": -12.589343070983887, "step": 11550 }, { "epoch": 2.24, "learning_rate": 1.3996548500755014e-07, "logits/chosen": -2.839871644973755, "logits/rejected": -2.7600152492523193, "logps/chosen": -272.6055603027344, "logps/rejected": -296.7951965332031, "loss": 0.0681, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.299689292907715, "rewards/margins": 9.61026668548584, "rewards/rejected": -13.909955978393555, "step": 11560 }, { "epoch": 2.25, "learning_rate": 1.3960595383619757e-07, "logits/chosen": -2.465066432952881, "logits/rejected": -2.4901702404022217, "logps/chosen": -203.64425659179688, "logps/rejected": -324.4049987792969, "loss": 0.0766, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.24183464050293, "rewards/margins": 13.511833190917969, "rewards/rejected": -19.753665924072266, "step": 11570 }, { "epoch": 2.25, "learning_rate": 1.3924642266484505e-07, "logits/chosen": -2.646562099456787, "logits/rejected": -2.6387124061584473, "logps/chosen": -207.3817596435547, "logps/rejected": -311.0987243652344, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": -3.9210293292999268, "rewards/margins": 10.693174362182617, "rewards/rejected": -14.614204406738281, "step": 11580 }, { "epoch": 2.25, "learning_rate": 1.3888689149349248e-07, "logits/chosen": -2.6290736198425293, "logits/rejected": -2.6175715923309326, "logps/chosen": -269.1551818847656, "logps/rejected": -309.23492431640625, "loss": 0.0723, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -7.581655025482178, "rewards/margins": 9.738439559936523, "rewards/rejected": -17.32009506225586, "step": 11590 }, { "epoch": 2.25, "learning_rate": 1.3852736032213993e-07, "logits/chosen": -2.6184613704681396, "logits/rejected": -2.687415599822998, "logps/chosen": -295.8892517089844, "logps/rejected": -422.18292236328125, "loss": 0.0831, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.389686584472656, "rewards/margins": 7.802104949951172, "rewards/rejected": -16.191789627075195, "step": 11600 }, { "epoch": 2.25, "eval_logits/chosen": -2.553870677947998, "eval_logits/rejected": -2.5353219509124756, "eval_logps/chosen": -289.2669982910156, "eval_logps/rejected": -333.825439453125, "eval_loss": 0.6081392765045166, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -9.502893447875977, "eval_rewards/margins": 6.688055515289307, "eval_rewards/rejected": -16.190948486328125, "eval_runtime": 140.1865, "eval_samples_per_second": 22.513, "eval_steps_per_second": 0.357, "step": 11600 }, { "epoch": 2.25, "learning_rate": 1.3816782915078736e-07, "logits/chosen": -2.7735273838043213, "logits/rejected": -2.730475664138794, "logps/chosen": -303.290283203125, "logps/rejected": -428.74853515625, "loss": 0.0674, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.033844470977783, "rewards/margins": 16.606611251831055, "rewards/rejected": -19.640457153320312, "step": 11610 }, { "epoch": 2.26, "learning_rate": 1.378082979794348e-07, "logits/chosen": -2.522797107696533, "logits/rejected": -2.4746179580688477, "logps/chosen": -251.85885620117188, "logps/rejected": -412.51373291015625, "loss": 0.0631, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.951503753662109, "rewards/margins": 13.889203071594238, "rewards/rejected": -20.84070587158203, "step": 11620 }, { "epoch": 2.26, "learning_rate": 1.3744876680808227e-07, "logits/chosen": -2.443213939666748, "logits/rejected": -2.334507703781128, "logps/chosen": -233.6834716796875, "logps/rejected": -317.0559387207031, "loss": 0.0901, "rewards/accuracies": 0.75, "rewards/chosen": -10.278505325317383, "rewards/margins": 8.525633811950684, "rewards/rejected": -18.80413818359375, "step": 11630 }, { "epoch": 2.26, "learning_rate": 1.370892356367297e-07, "logits/chosen": -2.6557250022888184, "logits/rejected": -2.6149024963378906, "logps/chosen": -268.98187255859375, "logps/rejected": -391.93408203125, "loss": 0.0717, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.671351432800293, "rewards/margins": 12.258050918579102, "rewards/rejected": -16.929401397705078, "step": 11640 }, { "epoch": 2.26, "learning_rate": 1.3672970446537715e-07, "logits/chosen": -2.730659246444702, "logits/rejected": -2.815887928009033, "logps/chosen": -343.34759521484375, "logps/rejected": -400.55322265625, "loss": 0.055, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.423094749450684, "rewards/margins": 12.474505424499512, "rewards/rejected": -17.897602081298828, "step": 11650 }, { "epoch": 2.26, "learning_rate": 1.3637017329402458e-07, "logits/chosen": -2.6932754516601562, "logits/rejected": -2.586027145385742, "logps/chosen": -314.05908203125, "logps/rejected": -355.5710144042969, "loss": 0.0658, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -9.076383590698242, "rewards/margins": 10.35023307800293, "rewards/rejected": -19.426616668701172, "step": 11660 }, { "epoch": 2.27, "learning_rate": 1.36010642122672e-07, "logits/chosen": -2.618831157684326, "logits/rejected": -2.618896961212158, "logps/chosen": -279.1488952636719, "logps/rejected": -306.9048156738281, "loss": 0.0833, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -7.087740898132324, "rewards/margins": 7.7223076820373535, "rewards/rejected": -14.81004810333252, "step": 11670 }, { "epoch": 2.27, "learning_rate": 1.356511109513195e-07, "logits/chosen": -2.6757123470306396, "logits/rejected": -2.6962084770202637, "logps/chosen": -198.5136260986328, "logps/rejected": -373.1137390136719, "loss": 0.1785, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -6.5840020179748535, "rewards/margins": 12.190035820007324, "rewards/rejected": -18.774036407470703, "step": 11680 }, { "epoch": 2.27, "learning_rate": 1.3529157977996692e-07, "logits/chosen": -2.469268321990967, "logits/rejected": -2.5300540924072266, "logps/chosen": -308.2252502441406, "logps/rejected": -345.7593688964844, "loss": 0.0637, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.705275058746338, "rewards/margins": 12.153297424316406, "rewards/rejected": -17.858570098876953, "step": 11690 }, { "epoch": 2.27, "learning_rate": 1.3493204860861437e-07, "logits/chosen": -2.606558084487915, "logits/rejected": -2.5242538452148438, "logps/chosen": -265.8565979003906, "logps/rejected": -314.40380859375, "loss": 0.0767, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.574146270751953, "rewards/margins": 13.617681503295898, "rewards/rejected": -20.19182777404785, "step": 11700 }, { "epoch": 2.27, "eval_logits/chosen": -2.5127735137939453, "eval_logits/rejected": -2.491795301437378, "eval_logps/chosen": -292.9400634765625, "eval_logps/rejected": -346.1355895996094, "eval_loss": 0.623223066329956, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -9.870200157165527, "eval_rewards/margins": 7.551764011383057, "eval_rewards/rejected": -17.421964645385742, "eval_runtime": 140.0904, "eval_samples_per_second": 22.528, "eval_steps_per_second": 0.357, "step": 11700 }, { "epoch": 2.27, "learning_rate": 1.345725174372618e-07, "logits/chosen": -2.685258388519287, "logits/rejected": -2.705810785293579, "logps/chosen": -368.918212890625, "logps/rejected": -513.9830322265625, "loss": 0.0669, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -7.687708377838135, "rewards/margins": 16.229557037353516, "rewards/rejected": -23.917266845703125, "step": 11710 }, { "epoch": 2.28, "learning_rate": 1.3421298626590923e-07, "logits/chosen": -2.593352794647217, "logits/rejected": -2.737347364425659, "logps/chosen": -225.0635223388672, "logps/rejected": -421.9320373535156, "loss": 0.0805, "rewards/accuracies": 1.0, "rewards/chosen": -2.335352659225464, "rewards/margins": 16.914093017578125, "rewards/rejected": -19.24944496154785, "step": 11720 }, { "epoch": 2.28, "learning_rate": 1.338534550945567e-07, "logits/chosen": -2.8126816749572754, "logits/rejected": -2.750828981399536, "logps/chosen": -241.482421875, "logps/rejected": -405.61962890625, "loss": 0.0895, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.5754666328430176, "rewards/margins": 11.979042053222656, "rewards/rejected": -15.5545072555542, "step": 11730 }, { "epoch": 2.28, "learning_rate": 1.3349392392320413e-07, "logits/chosen": -2.60490083694458, "logits/rejected": -2.6065385341644287, "logps/chosen": -240.9623565673828, "logps/rejected": -403.73687744140625, "loss": 0.0506, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.312406063079834, "rewards/margins": 18.081600189208984, "rewards/rejected": -21.394006729125977, "step": 11740 }, { "epoch": 2.28, "learning_rate": 1.331343927518516e-07, "logits/chosen": -2.4395904541015625, "logits/rejected": -2.447916269302368, "logps/chosen": -260.7299499511719, "logps/rejected": -365.1597595214844, "loss": 0.0712, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.795269966125488, "rewards/margins": 15.1010103225708, "rewards/rejected": -20.896282196044922, "step": 11750 }, { "epoch": 2.28, "learning_rate": 1.3277486158049902e-07, "logits/chosen": -2.660329818725586, "logits/rejected": -2.648406505584717, "logps/chosen": -234.0547332763672, "logps/rejected": -327.6827392578125, "loss": 0.0858, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -5.183368682861328, "rewards/margins": 11.445791244506836, "rewards/rejected": -16.629159927368164, "step": 11760 }, { "epoch": 2.28, "learning_rate": 1.3241533040914647e-07, "logits/chosen": -2.544922351837158, "logits/rejected": -2.5972018241882324, "logps/chosen": -269.39300537109375, "logps/rejected": -366.1263122558594, "loss": 0.0885, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.4013748168945312, "rewards/margins": 14.740961074829102, "rewards/rejected": -18.142335891723633, "step": 11770 }, { "epoch": 2.29, "learning_rate": 1.3205579923779393e-07, "logits/chosen": -2.3685431480407715, "logits/rejected": -2.4402108192443848, "logps/chosen": -295.20526123046875, "logps/rejected": -302.5821228027344, "loss": 0.0937, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -8.440817832946777, "rewards/margins": 7.621280670166016, "rewards/rejected": -16.062097549438477, "step": 11780 }, { "epoch": 2.29, "learning_rate": 1.3169626806644135e-07, "logits/chosen": -2.5984652042388916, "logits/rejected": -2.599595308303833, "logps/chosen": -178.05007934570312, "logps/rejected": -301.00927734375, "loss": 0.0739, "rewards/accuracies": 1.0, "rewards/chosen": -6.144611358642578, "rewards/margins": 13.900667190551758, "rewards/rejected": -20.045276641845703, "step": 11790 }, { "epoch": 2.29, "learning_rate": 1.3133673689508878e-07, "logits/chosen": -2.5142526626586914, "logits/rejected": -2.565838098526001, "logps/chosen": -313.01629638671875, "logps/rejected": -364.46160888671875, "loss": 0.0637, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.528944492340088, "rewards/margins": 13.737627983093262, "rewards/rejected": -17.266571044921875, "step": 11800 }, { "epoch": 2.29, "eval_logits/chosen": -2.510964870452881, "eval_logits/rejected": -2.4900963306427, "eval_logps/chosen": -298.47015380859375, "eval_logps/rejected": -352.2785949707031, "eval_loss": 0.6183001399040222, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -10.423210144042969, "eval_rewards/margins": 7.6130571365356445, "eval_rewards/rejected": -18.03626823425293, "eval_runtime": 139.7402, "eval_samples_per_second": 22.585, "eval_steps_per_second": 0.358, "step": 11800 }, { "epoch": 2.29, "learning_rate": 1.3097720572373624e-07, "logits/chosen": -2.4644076824188232, "logits/rejected": -2.524941921234131, "logps/chosen": -190.19384765625, "logps/rejected": -309.899658203125, "loss": 0.0632, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.109036922454834, "rewards/margins": 9.344032287597656, "rewards/rejected": -12.453069686889648, "step": 11810 }, { "epoch": 2.29, "learning_rate": 1.306176745523837e-07, "logits/chosen": -2.4824531078338623, "logits/rejected": -2.489126682281494, "logps/chosen": -302.51727294921875, "logps/rejected": -450.05755615234375, "loss": 0.0803, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.096802711486816, "rewards/margins": 17.167470932006836, "rewards/rejected": -22.264272689819336, "step": 11820 }, { "epoch": 2.3, "learning_rate": 1.3025814338103114e-07, "logits/chosen": -2.45460844039917, "logits/rejected": -2.477003335952759, "logps/chosen": -248.20748901367188, "logps/rejected": -355.87957763671875, "loss": 0.0654, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.903677463531494, "rewards/margins": 14.553686141967773, "rewards/rejected": -21.45736312866211, "step": 11830 }, { "epoch": 2.3, "learning_rate": 1.2989861220967857e-07, "logits/chosen": -2.7035422325134277, "logits/rejected": -2.580699920654297, "logps/chosen": -373.3416748046875, "logps/rejected": -450.2897033691406, "loss": 0.1008, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -7.678826808929443, "rewards/margins": 14.488286018371582, "rewards/rejected": -22.167110443115234, "step": 11840 }, { "epoch": 2.3, "learning_rate": 1.29539081038326e-07, "logits/chosen": -2.6361374855041504, "logits/rejected": -2.563781261444092, "logps/chosen": -265.8594665527344, "logps/rejected": -346.826171875, "loss": 0.0863, "rewards/accuracies": 1.0, "rewards/chosen": -1.8175954818725586, "rewards/margins": 15.04686450958252, "rewards/rejected": -16.864459991455078, "step": 11850 }, { "epoch": 2.3, "learning_rate": 1.2917954986697345e-07, "logits/chosen": -2.619905948638916, "logits/rejected": -2.5312862396240234, "logps/chosen": -266.5345764160156, "logps/rejected": -360.4387512207031, "loss": 0.0878, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.5418922901153564, "rewards/margins": 13.449142456054688, "rewards/rejected": -15.991033554077148, "step": 11860 }, { "epoch": 2.3, "learning_rate": 1.288200186956209e-07, "logits/chosen": -2.5582549571990967, "logits/rejected": -2.496058225631714, "logps/chosen": -207.1652069091797, "logps/rejected": -320.2586975097656, "loss": 0.0832, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.37259578704834, "rewards/margins": 14.15211009979248, "rewards/rejected": -22.52470588684082, "step": 11870 }, { "epoch": 2.31, "learning_rate": 1.2846048752426836e-07, "logits/chosen": -2.6128625869750977, "logits/rejected": -2.5494701862335205, "logps/chosen": -305.84063720703125, "logps/rejected": -368.3662109375, "loss": 0.0913, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -11.47107982635498, "rewards/margins": 10.026995658874512, "rewards/rejected": -21.498075485229492, "step": 11880 }, { "epoch": 2.31, "learning_rate": 1.281009563529158e-07, "logits/chosen": -2.585437297821045, "logits/rejected": -2.546278953552246, "logps/chosen": -284.7617492675781, "logps/rejected": -361.58734130859375, "loss": 0.0763, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.904560089111328, "rewards/margins": 10.845001220703125, "rewards/rejected": -19.749561309814453, "step": 11890 }, { "epoch": 2.31, "learning_rate": 1.2774142518156322e-07, "logits/chosen": -2.6387503147125244, "logits/rejected": -2.6979053020477295, "logps/chosen": -246.8169403076172, "logps/rejected": -374.6175842285156, "loss": 0.0578, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.128369331359863, "rewards/margins": 16.08565330505371, "rewards/rejected": -20.21402359008789, "step": 11900 }, { "epoch": 2.31, "eval_logits/chosen": -2.5245871543884277, "eval_logits/rejected": -2.504455804824829, "eval_logps/chosen": -298.15789794921875, "eval_logps/rejected": -353.755615234375, "eval_loss": 0.6302103400230408, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -10.391982078552246, "eval_rewards/margins": 7.791986465454102, "eval_rewards/rejected": -18.18396759033203, "eval_runtime": 139.7884, "eval_samples_per_second": 22.577, "eval_steps_per_second": 0.358, "step": 11900 }, { "epoch": 2.31, "learning_rate": 1.2738189401021067e-07, "logits/chosen": -2.545112371444702, "logits/rejected": -2.4530835151672363, "logps/chosen": -244.24270629882812, "logps/rejected": -312.3651123046875, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": -4.5539422035217285, "rewards/margins": 10.47828483581543, "rewards/rejected": -15.0322265625, "step": 11910 }, { "epoch": 2.31, "learning_rate": 1.2702236283885813e-07, "logits/chosen": -2.718308210372925, "logits/rejected": -2.6510627269744873, "logps/chosen": -262.9393005371094, "logps/rejected": -426.9742736816406, "loss": 0.0596, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.8453878164291382, "rewards/margins": 14.58856201171875, "rewards/rejected": -16.433950424194336, "step": 11920 }, { "epoch": 2.32, "learning_rate": 1.2666283166750558e-07, "logits/chosen": -2.606099843978882, "logits/rejected": -2.5635433197021484, "logps/chosen": -268.33612060546875, "logps/rejected": -318.01702880859375, "loss": 0.0758, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4051783084869385, "rewards/margins": 10.541773796081543, "rewards/rejected": -12.946952819824219, "step": 11930 }, { "epoch": 2.32, "learning_rate": 1.26303300496153e-07, "logits/chosen": -2.5500588417053223, "logits/rejected": -2.6418328285217285, "logps/chosen": -286.411376953125, "logps/rejected": -434.02850341796875, "loss": 0.0762, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.638510704040527, "rewards/margins": 16.73740577697754, "rewards/rejected": -21.37591552734375, "step": 11940 }, { "epoch": 2.32, "learning_rate": 1.2594376932480044e-07, "logits/chosen": -2.5598676204681396, "logits/rejected": -2.633626699447632, "logps/chosen": -231.1297607421875, "logps/rejected": -374.0964660644531, "loss": 0.0924, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.510305881500244, "rewards/margins": 13.23644733428955, "rewards/rejected": -19.746753692626953, "step": 11950 }, { "epoch": 2.32, "learning_rate": 1.255842381534479e-07, "logits/chosen": -2.5221829414367676, "logits/rejected": -2.5997650623321533, "logps/chosen": -315.4999694824219, "logps/rejected": -463.0917053222656, "loss": 0.0806, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.256501197814941, "rewards/margins": 15.427995681762695, "rewards/rejected": -20.684497833251953, "step": 11960 }, { "epoch": 2.32, "learning_rate": 1.2522470698209535e-07, "logits/chosen": -2.6015467643737793, "logits/rejected": -2.5260097980499268, "logps/chosen": -329.55712890625, "logps/rejected": -531.4019775390625, "loss": 0.0481, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.006913661956787, "rewards/margins": 19.019302368164062, "rewards/rejected": -25.026212692260742, "step": 11970 }, { "epoch": 2.33, "learning_rate": 1.2486517581074277e-07, "logits/chosen": -2.532564163208008, "logits/rejected": -2.626721143722534, "logps/chosen": -209.87423706054688, "logps/rejected": -342.61004638671875, "loss": 0.0791, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.9006447792053223, "rewards/margins": 14.513799667358398, "rewards/rejected": -18.414443969726562, "step": 11980 }, { "epoch": 2.33, "learning_rate": 1.2450564463939023e-07, "logits/chosen": -2.7013509273529053, "logits/rejected": -2.5653529167175293, "logps/chosen": -230.11972045898438, "logps/rejected": -313.40081787109375, "loss": 0.0856, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.105692386627197, "rewards/margins": 12.026542663574219, "rewards/rejected": -17.13223648071289, "step": 11990 }, { "epoch": 2.33, "learning_rate": 1.2414611346803768e-07, "logits/chosen": -2.6725096702575684, "logits/rejected": -2.6505398750305176, "logps/chosen": -208.33596801757812, "logps/rejected": -361.4418029785156, "loss": 0.0665, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.173867225646973, "rewards/margins": 13.721814155578613, "rewards/rejected": -17.895679473876953, "step": 12000 }, { "epoch": 2.33, "eval_logits/chosen": -2.540191888809204, "eval_logits/rejected": -2.520397424697876, "eval_logps/chosen": -297.1541442871094, "eval_logps/rejected": -353.8656311035156, "eval_loss": 0.6309294104576111, "eval_rewards/accuracies": 0.6949999928474426, "eval_rewards/chosen": -10.291607856750488, "eval_rewards/margins": 7.9033589363098145, "eval_rewards/rejected": -18.194965362548828, "eval_runtime": 141.0134, "eval_samples_per_second": 22.381, "eval_steps_per_second": 0.355, "step": 12000 }, { "epoch": 2.33, "learning_rate": 1.237865822966851e-07, "logits/chosen": -2.6309330463409424, "logits/rejected": -2.6086974143981934, "logps/chosen": -336.7874450683594, "logps/rejected": -483.9122619628906, "loss": 0.0937, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -9.86191177368164, "rewards/margins": 12.709223747253418, "rewards/rejected": -22.571136474609375, "step": 12010 }, { "epoch": 2.33, "learning_rate": 1.2342705112533256e-07, "logits/chosen": -2.5626704692840576, "logits/rejected": -2.547719955444336, "logps/chosen": -251.743408203125, "logps/rejected": -344.1765441894531, "loss": 0.0793, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -9.74990177154541, "rewards/margins": 8.43620491027832, "rewards/rejected": -18.18610382080078, "step": 12020 }, { "epoch": 2.34, "learning_rate": 1.2306751995398e-07, "logits/chosen": -2.5711910724639893, "logits/rejected": -2.595757007598877, "logps/chosen": -288.83160400390625, "logps/rejected": -402.626220703125, "loss": 0.0771, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.019674301147461, "rewards/margins": 12.471722602844238, "rewards/rejected": -17.491395950317383, "step": 12030 }, { "epoch": 2.34, "learning_rate": 1.2270798878262745e-07, "logits/chosen": -2.5989129543304443, "logits/rejected": -2.6641898155212402, "logps/chosen": -221.2817840576172, "logps/rejected": -544.4244384765625, "loss": 0.0519, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.894484043121338, "rewards/margins": 14.513275146484375, "rewards/rejected": -17.407756805419922, "step": 12040 }, { "epoch": 2.34, "learning_rate": 1.223484576112749e-07, "logits/chosen": -2.598823070526123, "logits/rejected": -2.582695722579956, "logps/chosen": -285.917724609375, "logps/rejected": -420.414794921875, "loss": 0.0775, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.9591498374938965, "rewards/margins": 15.590853691101074, "rewards/rejected": -22.550003051757812, "step": 12050 }, { "epoch": 2.34, "learning_rate": 1.2198892643992233e-07, "logits/chosen": -2.6646790504455566, "logits/rejected": -2.7158217430114746, "logps/chosen": -190.109375, "logps/rejected": -303.2792663574219, "loss": 0.0702, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.631850719451904, "rewards/margins": 10.676166534423828, "rewards/rejected": -15.308015823364258, "step": 12060 }, { "epoch": 2.34, "learning_rate": 1.2162939526856978e-07, "logits/chosen": -2.636368989944458, "logits/rejected": -2.6788716316223145, "logps/chosen": -238.0105743408203, "logps/rejected": -376.60614013671875, "loss": 0.0641, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.4170451164245605, "rewards/margins": 14.714938163757324, "rewards/rejected": -19.13198471069336, "step": 12070 }, { "epoch": 2.35, "learning_rate": 1.212698640972172e-07, "logits/chosen": -2.6684865951538086, "logits/rejected": -2.6374154090881348, "logps/chosen": -287.66229248046875, "logps/rejected": -482.650390625, "loss": 0.0668, "rewards/accuracies": 1.0, "rewards/chosen": -4.367466926574707, "rewards/margins": 22.329139709472656, "rewards/rejected": -26.696605682373047, "step": 12080 }, { "epoch": 2.35, "learning_rate": 1.2091033292586466e-07, "logits/chosen": -2.632810592651367, "logits/rejected": -2.5606772899627686, "logps/chosen": -298.189453125, "logps/rejected": -457.1927795410156, "loss": 0.0705, "rewards/accuracies": 1.0, "rewards/chosen": -2.8998987674713135, "rewards/margins": 13.952972412109375, "rewards/rejected": -16.85287094116211, "step": 12090 }, { "epoch": 2.35, "learning_rate": 1.2055080175451212e-07, "logits/chosen": -2.4314777851104736, "logits/rejected": -2.450103521347046, "logps/chosen": -237.0631103515625, "logps/rejected": -388.85223388671875, "loss": 0.0854, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -8.274641036987305, "rewards/margins": 10.727151870727539, "rewards/rejected": -19.001792907714844, "step": 12100 }, { "epoch": 2.35, "eval_logits/chosen": -2.5343832969665527, "eval_logits/rejected": -2.5141849517822266, "eval_logps/chosen": -299.864990234375, "eval_logps/rejected": -356.9398193359375, "eval_loss": 0.6348021030426025, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -10.562690734863281, "eval_rewards/margins": 7.939695358276367, "eval_rewards/rejected": -18.50238609313965, "eval_runtime": 140.3482, "eval_samples_per_second": 22.487, "eval_steps_per_second": 0.356, "step": 12100 }, { "epoch": 2.35, "learning_rate": 1.2019127058315955e-07, "logits/chosen": -2.4986445903778076, "logits/rejected": -2.5519309043884277, "logps/chosen": -175.40908813476562, "logps/rejected": -474.209228515625, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -4.2327470779418945, "rewards/margins": 24.410297393798828, "rewards/rejected": -28.64304542541504, "step": 12110 }, { "epoch": 2.35, "learning_rate": 1.19831739411807e-07, "logits/chosen": -2.821687698364258, "logits/rejected": -2.6823883056640625, "logps/chosen": -260.00091552734375, "logps/rejected": -360.76104736328125, "loss": 0.0844, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.708765506744385, "rewards/margins": 8.458456039428711, "rewards/rejected": -14.167219161987305, "step": 12120 }, { "epoch": 2.35, "learning_rate": 1.1947220824045443e-07, "logits/chosen": -2.5909080505371094, "logits/rejected": -2.663158893585205, "logps/chosen": -298.30401611328125, "logps/rejected": -355.90716552734375, "loss": 0.0616, "rewards/accuracies": 1.0, "rewards/chosen": -2.249277114868164, "rewards/margins": 12.088628768920898, "rewards/rejected": -14.337905883789062, "step": 12130 }, { "epoch": 2.36, "learning_rate": 1.1911267706910188e-07, "logits/chosen": -2.6094861030578613, "logits/rejected": -2.5060129165649414, "logps/chosen": -200.81222534179688, "logps/rejected": -345.2282409667969, "loss": 0.0578, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.438138008117676, "rewards/margins": 9.182662010192871, "rewards/rejected": -17.620800018310547, "step": 12140 }, { "epoch": 2.36, "learning_rate": 1.1875314589774934e-07, "logits/chosen": -2.7512528896331787, "logits/rejected": -2.6919193267822266, "logps/chosen": -224.1593017578125, "logps/rejected": -379.17034912109375, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -3.352349042892456, "rewards/margins": 16.138296127319336, "rewards/rejected": -19.490646362304688, "step": 12150 }, { "epoch": 2.36, "learning_rate": 1.1839361472639678e-07, "logits/chosen": -2.757967472076416, "logits/rejected": -2.635768413543701, "logps/chosen": -303.3056335449219, "logps/rejected": -403.9469909667969, "loss": 0.0661, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.0109000205993652, "rewards/margins": 12.864117622375488, "rewards/rejected": -15.875018119812012, "step": 12160 }, { "epoch": 2.36, "learning_rate": 1.180340835550442e-07, "logits/chosen": -2.558183193206787, "logits/rejected": -2.480443000793457, "logps/chosen": -262.8445129394531, "logps/rejected": -395.8421325683594, "loss": 0.0367, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.135694980621338, "rewards/margins": 19.493581771850586, "rewards/rejected": -23.629276275634766, "step": 12170 }, { "epoch": 2.36, "learning_rate": 1.1767455238369166e-07, "logits/chosen": -2.7435686588287354, "logits/rejected": -2.6610381603240967, "logps/chosen": -273.6927795410156, "logps/rejected": -358.9440612792969, "loss": 0.0663, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.3958563804626465, "rewards/margins": 12.258552551269531, "rewards/rejected": -15.654406547546387, "step": 12180 }, { "epoch": 2.37, "learning_rate": 1.173150212123391e-07, "logits/chosen": -2.722874879837036, "logits/rejected": -2.6344385147094727, "logps/chosen": -342.13018798828125, "logps/rejected": -456.98095703125, "loss": 0.0898, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.861723899841309, "rewards/margins": 18.311250686645508, "rewards/rejected": -24.172977447509766, "step": 12190 }, { "epoch": 2.37, "learning_rate": 1.1695549004098656e-07, "logits/chosen": -2.6227803230285645, "logits/rejected": -2.5830583572387695, "logps/chosen": -237.94540405273438, "logps/rejected": -373.0111389160156, "loss": 0.0663, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.682139873504639, "rewards/margins": 13.407246589660645, "rewards/rejected": -18.089385986328125, "step": 12200 }, { "epoch": 2.37, "eval_logits/chosen": -2.536633253097534, "eval_logits/rejected": -2.516324996948242, "eval_logps/chosen": -297.79998779296875, "eval_logps/rejected": -354.1291809082031, "eval_loss": 0.6440024375915527, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -10.356196403503418, "eval_rewards/margins": 7.865126132965088, "eval_rewards/rejected": -18.221323013305664, "eval_runtime": 142.2384, "eval_samples_per_second": 22.188, "eval_steps_per_second": 0.352, "step": 12200 }, { "epoch": 2.37, "learning_rate": 1.16595958869634e-07, "logits/chosen": -2.6502878665924072, "logits/rejected": -2.475832462310791, "logps/chosen": -243.1304168701172, "logps/rejected": -377.45263671875, "loss": 0.073, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.234163761138916, "rewards/margins": 15.981356620788574, "rewards/rejected": -22.21552085876465, "step": 12210 }, { "epoch": 2.37, "learning_rate": 1.1623642769828142e-07, "logits/chosen": -2.5887460708618164, "logits/rejected": -2.56135630607605, "logps/chosen": -266.46588134765625, "logps/rejected": -392.6053161621094, "loss": 0.1023, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.193370819091797, "rewards/margins": 15.058713912963867, "rewards/rejected": -20.252084732055664, "step": 12220 }, { "epoch": 2.37, "learning_rate": 1.1587689652692888e-07, "logits/chosen": -2.6022226810455322, "logits/rejected": -2.5226919651031494, "logps/chosen": -323.2103271484375, "logps/rejected": -482.9850158691406, "loss": 0.0753, "rewards/accuracies": 1.0, "rewards/chosen": -1.8766988515853882, "rewards/margins": 12.250858306884766, "rewards/rejected": -14.127557754516602, "step": 12230 }, { "epoch": 2.38, "learning_rate": 1.1551736535557632e-07, "logits/chosen": -2.6652512550354004, "logits/rejected": -2.617802619934082, "logps/chosen": -241.03564453125, "logps/rejected": -381.97454833984375, "loss": 0.0949, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.917860507965088, "rewards/margins": 11.29955005645752, "rewards/rejected": -18.217411041259766, "step": 12240 }, { "epoch": 2.38, "learning_rate": 1.1515783418422377e-07, "logits/chosen": -2.6905980110168457, "logits/rejected": -2.621811628341675, "logps/chosen": -239.3612060546875, "logps/rejected": -287.470703125, "loss": 0.0667, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.432193756103516, "rewards/margins": 7.715624809265137, "rewards/rejected": -12.147819519042969, "step": 12250 }, { "epoch": 2.38, "learning_rate": 1.1479830301287122e-07, "logits/chosen": -2.4074113368988037, "logits/rejected": -2.4639010429382324, "logps/chosen": -304.75128173828125, "logps/rejected": -421.524169921875, "loss": 0.0798, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.061871528625488, "rewards/margins": 11.476676940917969, "rewards/rejected": -19.53854751586914, "step": 12260 }, { "epoch": 2.38, "learning_rate": 1.1443877184151864e-07, "logits/chosen": -2.6760289669036865, "logits/rejected": -2.6463124752044678, "logps/chosen": -226.9069061279297, "logps/rejected": -440.8207092285156, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": -4.603955268859863, "rewards/margins": 19.35154914855957, "rewards/rejected": -23.955503463745117, "step": 12270 }, { "epoch": 2.38, "learning_rate": 1.140792406701661e-07, "logits/chosen": -2.7669763565063477, "logits/rejected": -2.7230679988861084, "logps/chosen": -290.60406494140625, "logps/rejected": -373.95989990234375, "loss": 0.0666, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.881918430328369, "rewards/margins": 12.098763465881348, "rewards/rejected": -16.980682373046875, "step": 12280 }, { "epoch": 2.39, "learning_rate": 1.1371970949881354e-07, "logits/chosen": -2.5898349285125732, "logits/rejected": -2.603485107421875, "logps/chosen": -267.1484680175781, "logps/rejected": -360.93963623046875, "loss": 0.0825, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.417284965515137, "rewards/margins": 15.212320327758789, "rewards/rejected": -20.62960433959961, "step": 12290 }, { "epoch": 2.39, "learning_rate": 1.1336017832746099e-07, "logits/chosen": -2.433037757873535, "logits/rejected": -2.422377109527588, "logps/chosen": -172.99049377441406, "logps/rejected": -338.6845703125, "loss": 0.0926, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.795866012573242, "rewards/margins": 14.42186164855957, "rewards/rejected": -19.217727661132812, "step": 12300 }, { "epoch": 2.39, "eval_logits/chosen": -2.5607492923736572, "eval_logits/rejected": -2.5421431064605713, "eval_logps/chosen": -293.64227294921875, "eval_logps/rejected": -347.0633544921875, "eval_loss": 0.6197232007980347, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -9.940421104431152, "eval_rewards/margins": 7.574316501617432, "eval_rewards/rejected": -17.514738082885742, "eval_runtime": 140.9172, "eval_samples_per_second": 22.396, "eval_steps_per_second": 0.355, "step": 12300 }, { "epoch": 2.39, "learning_rate": 1.1300064715610843e-07, "logits/chosen": -2.7044150829315186, "logits/rejected": -2.5824317932128906, "logps/chosen": -355.00396728515625, "logps/rejected": -378.8909912109375, "loss": 0.0632, "rewards/accuracies": 1.0, "rewards/chosen": -3.4438202381134033, "rewards/margins": 12.555355072021484, "rewards/rejected": -15.999174118041992, "step": 12310 }, { "epoch": 2.39, "learning_rate": 1.1264111598475588e-07, "logits/chosen": -2.4234459400177, "logits/rejected": -2.404367446899414, "logps/chosen": -347.3136901855469, "logps/rejected": -428.6026916503906, "loss": 0.066, "rewards/accuracies": 1.0, "rewards/chosen": -0.22336021065711975, "rewards/margins": 15.050806045532227, "rewards/rejected": -15.274165153503418, "step": 12320 }, { "epoch": 2.39, "learning_rate": 1.1228158481340332e-07, "logits/chosen": -2.611323833465576, "logits/rejected": -2.692596673965454, "logps/chosen": -208.95602416992188, "logps/rejected": -438.8912048339844, "loss": 0.0586, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.6306769847869873, "rewards/margins": 15.02960205078125, "rewards/rejected": -18.660280227661133, "step": 12330 }, { "epoch": 2.4, "learning_rate": 1.1192205364205076e-07, "logits/chosen": -2.738924026489258, "logits/rejected": -2.7660722732543945, "logps/chosen": -268.96685791015625, "logps/rejected": -421.6089782714844, "loss": 0.087, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.598333835601807, "rewards/margins": 10.654764175415039, "rewards/rejected": -16.253095626831055, "step": 12340 }, { "epoch": 2.4, "learning_rate": 1.1156252247069821e-07, "logits/chosen": -2.6517112255096436, "logits/rejected": -2.6612112522125244, "logps/chosen": -201.0699462890625, "logps/rejected": -378.2642517089844, "loss": 0.0511, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.2537899017333984, "rewards/margins": 12.25096607208252, "rewards/rejected": -15.504755973815918, "step": 12350 }, { "epoch": 2.4, "learning_rate": 1.1120299129934564e-07, "logits/chosen": -2.626768112182617, "logits/rejected": -2.4936606884002686, "logps/chosen": -287.0443420410156, "logps/rejected": -372.3125, "loss": 0.081, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.4231988191604614, "rewards/margins": 12.1668119430542, "rewards/rejected": -13.590011596679688, "step": 12360 }, { "epoch": 2.4, "learning_rate": 1.108434601279931e-07, "logits/chosen": -2.707183361053467, "logits/rejected": -2.6809635162353516, "logps/chosen": -214.8535919189453, "logps/rejected": -313.95745849609375, "loss": 0.1113, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -4.256438255310059, "rewards/margins": 12.393168449401855, "rewards/rejected": -16.649608612060547, "step": 12370 }, { "epoch": 2.4, "learning_rate": 1.1048392895664053e-07, "logits/chosen": -2.639615058898926, "logits/rejected": -2.698387384414673, "logps/chosen": -267.8791198730469, "logps/rejected": -396.8616027832031, "loss": 0.0945, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.8844404220581055, "rewards/margins": 13.174560546875, "rewards/rejected": -20.059001922607422, "step": 12380 }, { "epoch": 2.41, "learning_rate": 1.1012439778528798e-07, "logits/chosen": -2.7608120441436768, "logits/rejected": -2.577022075653076, "logps/chosen": -232.0834197998047, "logps/rejected": -307.9110107421875, "loss": 0.0692, "rewards/accuracies": 1.0, "rewards/chosen": -1.9954754114151, "rewards/margins": 10.54742431640625, "rewards/rejected": -12.542899131774902, "step": 12390 }, { "epoch": 2.41, "learning_rate": 1.0976486661393543e-07, "logits/chosen": -2.7052533626556396, "logits/rejected": -2.6483771800994873, "logps/chosen": -286.0614013671875, "logps/rejected": -414.126220703125, "loss": 0.0846, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.2018063068389893, "rewards/margins": 14.756329536437988, "rewards/rejected": -16.958133697509766, "step": 12400 }, { "epoch": 2.41, "eval_logits/chosen": -2.5453546047210693, "eval_logits/rejected": -2.529228687286377, "eval_logps/chosen": -281.3964538574219, "eval_logps/rejected": -323.9549865722656, "eval_loss": 0.6193054914474487, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -8.715839385986328, "eval_rewards/margins": 6.488065242767334, "eval_rewards/rejected": -15.203904151916504, "eval_runtime": 140.7253, "eval_samples_per_second": 22.427, "eval_steps_per_second": 0.355, "step": 12400 }, { "epoch": 2.41, "learning_rate": 1.0940533544258286e-07, "logits/chosen": -2.471107244491577, "logits/rejected": -2.567960739135742, "logps/chosen": -316.57073974609375, "logps/rejected": -463.3955078125, "loss": 0.0606, "rewards/accuracies": 1.0, "rewards/chosen": -1.0290095806121826, "rewards/margins": 17.88158416748047, "rewards/rejected": -18.910593032836914, "step": 12410 }, { "epoch": 2.41, "learning_rate": 1.0904580427123031e-07, "logits/chosen": -2.6712772846221924, "logits/rejected": -2.6760573387145996, "logps/chosen": -302.7055969238281, "logps/rejected": -340.89556884765625, "loss": 0.0592, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.306831359863281, "rewards/margins": 10.018339157104492, "rewards/rejected": -16.32516860961914, "step": 12420 }, { "epoch": 2.41, "learning_rate": 1.0868627309987775e-07, "logits/chosen": -2.5626769065856934, "logits/rejected": -2.4481892585754395, "logps/chosen": -251.5598907470703, "logps/rejected": -308.00799560546875, "loss": 0.0834, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.11841344833374, "rewards/margins": 10.771479606628418, "rewards/rejected": -16.889894485473633, "step": 12430 }, { "epoch": 2.42, "learning_rate": 1.083267419285252e-07, "logits/chosen": -2.5774381160736084, "logits/rejected": -2.586848020553589, "logps/chosen": -221.2284698486328, "logps/rejected": -383.3273010253906, "loss": 0.0726, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.779860019683838, "rewards/margins": 12.585046768188477, "rewards/rejected": -19.364904403686523, "step": 12440 }, { "epoch": 2.42, "learning_rate": 1.0796721075717265e-07, "logits/chosen": -2.656850814819336, "logits/rejected": -2.5535809993743896, "logps/chosen": -266.3935852050781, "logps/rejected": -386.8028259277344, "loss": 0.0981, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.9002602100372314, "rewards/margins": 12.64848804473877, "rewards/rejected": -15.548748970031738, "step": 12450 }, { "epoch": 2.42, "learning_rate": 1.0760767958582008e-07, "logits/chosen": -2.324298143386841, "logits/rejected": -2.3992927074432373, "logps/chosen": -171.7988739013672, "logps/rejected": -357.5594482421875, "loss": 0.1181, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.034149169921875, "rewards/margins": 10.491291046142578, "rewards/rejected": -16.52543830871582, "step": 12460 }, { "epoch": 2.42, "learning_rate": 1.0724814841446753e-07, "logits/chosen": -2.413907766342163, "logits/rejected": -2.435821056365967, "logps/chosen": -185.29042053222656, "logps/rejected": -318.6675720214844, "loss": 0.0706, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.206128120422363, "rewards/margins": 10.577844619750977, "rewards/rejected": -18.783971786499023, "step": 12470 }, { "epoch": 2.42, "learning_rate": 1.0688861724311497e-07, "logits/chosen": -2.6212925910949707, "logits/rejected": -2.6373047828674316, "logps/chosen": -195.32415771484375, "logps/rejected": -452.72344970703125, "loss": 0.0658, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.244529724121094, "rewards/margins": 14.38170337677002, "rewards/rejected": -19.626232147216797, "step": 12480 }, { "epoch": 2.42, "learning_rate": 1.0652908607176243e-07, "logits/chosen": -2.6920530796051025, "logits/rejected": -2.716745138168335, "logps/chosen": -307.9977111816406, "logps/rejected": -397.4832458496094, "loss": 0.0755, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8004132509231567, "rewards/margins": 15.388232231140137, "rewards/rejected": -16.18864631652832, "step": 12490 }, { "epoch": 2.43, "learning_rate": 1.0616955490040987e-07, "logits/chosen": -2.552727222442627, "logits/rejected": -2.7345712184906006, "logps/chosen": -291.3739929199219, "logps/rejected": -428.37286376953125, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": -1.3418686389923096, "rewards/margins": 13.780197143554688, "rewards/rejected": -15.12206745147705, "step": 12500 }, { "epoch": 2.43, "eval_logits/chosen": -2.576340913772583, "eval_logits/rejected": -2.5609962940216064, "eval_logps/chosen": -285.8228759765625, "eval_logps/rejected": -330.5560607910156, "eval_loss": 0.6213422417640686, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -9.158485412597656, "eval_rewards/margins": 6.705523490905762, "eval_rewards/rejected": -15.864008903503418, "eval_runtime": 155.7999, "eval_samples_per_second": 20.257, "eval_steps_per_second": 0.321, "step": 12500 }, { "epoch": 2.43, "learning_rate": 1.058100237290573e-07, "logits/chosen": -2.412020206451416, "logits/rejected": -2.4402787685394287, "logps/chosen": -280.79742431640625, "logps/rejected": -391.39483642578125, "loss": 0.0629, "rewards/accuracies": 1.0, "rewards/chosen": -2.541536331176758, "rewards/margins": 14.372830390930176, "rewards/rejected": -16.914363861083984, "step": 12510 }, { "epoch": 2.43, "learning_rate": 1.0545049255770475e-07, "logits/chosen": -2.697357416152954, "logits/rejected": -2.5625782012939453, "logps/chosen": -303.3705139160156, "logps/rejected": -471.81488037109375, "loss": 0.0568, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.599686622619629, "rewards/margins": 17.236553192138672, "rewards/rejected": -23.836238861083984, "step": 12520 }, { "epoch": 2.43, "learning_rate": 1.0509096138635219e-07, "logits/chosen": -2.3948986530303955, "logits/rejected": -2.5268566608428955, "logps/chosen": -246.1656036376953, "logps/rejected": -372.9352722167969, "loss": 0.0878, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.8637285232543945, "rewards/margins": 11.812223434448242, "rewards/rejected": -17.67595100402832, "step": 12530 }, { "epoch": 2.43, "learning_rate": 1.0473143021499964e-07, "logits/chosen": -2.5686511993408203, "logits/rejected": -2.471906900405884, "logps/chosen": -394.0550231933594, "logps/rejected": -486.830322265625, "loss": 0.088, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.492640495300293, "rewards/margins": 16.098495483398438, "rewards/rejected": -22.591136932373047, "step": 12540 }, { "epoch": 2.44, "learning_rate": 1.0437189904364709e-07, "logits/chosen": -2.745089530944824, "logits/rejected": -2.6197681427001953, "logps/chosen": -293.5634460449219, "logps/rejected": -372.40191650390625, "loss": 0.0635, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.1913766860961914, "rewards/margins": 14.297021865844727, "rewards/rejected": -17.4883975982666, "step": 12550 }, { "epoch": 2.44, "learning_rate": 1.0401236787229451e-07, "logits/chosen": -2.6241023540496826, "logits/rejected": -2.6891422271728516, "logps/chosen": -207.7499237060547, "logps/rejected": -394.9376525878906, "loss": 0.0679, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.059928894042969, "rewards/margins": 14.671516418457031, "rewards/rejected": -18.7314453125, "step": 12560 }, { "epoch": 2.44, "learning_rate": 1.0365283670094197e-07, "logits/chosen": -2.658356189727783, "logits/rejected": -2.581386089324951, "logps/chosen": -255.8565216064453, "logps/rejected": -458.71539306640625, "loss": 0.1218, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.501692771911621, "rewards/margins": 19.707727432250977, "rewards/rejected": -28.20941734313965, "step": 12570 }, { "epoch": 2.44, "learning_rate": 1.0329330552958941e-07, "logits/chosen": -2.6098690032958984, "logits/rejected": -2.50624942779541, "logps/chosen": -219.2275848388672, "logps/rejected": -326.49737548828125, "loss": 0.076, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.19970703125, "rewards/margins": 11.127429008483887, "rewards/rejected": -16.327136993408203, "step": 12580 }, { "epoch": 2.44, "learning_rate": 1.0293377435823686e-07, "logits/chosen": -2.5886106491088867, "logits/rejected": -2.484173536300659, "logps/chosen": -311.59967041015625, "logps/rejected": -574.6200561523438, "loss": 0.0717, "rewards/accuracies": 1.0, "rewards/chosen": -7.177712440490723, "rewards/margins": 16.55941390991211, "rewards/rejected": -23.737125396728516, "step": 12590 }, { "epoch": 2.45, "learning_rate": 1.025742431868843e-07, "logits/chosen": -2.6926848888397217, "logits/rejected": -2.5466065406799316, "logps/chosen": -281.74493408203125, "logps/rejected": -421.4120178222656, "loss": 0.0667, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.5114688873291016, "rewards/margins": 15.035595893859863, "rewards/rejected": -18.54706573486328, "step": 12600 }, { "epoch": 2.45, "eval_logits/chosen": -2.5719873905181885, "eval_logits/rejected": -2.5532681941986084, "eval_logps/chosen": -297.1966552734375, "eval_logps/rejected": -346.5535888671875, "eval_loss": 0.6204590201377869, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -10.295859336853027, "eval_rewards/margins": 7.167905330657959, "eval_rewards/rejected": -17.463764190673828, "eval_runtime": 140.7626, "eval_samples_per_second": 22.421, "eval_steps_per_second": 0.355, "step": 12600 }, { "epoch": 2.45, "learning_rate": 1.0221471201553173e-07, "logits/chosen": -2.464975595474243, "logits/rejected": -2.448719024658203, "logps/chosen": -246.8695831298828, "logps/rejected": -325.03033447265625, "loss": 0.0668, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.008930206298828, "rewards/margins": 14.002431869506836, "rewards/rejected": -19.011362075805664, "step": 12610 }, { "epoch": 2.45, "learning_rate": 1.0185518084417919e-07, "logits/chosen": -2.6147327423095703, "logits/rejected": -2.604248046875, "logps/chosen": -263.7695007324219, "logps/rejected": -364.6864013671875, "loss": 0.0757, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.259385585784912, "rewards/margins": 14.476222038269043, "rewards/rejected": -19.735607147216797, "step": 12620 }, { "epoch": 2.45, "learning_rate": 1.0149564967282663e-07, "logits/chosen": -2.736812114715576, "logits/rejected": -2.634316921234131, "logps/chosen": -254.13229370117188, "logps/rejected": -322.1239013671875, "loss": 0.0669, "rewards/accuracies": 1.0, "rewards/chosen": -2.1754138469696045, "rewards/margins": 13.549273490905762, "rewards/rejected": -15.724688529968262, "step": 12630 }, { "epoch": 2.45, "learning_rate": 1.0113611850147408e-07, "logits/chosen": -2.5996899604797363, "logits/rejected": -2.5931382179260254, "logps/chosen": -314.7234802246094, "logps/rejected": -403.55718994140625, "loss": 0.0726, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.7101809978485107, "rewards/margins": 16.912425994873047, "rewards/rejected": -20.622608184814453, "step": 12640 }, { "epoch": 2.46, "learning_rate": 1.0077658733012152e-07, "logits/chosen": -2.7735037803649902, "logits/rejected": -2.748175859451294, "logps/chosen": -344.12493896484375, "logps/rejected": -494.8115234375, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": -4.863116264343262, "rewards/margins": 16.04551124572754, "rewards/rejected": -20.908626556396484, "step": 12650 }, { "epoch": 2.46, "learning_rate": 1.0041705615876895e-07, "logits/chosen": -2.819232940673828, "logits/rejected": -2.6816084384918213, "logps/chosen": -316.5780334472656, "logps/rejected": -399.15771484375, "loss": 0.0587, "rewards/accuracies": 1.0, "rewards/chosen": -1.7577444314956665, "rewards/margins": 11.514211654663086, "rewards/rejected": -13.271957397460938, "step": 12660 }, { "epoch": 2.46, "learning_rate": 1.000575249874164e-07, "logits/chosen": -2.6154356002807617, "logits/rejected": -2.689516067504883, "logps/chosen": -277.93463134765625, "logps/rejected": -424.29412841796875, "loss": 0.0787, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.221366882324219, "rewards/margins": 13.102206230163574, "rewards/rejected": -19.32357406616211, "step": 12670 }, { "epoch": 2.46, "learning_rate": 9.969799381606385e-08, "logits/chosen": -2.7781119346618652, "logits/rejected": -2.730433940887451, "logps/chosen": -297.75384521484375, "logps/rejected": -434.0203552246094, "loss": 0.0704, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.925167441368103, "rewards/margins": 14.537073135375977, "rewards/rejected": -16.46224021911621, "step": 12680 }, { "epoch": 2.46, "learning_rate": 9.93384626447113e-08, "logits/chosen": -2.698772430419922, "logits/rejected": -2.6802220344543457, "logps/chosen": -261.1056823730469, "logps/rejected": -355.3989562988281, "loss": 0.0806, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.682939052581787, "rewards/margins": 14.250040054321289, "rewards/rejected": -18.932979583740234, "step": 12690 }, { "epoch": 2.47, "learning_rate": 9.897893147335873e-08, "logits/chosen": -2.669194459915161, "logits/rejected": -2.6278138160705566, "logps/chosen": -202.12786865234375, "logps/rejected": -358.1590881347656, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": -3.0838189125061035, "rewards/margins": 14.480325698852539, "rewards/rejected": -17.56414222717285, "step": 12700 }, { "epoch": 2.47, "eval_logits/chosen": -2.5524840354919434, "eval_logits/rejected": -2.534165143966675, "eval_logps/chosen": -298.2552795410156, "eval_logps/rejected": -347.70635986328125, "eval_loss": 0.6299814581871033, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -10.401721000671387, "eval_rewards/margins": 7.1773200035095215, "eval_rewards/rejected": -17.579038619995117, "eval_runtime": 139.9454, "eval_samples_per_second": 22.552, "eval_steps_per_second": 0.357, "step": 12700 }, { "epoch": 2.47, "learning_rate": 9.861940030200617e-08, "logits/chosen": -2.6520049571990967, "logits/rejected": -2.6566200256347656, "logps/chosen": -202.8054962158203, "logps/rejected": -443.3741760253906, "loss": 0.0363, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.764678001403809, "rewards/margins": 12.155046463012695, "rewards/rejected": -16.919723510742188, "step": 12710 }, { "epoch": 2.47, "learning_rate": 9.825986913065362e-08, "logits/chosen": -2.608522891998291, "logits/rejected": -2.6032421588897705, "logps/chosen": -255.10604858398438, "logps/rejected": -498.72900390625, "loss": 0.0493, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.724484443664551, "rewards/margins": 14.063451766967773, "rewards/rejected": -18.787935256958008, "step": 12720 }, { "epoch": 2.47, "learning_rate": 9.790033795930106e-08, "logits/chosen": -2.6986923217773438, "logits/rejected": -2.6604561805725098, "logps/chosen": -287.8281555175781, "logps/rejected": -354.50067138671875, "loss": 0.0692, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -8.757866859436035, "rewards/margins": 11.266092300415039, "rewards/rejected": -20.02396011352539, "step": 12730 }, { "epoch": 2.47, "learning_rate": 9.754080678794852e-08, "logits/chosen": -2.6669459342956543, "logits/rejected": -2.582268238067627, "logps/chosen": -348.4651184082031, "logps/rejected": -378.26739501953125, "loss": 0.085, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.3547120094299316, "rewards/margins": 17.456912994384766, "rewards/rejected": -20.811622619628906, "step": 12740 }, { "epoch": 2.48, "learning_rate": 9.718127561659595e-08, "logits/chosen": -2.5141801834106445, "logits/rejected": -2.51961088180542, "logps/chosen": -189.30706787109375, "logps/rejected": -281.38299560546875, "loss": 0.0467, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.091251373291016, "rewards/margins": 10.726353645324707, "rewards/rejected": -14.817604064941406, "step": 12750 }, { "epoch": 2.48, "learning_rate": 9.68217444452434e-08, "logits/chosen": -2.643195867538452, "logits/rejected": -2.528999090194702, "logps/chosen": -267.8007507324219, "logps/rejected": -391.2079162597656, "loss": 0.0754, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.385805130004883, "rewards/margins": 11.680742263793945, "rewards/rejected": -20.066547393798828, "step": 12760 }, { "epoch": 2.48, "learning_rate": 9.646221327389084e-08, "logits/chosen": -2.6890969276428223, "logits/rejected": -2.6115336418151855, "logps/chosen": -359.04071044921875, "logps/rejected": -377.3211975097656, "loss": 0.0734, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.392149448394775, "rewards/margins": 13.718755722045898, "rewards/rejected": -18.11090660095215, "step": 12770 }, { "epoch": 2.48, "learning_rate": 9.610268210253828e-08, "logits/chosen": -2.5152180194854736, "logits/rejected": -2.525686025619507, "logps/chosen": -254.0458984375, "logps/rejected": -403.14715576171875, "loss": 0.0733, "rewards/accuracies": 1.0, "rewards/chosen": -4.80006217956543, "rewards/margins": 15.435331344604492, "rewards/rejected": -20.235393524169922, "step": 12780 }, { "epoch": 2.48, "learning_rate": 9.574315093118574e-08, "logits/chosen": -2.4921417236328125, "logits/rejected": -2.444669485092163, "logps/chosen": -243.29800415039062, "logps/rejected": -392.8536376953125, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": -5.819563388824463, "rewards/margins": 15.767827033996582, "rewards/rejected": -21.587390899658203, "step": 12790 }, { "epoch": 2.48, "learning_rate": 9.538361975983317e-08, "logits/chosen": -2.5865397453308105, "logits/rejected": -2.547572612762451, "logps/chosen": -284.09197998046875, "logps/rejected": -376.93170166015625, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": -6.4949049949646, "rewards/margins": 12.917729377746582, "rewards/rejected": -19.412633895874023, "step": 12800 }, { "epoch": 2.48, "eval_logits/chosen": -2.5215024948120117, "eval_logits/rejected": -2.4994101524353027, "eval_logps/chosen": -304.1522521972656, "eval_logps/rejected": -359.07647705078125, "eval_loss": 0.6498669385910034, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -10.99142074584961, "eval_rewards/margins": 7.7246317863464355, "eval_rewards/rejected": -18.716053009033203, "eval_runtime": 140.6153, "eval_samples_per_second": 22.444, "eval_steps_per_second": 0.356, "step": 12800 }, { "epoch": 2.49, "learning_rate": 9.502408858848062e-08, "logits/chosen": -2.5164453983306885, "logits/rejected": -2.4940741062164307, "logps/chosen": -320.9963684082031, "logps/rejected": -344.89447021484375, "loss": 0.1136, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -7.1120405197143555, "rewards/margins": 10.176172256469727, "rewards/rejected": -17.288211822509766, "step": 12810 }, { "epoch": 2.49, "learning_rate": 9.466455741712806e-08, "logits/chosen": -2.4596664905548096, "logits/rejected": -2.4942336082458496, "logps/chosen": -321.5664367675781, "logps/rejected": -388.92218017578125, "loss": 0.0672, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -9.90731143951416, "rewards/margins": 15.95301628112793, "rewards/rejected": -25.86033058166504, "step": 12820 }, { "epoch": 2.49, "learning_rate": 9.43050262457755e-08, "logits/chosen": -2.4090611934661865, "logits/rejected": -2.249752998352051, "logps/chosen": -293.23095703125, "logps/rejected": -350.8958435058594, "loss": 0.0557, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.9957728385925293, "rewards/margins": 9.820385932922363, "rewards/rejected": -13.81615924835205, "step": 12830 }, { "epoch": 2.49, "learning_rate": 9.394549507442296e-08, "logits/chosen": -2.5324692726135254, "logits/rejected": -2.4817562103271484, "logps/chosen": -232.61306762695312, "logps/rejected": -351.1329650878906, "loss": 0.0676, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -9.994973182678223, "rewards/margins": 10.949104309082031, "rewards/rejected": -20.944076538085938, "step": 12840 }, { "epoch": 2.49, "learning_rate": 9.358596390307038e-08, "logits/chosen": -2.4570517539978027, "logits/rejected": -2.505894184112549, "logps/chosen": -272.30963134765625, "logps/rejected": -398.0025939941406, "loss": 0.0682, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.065876007080078, "rewards/margins": 13.8529691696167, "rewards/rejected": -18.918846130371094, "step": 12850 }, { "epoch": 2.5, "learning_rate": 9.322643273171784e-08, "logits/chosen": -2.6570115089416504, "logits/rejected": -2.6433606147766113, "logps/chosen": -308.03582763671875, "logps/rejected": -451.9251403808594, "loss": 0.064, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.715272426605225, "rewards/margins": 20.070213317871094, "rewards/rejected": -27.785486221313477, "step": 12860 }, { "epoch": 2.5, "learning_rate": 9.286690156036528e-08, "logits/chosen": -2.61348819732666, "logits/rejected": -2.483337879180908, "logps/chosen": -306.2225036621094, "logps/rejected": -383.4023132324219, "loss": 0.0485, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.665471076965332, "rewards/margins": 10.886301040649414, "rewards/rejected": -16.55177116394043, "step": 12870 }, { "epoch": 2.5, "learning_rate": 9.250737038901272e-08, "logits/chosen": -2.4846606254577637, "logits/rejected": -2.443726062774658, "logps/chosen": -246.4105987548828, "logps/rejected": -365.9189453125, "loss": 0.1106, "rewards/accuracies": 0.75, "rewards/chosen": -12.970712661743164, "rewards/margins": 10.023126602172852, "rewards/rejected": -22.993837356567383, "step": 12880 }, { "epoch": 2.5, "learning_rate": 9.214783921766017e-08, "logits/chosen": -2.5224173069000244, "logits/rejected": -2.454105854034424, "logps/chosen": -251.5784454345703, "logps/rejected": -300.30377197265625, "loss": 0.0882, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.659109592437744, "rewards/margins": 11.266281127929688, "rewards/rejected": -15.925392150878906, "step": 12890 }, { "epoch": 2.5, "learning_rate": 9.17883080463076e-08, "logits/chosen": -2.5811517238616943, "logits/rejected": -2.615131378173828, "logps/chosen": -298.760009765625, "logps/rejected": -392.4461975097656, "loss": 0.0687, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -8.927229881286621, "rewards/margins": 12.201677322387695, "rewards/rejected": -21.128908157348633, "step": 12900 }, { "epoch": 2.5, "eval_logits/chosen": -2.570328950881958, "eval_logits/rejected": -2.5499255657196045, "eval_logps/chosen": -313.0833740234375, "eval_logps/rejected": -369.8017578125, "eval_loss": 0.6572585701942444, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -11.884530067443848, "eval_rewards/margins": 7.904053211212158, "eval_rewards/rejected": -19.788583755493164, "eval_runtime": 159.0856, "eval_samples_per_second": 19.838, "eval_steps_per_second": 0.314, "step": 12900 }, { "epoch": 2.51, "learning_rate": 9.142877687495506e-08, "logits/chosen": -2.7034802436828613, "logits/rejected": -2.7853951454162598, "logps/chosen": -244.06210327148438, "logps/rejected": -480.805908203125, "loss": 0.0519, "rewards/accuracies": 1.0, "rewards/chosen": -7.491147041320801, "rewards/margins": 17.780879974365234, "rewards/rejected": -25.272029876708984, "step": 12910 }, { "epoch": 2.51, "learning_rate": 9.10692457036025e-08, "logits/chosen": -2.6614511013031006, "logits/rejected": -2.635369062423706, "logps/chosen": -266.12908935546875, "logps/rejected": -330.9181213378906, "loss": 0.0683, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -6.025728702545166, "rewards/margins": 9.081961631774902, "rewards/rejected": -15.107688903808594, "step": 12920 }, { "epoch": 2.51, "learning_rate": 9.070971453224994e-08, "logits/chosen": -2.6953396797180176, "logits/rejected": -2.7117714881896973, "logps/chosen": -310.4691467285156, "logps/rejected": -535.332275390625, "loss": 0.0785, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.80519962310791, "rewards/margins": 18.081783294677734, "rewards/rejected": -24.886981964111328, "step": 12930 }, { "epoch": 2.51, "learning_rate": 9.035018336089739e-08, "logits/chosen": -2.518805742263794, "logits/rejected": -2.5691771507263184, "logps/chosen": -257.78912353515625, "logps/rejected": -489.98846435546875, "loss": 0.076, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.732778072357178, "rewards/margins": 14.248738288879395, "rewards/rejected": -20.981517791748047, "step": 12940 }, { "epoch": 2.51, "learning_rate": 8.999065218954482e-08, "logits/chosen": -2.514657974243164, "logits/rejected": -2.5844292640686035, "logps/chosen": -247.2261962890625, "logps/rejected": -398.01141357421875, "loss": 0.0624, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -11.016149520874023, "rewards/margins": 15.479927062988281, "rewards/rejected": -26.496074676513672, "step": 12950 }, { "epoch": 2.52, "learning_rate": 8.963112101819228e-08, "logits/chosen": -2.5862746238708496, "logits/rejected": -2.543480396270752, "logps/chosen": -269.873291015625, "logps/rejected": -459.63165283203125, "loss": 0.0779, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -9.907736778259277, "rewards/margins": 17.983074188232422, "rewards/rejected": -27.89080810546875, "step": 12960 }, { "epoch": 2.52, "learning_rate": 8.927158984683972e-08, "logits/chosen": -2.6230921745300293, "logits/rejected": -2.642923593521118, "logps/chosen": -324.45709228515625, "logps/rejected": -424.33721923828125, "loss": 0.0787, "rewards/accuracies": 1.0, "rewards/chosen": -7.874556064605713, "rewards/margins": 16.19103240966797, "rewards/rejected": -24.065587997436523, "step": 12970 }, { "epoch": 2.52, "learning_rate": 8.891205867548717e-08, "logits/chosen": -2.6603245735168457, "logits/rejected": -2.6209988594055176, "logps/chosen": -240.4772491455078, "logps/rejected": -339.351318359375, "loss": 0.0835, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.2165093421936035, "rewards/margins": 11.686843872070312, "rewards/rejected": -17.90335464477539, "step": 12980 }, { "epoch": 2.52, "learning_rate": 8.855252750413461e-08, "logits/chosen": -2.600198984146118, "logits/rejected": -2.5400357246398926, "logps/chosen": -340.0777282714844, "logps/rejected": -481.73626708984375, "loss": 0.0715, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.748044967651367, "rewards/margins": 16.56332015991211, "rewards/rejected": -25.31136703491211, "step": 12990 }, { "epoch": 2.52, "learning_rate": 8.819299633278204e-08, "logits/chosen": -2.6538376808166504, "logits/rejected": -2.6430704593658447, "logps/chosen": -210.0105743408203, "logps/rejected": -370.7178039550781, "loss": 0.0658, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.693873405456543, "rewards/margins": 14.175498962402344, "rewards/rejected": -19.86937141418457, "step": 13000 }, { "epoch": 2.52, "eval_logits/chosen": -2.5584936141967773, "eval_logits/rejected": -2.5373728275299072, "eval_logps/chosen": -317.293212890625, "eval_logps/rejected": -375.7680358886719, "eval_loss": 0.645956814289093, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -12.305511474609375, "eval_rewards/margins": 8.07969856262207, "eval_rewards/rejected": -20.385211944580078, "eval_runtime": 155.6589, "eval_samples_per_second": 20.275, "eval_steps_per_second": 0.321, "step": 13000 }, { "epoch": 2.53, "learning_rate": 8.78334651614295e-08, "logits/chosen": -2.518589496612549, "logits/rejected": -2.54185152053833, "logps/chosen": -265.8616027832031, "logps/rejected": -421.4515075683594, "loss": 0.08, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -10.167800903320312, "rewards/margins": 12.785776138305664, "rewards/rejected": -22.953577041625977, "step": 13010 }, { "epoch": 2.53, "learning_rate": 8.747393399007693e-08, "logits/chosen": -2.6595230102539062, "logits/rejected": -2.5554375648498535, "logps/chosen": -358.6347351074219, "logps/rejected": -357.8252258300781, "loss": 0.1226, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -12.585082054138184, "rewards/margins": 11.05892276763916, "rewards/rejected": -23.644004821777344, "step": 13020 }, { "epoch": 2.53, "learning_rate": 8.711440281872439e-08, "logits/chosen": -2.597569704055786, "logits/rejected": -2.589627742767334, "logps/chosen": -251.9309539794922, "logps/rejected": -354.78912353515625, "loss": 0.0741, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -7.414864540100098, "rewards/margins": 13.376960754394531, "rewards/rejected": -20.791826248168945, "step": 13030 }, { "epoch": 2.53, "learning_rate": 8.675487164737183e-08, "logits/chosen": -2.6389718055725098, "logits/rejected": -2.5409035682678223, "logps/chosen": -452.167236328125, "logps/rejected": -451.02911376953125, "loss": 0.0818, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -9.244949340820312, "rewards/margins": 12.40312385559082, "rewards/rejected": -21.648075103759766, "step": 13040 }, { "epoch": 2.53, "learning_rate": 8.639534047601926e-08, "logits/chosen": -2.500101327896118, "logits/rejected": -2.466165542602539, "logps/chosen": -218.2380828857422, "logps/rejected": -395.40924072265625, "loss": 0.0902, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -8.961050987243652, "rewards/margins": 13.048199653625488, "rewards/rejected": -22.009252548217773, "step": 13050 }, { "epoch": 2.54, "learning_rate": 8.603580930466671e-08, "logits/chosen": -2.6560113430023193, "logits/rejected": -2.672569990158081, "logps/chosen": -347.00213623046875, "logps/rejected": -363.33843994140625, "loss": 0.0754, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.370176792144775, "rewards/margins": 12.299577713012695, "rewards/rejected": -17.669755935668945, "step": 13060 }, { "epoch": 2.54, "learning_rate": 8.567627813331415e-08, "logits/chosen": -2.5172853469848633, "logits/rejected": -2.3834493160247803, "logps/chosen": -192.8883819580078, "logps/rejected": -286.356689453125, "loss": 0.0378, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.385251998901367, "rewards/margins": 10.082830429077148, "rewards/rejected": -14.4680814743042, "step": 13070 }, { "epoch": 2.54, "learning_rate": 8.531674696196161e-08, "logits/chosen": -2.557122230529785, "logits/rejected": -2.6049318313598633, "logps/chosen": -273.2023010253906, "logps/rejected": -496.791259765625, "loss": 0.0696, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -10.941291809082031, "rewards/margins": 16.8946590423584, "rewards/rejected": -27.835948944091797, "step": 13080 }, { "epoch": 2.54, "learning_rate": 8.495721579060904e-08, "logits/chosen": -2.5869221687316895, "logits/rejected": -2.59763240814209, "logps/chosen": -298.122802734375, "logps/rejected": -464.675537109375, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": -7.380122184753418, "rewards/margins": 18.6658878326416, "rewards/rejected": -26.046010971069336, "step": 13090 }, { "epoch": 2.54, "learning_rate": 8.459768461925648e-08, "logits/chosen": -2.384657144546509, "logits/rejected": -2.4255499839782715, "logps/chosen": -280.59210205078125, "logps/rejected": -405.8802795410156, "loss": 0.0897, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.882360458374023, "rewards/margins": 11.485540390014648, "rewards/rejected": -20.367902755737305, "step": 13100 }, { "epoch": 2.54, "eval_logits/chosen": -2.4809982776641846, "eval_logits/rejected": -2.4576659202575684, "eval_logps/chosen": -320.8460388183594, "eval_logps/rejected": -381.0459289550781, "eval_loss": 0.6673251986503601, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -12.660794258117676, "eval_rewards/margins": 8.252202987670898, "eval_rewards/rejected": -20.91299819946289, "eval_runtime": 140.2174, "eval_samples_per_second": 22.508, "eval_steps_per_second": 0.357, "step": 13100 }, { "epoch": 2.55, "learning_rate": 8.423815344790393e-08, "logits/chosen": -2.4887547492980957, "logits/rejected": -2.394315719604492, "logps/chosen": -198.9996795654297, "logps/rejected": -376.09381103515625, "loss": 0.0873, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.836404800415039, "rewards/margins": 17.285276412963867, "rewards/rejected": -26.121679306030273, "step": 13110 }, { "epoch": 2.55, "learning_rate": 8.387862227655137e-08, "logits/chosen": -2.5716552734375, "logits/rejected": -2.5738325119018555, "logps/chosen": -342.38140869140625, "logps/rejected": -508.90667724609375, "loss": 0.0731, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -9.472661018371582, "rewards/margins": 11.9817533493042, "rewards/rejected": -21.454416275024414, "step": 13120 }, { "epoch": 2.55, "learning_rate": 8.351909110519883e-08, "logits/chosen": -2.459246873855591, "logits/rejected": -2.545761823654175, "logps/chosen": -275.0579528808594, "logps/rejected": -410.394287109375, "loss": 0.0794, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.266641139984131, "rewards/margins": 11.687387466430664, "rewards/rejected": -18.954029083251953, "step": 13130 }, { "epoch": 2.55, "learning_rate": 8.315955993384625e-08, "logits/chosen": -2.6211397647857666, "logits/rejected": -2.671525239944458, "logps/chosen": -287.2232360839844, "logps/rejected": -538.5015258789062, "loss": 0.0672, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.823758602142334, "rewards/margins": 21.800235748291016, "rewards/rejected": -26.62399673461914, "step": 13140 }, { "epoch": 2.55, "learning_rate": 8.28000287624937e-08, "logits/chosen": -2.5801539421081543, "logits/rejected": -2.5598695278167725, "logps/chosen": -299.2322692871094, "logps/rejected": -449.35943603515625, "loss": 0.0739, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.7073540687561035, "rewards/margins": 17.938566207885742, "rewards/rejected": -24.645917892456055, "step": 13150 }, { "epoch": 2.55, "learning_rate": 8.244049759114115e-08, "logits/chosen": -2.568143129348755, "logits/rejected": -2.414315700531006, "logps/chosen": -256.2361145019531, "logps/rejected": -355.2630310058594, "loss": 0.0799, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -12.5073823928833, "rewards/margins": 10.324131965637207, "rewards/rejected": -22.831512451171875, "step": 13160 }, { "epoch": 2.56, "learning_rate": 8.208096641978859e-08, "logits/chosen": -2.345545530319214, "logits/rejected": -2.265782117843628, "logps/chosen": -305.25872802734375, "logps/rejected": -455.2618103027344, "loss": 0.0611, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -10.404328346252441, "rewards/margins": 18.420879364013672, "rewards/rejected": -28.825210571289062, "step": 13170 }, { "epoch": 2.56, "learning_rate": 8.172143524843604e-08, "logits/chosen": -2.5380609035491943, "logits/rejected": -2.4967401027679443, "logps/chosen": -229.2069091796875, "logps/rejected": -428.5634765625, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": -6.023338317871094, "rewards/margins": 16.673412322998047, "rewards/rejected": -22.69675064086914, "step": 13180 }, { "epoch": 2.56, "learning_rate": 8.136190407708347e-08, "logits/chosen": -2.669823169708252, "logits/rejected": -2.5576939582824707, "logps/chosen": -292.00836181640625, "logps/rejected": -311.6917419433594, "loss": 0.0681, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.127823829650879, "rewards/margins": 11.13559341430664, "rewards/rejected": -16.263416290283203, "step": 13190 }, { "epoch": 2.56, "learning_rate": 8.100237290573093e-08, "logits/chosen": -2.6349949836730957, "logits/rejected": -2.5826575756073, "logps/chosen": -317.48944091796875, "logps/rejected": -490.8722229003906, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -8.946131706237793, "rewards/margins": 19.052885055541992, "rewards/rejected": -27.9990177154541, "step": 13200 }, { "epoch": 2.56, "eval_logits/chosen": -2.470322370529175, "eval_logits/rejected": -2.445310592651367, "eval_logps/chosen": -323.9682312011719, "eval_logps/rejected": -386.3536071777344, "eval_loss": 0.6574805378913879, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -12.973016738891602, "eval_rewards/margins": 8.470744132995605, "eval_rewards/rejected": -21.443761825561523, "eval_runtime": 141.3765, "eval_samples_per_second": 22.323, "eval_steps_per_second": 0.354, "step": 13200 }, { "epoch": 2.56, "learning_rate": 8.064284173437837e-08, "logits/chosen": -2.6297945976257324, "logits/rejected": -2.523813486099243, "logps/chosen": -303.2640686035156, "logps/rejected": -431.673583984375, "loss": 0.0838, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -10.033949851989746, "rewards/margins": 14.66093635559082, "rewards/rejected": -24.694883346557617, "step": 13210 }, { "epoch": 2.57, "learning_rate": 8.028331056302581e-08, "logits/chosen": -2.520181179046631, "logits/rejected": -2.526456117630005, "logps/chosen": -241.6414794921875, "logps/rejected": -357.3619079589844, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": -6.619406223297119, "rewards/margins": 12.885050773620605, "rewards/rejected": -19.504459381103516, "step": 13220 }, { "epoch": 2.57, "learning_rate": 7.992377939167326e-08, "logits/chosen": -2.6000938415527344, "logits/rejected": -2.4963479042053223, "logps/chosen": -323.5462341308594, "logps/rejected": -438.39453125, "loss": 0.0593, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.876238822937012, "rewards/margins": 16.660070419311523, "rewards/rejected": -25.53631019592285, "step": 13230 }, { "epoch": 2.57, "learning_rate": 7.956424822032069e-08, "logits/chosen": -2.799954652786255, "logits/rejected": -2.719837188720703, "logps/chosen": -478.8736267089844, "logps/rejected": -531.1661987304688, "loss": 0.1776, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -12.289868354797363, "rewards/margins": 9.699061393737793, "rewards/rejected": -21.988927841186523, "step": 13240 }, { "epoch": 2.57, "learning_rate": 7.920471704896815e-08, "logits/chosen": -2.611156463623047, "logits/rejected": -2.640255928039551, "logps/chosen": -304.0635986328125, "logps/rejected": -457.8282775878906, "loss": 0.0534, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.5346367359161377, "rewards/margins": 18.964651107788086, "rewards/rejected": -22.49928855895996, "step": 13250 }, { "epoch": 2.57, "learning_rate": 7.884518587761559e-08, "logits/chosen": -2.641752243041992, "logits/rejected": -2.5792839527130127, "logps/chosen": -261.02496337890625, "logps/rejected": -397.3820495605469, "loss": 0.1144, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.743465423583984, "rewards/margins": 14.1588716506958, "rewards/rejected": -21.902339935302734, "step": 13260 }, { "epoch": 2.58, "learning_rate": 7.848565470626303e-08, "logits/chosen": -2.6197493076324463, "logits/rejected": -2.5807297229766846, "logps/chosen": -263.82196044921875, "logps/rejected": -404.60382080078125, "loss": 0.0876, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.385954856872559, "rewards/margins": 12.897705078125, "rewards/rejected": -20.283660888671875, "step": 13270 }, { "epoch": 2.58, "learning_rate": 7.812612353491048e-08, "logits/chosen": -2.526801347732544, "logits/rejected": -2.5287177562713623, "logps/chosen": -255.1396484375, "logps/rejected": -448.39630126953125, "loss": 0.0839, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.068594932556152, "rewards/margins": 10.024816513061523, "rewards/rejected": -17.093412399291992, "step": 13280 }, { "epoch": 2.58, "learning_rate": 7.776659236355791e-08, "logits/chosen": -2.612931728363037, "logits/rejected": -2.547118663787842, "logps/chosen": -309.223876953125, "logps/rejected": -439.5494079589844, "loss": 0.0703, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.652108669281006, "rewards/margins": 12.59142017364502, "rewards/rejected": -19.243526458740234, "step": 13290 }, { "epoch": 2.58, "learning_rate": 7.740706119220536e-08, "logits/chosen": -2.6614480018615723, "logits/rejected": -2.6221108436584473, "logps/chosen": -284.7066345214844, "logps/rejected": -397.18798828125, "loss": 0.0771, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -10.011357307434082, "rewards/margins": 12.294212341308594, "rewards/rejected": -22.30556869506836, "step": 13300 }, { "epoch": 2.58, "eval_logits/chosen": -2.5589723587036133, "eval_logits/rejected": -2.540689468383789, "eval_logps/chosen": -304.8467102050781, "eval_logps/rejected": -356.00262451171875, "eval_loss": 0.6375167369842529, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -11.060863494873047, "eval_rewards/margins": 7.347804069519043, "eval_rewards/rejected": -18.408666610717773, "eval_runtime": 139.7109, "eval_samples_per_second": 22.59, "eval_steps_per_second": 0.358, "step": 13300 }, { "epoch": 2.58, "learning_rate": 7.70475300208528e-08, "logits/chosen": -2.614485263824463, "logits/rejected": -2.592682123184204, "logps/chosen": -290.1607360839844, "logps/rejected": -344.34149169921875, "loss": 0.0756, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.696990013122559, "rewards/margins": 10.190881729125977, "rewards/rejected": -15.887868881225586, "step": 13310 }, { "epoch": 2.59, "learning_rate": 7.668799884950025e-08, "logits/chosen": -2.518864154815674, "logits/rejected": -2.5086426734924316, "logps/chosen": -285.72802734375, "logps/rejected": -376.6285095214844, "loss": 0.0696, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.568521499633789, "rewards/margins": 9.653501510620117, "rewards/rejected": -18.222023010253906, "step": 13320 }, { "epoch": 2.59, "learning_rate": 7.63284676781477e-08, "logits/chosen": -2.6888203620910645, "logits/rejected": -2.5889365673065186, "logps/chosen": -235.26669311523438, "logps/rejected": -366.56146240234375, "loss": 0.0722, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.766876220703125, "rewards/margins": 12.524709701538086, "rewards/rejected": -20.291584014892578, "step": 13330 }, { "epoch": 2.59, "learning_rate": 7.596893650679513e-08, "logits/chosen": -2.509105682373047, "logits/rejected": -2.510641574859619, "logps/chosen": -292.5914001464844, "logps/rejected": -431.8297424316406, "loss": 0.0786, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -9.046849250793457, "rewards/margins": 18.554676055908203, "rewards/rejected": -27.60152244567871, "step": 13340 }, { "epoch": 2.59, "learning_rate": 7.560940533544258e-08, "logits/chosen": -2.62268328666687, "logits/rejected": -2.5355706214904785, "logps/chosen": -301.7018127441406, "logps/rejected": -379.0313415527344, "loss": 0.0631, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.858834743499756, "rewards/margins": 13.080639839172363, "rewards/rejected": -16.93947410583496, "step": 13350 }, { "epoch": 2.59, "learning_rate": 7.524987416409002e-08, "logits/chosen": -2.5447497367858887, "logits/rejected": -2.449542999267578, "logps/chosen": -262.575439453125, "logps/rejected": -409.4619140625, "loss": 0.0631, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.333880424499512, "rewards/margins": 11.08108139038086, "rewards/rejected": -16.414960861206055, "step": 13360 }, { "epoch": 2.6, "learning_rate": 7.489034299273746e-08, "logits/chosen": -2.615506649017334, "logits/rejected": -2.6858839988708496, "logps/chosen": -284.05352783203125, "logps/rejected": -346.6708679199219, "loss": 0.0818, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -7.476611137390137, "rewards/margins": 9.028188705444336, "rewards/rejected": -16.504802703857422, "step": 13370 }, { "epoch": 2.6, "learning_rate": 7.453081182138492e-08, "logits/chosen": -2.7071032524108887, "logits/rejected": -2.63547682762146, "logps/chosen": -226.57119750976562, "logps/rejected": -363.83148193359375, "loss": 0.0918, "rewards/accuracies": 1.0, "rewards/chosen": -2.6655004024505615, "rewards/margins": 12.037089347839355, "rewards/rejected": -14.70258903503418, "step": 13380 }, { "epoch": 2.6, "learning_rate": 7.417128065003235e-08, "logits/chosen": -2.551342487335205, "logits/rejected": -2.5826187133789062, "logps/chosen": -190.64488220214844, "logps/rejected": -350.8616638183594, "loss": 0.0469, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.572944641113281, "rewards/margins": 12.592473983764648, "rewards/rejected": -20.16541862487793, "step": 13390 }, { "epoch": 2.6, "learning_rate": 7.38117494786798e-08, "logits/chosen": -2.6725316047668457, "logits/rejected": -2.7998526096343994, "logps/chosen": -226.8726348876953, "logps/rejected": -448.68634033203125, "loss": 0.0704, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.834217071533203, "rewards/margins": 15.59800910949707, "rewards/rejected": -20.43222427368164, "step": 13400 }, { "epoch": 2.6, "eval_logits/chosen": -2.550319194793701, "eval_logits/rejected": -2.5312576293945312, "eval_logps/chosen": -308.41473388671875, "eval_logps/rejected": -361.51446533203125, "eval_loss": 0.6408479809761047, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -11.417664527893066, "eval_rewards/margins": 7.542184352874756, "eval_rewards/rejected": -18.959850311279297, "eval_runtime": 141.1534, "eval_samples_per_second": 22.359, "eval_steps_per_second": 0.354, "step": 13400 }, { "epoch": 2.6, "learning_rate": 7.345221830732724e-08, "logits/chosen": -2.673508405685425, "logits/rejected": -2.6295700073242188, "logps/chosen": -351.0205383300781, "logps/rejected": -398.322998046875, "loss": 0.0923, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -7.051070213317871, "rewards/margins": 11.78144359588623, "rewards/rejected": -18.832515716552734, "step": 13410 }, { "epoch": 2.61, "learning_rate": 7.30926871359747e-08, "logits/chosen": -2.6739401817321777, "logits/rejected": -2.531371593475342, "logps/chosen": -261.10638427734375, "logps/rejected": -434.35626220703125, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -3.755013942718506, "rewards/margins": 17.52224349975586, "rewards/rejected": -21.277257919311523, "step": 13420 }, { "epoch": 2.61, "learning_rate": 7.273315596462212e-08, "logits/chosen": -2.8355870246887207, "logits/rejected": -2.8189854621887207, "logps/chosen": -345.1497802734375, "logps/rejected": -516.4817504882812, "loss": 0.0501, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.5474488735198975, "rewards/margins": 16.476818084716797, "rewards/rejected": -19.024269104003906, "step": 13430 }, { "epoch": 2.61, "learning_rate": 7.237362479326957e-08, "logits/chosen": -2.684986114501953, "logits/rejected": -2.6205391883850098, "logps/chosen": -304.0296936035156, "logps/rejected": -425.7872009277344, "loss": 0.0619, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.477696418762207, "rewards/margins": 13.657798767089844, "rewards/rejected": -19.135498046875, "step": 13440 }, { "epoch": 2.61, "learning_rate": 7.201409362191702e-08, "logits/chosen": -2.6951632499694824, "logits/rejected": -2.6230578422546387, "logps/chosen": -275.9407958984375, "logps/rejected": -411.18756103515625, "loss": 0.0796, "rewards/accuracies": 1.0, "rewards/chosen": -3.448228359222412, "rewards/margins": 13.241569519042969, "rewards/rejected": -16.689800262451172, "step": 13450 }, { "epoch": 2.61, "learning_rate": 7.165456245056446e-08, "logits/chosen": -2.5738027095794678, "logits/rejected": -2.509709119796753, "logps/chosen": -227.03536987304688, "logps/rejected": -427.52587890625, "loss": 0.064, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -8.788276672363281, "rewards/margins": 14.447412490844727, "rewards/rejected": -23.235685348510742, "step": 13460 }, { "epoch": 2.62, "learning_rate": 7.129503127921191e-08, "logits/chosen": -2.4864540100097656, "logits/rejected": -2.5512588024139404, "logps/chosen": -316.167724609375, "logps/rejected": -443.88153076171875, "loss": 0.055, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -10.14301586151123, "rewards/margins": 13.683004379272461, "rewards/rejected": -23.826019287109375, "step": 13470 }, { "epoch": 2.62, "learning_rate": 7.093550010785934e-08, "logits/chosen": -2.6127116680145264, "logits/rejected": -2.618218183517456, "logps/chosen": -238.98239135742188, "logps/rejected": -391.04779052734375, "loss": 0.0559, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.6085662841796875, "rewards/margins": 13.831143379211426, "rewards/rejected": -20.43971061706543, "step": 13480 }, { "epoch": 2.62, "learning_rate": 7.057596893650678e-08, "logits/chosen": -2.6301097869873047, "logits/rejected": -2.6515915393829346, "logps/chosen": -257.0636291503906, "logps/rejected": -378.5958557128906, "loss": 0.0631, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.423306941986084, "rewards/margins": 18.068883895874023, "rewards/rejected": -24.492191314697266, "step": 13490 }, { "epoch": 2.62, "learning_rate": 7.021643776515424e-08, "logits/chosen": -2.4764249324798584, "logits/rejected": -2.516697406768799, "logps/chosen": -222.3642578125, "logps/rejected": -371.0328063964844, "loss": 0.0715, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.297858238220215, "rewards/margins": 12.84132194519043, "rewards/rejected": -19.139179229736328, "step": 13500 }, { "epoch": 2.62, "eval_logits/chosen": -2.5267419815063477, "eval_logits/rejected": -2.50559401512146, "eval_logps/chosen": -312.5887145996094, "eval_logps/rejected": -369.9871826171875, "eval_loss": 0.6432679891586304, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -11.835062026977539, "eval_rewards/margins": 7.972060680389404, "eval_rewards/rejected": -19.8071231842041, "eval_runtime": 151.5765, "eval_samples_per_second": 20.821, "eval_steps_per_second": 0.33, "step": 13500 }, { "epoch": 2.62, "learning_rate": 6.985690659380168e-08, "logits/chosen": -2.46028733253479, "logits/rejected": -2.543519973754883, "logps/chosen": -241.09628295898438, "logps/rejected": -493.2625427246094, "loss": 0.0579, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.681536674499512, "rewards/margins": 15.452682495117188, "rewards/rejected": -21.134220123291016, "step": 13510 }, { "epoch": 2.62, "learning_rate": 6.949737542244913e-08, "logits/chosen": -2.661557912826538, "logits/rejected": -2.6307854652404785, "logps/chosen": -260.44586181640625, "logps/rejected": -436.988525390625, "loss": 0.0835, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.225275993347168, "rewards/margins": 18.49778175354004, "rewards/rejected": -25.723058700561523, "step": 13520 }, { "epoch": 2.63, "learning_rate": 6.913784425109656e-08, "logits/chosen": -2.634443521499634, "logits/rejected": -2.6196234226226807, "logps/chosen": -303.8482971191406, "logps/rejected": -458.89794921875, "loss": 0.0869, "rewards/accuracies": 1.0, "rewards/chosen": -6.48668909072876, "rewards/margins": 16.242252349853516, "rewards/rejected": -22.728944778442383, "step": 13530 }, { "epoch": 2.63, "learning_rate": 6.8778313079744e-08, "logits/chosen": -2.604661703109741, "logits/rejected": -2.570814371109009, "logps/chosen": -251.318359375, "logps/rejected": -328.5297546386719, "loss": 0.0828, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.066451072692871, "rewards/margins": 12.24148178100586, "rewards/rejected": -16.307931900024414, "step": 13540 }, { "epoch": 2.63, "learning_rate": 6.841878190839146e-08, "logits/chosen": -2.697322368621826, "logits/rejected": -2.6674821376800537, "logps/chosen": -264.43255615234375, "logps/rejected": -474.1181640625, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": -5.035953044891357, "rewards/margins": 17.23788833618164, "rewards/rejected": -22.273841857910156, "step": 13550 }, { "epoch": 2.63, "learning_rate": 6.80592507370389e-08, "logits/chosen": -2.5578739643096924, "logits/rejected": -2.70164155960083, "logps/chosen": -273.7012023925781, "logps/rejected": -475.08807373046875, "loss": 0.0833, "rewards/accuracies": 1.0, "rewards/chosen": -5.721813201904297, "rewards/margins": 20.668071746826172, "rewards/rejected": -26.389883041381836, "step": 13560 }, { "epoch": 2.63, "learning_rate": 6.769971956568635e-08, "logits/chosen": -2.6877002716064453, "logits/rejected": -2.658869743347168, "logps/chosen": -234.70315551757812, "logps/rejected": -336.563720703125, "loss": 0.0717, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.7852420806884766, "rewards/margins": 15.942594528198242, "rewards/rejected": -19.72783660888672, "step": 13570 }, { "epoch": 2.64, "learning_rate": 6.734018839433378e-08, "logits/chosen": -2.714672803878784, "logits/rejected": -2.6009838581085205, "logps/chosen": -319.7707824707031, "logps/rejected": -418.991455078125, "loss": 0.0457, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.361305236816406, "rewards/margins": 15.218538284301758, "rewards/rejected": -22.579843521118164, "step": 13580 }, { "epoch": 2.64, "learning_rate": 6.698065722298122e-08, "logits/chosen": -2.722248077392578, "logits/rejected": -2.6592392921447754, "logps/chosen": -306.11944580078125, "logps/rejected": -349.6539001464844, "loss": 0.0796, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -9.100323677062988, "rewards/margins": 8.627074241638184, "rewards/rejected": -17.727397918701172, "step": 13590 }, { "epoch": 2.64, "learning_rate": 6.662112605162868e-08, "logits/chosen": -2.7155263423919678, "logits/rejected": -2.6460728645324707, "logps/chosen": -300.47674560546875, "logps/rejected": -378.77203369140625, "loss": 0.0511, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.3318657875061035, "rewards/margins": 12.285037994384766, "rewards/rejected": -15.616902351379395, "step": 13600 }, { "epoch": 2.64, "eval_logits/chosen": -2.5038247108459473, "eval_logits/rejected": -2.4817864894866943, "eval_logps/chosen": -306.9222412109375, "eval_logps/rejected": -363.99371337890625, "eval_loss": 0.6403080224990845, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -11.268416404724121, "eval_rewards/margins": 7.939359664916992, "eval_rewards/rejected": -19.207775115966797, "eval_runtime": 157.5492, "eval_samples_per_second": 20.032, "eval_steps_per_second": 0.317, "step": 13600 }, { "epoch": 2.64, "learning_rate": 6.626159488027612e-08, "logits/chosen": -2.6395246982574463, "logits/rejected": -2.4943339824676514, "logps/chosen": -322.060302734375, "logps/rejected": -463.0009765625, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -2.8631839752197266, "rewards/margins": 22.114269256591797, "rewards/rejected": -24.97745132446289, "step": 13610 }, { "epoch": 2.64, "learning_rate": 6.590206370892357e-08, "logits/chosen": -2.681689500808716, "logits/rejected": -2.5957589149475098, "logps/chosen": -309.920166015625, "logps/rejected": -358.9371337890625, "loss": 0.1, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.827698707580566, "rewards/margins": 11.025238037109375, "rewards/rejected": -15.852938652038574, "step": 13620 }, { "epoch": 2.65, "learning_rate": 6.5542532537571e-08, "logits/chosen": -2.60517954826355, "logits/rejected": -2.547213077545166, "logps/chosen": -252.23745727539062, "logps/rejected": -326.57940673828125, "loss": 0.0758, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.169960021972656, "rewards/margins": 9.891514778137207, "rewards/rejected": -17.061473846435547, "step": 13630 }, { "epoch": 2.65, "learning_rate": 6.518300136621844e-08, "logits/chosen": -2.5564725399017334, "logits/rejected": -2.5501961708068848, "logps/chosen": -318.10589599609375, "logps/rejected": -495.20672607421875, "loss": 0.073, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.947672367095947, "rewards/margins": 12.922449111938477, "rewards/rejected": -19.8701229095459, "step": 13640 }, { "epoch": 2.65, "learning_rate": 6.48234701948659e-08, "logits/chosen": -2.810136079788208, "logits/rejected": -2.693380832672119, "logps/chosen": -486.0743103027344, "logps/rejected": -463.4774475097656, "loss": 0.0847, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.33536434173584, "rewards/margins": 9.718295097351074, "rewards/rejected": -16.053659439086914, "step": 13650 }, { "epoch": 2.65, "learning_rate": 6.446393902351333e-08, "logits/chosen": -2.5283124446868896, "logits/rejected": -2.4674930572509766, "logps/chosen": -228.8594970703125, "logps/rejected": -392.8613586425781, "loss": 0.0877, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.650363922119141, "rewards/margins": 11.017718315124512, "rewards/rejected": -16.668081283569336, "step": 13660 }, { "epoch": 2.65, "learning_rate": 6.410440785216079e-08, "logits/chosen": -2.524414539337158, "logits/rejected": -2.442152500152588, "logps/chosen": -270.5942077636719, "logps/rejected": -389.4601135253906, "loss": 0.0662, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -10.495218276977539, "rewards/margins": 13.683825492858887, "rewards/rejected": -24.17904281616211, "step": 13670 }, { "epoch": 2.66, "learning_rate": 6.374487668080822e-08, "logits/chosen": -2.6003825664520264, "logits/rejected": -2.6286122798919678, "logps/chosen": -243.40072631835938, "logps/rejected": -433.07403564453125, "loss": 0.0848, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.930796146392822, "rewards/margins": 18.22975730895996, "rewards/rejected": -25.16055679321289, "step": 13680 }, { "epoch": 2.66, "learning_rate": 6.338534550945567e-08, "logits/chosen": -2.459526300430298, "logits/rejected": -2.2914116382598877, "logps/chosen": -256.80401611328125, "logps/rejected": -368.97979736328125, "loss": 0.0496, "rewards/accuracies": 1.0, "rewards/chosen": -5.247989654541016, "rewards/margins": 16.199419021606445, "rewards/rejected": -21.447406768798828, "step": 13690 }, { "epoch": 2.66, "learning_rate": 6.302581433810311e-08, "logits/chosen": -2.480177640914917, "logits/rejected": -2.5111031532287598, "logps/chosen": -261.7010498046875, "logps/rejected": -458.08294677734375, "loss": 0.0848, "rewards/accuracies": 1.0, "rewards/chosen": -3.8917877674102783, "rewards/margins": 16.93520736694336, "rewards/rejected": -20.826993942260742, "step": 13700 }, { "epoch": 2.66, "eval_logits/chosen": -2.495516777038574, "eval_logits/rejected": -2.471787214279175, "eval_logps/chosen": -315.3421936035156, "eval_logps/rejected": -375.2400817871094, "eval_loss": 0.6501221656799316, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -12.110413551330566, "eval_rewards/margins": 8.221999168395996, "eval_rewards/rejected": -20.332412719726562, "eval_runtime": 141.4151, "eval_samples_per_second": 22.317, "eval_steps_per_second": 0.354, "step": 13700 }, { "epoch": 2.66, "learning_rate": 6.266628316675055e-08, "logits/chosen": -2.5079479217529297, "logits/rejected": -2.4605298042297363, "logps/chosen": -314.8881530761719, "logps/rejected": -456.40277099609375, "loss": 0.0828, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.168451309204102, "rewards/margins": 13.903965950012207, "rewards/rejected": -22.07241439819336, "step": 13710 }, { "epoch": 2.66, "learning_rate": 6.2306751995398e-08, "logits/chosen": -2.495927095413208, "logits/rejected": -2.5274269580841064, "logps/chosen": -259.0688171386719, "logps/rejected": -360.857666015625, "loss": 0.062, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.026102066040039, "rewards/margins": 13.894006729125977, "rewards/rejected": -19.920108795166016, "step": 13720 }, { "epoch": 2.67, "learning_rate": 6.194722082404545e-08, "logits/chosen": -2.5711963176727295, "logits/rejected": -2.526247024536133, "logps/chosen": -256.63238525390625, "logps/rejected": -403.71551513671875, "loss": 0.0447, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.342735290527344, "rewards/margins": 12.615774154663086, "rewards/rejected": -18.958511352539062, "step": 13730 }, { "epoch": 2.67, "learning_rate": 6.158768965269289e-08, "logits/chosen": -2.7091312408447266, "logits/rejected": -2.7500460147857666, "logps/chosen": -395.78179931640625, "logps/rejected": -448.81500244140625, "loss": 0.0805, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.350695610046387, "rewards/margins": 12.892354965209961, "rewards/rejected": -19.243051528930664, "step": 13740 }, { "epoch": 2.67, "learning_rate": 6.122815848134033e-08, "logits/chosen": -2.5087428092956543, "logits/rejected": -2.406790256500244, "logps/chosen": -275.4613342285156, "logps/rejected": -355.447509765625, "loss": 0.101, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -9.58680248260498, "rewards/margins": 11.131608963012695, "rewards/rejected": -20.71841049194336, "step": 13750 }, { "epoch": 2.67, "learning_rate": 6.086862730998777e-08, "logits/chosen": -2.542536497116089, "logits/rejected": -2.377361536026001, "logps/chosen": -266.2630310058594, "logps/rejected": -404.77392578125, "loss": 0.0893, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.279408931732178, "rewards/margins": 12.215632438659668, "rewards/rejected": -19.495040893554688, "step": 13760 }, { "epoch": 2.67, "learning_rate": 6.050909613863521e-08, "logits/chosen": -2.49064564704895, "logits/rejected": -2.488398551940918, "logps/chosen": -245.0917510986328, "logps/rejected": -366.75933837890625, "loss": 0.0649, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -10.33775806427002, "rewards/margins": 10.682060241699219, "rewards/rejected": -21.019817352294922, "step": 13770 }, { "epoch": 2.68, "learning_rate": 6.014956496728267e-08, "logits/chosen": -2.468036413192749, "logits/rejected": -2.4206693172454834, "logps/chosen": -279.8471984863281, "logps/rejected": -357.43023681640625, "loss": 0.064, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -8.859151840209961, "rewards/margins": 8.90800666809082, "rewards/rejected": -17.76715850830078, "step": 13780 }, { "epoch": 2.68, "learning_rate": 5.979003379593011e-08, "logits/chosen": -2.41125226020813, "logits/rejected": -2.4057607650756836, "logps/chosen": -302.2420959472656, "logps/rejected": -527.14208984375, "loss": 0.0629, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.266284942626953, "rewards/margins": 16.92730140686035, "rewards/rejected": -22.193584442138672, "step": 13790 }, { "epoch": 2.68, "learning_rate": 5.943050262457755e-08, "logits/chosen": -2.621866464614868, "logits/rejected": -2.6111340522766113, "logps/chosen": -347.26025390625, "logps/rejected": -494.755615234375, "loss": 0.0724, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -7.864444732666016, "rewards/margins": 14.596409797668457, "rewards/rejected": -22.46085548400879, "step": 13800 }, { "epoch": 2.68, "eval_logits/chosen": -2.5076606273651123, "eval_logits/rejected": -2.4851980209350586, "eval_logps/chosen": -317.7358093261719, "eval_logps/rejected": -375.9327697753906, "eval_loss": 0.6394022703170776, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -12.349776268005371, "eval_rewards/margins": 8.051904678344727, "eval_rewards/rejected": -20.40167999267578, "eval_runtime": 140.5761, "eval_samples_per_second": 22.45, "eval_steps_per_second": 0.356, "step": 13800 }, { "epoch": 2.68, "learning_rate": 5.907097145322499e-08, "logits/chosen": -2.4033799171447754, "logits/rejected": -2.4252820014953613, "logps/chosen": -291.6982116699219, "logps/rejected": -412.61224365234375, "loss": 0.0885, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -10.278039932250977, "rewards/margins": 11.062031745910645, "rewards/rejected": -21.340072631835938, "step": 13810 }, { "epoch": 2.68, "learning_rate": 5.871144028187244e-08, "logits/chosen": -2.5429272651672363, "logits/rejected": -2.42922043800354, "logps/chosen": -362.37518310546875, "logps/rejected": -408.1888732910156, "loss": 0.0805, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -9.56817626953125, "rewards/margins": 13.611679077148438, "rewards/rejected": -23.179853439331055, "step": 13820 }, { "epoch": 2.68, "learning_rate": 5.8351909110519886e-08, "logits/chosen": -2.604635238647461, "logits/rejected": -2.6315605640411377, "logps/chosen": -279.68341064453125, "logps/rejected": -385.7822570800781, "loss": 0.064, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -8.096555709838867, "rewards/margins": 13.717987060546875, "rewards/rejected": -21.81454086303711, "step": 13830 }, { "epoch": 2.69, "learning_rate": 5.799237793916732e-08, "logits/chosen": -2.633336305618286, "logits/rejected": -2.543452739715576, "logps/chosen": -286.7044677734375, "logps/rejected": -379.36456298828125, "loss": 0.0672, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.849825382232666, "rewards/margins": 18.00542449951172, "rewards/rejected": -23.855249404907227, "step": 13840 }, { "epoch": 2.69, "learning_rate": 5.763284676781477e-08, "logits/chosen": -2.496095657348633, "logits/rejected": -2.6102306842803955, "logps/chosen": -261.90948486328125, "logps/rejected": -398.29937744140625, "loss": 0.0534, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -9.025846481323242, "rewards/margins": 9.977482795715332, "rewards/rejected": -19.00333023071289, "step": 13850 }, { "epoch": 2.69, "learning_rate": 5.727331559646221e-08, "logits/chosen": -2.4396555423736572, "logits/rejected": -2.5189690589904785, "logps/chosen": -268.1959228515625, "logps/rejected": -459.5834045410156, "loss": 0.0711, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.469121932983398, "rewards/margins": 18.137006759643555, "rewards/rejected": -26.606128692626953, "step": 13860 }, { "epoch": 2.69, "learning_rate": 5.6913784425109657e-08, "logits/chosen": -2.403203010559082, "logits/rejected": -2.50626802444458, "logps/chosen": -234.3402862548828, "logps/rejected": -384.98468017578125, "loss": 0.0632, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -9.300580978393555, "rewards/margins": 11.250377655029297, "rewards/rejected": -20.55095863342285, "step": 13870 }, { "epoch": 2.69, "learning_rate": 5.65542532537571e-08, "logits/chosen": -2.5631232261657715, "logits/rejected": -2.540283679962158, "logps/chosen": -224.9944610595703, "logps/rejected": -361.48199462890625, "loss": 0.0817, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.571410179138184, "rewards/margins": 16.1390438079834, "rewards/rejected": -21.710453033447266, "step": 13880 }, { "epoch": 2.7, "learning_rate": 5.619472208240454e-08, "logits/chosen": -2.57092022895813, "logits/rejected": -2.511808156967163, "logps/chosen": -273.9009094238281, "logps/rejected": -377.3023986816406, "loss": 0.0749, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -8.2095308303833, "rewards/margins": 13.39879035949707, "rewards/rejected": -21.608320236206055, "step": 13890 }, { "epoch": 2.7, "learning_rate": 5.5835190911051986e-08, "logits/chosen": -2.5521140098571777, "logits/rejected": -2.5139479637145996, "logps/chosen": -292.22210693359375, "logps/rejected": -453.43310546875, "loss": 0.0735, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -12.644539833068848, "rewards/margins": 15.819013595581055, "rewards/rejected": -28.46355628967285, "step": 13900 }, { "epoch": 2.7, "eval_logits/chosen": -2.4578866958618164, "eval_logits/rejected": -2.4329802989959717, "eval_logps/chosen": -324.872802734375, "eval_logps/rejected": -386.6412353515625, "eval_loss": 0.6576498746871948, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -13.063472747802734, "eval_rewards/margins": 8.409056663513184, "eval_rewards/rejected": -21.4725284576416, "eval_runtime": 140.9434, "eval_samples_per_second": 22.392, "eval_steps_per_second": 0.355, "step": 13900 }, { "epoch": 2.7, "learning_rate": 5.547565973969943e-08, "logits/chosen": -2.6002488136291504, "logits/rejected": -2.5537025928497314, "logps/chosen": -273.44207763671875, "logps/rejected": -367.67156982421875, "loss": 0.1011, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -9.493548393249512, "rewards/margins": 9.718470573425293, "rewards/rejected": -19.212020874023438, "step": 13910 }, { "epoch": 2.7, "learning_rate": 5.5116128568346875e-08, "logits/chosen": -2.3440332412719727, "logits/rejected": -2.3300106525421143, "logps/chosen": -209.345947265625, "logps/rejected": -338.5674743652344, "loss": 0.081, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -13.825662612915039, "rewards/margins": 10.73725414276123, "rewards/rejected": -24.562911987304688, "step": 13920 }, { "epoch": 2.7, "learning_rate": 5.4756597396994316e-08, "logits/chosen": -2.546124219894409, "logits/rejected": -2.4810428619384766, "logps/chosen": -317.8944396972656, "logps/rejected": -444.1865234375, "loss": 0.0534, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.366611957550049, "rewards/margins": 13.541949272155762, "rewards/rejected": -16.90856170654297, "step": 13930 }, { "epoch": 2.71, "learning_rate": 5.4397066225641764e-08, "logits/chosen": -2.450314521789551, "logits/rejected": -2.4597702026367188, "logps/chosen": -285.3365173339844, "logps/rejected": -410.66046142578125, "loss": 0.073, "rewards/accuracies": 1.0, "rewards/chosen": -5.780367374420166, "rewards/margins": 17.328641891479492, "rewards/rejected": -23.109010696411133, "step": 13940 }, { "epoch": 2.71, "learning_rate": 5.4037535054289205e-08, "logits/chosen": -2.4047322273254395, "logits/rejected": -2.4492080211639404, "logps/chosen": -326.6347351074219, "logps/rejected": -538.2609252929688, "loss": 0.0784, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -9.95956039428711, "rewards/margins": 20.275020599365234, "rewards/rejected": -30.234582901000977, "step": 13950 }, { "epoch": 2.71, "learning_rate": 5.3678003882936646e-08, "logits/chosen": -2.460026502609253, "logits/rejected": -2.4137377738952637, "logps/chosen": -303.47589111328125, "logps/rejected": -330.0078125, "loss": 0.1096, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.023459434509277, "rewards/margins": 10.610163688659668, "rewards/rejected": -17.633625030517578, "step": 13960 }, { "epoch": 2.71, "learning_rate": 5.3318472711584094e-08, "logits/chosen": -2.5243473052978516, "logits/rejected": -2.5143184661865234, "logps/chosen": -266.847412109375, "logps/rejected": -472.657470703125, "loss": 0.0632, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.299185752868652, "rewards/margins": 18.07523536682129, "rewards/rejected": -25.374420166015625, "step": 13970 }, { "epoch": 2.71, "learning_rate": 5.2958941540231535e-08, "logits/chosen": -2.412656307220459, "logits/rejected": -2.2765166759490967, "logps/chosen": -369.3609313964844, "logps/rejected": -480.7267150878906, "loss": 0.0753, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.565628051757812, "rewards/margins": 18.540231704711914, "rewards/rejected": -27.105859756469727, "step": 13980 }, { "epoch": 2.72, "learning_rate": 5.259941036887898e-08, "logits/chosen": -2.423884630203247, "logits/rejected": -2.42649245262146, "logps/chosen": -296.9546203613281, "logps/rejected": -418.84246826171875, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": -5.2961201667785645, "rewards/margins": 15.501123428344727, "rewards/rejected": -20.7972412109375, "step": 13990 }, { "epoch": 2.72, "learning_rate": 5.2239879197526423e-08, "logits/chosen": -2.2800049781799316, "logits/rejected": -2.2325406074523926, "logps/chosen": -305.71868896484375, "logps/rejected": -476.806884765625, "loss": 0.0836, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.169608116149902, "rewards/margins": 18.147136688232422, "rewards/rejected": -24.316747665405273, "step": 14000 }, { "epoch": 2.72, "eval_logits/chosen": -2.4533159732818604, "eval_logits/rejected": -2.4284005165100098, "eval_logps/chosen": -321.3069152832031, "eval_logps/rejected": -380.9180603027344, "eval_loss": 0.6427257061004639, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -12.70688247680664, "eval_rewards/margins": 8.193329811096191, "eval_rewards/rejected": -20.90021324157715, "eval_runtime": 140.6044, "eval_samples_per_second": 22.446, "eval_steps_per_second": 0.356, "step": 14000 }, { "epoch": 2.72, "learning_rate": 5.1880348026173864e-08, "logits/chosen": -2.2966625690460205, "logits/rejected": -2.3146255016326904, "logps/chosen": -294.59881591796875, "logps/rejected": -519.9945678710938, "loss": 0.0586, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -10.670251846313477, "rewards/margins": 20.10184669494629, "rewards/rejected": -30.7720947265625, "step": 14010 }, { "epoch": 2.72, "learning_rate": 5.152081685482131e-08, "logits/chosen": -2.43430233001709, "logits/rejected": -2.4862494468688965, "logps/chosen": -319.15325927734375, "logps/rejected": -460.1707458496094, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": -2.3942723274230957, "rewards/margins": 16.40558433532715, "rewards/rejected": -18.799856185913086, "step": 14020 }, { "epoch": 2.72, "learning_rate": 5.116128568346875e-08, "logits/chosen": -2.4217216968536377, "logits/rejected": -2.3965320587158203, "logps/chosen": -292.21380615234375, "logps/rejected": -457.8392028808594, "loss": 0.0571, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.500149726867676, "rewards/margins": 12.409701347351074, "rewards/rejected": -19.90985107421875, "step": 14030 }, { "epoch": 2.73, "learning_rate": 5.08017545121162e-08, "logits/chosen": -2.5222690105438232, "logits/rejected": -2.5466718673706055, "logps/chosen": -343.06988525390625, "logps/rejected": -414.82318115234375, "loss": 0.0769, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -7.995353698730469, "rewards/margins": 13.598800659179688, "rewards/rejected": -21.594152450561523, "step": 14040 }, { "epoch": 2.73, "learning_rate": 5.0442223340763635e-08, "logits/chosen": -2.6274986267089844, "logits/rejected": -2.628922700881958, "logps/chosen": -313.6103210449219, "logps/rejected": -421.7408142089844, "loss": 0.0995, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.671439170837402, "rewards/margins": 9.996234893798828, "rewards/rejected": -16.667675018310547, "step": 14050 }, { "epoch": 2.73, "learning_rate": 5.008269216941108e-08, "logits/chosen": -2.5362377166748047, "logits/rejected": -2.470487117767334, "logps/chosen": -264.26251220703125, "logps/rejected": -457.0308532714844, "loss": 0.0884, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.933724880218506, "rewards/margins": 14.920491218566895, "rewards/rejected": -22.854215621948242, "step": 14060 }, { "epoch": 2.73, "learning_rate": 4.972316099805853e-08, "logits/chosen": -2.618799924850464, "logits/rejected": -2.589552402496338, "logps/chosen": -285.24468994140625, "logps/rejected": -407.5867614746094, "loss": 0.0707, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.739290714263916, "rewards/margins": 12.876055717468262, "rewards/rejected": -19.615345001220703, "step": 14070 }, { "epoch": 2.73, "learning_rate": 4.936362982670597e-08, "logits/chosen": -2.5255298614501953, "logits/rejected": -2.544981002807617, "logps/chosen": -282.092041015625, "logps/rejected": -420.1019592285156, "loss": 0.0859, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -7.3901495933532715, "rewards/margins": 14.408068656921387, "rewards/rejected": -21.798221588134766, "step": 14080 }, { "epoch": 2.74, "learning_rate": 4.900409865535342e-08, "logits/chosen": -2.2034499645233154, "logits/rejected": -2.360098361968994, "logps/chosen": -320.4710388183594, "logps/rejected": -554.2467651367188, "loss": 0.0663, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.402893543243408, "rewards/margins": 20.466861724853516, "rewards/rejected": -24.869754791259766, "step": 14090 }, { "epoch": 2.74, "learning_rate": 4.864456748400086e-08, "logits/chosen": -2.5468287467956543, "logits/rejected": -2.4685747623443604, "logps/chosen": -282.1744079589844, "logps/rejected": -353.25091552734375, "loss": 0.0647, "rewards/accuracies": 1.0, "rewards/chosen": -3.291839599609375, "rewards/margins": 14.212237358093262, "rewards/rejected": -17.504077911376953, "step": 14100 }, { "epoch": 2.74, "eval_logits/chosen": -2.4540998935699463, "eval_logits/rejected": -2.4287192821502686, "eval_logps/chosen": -318.9844055175781, "eval_logps/rejected": -378.7882080078125, "eval_loss": 0.6444785594940186, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -12.474638938903809, "eval_rewards/margins": 8.212586402893066, "eval_rewards/rejected": -20.68722152709961, "eval_runtime": 139.219, "eval_samples_per_second": 22.669, "eval_steps_per_second": 0.359, "step": 14100 }, { "epoch": 2.74, "learning_rate": 4.82850363126483e-08, "logits/chosen": -2.660529613494873, "logits/rejected": -2.6550545692443848, "logps/chosen": -290.40789794921875, "logps/rejected": -378.42462158203125, "loss": 0.0318, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.830068588256836, "rewards/margins": 11.361506462097168, "rewards/rejected": -16.19157600402832, "step": 14110 }, { "epoch": 2.74, "learning_rate": 4.792550514129575e-08, "logits/chosen": -2.360069513320923, "logits/rejected": -2.2975263595581055, "logps/chosen": -245.1154022216797, "logps/rejected": -372.20867919921875, "loss": 0.0574, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -9.051399230957031, "rewards/margins": 13.881940841674805, "rewards/rejected": -22.933340072631836, "step": 14120 }, { "epoch": 2.74, "learning_rate": 4.756597396994319e-08, "logits/chosen": -2.5174691677093506, "logits/rejected": -2.560690402984619, "logps/chosen": -306.4330749511719, "logps/rejected": -424.43280029296875, "loss": 0.0575, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -11.856558799743652, "rewards/margins": 12.972930908203125, "rewards/rejected": -24.829492568969727, "step": 14130 }, { "epoch": 2.75, "learning_rate": 4.720644279859064e-08, "logits/chosen": -2.5840866565704346, "logits/rejected": -2.532588481903076, "logps/chosen": -235.2403106689453, "logps/rejected": -357.3543701171875, "loss": 0.0707, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.2822160720825195, "rewards/margins": 13.625099182128906, "rewards/rejected": -20.90731430053711, "step": 14140 }, { "epoch": 2.75, "learning_rate": 4.684691162723808e-08, "logits/chosen": -2.5340611934661865, "logits/rejected": -2.538301944732666, "logps/chosen": -262.1207275390625, "logps/rejected": -438.6947326660156, "loss": 0.1069, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.790421485900879, "rewards/margins": 15.174471855163574, "rewards/rejected": -20.964893341064453, "step": 14150 }, { "epoch": 2.75, "learning_rate": 4.6487380455885527e-08, "logits/chosen": -2.373089551925659, "logits/rejected": -2.279886484146118, "logps/chosen": -240.9802703857422, "logps/rejected": -349.52069091796875, "loss": 0.0919, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -8.635541915893555, "rewards/margins": 9.87258529663086, "rewards/rejected": -18.508129119873047, "step": 14160 }, { "epoch": 2.75, "learning_rate": 4.612784928453297e-08, "logits/chosen": -2.5288782119750977, "logits/rejected": -2.490088939666748, "logps/chosen": -276.7637939453125, "logps/rejected": -387.97308349609375, "loss": 0.0731, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.757145881652832, "rewards/margins": 15.035905838012695, "rewards/rejected": -20.79305076599121, "step": 14170 }, { "epoch": 2.75, "learning_rate": 4.576831811318041e-08, "logits/chosen": -2.5628132820129395, "logits/rejected": -2.627485990524292, "logps/chosen": -257.33306884765625, "logps/rejected": -374.7657165527344, "loss": 0.0621, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.0018954277038574, "rewards/margins": 12.33110237121582, "rewards/rejected": -15.33299732208252, "step": 14180 }, { "epoch": 2.75, "learning_rate": 4.5408786941827856e-08, "logits/chosen": -2.4532923698425293, "logits/rejected": -2.4614763259887695, "logps/chosen": -241.8795166015625, "logps/rejected": -403.23516845703125, "loss": 0.0661, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.907286643981934, "rewards/margins": 15.930780410766602, "rewards/rejected": -20.83806610107422, "step": 14190 }, { "epoch": 2.76, "learning_rate": 4.50492557704753e-08, "logits/chosen": -2.714437961578369, "logits/rejected": -2.67118501663208, "logps/chosen": -394.26824951171875, "logps/rejected": -484.1063537597656, "loss": 0.0732, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.206911563873291, "rewards/margins": 17.77530860900879, "rewards/rejected": -19.982221603393555, "step": 14200 }, { "epoch": 2.76, "eval_logits/chosen": -2.451594591140747, "eval_logits/rejected": -2.427086114883423, "eval_logps/chosen": -310.6434326171875, "eval_logps/rejected": -369.8207092285156, "eval_loss": 0.651365339756012, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -11.640534400939941, "eval_rewards/margins": 8.149942398071289, "eval_rewards/rejected": -19.790475845336914, "eval_runtime": 140.0649, "eval_samples_per_second": 22.532, "eval_steps_per_second": 0.357, "step": 14200 }, { "epoch": 2.76, "learning_rate": 4.4689724599122745e-08, "logits/chosen": -2.5125339031219482, "logits/rejected": -2.4772400856018066, "logps/chosen": -290.7689208984375, "logps/rejected": -453.24267578125, "loss": 0.0621, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.635432243347168, "rewards/margins": 16.643768310546875, "rewards/rejected": -25.27920150756836, "step": 14210 }, { "epoch": 2.76, "learning_rate": 4.433019342777018e-08, "logits/chosen": -2.4146082401275635, "logits/rejected": -2.426396608352661, "logps/chosen": -310.6662292480469, "logps/rejected": -390.3843994140625, "loss": 0.0801, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -10.701229095458984, "rewards/margins": 11.596210479736328, "rewards/rejected": -22.297443389892578, "step": 14220 }, { "epoch": 2.76, "learning_rate": 4.397066225641763e-08, "logits/chosen": -2.708631992340088, "logits/rejected": -2.660353183746338, "logps/chosen": -370.0521240234375, "logps/rejected": -392.21044921875, "loss": 0.0561, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.522287368774414, "rewards/margins": 8.776540756225586, "rewards/rejected": -13.298826217651367, "step": 14230 }, { "epoch": 2.76, "learning_rate": 4.3611131085065075e-08, "logits/chosen": -2.3451640605926514, "logits/rejected": -2.228459119796753, "logps/chosen": -216.63671875, "logps/rejected": -359.85760498046875, "loss": 0.0713, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -9.85043716430664, "rewards/margins": 12.831878662109375, "rewards/rejected": -22.68231773376465, "step": 14240 }, { "epoch": 2.77, "learning_rate": 4.3251599913712516e-08, "logits/chosen": -2.4758808612823486, "logits/rejected": -2.4504141807556152, "logps/chosen": -301.84912109375, "logps/rejected": -440.341064453125, "loss": 0.0753, "rewards/accuracies": 1.0, "rewards/chosen": -5.745691776275635, "rewards/margins": 19.071266174316406, "rewards/rejected": -24.816957473754883, "step": 14250 }, { "epoch": 2.77, "learning_rate": 4.2892068742359964e-08, "logits/chosen": -2.447396993637085, "logits/rejected": -2.5197434425354004, "logps/chosen": -286.86614990234375, "logps/rejected": -557.3042602539062, "loss": 0.0847, "rewards/accuracies": 1.0, "rewards/chosen": -7.774168968200684, "rewards/margins": 26.642169952392578, "rewards/rejected": -34.416343688964844, "step": 14260 }, { "epoch": 2.77, "learning_rate": 4.25325375710074e-08, "logits/chosen": -2.426670789718628, "logits/rejected": -2.3398423194885254, "logps/chosen": -275.36669921875, "logps/rejected": -398.4294128417969, "loss": 0.0647, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.7752203941345215, "rewards/margins": 14.710901260375977, "rewards/rejected": -20.486120223999023, "step": 14270 }, { "epoch": 2.77, "learning_rate": 4.2173006399654846e-08, "logits/chosen": -2.654876470565796, "logits/rejected": -2.5388033390045166, "logps/chosen": -307.33367919921875, "logps/rejected": -427.08544921875, "loss": 0.0744, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.097901821136475, "rewards/margins": 14.917327880859375, "rewards/rejected": -21.01523208618164, "step": 14280 }, { "epoch": 2.77, "learning_rate": 4.1813475228302294e-08, "logits/chosen": -2.476609468460083, "logits/rejected": -2.4240591526031494, "logps/chosen": -317.2851867675781, "logps/rejected": -396.5350341796875, "loss": 0.0685, "rewards/accuracies": 1.0, "rewards/chosen": -1.9136959314346313, "rewards/margins": 15.125018119812012, "rewards/rejected": -17.038715362548828, "step": 14290 }, { "epoch": 2.78, "learning_rate": 4.1453944056949735e-08, "logits/chosen": -2.445927381515503, "logits/rejected": -2.392014265060425, "logps/chosen": -263.6961364746094, "logps/rejected": -442.0687561035156, "loss": 0.05, "rewards/accuracies": 1.0, "rewards/chosen": -5.59628963470459, "rewards/margins": 16.359914779663086, "rewards/rejected": -21.95620346069336, "step": 14300 }, { "epoch": 2.78, "eval_logits/chosen": -2.4207875728607178, "eval_logits/rejected": -2.3947505950927734, "eval_logps/chosen": -315.3155517578125, "eval_logps/rejected": -376.6313781738281, "eval_loss": 0.6599265933036804, "eval_rewards/accuracies": 0.7124999761581421, "eval_rewards/chosen": -12.107748985290527, "eval_rewards/margins": 8.363792419433594, "eval_rewards/rejected": -20.471540451049805, "eval_runtime": 139.4649, "eval_samples_per_second": 22.629, "eval_steps_per_second": 0.359, "step": 14300 }, { "epoch": 2.78, "learning_rate": 4.109441288559718e-08, "logits/chosen": -2.540937900543213, "logits/rejected": -2.498613119125366, "logps/chosen": -379.290771484375, "logps/rejected": -407.0986022949219, "loss": 0.0842, "rewards/accuracies": 0.75, "rewards/chosen": -10.659222602844238, "rewards/margins": 10.250799179077148, "rewards/rejected": -20.910022735595703, "step": 14310 }, { "epoch": 2.78, "learning_rate": 4.0734881714244623e-08, "logits/chosen": -2.5176444053649902, "logits/rejected": -2.5706937313079834, "logps/chosen": -297.27545166015625, "logps/rejected": -507.0318908691406, "loss": 0.0887, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -9.921635627746582, "rewards/margins": 15.953900337219238, "rewards/rejected": -25.875537872314453, "step": 14320 }, { "epoch": 2.78, "learning_rate": 4.0375350542892064e-08, "logits/chosen": -2.4668803215026855, "logits/rejected": -2.3793420791625977, "logps/chosen": -253.43984985351562, "logps/rejected": -444.0877990722656, "loss": 0.0847, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.362646102905273, "rewards/margins": 16.675519943237305, "rewards/rejected": -25.038164138793945, "step": 14330 }, { "epoch": 2.78, "learning_rate": 4.001581937153951e-08, "logits/chosen": -2.4649133682250977, "logits/rejected": -2.458911418914795, "logps/chosen": -280.91998291015625, "logps/rejected": -424.6297302246094, "loss": 0.0844, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.609391689300537, "rewards/margins": 15.765634536743164, "rewards/rejected": -19.37502670288086, "step": 14340 }, { "epoch": 2.79, "learning_rate": 3.965628820018695e-08, "logits/chosen": -2.443978786468506, "logits/rejected": -2.4482474327087402, "logps/chosen": -300.916748046875, "logps/rejected": -433.1073303222656, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": -8.690673828125, "rewards/margins": 14.432500839233398, "rewards/rejected": -23.1231746673584, "step": 14350 }, { "epoch": 2.79, "learning_rate": 3.92967570288344e-08, "logits/chosen": -2.597811698913574, "logits/rejected": -2.5616354942321777, "logps/chosen": -341.90643310546875, "logps/rejected": -461.83367919921875, "loss": 0.0363, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.019021511077881, "rewards/margins": 12.094686508178711, "rewards/rejected": -19.113710403442383, "step": 14360 }, { "epoch": 2.79, "learning_rate": 3.893722585748184e-08, "logits/chosen": -2.4906532764434814, "logits/rejected": -2.547138214111328, "logps/chosen": -245.5009002685547, "logps/rejected": -529.5654296875, "loss": 0.0681, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.885739326477051, "rewards/margins": 19.002941131591797, "rewards/rejected": -26.888687133789062, "step": 14370 }, { "epoch": 2.79, "learning_rate": 3.857769468612928e-08, "logits/chosen": -2.509801149368286, "logits/rejected": -2.487269639968872, "logps/chosen": -312.53515625, "logps/rejected": -466.1974182128906, "loss": 0.0575, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -9.757012367248535, "rewards/margins": 15.759994506835938, "rewards/rejected": -25.517005920410156, "step": 14380 }, { "epoch": 2.79, "learning_rate": 3.821816351477673e-08, "logits/chosen": -2.565678358078003, "logits/rejected": -2.5031778812408447, "logps/chosen": -271.2328796386719, "logps/rejected": -356.1014709472656, "loss": 0.0878, "rewards/accuracies": 0.75, "rewards/chosen": -11.773752212524414, "rewards/margins": 11.247282028198242, "rewards/rejected": -23.021032333374023, "step": 14390 }, { "epoch": 2.8, "learning_rate": 3.785863234342417e-08, "logits/chosen": -2.51530122756958, "logits/rejected": -2.491840124130249, "logps/chosen": -230.9707489013672, "logps/rejected": -337.8825378417969, "loss": 0.0881, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -9.354460716247559, "rewards/margins": 12.154481887817383, "rewards/rejected": -21.50894546508789, "step": 14400 }, { "epoch": 2.8, "eval_logits/chosen": -2.439577102661133, "eval_logits/rejected": -2.4138731956481934, "eval_logps/chosen": -310.17938232421875, "eval_logps/rejected": -367.84320068359375, "eval_loss": 0.6584843397140503, "eval_rewards/accuracies": 0.7149999737739563, "eval_rewards/chosen": -11.594132423400879, "eval_rewards/margins": 7.998593330383301, "eval_rewards/rejected": -19.59272575378418, "eval_runtime": 141.5963, "eval_samples_per_second": 22.289, "eval_steps_per_second": 0.353, "step": 14400 }, { "epoch": 2.8, "learning_rate": 3.749910117207162e-08, "logits/chosen": -2.5827152729034424, "logits/rejected": -2.5437204837799072, "logps/chosen": -325.27362060546875, "logps/rejected": -435.84576416015625, "loss": 0.0454, "rewards/accuracies": 1.0, "rewards/chosen": -2.840313673019409, "rewards/margins": 11.336217880249023, "rewards/rejected": -14.176533699035645, "step": 14410 }, { "epoch": 2.8, "learning_rate": 3.713957000071906e-08, "logits/chosen": -2.6532137393951416, "logits/rejected": -2.5596745014190674, "logps/chosen": -307.2431640625, "logps/rejected": -452.42010498046875, "loss": 0.0455, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.3371644020080566, "rewards/margins": 17.32927703857422, "rewards/rejected": -20.666439056396484, "step": 14420 }, { "epoch": 2.8, "learning_rate": 3.678003882936651e-08, "logits/chosen": -2.470896005630493, "logits/rejected": -2.4679017066955566, "logps/chosen": -242.60079956054688, "logps/rejected": -369.19329833984375, "loss": 0.0766, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -10.54631233215332, "rewards/margins": 14.59411907196045, "rewards/rejected": -25.140432357788086, "step": 14430 }, { "epoch": 2.8, "learning_rate": 3.642050765801394e-08, "logits/chosen": -2.413461446762085, "logits/rejected": -2.347374677658081, "logps/chosen": -180.55453491210938, "logps/rejected": -343.64813232421875, "loss": 0.0664, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.007493495941162, "rewards/margins": 12.735737800598145, "rewards/rejected": -18.74323081970215, "step": 14440 }, { "epoch": 2.81, "learning_rate": 3.606097648666139e-08, "logits/chosen": -2.5840563774108887, "logits/rejected": -2.5611732006073, "logps/chosen": -302.6882019042969, "logps/rejected": -461.46966552734375, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": -4.802069664001465, "rewards/margins": 14.225900650024414, "rewards/rejected": -19.027971267700195, "step": 14450 }, { "epoch": 2.81, "learning_rate": 3.570144531530884e-08, "logits/chosen": -2.698560953140259, "logits/rejected": -2.543132781982422, "logps/chosen": -253.9922637939453, "logps/rejected": -451.609130859375, "loss": 0.0713, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.911755561828613, "rewards/margins": 14.107965469360352, "rewards/rejected": -21.019718170166016, "step": 14460 }, { "epoch": 2.81, "learning_rate": 3.534191414395628e-08, "logits/chosen": -2.545153856277466, "logits/rejected": -2.606476068496704, "logps/chosen": -266.3321228027344, "logps/rejected": -408.425048828125, "loss": 0.053, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.250962257385254, "rewards/margins": 17.035131454467773, "rewards/rejected": -21.28609275817871, "step": 14470 }, { "epoch": 2.81, "learning_rate": 3.4982382972603727e-08, "logits/chosen": -2.0988423824310303, "logits/rejected": -2.0718460083007812, "logps/chosen": -222.11117553710938, "logps/rejected": -371.1849670410156, "loss": 0.0765, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -9.412771224975586, "rewards/margins": 14.51530933380127, "rewards/rejected": -23.928081512451172, "step": 14480 }, { "epoch": 2.81, "learning_rate": 3.462285180125116e-08, "logits/chosen": -2.4497971534729004, "logits/rejected": -2.420116662979126, "logps/chosen": -317.0908203125, "logps/rejected": -419.55206298828125, "loss": 0.0707, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.512151718139648, "rewards/margins": 14.144830703735352, "rewards/rejected": -22.656982421875, "step": 14490 }, { "epoch": 2.81, "learning_rate": 3.426332062989861e-08, "logits/chosen": -2.499769926071167, "logits/rejected": -2.5119802951812744, "logps/chosen": -295.19549560546875, "logps/rejected": -394.10443115234375, "loss": 0.0992, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -11.128557205200195, "rewards/margins": 13.180562973022461, "rewards/rejected": -24.309118270874023, "step": 14500 }, { "epoch": 2.81, "eval_logits/chosen": -2.4285218715667725, "eval_logits/rejected": -2.4018208980560303, "eval_logps/chosen": -313.8076477050781, "eval_logps/rejected": -373.9247741699219, "eval_loss": 0.6616764068603516, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -11.956953048706055, "eval_rewards/margins": 8.243927955627441, "eval_rewards/rejected": -20.200881958007812, "eval_runtime": 139.1952, "eval_samples_per_second": 22.673, "eval_steps_per_second": 0.359, "step": 14500 }, { "epoch": 2.82, "learning_rate": 3.3903789458546056e-08, "logits/chosen": -2.5374388694763184, "logits/rejected": -2.3807623386383057, "logps/chosen": -289.66064453125, "logps/rejected": -416.8006286621094, "loss": 0.085, "rewards/accuracies": 1.0, "rewards/chosen": -4.805605411529541, "rewards/margins": 16.766782760620117, "rewards/rejected": -21.572389602661133, "step": 14510 }, { "epoch": 2.82, "learning_rate": 3.35442582871935e-08, "logits/chosen": -2.7303237915039062, "logits/rejected": -2.5013201236724854, "logps/chosen": -336.2086486816406, "logps/rejected": -450.80828857421875, "loss": 0.0808, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.56033992767334, "rewards/margins": 16.93686866760254, "rewards/rejected": -24.497209548950195, "step": 14520 }, { "epoch": 2.82, "learning_rate": 3.3184727115840945e-08, "logits/chosen": -2.517124891281128, "logits/rejected": -2.518655776977539, "logps/chosen": -305.09783935546875, "logps/rejected": -508.779052734375, "loss": 0.0651, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -10.57075309753418, "rewards/margins": 16.389467239379883, "rewards/rejected": -26.960220336914062, "step": 14530 }, { "epoch": 2.82, "learning_rate": 3.2825195944488386e-08, "logits/chosen": -2.4062769412994385, "logits/rejected": -2.377316951751709, "logps/chosen": -275.3218994140625, "logps/rejected": -393.46405029296875, "loss": 0.0653, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.1832356452941895, "rewards/margins": 13.256990432739258, "rewards/rejected": -19.440227508544922, "step": 14540 }, { "epoch": 2.82, "learning_rate": 3.246566477313583e-08, "logits/chosen": -2.5116472244262695, "logits/rejected": -2.500929355621338, "logps/chosen": -341.635498046875, "logps/rejected": -520.7948608398438, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": -5.789725303649902, "rewards/margins": 19.73894500732422, "rewards/rejected": -25.528671264648438, "step": 14550 }, { "epoch": 2.83, "learning_rate": 3.2106133601783275e-08, "logits/chosen": -2.4713847637176514, "logits/rejected": -2.4499380588531494, "logps/chosen": -265.95623779296875, "logps/rejected": -478.7012634277344, "loss": 0.114, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -10.011308670043945, "rewards/margins": 19.960142135620117, "rewards/rejected": -29.971450805664062, "step": 14560 }, { "epoch": 2.83, "learning_rate": 3.1746602430430716e-08, "logits/chosen": -2.483971118927002, "logits/rejected": -2.351804733276367, "logps/chosen": -212.7343292236328, "logps/rejected": -286.5941467285156, "loss": 0.0718, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.807860374450684, "rewards/margins": 11.958721160888672, "rewards/rejected": -17.76658058166504, "step": 14570 }, { "epoch": 2.83, "learning_rate": 3.1387071259078164e-08, "logits/chosen": -2.4391541481018066, "logits/rejected": -2.3376450538635254, "logps/chosen": -275.1197509765625, "logps/rejected": -363.12860107421875, "loss": 0.0626, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.140986442565918, "rewards/margins": 11.326373100280762, "rewards/rejected": -15.46735954284668, "step": 14580 }, { "epoch": 2.83, "learning_rate": 3.1027540087725605e-08, "logits/chosen": -2.3122918605804443, "logits/rejected": -2.3308868408203125, "logps/chosen": -328.8714294433594, "logps/rejected": -428.346435546875, "loss": 0.0823, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -13.11317253112793, "rewards/margins": 11.919618606567383, "rewards/rejected": -25.03278923034668, "step": 14590 }, { "epoch": 2.83, "learning_rate": 3.0668008916373046e-08, "logits/chosen": -2.377321720123291, "logits/rejected": -2.3733105659484863, "logps/chosen": -343.38348388671875, "logps/rejected": -471.52703857421875, "loss": 0.0582, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.647674560546875, "rewards/margins": 15.033966064453125, "rewards/rejected": -19.681642532348633, "step": 14600 }, { "epoch": 2.83, "eval_logits/chosen": -2.42386794090271, "eval_logits/rejected": -2.396596670150757, "eval_logps/chosen": -318.4825134277344, "eval_logps/rejected": -381.66265869140625, "eval_loss": 0.6692880988121033, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -12.424447059631348, "eval_rewards/margins": 8.550222396850586, "eval_rewards/rejected": -20.97467041015625, "eval_runtime": 139.7094, "eval_samples_per_second": 22.59, "eval_steps_per_second": 0.358, "step": 14600 }, { "epoch": 2.84, "learning_rate": 3.0308477745020494e-08, "logits/chosen": -2.5730843544006348, "logits/rejected": -2.4685885906219482, "logps/chosen": -201.51568603515625, "logps/rejected": -315.1352844238281, "loss": 0.0707, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.461834907531738, "rewards/margins": 10.430822372436523, "rewards/rejected": -15.892657279968262, "step": 14610 }, { "epoch": 2.84, "learning_rate": 2.9948946573667935e-08, "logits/chosen": -2.516209840774536, "logits/rejected": -2.4631283283233643, "logps/chosen": -323.6180725097656, "logps/rejected": -402.645263671875, "loss": 0.0629, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.655837535858154, "rewards/margins": 10.929776191711426, "rewards/rejected": -16.585613250732422, "step": 14620 }, { "epoch": 2.84, "learning_rate": 2.958941540231538e-08, "logits/chosen": -2.319056987762451, "logits/rejected": -2.2761974334716797, "logps/chosen": -247.5626678466797, "logps/rejected": -394.25830078125, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": -7.647191524505615, "rewards/margins": 16.012495040893555, "rewards/rejected": -23.659687042236328, "step": 14630 }, { "epoch": 2.84, "learning_rate": 2.922988423096282e-08, "logits/chosen": -2.4223062992095947, "logits/rejected": -2.3641715049743652, "logps/chosen": -291.48016357421875, "logps/rejected": -394.8805847167969, "loss": 0.0702, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.563333988189697, "rewards/margins": 13.369488716125488, "rewards/rejected": -20.932823181152344, "step": 14640 }, { "epoch": 2.84, "learning_rate": 2.8870353059610268e-08, "logits/chosen": -2.4811291694641113, "logits/rejected": -2.3399858474731445, "logps/chosen": -217.7340850830078, "logps/rejected": -352.4710388183594, "loss": 0.0988, "rewards/accuracies": 0.75, "rewards/chosen": -8.096497535705566, "rewards/margins": 14.886590957641602, "rewards/rejected": -22.983089447021484, "step": 14650 }, { "epoch": 2.85, "learning_rate": 2.8510821888257712e-08, "logits/chosen": -2.580836057662964, "logits/rejected": -2.4918313026428223, "logps/chosen": -317.5665588378906, "logps/rejected": -344.79632568359375, "loss": 0.0719, "rewards/accuracies": 1.0, "rewards/chosen": -7.9686279296875, "rewards/margins": 13.31214714050293, "rewards/rejected": -21.280776977539062, "step": 14660 }, { "epoch": 2.85, "learning_rate": 2.8151290716905153e-08, "logits/chosen": -2.408627986907959, "logits/rejected": -2.405900716781616, "logps/chosen": -267.0467529296875, "logps/rejected": -408.45208740234375, "loss": 0.0795, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -10.903483390808105, "rewards/margins": 18.04708480834961, "rewards/rejected": -28.950571060180664, "step": 14670 }, { "epoch": 2.85, "learning_rate": 2.7791759545552598e-08, "logits/chosen": -2.472996234893799, "logits/rejected": -2.3849222660064697, "logps/chosen": -350.3518981933594, "logps/rejected": -338.9212646484375, "loss": 0.0838, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.101327896118164, "rewards/margins": 11.6890287399292, "rewards/rejected": -17.790355682373047, "step": 14680 }, { "epoch": 2.85, "learning_rate": 2.7432228374200042e-08, "logits/chosen": -2.744760513305664, "logits/rejected": -2.6889748573303223, "logps/chosen": -425.40118408203125, "logps/rejected": -422.3814392089844, "loss": 0.0791, "rewards/accuracies": 1.0, "rewards/chosen": -5.307501792907715, "rewards/margins": 12.430013656616211, "rewards/rejected": -17.73751449584961, "step": 14690 }, { "epoch": 2.85, "learning_rate": 2.7072697202847486e-08, "logits/chosen": -2.524183750152588, "logits/rejected": -2.4891419410705566, "logps/chosen": -275.86749267578125, "logps/rejected": -376.0390930175781, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": -3.14548921585083, "rewards/margins": 11.856428146362305, "rewards/rejected": -15.001917839050293, "step": 14700 }, { "epoch": 2.85, "eval_logits/chosen": -2.4053714275360107, "eval_logits/rejected": -2.3763649463653564, "eval_logps/chosen": -319.3457946777344, "eval_logps/rejected": -383.76605224609375, "eval_loss": 0.6742444634437561, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -12.51076889038086, "eval_rewards/margins": 8.674240112304688, "eval_rewards/rejected": -21.185009002685547, "eval_runtime": 142.1209, "eval_samples_per_second": 22.206, "eval_steps_per_second": 0.352, "step": 14700 }, { "epoch": 2.86, "learning_rate": 2.671316603149493e-08, "logits/chosen": -2.3960258960723877, "logits/rejected": -2.2449076175689697, "logps/chosen": -212.5515899658203, "logps/rejected": -299.7181091308594, "loss": 0.0874, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.225173950195312, "rewards/margins": 10.981122970581055, "rewards/rejected": -19.206296920776367, "step": 14710 }, { "epoch": 2.86, "learning_rate": 2.6353634860142375e-08, "logits/chosen": -2.500899076461792, "logits/rejected": -2.508479118347168, "logps/chosen": -271.1492919921875, "logps/rejected": -394.25018310546875, "loss": 0.0627, "rewards/accuracies": 1.0, "rewards/chosen": -4.097317695617676, "rewards/margins": 12.225263595581055, "rewards/rejected": -16.322580337524414, "step": 14720 }, { "epoch": 2.86, "learning_rate": 2.5994103688789816e-08, "logits/chosen": -2.5822582244873047, "logits/rejected": -2.525808095932007, "logps/chosen": -334.08197021484375, "logps/rejected": -461.976318359375, "loss": 0.0515, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.583234786987305, "rewards/margins": 19.960254669189453, "rewards/rejected": -25.54349136352539, "step": 14730 }, { "epoch": 2.86, "learning_rate": 2.563457251743726e-08, "logits/chosen": -2.625471830368042, "logits/rejected": -2.450768232345581, "logps/chosen": -317.8865051269531, "logps/rejected": -381.4195861816406, "loss": 0.0533, "rewards/accuracies": 1.0, "rewards/chosen": -2.570221424102783, "rewards/margins": 12.633634567260742, "rewards/rejected": -15.203857421875, "step": 14740 }, { "epoch": 2.86, "learning_rate": 2.52750413460847e-08, "logits/chosen": -2.3523027896881104, "logits/rejected": -2.3602569103240967, "logps/chosen": -268.3021240234375, "logps/rejected": -550.2041625976562, "loss": 0.0771, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -8.813868522644043, "rewards/margins": 16.246450424194336, "rewards/rejected": -25.06032371520996, "step": 14750 }, { "epoch": 2.87, "learning_rate": 2.491551017473215e-08, "logits/chosen": -2.4621942043304443, "logits/rejected": -2.552905797958374, "logps/chosen": -332.30352783203125, "logps/rejected": -443.5380859375, "loss": 0.0986, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.553974628448486, "rewards/margins": 16.756128311157227, "rewards/rejected": -21.310102462768555, "step": 14760 }, { "epoch": 2.87, "learning_rate": 2.4555979003379594e-08, "logits/chosen": -2.5724241733551025, "logits/rejected": -2.546224594116211, "logps/chosen": -335.9853820800781, "logps/rejected": -461.4374084472656, "loss": 0.0539, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.356352806091309, "rewards/margins": 13.373123168945312, "rewards/rejected": -21.729475021362305, "step": 14770 }, { "epoch": 2.87, "learning_rate": 2.4196447832027035e-08, "logits/chosen": -2.541684150695801, "logits/rejected": -2.440366744995117, "logps/chosen": -338.27825927734375, "logps/rejected": -430.75390625, "loss": 0.0773, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.635833740234375, "rewards/margins": 15.04316520690918, "rewards/rejected": -23.678998947143555, "step": 14780 }, { "epoch": 2.87, "learning_rate": 2.383691666067448e-08, "logits/chosen": -2.4136452674865723, "logits/rejected": -2.4098598957061768, "logps/chosen": -364.2322082519531, "logps/rejected": -468.85614013671875, "loss": 0.0526, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -8.864326477050781, "rewards/margins": 16.317676544189453, "rewards/rejected": -25.1820011138916, "step": 14790 }, { "epoch": 2.87, "learning_rate": 2.3477385489321923e-08, "logits/chosen": -2.3216519355773926, "logits/rejected": -2.349653720855713, "logps/chosen": -245.6371612548828, "logps/rejected": -387.172119140625, "loss": 0.0615, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.777538299560547, "rewards/margins": 14.355929374694824, "rewards/rejected": -22.133466720581055, "step": 14800 }, { "epoch": 2.87, "eval_logits/chosen": -2.411236047744751, "eval_logits/rejected": -2.3824241161346436, "eval_logps/chosen": -322.263671875, "eval_logps/rejected": -387.9561767578125, "eval_loss": 0.677561342716217, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -12.802559852600098, "eval_rewards/margins": 8.801458358764648, "eval_rewards/rejected": -21.604019165039062, "eval_runtime": 138.7975, "eval_samples_per_second": 22.738, "eval_steps_per_second": 0.36, "step": 14800 }, { "epoch": 2.88, "learning_rate": 2.3117854317969364e-08, "logits/chosen": -2.461608409881592, "logits/rejected": -2.433798313140869, "logps/chosen": -277.7593688964844, "logps/rejected": -433.9398498535156, "loss": 0.0881, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -11.632577896118164, "rewards/margins": 16.77115249633789, "rewards/rejected": -28.403732299804688, "step": 14810 }, { "epoch": 2.88, "learning_rate": 2.2758323146616812e-08, "logits/chosen": -2.4626240730285645, "logits/rejected": -2.34547758102417, "logps/chosen": -213.6193084716797, "logps/rejected": -473.6609802246094, "loss": 0.0653, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.824320316314697, "rewards/margins": 19.59589195251465, "rewards/rejected": -26.420211791992188, "step": 14820 }, { "epoch": 2.88, "learning_rate": 2.2398791975264256e-08, "logits/chosen": -2.5313308238983154, "logits/rejected": -2.52459716796875, "logps/chosen": -347.76080322265625, "logps/rejected": -450.2403869628906, "loss": 0.0625, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.659457206726074, "rewards/margins": 13.534477233886719, "rewards/rejected": -19.193933486938477, "step": 14830 }, { "epoch": 2.88, "learning_rate": 2.2039260803911698e-08, "logits/chosen": -2.394092082977295, "logits/rejected": -2.3857369422912598, "logps/chosen": -261.8539123535156, "logps/rejected": -476.70391845703125, "loss": 0.0487, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -9.905121803283691, "rewards/margins": 19.592878341674805, "rewards/rejected": -29.498001098632812, "step": 14840 }, { "epoch": 2.88, "learning_rate": 2.1679729632559142e-08, "logits/chosen": -2.481632947921753, "logits/rejected": -2.4351515769958496, "logps/chosen": -244.8082733154297, "logps/rejected": -385.21600341796875, "loss": 0.0747, "rewards/accuracies": 1.0, "rewards/chosen": -6.0515594482421875, "rewards/margins": 17.415645599365234, "rewards/rejected": -23.467206954956055, "step": 14850 }, { "epoch": 2.88, "learning_rate": 2.1320198461206583e-08, "logits/chosen": -2.3605756759643555, "logits/rejected": -2.2788777351379395, "logps/chosen": -301.1482849121094, "logps/rejected": -394.51361083984375, "loss": 0.07, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.805695533752441, "rewards/margins": 15.642021179199219, "rewards/rejected": -21.447715759277344, "step": 14860 }, { "epoch": 2.89, "learning_rate": 2.096066728985403e-08, "logits/chosen": -2.451914072036743, "logits/rejected": -2.4734387397766113, "logps/chosen": -223.73916625976562, "logps/rejected": -371.39453125, "loss": 0.0802, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.518346786499023, "rewards/margins": 13.512170791625977, "rewards/rejected": -22.030515670776367, "step": 14870 }, { "epoch": 2.89, "learning_rate": 2.0601136118501475e-08, "logits/chosen": -2.565833806991577, "logits/rejected": -2.570472240447998, "logps/chosen": -260.41375732421875, "logps/rejected": -439.21343994140625, "loss": 0.0969, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -10.041805267333984, "rewards/margins": 16.768009185791016, "rewards/rejected": -26.809818267822266, "step": 14880 }, { "epoch": 2.89, "learning_rate": 2.0241604947148916e-08, "logits/chosen": -2.5179436206817627, "logits/rejected": -2.4361252784729004, "logps/chosen": -224.28073120117188, "logps/rejected": -414.78326416015625, "loss": 0.0742, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.672027111053467, "rewards/margins": 15.319323539733887, "rewards/rejected": -19.991352081298828, "step": 14890 }, { "epoch": 2.89, "learning_rate": 1.988207377579636e-08, "logits/chosen": -2.623081684112549, "logits/rejected": -2.6000635623931885, "logps/chosen": -258.5315246582031, "logps/rejected": -449.21881103515625, "loss": 0.0532, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.813483238220215, "rewards/margins": 13.49165153503418, "rewards/rejected": -21.305133819580078, "step": 14900 }, { "epoch": 2.89, "eval_logits/chosen": -2.413811206817627, "eval_logits/rejected": -2.3852202892303467, "eval_logps/chosen": -324.2155456542969, "eval_logps/rejected": -390.41668701171875, "eval_loss": 0.6768919229507446, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -12.997749328613281, "eval_rewards/margins": 8.852324485778809, "eval_rewards/rejected": -21.850074768066406, "eval_runtime": 138.8929, "eval_samples_per_second": 22.723, "eval_steps_per_second": 0.36, "step": 14900 }, { "epoch": 2.89, "learning_rate": 1.9522542604443805e-08, "logits/chosen": -2.52410626411438, "logits/rejected": -2.432678699493408, "logps/chosen": -259.6901550292969, "logps/rejected": -376.5718078613281, "loss": 0.0363, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -8.789159774780273, "rewards/margins": 13.336712837219238, "rewards/rejected": -22.125871658325195, "step": 14910 }, { "epoch": 2.9, "learning_rate": 1.9163011433091246e-08, "logits/chosen": -2.556885242462158, "logits/rejected": -2.496314525604248, "logps/chosen": -292.65008544921875, "logps/rejected": -459.6458435058594, "loss": 0.0792, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.866762161254883, "rewards/margins": 16.4494686126709, "rewards/rejected": -19.31622886657715, "step": 14920 }, { "epoch": 2.9, "learning_rate": 1.8803480261738694e-08, "logits/chosen": -2.4584484100341797, "logits/rejected": -2.449239730834961, "logps/chosen": -236.3944854736328, "logps/rejected": -328.36199951171875, "loss": 0.0751, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -5.435039043426514, "rewards/margins": 11.45154857635498, "rewards/rejected": -16.8865909576416, "step": 14930 }, { "epoch": 2.9, "learning_rate": 1.8443949090386138e-08, "logits/chosen": -2.2923905849456787, "logits/rejected": -2.401318073272705, "logps/chosen": -293.0608215332031, "logps/rejected": -418.92205810546875, "loss": 0.0745, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -11.43484878540039, "rewards/margins": 13.684107780456543, "rewards/rejected": -25.118955612182617, "step": 14940 }, { "epoch": 2.9, "learning_rate": 1.808441791903358e-08, "logits/chosen": -2.504848003387451, "logits/rejected": -2.554367780685425, "logps/chosen": -334.94842529296875, "logps/rejected": -376.92767333984375, "loss": 0.0723, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.64829158782959, "rewards/margins": 11.120731353759766, "rewards/rejected": -18.769023895263672, "step": 14950 }, { "epoch": 2.9, "learning_rate": 1.7724886747681023e-08, "logits/chosen": -2.5695011615753174, "logits/rejected": -2.4521443843841553, "logps/chosen": -296.80694580078125, "logps/rejected": -420.30145263671875, "loss": 0.0621, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.00919246673584, "rewards/margins": 18.143131256103516, "rewards/rejected": -25.152324676513672, "step": 14960 }, { "epoch": 2.91, "learning_rate": 1.7365355576328464e-08, "logits/chosen": -2.5077388286590576, "logits/rejected": -2.5159683227539062, "logps/chosen": -313.39990234375, "logps/rejected": -466.9717712402344, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": -6.881387233734131, "rewards/margins": 19.802248001098633, "rewards/rejected": -26.68363380432129, "step": 14970 }, { "epoch": 2.91, "learning_rate": 1.700582440497591e-08, "logits/chosen": -2.465104103088379, "logits/rejected": -2.402278184890747, "logps/chosen": -335.5147705078125, "logps/rejected": -430.0679626464844, "loss": 0.078, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -4.292757987976074, "rewards/margins": 12.162251472473145, "rewards/rejected": -16.45500946044922, "step": 14980 }, { "epoch": 2.91, "learning_rate": 1.6646293233623356e-08, "logits/chosen": -2.5866243839263916, "logits/rejected": -2.520380973815918, "logps/chosen": -319.72161865234375, "logps/rejected": -426.16278076171875, "loss": 0.0895, "rewards/accuracies": 0.75, "rewards/chosen": -11.24250602722168, "rewards/margins": 11.0344820022583, "rewards/rejected": -22.276988983154297, "step": 14990 }, { "epoch": 2.91, "learning_rate": 1.6286762062270798e-08, "logits/chosen": -2.519317150115967, "logits/rejected": -2.5784876346588135, "logps/chosen": -334.0810852050781, "logps/rejected": -473.9246520996094, "loss": 0.0742, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -9.33923053741455, "rewards/margins": 15.856913566589355, "rewards/rejected": -25.196144104003906, "step": 15000 }, { "epoch": 2.91, "eval_logits/chosen": -2.4096627235412598, "eval_logits/rejected": -2.3806543350219727, "eval_logps/chosen": -327.2182312011719, "eval_logps/rejected": -394.3062744140625, "eval_loss": 0.6786009073257446, "eval_rewards/accuracies": 0.6949999928474426, "eval_rewards/chosen": -13.29802131652832, "eval_rewards/margins": 8.941009521484375, "eval_rewards/rejected": -22.239030838012695, "eval_runtime": 141.0306, "eval_samples_per_second": 22.378, "eval_steps_per_second": 0.355, "step": 15000 }, { "epoch": 2.91, "learning_rate": 1.5927230890918242e-08, "logits/chosen": -2.4977755546569824, "logits/rejected": -2.545881748199463, "logps/chosen": -376.63629150390625, "logps/rejected": -472.7349548339844, "loss": 0.087, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.123446464538574, "rewards/margins": 14.990835189819336, "rewards/rejected": -21.11428451538086, "step": 15010 }, { "epoch": 2.92, "learning_rate": 1.5567699719565686e-08, "logits/chosen": -2.560659885406494, "logits/rejected": -2.509192943572998, "logps/chosen": -285.62042236328125, "logps/rejected": -486.3465881347656, "loss": 0.049, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.968367576599121, "rewards/margins": 15.235417366027832, "rewards/rejected": -20.20378303527832, "step": 15020 }, { "epoch": 2.92, "learning_rate": 1.520816854821313e-08, "logits/chosen": -2.3575987815856934, "logits/rejected": -2.4175238609313965, "logps/chosen": -276.5771789550781, "logps/rejected": -396.5736083984375, "loss": 0.0673, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -11.302205085754395, "rewards/margins": 14.545817375183105, "rewards/rejected": -25.848018646240234, "step": 15030 }, { "epoch": 2.92, "learning_rate": 1.4848637376860573e-08, "logits/chosen": -2.5779805183410645, "logits/rejected": -2.4924569129943848, "logps/chosen": -334.1487121582031, "logps/rejected": -340.0343322753906, "loss": 0.0497, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.865668296813965, "rewards/margins": 10.10538101196289, "rewards/rejected": -18.971050262451172, "step": 15040 }, { "epoch": 2.92, "learning_rate": 1.4489106205508016e-08, "logits/chosen": -2.425614833831787, "logits/rejected": -2.4983208179473877, "logps/chosen": -319.1209411621094, "logps/rejected": -559.3380126953125, "loss": 0.0617, "rewards/accuracies": 1.0, "rewards/chosen": -3.6970150470733643, "rewards/margins": 18.550779342651367, "rewards/rejected": -22.247793197631836, "step": 15050 }, { "epoch": 2.92, "learning_rate": 1.412957503415546e-08, "logits/chosen": -2.4435060024261475, "logits/rejected": -2.3748388290405273, "logps/chosen": -237.87460327148438, "logps/rejected": -297.3375244140625, "loss": 0.0539, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -6.932347297668457, "rewards/margins": 9.860334396362305, "rewards/rejected": -16.792682647705078, "step": 15060 }, { "epoch": 2.93, "learning_rate": 1.3770043862802905e-08, "logits/chosen": -2.5922610759735107, "logits/rejected": -2.5537643432617188, "logps/chosen": -318.402587890625, "logps/rejected": -365.552490234375, "loss": 0.0919, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.109453201293945, "rewards/margins": 14.492500305175781, "rewards/rejected": -22.601953506469727, "step": 15070 }, { "epoch": 2.93, "learning_rate": 1.3410512691450349e-08, "logits/chosen": -2.5073835849761963, "logits/rejected": -2.505967140197754, "logps/chosen": -260.0834045410156, "logps/rejected": -330.8136901855469, "loss": 0.063, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -11.07208251953125, "rewards/margins": 10.903297424316406, "rewards/rejected": -21.975379943847656, "step": 15080 }, { "epoch": 2.93, "learning_rate": 1.3050981520097792e-08, "logits/chosen": -2.3870558738708496, "logits/rejected": -2.444859743118286, "logps/chosen": -276.3438415527344, "logps/rejected": -446.5189514160156, "loss": 0.0645, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.5569586753845215, "rewards/margins": 17.061595916748047, "rewards/rejected": -23.61855697631836, "step": 15090 }, { "epoch": 2.93, "learning_rate": 1.2691450348745235e-08, "logits/chosen": -2.709730863571167, "logits/rejected": -2.6142489910125732, "logps/chosen": -305.55706787109375, "logps/rejected": -433.7855529785156, "loss": 0.0626, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.537403583526611, "rewards/margins": 13.999174118041992, "rewards/rejected": -19.536579132080078, "step": 15100 }, { "epoch": 2.93, "eval_logits/chosen": -2.425344228744507, "eval_logits/rejected": -2.397414207458496, "eval_logps/chosen": -326.3965759277344, "eval_logps/rejected": -392.78887939453125, "eval_loss": 0.6751859188079834, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -13.215849876403809, "eval_rewards/margins": 8.871443748474121, "eval_rewards/rejected": -22.087291717529297, "eval_runtime": 140.7296, "eval_samples_per_second": 22.426, "eval_steps_per_second": 0.355, "step": 15100 }, { "epoch": 2.93, "learning_rate": 1.233191917739268e-08, "logits/chosen": -2.536986827850342, "logits/rejected": -2.5120372772216797, "logps/chosen": -309.6207580566406, "logps/rejected": -427.33935546875, "loss": 0.07, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.251399040222168, "rewards/margins": 14.412198066711426, "rewards/rejected": -19.663597106933594, "step": 15110 }, { "epoch": 2.94, "learning_rate": 1.1972388006040123e-08, "logits/chosen": -2.5813136100769043, "logits/rejected": -2.582587957382202, "logps/chosen": -335.59967041015625, "logps/rejected": -392.62261962890625, "loss": 0.0527, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.910311222076416, "rewards/margins": 14.15099811553955, "rewards/rejected": -20.061307907104492, "step": 15120 }, { "epoch": 2.94, "learning_rate": 1.1612856834687566e-08, "logits/chosen": -2.494337558746338, "logits/rejected": -2.4271411895751953, "logps/chosen": -238.4246063232422, "logps/rejected": -349.49774169921875, "loss": 0.0888, "rewards/accuracies": 0.75, "rewards/chosen": -12.892738342285156, "rewards/margins": 8.284916877746582, "rewards/rejected": -21.177656173706055, "step": 15130 }, { "epoch": 2.94, "learning_rate": 1.125332566333501e-08, "logits/chosen": -2.5586869716644287, "logits/rejected": -2.590772867202759, "logps/chosen": -358.2134094238281, "logps/rejected": -464.673828125, "loss": 0.0882, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -10.563177108764648, "rewards/margins": 15.573925971984863, "rewards/rejected": -26.137104034423828, "step": 15140 }, { "epoch": 2.94, "learning_rate": 1.0893794491982455e-08, "logits/chosen": -2.4009156227111816, "logits/rejected": -2.3815758228302, "logps/chosen": -277.3514709472656, "logps/rejected": -384.10076904296875, "loss": 0.1009, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -12.652212142944336, "rewards/margins": 14.973703384399414, "rewards/rejected": -27.62591552734375, "step": 15150 }, { "epoch": 2.94, "learning_rate": 1.0534263320629897e-08, "logits/chosen": -2.365360736846924, "logits/rejected": -2.4083216190338135, "logps/chosen": -265.6725158691406, "logps/rejected": -445.07708740234375, "loss": 0.1004, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -12.468011856079102, "rewards/margins": 13.022611618041992, "rewards/rejected": -25.49062728881836, "step": 15160 }, { "epoch": 2.95, "learning_rate": 1.0174732149277342e-08, "logits/chosen": -2.5130014419555664, "logits/rejected": -2.389761447906494, "logps/chosen": -270.8379821777344, "logps/rejected": -416.90155029296875, "loss": 0.097, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -9.039586067199707, "rewards/margins": 15.094144821166992, "rewards/rejected": -24.13373374938965, "step": 15170 }, { "epoch": 2.95, "learning_rate": 9.815200977924786e-09, "logits/chosen": -2.552694320678711, "logits/rejected": -2.5263783931732178, "logps/chosen": -306.68438720703125, "logps/rejected": -456.9385681152344, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": -8.045750617980957, "rewards/margins": 18.316104888916016, "rewards/rejected": -26.36185646057129, "step": 15180 }, { "epoch": 2.95, "learning_rate": 9.455669806572229e-09, "logits/chosen": -2.729008197784424, "logits/rejected": -2.628079652786255, "logps/chosen": -268.72808837890625, "logps/rejected": -435.31927490234375, "loss": 0.0667, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.929935932159424, "rewards/margins": 13.383522033691406, "rewards/rejected": -17.313457489013672, "step": 15190 }, { "epoch": 2.95, "learning_rate": 9.096138635219673e-09, "logits/chosen": -2.6436684131622314, "logits/rejected": -2.4826548099517822, "logps/chosen": -322.7117004394531, "logps/rejected": -473.43695068359375, "loss": 0.046, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.110642910003662, "rewards/margins": 16.755704879760742, "rewards/rejected": -23.86634635925293, "step": 15200 }, { "epoch": 2.95, "eval_logits/chosen": -2.4254937171936035, "eval_logits/rejected": -2.397653818130493, "eval_logps/chosen": -326.61761474609375, "eval_logps/rejected": -393.1146240234375, "eval_loss": 0.673405110836029, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": -13.237957954406738, "eval_rewards/margins": 8.881909370422363, "eval_rewards/rejected": -22.11986541748047, "eval_runtime": 141.0271, "eval_samples_per_second": 22.379, "eval_steps_per_second": 0.355, "step": 15200 }, { "epoch": 2.95, "learning_rate": 8.736607463867116e-09, "logits/chosen": -2.497650623321533, "logits/rejected": -2.518714427947998, "logps/chosen": -283.69696044921875, "logps/rejected": -366.68072509765625, "loss": 0.0663, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -9.063966751098633, "rewards/margins": 13.147346496582031, "rewards/rejected": -22.2113094329834, "step": 15210 }, { "epoch": 2.95, "learning_rate": 8.377076292514562e-09, "logits/chosen": -2.4690277576446533, "logits/rejected": -2.4953746795654297, "logps/chosen": -242.02328491210938, "logps/rejected": -444.21026611328125, "loss": 0.0614, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.401689529418945, "rewards/margins": 15.326090812683105, "rewards/rejected": -23.727779388427734, "step": 15220 }, { "epoch": 2.96, "learning_rate": 8.017545121162005e-09, "logits/chosen": -2.475654363632202, "logits/rejected": -2.307687282562256, "logps/chosen": -294.1593322753906, "logps/rejected": -385.40960693359375, "loss": 0.065, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -6.7741804122924805, "rewards/margins": 10.907903671264648, "rewards/rejected": -17.682085037231445, "step": 15230 }, { "epoch": 2.96, "learning_rate": 7.658013949809447e-09, "logits/chosen": -2.499718427658081, "logits/rejected": -2.417612314224243, "logps/chosen": -297.8713073730469, "logps/rejected": -427.8961486816406, "loss": 0.0839, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -11.2389554977417, "rewards/margins": 18.335186004638672, "rewards/rejected": -29.574138641357422, "step": 15240 }, { "epoch": 2.96, "learning_rate": 7.298482778456892e-09, "logits/chosen": -2.4662883281707764, "logits/rejected": -2.3811111450195312, "logps/chosen": -278.16046142578125, "logps/rejected": -444.9239196777344, "loss": 0.0541, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -8.652475357055664, "rewards/margins": 16.56214141845703, "rewards/rejected": -25.214616775512695, "step": 15250 }, { "epoch": 2.96, "learning_rate": 6.938951607104335e-09, "logits/chosen": -2.5906896591186523, "logits/rejected": -2.440596342086792, "logps/chosen": -280.88531494140625, "logps/rejected": -372.7237548828125, "loss": 0.0434, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.08198070526123, "rewards/margins": 14.472452163696289, "rewards/rejected": -22.554431915283203, "step": 15260 }, { "epoch": 2.96, "learning_rate": 6.57942043575178e-09, "logits/chosen": -2.5039706230163574, "logits/rejected": -2.443559408187866, "logps/chosen": -280.50653076171875, "logps/rejected": -429.90460205078125, "loss": 0.0848, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.04439640045166, "rewards/margins": 13.79267406463623, "rewards/rejected": -20.83707046508789, "step": 15270 }, { "epoch": 2.97, "learning_rate": 6.2198892643992225e-09, "logits/chosen": -2.618950128555298, "logits/rejected": -2.5126147270202637, "logps/chosen": -292.8873291015625, "logps/rejected": -413.92669677734375, "loss": 0.0663, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.59955358505249, "rewards/margins": 13.895268440246582, "rewards/rejected": -21.494823455810547, "step": 15280 }, { "epoch": 2.97, "learning_rate": 5.860358093046667e-09, "logits/chosen": -2.558330535888672, "logits/rejected": -2.397799491882324, "logps/chosen": -303.582763671875, "logps/rejected": -461.54522705078125, "loss": 0.0643, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.999855995178223, "rewards/margins": 15.0127534866333, "rewards/rejected": -23.01260757446289, "step": 15290 }, { "epoch": 2.97, "learning_rate": 5.500826921694111e-09, "logits/chosen": -2.5134618282318115, "logits/rejected": -2.5630767345428467, "logps/chosen": -297.6739196777344, "logps/rejected": -353.0406799316406, "loss": 0.0464, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.0575456619262695, "rewards/margins": 11.345844268798828, "rewards/rejected": -16.403390884399414, "step": 15300 }, { "epoch": 2.97, "eval_logits/chosen": -2.429832935333252, "eval_logits/rejected": -2.4019625186920166, "eval_logps/chosen": -326.5859375, "eval_logps/rejected": -393.0613708496094, "eval_loss": 0.6733829975128174, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -13.234786033630371, "eval_rewards/margins": 8.879753112792969, "eval_rewards/rejected": -22.114540100097656, "eval_runtime": 142.4888, "eval_samples_per_second": 22.149, "eval_steps_per_second": 0.351, "step": 15300 }, { "epoch": 2.97, "learning_rate": 5.141295750341554e-09, "logits/chosen": -2.551086902618408, "logits/rejected": -2.5036799907684326, "logps/chosen": -296.6831970214844, "logps/rejected": -526.3468017578125, "loss": 0.0762, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -7.280969142913818, "rewards/margins": 23.061071395874023, "rewards/rejected": -30.342041015625, "step": 15310 }, { "epoch": 2.97, "learning_rate": 4.781764578988998e-09, "logits/chosen": -2.374816417694092, "logits/rejected": -2.4634015560150146, "logps/chosen": -311.0668640136719, "logps/rejected": -408.4220886230469, "loss": 0.0696, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.171885013580322, "rewards/margins": 14.707513809204102, "rewards/rejected": -21.8794002532959, "step": 15320 }, { "epoch": 2.98, "learning_rate": 4.422233407636442e-09, "logits/chosen": -2.568384885787964, "logits/rejected": -2.5400028228759766, "logps/chosen": -287.30303955078125, "logps/rejected": -478.36456298828125, "loss": 0.078, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -5.014510154724121, "rewards/margins": 16.825496673583984, "rewards/rejected": -21.840007781982422, "step": 15330 }, { "epoch": 2.98, "learning_rate": 4.062702236283886e-09, "logits/chosen": -2.4930148124694824, "logits/rejected": -2.4020228385925293, "logps/chosen": -299.522216796875, "logps/rejected": -595.8134765625, "loss": 0.045, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.225922584533691, "rewards/margins": 22.615100860595703, "rewards/rejected": -29.841022491455078, "step": 15340 }, { "epoch": 2.98, "learning_rate": 3.7031710649313298e-09, "logits/chosen": -2.5365307331085205, "logits/rejected": -2.519602060317993, "logps/chosen": -301.25238037109375, "logps/rejected": -390.9139709472656, "loss": 0.0628, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -6.842535495758057, "rewards/margins": 15.489392280578613, "rewards/rejected": -22.33193016052246, "step": 15350 }, { "epoch": 2.98, "learning_rate": 3.3436398935787733e-09, "logits/chosen": -2.5512332916259766, "logits/rejected": -2.3916690349578857, "logps/chosen": -360.29815673828125, "logps/rejected": -470.9840393066406, "loss": 0.0661, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.730353355407715, "rewards/margins": 18.736759185791016, "rewards/rejected": -27.467113494873047, "step": 15360 }, { "epoch": 2.98, "learning_rate": 2.984108722226217e-09, "logits/chosen": -2.6082935333251953, "logits/rejected": -2.5024819374084473, "logps/chosen": -270.26788330078125, "logps/rejected": -463.6595764160156, "loss": 0.0785, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -9.266807556152344, "rewards/margins": 11.69709300994873, "rewards/rejected": -20.96390151977539, "step": 15370 }, { "epoch": 2.99, "learning_rate": 2.624577550873661e-09, "logits/chosen": -2.6313486099243164, "logits/rejected": -2.6200969219207764, "logps/chosen": -316.62152099609375, "logps/rejected": -471.7225646972656, "loss": 0.0747, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -5.830097675323486, "rewards/margins": 19.329891204833984, "rewards/rejected": -25.159992218017578, "step": 15380 }, { "epoch": 2.99, "learning_rate": 2.2650463795211044e-09, "logits/chosen": -2.4944376945495605, "logits/rejected": -2.4808757305145264, "logps/chosen": -314.99005126953125, "logps/rejected": -491.7989196777344, "loss": 0.0821, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -9.987020492553711, "rewards/margins": 13.945060729980469, "rewards/rejected": -23.932083129882812, "step": 15390 }, { "epoch": 2.99, "learning_rate": 1.905515208168548e-09, "logits/chosen": -2.416228771209717, "logits/rejected": -2.2908473014831543, "logps/chosen": -287.78607177734375, "logps/rejected": -383.16790771484375, "loss": 0.0599, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -8.41087532043457, "rewards/margins": 18.279054641723633, "rewards/rejected": -26.689929962158203, "step": 15400 }, { "epoch": 2.99, "eval_logits/chosen": -2.4312970638275146, "eval_logits/rejected": -2.403452157974243, "eval_logps/chosen": -326.8237609863281, "eval_logps/rejected": -393.3601989746094, "eval_loss": 0.6729053258895874, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -13.25857162475586, "eval_rewards/margins": 8.885856628417969, "eval_rewards/rejected": -22.144426345825195, "eval_runtime": 141.2842, "eval_samples_per_second": 22.338, "eval_steps_per_second": 0.354, "step": 15400 }, { "epoch": 2.99, "learning_rate": 1.5459840368159919e-09, "logits/chosen": -2.6447572708129883, "logits/rejected": -2.5998551845550537, "logps/chosen": -347.8985290527344, "logps/rejected": -591.987548828125, "loss": 0.0466, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -7.259090423583984, "rewards/margins": 16.267677307128906, "rewards/rejected": -23.52676773071289, "step": 15410 }, { "epoch": 2.99, "learning_rate": 1.1864528654634356e-09, "logits/chosen": -2.485642433166504, "logits/rejected": -2.6031100749969482, "logps/chosen": -523.435302734375, "logps/rejected": -507.1851501464844, "loss": 0.1018, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -8.69085693359375, "rewards/margins": 16.88809585571289, "rewards/rejected": -25.578950881958008, "step": 15420 }, { "epoch": 3.0, "learning_rate": 8.269216941108794e-10, "logits/chosen": -2.476501703262329, "logits/rejected": -2.467801570892334, "logps/chosen": -260.9589538574219, "logps/rejected": -526.767822265625, "loss": 0.0479, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -8.30578899383545, "rewards/margins": 20.849079132080078, "rewards/rejected": -29.154870986938477, "step": 15430 }, { "epoch": 3.0, "learning_rate": 4.673905227583231e-10, "logits/chosen": -2.3333487510681152, "logits/rejected": -2.4137845039367676, "logps/chosen": -223.9978485107422, "logps/rejected": -391.9574279785156, "loss": 0.0962, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -4.648858547210693, "rewards/margins": 13.01892375946045, "rewards/rejected": -17.667781829833984, "step": 15440 }, { "epoch": 3.0, "learning_rate": 1.0785935140576688e-10, "logits/chosen": -2.3760483264923096, "logits/rejected": -2.3039135932922363, "logps/chosen": -270.649169921875, "logps/rejected": -341.77001953125, "loss": 0.0706, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.1886796951293945, "rewards/margins": 13.63255500793457, "rewards/rejected": -15.821233749389648, "step": 15450 }, { "epoch": 3.0, "step": 15453, "total_flos": 0.0, "train_loss": 0.2455477410652717, "train_runtime": 50581.1093, "train_samples_per_second": 4.887, "train_steps_per_second": 0.306 } ], "logging_steps": 10, "max_steps": 15453, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }